Index: stable/10/sys/kern/kern_ktr.c
===================================================================
--- stable/10/sys/kern/kern_ktr.c	(revision 293852)
+++ stable/10/sys/kern/kern_ktr.c	(revision 293853)
@@ -1,492 +1,489 @@
 /*-
  * Copyright (c) 2000 John Baldwin <jhb@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * This module holds the global variables used by KTR and the ktr_tracepoint()
  * function that does the actual tracing.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_ktr.h"
 #include "opt_alq.h"
 
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/alq.h>
 #include <sys/cons.h>
 #include <sys/cpuset.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/libkern.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 
 #include <machine/cpu.h>
-#ifdef __sparc64__
-#include <machine/ktr.h>
-#endif
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #include <ddb/db_output.h>
 #endif
 
 #ifndef KTR_BOOT_ENTRIES
 #define	KTR_BOOT_ENTRIES	1024
 #endif
 
 #ifndef KTR_ENTRIES
 #define	KTR_ENTRIES	1024
 #endif
 
 /* Limit the allocations to something manageable. */
 #define	KTR_ENTRIES_MAX	(8 * 1024 * 1024)
 
 #ifndef KTR_MASK
 #define	KTR_MASK	(0)
 #endif
 
 #ifndef KTR_CPUMASK
 #define	KTR_CPUMASK	CPUSET_FSET
 #endif
 
 #ifndef KTR_TIME
 #define	KTR_TIME	get_cyclecount()
 #endif
 
 #ifndef KTR_CPU
 #define	KTR_CPU		PCPU_GET(cpuid)
 #endif
 
 static MALLOC_DEFINE(M_KTR, "KTR", "KTR");
 
 FEATURE(ktr, "Kernel support for KTR kernel tracing facility");
 
 volatile int	ktr_idx = 0;
 int	ktr_mask = KTR_MASK;
 int	ktr_compile = KTR_COMPILE;
 int	ktr_entries = KTR_BOOT_ENTRIES;
 int	ktr_version = KTR_VERSION;
 struct	ktr_entry ktr_buf_init[KTR_BOOT_ENTRIES];
 struct	ktr_entry *ktr_buf = ktr_buf_init;
 cpuset_t ktr_cpumask = CPUSET_T_INITIALIZER(KTR_CPUMASK);
 static char ktr_cpumask_str[CPUSETBUFSIZ];
 
 TUNABLE_INT("debug.ktr.mask", &ktr_mask);
 
 TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str));
 
 static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options");
 
 SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD,
     &ktr_version, 0, "Version of the KTR interface");
 
 SYSCTL_UINT(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD,
     &ktr_compile, 0, "Bitmask of KTR event classes compiled into the kernel");
 
 /*
  * Apply the "debug.ktr.cpumask" boot tunable to ktr_cpumask.  Nothing is
  * done when no string was supplied; a string that cannot be parsed makes
  * the mask fall back to a full set via CPU_FILL().
  */
 static void
 ktr_cpumask_initializer(void *dummy __unused)
 {
 
 	/*
 	 * TUNABLE_STR() runs with SI_ORDER_MIDDLE priority, so by the time
 	 * this SI_ORDER_ANY hook runs the string has been fetched already.
 	 */
 	if (ktr_cpumask_str[0] == '\0')
 		return;
 	if (cpusetobj_strscan(&ktr_cpumask, ktr_cpumask_str) == -1)
 		CPU_FILL(&ktr_cpumask);
 }
 SYSINIT(ktr_cpumask_initializer, SI_SUB_TUNABLES, SI_ORDER_ANY,
     ktr_cpumask_initializer, NULL);
 
 static int
 sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS)
 {
 	char lktr_cpumask_str[CPUSETBUFSIZ];
 	cpuset_t imask;
 	int error;
 
 	cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask);
 	error = sysctl_handle_string(oidp, lktr_cpumask_str,
 	    sizeof(lktr_cpumask_str), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1)
 		return (EINVAL);
 	CPU_COPY(&imask, &ktr_cpumask);
 
 	return (error);
 }
 SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask,
     CTLFLAG_RW | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0,
     sysctl_debug_ktr_cpumask, "S",
     "Bitmask of CPUs on which KTR logging is enabled");
 
 static int
 sysctl_debug_ktr_clear(SYSCTL_HANDLER_ARGS)
 {
 	int clear, error;
 
 	clear = 0;
 	error = sysctl_handle_int(oidp, &clear, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	if (clear) {
 		bzero(ktr_buf, sizeof(*ktr_buf) * ktr_entries);
 		ktr_idx = 0;
 	}
 
 	return (error);
 }
 SYSCTL_PROC(_debug_ktr, OID_AUTO, clear, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
     sysctl_debug_ktr_clear, "I", "Clear KTR Buffer");
 
 /*
  * This is a sysctl proc so that it is serialized as !MPSAFE along with
  * the other ktr sysctl procs.
  */
 static int
 sysctl_debug_ktr_mask(SYSCTL_HANDLER_ARGS)
 {
 	int mask, error;
 
 	mask = ktr_mask;
 	error = sysctl_handle_int(oidp, &mask, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	ktr_mask = mask;
 	return (error);
 }
 
 SYSCTL_PROC(_debug_ktr, OID_AUTO, mask, CTLTYPE_UINT|CTLFLAG_RW, 0, 0,
     sysctl_debug_ktr_mask, "IU",
     "Bitmask of KTR event classes for which logging is enabled");
 
 #if KTR_ENTRIES > KTR_BOOT_ENTRIES
 /*
  * A simplified version of sysctl_debug_ktr_entries, compiled in only when
  * KTR_ENTRIES is larger than KTR_BOOT_ENTRIES and run from SYSINIT.
  * No need to care about SMP, scheduling, etc.
  *
  * Moves tracing from the static boot buffer to a malloc()ed buffer of
  * KTR_ENTRIES entries, carrying the boot-time entries over.
  */
 static void
 ktr_entries_initializer(void *dummy __unused)
 {
 	int mask;
 
 	/* Temporarily disable ktr in case malloc() is being traced. */
 	mask = ktr_mask;
 	ktr_mask = 0;
 	ktr_buf = malloc(sizeof(*ktr_buf) * KTR_ENTRIES, M_KTR,
 	    M_WAITOK | M_ZERO);
 	/*
 	 * Rotate the boot buffer so that the slot ktr_idx pointed at lands
 	 * at index 0 of the new buffer: first the tail starting at ktr_idx,
 	 * then the head that precedes it.
 	 */
 	memcpy(ktr_buf, ktr_buf_init + ktr_idx,
 	    (KTR_BOOT_ENTRIES - ktr_idx) * sizeof(*ktr_buf));
 	if (ktr_idx != 0) {
 		memcpy(ktr_buf + KTR_BOOT_ENTRIES - ktr_idx, ktr_buf_init,
 		    ktr_idx * sizeof(*ktr_buf));
 		/*
 		 * The copied entries now occupy [0, KTR_BOOT_ENTRIES).
 		 * Continue logging right after them; leaving ktr_idx alone
 		 * would make new events overwrite rotated entries in the
 		 * middle of that range.  KTR_BOOT_ENTRIES is a valid index
 		 * because KTR_ENTRIES > KTR_BOOT_ENTRIES here.
 		 */
 		ktr_idx = KTR_BOOT_ENTRIES;
 	}
 	ktr_entries = KTR_ENTRIES;
 	ktr_mask = mask;
 }
 SYSINIT(ktr_entries_initializer, SI_SUB_KMEM, SI_ORDER_ANY,
     ktr_entries_initializer, NULL);
 #endif
 
 /*
  * Handler for debug.ktr.entries.  Reports the current number of entries
  * and, on a write, replaces the trace buffer with a freshly zeroed one of
  * the requested size.  Existing trace contents are discarded.
  *
  * Returns ERANGE when the requested size is not in [1, KTR_ENTRIES_MAX],
  * or the error from quiesce_all_cpus() if that wait fails.
  */
 static int
 sysctl_debug_ktr_entries(SYSCTL_HANDLER_ARGS)
 {
 	int entries, error, mask;
 	struct ktr_entry *buf, *oldbuf;
 
 	entries = ktr_entries;
 	error = sysctl_handle_int(oidp, &entries, 0, req);
 	if (error || !req->newptr)
 		return (error);
 	/*
 	 * Reject zero and negative sizes as well as oversized ones:
 	 * ktr_tracepoint() computes the next index modulo ktr_entries, so a
 	 * value of 0 would divide by zero, and a negative value would be
 	 * turned into a huge allocation request.
 	 */
 	if (entries < 1 || entries > KTR_ENTRIES_MAX)
 		return (ERANGE);
 	/* Disable ktr temporarily. */
 	mask = ktr_mask;
 	atomic_store_rel_int(&ktr_mask, 0);
 	/* Wait for threads to go idle. */
 	if ((error = quiesce_all_cpus("ktrent", PCATCH)) != 0) {
 		ktr_mask = mask;
 		return (error);
 	}
 	/* The static boot buffer must never be handed to free(). */
 	if (ktr_buf != ktr_buf_init)
 		oldbuf = ktr_buf;
 	else
 		oldbuf = NULL;
 	/* Allocate a new buffer. */
 	buf = malloc(sizeof(*buf) * entries, M_KTR, M_WAITOK | M_ZERO);
 	/* Install the new buffer and restart ktr. */
 	ktr_buf = buf;
 	ktr_entries = entries;
 	ktr_idx = 0;
 	atomic_store_rel_int(&ktr_mask, mask);
 	if (oldbuf != NULL)
 		free(oldbuf, M_KTR);
 
 	return (error);
 }
 
 SYSCTL_PROC(_debug_ktr, OID_AUTO, entries, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
     sysctl_debug_ktr_entries, "I", "Number of entries in the KTR buffer");
 
 #ifdef KTR_VERBOSE
 int	ktr_verbose = KTR_VERBOSE;
 TUNABLE_INT("debug.ktr.verbose", &ktr_verbose);
 SYSCTL_INT(_debug_ktr, OID_AUTO, verbose, CTLFLAG_RW, &ktr_verbose, 0, "");
 #endif
 
 #ifdef KTR_ALQ
 struct alq *ktr_alq;
 char	ktr_alq_file[MAXPATHLEN] = "/tmp/ktr.out";
 int	ktr_alq_cnt = 0;
 int	ktr_alq_depth = KTR_ENTRIES;
 int	ktr_alq_enabled = 0;
 int	ktr_alq_failed = 0;
 int	ktr_alq_max = 0;
 
 SYSCTL_INT(_debug_ktr, OID_AUTO, alq_max, CTLFLAG_RW, &ktr_alq_max, 0,
     "Maximum number of entries to write");
 SYSCTL_INT(_debug_ktr, OID_AUTO, alq_cnt, CTLFLAG_RD, &ktr_alq_cnt, 0,
     "Current number of written entries");
 SYSCTL_INT(_debug_ktr, OID_AUTO, alq_failed, CTLFLAG_RD, &ktr_alq_failed, 0,
     "Number of times we overran the buffer");
 SYSCTL_INT(_debug_ktr, OID_AUTO, alq_depth, CTLFLAG_RW, &ktr_alq_depth, 0,
     "Number of items in the write buffer");
 SYSCTL_STRING(_debug_ktr, OID_AUTO, alq_file, CTLFLAG_RW, ktr_alq_file,
     sizeof(ktr_alq_file), "KTR logging file");
 
 static int
 sysctl_debug_ktr_alq_enable(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int enable;
 
 	enable = ktr_alq_enabled;
 
 	error = sysctl_handle_int(oidp, &enable, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	if (enable) {
 		if (ktr_alq_enabled)
 			return (0);
 		error = alq_open(&ktr_alq, (const char *)ktr_alq_file,
 		    req->td->td_ucred, ALQ_DEFAULT_CMODE,
 		    sizeof(struct ktr_entry), ktr_alq_depth);
 		if (error == 0) {
 			ktr_alq_cnt = 0;
 			ktr_alq_failed = 0;
 			ktr_alq_enabled = 1;
 		}
 	} else {
 		if (ktr_alq_enabled == 0)
 			return (0);
 		ktr_alq_enabled = 0;
 		alq_close(ktr_alq);
 		ktr_alq = NULL;
 	}
 
 	return (error);
 }
 SYSCTL_PROC(_debug_ktr, OID_AUTO, alq_enable,
     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_alq_enable,
     "I", "Enable KTR logging");
 #endif
 
 /*
  * Record one trace event.
  *
  * The event is dropped when the system has panicked, when none of the bits
  * in 'mask' are set in ktr_mask, when there is no trace buffer, or when the
  * current CPU is not in ktr_cpumask.  Otherwise an entry is filled in with
  * the timestamp, CPU, current thread, source location, format string and
  * the six arguments.  When KTR_ALQ is compiled in and ktr_alq_enabled is
  * set, the entry is obtained from the ALQ instead of the in-memory buffer.
  *
  * 'format' and 'file' are stored by pointer, not copied.
  */
 void
 ktr_tracepoint(u_int mask, const char *file, int line, const char *format,
     u_long arg1, u_long arg2, u_long arg3, u_long arg4, u_long arg5,
     u_long arg6)
 {
 	struct ktr_entry *entry;
 #ifdef KTR_ALQ
 	struct ale *ale = NULL;
 #endif
 	int newindex, saveindex;
 #if defined(KTR_VERBOSE) || defined(KTR_ALQ)
 	struct thread *td;
 #endif
 	int cpu;
 
 	if (panicstr)
 		return;
 	if ((ktr_mask & mask) == 0 || ktr_buf == NULL)
 		return;
 	cpu = KTR_CPU;
 	if (!CPU_ISSET(cpu, &ktr_cpumask))
 		return;
 #if defined(KTR_VERBOSE) || defined(KTR_ALQ)
 	/*
 	 * TDP_INKTR marks a thread that is already inside this function;
 	 * bail out instead of re-entering.  The flag is cleared on exit.
 	 */
 	td = curthread;
 	if (td->td_pflags & TDP_INKTR)
 		return;
 	td->td_pflags |= TDP_INKTR;
 #endif
 #ifdef KTR_ALQ
 	if (ktr_alq_enabled) {
 		/*
 		 * Only log to the ALQ when td_critnest is zero, the thread
 		 * is not an idle thread and it is not ald_thread; all other
 		 * events are dropped while ALQ logging is enabled.
 		 */
 		if (td->td_critnest == 0 &&
 		    (td->td_flags & TDF_IDLETD) == 0 &&
 		    td != ald_thread) {
 			if (ktr_alq_max && ktr_alq_cnt > ktr_alq_max)
 				goto done;
 			if ((ale = alq_get(ktr_alq, ALQ_NOWAIT)) == NULL) {
 				ktr_alq_failed++;
 				goto done;
 			}
 			ktr_alq_cnt++;
 			entry = (struct ktr_entry *)ale->ae_data;
 		} else {
 			goto done;
 		}
 	} else
 #endif
 	{
 		/*
 		 * Claim the slot at ktr_idx by atomically advancing the
 		 * index (wrapping at ktr_entries); retry if ktr_idx changed
 		 * between the read and the compare-and-set.
 		 */
 		do {
 			saveindex = ktr_idx;
 			newindex = (saveindex + 1) % ktr_entries;
 		} while (atomic_cmpset_rel_int(&ktr_idx, saveindex, newindex) == 0);
 		entry = &ktr_buf[saveindex];
 	}
 	entry->ktr_timestamp = KTR_TIME;
 	entry->ktr_cpu = cpu;
 	entry->ktr_thread = curthread;
 	/* Strip any leading "../" components from the file name. */
 	if (file != NULL)
 		while (strncmp(file, "../", 3) == 0)
 			file += 3;
 	entry->ktr_file = file;
 	entry->ktr_line = line;
 #ifdef KTR_VERBOSE
 	/* Echo the event via printf() as well when ktr_verbose is set. */
 	if (ktr_verbose) {
 #ifdef SMP
 		printf("cpu%d ", cpu);
 #endif
 		if (ktr_verbose > 1) {
 			printf("%s.%d\t", entry->ktr_file,
 			    entry->ktr_line);
 		}
 		printf(format, arg1, arg2, arg3, arg4, arg5, arg6);
 		printf("\n");
 	}
 #endif
 	entry->ktr_desc = format;
 	entry->ktr_parms[0] = arg1;
 	entry->ktr_parms[1] = arg2;
 	entry->ktr_parms[2] = arg3;
 	entry->ktr_parms[3] = arg4;
 	entry->ktr_parms[4] = arg5;
 	entry->ktr_parms[5] = arg6;
 #ifdef KTR_ALQ
 	/* Hand the filled-in entry back to the ALQ. */
 	if (ktr_alq_enabled && ale)
 		alq_post(ktr_alq, ale);
 done:
 #endif
 #if defined(KTR_VERBOSE) || defined(KTR_ALQ)
 	td->td_pflags &= ~TDP_INKTR;
 #endif
 }
 
 #ifdef DDB
 
 struct tstate {
 	int	cur;
 	int	first;
 };
 static	struct tstate tstate;
 static	int db_ktr_verbose;
 static	int db_mach_vtrace(void);
 
 /*
  * DDB "show ktr" command.  Prints trace entries starting with the one just
  * before ktr_idx and walking backwards through the buffer.
  *
  * Modifiers:
  *	v	print the timestamp and the file/line of each entry
  *	V	print just the timestamp
  *	a	disable the pager and keep printing until a key is pressed
  *		or the end of the trace is reached
  */
 DB_SHOW_COMMAND(ktr, db_ktr_all)
 {
 
 	/*
 	 * Bias by ktr_entries before taking the remainder: with ktr_idx == 0
 	 * a plain (ktr_idx - 1) % ktr_entries yields -1, which equals the
 	 * "nothing printed yet" value of tstate.first and would end the dump
 	 * before a single entry is shown.
 	 */
 	tstate.cur = (ktr_idx + ktr_entries - 1) % ktr_entries;
 	tstate.first = -1;
 	db_ktr_verbose = 0;
 	if (strchr(modif, 'v') != NULL)
 		db_ktr_verbose |= 2;
 	if (strchr(modif, 'V') != NULL)
 		db_ktr_verbose |= 1;	/* just the timestamp, please */
 	if (strchr(modif, 'a') != NULL) {
 		db_disable_pager();
 		/*
 		 * cncheckc() returns -1 while no console input is pending,
 		 * so keep printing until a key shows up.
 		 */
 		while (cncheckc() == -1)
 			if (db_mach_vtrace() == 0)
 				break;
 	} else {
 		while (!db_pager_quit)
 			if (db_mach_vtrace() == 0)
 				break;
 	}
 }
 
 /*
  * Print the trace entry at tstate.cur and step tstate.cur back by one,
  * wrapping to the last slot of the buffer.  Returns 1 when an entry was
  * printed and 0 at the end of the trace: when the walk is back at the
  * first entry printed, when there is no buffer, or when the entry has no
  * format string.
  */
 static int
 db_mach_vtrace(void)
 {
 	struct ktr_entry *ent;
 
 	/* Short-circuit keeps ktr_buf from being indexed when it is NULL. */
 	if (tstate.cur == tstate.first || ktr_buf == NULL ||
 	    ktr_buf[tstate.cur].ktr_desc == NULL) {
 		db_printf("--- End of trace buffer ---\n");
 		return (0);
 	}
 	ent = &ktr_buf[tstate.cur];
 
 	db_printf("%d (%p", tstate.cur, ent->ktr_thread);
 #ifdef SMP
 	db_printf(":cpu%d", ent->ktr_cpu);
 #endif
 	db_printf(")");
 	if (db_ktr_verbose >= 1)
 		db_printf(" %10.10lld", (long long)ent->ktr_timestamp);
 	if (db_ktr_verbose >= 2)
 		db_printf(" %s.%d", ent->ktr_file, ent->ktr_line);
 	db_printf(": ");
 	db_printf(ent->ktr_desc, ent->ktr_parms[0], ent->ktr_parms[1],
 	    ent->ktr_parms[2], ent->ktr_parms[3], ent->ktr_parms[4],
 	    ent->ktr_parms[5]);
 	db_printf("\n");
 
 	/* Remember where the walk started so a full lap can be detected. */
 	if (tstate.first == -1)
 		tstate.first = tstate.cur;
 
 	tstate.cur--;
 	if (tstate.cur < 0)
 		tstate.cur = ktr_entries - 1;
 
 	return (1);
 }
 
 #endif	/* DDB */
Index: stable/10/sys/sparc64/include/ktr.h
===================================================================
--- stable/10/sys/sparc64/include/ktr.h	(revision 293852)
+++ stable/10/sys/sparc64/include/ktr.h	(revision 293853)
@@ -1,108 +1,99 @@
 /*-
  * Copyright (c) 1996 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from BSDI $Id: ktr.h,v 1.10.2.7 2000/03/16 21:44:42 cp Exp $
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_KTR_H_
 #define _MACHINE_KTR_H_
 
 #include <sys/ktr.h>
 
-#ifndef LOCORE
+#ifdef LOCORE
 
-#define	KTR_CPU	PCPU_GET(mid)
-
-#else
-
 /*
- * XXX could really use another register...
+ * XXX could really use another register ...
  */
 #define	ATR(desc, r1, r2, r3, l1, l2) \
 	.sect	.rodata ; \
 l1:	.asciz	desc ; \
 	.previous ; \
 	SET(ktr_idx, r2, r1) ; \
 	lduw	[r1], r2 ; \
 l2:	add	r2, 1, r3 ; \
 	set	KTR_ENTRIES - 1, r1 ; \
 	and	r3, r1, r3 ; \
 	set	ktr_idx, r1 ; \
 	casa	[r1] ASI_N, r2, r3 ; \
 	cmp	r2, r3 ; \
 	bne	%icc, l2 ## b ; \
 	 mov	r3, r2 ; \
 	SET(ktr_buf, r3, r1) ; \
 	ldx	[r1], r1 ; \
 	mulx	r2, KTR_SIZEOF, r2 ; \
 	add	r1, r2, r1 ; \
 	rd	%tick, r2 ; \
 	stx	r2, [r1 + KTR_TIMESTAMP] ; \
-	lduw	[PCPU(MID)], r2 ; \
+	lduw	[PCPU(CPUID)], r2 ; \
 	stw	r2, [r1 + KTR_CPU] ; \
 	stw	%g0, [r1 + KTR_LINE] ; \
 	stx	%g0, [r1 + KTR_FILE] ; \
 	SET(l1 ## b, r3, r2) ; \
 	stx	r2, [r1 + KTR_DESC]
 
-/*
- * NB: this clobbers %y.
- */
 #define CATR(mask, desc, r1, r2, r3, l1, l2, l3) \
 	set	mask, r1 ; \
 	SET(ktr_mask, r3, r2) ; \
 	lduw	[r2], r2 ; \
 	and	r2, r1, r1 ; \
 	brz	r1, l3 ## f ; \
 	 nop ; \
 	lduw	[PCPU(CPUID)], r2 ; \
 	mov	_NCPUBITS, r3 ; \
-	mov	%g0, %y ; \
-	udiv	r2, r3, r2 ; \
+	udivx	r2, r3, r2 ; \
 	srl	r2, 0, r2 ; \
 	sllx	r2, PTR_SHIFT, r2 ; \
 	SET(ktr_cpumask, r3, r1) ; \
 	ldx	[r1 + r2], r1 ; \
 	lduw	[PCPU(CPUID)], r2 ; \
 	mov	_NCPUBITS, r3 ; \
-	mov	%g0, %y ; \
-	udiv	r2, r3, r2 ; \
+	udivx	r2, r3, r2 ; \
 	srl	r2, 0, r2 ; \
 	smul	r2, r3, r3 ; \
 	lduw	[PCPU(CPUID)], r2 ; \
 	sub	r2, r3, r3 ; \
 	mov	1, r2 ; \
 	sllx	r2, r3, r2 ; \
 	andn	r1, r2, r1 ; \
 	brz	r1, l3 ## f ; \
 	 nop ; \
 	ATR(desc, r1, r2, r3, l1, l2)
 
 #endif /* LOCORE */
 
 #endif /* !_MACHINE_KTR_H_ */
Index: stable/10/sys/sparc64/sparc64/exception.S
===================================================================
--- stable/10/sys/sparc64/sparc64/exception.S	(revision 293852)
+++ stable/10/sys/sparc64/sparc64/exception.S	(revision 293853)
@@ -1,3082 +1,3068 @@
 /*-
  * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	BSDI $Id: locore.s,v 1.36.2.15 1999/08/23 22:34:41 cp Exp $
  */
 /*-
  * Copyright (c) 2001 Jake Burkholder.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
 
 #include <machine/asi.h>
 #include <machine/asmacros.h>
 #include <machine/frame.h>
 #include <machine/fsr.h>
 #include <machine/intr_machdep.h>
 #include <machine/ktr.h>
 #include <machine/pcb.h>
 #include <machine/pstate.h>
 #include <machine/trap.h>
 #include <machine/tsb.h>
 #include <machine/tstate.h>
 #include <machine/utrap.h>
 #include <machine/wstate.h>
 
 #include "assym.s"
 
 #define	TSB_ASI			0x0
 #define	TSB_KERNEL		0x0
 #define	TSB_KERNEL_MASK		0x0
 #define	TSB_KERNEL_PHYS		0x0
 #define	TSB_KERNEL_PHYS_END	0x0
 #define	TSB_QUAD_LDD		0x0
 
 	.register %g2,#ignore
 	.register %g3,#ignore
 	.register %g6,#ignore
 	.register %g7,#ignore
 
 /*
  * Atomically set a bit in a TTE.
  */
 #define	TTE_SET_BIT(r1, r2, r3, bit, a, asi) \
 	add	r1, TTE_DATA, r1 ; \
 	LD(x, a) [r1] asi, r2 ; \
 9:	or	r2, bit, r3 ; \
 	CAS(x, a) [r1] asi, r2, r3 ; \
 	cmp	r2, r3 ; \
 	bne,pn	%xcc, 9b ; \
 	 mov	r3, r2
 
 #define	TTE_SET_REF(r1, r2, r3, a, asi)	TTE_SET_BIT(r1, r2, r3, TD_REF, a, asi)
 #define	TTE_SET_W(r1, r2, r3, a, asi)	TTE_SET_BIT(r1, r2, r3, TD_W, a, asi)
 
 /*
  * Macros for spilling and filling live windows.
  *
  * NOTE: These macros use exactly 16 instructions, and it is assumed that the
  * handler will not use more than 24 instructions total, to leave room for
  * resume vectors which occupy the last 8 instructions.
  */
 
 #define	SPILL(storer, base, size, asi) \
 	storer	%l0, [base + (0 * size)] asi ; \
 	storer	%l1, [base + (1 * size)] asi ; \
 	storer	%l2, [base + (2 * size)] asi ; \
 	storer	%l3, [base + (3 * size)] asi ; \
 	storer	%l4, [base + (4 * size)] asi ; \
 	storer	%l5, [base + (5 * size)] asi ; \
 	storer	%l6, [base + (6 * size)] asi ; \
 	storer	%l7, [base + (7 * size)] asi ; \
 	storer	%i0, [base + (8 * size)] asi ; \
 	storer	%i1, [base + (9 * size)] asi ; \
 	storer	%i2, [base + (10 * size)] asi ; \
 	storer	%i3, [base + (11 * size)] asi ; \
 	storer	%i4, [base + (12 * size)] asi ; \
 	storer	%i5, [base + (13 * size)] asi ; \
 	storer	%i6, [base + (14 * size)] asi ; \
 	storer	%i7, [base + (15 * size)] asi
 
 #define	FILL(loader, base, size, asi) \
 	loader	[base + (0 * size)] asi, %l0 ; \
 	loader	[base + (1 * size)] asi, %l1 ; \
 	loader	[base + (2 * size)] asi, %l2 ; \
 	loader	[base + (3 * size)] asi, %l3 ; \
 	loader	[base + (4 * size)] asi, %l4 ; \
 	loader	[base + (5 * size)] asi, %l5 ; \
 	loader	[base + (6 * size)] asi, %l6 ; \
 	loader	[base + (7 * size)] asi, %l7 ; \
 	loader	[base + (8 * size)] asi, %i0 ; \
 	loader	[base + (9 * size)] asi, %i1 ; \
 	loader	[base + (10 * size)] asi, %i2 ; \
 	loader	[base + (11 * size)] asi, %i3 ; \
 	loader	[base + (12 * size)] asi, %i4 ; \
 	loader	[base + (13 * size)] asi, %i5 ; \
 	loader	[base + (14 * size)] asi, %i6 ; \
 	loader	[base + (15 * size)] asi, %i7
 
 #define	ERRATUM50(reg)	mov reg, reg
 
 #define	KSTACK_SLOP	1024
 
 /*
  * Sanity check the kernel stack and bail out if it's wrong.
  * XXX: doesn't handle being on the panic stack.
  */
 #define	KSTACK_CHECK \
 	dec	16, ASP_REG ; \
 	stx	%g1, [ASP_REG + 0] ; \
 	stx	%g2, [ASP_REG + 8] ; \
 	add	%sp, SPOFF, %g1 ; \
 	andcc	%g1, (1 << PTR_SHIFT) - 1, %g0 ; \
 	bnz,a	%xcc, tl1_kstack_fault ; \
 	 inc	16, ASP_REG ; \
 	ldx	[PCPU(CURTHREAD)], %g2 ; \
 	ldx	[%g2 + TD_KSTACK], %g2 ; \
 	add	%g2, KSTACK_SLOP, %g2 ; \
 	subcc	%g1, %g2, %g1 ; \
 	ble,a	%xcc, tl1_kstack_fault ; \
 	 inc	16, ASP_REG ; \
 	set	KSTACK_PAGES * PAGE_SIZE, %g2 ; \
 	cmp	%g1, %g2 ; \
 	bgt,a	%xcc, tl1_kstack_fault ; \
 	 inc	16, ASP_REG ; \
 	ldx	[ASP_REG + 8], %g2 ; \
 	ldx	[ASP_REG + 0], %g1 ; \
 	inc	16, ASP_REG
 
 	.globl	tl_text_begin
 tl_text_begin:
 	nop
 
 /*
  * Reached from KSTACK_CHECK when the kernel stack pointer is misaligned or
  * outside the range derived from the current thread's td_kstack.  Lowers
  * %tl one level at a time until it is 2 (tracing each level when KTR_TRAP
  * is compiled in), resets the window state registers, switches to a stack
  * derived from ASP_REG and enters tl1_trap with T_KSTACK_FAULT | T_KERNEL
  * in %o0 and trap in %o2.
  */
 ENTRY(tl1_kstack_fault)
 	rdpr	%tl, %g1
 1:	cmp	%g1, 2
 	be,a	2f
 	 nop
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl1_kstack_fault: tl=%#lx tpc=%#lx tnpc=%#lx"
 	    , %g2, %g3, %g4, 7, 8, 9)
 	/*
 	 * One parameter slot per conversion in the format string above;
 	 * storing all three values to KTR_PARM1 would leave only tnpc.
 	 */
 	rdpr	%tl, %g3
 	stx	%g3, [%g2 + KTR_PARM1]
 	rdpr	%tpc, %g3
 	stx	%g3, [%g2 + KTR_PARM2]
 	rdpr	%tnpc, %g3
 	stx	%g3, [%g2 + KTR_PARM3]
 9:
 #endif
 
 	/* Drop one trap level and re-check. */
 	sub	%g1, 1, %g1
 	wrpr	%g1, 0, %tl
 	ba,a	%xcc, 1b
 	 nop
 
 2:
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP,
 	    "tl1_kstack_fault: sp=%#lx ks=%#lx cr=%#lx cs=%#lx ow=%#lx ws=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	add	%sp, SPOFF, %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	ldx	[PCPU(CURTHREAD)], %g2
 	ldx	[%g2 + TD_KSTACK], %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	rdpr	%canrestore, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	rdpr	%cansave, %g2
 	stx	%g2, [%g1 + KTR_PARM4]
 	rdpr	%otherwin, %g2
 	stx	%g2, [%g1 + KTR_PARM5]
 	rdpr	%wstate, %g2
 	stx	%g2, [%g1 + KTR_PARM6]
 9:
 #endif
 
 	/* Reset the register window state. */
 	wrpr	%g0, 0, %canrestore
 	wrpr	%g0, 6, %cansave
 	wrpr	%g0, 0, %otherwin
 	wrpr	%g0, WSTATE_KERNEL, %wstate
 
 	/* Switch to a stack based on ASP_REG and clear the frame pointer. */
 	sub	ASP_REG, SPOFF + CCFSZ, %sp
 	clr	%fp
 
 	set	trap, %o2
 	ba	%xcc, tl1_trap
 	 mov	T_KSTACK_FAULT | T_KERNEL, %o0
 END(tl1_kstack_fault)
 
 /*
  * Magic to resume from a spill or fill trap.  If we get an alignment or an
  * MMU fault during a spill or a fill, this macro will detect the fault and
  * resume at a set instruction offset in the trap handler.
  *
  * To check if the previous trap was a spill/fill we convert the trapped pc
  * to a trap type and verify that it is in the range of spill/fill vectors.
  * The spill/fill vectors are types 0x80-0xff and 0x280-0x2ff, masking off the
  * tl bit allows us to detect both ranges with one test.
  *
  * This is:
  *	0x80 <= (((%tpc - %tba) >> 5) & ~0x200) < 0x100
  *
  * To calculate the new pc we take advantage of the xor feature of wrpr.
  * Forcing all the low bits of the trapped pc on we can produce any offset
  * into the spill/fill vector.  The size of a spill/fill trap vector is 0x80.
  *
  *	0x7f ^ 0x1f == 0x60
  *	0x1f == (0x80 - 0x60) - 1
  *
  * Which are the offset and xor value used to resume from alignment faults.
  */
 
 /*
  * Determine if we have trapped inside of a spill/fill vector, and if so resume
  * at a fixed instruction offset in the trap vector.  Must be called on
  * alternate globals.
  */
 #define	RESUME_SPILLFILL_MAGIC(stxa_g0_sfsr, xor) \
 	dec	16, ASP_REG ; \
 	stx	%g1, [ASP_REG + 0] ; \
 	stx	%g2, [ASP_REG + 8] ; \
 	rdpr	%tpc, %g1 ; \
 	ERRATUM50(%g1) ; \
 	rdpr	%tba, %g2 ; \
 	sub	%g1, %g2, %g2 ; \
 	srlx	%g2, 5, %g2 ; \
 	andn	%g2, 0x200, %g2 ; \
 	cmp	%g2, 0x80 ; \
 	blu,pt	%xcc, 9f ; \
 	 cmp	%g2, 0x100 ; \
 	bgeu,pt	%xcc, 9f ; \
 	 or	%g1, 0x7f, %g1 ; \
 	wrpr	%g1, xor, %tnpc ; \
 	stxa_g0_sfsr ; \
 	ldx	[ASP_REG + 8], %g2 ; \
 	ldx	[ASP_REG + 0], %g1 ; \
 	inc	16, ASP_REG ; \
 	done ; \
 9:	ldx	[ASP_REG + 8], %g2 ; \
 	ldx	[ASP_REG + 0], %g1 ; \
 	inc	16, ASP_REG
 
 /*
  * For certain faults we need to clear the SFSR MMU register before returning.
  */
 #define	RSF_CLR_SFSR \
 	wr	%g0, ASI_DMMU, %asi ; \
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 
 #define	RSF_XOR(off)	((0x80 - off) - 1)
 
 /*
  * Instruction offsets in spill and fill trap handlers for handling certain
  * nested traps, and corresponding xor constants for wrpr.
  */
 #define	RSF_OFF_ALIGN	0x60
 #define	RSF_OFF_MMU	0x70
 
 #define	RESUME_SPILLFILL_ALIGN \
 	RESUME_SPILLFILL_MAGIC(RSF_CLR_SFSR, RSF_XOR(RSF_OFF_ALIGN))
 #define	RESUME_SPILLFILL_MMU \
 	RESUME_SPILLFILL_MAGIC(EMPTY, RSF_XOR(RSF_OFF_MMU))
 #define	RESUME_SPILLFILL_MMU_CLR_SFSR \
 	RESUME_SPILLFILL_MAGIC(RSF_CLR_SFSR, RSF_XOR(RSF_OFF_MMU))
 
 /*
  * Constant to add to %tnpc when taking a fill trap just before returning to
  * user mode.
  */
 #define	RSF_FILL_INC	tl0_ret_fill_end - tl0_ret_fill
 
 /*
  * Generate a T_SPILL or T_FILL trap if the window operation fails.
  */
 #define	RSF_TRAP(type) \
 	ba	%xcc, tl0_sftrap ; \
 	 mov	type, %g2 ; \
 	.align	16
 
 /*
  * Game over if the window operation fails.
  */
 #define	RSF_FATAL(type) \
 	ba	%xcc, rsf_fatal ; \
 	 mov	type, %g2 ; \
 	.align	16
 
 /*
  * Magic to resume from a failed fill a few instructions after the
  * corresponding restore.  This is used on return from the kernel to usermode.
  */
 #define	RSF_FILL_MAGIC \
 	rdpr	%tnpc, %g1 ; \
 	add	%g1, RSF_FILL_INC, %g1 ; \
 	wrpr	%g1, 0, %tnpc ; \
 	done ; \
 	.align	16
 
 /*
  * Spill to the pcb if a spill to the user stack in kernel mode fails.
  */
 #define	RSF_SPILL_TOPCB \
 	ba,a	%xcc, tl1_spill_topcb ; \
 	 nop ; \
 	.align	16
 
 /*
  * Target of RSF_FATAL, which passes the window trap type in %g2.  Traces
  * %tt and that type when KTR_TRAP is compiled in, runs KSTACK_CHECK and
  * then executes the sir instruction.
  */
 ENTRY(rsf_fatal)
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "rsf_fatal: bad window trap tt=%#lx type=%#lx"
 	    , %g1, %g3, %g4, 7, 8, 9)
 	rdpr	%tt, %g3
 	stx	%g3, [%g1 + KTR_PARM1]
 	stx	%g2, [%g1 + KTR_PARM2]
 9:
 #endif
 
 	KSTACK_CHECK
 
 	sir
 END(rsf_fatal)
 
 	.data
 	_ALIGN_DATA
 	.globl	intrnames, sintrnames
 intrnames:
 	.space	(IV_MAX + PIL_MAX) * (MAXCOMLEN + 1)
 sintrnames:
 	.quad	(IV_MAX + PIL_MAX) * (MAXCOMLEN + 1)
 
 	.globl	intrcnt, sintrcnt
 intrcnt:
 	.space	(IV_MAX + PIL_MAX) * 8
 sintrcnt:
 	.quad	(IV_MAX + PIL_MAX) * 8
 
 	.text
 
 /*
  * Trap table and associated macros
  *
  * Due to its size a trap table is an inherently hard thing to represent in
  * code in a clean way.  There are approximately 1024 vectors, of 8 or 32
  * instructions each, many of which are identical.  The way that this is
  * laid out is the instructions (8 or 32) for the actual trap vector appear
  * as an AS macro.  In general this code branches to tl0_trap or tl1_trap,
  * but if not supporting code can be placed just after the definition of the
  * macro.  The macros are then instantiated in a different section (.trap),
  * which is setup to be placed by the linker at the beginning of .text, and the
  * code around the macros is moved to the end of trap table.  In this way the
  * code that must be sequential in memory can be split up, and located near
  * its supporting code so that it is easier to follow.
  */
 
 	/*
 	 * Clean window traps occur when %cleanwin is zero to ensure that data
 	 * is not leaked between address spaces in registers.
 	 */
 	.macro	clean_window
 	clr	%o0
 	clr	%o1
 	clr	%o2
 	clr	%o3
 	clr	%o4
 	clr	%o5
 	clr	%o6
 	clr	%o7
 	clr	%l0
 	clr	%l1
 	clr	%l2
 	clr	%l3
 	clr	%l4
 	clr	%l5
 	clr	%l6
 	rdpr	%cleanwin, %l7
 	inc	%l7
 	wrpr	%l7, 0, %cleanwin
 	clr	%l7
 	retry
 	.align	128
 	.endm
 
 	/*
 	 * Stack fixups for entry from user mode.  We are still running on the
 	 * user stack, and with its live registers, so we must save soon.  We
 	 * are on alternate globals so we do have some registers.  Set the
 	 * transitional window state, and do the save.  If this traps we
 	 * attempt to spill a window to the user stack.  If this fails, we
 	 * spill the window to the pcb and continue.  Spilling to the pcb
 	 * must not fail.
 	 *
 	 * NOTE: Must be called with alternate globals and clobbers %g1.
 	 */
 
 	.macro	tl0_split
 	rdpr	%wstate, %g1
 	wrpr	%g1, WSTATE_TRANSITION, %wstate
 	save
 	.endm
 
 	.macro	tl0_setup	type
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	ba	%xcc, tl0_utrap
 	 mov	\type, %o0
 	.endm
 
 	/*
 	 * Generic trap type.  Call trap() with the specified type.
 	 */
 	.macro	tl0_gen		type
 	tl0_setup \type
 	.align	32
 	.endm
 
 	/*
 	 * This is used to suck up the massive swaths of reserved trap types.
 	 * Generates count "reserved" trap vectors.
 	 */
 	.macro	tl0_reserved	count
 	.rept	\count
 	tl0_gen	T_RESERVED
 	.endr
 	.endm
 
 	.macro	tl1_split
 	rdpr	%wstate, %g1
 	wrpr	%g1, WSTATE_NESTED, %wstate
 	save	%sp, -(CCFSZ + TF_SIZEOF), %sp
 	.endm
 
 	.macro	tl1_setup	type
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	ba	%xcc, tl1_trap
 	 mov	\type | T_KERNEL, %o0
 	.endm
 
 	.macro	tl1_gen		type
 	tl1_setup \type
 	.align	32
 	.endm
 
 	.macro	tl1_reserved	count
 	.rept	\count
 	tl1_gen	T_RESERVED
 	.endr
 	.endm
 
 	/*
 	 * TL0 instruction access exception vector.  Switches to the alternate
 	 * globals, captures %tpc in %g3 and the IMMU SFSR in %g4, clears the
 	 * SFSR and continues in tl0_sfsr_trap with the trap type in %g2.
 	 */
 	.macro	tl0_insn_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
 	wr	%g0, ASI_IMMU, %asi
 	rdpr	%tpc, %g3
 	ldxa	[%g0 + AA_IMMU_SFSR] %asi, %g4
 	/*
 	 * XXX in theory, a store to AA_IMMU_SFSR must be immediately
 	 * followed by a DONE, FLUSH or RETRY for USIII.  In practice,
 	 * this triggers a RED state exception though.
 	 */
 	stxa	%g0, [%g0 + AA_IMMU_SFSR] %asi
 	membar	#Sync
 	ba	%xcc, tl0_sfsr_trap
 	 mov	T_INSTRUCTION_EXCEPTION, %g2
 	.align	32
 	.endm
 
 	/*
 	 * TL0 data access exception vector.  Switches to the alternate
 	 * globals, captures the DMMU SFAR in %g3 and SFSR in %g4, clears the
 	 * SFSR and continues in tl0_sfsr_trap with the trap type in %g2.
 	 */
 	.macro	tl0_data_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 	ba	%xcc, tl0_sfsr_trap
 	 mov	T_DATA_EXCEPTION, %g2
 	.align	32
 	.endm
 
 	/*
 	 * TL0 memory address not aligned vector.  Captures the DMMU SFAR in
 	 * %g3 and SFSR in %g4, clears the SFSR and continues in
 	 * tl0_sfsr_trap with the trap type in %g2.  Unlike tl0_data_excptn
 	 * this vector does not write %pstate itself.
 	 */
 	.macro	tl0_align
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 	ba	%xcc, tl0_sfsr_trap
 	 mov	T_MEM_ADDRESS_NOT_ALIGNED, %g2
 	.align	32
 	.endm
 
 /*
  * Common continuation of the TL0 exception vectors above.  On entry %g2
  * holds the trap type and %g3/%g4 the two values captured by the vector
  * (fault address or %tpc, and the SFSR).  They are handed to tl0_utrap in
  * %o0, %o4 and %o5 respectively, with zero in %o1 and the address of trap
  * in %o2.
  */
 ENTRY(tl0_sfsr_trap)
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl0_utrap
 	 mov	%g2, %o0
 END(tl0_sfsr_trap)
 
 	/*
 	 * TL0 interrupt level vector: switch windows and branch to the
 	 * tl0_intr handler with \level in %o0 and \mask in %o1.
 	 */
 	.macro	tl0_intr level, mask
 	tl0_split
 	set	\mask, %o1
 	ba	%xcc, tl0_intr
 	 mov	\level, %o0
 	.align	32
 	.endm
 
 /*
  * Helpers for generating the interrupt level vectors.
  *
  * INTR(level, traplvl) invokes the tl<traplvl>_intr macro with the level
  * and the mask 1 << level.  TICK(traplvl) does the same for PIL_TICK with
  * the mask 0x10001.  INTR_LEVEL(tl) emits fifteen consecutive vectors:
  * levels 1 through 13, the TICK vector in the fourteenth position, and
  * level 15.
  */
 #define	INTR(level, traplvl)						\
 	tl ## traplvl ## _intr	level, 1 << level
 
 #define	TICK(traplvl) \
 	tl ## traplvl ## _intr	PIL_TICK, 0x10001
 
 #define	INTR_LEVEL(tl)							\
 	INTR(1, tl) ;							\
 	INTR(2, tl) ;							\
 	INTR(3, tl) ;							\
 	INTR(4, tl) ;							\
 	INTR(5, tl) ;							\
 	INTR(6, tl) ;							\
 	INTR(7, tl) ;							\
 	INTR(8, tl) ;							\
 	INTR(9, tl) ;							\
 	INTR(10, tl) ;							\
 	INTR(11, tl) ;							\
 	INTR(12, tl) ;							\
 	INTR(13, tl) ;							\
 	TICK(tl) ;							\
 	INTR(15, tl) ;
 
 	/* Emit the full run of TL0 interrupt level vectors. */
 	.macro	tl0_intr_level
 	INTR_LEVEL(0)
 	.endm
 
 	/*
 	 * Interrupt vector trap: read the interrupt receive status into %g1
 	 * and branch to intr_vector if IRSR_BUSY is set, otherwise to
 	 * intr_vector_stray.
 	 */
 	.macro	intr_vector
 	ldxa	[%g0] ASI_INTR_RECEIVE, %g1
 	andcc	%g1, IRSR_BUSY, %g0
 	bnz,a,pt %xcc, intr_vector
 	 nop
 	ba,a,pt	%xcc, intr_vector_stray
 	 nop
 	.align	32
 	.endm
 
 	/*
 	 * TL0 instruction TLB miss vector.  Searches the user TSB for a
 	 * valid, executable TTE matching the faulting page, trying each page
 	 * size from TS_MIN to TS_MAX and every TTE of the selected bucket.
 	 * On a hit the TTE is loaded into the ITLB and the instruction is
 	 * retried (via tl0_immu_miss_set_ref if TD_REF is clear); otherwise
 	 * control passes to tl0_immu_miss_trap.
 	 *
 	 * Register use: %g1 = tag access register contents, %g2 = current
 	 * page size, %g3 = tag target, %g4 = TTE address, %g6/%g7 = loaded
 	 * TTE tag/data.
 	 *
 	 * NOTE(review): EMPTY in a delay slot presumably expands to nothing,
 	 * so the instruction following it occupies the slot -- confirm
 	 * against its definition.
 	 */
 	.macro	tl0_immu_miss
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.  We ignore the context.
 	 */
 	wr	%g0, ASI_IMMU, %asi
 	ldxa	[%g0 + AA_IMMU_TAR] %asi, %g1
 
 	/*
 	 * Initialize the page size walker.
 	 */
 	mov	TS_MIN, %g2
 
 	/*
 	 * Loop over all supported page sizes.
 	 */
 
 	/*
 	 * Compute the page shift for the page size we are currently looking
 	 * for.
 	 */
 1:	add	%g2, %g2, %g3
 	add	%g3, %g2, %g3
 	add	%g3, PAGE_SHIFT, %g3
 
 	/*
 	 * Extract the virtual page number from the contents of the tag
 	 * access register.
 	 */
 	srlx	%g1, %g3, %g3
 
 	/*
 	 * Compute the TTE bucket address.
 	 */
 	ldxa	[%g0 + AA_IMMU_TSB] %asi, %g5
 	and	%g3, TSB_BUCKET_MASK, %g4
 	sllx	%g4, TSB_BUCKET_SHIFT + TTE_SHIFT, %g4
 	add	%g4, %g5, %g4
 
 	/*
 	 * Compute the TTE tag target.
 	 */
 	sllx	%g3, TV_SIZE_BITS, %g3
 	or	%g3, %g2, %g3
 
 	/*
 	 * Loop over the TTEs in this bucket.
 	 */
 
 	/*
 	 * Load the TTE.  Note that this instruction may fault, clobbering
 	 * the contents of the tag access register, %g5, %g6, and %g7.  We
 	 * do not use %g5, and %g6 and %g7 are not used until this instruction
 	 * completes successfully.
 	 */
 2:	ldda	[%g4] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and executable and that the TTE tags match.
 	 */
 	brgez,pn %g7, 3f
 	 andcc	%g7, TD_EXEC, %g0
 	bz,pn	%xcc, 3f
 	 cmp	%g3, %g6
 	bne,pn	%xcc, 3f
 	 EMPTY
 
 	/*
 	 * We matched a TTE, load the TLB.
 	 */
 
 	/*
 	 * Set the reference bit, if it's currently clear.
 	 */
 	 andcc	%g7, TD_REF, %g0
 	bz,a,pn	%xcc, tl0_immu_miss_set_ref
 	 nop
 
 	/*
 	 * Load the TTE tag and data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_IMMU_TAR] %asi
 	stxa	%g7, [%g0] ASI_ITLB_DATA_IN_REG
 	retry
 
 	/*
 	 * Advance to the next TTE in this bucket, and check the low bits
 	 * of the bucket pointer to see if we've finished the bucket.
 	 */
 3:	add	%g4, 1 << TTE_SHIFT, %g4
 	andcc	%g4, (1 << (TSB_BUCKET_SHIFT + TTE_SHIFT)) - 1, %g0
 	bnz,pt	%xcc, 2b
 	 EMPTY
 
 	/*
 	 * See if we just checked the largest page size, and advance to the
 	 * next one if not.
 	 */
 	 cmp	%g2, TS_MAX
 	bne,pt	%xcc, 1b
 	 add	%g2, 1, %g2
 
 	/*
 	 * Not in user TSB, call C code.
 	 */
 	ba,a	%xcc, tl0_immu_miss_trap
 	.align	128
 	.endm
 
 /*
  * Out-of-line continuation of tl0_immu_miss, reached when the matching TTE
  * has TD_REF clear.  On entry %g4 holds the TTE address and %g1 the tag
  * access register contents; %asi is still ASI_IMMU.
  */
 ENTRY(tl0_immu_miss_set_ref)
 	/*
 	 * Set the reference bit.
 	 * NOTE(review): %g2 is subsequently treated as the resulting TTE
 	 * data and %g3 as scratch -- confirm against TTE_SET_REF.
 	 */
 	TTE_SET_REF(%g4, %g2, %g3, a, ASI_N)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g2, 1f
 	 nop
 
 	/*
 	 * Load the TTE tag and data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_IMMU_TAR] %asi
 	stxa	%g2, [%g0] ASI_ITLB_DATA_IN_REG
 1:	retry
 END(tl0_immu_miss_set_ref)
 
 /*
  * Slow path of tl0_immu_miss, taken when no matching TTE was found in the
  * user TSB.  On entry %g1 holds the saved tag access register contents and
  * %asi is ASI_IMMU.  Ends in tl0_utrap with T_INSTRUCTION_MISS in %o0, zero
  * in %o1, the address of trap in %o2 and the tag access value in %o3.
  */
 ENTRY(tl0_immu_miss_trap)
 	/*
 	 * Put back the contents of the tag access register, in case we
 	 * faulted.
 	 */
 	sethi	%hi(KERNBASE), %g2
 	stxa	%g1, [%g0 + AA_IMMU_TAR] %asi
 	flush	%g2
 
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Reload the tag access register.
 	 */
 	ldxa	[%g0 + AA_IMMU_TAR] %asi, %g2
 
 	/*
 	 * Save the tag access register, and call common trap code.
 	 */
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl0_utrap
 	 mov	T_INSTRUCTION_MISS, %o0
 END(tl0_immu_miss_trap)
 
 	/*
 	 * TL0 data TLB miss vector.  Searches the user TSB for a valid TTE
 	 * matching the faulting page, trying each page size from TS_MIN to
 	 * TS_MAX and every TTE of the selected bucket.  On a hit the TTE is
 	 * loaded into the DTLB and the instruction is retried (via
 	 * tl0_dmmu_miss_set_ref if TD_REF is clear); otherwise control
 	 * passes to tl0_dmmu_miss_trap.
 	 *
 	 * Register use: %g1 = tag access register contents, %g2 = current
 	 * page size, %g3 = tag target, %g4 = TTE address, %g6/%g7 = loaded
 	 * TTE tag/data.
 	 *
 	 * NOTE(review): EMPTY in a delay slot presumably expands to nothing,
 	 * so the instruction following it occupies the slot -- confirm
 	 * against its definition.
 	 */
 	.macro	tl0_dmmu_miss
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.  We ignore the context.
 	 */
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g1
 
 	/*
 	 * Initialize the page size walker.
 	 *
 	 * tl1_dmmu_miss branches to the label below for misses on user
 	 * addresses, with the tag access register contents already in %g1
 	 * and %asi set to ASI_DMMU.
 	 */
 tl1_dmmu_miss_user:
 	mov	TS_MIN, %g2
 
 	/*
 	 * Loop over all supported page sizes.
 	 */
 
 	/*
 	 * Compute the page shift for the page size we are currently looking
 	 * for.
 	 */
 1:	add	%g2, %g2, %g3
 	add	%g3, %g2, %g3
 	add	%g3, PAGE_SHIFT, %g3
 
 	/*
 	 * Extract the virtual page number from the contents of the tag
 	 * access register.
 	 */
 	srlx	%g1, %g3, %g3
 
 	/*
 	 * Compute the TTE bucket address.
 	 */
 	ldxa	[%g0 + AA_DMMU_TSB] %asi, %g5
 	and	%g3, TSB_BUCKET_MASK, %g4
 	sllx	%g4, TSB_BUCKET_SHIFT + TTE_SHIFT, %g4
 	add	%g4, %g5, %g4
 
 	/*
 	 * Compute the TTE tag target.
 	 */
 	sllx	%g3, TV_SIZE_BITS, %g3
 	or	%g3, %g2, %g3
 
 	/*
 	 * Loop over the TTEs in this bucket.
 	 */
 
 	/*
 	 * Load the TTE.  Note that this instruction may fault, clobbering
 	 * the contents of the tag access register, %g5, %g6, and %g7.  We
 	 * do not use %g5, and %g6 and %g7 are not used until this instruction
 	 * completes successfully.
 	 */
 2:	ldda	[%g4] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and that the virtual page numbers match.
 	 */
 	brgez,pn %g7, 3f
 	 cmp	%g3, %g6
 	bne,pn	%xcc, 3f
 	 EMPTY
 
 	/*
 	 * We matched a TTE, load the TLB.
 	 */
 
 	/*
 	 * Set the reference bit, if it's currently clear.
 	 */
 	 andcc	%g7, TD_REF, %g0
 	bz,a,pn	%xcc, tl0_dmmu_miss_set_ref
 	 nop
 
 	/*
 	 * Load the TTE tag and data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	stxa	%g7, [%g0] ASI_DTLB_DATA_IN_REG
 	retry
 
 	/*
 	 * Advance to the next TTE in this bucket, and check the low bits
 	 * of the bucket pointer to see if we've finished the bucket.
 	 */
 3:	add	%g4, 1 << TTE_SHIFT, %g4
 	andcc	%g4, (1 << (TSB_BUCKET_SHIFT + TTE_SHIFT)) - 1, %g0
 	bnz,pt	%xcc, 2b
 	 EMPTY
 
 	/*
 	 * See if we just checked the largest page size, and advance to the
 	 * next one if not.
 	 */
 	 cmp	%g2, TS_MAX
 	bne,pt	%xcc, 1b
 	 add	%g2, 1, %g2
 
 	/*
 	 * Not in user TSB, call C code.
 	 */
 	ba,a	%xcc, tl0_dmmu_miss_trap
 	.align	128
 	.endm
 
 /*
  * Out-of-line continuation of tl0_dmmu_miss, reached when the matching TTE
  * has TD_REF clear.  On entry %g4 holds the TTE address and %g1 the tag
  * access register contents; %asi is still ASI_DMMU.
  */
 ENTRY(tl0_dmmu_miss_set_ref)
 	/*
 	 * Set the reference bit.
 	 * NOTE(review): %g2 is subsequently treated as the resulting TTE
 	 * data and %g3 as scratch -- confirm against TTE_SET_REF.
 	 */
 	TTE_SET_REF(%g4, %g2, %g3, a, ASI_N)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g2, 1f
 	 nop
 
 	/*
 	 * Load the TTE tag and data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	stxa	%g2, [%g0] ASI_DTLB_DATA_IN_REG
 1:	retry
 END(tl0_dmmu_miss_set_ref)
 
 /*
  * Slow path of tl0_dmmu_miss, taken when no matching TTE was found in the
  * user TSB.  On entry %g1 holds the saved tag access register contents and
  * %asi is ASI_DMMU.  If %tl is 1 the miss is reported through tl0_utrap as
  * T_DATA_MISS; if %tl is greater than 1 it is reported through tl1_trap as
  * T_DATA_MISS | T_KERNEL after RESUME_SPILLFILL_MMU.  In both cases the tag
  * access register value is passed in %o3.
  */
 ENTRY(tl0_dmmu_miss_trap)
 	/*
 	 * Put back the contents of the tag access register, in case we
 	 * faulted.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	membar	#Sync
 
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Check if we actually came from the kernel.
 	 */
 	rdpr	%tl, %g1
 	cmp	%g1, 1
 	bgt,a,pn %xcc, 1f
 	 nop
 
 	/*
 	 * Reload the tag access register.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 
 	/*
 	 * Save the tag access register and call common trap code.
 	 */
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl0_utrap
 	 mov	T_DATA_MISS, %o0
 
 	/*
 	 * Handle faults during window spill/fill.
 	 */
 1:	RESUME_SPILLFILL_MMU
 
 	/*
 	 * Reload the tag access register.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl1_trap
 	 mov	T_DATA_MISS | T_KERNEL, %o0
 END(tl0_dmmu_miss_trap)
 
 	/*
 	 * TL0 data protection fault vector: just a trampoline to the
 	 * out-of-line handler tl0_dmmu_prot_1.
 	 */
 	.macro	tl0_dmmu_prot
 	ba,a	%xcc, tl0_dmmu_prot_1
 	 nop
 	.align	128
 	.endm
 
 /*
  * TL0 data protection fault handler.  Searches the user TSB for a valid TTE
  * with TD_SW set that matches the faulting page, trying each page size from
  * TS_MIN to TS_MAX and every TTE of the selected bucket.  On a hit the
  * hardware write bit is set in the TTE, the old TLB entry is demapped, the
  * SFSR is cleared and the updated TTE is loaded into the DTLB before the
  * instruction is retried.  Otherwise control passes to tl0_dmmu_prot_trap.
  *
  * Register use: %g1 = tag access register contents, %g2 = current page
  * size (later the TTE data), %g3 = tag target, %g4 = TTE address,
  * %g6/%g7 = loaded TTE tag/data.
  *
  * NOTE(review): EMPTY in a delay slot presumably expands to nothing, so the
  * instruction following it occupies the slot -- confirm against its
  * definition.
  */
 ENTRY(tl0_dmmu_prot_1)
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.  We ignore the context.
 	 */
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g1
 
 	/*
 	 * Initialize the page size walker.
 	 *
 	 * tl1_dmmu_prot_1 branches to the label below for faults on user
 	 * addresses, with the tag access register contents already in %g1
 	 * and %asi set to ASI_DMMU.
 	 */
 tl1_dmmu_prot_user:
 	mov	TS_MIN, %g2
 
 	/*
 	 * Loop over all supported page sizes.
 	 */
 
 	/*
 	 * Compute the page shift for the page size we are currently looking
 	 * for.
 	 */
 1:	add	%g2, %g2, %g3
 	add	%g3, %g2, %g3
 	add	%g3, PAGE_SHIFT, %g3
 
 	/*
 	 * Extract the virtual page number from the contents of the tag
 	 * access register.
 	 */
 	srlx	%g1, %g3, %g3
 
 	/*
 	 * Compute the TTE bucket address.
 	 */
 	ldxa	[%g0 + AA_DMMU_TSB] %asi, %g5
 	and	%g3, TSB_BUCKET_MASK, %g4
 	sllx	%g4, TSB_BUCKET_SHIFT + TTE_SHIFT, %g4
 	add	%g4, %g5, %g4
 
 	/*
 	 * Compute the TTE tag target.
 	 */
 	sllx	%g3, TV_SIZE_BITS, %g3
 	or	%g3, %g2, %g3
 
 	/*
 	 * Loop over the TTEs in this bucket.
 	 */
 
 	/*
 	 * Load the TTE.  Note that this instruction may fault, clobbering
 	 * the contents of the tag access register, %g5, %g6, and %g7.  We
 	 * do not use %g5, and %g6 and %g7 are not used until this instruction
 	 * completes successfully.
 	 */
 2:	ldda	[%g4] ASI_NUCLEUS_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and writable and that the virtual page
 	 * numbers match.
 	 */
 	brgez,pn %g7, 4f
 	 andcc	%g7, TD_SW, %g0
 	bz,pn	%xcc, 4f
 	 cmp	%g3, %g6
 	bne,pn	%xcc, 4f
 	 nop
 
 	/*
 	 * Set the hardware write bit.
 	 * NOTE(review): %g2 is subsequently treated as the TTE data and
 	 * %g3 as scratch -- confirm against TTE_SET_W.
 	 */
 	TTE_SET_W(%g4, %g2, %g3, a, ASI_N)
 
 	/*
 	 * Delete the old TLB entry and clear the SFSR.
 	 */
 	srlx	%g1, PAGE_SHIFT, %g3
 	sllx	%g3, PAGE_SHIFT, %g3
 	stxa	%g0, [%g3] ASI_DMMU_DEMAP
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g2, 3f
 	 or	%g2, TD_W, %g2
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	stxa	%g2, [%g0] ASI_DTLB_DATA_IN_REG
 3:	retry
 
 	/*
 	 * Check the low bits to see if we've finished the bucket.
 	 */
 4:	add	%g4, 1 << TTE_SHIFT, %g4
 	andcc	%g4, (1 << (TSB_BUCKET_SHIFT + TTE_SHIFT)) - 1, %g0
 	bnz,pt	%xcc, 2b
 	 EMPTY
 
 	/*
 	 * See if we just checked the largest page size, and advance to the
 	 * next one if not.
 	 */
 	 cmp	%g2, TS_MAX
 	bne,pt	%xcc, 1b
 	 add	%g2, 1, %g2
 
 	/*
 	 * Not in user TSB, call C code.
 	 */
 	ba,a	%xcc, tl0_dmmu_prot_trap
 	 nop
 END(tl0_dmmu_prot_1)
 
 /*
  * Slow path of tl0_dmmu_prot_1, taken when no suitable TTE was found in the
  * user TSB.  On entry %g1 holds the saved tag access register contents and
  * %asi is ASI_DMMU.  If %tl is 1 the fault is reported through tl0_utrap as
  * T_DATA_PROTECTION; if %tl is greater than 1 it is reported through
  * tl1_trap as T_DATA_PROTECTION | T_KERNEL after
  * RESUME_SPILLFILL_MMU_CLR_SFSR.  In both cases the TAR, SFAR and SFSR
  * values are passed in %o3, %o4 and %o5.
  */
 ENTRY(tl0_dmmu_prot_trap)
 	/*
 	 * Put back the contents of the tag access register, in case we
 	 * faulted.
 	 */
 	stxa	%g1, [%g0 + AA_DMMU_TAR] %asi
 	membar	#Sync
 
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Check if we actually came from the kernel.
 	 */
 	rdpr	%tl, %g1
 	cmp	%g1, 1
 	bgt,a,pn %xcc, 1f
 	 nop
 
 	/*
 	 * Load the SFAR, SFSR and TAR.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	/*
 	 * Save the MMU registers and call common trap code.
 	 */
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl0_utrap
 	 mov	T_DATA_PROTECTION, %o0
 
 	/*
 	 * Handle faults during window spill/fill.
 	 */
 1:	RESUME_SPILLFILL_MMU_CLR_SFSR
 
 	/*
 	 * Load the SFAR, SFSR and TAR.  Clear the SFSR.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl1_trap
 	 mov	T_DATA_PROTECTION | T_KERNEL, %o0
 END(tl0_dmmu_prot_trap)
 
 	/*
 	 * TL0 window spill vector: spill the window through ASI_AIUP using
 	 * stxa at %sp + SPOFF, mark it saved and retry.  The code is padded
 	 * to a 32-byte boundary and followed by two RSF_TRAP(T_SPILL)
 	 * entries.
 	 * NOTE(review): the SPILL arguments look like (store insn, base,
 	 * slot size, asi) -- confirm against the SPILL definition.
 	 */
 	.macro	tl0_spill_0_n
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stxa, %sp + SPOFF, 8, %asi)
 	saved
 	retry
 	.align	32
 	RSF_TRAP(T_SPILL)
 	RSF_TRAP(T_SPILL)
 	.endm
 
 	/*
 	 * TL0 window spill vector, 4-byte slot variant: spill the window
 	 * through ASI_AIUP using stwa at %sp (no SPOFF bias), mark it saved
 	 * and retry.  Followed by two RSF_TRAP(T_SPILL) entries.
 	 */
 	.macro	tl0_spill_1_n
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stwa, %sp, 4, %asi)
 	saved
 	retry
 	.align	32
 	RSF_TRAP(T_SPILL)
 	RSF_TRAP(T_SPILL)
 	.endm
 
 	/*
 	 * TL0 window fill vector: fill the window through ASI_AIUP using
 	 * ldxa at %sp + SPOFF, mark it restored and retry.  Followed by two
 	 * RSF_TRAP(T_FILL) entries.
 	 */
 	.macro	tl0_fill_0_n
 	wr	%g0, ASI_AIUP, %asi
 	FILL(ldxa, %sp + SPOFF, 8, %asi)
 	restored
 	retry
 	.align	32
 	RSF_TRAP(T_FILL)
 	RSF_TRAP(T_FILL)
 	.endm
 
 	/*
 	 * TL0 window fill vector, 4-byte slot variant: fill the window
 	 * through ASI_AIUP using lduwa at %sp (no SPOFF bias), mark it
 	 * restored and retry.  Followed by two RSF_TRAP(T_FILL) entries.
 	 */
 	.macro	tl0_fill_1_n
 	wr	%g0, ASI_AIUP, %asi
 	FILL(lduwa, %sp, 4, %asi)
 	restored
 	retry
 	.align	32
 	RSF_TRAP(T_FILL)
 	RSF_TRAP(T_FILL)
 	.endm
 
 /*
  * Report a trap whose type is supplied in %g2.  %cwp is first set back to
  * the CWP field of %tstate, then the trap is dispatched through tl0_trap
  * with the type in %o0, zero in %o1 and the address of trap in %o2.
  * NOTE(review): presumably reached from the RSF_TRAP entries of the
  * spill/fill vectors -- confirm against the RSF_TRAP definition.
  */
 ENTRY(tl0_sftrap)
 	rdpr	%tstate, %g1
 	and	%g1, TSTATE_CWP_MASK, %g1
 	wrpr	%g1, 0, %cwp
 	tl0_split
 	clr	%o1
 	set	trap, %o2
 	ba	%xcc, tl0_trap
 	 mov	%g2, %o0
 END(tl0_sftrap)
 
 	/*
 	 * Fill count unused TL0 spill vectors.  Each one is a single sir
 	 * instruction padded out to a 128-byte boundary.
 	 */
 	.macro	tl0_spill_bad	count
 	.rept	\count
 	sir
 	.align	128
 	.endr
 	.endm
 
 	/*
 	 * Fill count unused TL0 fill vectors.  Each one is a single sir
 	 * instruction padded out to a 128-byte boundary.
 	 */
 	.macro	tl0_fill_bad	count
 	.rept	\count
 	sir
 	.align	128
 	.endr
 	.endm
 
 	/*
 	 * System call vector: like tl0_gen, but the handler address passed
 	 * in %o2 is syscall rather than trap, and the branch goes to
 	 * tl0_trap with T_SYSCALL in %o0.
 	 */
 	.macro	tl0_syscall
 	tl0_split
 	clr	%o1
 	set	syscall, %o2
 	ba	%xcc, tl0_trap
 	 mov	T_SYSCALL, %o0
 	.align	32
 	.endm
 
 	/*
 	 * Trap vector that simply jumps to the out-of-line tl0_fp_restore
 	 * routine.
 	 */
 	.macro	tl0_fp_restore
 	ba,a	%xcc, tl0_fp_restore
 	 nop
 	.align	32
 	.endm
 
 /*
  * Reload the floating point registers from the PCB_UFP area of the pcb.
  * Clears PCB_FEF in the pcb flags, sets FPRS_FEF in %fprs, block-loads four
  * 64-byte chunks into %f0, %f16, %f32 and %f48 through ASI_BLK_S and
  * returns with done.  Clobbers %g1.
  */
 ENTRY(tl0_fp_restore)
 	ldx	[PCB_REG + PCB_FLAGS], %g1
 	andn	%g1, PCB_FEF, %g1
 	stx	%g1, [PCB_REG + PCB_FLAGS]
 
 	wr	%g0, FPRS_FEF, %fprs
 	wr	%g0, ASI_BLK_S, %asi
 	ldda	[PCB_REG + PCB_UFP + (0 * 64)] %asi, %f0
 	ldda	[PCB_REG + PCB_UFP + (1 * 64)] %asi, %f16
 	ldda	[PCB_REG + PCB_UFP + (2 * 64)] %asi, %f32
 	ldda	[PCB_REG + PCB_UFP + (3 * 64)] %asi, %f48
 	membar	#Sync
 	done
 END(tl0_fp_restore)
 
 	/*
 	 * TL1 instruction access exception vector.  Switches to the
 	 * alternate globals, captures %tpc in %g3 and the IMMU SFSR in %g4,
 	 * clears the SFSR and continues in tl1_insn_exceptn_trap with the
 	 * trap type in %g2.
 	 */
 	.macro	tl1_insn_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
 	wr	%g0, ASI_IMMU, %asi
 	rdpr	%tpc, %g3
 	ldxa	[%g0 + AA_IMMU_SFSR] %asi, %g4
 	/*
 	 * XXX in theory, a store to AA_IMMU_SFSR must be immediately
 	 * followed by a DONE, FLUSH or RETRY for USIII.  In practice,
 	 * this triggers a RED state exception though.
 	 */
 	stxa	%g0, [%g0 + AA_IMMU_SFSR] %asi
 	membar	#Sync
 	ba	%xcc, tl1_insn_exceptn_trap
 	 mov	T_INSTRUCTION_EXCEPTION | T_KERNEL, %g2
 	.align	32
 	.endm
 
 /*
  * Continuation of tl1_insn_excptn.  On entry %g2 holds the trap type, %g3
  * the trap PC and %g4 the SFSR; they are handed to tl1_trap in %o0, %o4 and
  * %o5, with zero in %o1 and the address of trap in %o2.
  */
 ENTRY(tl1_insn_exceptn_trap)
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl1_trap
 	 mov	%g2, %o0
 END(tl1_insn_exceptn_trap)
 
 	/*
 	 * TL1 fp disabled vector: just a trampoline to the out-of-line
 	 * handler tl1_fp_disabled_1.
 	 */
 	.macro	tl1_fp_disabled
 	ba,a	%xcc, tl1_fp_disabled_1
 	 nop
 	.align	32
 	.endm
 
 /*
  * TL1 fp disabled handler.  If the trap PC lies within fpu_fault_size bytes
  * of fpu_fault_begin, the floating point registers are reloaded from the
  * PCB_KFP area of the pcb and the instruction is retried.  Otherwise the
  * trap is reported through tl1_trap as T_FP_DISABLED | T_KERNEL.
  */
 ENTRY(tl1_fp_disabled_1)
 	/*
 	 * Unsigned range check of %tpc against
 	 * [fpu_fault_begin, fpu_fault_begin + fpu_fault_size).
 	 */
 	rdpr	%tpc, %g1
 	set	fpu_fault_begin, %g2
 	sub	%g1, %g2, %g1
 	cmp	%g1, fpu_fault_size
 	bgeu,a,pn %xcc, 1f
 	 nop
 
 	/* Enable the FPU and block-load four 64-byte chunks from the pcb. */
 	wr	%g0, FPRS_FEF, %fprs
 	wr	%g0, ASI_BLK_S, %asi
 	ldda	[PCB_REG + PCB_KFP + (0 * 64)] %asi, %f0
 	ldda	[PCB_REG + PCB_KFP + (1 * 64)] %asi, %f16
 	ldda	[PCB_REG + PCB_KFP + (2 * 64)] %asi, %f32
 	ldda	[PCB_REG + PCB_KFP + (3 * 64)] %asi, %f48
 	membar	#Sync
 	retry
 
 	/* Outside the expected range: hand the trap to the C code. */
 1:	tl1_split
 	clr	%o1
 	set	trap, %o2
 	ba	%xcc, tl1_trap
 	 mov	T_FP_DISABLED | T_KERNEL, %o0
 END(tl1_fp_disabled_1)
 
 	/*
 	 * TL1 data access exception vector: switch to the alternate globals
 	 * and continue in tl1_data_excptn_trap.
 	 */
 	.macro	tl1_data_excptn
 	wrpr	%g0, PSTATE_ALT, %pstate
 	ba,a	%xcc, tl1_data_excptn_trap
 	 nop
 	.align	32
 	.endm
 
 /*
  * Continuation of tl1_data_excptn: run RESUME_SPILLFILL_MMU_CLR_SFSR, then
  * continue in tl1_sfsr_trap with T_DATA_EXCEPTION | T_KERNEL in %g2.
  * NOTE(review): the RESUME_* macro presumably diverts faults taken inside
  * the window spill/fill handlers -- confirm against its definition.
  */
 ENTRY(tl1_data_excptn_trap)
 	RESUME_SPILLFILL_MMU_CLR_SFSR
 	ba	%xcc, tl1_sfsr_trap
 	 mov	T_DATA_EXCEPTION | T_KERNEL, %g2
 END(tl1_data_excptn_trap)
 
 	/*
 	 * TL1 memory address not aligned vector: switch to the alternate
 	 * globals and continue in tl1_align_trap.
 	 */
 	.macro	tl1_align
 	wrpr	%g0, PSTATE_ALT, %pstate
 	ba,a	%xcc, tl1_align_trap
 	 nop
 	.align	32
 	.endm
 
 /*
  * Continuation of tl1_align: run RESUME_SPILLFILL_ALIGN, then continue in
  * tl1_sfsr_trap with T_MEM_ADDRESS_NOT_ALIGNED | T_KERNEL in %g2.
  * NOTE(review): the RESUME_* macro presumably diverts faults taken inside
  * the window spill/fill handlers -- confirm against its definition.
  */
 ENTRY(tl1_align_trap)
 	RESUME_SPILLFILL_ALIGN
 	ba	%xcc, tl1_sfsr_trap
 	 mov	T_MEM_ADDRESS_NOT_ALIGNED | T_KERNEL, %g2
 END(tl1_align_trap)
 
 /*
  * Common continuation of the TL1 data exception and alignment handlers.
  * On entry %g2 holds the trap type.  Reads the DMMU SFAR and SFSR, clears
  * the SFSR, and dispatches through tl1_trap with the type in %o0, zero in
  * %o1, the address of trap in %o2, the SFAR in %o4 and the SFSR in %o5.
  */
 ENTRY(tl1_sfsr_trap)
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl1_trap
 	 mov	%g2, %o0
 END(tl1_sfsr_trap)
 
 	/*
 	 * TL1 interrupt level vector: switch windows and branch to the
 	 * tl1_intr handler with \level in %o0 and \mask in %o1.
 	 */
 	.macro	tl1_intr level, mask
 	tl1_split
 	set	\mask, %o1
 	ba	%xcc, tl1_intr
 	 mov	\level, %o0
 	.align	32
 	.endm
 
 	/* Emit the full run of TL1 interrupt level vectors. */
 	.macro	tl1_intr_level
 	INTR_LEVEL(1)
 	.endm
 
 	/*
 	 * TL1 instruction TLB miss vector.  Looks up the single kernel TSB
 	 * entry selected by the faulting virtual page number.  If the TTE is
 	 * valid, executable and matches, it is loaded into the ITLB and the
 	 * instruction is retried (via tl1_immu_miss_set_ref if TD_REF is
 	 * clear); otherwise control passes to tl1_immu_miss_trap.
 	 *
 	 * Register use: %g5 = tag access register contents, then the
 	 * virtual page number; %g6/%g7 = scratch, then the loaded TTE
 	 * tag/data.
 	 *
 	 * NOTE(review): EMPTY in a delay slot presumably expands to nothing,
 	 * so the instruction following it occupies the slot -- confirm
 	 * against its definition.
 	 */
 	.macro	tl1_immu_miss
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.  We ignore the context.
 	 */
 	wr	%g0, ASI_IMMU, %asi
 	ldxa	[%g0 + AA_IMMU_TAR] %asi, %g5
 
 	/*
 	 * Compute the address of the TTE.  The TSB mask and address of the
 	 * TSB are patched at startup.
 	 */
 	.globl	tl1_immu_miss_patch_tsb_1
 tl1_immu_miss_patch_tsb_1:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_immu_miss_patch_tsb_mask_1
 tl1_immu_miss_patch_tsb_mask_1:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	srlx	%g5, TAR_VPN_SHIFT, %g5
 	and	%g5, %g6, %g6
 	sllx	%g6, TTE_SHIFT, %g6
 	add	%g6, %g7, %g6
 
 	/*
 	 * Load the TTE.
 	 */
 	.globl	tl1_immu_miss_patch_quad_ldd_1
 tl1_immu_miss_patch_quad_ldd_1:
 	ldda	[%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and executable and that the virtual page
 	 * numbers match.
 	 */
 	brgez,pn %g7, tl1_immu_miss_trap
 	 andcc	%g7, TD_EXEC, %g0
 	bz,pn	%xcc, tl1_immu_miss_trap
 	 srlx	%g6, TV_SIZE_BITS, %g6
 	cmp	%g5, %g6
 	bne,pn	%xcc, tl1_immu_miss_trap
 	 EMPTY
 
 	/*
 	 * Set the reference bit if it's currently clear.
 	 */
 	 andcc	%g7, TD_REF, %g0
 	bz,a,pn	%xcc, tl1_immu_miss_set_ref
 	 nop
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g7, [%g0] ASI_ITLB_DATA_IN_REG
 	retry
 	.align	128
 	.endm
 
 /*
  * Out-of-line continuation of tl1_immu_miss, reached when the matching
  * kernel TTE has TD_REF clear.  On entry %g5 holds the virtual page number.
  * The TTE address is rebuilt in %g5, the reference bit is set, and the TTE
  * data is loaded into the ITLB before retrying.
  */
 ENTRY(tl1_immu_miss_set_ref)
 	/*
 	 * Recompute the TTE address, which we clobbered loading the TTE.
 	 * The TSB mask and address of the TSB are patched at startup.
 	 */
 	.globl	tl1_immu_miss_patch_tsb_2
 tl1_immu_miss_patch_tsb_2:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_immu_miss_patch_tsb_mask_2
 tl1_immu_miss_patch_tsb_mask_2:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	and	%g5, %g6, %g5
 	sllx	%g5, TTE_SHIFT, %g5
 	add	%g5, %g7, %g5
 
 	/*
 	 * Set the reference bit.
 	 * NOTE(review): %g6 is subsequently treated as the resulting TTE
 	 * data and %g7 as scratch -- confirm against TTE_SET_REF.
 	 */
 	.globl	tl1_immu_miss_patch_asi_1
 tl1_immu_miss_patch_asi_1:
 	wr	%g0, TSB_ASI, %asi
 	TTE_SET_REF(%g5, %g6, %g7, a, %asi)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g6, 1f
 	 nop
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g6, [%g0] ASI_ITLB_DATA_IN_REG
 1:	retry
 END(tl1_immu_miss_set_ref)
 
 /*
  * Slow path of tl1_immu_miss, taken when the kernel TSB lookup fails.
  * Reads the tag access register through %asi and dispatches through
  * tl1_trap with T_INSTRUCTION_MISS | T_KERNEL in %o0, zero in %o1, the
  * address of trap in %o2 and the tag access value in %o3.
  */
 ENTRY(tl1_immu_miss_trap)
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	ldxa	[%g0 + AA_IMMU_TAR] %asi, %g2
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl1_trap
 	 mov	T_INSTRUCTION_MISS | T_KERNEL, %o0
 END(tl1_immu_miss_trap)
 
 	/*
 	 * TL1 data TLB miss vector.  Dispatches on the faulting address:
 	 * a non-zero context goes to tl1_dmmu_miss_user (the user TSB walk
 	 * in tl0_dmmu_miss), a negative address goes to
 	 * tl1_dmmu_miss_direct, and anything else is looked up in the kernel
 	 * TSB.  A valid matching TTE is loaded into the DTLB and the
 	 * instruction retried (via tl1_dmmu_miss_set_ref if TD_REF is
 	 * clear); otherwise control passes to tl1_dmmu_miss_trap.
 	 *
 	 * NOTE(review): EMPTY in a delay slot presumably expands to nothing,
 	 * so the instruction following it occupies the slot -- confirm
 	 * against its definition.
 	 */
 	.macro	tl1_dmmu_miss
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.
 	 */
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g5
 
 	/*
 	 * Extract the context from the contents of the tag access register.
 	 * If it's non-zero this is a fault on a user address.  Note that the
 	 * faulting address is passed in %g1.
 	 */
 	sllx	%g5, 64 - TAR_VPN_SHIFT, %g6
 	brnz,a,pn %g6, tl1_dmmu_miss_user
 	 mov	%g5, %g1
 
 	/*
 	 * Check for the direct mapped physical region.  These addresses have
 	 * the high bit set so they are negative.
 	 */
 	brlz,pn %g5, tl1_dmmu_miss_direct
 	 EMPTY
 
 	/*
 	 * Compute the address of the TTE.  The TSB mask and address of the
 	 * TSB are patched at startup.
 	 */
 	.globl	tl1_dmmu_miss_patch_tsb_1
 tl1_dmmu_miss_patch_tsb_1:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_dmmu_miss_patch_tsb_mask_1
 tl1_dmmu_miss_patch_tsb_mask_1:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	srlx	%g5, TAR_VPN_SHIFT, %g5
 	and	%g5, %g6, %g6
 	sllx	%g6, TTE_SHIFT, %g6
 	add	%g6, %g7, %g6
 
 	/*
 	 * Load the TTE.
 	 */
 	.globl	tl1_dmmu_miss_patch_quad_ldd_1
 tl1_dmmu_miss_patch_quad_ldd_1:
 	ldda	[%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and that the virtual page numbers match.
 	 */
 	brgez,pn %g7, tl1_dmmu_miss_trap
 	 srlx	%g6, TV_SIZE_BITS, %g6
 	cmp	%g5, %g6
 	bne,pn %xcc, tl1_dmmu_miss_trap
 	 EMPTY
 
 	/*
 	 * Set the reference bit if it's currently clear.
 	 */
 	 andcc	%g7, TD_REF, %g0
 	bz,a,pt	%xcc, tl1_dmmu_miss_set_ref
 	 nop
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g7, [%g0] ASI_DTLB_DATA_IN_REG
 	retry
 	.align	128
 	.endm
 
 /*
  * Out-of-line continuation of tl1_dmmu_miss, reached when the matching
  * kernel TTE has TD_REF clear.  On entry %g5 holds the virtual page number.
  * The TTE address is rebuilt in %g5, the reference bit is set, and the TTE
  * data is loaded into the DTLB before retrying.
  */
 ENTRY(tl1_dmmu_miss_set_ref)
 	/*
 	 * Recompute the TTE address, which we clobbered loading the TTE.
 	 * The TSB mask and address of the TSB are patched at startup.
 	 * Each .globl directive immediately precedes the label it exports,
 	 * as in tl1_immu_miss_set_ref.
 	 */
 	.globl	tl1_dmmu_miss_patch_tsb_2
 tl1_dmmu_miss_patch_tsb_2:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_dmmu_miss_patch_tsb_mask_2
 tl1_dmmu_miss_patch_tsb_mask_2:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	and	%g5, %g6, %g5
 	sllx	%g5, TTE_SHIFT, %g5
 	add	%g5, %g7, %g5
 
 	/*
 	 * Set the reference bit.
 	 * NOTE(review): %g6 is subsequently treated as the resulting TTE
 	 * data and %g7 as scratch -- confirm against TTE_SET_REF.
 	 */
 	.globl	tl1_dmmu_miss_patch_asi_1
 tl1_dmmu_miss_patch_asi_1:
 	wr	%g0, TSB_ASI, %asi
 	TTE_SET_REF(%g5, %g6, %g7, a, %asi)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g6, 1f
 	 nop
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g6, [%g0] ASI_DTLB_DATA_IN_REG
 1:	retry
 END(tl1_dmmu_miss_set_ref)
 
 /*
  * Slow path of tl1_dmmu_miss, taken when the kernel TSB lookup fails.
  * Reads the tag access register into %g2, runs KSTACK_CHECK and dispatches
  * through tl1_trap with T_DATA_MISS | T_KERNEL in %o0, zero in %o1, the
  * address of trap in %o2 and the tag access value in %o3.
  */
 ENTRY(tl1_dmmu_miss_trap)
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 
 	KSTACK_CHECK
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	ba	%xcc, tl1_trap
 	 mov	T_DATA_MISS | T_KERNEL, %o0
 END(tl1_dmmu_miss_trap)
 
 /*
  * TL1 data TLB miss on the direct mapped physical region.  Entered from
  * tl1_dmmu_miss with the tag access register contents in %g5.  Builds the
  * TTE data directly from the virtual address and loads it into the DTLB.
  *
  * Register use: %g4 = physical address, %g5 = TTE data being built,
  * %g3/%g6/%g7 = scratch.
  */
 ENTRY(tl1_dmmu_miss_direct)
 	/*
 	 * Mask off the high bits of the virtual address to get the physical
 	 * address, and or in the TTE bits.  The virtual address bits that
 	 * correspond to the TTE valid and page size bits are left set, so
 	 * they don't have to be included in the TTE bits below.  We know they
 	 * are set because the virtual address is in the upper va hole.
 	 * NB: if we are taking advantage of the ASI_ATOMIC_QUAD_LDD_PHYS
 	 * and we get a miss on the directly accessed kernel TSB we must not
 	 * set TD_CV in order to access it uniformly bypassing the D$.
 	 */
 	setx	TLB_DIRECT_ADDRESS_MASK, %g7, %g4
 	and	%g5, %g4, %g4
 	setx	TLB_DIRECT_TO_TTE_MASK, %g7, %g6
 	and	%g5, %g6, %g5
 
 	/*
 	 * Build TSB_KERNEL_PHYS in %g7: the upper 32 bits are assembled in
 	 * %g3 and shifted into place, the lower half is loaded straight into
 	 * %g7, and the two are merged.  The lower half must not go through
 	 * %g3, or it would destroy the shifted upper half and leave the
 	 * stale setx scratch value in %g7 to be compared against.
 	 * Addresses below the TSB get TD_CV.
 	 * NOTE(review): the immediates are presumably patched at startup
 	 * like the other *_patch_* sequences -- confirm that the patching
 	 * code does not depend on the destination register used here.
 	 */
 	.globl	tl1_dmmu_miss_direct_patch_tsb_phys_1
 tl1_dmmu_miss_direct_patch_tsb_phys_1:
 	sethi	%uhi(TSB_KERNEL_PHYS), %g3
 	or	%g3, %ulo(TSB_KERNEL_PHYS), %g3
 	sllx	%g3, 32, %g3
 	sethi	%hi(TSB_KERNEL_PHYS), %g7
 	or	%g7, %g3, %g7
 	cmp	%g4, %g7
 	bl,pt	%xcc, 1f
 	 or	%g5, TD_CP | TD_W, %g5	/* always executed (delay slot) */
 
 	/*
 	 * Build TSB_KERNEL_PHYS_END in %g7 the same way.  Addresses above
 	 * the end of the TSB get TD_CV; addresses inside the TSB skip it.
 	 */
 	.globl	tl1_dmmu_miss_direct_patch_tsb_phys_end_1
 tl1_dmmu_miss_direct_patch_tsb_phys_end_1:
 	sethi	%uhi(TSB_KERNEL_PHYS_END), %g3
 	or	%g3, %ulo(TSB_KERNEL_PHYS_END), %g3
 	sllx	%g3, 32, %g3
 	sethi	%hi(TSB_KERNEL_PHYS_END), %g7
 	or	%g7, %g3, %g7
 	cmp	%g4, %g7
 	bg,a,pt	%xcc, 1f
 	 nop
 	ba,pt	%xcc, 2f
 	 nop
 1:	or	%g5, TD_CV, %g5
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 2:	stxa	%g5, [%g0] ASI_DTLB_DATA_IN_REG
 	retry
 END(tl1_dmmu_miss_direct)
 
 	/*
 	 * TL1 data protection fault vector: just a trampoline to the
 	 * out-of-line handler tl1_dmmu_prot_1.
 	 */
 	.macro	tl1_dmmu_prot
 	ba,a	%xcc, tl1_dmmu_prot_1
 	 nop
 	.align	128
 	.endm
 
 /*
  * TL1 data protection fault handler.  Faults with a non-zero context are
  * passed to tl1_dmmu_prot_user (the user TSB walk in tl0_dmmu_prot_1).
  * Otherwise the kernel TSB entry for the faulting page is looked up; if it
  * is valid, has TD_SW set and matches, the old TLB entry is demapped, the
  * SFSR is cleared, the hardware write bit is set in the TTE and the updated
  * TTE is loaded into the DTLB before retrying.  Any other case goes to
  * tl1_dmmu_prot_trap.
  *
  * NOTE(review): EMPTY in a delay slot presumably expands to nothing, so the
  * instruction following it occupies the slot -- confirm against its
  * definition.
  */
 ENTRY(tl1_dmmu_prot_1)
 	/*
 	 * Load the context and the virtual page number from the tag access
 	 * register.
 	 */
 	wr	%g0, ASI_DMMU, %asi
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g5
 
 	/*
 	 * Extract the context from the contents of the tag access register.
 	 * If it's non-zero this is a fault on a user address.  Note that the
 	 * faulting address is passed in %g1.
 	 */
 	sllx	%g5, 64 - TAR_VPN_SHIFT, %g6
 	brnz,a,pn %g6, tl1_dmmu_prot_user
 	 mov	%g5, %g1
 
 	/*
 	 * Compute the address of the TTE.  The TSB mask and address of the
 	 * TSB are patched at startup.
 	 */
 	.globl	tl1_dmmu_prot_patch_tsb_1
 tl1_dmmu_prot_patch_tsb_1:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_dmmu_prot_patch_tsb_mask_1
 tl1_dmmu_prot_patch_tsb_mask_1:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 
 	srlx	%g5, TAR_VPN_SHIFT, %g5
 	and	%g5, %g6, %g6
 	sllx	%g6, TTE_SHIFT, %g6
 	add	%g6, %g7, %g6
 
 	/*
 	 * Load the TTE.
 	 */
 	.globl	tl1_dmmu_prot_patch_quad_ldd_1
 tl1_dmmu_prot_patch_quad_ldd_1:
 	ldda	[%g6] TSB_QUAD_LDD, %g6 /*, %g7 */
 
 	/*
 	 * Check that it's valid and writeable and that the virtual page
 	 * numbers match.
 	 */
 	brgez,pn %g7, tl1_dmmu_prot_trap
 	 andcc	%g7, TD_SW, %g0
 	bz,pn	%xcc, tl1_dmmu_prot_trap
 	 srlx	%g6, TV_SIZE_BITS, %g6
 	cmp	%g5, %g6
 	bne,pn	%xcc, tl1_dmmu_prot_trap
 	 EMPTY
 
 	/*
 	 * Delete the old TLB entry and clear the SFSR.
 	 */
 	 sllx	%g5, TAR_VPN_SHIFT, %g6
 	or	%g6, TLB_DEMAP_NUCLEUS, %g6
 	stxa	%g0, [%g6] ASI_DMMU_DEMAP
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	/*
 	 * Recompute the TTE address, which we clobbered loading the TTE.
 	 * The TSB mask and address of the TSB are patched at startup.
 	 */
 	.globl	tl1_dmmu_prot_patch_tsb_2
 tl1_dmmu_prot_patch_tsb_2:
 	sethi	%uhi(TSB_KERNEL), %g6
 	or	%g6, %ulo(TSB_KERNEL), %g6
 	sllx	%g6, 32, %g6
 	sethi	%hi(TSB_KERNEL), %g7
 	or	%g7, %g6, %g7
 	.globl	tl1_dmmu_prot_patch_tsb_mask_2
 tl1_dmmu_prot_patch_tsb_mask_2:
 	sethi	%hi(TSB_KERNEL_MASK), %g6
 	or	%g6, %lo(TSB_KERNEL_MASK), %g6
 	and	%g5, %g6, %g5
 	sllx	%g5, TTE_SHIFT, %g5
 	add	%g5, %g7, %g5
 
 	/*
 	 * Set the hardware write bit.
 	 * NOTE(review): %g6 is subsequently treated as the TTE data and
 	 * %g7 as scratch -- confirm against TTE_SET_W.
 	 */
 	.globl	tl1_dmmu_prot_patch_asi_1
 tl1_dmmu_prot_patch_asi_1:
 	wr	%g0, TSB_ASI, %asi
 	TTE_SET_W(%g5, %g6, %g7, a, %asi)
 
 	/*
 	 * May have become invalid during casxa, in which case start over.
 	 */
 	brgez,pn %g6, 1f
 	 or	%g6, TD_W, %g6
 
 	/*
 	 * Load the TTE data into the TLB and retry the instruction.
 	 */
 	stxa	%g6, [%g0] ASI_DTLB_DATA_IN_REG
 1:	retry
 END(tl1_dmmu_prot_1)
 
 /*
  * Slow path of tl1_dmmu_prot_1.  Reads the TAR, SFAR and SFSR, clears the
  * SFSR, and dispatches through tl1_trap with
  * T_DATA_PROTECTION | T_KERNEL in %o0, zero in %o1, the address of trap in
  * %o2 and the three MMU register values in %o3, %o4 and %o5.
  */
 ENTRY(tl1_dmmu_prot_trap)
 	/*
 	 * Switch to alternate globals.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Load the SFAR, SFSR and TAR.  Clear the SFSR.
 	 */
 	ldxa	[%g0 + AA_DMMU_TAR] %asi, %g2
 	ldxa	[%g0 + AA_DMMU_SFAR] %asi, %g3
 	ldxa	[%g0 + AA_DMMU_SFSR] %asi, %g4
 	stxa	%g0, [%g0 + AA_DMMU_SFSR] %asi
 	membar	#Sync
 
 	tl1_split
 	clr	%o1
 	set	trap, %o2
 	mov	%g2, %o3
 	mov	%g3, %o4
 	mov	%g4, %o5
 	ba	%xcc, tl1_trap
 	 mov	T_DATA_PROTECTION | T_KERNEL, %o0
 END(tl1_dmmu_prot_trap)
 
 	/*
 	 * TL1 window spill vector: spill the window with plain stx at
 	 * %sp + SPOFF (no alternate address space), mark it saved and
 	 * retry.  Followed by two RSF_FATAL(T_SPILL) entries.
 	 */
 	.macro	tl1_spill_0_n
 	SPILL(stx, %sp + SPOFF, 8, EMPTY)
 	saved
 	retry
 	.align	32
 	RSF_FATAL(T_SPILL)
 	RSF_FATAL(T_SPILL)
 	.endm
 
 	/*
 	 * TL1 window spill vector: spill the window through ASI_AIUP using
 	 * stxa at %sp + SPOFF, mark it saved and retry.  Followed by two
 	 * RSF_SPILL_TOPCB entries.
 	 */
 	.macro	tl1_spill_2_n
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stxa, %sp + SPOFF, 8, %asi)
 	saved
 	retry
 	.align	32
 	RSF_SPILL_TOPCB
 	RSF_SPILL_TOPCB
 	.endm
 
 	/*
 	 * TL1 window spill vector, 4-byte slot variant: spill the window
 	 * through ASI_AIUP using stwa at %sp, mark it saved and retry.
 	 * Followed by two RSF_SPILL_TOPCB entries.
 	 */
 	.macro	tl1_spill_3_n
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stwa, %sp, 4, %asi)
 	saved
 	retry
 	.align	32
 	RSF_SPILL_TOPCB
 	RSF_SPILL_TOPCB
 	.endm
 
 	/*
 	 * TL1 window spill vector that selects the frame format from %sp:
 	 * if bit 0 of %sp is set, branch to tl1_spill_0_n; otherwise
 	 * truncate %sp to its low 32 bits and spill with plain stw in
 	 * 4-byte slots.  Followed by two RSF_FATAL(T_SPILL) entries.
 	 */
 	.macro	tl1_spill_7_n
 	btst	1, %sp
 	bnz,a,pn %xcc, tl1_spill_0_n
 	 nop
 	srl	%sp, 0, %sp
 	SPILL(stw, %sp, 4, EMPTY)
 	saved
 	retry
 	.align	32
 	RSF_FATAL(T_SPILL)
 	RSF_FATAL(T_SPILL)
 	.endm
 
 	/*
 	 * TL1 "other" window spill vector: spill the window through
 	 * ASI_AIUP using stxa at %sp + SPOFF, mark it saved and retry.
 	 * Followed by two RSF_SPILL_TOPCB entries.
 	 */
 	.macro	tl1_spill_0_o
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stxa, %sp + SPOFF, 8, %asi)
 	saved
 	retry
 	.align	32
 	RSF_SPILL_TOPCB
 	RSF_SPILL_TOPCB
 	.endm
 
 	/*
 	 * TL1 "other" window spill vector, 4-byte slot variant: spill the
 	 * window through ASI_AIUP using stwa at %sp, mark it saved and
 	 * retry.  Followed by two RSF_SPILL_TOPCB entries.
 	 */
 	.macro	tl1_spill_1_o
 	wr	%g0, ASI_AIUP, %asi
 	SPILL(stwa, %sp, 4, %asi)
 	saved
 	retry
 	.align	32
 	RSF_SPILL_TOPCB
 	RSF_SPILL_TOPCB
 	.endm
 
 	/*
 	 * TL1 "other" window spill vector that consists solely of
 	 * RSF_SPILL_TOPCB, padded out to a 128-byte boundary.
 	 */
 	.macro	tl1_spill_2_o
 	RSF_SPILL_TOPCB
 	.align	128
 	.endm
 
 	/*
 	 * TL1 window fill vector: fill the window with plain ldx at
 	 * %sp + SPOFF (no alternate address space), mark it restored and
 	 * retry.  Followed by two RSF_FATAL(T_FILL) entries.
 	 */
 	.macro	tl1_fill_0_n
 	FILL(ldx, %sp + SPOFF, 8, EMPTY)
 	restored
 	retry
 	.align	32
 	RSF_FATAL(T_FILL)
 	RSF_FATAL(T_FILL)
 	.endm
 
 	/*
 	 * TL1 window fill vector: fill the window through ASI_AIUP using
 	 * ldxa at %sp + SPOFF, mark it restored and retry.  Followed by two
 	 * RSF_FILL_MAGIC entries.
 	 */
 	.macro	tl1_fill_2_n
 	wr	%g0, ASI_AIUP, %asi
 	FILL(ldxa, %sp + SPOFF, 8, %asi)
 	restored
 	retry
 	.align 32
 	RSF_FILL_MAGIC
 	RSF_FILL_MAGIC
 	.endm
 
 	/*
 	 * TL1 window fill vector, 4-byte slot variant: fill the window
 	 * through ASI_AIUP using lduwa at %sp, mark it restored and retry.
 	 * Followed by two RSF_FILL_MAGIC entries.
 	 */
 	.macro	tl1_fill_3_n
 	wr	%g0, ASI_AIUP, %asi
 	FILL(lduwa, %sp, 4, %asi)
 	restored
 	retry
 	.align 32
 	RSF_FILL_MAGIC
 	RSF_FILL_MAGIC
 	.endm
 
 	/*
 	 * TL1 window fill vector that selects the frame format from %sp:
 	 * if bit 0 of %sp is set, branch to tl1_fill_0_n; otherwise
 	 * truncate %sp to its low 32 bits and fill with plain lduw in
 	 * 4-byte slots.  Followed by two RSF_FATAL(T_FILL) entries.
 	 */
 	.macro	tl1_fill_7_n
 	btst	1, %sp
 	bnz,a,pt %xcc, tl1_fill_0_n
 	 nop
 	srl	%sp, 0, %sp
 	FILL(lduw, %sp, 4, EMPTY)
 	restored
 	retry
 	.align	32
 	RSF_FATAL(T_FILL)
 	RSF_FATAL(T_FILL)
 	.endm
 
 /*
  * This is used to spill windows that are still occupied with user
  * data on kernel entry to the pcb.
  */
 ENTRY(tl1_spill_topcb)
 	/* Switch to alternate globals. */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/* Free some globals for our use. */
 	dec	24, ASP_REG
 	stx	%g1, [ASP_REG + 0]
 	stx	%g2, [ASP_REG + 8]
 	stx	%g3, [ASP_REG + 16]
 
 	/* %g1 = number of windows already saved in the pcb. */
 	ldx	[PCB_REG + PCB_NSAVED], %g1
 
 	/* Record this window's %sp in slot %g1 of the PCB_RWSP array. */
 	sllx	%g1, PTR_SHIFT, %g2
 	add	%g2, PCB_REG, %g2
 	stx	%sp, [%g2 + PCB_RWSP]
 
 	/* Spill the window itself into slot %g1 of the PCB_RW array. */
 	sllx	%g1, RW_SHIFT, %g2
 	add	%g2, PCB_REG, %g2
 	SPILL(stx, %g2 + PCB_RW, 8, EMPTY)
 
 	/* Account for the newly saved window. */
 	inc	%g1
 	stx	%g1, [PCB_REG + PCB_NSAVED]
 
 #if KTR_COMPILE & KTR_TRAP
 	/*
 	 * Trace the spill: pc, npc, sp and the updated saved-window count.
 	 * NOTE(review): CATR presumably leaves the trace record pointer in
 	 * %g1 and branches to 9: when tracing is off -- confirm against
 	 * its definition.
 	 */
 	CATR(KTR_TRAP, "tl1_spill_topcb: pc=%#lx npc=%#lx sp=%#lx nsaved=%d"
 	   , %g1, %g2, %g3, 7, 8, 9)
 	rdpr	%tpc, %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	rdpr	%tnpc, %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	stx	%sp, [%g1 + KTR_PARM3]
 	ldx	[PCB_REG + PCB_NSAVED], %g2
 	stx	%g2, [%g1 + KTR_PARM4]
 9:
 #endif
 
 	/* Tell the hardware the window has been saved. */
 	saved
 
 	/* Restore the borrowed globals and retry the trapped instruction. */
 	ldx	[ASP_REG + 16], %g3
 	ldx	[ASP_REG + 8], %g2
 	ldx	[ASP_REG + 0], %g1
 	inc	24, ASP_REG
 	retry
 END(tl1_spill_topcb)
 
 	/*
 	 * Fill count unused TL1 spill vectors.  Each one is a single sir
 	 * instruction padded out to a 128-byte boundary.
 	 */
 	.macro	tl1_spill_bad	count
 	.rept	\count
 	sir
 	.align	128
 	.endr
 	.endm
 
 	/*
 	 * Fill count unused TL1 fill vectors.  Each one is a single sir
 	 * instruction padded out to a 128-byte boundary.
 	 */
 	.macro	tl1_fill_bad	count
 	.rept	\count
 	sir
 	.align	128
 	.endr
 	.endm
 
 	/*
 	 * Generates count TL1 software trap vectors, each a tl1_gen of type
 	 * T_SOFT | T_KERNEL.
 	 */
 	.macro	tl1_soft	count
 	.rept	\count
 	tl1_gen	T_SOFT | T_KERNEL
 	.endr
 	.endm
 
 	/*
 	 * Start of the .trap section.  tl_trap_begin marks its beginning;
 	 * the trap table proper starts at tl0_base, aligned to 0x8000.
 	 *
 	 * Each entry below is one of the vector macros defined above; the
 	 * trailing comment gives the trap number(s) the entry occupies.
 	 * Macros that end in .align 128 take four consecutive 32-byte
 	 * slots, which is why the numbering skips ahead after them (for
 	 * example 0x24 -> 0x28).
 	 */
 	.sect	.trap
 	.globl	tl_trap_begin
 tl_trap_begin:
 	nop
 
 	.align	0x8000
 	.globl	tl0_base
 
 tl0_base:
 	tl0_reserved	8				! 0x0-0x7
 tl0_insn_excptn:
 	tl0_insn_excptn					! 0x8
 	tl0_reserved	1				! 0x9
 tl0_insn_error:
 	tl0_gen		T_INSTRUCTION_ERROR		! 0xa
 	tl0_reserved	5				! 0xb-0xf
 tl0_insn_illegal:
 	tl0_gen		T_ILLEGAL_INSTRUCTION		! 0x10
 tl0_priv_opcode:
 	tl0_gen		T_PRIVILEGED_OPCODE		! 0x11
 	tl0_reserved	14				! 0x12-0x1f
 tl0_fp_disabled:
 	tl0_gen		T_FP_DISABLED			! 0x20
 tl0_fp_ieee:
 	tl0_gen		T_FP_EXCEPTION_IEEE_754		! 0x21
 tl0_fp_other:
 	tl0_gen		T_FP_EXCEPTION_OTHER		! 0x22
 tl0_tag_ovflw:
 	tl0_gen		T_TAG_OVERFLOW			! 0x23
 tl0_clean_window:
 	clean_window					! 0x24
 tl0_divide:
 	tl0_gen		T_DIVISION_BY_ZERO		! 0x28
 	tl0_reserved	7				! 0x29-0x2f
 tl0_data_excptn:
 	tl0_data_excptn					! 0x30
 	tl0_reserved	1				! 0x31
 tl0_data_error:
 	tl0_gen		T_DATA_ERROR			! 0x32
 	tl0_reserved	1				! 0x33
 tl0_align:
 	tl0_align					! 0x34
 tl0_align_lddf:
 	tl0_gen		T_RESERVED			! 0x35
 tl0_align_stdf:
 	tl0_gen		T_RESERVED			! 0x36
 tl0_priv_action:
 	tl0_gen		T_PRIVILEGED_ACTION		! 0x37
 	tl0_reserved	9				! 0x38-0x40
 tl0_intr_level:
 	tl0_intr_level					! 0x41-0x4f
 	tl0_reserved	16				! 0x50-0x5f
 tl0_intr_vector:
 	intr_vector					! 0x60
 tl0_watch_phys:
 	tl0_gen		T_PA_WATCHPOINT			! 0x61
 tl0_watch_virt:
 	tl0_gen		T_VA_WATCHPOINT			! 0x62
 tl0_ecc:
 	tl0_gen		T_CORRECTED_ECC_ERROR		! 0x63
 tl0_immu_miss:
 	tl0_immu_miss					! 0x64
 tl0_dmmu_miss:
 	tl0_dmmu_miss					! 0x68
 tl0_dmmu_prot:
 	tl0_dmmu_prot					! 0x6c
 	tl0_reserved	16				! 0x70-0x7f
 tl0_spill_0_n:
 	tl0_spill_0_n					! 0x80
 tl0_spill_1_n:
 	tl0_spill_1_n					! 0x84
 	tl0_spill_bad	14				! 0x88-0xbf
 tl0_fill_0_n:
 	tl0_fill_0_n					! 0xc0
 tl0_fill_1_n:
 	tl0_fill_1_n					! 0xc4
 	tl0_fill_bad	14				! 0xc8-0xff
 tl0_soft:
 	tl0_gen		T_SYSCALL			! 0x100
 	tl0_gen		T_BREAKPOINT			! 0x101
 	tl0_gen		T_DIVISION_BY_ZERO		! 0x102
 	tl0_reserved	1				! 0x103
 	tl0_gen		T_CLEAN_WINDOW			! 0x104
 	tl0_gen		T_RANGE_CHECK			! 0x105
 	tl0_gen		T_FIX_ALIGNMENT			! 0x106
 	tl0_gen		T_INTEGER_OVERFLOW		! 0x107
 	tl0_gen		T_SYSCALL			! 0x108
 	tl0_gen		T_SYSCALL			! 0x109
 	tl0_fp_restore					! 0x10a
 	tl0_reserved	5				! 0x10b-0x10f
 	tl0_gen		T_TRAP_INSTRUCTION_16		! 0x110
 	tl0_gen		T_TRAP_INSTRUCTION_17		! 0x111
 	tl0_gen		T_TRAP_INSTRUCTION_18		! 0x112
 	tl0_gen		T_TRAP_INSTRUCTION_19		! 0x113
 	tl0_gen		T_TRAP_INSTRUCTION_20		! 0x114
 	tl0_gen		T_TRAP_INSTRUCTION_21		! 0x115
 	tl0_gen		T_TRAP_INSTRUCTION_22		! 0x116
 	tl0_gen		T_TRAP_INSTRUCTION_23		! 0x117
 	tl0_gen		T_TRAP_INSTRUCTION_24		! 0x118
 	tl0_gen		T_TRAP_INSTRUCTION_25		! 0x119
 	tl0_gen		T_TRAP_INSTRUCTION_26		! 0x11a
 	tl0_gen		T_TRAP_INSTRUCTION_27		! 0x11b
 	tl0_gen		T_TRAP_INSTRUCTION_28		! 0x11c
 	tl0_gen		T_TRAP_INSTRUCTION_29		! 0x11d
 	tl0_gen		T_TRAP_INSTRUCTION_30		! 0x11e
 	tl0_gen		T_TRAP_INSTRUCTION_31		! 0x11f
 	tl0_reserved	32				! 0x120-0x13f
 	tl0_gen		T_SYSCALL			! 0x140
 	tl0_syscall					! 0x141
 	tl0_gen		T_SYSCALL			! 0x142
 	tl0_gen		T_SYSCALL			! 0x143
 	tl0_reserved	188				! 0x144-0x1ff
 
 tl1_base:
 	tl1_reserved	8				! 0x200-0x207
 tl1_insn_excptn:
 	tl1_insn_excptn					! 0x208
 	tl1_reserved	1				! 0x209
 tl1_insn_error:
 	tl1_gen		T_INSTRUCTION_ERROR		! 0x20a
 	tl1_reserved	5				! 0x20b-0x20f
 tl1_insn_illegal:
 	tl1_gen		T_ILLEGAL_INSTRUCTION		! 0x210
 tl1_priv_opcode:
 	tl1_gen		T_PRIVILEGED_OPCODE		! 0x211
 	tl1_reserved	14				! 0x212-0x21f
 tl1_fp_disabled:
 	tl1_fp_disabled					! 0x220
 tl1_fp_ieee:
 	tl1_gen		T_FP_EXCEPTION_IEEE_754		! 0x221
 tl1_fp_other:
 	tl1_gen		T_FP_EXCEPTION_OTHER		! 0x222
 tl1_tag_ovflw:
 	tl1_gen		T_TAG_OVERFLOW			! 0x223
 tl1_clean_window:
 	clean_window					! 0x224
 tl1_divide:
 	tl1_gen		T_DIVISION_BY_ZERO		! 0x228
 	tl1_reserved	7				! 0x229-0x22f
 tl1_data_excptn:
 	tl1_data_excptn					! 0x230
 	tl1_reserved	1				! 0x231
 tl1_data_error:
 	tl1_gen		T_DATA_ERROR			! 0x232
 	tl1_reserved	1				! 0x233
 tl1_align:
 	tl1_align					! 0x234
 tl1_align_lddf:
 	tl1_gen		T_RESERVED			! 0x235
 tl1_align_stdf:
 	tl1_gen		T_RESERVED			! 0x236
 tl1_priv_action:
 	tl1_gen		T_PRIVILEGED_ACTION		! 0x237
 	tl1_reserved	9				! 0x238-0x240
 tl1_intr_level:
 	tl1_intr_level					! 0x241-0x24f
 	tl1_reserved	16				! 0x250-0x25f
 tl1_intr_vector:
 	intr_vector					! 0x260
 tl1_watch_phys:
 	tl1_gen		T_PA_WATCHPOINT			! 0x261
 tl1_watch_virt:
 	tl1_gen		T_VA_WATCHPOINT			! 0x262
 tl1_ecc:
 	tl1_gen		T_CORRECTED_ECC_ERROR		! 0x263
 tl1_immu_miss:
 	tl1_immu_miss					! 0x264
 tl1_dmmu_miss:
 	tl1_dmmu_miss					! 0x268
 tl1_dmmu_prot:
 	tl1_dmmu_prot					! 0x26c
 	tl1_reserved	16				! 0x270-0x27f
 tl1_spill_0_n:
 	tl1_spill_0_n					! 0x280
 	tl1_spill_bad	1				! 0x284
 tl1_spill_2_n:
 	tl1_spill_2_n					! 0x288
 tl1_spill_3_n:
 	tl1_spill_3_n					! 0x28c
 	tl1_spill_bad	3				! 0x290-0x29b
 tl1_spill_7_n:
 	tl1_spill_7_n					! 0x29c
 tl1_spill_0_o:
 	tl1_spill_0_o					! 0x2a0
 tl1_spill_1_o:
 	tl1_spill_1_o					! 0x2a4
 tl1_spill_2_o:
 	tl1_spill_2_o					! 0x2a8
 	tl1_spill_bad	5				! 0x2ac-0x2bf
 tl1_fill_0_n:
 	tl1_fill_0_n					! 0x2c0
 	tl1_fill_bad	1				! 0x2c4
 tl1_fill_2_n:
 	tl1_fill_2_n					! 0x2c8
 tl1_fill_3_n:
 	tl1_fill_3_n					! 0x2cc
 	tl1_fill_bad	3				! 0x2d0-0x2db
 tl1_fill_7_n:
 	tl1_fill_7_n					! 0x2dc
 	tl1_fill_bad	8				! 0x2e0-0x2ff
 	tl1_reserved	1				! 0x300
 tl1_breakpoint:
 	tl1_gen		T_BREAKPOINT			! 0x301
 	tl1_gen		T_RSTRWP_PHYS			! 0x302
 	tl1_gen		T_RSTRWP_VIRT			! 0x303
 	tl1_reserved	252				! 0x304-0x3ff
 
 	.globl	tl_trap_end
 tl_trap_end:
 	nop
 
 /*
  * User trap entry point
  *
  * void tl0_utrap(u_long type, u_long o1, u_long o2, u_long tar, u_long sfar,
  *     u_long sfsr)
  *
  * This handles redirecting a trap back to usermode as a user trap.  The user
  * program must have first registered a trap handler with the kernel using
  * sysarch(SPARC_UTRAP_INSTALL).  The trap handler is passed enough state
  * for it to return to the trapping code directly, it will not return through
  * the kernel.  The trap type is passed in %o0, all out registers must be
  * passed through to tl0_trap or to usermode untouched.  Note that the
  * parameters passed in out registers may be used by the user trap handler.
  * Do not change the registers they are passed in or you will break the ABI.
  *
  * If the trap type allows user traps, setup state to execute the user trap
  * handler and bounce back to usermode, otherwise branch to tl0_trap.
  */
 ENTRY(tl0_utrap)
 	/*
 	 * Check if the trap type allows user traps.
 	 */
 	cmp	%o0, UT_MAX
 	bge,a,pt %xcc, tl0_trap
 	 nop
 
 	/*
 	 * Load the user trap handler from the utrap table.
 	 */
 	ldx	[PCPU(CURTHREAD)], %l0
 	ldx	[%l0 + TD_PROC], %l0
 	ldx	[%l0 + P_MD + MD_UTRAP], %l0
 	brz,pt	%l0, tl0_trap
 	 sllx	%o0, PTR_SHIFT, %l1
 	ldx	[%l0 + %l1], %l0
 	brz,a,pt %l0, tl0_trap
 	 nop
 
 	/*
 	 * If the save we did on entry to the kernel had to spill a window
 	 * to the pcb, pretend we took a spill trap instead.  Any windows
 	 * that are in the pcb must be copied out or the fill handler will
 	 * not be able to find them, since the user trap handler returns
 	 * directly to the trapping code.  Note that we only support precise
 	 * user traps, which implies that the condition that caused the trap
 	 * in the first place is still valid, so it will occur again when we
 	 * re-execute the trapping instruction.
 	 */
 	ldx	[PCB_REG + PCB_NSAVED], %l1
 	brnz,a,pn %l1, tl0_trap
 	 mov	T_SPILL, %o0
 
 	/*
 	 * Pass %fsr in %l4, %tstate in %l5, %tpc in %l6 and %tnpc in %l7.
 	 * The ABI specifies only %l6 and %l7, but we need to pass %fsr or
 	 * it may be clobbered by an interrupt before the user trap code
 	 * can read it, and we must pass %tstate in order to restore %ccr
 	 * and %asi.  The %fsr must be stored to memory, so we use the
 	 * temporary stack for that.
 	 */
 	rd	%fprs, %l1
 	or	%l1, FPRS_FEF, %l2
 	wr	%l2, 0, %fprs
 	dec	8, ASP_REG
 	stx	%fsr, [ASP_REG]
 	ldx	[ASP_REG], %l4
 	inc	8, ASP_REG
 	wr	%l1, 0, %fprs
 
 	rdpr	%tstate, %l5
 	rdpr	%tpc, %l6
 	rdpr	%tnpc, %l7
 
 	/*
 	 * Setup %tnpc to return to.
 	 */
 	wrpr	%l0, 0, %tnpc
 
 	/*
 	 * Setup %wstate for return, clear WSTATE_TRANSITION.
 	 */
 	rdpr	%wstate, %l1
 	and	%l1, WSTATE_NORMAL_MASK, %l1
 	wrpr	%l1, 0, %wstate
 
 	/*
 	 * Setup %tstate for return, change the saved cwp to point to the
 	 * current window instead of the window at the time of the trap.
 	 */
 	andn	%l5, TSTATE_CWP_MASK, %l1
 	rdpr	%cwp, %l2
 	wrpr	%l1, %l2, %tstate
 
 	/*
 	 * Setup %sp.  Userland processes will crash if this is not setup.
 	 */
 	sub	%fp, CCFSZ, %sp
 
 	/*
 	 * Execute the user trap handler.
 	 */
 	done
 END(tl0_utrap)
 
 /*
  * (Real) User trap entry point
  *
  * void tl0_trap(u_int type, u_long o1, u_long o2, u_long tar, u_long sfsr,
  *     u_int sfsr)
  *
  * The following setup has been performed:
  *	- the windows have been split and the active user window has been saved
  *	  (maybe just to the pcb)
  *	- we are on alternate globals and interrupts are disabled
  *
  * We switch to the kernel stack, build a trapframe, switch to normal
  * globals, enable interrupts and call trap.
  *
  * NOTE: We must be very careful setting up the per-cpu pointer.  We know that
  * it has been pre-set in alternate globals, so we read it from there and setup
  * the normal %g7 *before* enabling interrupts.  This avoids any possibility
  * of cpu migration and using the wrong pcpup.
  */
 ENTRY(tl0_trap)
 	/*
 	 * Force kernel store order.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	rdpr	%tstate, %l0
 	rdpr	%tpc, %l1
 	rdpr	%tnpc, %l2
 	rd	%y, %l3
 	rd	%fprs, %l4
 	rdpr	%wstate, %l5
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP,
 	    "tl0_trap: td=%p type=%#x pil=%#lx pc=%#lx npc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	stx	%o0, [%g1 + KTR_PARM2]
 	rdpr	%pil, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	stx	%l1, [%g1 + KTR_PARM4]
 	stx	%l2, [%g1 + KTR_PARM5]
 	stx	%i6, [%g1 + KTR_PARM6]
 9:
 #endif
 
 1:	and	%l5, WSTATE_NORMAL_MASK, %l5
 	sllx	%l5, WSTATE_OTHER_SHIFT, %l5
 	wrpr	%l5, WSTATE_KERNEL, %wstate
 	rdpr	%canrestore, %l6
 	wrpr	%l6, 0, %otherwin
 	wrpr	%g0, 0, %canrestore
 
 	sub	PCB_REG, SPOFF + CCFSZ + TF_SIZEOF, %sp
 
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_TYPE]
 	stx	%o1, [%sp + SPOFF + CCFSZ + TF_LEVEL]
 	stx	%o3, [%sp + SPOFF + CCFSZ + TF_TAR]
 	stx	%o4, [%sp + SPOFF + CCFSZ + TF_SFAR]
 	stx	%o5, [%sp + SPOFF + CCFSZ + TF_SFSR]
 
 	stx	%l0, [%sp + SPOFF + CCFSZ + TF_TSTATE]
 	stx	%l1, [%sp + SPOFF + CCFSZ + TF_TPC]
 	stx	%l2, [%sp + SPOFF + CCFSZ + TF_TNPC]
 	stx	%l3, [%sp + SPOFF + CCFSZ + TF_Y]
 	stx	%l4, [%sp + SPOFF + CCFSZ + TF_FPRS]
 	stx	%l5, [%sp + SPOFF + CCFSZ + TF_WSTATE]
 
 	wr	%g0, FPRS_FEF, %fprs
 	stx	%fsr, [%sp + SPOFF + CCFSZ + TF_FSR]
 	rd	%gsr, %l6
 	stx	%l6, [%sp + SPOFF + CCFSZ + TF_GSR]
 	wr	%g0, 0, %fprs
 
 	mov	PCB_REG, %l0
 	mov	PCPU_REG, %l1
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	stx	%g6, [%sp + SPOFF + CCFSZ + TF_G6]
 	stx	%g7, [%sp + SPOFF + CCFSZ + TF_G7]
 
 	mov	%l0, PCB_REG
 	mov	%l1, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	stx	%i0, [%sp + SPOFF + CCFSZ + TF_O0]
 	stx	%i1, [%sp + SPOFF + CCFSZ + TF_O1]
 	stx	%i2, [%sp + SPOFF + CCFSZ + TF_O2]
 	stx	%i3, [%sp + SPOFF + CCFSZ + TF_O3]
 	stx	%i4, [%sp + SPOFF + CCFSZ + TF_O4]
 	stx	%i5, [%sp + SPOFF + CCFSZ + TF_O5]
 	stx	%i6, [%sp + SPOFF + CCFSZ + TF_O6]
 	stx	%i7, [%sp + SPOFF + CCFSZ + TF_O7]
 
 	stx	%g1, [%sp + SPOFF + CCFSZ + TF_G1]
 	stx	%g2, [%sp + SPOFF + CCFSZ + TF_G2]
 	stx	%g3, [%sp + SPOFF + CCFSZ + TF_G3]
 	stx	%g4, [%sp + SPOFF + CCFSZ + TF_G4]
 	stx	%g5, [%sp + SPOFF + CCFSZ + TF_G5]
 
 	set	tl0_ret - 8, %o7
 	jmpl	%o2, %g0
 	 add	%sp, CCFSZ + SPOFF, %o0
 END(tl0_trap)
 
 /*
  * void tl0_intr(u_int level, u_int mask)
  */
 ENTRY(tl0_intr)
 	/*
 	 * Force kernel store order.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	rdpr	%tstate, %l0
 	rdpr	%tpc, %l1
 	rdpr	%tnpc, %l2
 	rd	%y, %l3
 	rd	%fprs, %l4
 	rdpr	%wstate, %l5
 
 #if KTR_COMPILE & KTR_INTR
 	CATR(KTR_INTR,
 	    "tl0_intr: td=%p level=%#x pil=%#lx pc=%#lx npc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	stx	%o0, [%g1 + KTR_PARM2]
 	rdpr	%pil, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 	stx	%l1, [%g1 + KTR_PARM4]
 	stx	%l2, [%g1 + KTR_PARM5]
 	stx	%i6, [%g1 + KTR_PARM6]
 9:
 #endif
 
 	wrpr	%o0, 0, %pil
 	wr	%o1, 0, %clear_softint
 
 	and	%l5, WSTATE_NORMAL_MASK, %l5
 	sllx	%l5, WSTATE_OTHER_SHIFT, %l5
 	wrpr	%l5, WSTATE_KERNEL, %wstate
 	rdpr	%canrestore, %l6
 	wrpr	%l6, 0, %otherwin
 	wrpr	%g0, 0, %canrestore
 
 	sub	PCB_REG, SPOFF + CCFSZ + TF_SIZEOF, %sp
 
 	stx	%l0, [%sp + SPOFF + CCFSZ + TF_TSTATE]
 	stx	%l1, [%sp + SPOFF + CCFSZ + TF_TPC]
 	stx	%l2, [%sp + SPOFF + CCFSZ + TF_TNPC]
 	stx	%l3, [%sp + SPOFF + CCFSZ + TF_Y]
 	stx	%l4, [%sp + SPOFF + CCFSZ + TF_FPRS]
 	stx	%l5, [%sp + SPOFF + CCFSZ + TF_WSTATE]
 
 	wr	%g0, FPRS_FEF, %fprs
 	stx	%fsr, [%sp + SPOFF + CCFSZ + TF_FSR]
 	rd	%gsr, %l6
 	stx	%l6, [%sp + SPOFF + CCFSZ + TF_GSR]
 	wr	%g0, 0, %fprs
 
 	mov	%o0, %l3
 	mov	T_INTERRUPT, %o1
 
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_LEVEL]
 	stx	%o1, [%sp + SPOFF + CCFSZ + TF_TYPE]
 
 	mov	PCB_REG, %l0
 	mov	PCPU_REG, %l1
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	stx	%g1, [%sp + SPOFF + CCFSZ + TF_G1]
 	stx	%g2, [%sp + SPOFF + CCFSZ + TF_G2]
 	stx	%g3, [%sp + SPOFF + CCFSZ + TF_G3]
 	stx	%g4, [%sp + SPOFF + CCFSZ + TF_G4]
 	stx	%g5, [%sp + SPOFF + CCFSZ + TF_G5]
 	stx	%g6, [%sp + SPOFF + CCFSZ + TF_G6]
 	stx	%g7, [%sp + SPOFF + CCFSZ + TF_G7]
 
 	mov	%l0, PCB_REG
 	mov	%l1, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	stx	%i0, [%sp + SPOFF + CCFSZ + TF_O0]
 	stx	%i1, [%sp + SPOFF + CCFSZ + TF_O1]
 	stx	%i2, [%sp + SPOFF + CCFSZ + TF_O2]
 	stx	%i3, [%sp + SPOFF + CCFSZ + TF_O3]
 	stx	%i4, [%sp + SPOFF + CCFSZ + TF_O4]
 	stx	%i5, [%sp + SPOFF + CCFSZ + TF_O5]
 	stx	%i6, [%sp + SPOFF + CCFSZ + TF_O6]
 	stx	%i7, [%sp + SPOFF + CCFSZ + TF_O7]
 
 	SET(intr_handlers, %l1, %l0)
 	sllx	%l3, IH_SHIFT, %l1
 	ldx	[%l0 + %l1], %l1
 	KASSERT(%l1, "tl0_intr: ih null")
 	call	%l1
 	 add	%sp, CCFSZ + SPOFF, %o0
 
 	/* %l3 contains PIL */
 	SET(intrcnt, %l1, %l2)
 	prefetcha [%l2] ASI_N, 1
 	SET(pil_countp, %l1, %l0)
 	sllx	%l3, 1, %l1
 	lduh	[%l0 + %l1], %l0
 	sllx	%l0, 3, %l0
 	add	%l0, %l2, %l0
 	ldx	[%l0], %l1
 	inc	%l1
 	stx	%l1, [%l0]
 
 	lduw	[PCPU(CNT) + V_INTR], %l0
 	inc	%l0
 	stw	%l0, [PCPU(CNT) + V_INTR]
 
 	ba,a	%xcc, tl0_ret
 	 nop
 END(tl0_intr)
 
 /*
  * Initiate return to usermode.
  *
  * Called with a trapframe on the stack.  The window that was setup in
  * tl0_trap may have been used by "fast" trap handlers that pretend to be
  * leaf functions, so all ins and locals may have been clobbered since
  * then.
  *
  * This code is rather long and complicated.
  */
 ENTRY(tl0_ret)
 	/*
 	 * Check for pending asts atomically with returning.  We must raise
 	 * the PIL before checking, and if no asts are found the PIL must
 	 * remain raised until the retry is executed, or we risk missing asts
 	 * caused by interrupts occurring after the test.  If the PIL is
 	 * lowered, as it is when we call ast, the check must be re-executed.
 	 */
 	wrpr	%g0, PIL_TICK, %pil
 	ldx	[PCPU(CURTHREAD)], %l0
 	lduw	[%l0 + TD_FLAGS], %l1
 	set	TDF_ASTPENDING | TDF_NEEDRESCHED, %l2
 	and	%l1, %l2, %l1
 	brz,a,pt %l1, 1f
 	 nop
 
 	/*
 	 * We have an AST.  Re-enable interrupts and handle it, then restart
 	 * the return sequence.
 	 */
 	wrpr	%g0, 0, %pil
 	call	ast
 	 add	%sp, CCFSZ + SPOFF, %o0
 	ba,a	%xcc, tl0_ret
 	 nop
 
 	/*
 	 * Check for windows that were spilled to the pcb and need to be
 	 * copied out.  This must be the last thing that is done before the
 	 * return to usermode.  If there are still user windows in the cpu
 	 * and we call a nested function after this, which causes them to be
 	 * spilled to the pcb, they will not be copied out and the stack will
 	 * be inconsistent.
 	 */
 1:	ldx	[PCB_REG + PCB_NSAVED], %l1
 	brz,a,pt %l1, 2f
 	 nop
 	wrpr	%g0, 0, %pil
 	mov	T_SPILL, %o0
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_TYPE]
 	call	trap
 	 add	%sp, SPOFF + CCFSZ, %o0
 	ba,a	%xcc, tl0_ret
 	 nop
 
 	/*
 	 * Restore the out and most global registers from the trapframe.
 	 * The ins will become the outs when we restore below.
 	 */
 2:	ldx	[%sp + SPOFF + CCFSZ + TF_O0], %i0
 	ldx	[%sp + SPOFF + CCFSZ + TF_O1], %i1
 	ldx	[%sp + SPOFF + CCFSZ + TF_O2], %i2
 	ldx	[%sp + SPOFF + CCFSZ + TF_O3], %i3
 	ldx	[%sp + SPOFF + CCFSZ + TF_O4], %i4
 	ldx	[%sp + SPOFF + CCFSZ + TF_O5], %i5
 	ldx	[%sp + SPOFF + CCFSZ + TF_O6], %i6
 	ldx	[%sp + SPOFF + CCFSZ + TF_O7], %i7
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G1], %g1
 	ldx	[%sp + SPOFF + CCFSZ + TF_G2], %g2
 	ldx	[%sp + SPOFF + CCFSZ + TF_G3], %g3
 	ldx	[%sp + SPOFF + CCFSZ + TF_G4], %g4
 	ldx	[%sp + SPOFF + CCFSZ + TF_G5], %g5
 
 	/*
 	 * Load everything we need to restore below before disabling
 	 * interrupts.
 	 */
 	ldx	[%sp + SPOFF + CCFSZ + TF_FPRS], %l0
 	ldx	[%sp + SPOFF + CCFSZ + TF_GSR], %l1
 	ldx	[%sp + SPOFF + CCFSZ + TF_TNPC], %l2
 	ldx	[%sp + SPOFF + CCFSZ + TF_TPC], %l3
 	ldx	[%sp + SPOFF + CCFSZ + TF_TSTATE], %l4
 	ldx	[%sp + SPOFF + CCFSZ + TF_Y], %l5
 	ldx	[%sp + SPOFF + CCFSZ + TF_WSTATE], %l6
 
 	/*
 	 * Disable interrupts to restore the special globals.  They are not
 	 * saved and restored for all kernel traps, so an interrupt at the
 	 * wrong time would clobber them.
 	 */
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G6], %g6
 	ldx	[%sp + SPOFF + CCFSZ + TF_G7], %g7
 
 	/*
 	 * Switch to alternate globals.  This frees up some registers we
 	 * can use after the restore changes our window.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	/*
 	 * Drop %pil to zero.  It must have been zero at the time of the
 	 * trap, since we were in usermode, but it was raised above in
 	 * order to check for asts atomically.  We have interrupts disabled
 	 * so any interrupts will not be serviced until we complete the
 	 * return to usermode.
 	 */
 	wrpr	%g0, 0, %pil
 
 	/*
 	 * Save %fprs in an alternate global so it can be restored after the
 	 * restore instruction below.  If we restore it before the restore,
 	 * and the restore traps we may run for a while with floating point
 	 * enabled in the kernel, which we want to avoid.
 	 */
 	mov	%l0, %g1
 
 	/*
 	 * Restore %fsr and %gsr.  These need floating point enabled in %fprs,
 	 * so we set it temporarily and then clear it.
 	 */
 	wr	%g0, FPRS_FEF, %fprs
 	ldx	[%sp + SPOFF + CCFSZ + TF_FSR], %fsr
 	wr	%l1, 0, %gsr
 	wr	%g0, 0, %fprs
 
 	/*
 	 * Restore program counters.  This could be done after the restore
 	 * but we're out of alternate globals to store them in...
 	 */
 	wrpr	%l2, 0, %tnpc
 	wrpr	%l3, 0, %tpc
 
 	/*
 	 * Save %tstate in an alternate global and clear the %cwp field.  %cwp
 	 * will be affected by the restore below and we need to make sure it
 	 * points to the current window at that time, not the window that was
 	 * active at the time of the trap.
 	 */
 	andn	%l4, TSTATE_CWP_MASK, %g2
 
 	/*
-	 * Save %y in an alternate global.
+	 * Restore %y.  Could also be below if we had more alternate globals.
 	 */
-	mov	%l5, %g4
+	wr	%l5, 0, %y
 
 	/*
 	 * Setup %wstate for return.  We need to restore the user window state
 	 * which we saved in wstate.other when we trapped.  We also need to
 	 * set the transition bit so the restore will be handled specially
 	 * if it traps, use the xor feature of wrpr to do that.
 	 */
 	srlx	%l6, WSTATE_OTHER_SHIFT, %g3
 	wrpr	%g3, WSTATE_TRANSITION, %wstate
 
 	/*
 	 * Setup window management registers for return.  If not all user
 	 * windows were spilled in the kernel %otherwin will be non-zero,
 	 * so we need to transfer it to %canrestore to correctly restore
 	 * those windows.  Otherwise everything gets set to zero and the
 	 * restore below will fill a window directly from the user stack.
 	 */
 	rdpr	%otherwin, %o0
 	wrpr	%o0, 0, %canrestore
 	wrpr	%g0, 0, %otherwin
 	wrpr	%o0, 0, %cleanwin
 
 	/*
 	 * Now do the restore.  If this instruction causes a fill trap which
 	 * fails to fill a window from the user stack, we will resume at
 	 * tl0_ret_fill_end and call back into the kernel.
 	 */
 	restore
 tl0_ret_fill:
 
 	/*
 	 * We made it.  We're back in the window that was active at the time
 	 * of the trap, and ready to return to usermode.
 	 */
 
 	/*
 	 * Restore %frps.  This was saved in an alternate global above.
 	 */
 	wr	%g1, 0, %fprs
 
 	/*
 	 * Fixup %tstate so the saved %cwp points to the current window and
 	 * restore it.
 	 */
-	rdpr	%cwp, %g1
-	wrpr	%g2, %g1, %tstate
+	rdpr	%cwp, %g4
+	wrpr	%g2, %g4, %tstate
 
 	/*
 	 * Restore the user window state.  The transition bit was set above
 	 * for special handling of the restore, this clears it.
 	 */
 	wrpr	%g3, 0, %wstate
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl0_ret: td=%#lx pil=%#lx pc=%#lx npc=%#lx sp=%#lx"
-	    , %g1, %g2, %g3, 7, 8, 9)
-	ldx	[PCPU(CURTHREAD)], %g2
-	stx	%g2, [%g1 + KTR_PARM1]
-	rdpr	%pil, %g2
-	stx	%g2, [%g1 + KTR_PARM2]
-	rdpr	%tpc, %g2
-	stx	%g2, [%g1 + KTR_PARM3]
-	rdpr	%tnpc, %g2
-	stx	%g2, [%g1 + KTR_PARM4]
-	stx	%sp, [%g1 + KTR_PARM5]
+	    , %g2, %g3, %g4, 7, 8, 9)
+	ldx	[PCPU(CURTHREAD)], %g3
+	stx	%g3, [%g2 + KTR_PARM1]
+	rdpr	%pil, %g3
+	stx	%g3, [%g2 + KTR_PARM2]
+	rdpr	%tpc, %g3
+	stx	%g3, [%g2 + KTR_PARM3]
+	rdpr	%tnpc, %g3
+	stx	%g3, [%g2 + KTR_PARM4]
+	stx	%sp, [%g2 + KTR_PARM5]
 9:
 #endif
 
 	/*
-	 * Restore %y.  Note that the CATR above clobbered it.
-	 */
-	wr	%g4, 0, %y
-
-	/*
 	 * Return to usermode.
 	 */
 	retry
 tl0_ret_fill_end:
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl0_ret: fill magic ps=%#lx ws=%#lx sp=%#lx"
 	    , %l0, %l1, %l2, 7, 8, 9)
 	rdpr	%pstate, %l1
 	stx	%l1, [%l0 + KTR_PARM1]
 	stx	%l6, [%l0 + KTR_PARM2]
 	stx	%sp, [%l0 + KTR_PARM3]
 9:
-
-	/*
-	 * Restore %y clobbered by the CATR.  This was saved in %l5 above.
-	 */
-	wr	%l5, 0, %y
 #endif
 
 	/*
 	 * The restore above caused a fill trap and the fill handler was
 	 * unable to fill a window from the user stack.  The special fill
 	 * handler recognized this and punted, sending us here.  We need
 	 * to carefully undo any state that was restored before the restore
 	 * was executed and call trap again.  Trap will copyin a window
 	 * from the user stack which will fault in the page we need so the
 	 * restore above will succeed when we try again.  If this fails
 	 * the process has trashed its stack, so we kill it.
 	 */
 
 	/*
 	 * Restore the kernel window state.  This was saved in %l6 above, and
 	 * since the restore failed we're back in the same window.
 	 */
 	wrpr	%l6, 0, %wstate
 
 	/*
 	 * Restore the normal globals which have predefined values in the
 	 * kernel.  We clobbered them above restoring the user's globals
 	 * so this is very important.
 	 * XXX PSTATE_ALT must already be set.
 	 */
 	wrpr	%g0, PSTATE_ALT, %pstate
 	mov	PCB_REG, %o0
 	mov	PCPU_REG, %o1
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 	mov	%o0, PCB_REG
 	mov	%o1, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	/*
 	 * Simulate a fill trap and then start the whole return sequence over
 	 * again.  This is special because it only copies in 1 window, not 2
 	 * as we would for a normal failed fill.  This may be the first time
 	 * the process has been run, so there may not be 2 windows worth of
 	 * stack to copyin.
 	 */
 	mov	T_FILL_RET, %o0
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_TYPE]
 	call	trap
 	 add	%sp, SPOFF + CCFSZ, %o0
 	ba,a	%xcc, tl0_ret
 	 nop
 END(tl0_ret)
 
 /*
  * Kernel trap entry point
  *
  * void tl1_trap(u_int type, u_long o1, u_long o2, u_long tar, u_long sfar,
  *     u_int sfsr)
  *
  * This is easy because the stack is already setup and the windows don't need
  * to be split.  We build a trapframe and call trap(), the same as above, but
  * the outs don't need to be saved.
  */
 ENTRY(tl1_trap)
 	rdpr	%tstate, %l0
 	rdpr	%tpc, %l1
 	rdpr	%tnpc, %l2
 	rdpr	%pil, %l3
 	rd	%y, %l4
 	rdpr	%wstate, %l5
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl1_trap: td=%p type=%#lx pil=%#lx pc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	stx	%o0, [%g1 + KTR_PARM2]
 	stx	%l3, [%g1 + KTR_PARM3]
 	stx	%l1, [%g1 + KTR_PARM4]
 	stx	%i6, [%g1 + KTR_PARM5]
 9:
 #endif
 
 	wrpr	%g0, 1, %tl
 
 	and	%l5, WSTATE_OTHER_MASK, %l5
 	wrpr	%l5, WSTATE_KERNEL, %wstate
 
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_TYPE]
 	stx	%o1, [%sp + SPOFF + CCFSZ + TF_LEVEL]
 	stx	%o3, [%sp + SPOFF + CCFSZ + TF_TAR]
 	stx	%o4, [%sp + SPOFF + CCFSZ + TF_SFAR]
 	stx	%o5, [%sp + SPOFF + CCFSZ + TF_SFSR]
 
 	stx	%l0, [%sp + SPOFF + CCFSZ + TF_TSTATE]
 	stx	%l1, [%sp + SPOFF + CCFSZ + TF_TPC]
 	stx	%l2, [%sp + SPOFF + CCFSZ + TF_TNPC]
 	stx	%l3, [%sp + SPOFF + CCFSZ + TF_PIL]
 	stx	%l4, [%sp + SPOFF + CCFSZ + TF_Y]
 
 	mov	PCB_REG, %l0
 	mov	PCPU_REG, %l1
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	stx	%g6, [%sp + SPOFF + CCFSZ + TF_G6]
 	stx	%g7, [%sp + SPOFF + CCFSZ + TF_G7]
 
 	mov	%l0, PCB_REG
 	mov	%l1, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	stx	%i0, [%sp + SPOFF + CCFSZ + TF_O0]
 	stx	%i1, [%sp + SPOFF + CCFSZ + TF_O1]
 	stx	%i2, [%sp + SPOFF + CCFSZ + TF_O2]
 	stx	%i3, [%sp + SPOFF + CCFSZ + TF_O3]
 	stx	%i4, [%sp + SPOFF + CCFSZ + TF_O4]
 	stx	%i5, [%sp + SPOFF + CCFSZ + TF_O5]
 	stx	%i6, [%sp + SPOFF + CCFSZ + TF_O6]
 	stx	%i7, [%sp + SPOFF + CCFSZ + TF_O7]
 
 	stx	%g1, [%sp + SPOFF + CCFSZ + TF_G1]
 	stx	%g2, [%sp + SPOFF + CCFSZ + TF_G2]
 	stx	%g3, [%sp + SPOFF + CCFSZ + TF_G3]
 	stx	%g4, [%sp + SPOFF + CCFSZ + TF_G4]
 	stx	%g5, [%sp + SPOFF + CCFSZ + TF_G5]
 
 	set	tl1_ret - 8, %o7
 	jmpl	%o2, %g0
 	 add	%sp, CCFSZ + SPOFF, %o0
 END(tl1_trap)
 
 ENTRY(tl1_ret)
 	ldx	[%sp + SPOFF + CCFSZ + TF_O0], %i0
 	ldx	[%sp + SPOFF + CCFSZ + TF_O1], %i1
 	ldx	[%sp + SPOFF + CCFSZ + TF_O2], %i2
 	ldx	[%sp + SPOFF + CCFSZ + TF_O3], %i3
 	ldx	[%sp + SPOFF + CCFSZ + TF_O4], %i4
 	ldx	[%sp + SPOFF + CCFSZ + TF_O5], %i5
 	ldx	[%sp + SPOFF + CCFSZ + TF_O6], %i6
 	ldx	[%sp + SPOFF + CCFSZ + TF_O7], %i7
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G1], %g1
 	ldx	[%sp + SPOFF + CCFSZ + TF_G2], %g2
 	ldx	[%sp + SPOFF + CCFSZ + TF_G3], %g3
 	ldx	[%sp + SPOFF + CCFSZ + TF_G4], %g4
 	ldx	[%sp + SPOFF + CCFSZ + TF_G5], %g5
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_TSTATE], %l0
 	ldx	[%sp + SPOFF + CCFSZ + TF_TPC], %l1
 	ldx	[%sp + SPOFF + CCFSZ + TF_TNPC], %l2
 	ldx	[%sp + SPOFF + CCFSZ + TF_PIL], %l3
 	ldx	[%sp + SPOFF + CCFSZ + TF_Y], %l4
 
 	set	VM_MIN_PROM_ADDRESS, %l5
 	cmp	%l1, %l5
 	bl,a,pt	%xcc, 1f
 	 nop
 	set	VM_MAX_PROM_ADDRESS, %l5
 	cmp	%l1, %l5
 	bg,a,pt	%xcc, 1f
 	 nop
 
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G6], %g6
 	ldx	[%sp + SPOFF + CCFSZ + TF_G7], %g7
 
 1:	wrpr	%g0, PSTATE_ALT, %pstate
 
 	andn	%l0, TSTATE_CWP_MASK, %g1
 	mov	%l1, %g2
 	mov	%l2, %g3
-	mov	%l4, %g4
 
 	wrpr	%l3, 0, %pil
+	wr	%l4, 0, %y
 
 	restore
 
 	wrpr	%g0, 2, %tl
 
+	rdpr	%cwp, %g4
+	wrpr	%g1, %g4, %tstate
 	wrpr	%g2, 0, %tpc
 	wrpr	%g3, 0, %tnpc
-	rdpr	%cwp, %g2
-	wrpr	%g1, %g2, %tstate
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl1_ret: td=%#lx pil=%#lx ts=%#lx pc=%#lx sp=%#lx"
-	    , %g1, %g2, %g3, 7, 8, 9)
-	ldx	[PCPU(CURTHREAD)], %g2
-	stx	%g2, [%g1 + KTR_PARM1]
-	rdpr	%pil, %g2
-	stx	%g2, [%g1 + KTR_PARM2]
-	rdpr	%tstate, %g2
-	stx	%g2, [%g1 + KTR_PARM3]
-	rdpr	%tpc, %g2
-	stx	%g2, [%g1 + KTR_PARM4]
-	stx	%sp, [%g1 + KTR_PARM5]
+	    , %g2, %g3, %g4, 7, 8, 9)
+	ldx	[PCPU(CURTHREAD)], %g3
+	stx	%g3, [%g2 + KTR_PARM1]
+	rdpr	%pil, %g3
+	stx	%g3, [%g2 + KTR_PARM2]
+	rdpr	%tstate, %g3
+	stx	%g3, [%g2 + KTR_PARM3]
+	rdpr	%tpc, %g3
+	stx	%g3, [%g2 + KTR_PARM4]
+	stx	%sp, [%g2 + KTR_PARM5]
 9:
 #endif
 
-	wr	%g4, 0, %y
-
 	retry
 END(tl1_ret)
 
 /*
  * void tl1_intr(u_int level, u_int mask)
  */
 ENTRY(tl1_intr)
 	rdpr	%tstate, %l0
 	rdpr	%tpc, %l1
 	rdpr	%tnpc, %l2
 	rdpr	%pil, %l3
 	rd	%y, %l4
 	rdpr	%wstate, %l5
 
 #if KTR_COMPILE & KTR_INTR
 	CATR(KTR_INTR,
 	    "tl1_intr: td=%p level=%#x pil=%#lx pc=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	stx	%o0, [%g1 + KTR_PARM2]
 	stx	%l3, [%g1 + KTR_PARM3]
 	stx	%l1, [%g1 + KTR_PARM4]
 	stx	%i6, [%g1 + KTR_PARM5]
 9:
 #endif
 
 	wrpr	%o0, 0, %pil
 	wr	%o1, 0, %clear_softint
 
 	wrpr	%g0, 1, %tl
 
 	and	%l5, WSTATE_OTHER_MASK, %l5
 	wrpr	%l5, WSTATE_KERNEL, %wstate
 
 	stx	%l0, [%sp + SPOFF + CCFSZ + TF_TSTATE]
 	stx	%l1, [%sp + SPOFF + CCFSZ + TF_TPC]
 	stx	%l2, [%sp + SPOFF + CCFSZ + TF_TNPC]
 	stx	%l3, [%sp + SPOFF + CCFSZ + TF_PIL]
 	stx	%l4, [%sp + SPOFF + CCFSZ + TF_Y]
 
 	mov	%o0, %l7
 	mov	T_INTERRUPT | T_KERNEL, %o1
 
 	stx	%o0, [%sp + SPOFF + CCFSZ + TF_LEVEL]
 	stx	%o1, [%sp + SPOFF + CCFSZ + TF_TYPE]
 
 	stx	%i6, [%sp + SPOFF + CCFSZ + TF_O6]
 	stx	%i7, [%sp + SPOFF + CCFSZ + TF_O7]
 
 	mov	PCB_REG, %l4
 	mov	PCPU_REG, %l5
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 
 	stx	%g1, [%sp + SPOFF + CCFSZ + TF_G1]
 	stx	%g2, [%sp + SPOFF + CCFSZ + TF_G2]
 	stx	%g3, [%sp + SPOFF + CCFSZ + TF_G3]
 	stx	%g4, [%sp + SPOFF + CCFSZ + TF_G4]
 	stx	%g5, [%sp + SPOFF + CCFSZ + TF_G5]
 
 	mov	%l4, PCB_REG
 	mov	%l5, PCPU_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	SET(intr_handlers, %l5, %l4)
 	sllx	%l7, IH_SHIFT, %l5
 	ldx	[%l4 + %l5], %l5
 	KASSERT(%l5, "tl1_intr: ih null")
 	call	%l5
 	 add	%sp, CCFSZ + SPOFF, %o0
 
 	/* %l7 contains PIL */
 	SET(intrcnt, %l5, %l4)
 	prefetcha [%l4] ASI_N, 1
 	SET(pil_countp, %l5, %l6)
 	sllx	%l7, 1, %l5
 	lduh	[%l5 + %l6], %l5
 	sllx	%l5, 3, %l5
 	add	%l5, %l4, %l4
 	ldx	[%l4], %l5
 	inc	%l5
 	stx	%l5, [%l4]
 
 	lduw	[PCPU(CNT) + V_INTR], %l4
 	inc	%l4
 	stw	%l4, [PCPU(CNT) + V_INTR]
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_Y], %l4
 
 	ldx	[%sp + SPOFF + CCFSZ + TF_G1], %g1
 	ldx	[%sp + SPOFF + CCFSZ + TF_G2], %g2
 	ldx	[%sp + SPOFF + CCFSZ + TF_G3], %g3
 	ldx	[%sp + SPOFF + CCFSZ + TF_G4], %g4
 	ldx	[%sp + SPOFF + CCFSZ + TF_G5], %g5
 
 	wrpr	%g0, PSTATE_ALT, %pstate
 
 	andn	%l0, TSTATE_CWP_MASK, %g1
 	mov	%l1, %g2
 	mov	%l2, %g3
-	mov	%l4, %g4
 	wrpr	%l3, 0, %pil
+	wr	%l4, 0, %y
 
 	restore
 
 	wrpr	%g0, 2, %tl
 
+	rdpr	%cwp, %g4
+	wrpr	%g1, %g4, %tstate
 	wrpr	%g2, 0, %tpc
 	wrpr	%g3, 0, %tnpc
-	rdpr	%cwp, %g2
-	wrpr	%g1, %g2, %tstate
 
 #if KTR_COMPILE & KTR_INTR
 	CATR(KTR_INTR, "tl1_intr: td=%#x pil=%#lx ts=%#lx pc=%#lx sp=%#lx"
-	    , %g1, %g2, %g3, 7, 8, 9)
-	ldx	[PCPU(CURTHREAD)], %g2
-	stx	%g2, [%g1 + KTR_PARM1]
-	rdpr	%pil, %g2
-	stx	%g2, [%g1 + KTR_PARM2]
-	rdpr	%tstate, %g2
-	stx	%g2, [%g1 + KTR_PARM3]
-	rdpr	%tpc, %g2
-	stx	%g2, [%g1 + KTR_PARM4]
-	stx	%sp, [%g1 + KTR_PARM5]
+	    , %g2, %g3, %g4, 7, 8, 9)
+	ldx	[PCPU(CURTHREAD)], %g3
+	stx	%g3, [%g2 + KTR_PARM1]
+	rdpr	%pil, %g3
+	stx	%g3, [%g2 + KTR_PARM2]
+	rdpr	%tstate, %g3
+	stx	%g3, [%g2 + KTR_PARM3]
+	rdpr	%tpc, %g3
+	stx	%g3, [%g2 + KTR_PARM4]
+	stx	%sp, [%g2 + KTR_PARM5]
 9:
 #endif
-
-	wr	%g4, 0, %y
 
 	retry
 END(tl1_intr)
 
 	.globl	tl_text_end
 tl_text_end:
 	nop
 
 /*
  * Freshly forked processes come here when switched to for the first time.
  * The arguments to fork_exit() have been setup in the locals, we must move
  * them to the outs.
  */
 ENTRY(fork_trampoline)
 #if KTR_COMPILE & KTR_PROC
 	CATR(KTR_PROC, "fork_trampoline: td=%p (%s) cwp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[PCPU(CURTHREAD)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	ldx	[%g2 + TD_PROC], %g2
 	add	%g2, P_COMM, %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	rdpr	%cwp, %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 9:
 #endif
 	mov	%l0, %o0
 	mov	%l1, %o1
 	call	fork_exit
 	 mov	%l2, %o2
 	ba,a	%xcc, tl0_ret
 	 nop
 END(fork_trampoline)
Index: stable/10/sys/sparc64/sparc64/mp_exception.S
===================================================================
--- stable/10/sys/sparc64/sparc64/mp_exception.S	(revision 293852)
+++ stable/10/sys/sparc64/sparc64/mp_exception.S	(revision 293853)
@@ -1,312 +1,309 @@
 /*-
  * Copyright (c) 2002 Jake Burkholder.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
 #include <machine/asi.h>
 #include <machine/asmacros.h>
 #include <machine/cache.h>
 #include <machine/ktr.h>
 #include <machine/pstate.h>
 
 #include "assym.s"
 
 	.register	%g2, #ignore
 	.register	%g3, #ignore
 
-#define	IPI_DONE(r1, r2, r3, r4, r5, r6)				\
-	rd	%y, r6 ;						\
+/*
+ * Signal completion of an IPI: atomically clear the bit belonging to this
+ * CPU in the array of longs addressed by r1.  The word index is
+ * cpuid / _NCPUBITS and the bit within that word is cpuid % _NCPUBITS.
+ * r1 is advanced to the word in question; r2 - r5 are scratch.
+ *
+ * %y is neither saved nor restored here, so only instructions that do not
+ * touch %y may be used: udivx and mulx instead of udiv (reads %y) and smul
+ * (writes the upper 32 bits of the product to %y).
+ */
+#define	IPI_DONE(r1, r2, r3, r4, r5)					\
 	lduw	[PCPU(CPUID)], r2 ;					\
 	mov	_NCPUBITS, r3 ;						\
-	mov	%g0, %y ;						\
-	udiv	r2, r3, r4 ;						\
+	udivx	r2, r3, r4 ;						\
 	srl	r4, 0, r5 ;						\
 	sllx	r5, PTR_SHIFT, r5 ;					\
 	add	r1, r5, r1 ;						\
-	smul	r4, r3, r3 ;						\
+	mulx	r4, r3, r3 ;						\
 	sub	r2, r3, r3 ;						\
 	mov	1, r4 ;							\
 	sllx	r4, r3, r4 ;						\
-	wr	r6, %y ;						\
 	ATOMIC_CLEAR_LONG(r1, r2, r3, r4)
 
 /*
  * Invalidate a physical page in the data cache.  For UltraSPARC I and II.
  *
  * The physical address is read from the ICA_PA field of the structure
  * addressed by %g5.  The D-cache tags are walked from the last line down
  * to offset 0; a line that has valid bits set and whose tag matches the
  * page gets its tag rewritten with the masked tag value.
  */
 ENTRY(tl_ipi_spitfire_dcache_page_inval)
 #if KTR_COMPILE & KTR_SMP
 	CATR(KTR_SMP, "tl_ipi_spitfire_dcache_page_inval: pa=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[%g5 + ICA_PA], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 9:
 #endif
 
 	/* %g6 = pa >> (PAGE_SHIFT - DC_TAG_SHIFT), compared with the tags. */
 	ldx	[%g5 + ICA_PA], %g6
 	srlx	%g6, PAGE_SHIFT - DC_TAG_SHIFT, %g6
 
 	/* %g2 = offset of the last cache line, %g4 = line size (step). */
 	lduw	[PCPU(CACHE) + DC_SIZE], %g3
 	lduw	[PCPU(CACHE) + DC_LINESIZE], %g4
 	sub	%g3, %g4, %g2
 
 	/*
 	 * Read the tag at offset %g2 and skip the line if none of its valid
 	 * bits is set.  The tag mask is loaded in the delay slot of the
 	 * (non-annulled) branch.
 	 */
 1:	ldxa	[%g2] ASI_DCACHE_TAG, %g1
 	srlx	%g1, DC_VALID_SHIFT, %g3
 	andcc	%g3, DC_VALID_MASK, %g0
 	bz,pt	%xcc, 2f
 	 set	DC_TAG_MASK, %g3
 	sllx	%g3, DC_TAG_SHIFT, %g3
 	and	%g1, %g3, %g1
 	cmp	%g1, %g6
 	bne,a,pt %xcc, 2f
 	 nop
 	/* Match: store the masked tag back to the tag of this line. */
 	stxa	%g1, [%g2] ASI_DCACHE_TAG
 	membar	#Sync
 
 	/* Continue while the offset is > 0; it is decremented in the delay slot. */
 2:	brgz,pt	%g2, 1b
 	 sub	%g2, %g4, %g2
 
-	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4)
 	retry
 END(tl_ipi_spitfire_dcache_page_inval)
 
 /*
  * Invalidate a physical page in the instruction cache.  For UltraSPARC I and
  * II.
  *
  * Same scheme as for the data cache: the physical address comes from the
  * ICA_PA field of the structure addressed by %g5 and the I-cache tags are
  * walked from the last line down to offset 0, rewriting the tag of every
  * line that has valid bits set and matches the page.
  */
 ENTRY(tl_ipi_spitfire_icache_page_inval)
 #if KTR_COMPILE & KTR_SMP
 	CATR(KTR_SMP, "tl_ipi_spitfire_icache_page_inval: pa=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[%g5 + ICA_PA], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 9:
 #endif
 
 	/* %g6 = pa >> (PAGE_SHIFT - IC_TAG_SHIFT), compared with the tags. */
 	ldx	[%g5 + ICA_PA], %g6
 	srlx	%g6, PAGE_SHIFT - IC_TAG_SHIFT, %g6
 
 	/* %g2 = offset of the last cache line, %g4 = line size (step). */
 	lduw	[PCPU(CACHE) + IC_SIZE], %g3
 	lduw	[PCPU(CACHE) + IC_LINESIZE], %g4
 	sub	%g3, %g4, %g2
 
 	/*
 	 * The doubleword load targets the %g0/%g1 pair; only %g1 is used
 	 * afterwards.  Lines without any valid bit set are skipped, the tag
 	 * mask being loaded in the delay slot of the branch.
 	 */
 1:	ldda	[%g2] ASI_ICACHE_TAG, %g0 /*, %g1 */
 	srlx	%g1, IC_VALID_SHIFT, %g3
 	andcc	%g3, IC_VALID_MASK, %g0
 	bz,pt	%xcc, 2f
 	 set	IC_TAG_MASK, %g3
 	sllx	%g3, IC_TAG_SHIFT, %g3
 	and	%g1, %g3, %g1
 	cmp	%g1, %g6
 	bne,a,pt %xcc, 2f
 	 nop
 	/* Match: store the masked tag back to the tag of this line. */
 	stxa	%g1, [%g2] ASI_ICACHE_TAG
 	membar	#Sync
 
 	/* Continue while the offset is > 0; it is decremented in the delay slot. */
 2:	brgz,pt	%g2, 1b
 	 sub	%g2, %g4, %g2
 
-	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4)
 	retry
 END(tl_ipi_spitfire_icache_page_inval)
 
 /*
  * Invalidate a physical page in the data cache.  For UltraSPARC III.
  *
  * The physical address is read from the ICA_PA field of the structure
  * addressed by %g5.  A store to ASI_DCACHE_INVALIDATE is issued for every
  * DC_LINESIZE bytes in [pa, pa + PAGE_SIZE).
  */
 ENTRY(tl_ipi_cheetah_dcache_page_inval)
 #if KTR_COMPILE & KTR_SMP
 	CATR(KTR_SMP, "tl_ipi_cheetah_dcache_page_inval: pa=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[%g5 + ICA_PA], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 9:
 #endif
 
 	/* %g1 = current address, starting at the page's physical address. */
 	ldx	[%g5 + ICA_PA], %g1
 
 	/* %g3 = end address (pa + PAGE_SIZE). */
 	set	PAGE_SIZE, %g2
 	add	%g1, %g2, %g3
 
 	/* %g2 is reused for the line size, which is the loop step. */
 	lduw	[PCPU(CACHE) + DC_LINESIZE], %g2
 
 1:	stxa	%g0, [%g1] ASI_DCACHE_INVALIDATE
 	membar	#Sync
 
 	add	%g1, %g2, %g1
 	cmp	%g1, %g3
 	blt,a,pt %xcc, 1b
 	 nop
 
-	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4)
 	retry
 END(tl_ipi_cheetah_dcache_page_inval)
 
 /*
  * Trigger a softint at the desired level.
  *
  * The level is taken from %g5: the mask 1 << %g5 is written to
  * %set_softint.  Unlike the other handlers in this file, IPI_DONE is not
  * used here.
  */
 ENTRY(tl_ipi_level)
 #if KTR_COMPILE & KTR_SMP
 	/* Trace the CPU ID, the module ID and the contents of %g4 and %g5. */
 	CATR(KTR_SMP, "tl_ipi_level: cpuid=%d mid=%d d1=%#lx d2=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	lduw	[PCPU(CPUID)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	lduw	[PCPU(MID)], %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	stx	%g4, [%g1 + KTR_PARM3]
 	stx	%g5, [%g1 + KTR_PARM4]
 9:
 #endif
 
 	mov	1, %g1
 	sllx	%g1, %g5, %g1
 	wr	%g1, 0, %set_softint
 	retry
 END(tl_ipi_level)
 
 /*
  * Demap a page from the dtlb and/or itlb.
  *
  * The pmap and the virtual address are read from the ITA_PMAP and ITA_VA
  * fields of the structure addressed by %g5.  A nucleus page demap is used
  * when the pmap is kernel_pmap_store, a primary page demap otherwise.
  */
 ENTRY(tl_ipi_tlb_page_demap)
 #if KTR_COMPILE & KTR_SMP
 	CATR(KTR_SMP, "ipi_tlb_page_demap: pm=%p va=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[%g5 + ITA_PMAP], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	ldx	[%g5 + ITA_VA], %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 9:
 #endif
 
 	ldx	[%g5 + ITA_PMAP], %g1
 
 	/* Default to a nucleus demap ... */
 	SET(kernel_pmap_store, %g3, %g2)
 	mov	TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g3
 
 	/* ... and switch to a primary demap if this is not the kernel pmap. */
 	cmp	%g1, %g2
 	movne	%xcc, TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, %g3
 
 	/* The demap address is the VA combined with the demap type bits. */
 	ldx	[%g5 + ITA_VA], %g2
 	or	%g2, %g3, %g2
 
 	sethi	%hi(KERNBASE), %g3
 	stxa	%g0, [%g2] ASI_DMMU_DEMAP
 	stxa	%g0, [%g2] ASI_IMMU_DEMAP
 	flush	%g3
 
-	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4)
 	retry
 END(tl_ipi_tlb_page_demap)
 
 /*
  * Demap a range of pages from the dtlb and itlb.
  *
  * The pmap and the range are read from the ITA_PMAP, ITA_START and ITA_END
  * fields of the structure addressed by %g5.  Starting at ITA_START, one
  * page demap is issued per PAGE_SIZE step while the address is below
  * ITA_END; at least one demap is always performed.  A nucleus demap is
  * used for kernel_pmap_store, a primary demap otherwise.
  */
 ENTRY(tl_ipi_tlb_range_demap)
 #if KTR_COMPILE & KTR_SMP
 	CATR(KTR_SMP, "ipi_tlb_range_demap: pm=%p start=%#lx end=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[%g5 + ITA_PMAP], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	ldx	[%g5 + ITA_START], %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	ldx	[%g5 + ITA_END], %g2
 	stx	%g2, [%g1 + KTR_PARM3]
 9:
 #endif
 
 	ldx	[%g5 + ITA_PMAP], %g1
 
 	/* %g3 = demap type: nucleus for the kernel pmap, primary otherwise. */
 	SET(kernel_pmap_store, %g3, %g2)
 	mov	TLB_DEMAP_NUCLEUS | TLB_DEMAP_PAGE, %g3
 
 	cmp	%g1, %g2
 	movne	%xcc, TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, %g3
 
 	/* %g1 = current address, %g2 = end address. */
 	ldx	[%g5 + ITA_START], %g1
 	ldx	[%g5 + ITA_END], %g2
 
 	sethi	%hi(KERNBASE), %g6
 1:	or	%g1, %g3, %g4
 	stxa	%g0, [%g4] ASI_DMMU_DEMAP
 	stxa	%g0, [%g4] ASI_IMMU_DEMAP
 	flush	%g6
 
 	/*
 	 * %g6 is reused for PAGE_SIZE here, so the flush address is reloaded
 	 * in the delay slot of the annulled branch, i.e. only when looping.
 	 */
 	set	PAGE_SIZE, %g6
 	add	%g1, %g6, %g1
 	cmp	%g1, %g2
 	blt,a,pt %xcc, 1b
 	 sethi	%hi(KERNBASE), %g6
 
-	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4)
 	retry
 END(tl_ipi_tlb_range_demap)
 
 /*
  * Demap the primary context from the dtlb and itlb.
  *
  * The ITA_PMAP and ITA_VA fields of the structure addressed by %g5 are
  * only read for tracing; the demap itself does not depend on them.
  */
 ENTRY(tl_ipi_tlb_context_demap)
 #if KTR_COMPILE & KTR_SMP
 	CATR(KTR_SMP, "tl_ipi_tlb_context_demap: pm=%p va=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
 	ldx	[%g5 + ITA_PMAP], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
 	ldx	[%g5 + ITA_VA], %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 9:
 #endif
 
 	/* Issue a primary context demap to both MMUs, then flush. */
 	mov	TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, %g1
 	sethi	%hi(KERNBASE), %g3
 	stxa	%g0, [%g1] ASI_DMMU_DEMAP
 	stxa	%g0, [%g1] ASI_IMMU_DEMAP
 	flush	%g3
 
-	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4)
 	retry
 END(tl_ipi_tlb_context_demap)
 
 /*
  * Read %stick.
  *
  * The destination pointer is loaded from the IRA_VAL field of the
  * structure addressed by %g5 and the value of %asr24 is stored there.
  */
 ENTRY(tl_ipi_stick_rd)
 	ldx	[%g5 + IRA_VAL], %g1
 	rd	%asr24, %g2
 	stx	%g2, [%g1]
 
-	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4)
 	retry
 END(tl_ipi_stick_rd)
 
 /*
  * Read %tick.
  *
  * The destination pointer is loaded from the IRA_VAL field of the
  * structure addressed by %g5 and the value of %tick is stored there.
  */
 ENTRY(tl_ipi_tick_rd)
 	ldx	[%g5 + IRA_VAL], %g1
 	rd	%tick, %g2
 	stx	%g2, [%g1]
 
-	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4)
 	retry
 END(tl_ipi_tick_rd)
Index: stable/10/sys/sparc64/sparc64/pmap.c
===================================================================
--- stable/10/sys/sparc64/sparc64/pmap.c	(revision 293852)
+++ stable/10/sys/sparc64/sparc64/pmap.c	(revision 293853)
@@ -1,2279 +1,2286 @@
 /*-
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *      from:   @(#)pmap.c      7.7 (Berkeley)  5/12/91
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * Manages physical address maps.
  *
  * Since the information managed by this module is also stored by the
  * logical address mapping module, this module may throw away valid virtual
  * to physical mappings at almost any time.  However, invalidations of
  * mappings must be done as requested.
  *
  * In order to cope with hardware architectures which make virtual to
  * physical map invalidates expensive, this module may delay invalidate
  * reduced protection operations until such time as they are actually
  * necessary.  This module is given full information as to which processors
  * are currently using which maps, and to when physical maps must be made
  * correct.
  */
 
 #include "opt_kstack_pages.h"
 #include "opt_pmap.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/vmmeter.h>
 
 #include <dev/ofw/openfirm.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 
 #include <machine/cache.h>
 #include <machine/frame.h>
 #include <machine/instr.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
 #include <machine/ofw_mem.h>
 #include <machine/smp.h>
 #include <machine/tlb.h>
 #include <machine/tte.h>
 #include <machine/tsb.h>
 #include <machine/ver.h>
 
 /*
  * Virtual address of message buffer
  */
 struct msgbuf *msgbufp;
 
 /*
  * Map of physical memory regions
  */
 vm_paddr_t phys_avail[128];
 static struct ofw_mem_region mra[128];
 struct ofw_mem_region sparc64_memreg[128];
 int sparc64_nmemreg;
 static struct ofw_map translations[128];
 static int translations_size;
 
 static vm_offset_t pmap_idle_map;
 static vm_offset_t pmap_temp_map_1;
 static vm_offset_t pmap_temp_map_2;
 
 /*
  * First and last available kernel virtual addresses
  */
 vm_offset_t virtual_avail;
 vm_offset_t virtual_end;
 vm_offset_t kernel_vm_end;
 
 vm_offset_t vm_max_kernel_address;
 
 /*
  * Kernel pmap
  */
 struct pmap kernel_pmap_store;
 
 struct rwlock_padalign tte_list_global_lock;
 
 /*
  * Allocate physical memory for use in pmap_bootstrap.
  */
 static vm_paddr_t pmap_bootstrap_alloc(vm_size_t size, uint32_t colors);
 
 static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
 static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
 static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
     struct tte *tp, vm_offset_t va);
 static int pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp,
     vm_offset_t va);
 
 /*
  * Map the given physical page at the specified virtual address in the
  * target pmap with the protection requested.  If specified the page
  * will be wired down.
  *
  * The page queues and pmap must be locked.
  */
 static int pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m,
     vm_prot_t prot, u_int flags, int8_t psind);
 
 extern int tl1_dmmu_miss_direct_patch_tsb_phys_1[];
 extern int tl1_dmmu_miss_direct_patch_tsb_phys_end_1[];
 extern int tl1_dmmu_miss_patch_asi_1[];
 extern int tl1_dmmu_miss_patch_quad_ldd_1[];
 extern int tl1_dmmu_miss_patch_tsb_1[];
 extern int tl1_dmmu_miss_patch_tsb_2[];
 extern int tl1_dmmu_miss_patch_tsb_mask_1[];
 extern int tl1_dmmu_miss_patch_tsb_mask_2[];
 extern int tl1_dmmu_prot_patch_asi_1[];
 extern int tl1_dmmu_prot_patch_quad_ldd_1[];
 extern int tl1_dmmu_prot_patch_tsb_1[];
 extern int tl1_dmmu_prot_patch_tsb_2[];
 extern int tl1_dmmu_prot_patch_tsb_mask_1[];
 extern int tl1_dmmu_prot_patch_tsb_mask_2[];
 extern int tl1_immu_miss_patch_asi_1[];
 extern int tl1_immu_miss_patch_quad_ldd_1[];
 extern int tl1_immu_miss_patch_tsb_1[];
 extern int tl1_immu_miss_patch_tsb_2[];
 extern int tl1_immu_miss_patch_tsb_mask_1[];
 extern int tl1_immu_miss_patch_tsb_mask_2[];
 
 /*
  * If a user pmap is processed with pmap_remove and the resident count
  * drops to 0, there are no more pages to remove, so we need not
  * continue.
  */
 #define	PMAP_REMOVE_DONE(pm) \
 	((pm) != kernel_pmap && (pm)->pm_stats.resident_count == 0)
 
 /*
  * The threshold (in bytes) above which tsb_foreach() is used in pmap_remove()
  * and pmap_protect() instead of trying each virtual address.
  */
 #define	PMAP_TSB_THRESH	((TSB_SIZE / 2) * PAGE_SIZE)
 
 SYSCTL_NODE(_debug, OID_AUTO, pmap_stats, CTLFLAG_RD, 0, "");
 
 PMAP_STATS_VAR(pmap_nenter);
 PMAP_STATS_VAR(pmap_nenter_update);
 PMAP_STATS_VAR(pmap_nenter_replace);
 PMAP_STATS_VAR(pmap_nenter_new);
 PMAP_STATS_VAR(pmap_nkenter);
 PMAP_STATS_VAR(pmap_nkenter_oc);
 PMAP_STATS_VAR(pmap_nkenter_stupid);
 PMAP_STATS_VAR(pmap_nkremove);
 PMAP_STATS_VAR(pmap_nqenter);
 PMAP_STATS_VAR(pmap_nqremove);
 PMAP_STATS_VAR(pmap_ncache_enter);
 PMAP_STATS_VAR(pmap_ncache_enter_c);
 PMAP_STATS_VAR(pmap_ncache_enter_oc);
 PMAP_STATS_VAR(pmap_ncache_enter_cc);
 PMAP_STATS_VAR(pmap_ncache_enter_coc);
 PMAP_STATS_VAR(pmap_ncache_enter_nc);
 PMAP_STATS_VAR(pmap_ncache_enter_cnc);
 PMAP_STATS_VAR(pmap_ncache_remove);
 PMAP_STATS_VAR(pmap_ncache_remove_c);
 PMAP_STATS_VAR(pmap_ncache_remove_oc);
 PMAP_STATS_VAR(pmap_ncache_remove_cc);
 PMAP_STATS_VAR(pmap_ncache_remove_coc);
 PMAP_STATS_VAR(pmap_ncache_remove_nc);
 PMAP_STATS_VAR(pmap_nzero_page);
 PMAP_STATS_VAR(pmap_nzero_page_c);
 PMAP_STATS_VAR(pmap_nzero_page_oc);
 PMAP_STATS_VAR(pmap_nzero_page_nc);
 PMAP_STATS_VAR(pmap_nzero_page_area);
 PMAP_STATS_VAR(pmap_nzero_page_area_c);
 PMAP_STATS_VAR(pmap_nzero_page_area_oc);
 PMAP_STATS_VAR(pmap_nzero_page_area_nc);
 PMAP_STATS_VAR(pmap_nzero_page_idle);
 PMAP_STATS_VAR(pmap_nzero_page_idle_c);
 PMAP_STATS_VAR(pmap_nzero_page_idle_oc);
 PMAP_STATS_VAR(pmap_nzero_page_idle_nc);
 PMAP_STATS_VAR(pmap_ncopy_page);
 PMAP_STATS_VAR(pmap_ncopy_page_c);
 PMAP_STATS_VAR(pmap_ncopy_page_oc);
 PMAP_STATS_VAR(pmap_ncopy_page_nc);
 PMAP_STATS_VAR(pmap_ncopy_page_dc);
 PMAP_STATS_VAR(pmap_ncopy_page_doc);
 PMAP_STATS_VAR(pmap_ncopy_page_sc);
 PMAP_STATS_VAR(pmap_ncopy_page_soc);
 
 PMAP_STATS_VAR(pmap_nnew_thread);
 PMAP_STATS_VAR(pmap_nnew_thread_oc);
 
 static inline u_long dtlb_get_data(u_int tlb, u_int slot);
 
 /*
  * Quick sort callout for comparing memory regions
  */
 static int mr_cmp(const void *a, const void *b);
 static int om_cmp(const void *a, const void *b);
 
 /*
  * Comparison callback for qsort(): orders struct ofw_mem_region entries
  * by ascending mr_start.  Returns -1, 0 or 1.
  */
 static int
 mr_cmp(const void *a, const void *b)
 {
 	const struct ofw_mem_region *lhs = a;
 	const struct ofw_mem_region *rhs = b;
 
 	if (lhs->mr_start != rhs->mr_start)
 		return (lhs->mr_start < rhs->mr_start ? -1 : 1);
 	return (0);
 }
 
 /*
  * Comparison callback for qsort(): orders struct ofw_map entries by
  * ascending om_start.  Returns -1, 0 or 1.
  */
 static int
 om_cmp(const void *a, const void *b)
 {
 	const struct ofw_map *lhs = a;
 	const struct ofw_map *rhs = b;
 
 	if (lhs->om_start != rhs->om_start)
 		return (lhs->om_start < rhs->om_start ? -1 : 1);
 	return (0);
 }
 
 /*
  * Return the contents of the dTLB data access register for the given
  * slot of the given TLB.  Interrupts are disabled around the access.
  */
 static inline u_long
 dtlb_get_data(u_int tlb, u_int slot)
 {
 	register_t saved;
 	u_long tte_data;
 	u_int dar;
 
 	dar = TLB_DAR_SLOT(tlb, slot);
 	saved = intr_disable();
 	/*
 	 * Errata of USIII and beyond require ASI_DTLB_DATA_ACCESS_REG to be
 	 * read twice back-to-back; the result of the first read is dropped.
 	 */
 	(void)ldxa(dar, ASI_DTLB_DATA_ACCESS_REG);
 	tte_data = ldxa(dar, ASI_DTLB_DATA_ACCESS_REG);
 	intr_restore(saved);
 	return (tte_data);
 }
 
 /*
  * Bootstrap the system enough to run with virtual memory.
  *
  * In order, this reads the available physical memory from the PROM into
  * phys_avail[] (limited by the hw.physmem environment setting, if any),
  * sizes and allocates the kernel TSB, allocates the dynamic per-CPU area
  * and the message buffer, patches the TSB addresses, mask and ASIs into
  * the trap handlers, enters TTEs for the 4MB kernel pages, the thread0
  * kernel stack and the PROM translations, records /memory/reg and
  * initializes the kernel pmap.
  *
  * cpu_impl is the CPU implementation, which selects how the kernel TSB is
  * accessed and sized.
  */
 void
 pmap_bootstrap(u_int cpu_impl)
 {
 	struct pmap *pm;
 	struct tte *tp;
 	vm_offset_t off;
 	vm_offset_t va;
 	vm_paddr_t pa;
 	vm_size_t physsz;
 	vm_size_t virtsz;
 	u_long data;
 	u_long vpn;
 	phandle_t pmem;
 	phandle_t vmem;
 	u_int dtlb_slots_avail;
 	int i;
 	int j;
 	int sz;
 	uint32_t asi;
 	uint32_t colors;
 	uint32_t ldd;
 
 	/*
 	 * Set the kernel context.
 	 */
 	pmap_set_kctx();
 
 	colors = dcache_color_ignore != 0 ? 1 : DCACHE_COLORS;
 
 	/*
 	 * Find out what physical memory is available from the PROM and
 	 * initialize the phys_avail array.  This must be done before
 	 * pmap_bootstrap_alloc is called.
 	 */
 	if ((pmem = OF_finddevice("/memory")) == -1)
 		OF_panic("%s: finddevice /memory", __func__);
 	if ((sz = OF_getproplen(pmem, "available")) == -1)
 		OF_panic("%s: getproplen /memory/available", __func__);
 	if (sizeof(phys_avail) < sz)
 		OF_panic("%s: phys_avail too small", __func__);
 	if (sizeof(mra) < sz)
 		OF_panic("%s: mra too small", __func__);
 	bzero(mra, sz);
 	if (OF_getprop(pmem, "available", mra, sz) == -1)
 		OF_panic("%s: getprop /memory/available", __func__);
 	/* From here on sz is the number of regions rather than bytes. */
 	sz /= sizeof(*mra);
-	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
+#ifdef DIAGNOSTIC
+	OF_printf("pmap_bootstrap: physical memory\n");
+#endif
 	qsort(mra, sz, sizeof (*mra), mr_cmp);
 	physsz = 0;
 	getenv_quad("hw.physmem", &physmem);
 	physmem = btoc(physmem);
 	/*
 	 * Each region occupies two consecutive phys_avail entries (start and
 	 * end).  If a limit was given, the region crossing it is truncated
 	 * and the remaining ones are dropped.
 	 */
 	for (i = 0, j = 0; i < sz; i++, j += 2) {
-		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
+#ifdef DIAGNOSTIC
+		OF_printf("start=%#lx size=%#lx\n", mra[i].mr_start,
 		    mra[i].mr_size);
+#endif
 		if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
 			if (btoc(physsz) < physmem) {
 				phys_avail[j] = mra[i].mr_start;
 				phys_avail[j + 1] = mra[i].mr_start +
 				    (ctob(physmem) - physsz);
 				physsz = ctob(physmem);
 			}
 			break;
 		}
 		phys_avail[j] = mra[i].mr_start;
 		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
 		physsz += mra[i].mr_size;
 	}
 	physmem = btoc(physsz);
 
 	/*
 	 * Calculate the size of kernel virtual memory, and the size and mask
 	 * for the kernel TSB based on the physical memory size but limited
 	 * by the amount of dTLB slots available for locked entries if we have
 	 * to lock the TSB in the TLB (given that for spitfire-class CPUs all
 	 * of the dt64 slots can hold locked entries but there is no large
 	 * dTLB for unlocked ones, we don't use more than half of it for the
 	 * TSB).
 	 * Note that for reasons unknown OpenSolaris doesn't take advantage of
 	 * ASI_ATOMIC_QUAD_LDD_PHYS on UltraSPARC-III.  However, given that no
 	 * public documentation is available for these, the latter just might
 	 * not support it, yet.
 	 */
 	if (cpu_impl == CPU_IMPL_SPARC64V ||
 	    cpu_impl >= CPU_IMPL_ULTRASPARCIIIp) {
 		tsb_kernel_ldd_phys = 1;
 		/*
 		 * NOTE(review): "5 / 3" is evaluated in integer arithmetic
 		 * and yields 1, so this rounds up physsz itself.  If a 5/3
 		 * ratio was intended this would have to be written as
 		 * "5 * physsz / 3" - confirm the intent before changing it.
 		 */
 		virtsz = roundup(5 / 3 * physsz, PAGE_SIZE_4M <<
 		    (PAGE_SHIFT - TTE_SHIFT));
 	} else {
 		/* Count the dTLB slots not holding a valid, locked entry. */
 		dtlb_slots_avail = 0;
 		for (i = 0; i < dtlb_slots; i++) {
 			data = dtlb_get_data(cpu_impl ==
 			    CPU_IMPL_ULTRASPARCIII ? TLB_DAR_T16 :
 			    TLB_DAR_T32, i);
 			if ((data & (TD_V | TD_L)) != (TD_V | TD_L))
 				dtlb_slots_avail++;
 		}
 #ifdef SMP
 		dtlb_slots_avail -= PCPU_PAGES;
 #endif
 		if (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
 		    cpu_impl < CPU_IMPL_ULTRASPARCIII)
 			dtlb_slots_avail /= 2;
 		virtsz = roundup(physsz, PAGE_SIZE_4M <<
 		    (PAGE_SHIFT - TTE_SHIFT));
 		virtsz = MIN(virtsz, (dtlb_slots_avail * PAGE_SIZE_4M) <<
 		    (PAGE_SHIFT - TTE_SHIFT));
 	}
 	vm_max_kernel_address = VM_MIN_KERNEL_ADDRESS + virtsz;
 	tsb_kernel_size = virtsz >> (PAGE_SHIFT - TTE_SHIFT);
 	tsb_kernel_mask = (tsb_kernel_size >> TTE_SHIFT) - 1;
 
 	/*
 	 * Allocate the kernel TSB and lock it in the TLB if necessary.
 	 */
 	pa = pmap_bootstrap_alloc(tsb_kernel_size, colors);
 	if (pa & PAGE_MASK_4M)
 		OF_panic("%s: TSB unaligned", __func__);
 	tsb_kernel_phys = pa;
 	if (tsb_kernel_ldd_phys == 0) {
 		tsb_kernel =
 		    (struct tte *)(VM_MIN_KERNEL_ADDRESS - tsb_kernel_size);
 		pmap_map_tsb();
 		bzero(tsb_kernel, tsb_kernel_size);
 	} else {
 		tsb_kernel =
 		    (struct tte *)TLB_PHYS_TO_DIRECT(tsb_kernel_phys);
 		aszero(ASI_PHYS_USE_EC, tsb_kernel_phys, tsb_kernel_size);
 	}
 
 	/*
 	 * Allocate and map the dynamic per-CPU area for the BSP.
 	 */
 	pa = pmap_bootstrap_alloc(DPCPU_SIZE, colors);
 	dpcpu0 = (void *)TLB_PHYS_TO_DIRECT(pa);
 
 	/*
 	 * Allocate and map the message buffer.
 	 */
 	pa = pmap_bootstrap_alloc(msgbufsize, colors);
 	msgbufp = (struct msgbuf *)TLB_PHYS_TO_DIRECT(pa);
 
 	/*
 	 * Patch the TSB addresses and mask as well as the ASIs used to load
 	 * it into the trap table.
 	 */
 
 #define	LDDA_R_I_R(rd, imm_asi, rs1, rs2)				\
 	(EIF_OP(IOP_LDST) | EIF_F3_RD(rd) | EIF_F3_OP3(INS3_LDDA) |	\
 	    EIF_F3_RS1(rs1) | EIF_F3_I(0) | EIF_F3_IMM_ASI(imm_asi) |	\
 	    EIF_F3_RS2(rs2))
 #define	OR_R_I_R(rd, imm13, rs1)					\
 	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_OR) |	\
 	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
 #define	SETHI(rd, imm22)						\
 	(EIF_OP(IOP_FORM2) | EIF_F2_RD(rd) | EIF_F2_OP2(INS0_SETHI) |	\
 	    EIF_IMM((imm22) >> 10, 22))
 #define	WR_R_I(rd, imm13, rs1)						\
 	(EIF_OP(IOP_MISC) | EIF_F3_RD(rd) | EIF_F3_OP3(INS2_WR) |	\
 	    EIF_F3_RS1(rs1) | EIF_F3_I(1) | EIF_IMM(imm13, 13))
 
 	/*
 	 * Each PATCH_* macro first verifies that the instructions to be
 	 * patched still have the expected form with zeroed immediate fields
 	 * (panicking otherwise), then ORs the value in and flushes the
 	 * modified instruction words.
 	 */
 #define	PATCH_ASI(addr, asi) do {					\
 	if (addr[0] != WR_R_I(IF_F3_RD(addr[0]), 0x0,			\
 	    IF_F3_RS1(addr[0])))					\
 		OF_panic("%s: patched instructions have changed",	\
 		    __func__);						\
 	addr[0] |= EIF_IMM((asi), 13);					\
 	flush(addr);							\
 } while (0)
 
 #define	PATCH_LDD(addr, asi) do {					\
 	if (addr[0] != LDDA_R_I_R(IF_F3_RD(addr[0]), 0x0,		\
 	    IF_F3_RS1(addr[0]), IF_F3_RS2(addr[0])))			\
 		OF_panic("%s: patched instructions have changed",	\
 		    __func__);						\
 	addr[0] |= EIF_F3_IMM_ASI(asi);					\
 	flush(addr);							\
 } while (0)
 
 #define	PATCH_TSB(addr, val) do {					\
 	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
 	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
 	    IF_F3_RS1(addr[1]))	||					\
 	    addr[3] != SETHI(IF_F2_RD(addr[3]), 0x0))			\
 		OF_panic("%s: patched instructions have changed",	\
 		    __func__);						\
 	addr[0] |= EIF_IMM((val) >> 42, 22);				\
 	addr[1] |= EIF_IMM((val) >> 32, 10);				\
 	addr[3] |= EIF_IMM((val) >> 10, 22);				\
 	flush(addr);							\
 	flush(addr + 1);						\
 	flush(addr + 3);						\
 } while (0)
 
 #define	PATCH_TSB_MASK(addr, val) do {					\
 	if (addr[0] != SETHI(IF_F2_RD(addr[0]), 0x0) ||			\
 	    addr[1] != OR_R_I_R(IF_F3_RD(addr[1]), 0x0,			\
 	    IF_F3_RS1(addr[1])))					\
 		OF_panic("%s: patched instructions have changed",	\
 		    __func__);						\
 	addr[0] |= EIF_IMM((val) >> 10, 22);				\
 	addr[1] |= EIF_IMM((val), 10);					\
 	flush(addr);							\
 	flush(addr + 1);						\
 } while (0)
 
 	/*
 	 * The handlers access the TSB either virtually or physically,
 	 * depending on whether it was locked into the TLB above.
 	 */
 	if (tsb_kernel_ldd_phys == 0) {
 		asi = ASI_N;
 		ldd = ASI_NUCLEUS_QUAD_LDD;
 		off = (vm_offset_t)tsb_kernel;
 	} else {
 		asi = ASI_PHYS_USE_EC;
 		ldd = ASI_ATOMIC_QUAD_LDD_PHYS;
 		off = (vm_offset_t)tsb_kernel_phys;
 	}
 	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_1, tsb_kernel_phys);
 	PATCH_TSB(tl1_dmmu_miss_direct_patch_tsb_phys_end_1,
 	    tsb_kernel_phys + tsb_kernel_size - 1);
 	PATCH_ASI(tl1_dmmu_miss_patch_asi_1, asi);
 	PATCH_LDD(tl1_dmmu_miss_patch_quad_ldd_1, ldd);
 	PATCH_TSB(tl1_dmmu_miss_patch_tsb_1, off);
 	PATCH_TSB(tl1_dmmu_miss_patch_tsb_2, off);
 	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_1, tsb_kernel_mask);
 	PATCH_TSB_MASK(tl1_dmmu_miss_patch_tsb_mask_2, tsb_kernel_mask);
 	PATCH_ASI(tl1_dmmu_prot_patch_asi_1, asi);
 	PATCH_LDD(tl1_dmmu_prot_patch_quad_ldd_1, ldd);
 	PATCH_TSB(tl1_dmmu_prot_patch_tsb_1, off);
 	PATCH_TSB(tl1_dmmu_prot_patch_tsb_2, off);
 	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_1, tsb_kernel_mask);
 	PATCH_TSB_MASK(tl1_dmmu_prot_patch_tsb_mask_2, tsb_kernel_mask);
 	PATCH_ASI(tl1_immu_miss_patch_asi_1, asi);
 	PATCH_LDD(tl1_immu_miss_patch_quad_ldd_1, ldd);
 	PATCH_TSB(tl1_immu_miss_patch_tsb_1, off);
 	PATCH_TSB(tl1_immu_miss_patch_tsb_2, off);
 	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_1, tsb_kernel_mask);
 	PATCH_TSB_MASK(tl1_immu_miss_patch_tsb_mask_2, tsb_kernel_mask);
 
 	/*
 	 * Enter fake 8k pages for the 4MB kernel pages, so that
 	 * pmap_kextract() will work for them.
 	 */
 	for (i = 0; i < kernel_tlb_slots; i++) {
 		pa = kernel_tlbs[i].te_pa;
 		va = kernel_tlbs[i].te_va;
 		for (off = 0; off < PAGE_SIZE_4M; off += PAGE_SIZE) {
 			tp = tsb_kvtotte(va + off);
 			vpn = TV_VPN(va + off, TS_8K);
 			data = TD_V | TD_8K | TD_PA(pa + off) | TD_REF |
 			    TD_SW | TD_CP | TD_CV | TD_P | TD_W;
 			pmap_bootstrap_set_tte(tp, vpn, data);
 		}
 	}
 
 	/*
 	 * Set the start and end of KVA.  The kernel is loaded starting
 	 * at the first available 4MB super page, so we advance to the
 	 * end of the last one used for it.
 	 */
 	virtual_avail = KERNBASE + kernel_tlb_slots * PAGE_SIZE_4M;
 	virtual_end = vm_max_kernel_address;
 	kernel_vm_end = vm_max_kernel_address;
 
 	/*
 	 * Allocate kva space for temporary mappings.
 	 */
 	pmap_idle_map = virtual_avail;
 	virtual_avail += PAGE_SIZE * colors;
 	pmap_temp_map_1 = virtual_avail;
 	virtual_avail += PAGE_SIZE * colors;
 	pmap_temp_map_2 = virtual_avail;
 	virtual_avail += PAGE_SIZE * colors;
 
 	/*
 	 * Allocate a kernel stack with guard page for thread0 and map it
 	 * into the kernel TSB.  We must ensure that the virtual address is
 	 * colored properly for corresponding CPUs, since we're allocating
 	 * from phys_avail so the memory won't have an associated vm_page_t.
 	 */
 	pa = pmap_bootstrap_alloc(KSTACK_PAGES * PAGE_SIZE, colors);
 	kstack0_phys = pa;
 	virtual_avail += roundup(KSTACK_GUARD_PAGES, colors) * PAGE_SIZE;
 	kstack0 = virtual_avail;
 	virtual_avail += roundup(KSTACK_PAGES, colors) * PAGE_SIZE;
 	if (dcache_color_ignore == 0)
 		KASSERT(DCACHE_COLOR(kstack0) == DCACHE_COLOR(kstack0_phys),
 		    ("pmap_bootstrap: kstack0 miscolored"));
 	for (i = 0; i < KSTACK_PAGES; i++) {
 		pa = kstack0_phys + i * PAGE_SIZE;
 		va = kstack0 + i * PAGE_SIZE;
 		tp = tsb_kvtotte(va);
 		vpn = TV_VPN(va, TS_8K);
 		data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_SW | TD_CP |
 		    TD_CV | TD_P | TD_W;
 		pmap_bootstrap_set_tte(tp, vpn, data);
 	}
 
 	/*
 	 * Calculate the last available physical address.
 	 */
 	for (i = 0; phys_avail[i + 2] != 0; i += 2)
 		;
 	Maxmem = sparc64_btop(phys_avail[i + 1]);
 
 	/*
 	 * Add the PROM mappings to the kernel TSB.
 	 */
 	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
 		OF_panic("%s: finddevice /virtual-memory", __func__);
 	if ((sz = OF_getproplen(vmem, "translations")) == -1)
 		OF_panic("%s: getproplen translations", __func__);
 	if (sizeof(translations) < sz)
 		OF_panic("%s: translations too small", __func__);
 	bzero(translations, sz);
 	if (OF_getprop(vmem, "translations", translations, sz) == -1)
 		OF_panic("%s: getprop /virtual-memory/translations",
 		    __func__);
 	sz /= sizeof(*translations);
 	translations_size = sz;
-	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
+#ifdef DIAGNOSTIC
+	OF_printf("pmap_bootstrap: translations\n");
+#endif
 	qsort(translations, sz, sizeof (*translations), om_cmp);
 	for (i = 0; i < sz; i++) {
-		CTR3(KTR_PMAP,
-		    "translation: start=%#lx size=%#lx tte=%#lx",
+#ifdef DIAGNOSTIC
+		OF_printf("translation: start=%#lx size=%#lx tte=%#lx\n",
 		    translations[i].om_start, translations[i].om_size,
 		    translations[i].om_tte);
+#endif
 		/* Only valid translations within the PROM range are entered. */
 		if ((translations[i].om_tte & TD_V) == 0)
 			continue;
 		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
 		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
 			continue;
 		for (off = 0; off < translations[i].om_size;
 		    off += PAGE_SIZE) {
 			va = translations[i].om_start + off;
 			tp = tsb_kvtotte(va);
 			vpn = TV_VPN(va, TS_8K);
 			/*
 			 * Use the PROM's TTE with the soft and the
 			 * implementation-specific diag/reserved bits masked
 			 * out and TD_EXEC set, offset by the position within
 			 * the translation.
 			 */
 			data = ((translations[i].om_tte &
 			    ~((TD_SOFT2_MASK << TD_SOFT2_SHIFT) |
 			    (cpu_impl >= CPU_IMPL_ULTRASPARCI &&
 			    cpu_impl < CPU_IMPL_ULTRASPARCIII ?
 			    (TD_DIAG_SF_MASK << TD_DIAG_SF_SHIFT) :
 			    (TD_RSVD_CH_MASK << TD_RSVD_CH_SHIFT)) |
 			    (TD_SOFT_MASK << TD_SOFT_SHIFT))) | TD_EXEC) +
 			    off;
 			pmap_bootstrap_set_tte(tp, vpn, data);
 		}
 	}
 
 	/*
 	 * Get the available physical memory ranges from /memory/reg.  These
 	 * are only used for kernel dumps, but it may not be wise to do PROM
 	 * calls in that situation.
 	 */
 	if ((sz = OF_getproplen(pmem, "reg")) == -1)
 		OF_panic("%s: getproplen /memory/reg", __func__);
 	if (sizeof(sparc64_memreg) < sz)
 		OF_panic("%s: sparc64_memreg too small", __func__);
 	if (OF_getprop(pmem, "reg", sparc64_memreg, sz) == -1)
 		OF_panic("%s: getprop /memory/reg", __func__);
 	sparc64_nmemreg = sz / sizeof(*sparc64_memreg);
 
 	/*
 	 * Initialize the kernel pmap (which is statically allocated).
 	 */
 	pm = kernel_pmap;
 	PMAP_LOCK_INIT(pm);
 	for (i = 0; i < MAXCPU; i++)
 		pm->pm_context[i] = TLB_CTX_KERNEL;
 	CPU_FILL(&pm->pm_active);
 
 	/*
 	 * Initialize the global tte list lock, which is more commonly
 	 * known as the pmap pv global lock.
 	 */
 	rw_init(&tte_list_global_lock, "pmap pv global");
 
 	/*
 	 * Flush all non-locked TLB entries possibly left over by the
 	 * firmware.
 	 */
 	tlb_flush_nonlocked();
 }
 
 /*
  * Enter a 4MB dTLB mapping with TD_L set for every 4MB chunk of the
  * kernel TSB, mapping tsb_kernel onto tsb_kernel_phys in the kernel
  * context.
  */
 void
 pmap_map_tsb(void)
 {
 	vm_offset_t chunk_va;
 	vm_paddr_t chunk_pa;
 	u_long attrs;
 	u_long tte;
 	int off;
 
 	/* All entries share these bits; only the physical address differs. */
 	attrs = TD_V | TD_4M | TD_L | TD_CP | TD_CV | TD_P | TD_W;
 	for (off = 0; off < tsb_kernel_size; off += PAGE_SIZE_4M) {
 		chunk_va = (vm_offset_t)tsb_kernel + off;
 		chunk_pa = tsb_kernel_phys + off;
 		tte = attrs | TD_PA(chunk_pa);
 		/* Set up the tag first, then load the data into the dTLB. */
 		stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(chunk_va) |
 		    TLB_TAR_CTX(TLB_CTX_KERNEL));
 		stxa_sync(0, ASI_DTLB_DATA_IN_REG, tte);
 	}
 }
 
 /*
  * Set the secondary context to be the kernel context (needed for FP block
  * operations in the kernel).
  */
 void
 pmap_set_kctx(void)
 {
 
 	stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) &
 	    TLB_CXR_PGSZ_MASK) | TLB_CTX_KERNEL);
 	flush(KERNBASE);
 }
 
 /*
  * Carve physical memory directly out of the phys_avail map: the request is
  * rounded up to a multiple of PAGE_SIZE * colors and taken from the start
  * of the first region that is large enough.  Panics if there is none.
  * Can only be called from pmap_bootstrap before avail start and end are
  * calculated.
  */
 static vm_paddr_t
 pmap_bootstrap_alloc(vm_size_t size, uint32_t colors)
 {
 	vm_paddr_t base;
 	int idx;
 
 	size = roundup(size, PAGE_SIZE * colors);
 	for (idx = 0; phys_avail[idx + 1] != 0; idx += 2) {
 		if (phys_avail[idx + 1] - phys_avail[idx] >= size) {
 			base = phys_avail[idx];
 			phys_avail[idx] += size;
 			return (base);
 		}
 	}
 	OF_panic("%s: no suitable region found", __func__);
 }
 
 /*
  * Store a TTE (vpn first, then data).  Helper for the period in which
  * tsb_kernel is direct-mapped but the trap table has not been taken over
  * yet, which is the situation when ASI_ATOMIC_QUAD_LDD_PHYS is used to
  * access the kernel TSB.
  *
  * When tsb_kernel_ldd_phys is non-zero, tp is treated as a physical
  * address and written through ASI_PHYS_USE_EC; otherwise tp is
  * dereferenced as an ordinary pointer.
  */
 void
 pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data)
 {
 
 	if (tsb_kernel_ldd_phys != 0) {
 		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_vpn),
 		    ASI_PHYS_USE_EC, vpn);
 		stxa((vm_paddr_t)tp + offsetof(struct tte, tte_data),
 		    ASI_PHYS_USE_EC, data);
 		return;
 	}
 	tp->tte_vpn = vpn;
 	tp->tte_data = data;
 }
 
 /*
  * Set up the machine-dependent part of a vm_page: no owning pmap, an
  * empty TTE list, and a cache color derived from the page's physical
  * address.
  */
 void
 pmap_page_init(vm_page_t m)
 {
 	vm_paddr_t pa;
 
 	pa = VM_PAGE_TO_PHYS(m);
 	m->md.pmap = NULL;
 	m->md.color = DCACHE_COLOR(pa);
 	TAILQ_INIT(&m->md.tte_list);
 }
 
 /*
  * Initialize the pmap module.
  *
  * Every valid entry of the translations array whose start address lies
  * within [VM_MIN_PROM_ADDRESS, VM_MAX_PROM_ADDRESS] is entered into
  * kernel_map at exactly that address; failure to do so is fatal.
  */
 void
 pmap_init(void)
 {
 	vm_offset_t prom_va;
 	vm_size_t len;
 	int idx, rv;
 
 	for (idx = 0; idx < translations_size; idx++) {
 		prom_va = translations[idx].om_start;
 		len = translations[idx].om_size;
 		if ((translations[idx].om_tte & TD_V) == 0 ||
 		    prom_va < VM_MIN_PROM_ADDRESS ||
 		    prom_va > VM_MAX_PROM_ADDRESS)
 			continue;
 		rv = vm_map_find(kernel_map, NULL, 0, &prom_va, len, 0,
 		    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
 		/* The entry must land on the address that was asked for. */
 		if (rv != KERN_SUCCESS ||
 		    prom_va != translations[idx].om_start)
 			panic("pmap_init: vm_map_find");
 	}
 }
 
 /*
  * Return the physical address backing the given map/virtual address
  * pair, or 0 when the TSB lookup finds no mapping.  Kernel pmap requests
  * are forwarded to pmap_kextract().
  */
 vm_paddr_t
 pmap_extract(pmap_t pm, vm_offset_t va)
 {
 	struct tte *tte;
 	vm_paddr_t phys;
 
 	if (pm == kernel_pmap)
 		return (pmap_kextract(va));
 	phys = 0;
 	PMAP_LOCK(pm);
 	tte = tsb_tte_lookup(pm, va);
 	if (tte != NULL)
 		phys = TTE_GET_PA(tte) | (va & TTE_GET_PAGE_MASK(tte));
 	PMAP_UNLOCK(pm);
 	return (phys);
 }
 
 /*
  * Atomically extract and hold the physical page with the given
  * pmap and virtual address pair if that mapping permits the given
  * protection.
  *
  * Returns the held page, or NULL when there is no suitable mapping.
  */
 vm_page_t
 pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot)
 {
 	struct tte *tp;
 	vm_page_t m;
 	vm_paddr_t pa;
 
 	m = NULL;
 	pa = 0;
 	PMAP_LOCK(pm);
 retry:
 	if (pm == kernel_pmap) {
 		if (va >= VM_MIN_DIRECT_ADDRESS) {
 			/*
 			 * Direct-mapped address: the page is derived from
 			 * the address itself and held right here.  tp stays
 			 * NULL so that the TTE path below is skipped.
 			 */
 			tp = NULL;
 			m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va));
 			(void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va),
 			    &pa);
 			vm_page_hold(m);
 		} else {
 			tp = tsb_kvtotte(va);
 			if ((tp->tte_data & TD_V) == 0)
 				tp = NULL;
 		}
 	} else
 		tp = tsb_tte_lookup(pm, va);
 	/*
 	 * Hold the page only if the mapping is software-writable (TD_SW) or
 	 * write access was not requested.
 	 */
 	if (tp != NULL && ((tp->tte_data & TD_SW) ||
 	    (prot & VM_PROT_WRITE) == 0)) {
 		/* A non-zero return means the lookup has to be redone. */
 		if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa))
 			goto retry;
 		m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
 		vm_page_hold(m);
 	}
 	PA_UNLOCK_COND(pa);
 	PMAP_UNLOCK(pm);
 	return (m);
 }
 
 /*
  * Translate a kernel virtual address into a physical address.  Addresses
  * at or above VM_MIN_DIRECT_ADDRESS are converted arithmetically; all
  * others are looked up in the kernel TSB, yielding 0 if the entry is not
  * valid.
  */
 vm_paddr_t
 pmap_kextract(vm_offset_t va)
 {
 	struct tte *tte;
 
 	if (va < VM_MIN_DIRECT_ADDRESS) {
 		tte = tsb_kvtotte(va);
 		if ((tte->tte_data & TD_V) != 0)
 			return (TTE_GET_PA(tte) |
 			    (va & TTE_GET_PAGE_MASK(tte)));
 		return (0);
 	}
 	return (TLB_DIRECT_TO_PHYS(va));
 }
 
 /*
  * Account for a new mapping of page m at virtual address va and decide
  * whether that mapping may be cacheable in the data cache.
  *
  * Returns 1 if the mapping can be entered cacheable and 0 if it has to be
  * non-cacheable.  The tte list global lock must be write-locked and m must
  * not be a fictitious page.
  */
 int
 pmap_cache_enter(vm_page_t m, vm_offset_t va)
 {
 	struct tte *tp;
 	int color;
 
 	rw_assert(&tte_list_global_lock, RA_WLOCKED);
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_cache_enter: fake page"));
 	PMAP_STATS_INC(pmap_ncache_enter);
 
 	/* No per-color bookkeeping when cache colors are ignored. */
 	if (dcache_color_ignore != 0)
 		return (1);
 
 	/*
 	 * Find the color for this virtual address and note the added mapping.
 	 */
 	color = DCACHE_COLOR(va);
 	m->md.colors[color]++;
 
 	/*
 	 * If all existing mappings have the same color, the mapping is
 	 * cacheable.
 	 */
 	if (m->md.color == color) {
 		KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] == 0,
 		    ("pmap_cache_enter: cacheable, mappings of other color"));
 		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
 			PMAP_STATS_INC(pmap_ncache_enter_c);
 		else
 			PMAP_STATS_INC(pmap_ncache_enter_oc);
 		return (1);
 	}
 
 	/*
 	 * If there are no mappings of the other color, and the page still has
 	 * the wrong color, this must be a new mapping.  Change the color to
 	 * match the new mapping, which is cacheable.  We must flush the page
 	 * from the cache now.
 	 */
 	if (m->md.colors[DCACHE_OTHER_COLOR(color)] == 0) {
 		KASSERT(m->md.colors[color] == 1,
 		    ("pmap_cache_enter: changing color, not new mapping"));
 		dcache_page_inval(VM_PAGE_TO_PHYS(m));
 		m->md.color = color;
 		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
 			PMAP_STATS_INC(pmap_ncache_enter_cc);
 		else
 			PMAP_STATS_INC(pmap_ncache_enter_coc);
 		return (1);
 	}
 
 	/*
 	 * If the mapping is already non-cacheable, just return.
 	 */
 	if (m->md.color == -1) {
 		PMAP_STATS_INC(pmap_ncache_enter_nc);
 		return (0);
 	}
 
 	PMAP_STATS_INC(pmap_ncache_enter_cnc);
 
 	/*
 	 * Mark all mappings as uncacheable, flush any lines with the other
 	 * color out of the dcache, and set the color to none (-1).
 	 */
 	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
 		atomic_clear_long(&tp->tte_data, TD_CV);
 		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
 	}
 	dcache_page_inval(VM_PAGE_TO_PHYS(m));
 	m->md.color = -1;
 	return (0);
 }
 
 /*
  * Account for the removal of a mapping of page m at virtual address va.
  * When the page was non-cacheable and the last mapping of this color goes
  * away, the remaining mappings are made cacheable again and the page takes
  * on the other color.
  *
  * The tte list global lock must be write-locked and m must not be a
  * fictitious page.
  */
 static void
 pmap_cache_remove(vm_page_t m, vm_offset_t va)
 {
 	struct tte *tp;
 	int color;
 
 	rw_assert(&tte_list_global_lock, RA_WLOCKED);
 	CTR3(KTR_PMAP, "pmap_cache_remove: m=%p va=%#lx c=%d", m, va,
 	    m->md.colors[DCACHE_COLOR(va)]);
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_cache_remove: fake page"));
 	PMAP_STATS_INC(pmap_ncache_remove);
 
 	/* No per-color bookkeeping when cache colors are ignored. */
 	if (dcache_color_ignore != 0)
 		return;
 
 	KASSERT(m->md.colors[DCACHE_COLOR(va)] > 0,
 	    ("pmap_cache_remove: no mappings %d <= 0",
 	    m->md.colors[DCACHE_COLOR(va)]));
 
 	/*
 	 * Find the color for this virtual address and note the removal of
 	 * the mapping.
 	 */
 	color = DCACHE_COLOR(va);
 	m->md.colors[color]--;
 
 	/*
 	 * If the page is cacheable, just return and keep the same color, even
 	 * if there are no longer any mappings.
 	 */
 	if (m->md.color != -1) {
 		if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
 			PMAP_STATS_INC(pmap_ncache_remove_c);
 		else
 			PMAP_STATS_INC(pmap_ncache_remove_oc);
 		return;
 	}
 
 	KASSERT(m->md.colors[DCACHE_OTHER_COLOR(color)] != 0,
 	    ("pmap_cache_remove: uncacheable, no mappings of other color"));
 
 	/*
 	 * If the page is not cacheable (color is -1), and the number of
 	 * mappings for this color is not zero, just return.  There are
 	 * mappings of the other color still, so remain non-cacheable.
 	 */
 	if (m->md.colors[color] != 0) {
 		PMAP_STATS_INC(pmap_ncache_remove_nc);
 		return;
 	}
 
 	/*
 	 * The number of mappings for this color is now zero.  Recache the
 	 * other colored mappings, and change the page color to the other
 	 * color.  There should be no lines in the data cache for this page,
 	 * so flushing should not be needed.
 	 */
 	TAILQ_FOREACH(tp, &m->md.tte_list, tte_link) {
 		atomic_set_long(&tp->tte_data, TD_CV);
 		tlb_page_demap(TTE_GET_PMAP(tp), TTE_GET_VA(tp));
 	}
 	m->md.color = DCACHE_OTHER_COLOR(color);
 
 	if (m->md.color == DCACHE_COLOR(VM_PAGE_TO_PHYS(m)))
 		PMAP_STATS_INC(pmap_ncache_remove_cc);
 	else
 		PMAP_STATS_INC(pmap_ncache_remove_coc);
 }
 
 /*
  * Map a wired page into kernel virtual address space.
  *
  * The tte list global lock must be write-locked.  The TLB entry for va is
  * not demapped here; pmap_qenter() demaps the whole range after its calls.
  */
 void
 pmap_kenter(vm_offset_t va, vm_page_t m)
 {
 	vm_offset_t ova;
 	struct tte *tp;
 	vm_page_t om;
 	u_long data;
 
 	rw_assert(&tte_list_global_lock, RA_WLOCKED);
 	PMAP_STATS_INC(pmap_nkenter);
 	tp = tsb_kvtotte(va);
 	CTR4(KTR_PMAP, "pmap_kenter: va=%#lx pa=%#lx tp=%p data=%#lx",
 	    va, VM_PAGE_TO_PHYS(m), tp, tp->tte_data);
 	/* Trace and count mappings whose VA color differs from the PA color. */
 	if (DCACHE_COLOR(VM_PAGE_TO_PHYS(m)) != DCACHE_COLOR(va)) {
 		CTR5(KTR_SPARE2,
 	"pmap_kenter: off color va=%#lx pa=%#lx o=%p ot=%d pi=%#lx",
 		    va, VM_PAGE_TO_PHYS(m), m->object,
 		    m->object ? m->object->type : -1,
 		    m->pindex);
 		PMAP_STATS_INC(pmap_nkenter_oc);
 	}
 	/*
 	 * The TSB slot already holds a valid entry: nothing to do if it is
 	 * the very same mapping, otherwise unlink the old one first.
 	 */
 	if ((tp->tte_data & TD_V) != 0) {
 		om = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
 		ova = TTE_GET_VA(tp);
 		if (m == om && va == ova) {
 			PMAP_STATS_INC(pmap_nkenter_stupid);
 			return;
 		}
 		TAILQ_REMOVE(&om->md.tte_list, tp, tte_link);
 		pmap_cache_remove(om, ova);
 		if (va != ova)
 			tlb_page_demap(kernel_pmap, ova);
 	}
 	data = TD_V | TD_8K | VM_PAGE_TO_PHYS(m) | TD_REF | TD_SW | TD_CP |
 	    TD_P | TD_W;
 	/* TD_CV is set only if the cache color accounting allows it. */
 	if (pmap_cache_enter(m, va) != 0)
 		data |= TD_CV;
 	tp->tte_vpn = TV_VPN(va, TS_8K);
 	tp->tte_data = data;
 	TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
 }
 
 /*
  * Enter an 8K kernel mapping of physical address pa at va, or'ing the
  * caller-supplied flags into the TTE data.  Used by
  * sparc64_bus_mem_map().
  * NOTE: for a non-cacheable mapping the caller is responsible for
  * flushing entries that might still be in the cache, if applicable.
  */
 void
 pmap_kenter_flags(vm_offset_t va, vm_paddr_t pa, u_long flags)
 {
 	struct tte *tte;
 	u_long data;
 
 	tte = tsb_kvtotte(va);
 	CTR4(KTR_PMAP, "pmap_kenter_flags: va=%#lx pa=%#lx tp=%p data=%#lx",
 	    va, pa, tte, tte->tte_data);
 	data = TD_V | TD_8K | TD_PA(pa) | TD_REF | TD_P | flags;
 	tte->tte_vpn = TV_VPN(va, TS_8K);
 	tte->tte_data = data;
 }
 
 /*
  * Remove a wired page from kernel virtual address space.  A slot whose
  * TTE is not valid is left untouched.  The tte list global lock must be
  * write-locked.
  */
 void
 pmap_kremove(vm_offset_t va)
 {
 	struct tte *tte;
 	vm_page_t pg;
 
 	rw_assert(&tte_list_global_lock, RA_WLOCKED);
 	PMAP_STATS_INC(pmap_nkremove);
 	tte = tsb_kvtotte(va);
 	CTR3(KTR_PMAP, "pmap_kremove: va=%#lx tp=%p data=%#lx", va, tte,
 	    tte->tte_data);
 	if ((tte->tte_data & TD_V) != 0) {
 		pg = PHYS_TO_VM_PAGE(TTE_GET_PA(tte));
 		TAILQ_REMOVE(&pg->md.tte_list, tte, tte_link);
 		pmap_cache_remove(pg, va);
 		TTE_ZERO(tte);
 	}
 }
 
 /*
  * Counterpart of pmap_kenter_flags, used by bus_space_unmap(): zeroes
  * the kernel TSB entry for va.
  */
 void
 pmap_kremove_flags(vm_offset_t va)
 {
 	struct tte *tte;
 
 	tte = tsb_kvtotte(va);
 	CTR3(KTR_PMAP, "pmap_kremove_flags: va=%#lx tp=%p data=%#lx", va, tte,
 	    tte->tte_data);
 	TTE_ZERO(tte);
 }
 
 /*
  * Map a range of physical addresses into kernel virtual address space.
  *
  * The value passed in *virt is only a suggested virtual address.
  * Architectures with a direct-mapped physical to virtual region may
  * return the matching address inside that region and leave '*virt'
  * untouched, which is what happens here: only 'start' is consulted.
  */
 vm_offset_t
 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
 {
 	vm_offset_t direct_va;
 
 	direct_va = TLB_PHYS_TO_DIRECT(start);
 	return (direct_va);
 }
 
 /*
  * Enter 'count' wired pages at consecutive page-sized virtual addresses
  * starting at sva.  Meant for temporary mappings that do not need page
  * modification or references recorded.  Existing mappings in the region
  * are overwritten.
  */
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
 {
 	vm_offset_t eva;
 	int idx;
 
 	PMAP_STATS_INC(pmap_nqenter);
 	eva = sva;
 	rw_wlock(&tte_list_global_lock);
 	for (idx = 0; idx < count; idx++) {
 		pmap_kenter(eva, m[idx]);
 		eva += PAGE_SIZE;
 	}
 	rw_wunlock(&tte_list_global_lock);
 	/* One demap for the whole range once all entries are in place. */
 	tlb_range_demap(kernel_pmap, sva, eva);
 }
 
 /*
  * Remove 'count' page mappings from kernel virtual address space,
  * starting at sva.  Intended for temporary mappings entered by
  * pmap_qenter.
  */
 void
 pmap_qremove(vm_offset_t sva, int count)
 {
 	vm_offset_t eva;
 	int idx;
 
 	PMAP_STATS_INC(pmap_nqremove);
 	eva = sva;
 	rw_wlock(&tte_list_global_lock);
 	for (idx = 0; idx < count; idx++) {
 		pmap_kremove(eva);
 		eva += PAGE_SIZE;
 	}
 	rw_wunlock(&tte_list_global_lock);
 	/* One demap for the whole range once all entries are gone. */
 	tlb_range_demap(kernel_pmap, sva, eva);
 }
 
 /*
  * Initialize the pmap associated with process 0: kernel context on every
  * CPU, no active CPUs, no TSB or TSB object, and cleared statistics.
  */
 void
 pmap_pinit0(pmap_t pm)
 {
 	int cpu;
 
 	PMAP_LOCK_INIT(pm);
 	pm->pm_tsb_obj = NULL;
 	pm->pm_tsb = NULL;
 	CPU_ZERO(&pm->pm_active);
 	for (cpu = 0; cpu < MAXCPU; cpu++)
 		pm->pm_context[cpu] = TLB_CTX_KERNEL;
 	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure, such as one in a
  * vmspace structure.
  *
  * Returns 1 on success and 0 if no KVA could be obtained for the TSB.
  */
 int
 pmap_pinit(pmap_t pm)
 {
 	vm_page_t pages[TSB_PAGES];
 	vm_page_t pg;
 	int idx;
 
 	/*
 	 * Reserve KVA for the TSB unless the pmap already has some.
 	 */
 	if (pm->pm_tsb == NULL) {
 		pm->pm_tsb = (struct tte *)kva_alloc(TSB_BSIZE);
 		if (pm->pm_tsb == NULL)
 			return (0);
 	}
 
 	/*
 	 * Likewise for the VM object that backs the TSB.
 	 */
 	if (pm->pm_tsb_obj == NULL)
 		pm->pm_tsb_obj = vm_object_allocate(OBJT_PHYS, TSB_PAGES);
 
 	/* No context assigned on any CPU yet, and no CPU is using the pmap. */
 	for (idx = 0; idx < MAXCPU; idx++)
 		pm->pm_context[idx] = -1;
 	CPU_ZERO(&pm->pm_active);
 
 	/* Grab wired, zeroed pages for the TSB and map them at pm_tsb. */
 	VM_OBJECT_WLOCK(pm->pm_tsb_obj);
 	for (idx = 0; idx < TSB_PAGES; idx++) {
 		pg = vm_page_grab(pm->pm_tsb_obj, idx, VM_ALLOC_NOBUSY |
 		    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
 		pg->valid = VM_PAGE_BITS_ALL;
 		pg->md.pmap = pm;
 		pages[idx] = pg;
 	}
 	VM_OBJECT_WUNLOCK(pm->pm_tsb_obj);
 	pmap_qenter((vm_offset_t)pm->pm_tsb, pages, TSB_PAGES);
 
 	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
 	return (1);
 }
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  *
  * The TSB pages are unmapped and freed here; the TSB KVA (pm_tsb) and the
  * TSB object (pm_tsb_obj) themselves are left in place.
  */
 void
 pmap_release(pmap_t pm)
 {
 	vm_object_t obj;
 	vm_page_t m;
 #ifdef SMP
 	struct pcpu *pc;
 #endif
 
 	CTR2(KTR_PMAP, "pmap_release: ctx=%#x tsb=%p",
 	    pm->pm_context[curcpu], pm->pm_tsb);
 	KASSERT(pmap_resident_count(pm) == 0,
 	    ("pmap_release: resident pages %ld != 0",
 	    pmap_resident_count(pm)));
 
 	/*
 	 * After the pmap was freed, it might be reallocated to a new process.
 	 * When switching, this might lead us to wrongly assume that we need
 	 * not switch contexts because old and new pmap pointer are equal.
 	 * Therefore, make sure that this pmap is not referenced by any PCPU
 	 * pointer any more.  This could happen in two cases:
 	 * - A process that referenced the pmap is currently exiting on a CPU.
 	 *   However, it is guaranteed to not switch in any more after setting
 	 *   its state to PRS_ZOMBIE.
 	 * - A process that referenced this pmap ran on a CPU, but we switched
 	 *   to a kernel thread, leaving the pmap pointer unchanged.
 	 */
 #ifdef SMP
 	/* Clear pc_pmap on every CPU whose pointer still equals pm. */
 	sched_pin();
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
 		atomic_cmpset_rel_ptr((uintptr_t *)&pc->pc_pmap,
 		    (uintptr_t)pm, (uintptr_t)NULL);
 	sched_unpin();
 #else
 	critical_enter();
 	if (PCPU_GET(pmap) == pm)
 		PCPU_SET(pmap, NULL);
 	critical_exit();
 #endif
 
 	/* Unmap the TSB, then unwire and free each of its pages. */
 	pmap_qremove((vm_offset_t)pm->pm_tsb, TSB_PAGES);
 	obj = pm->pm_tsb_obj;
 	VM_OBJECT_WLOCK(obj);
 	KASSERT(obj->ref_count == 1, ("pmap_release: tsbobj ref count != 1"));
 	while (!TAILQ_EMPTY(&obj->memq)) {
 		m = TAILQ_FIRST(&obj->memq);
 		m->md.pmap = NULL;
 		m->wire_count--;
 		atomic_subtract_int(&cnt.v_wire_count, 1);
 		vm_page_free_zero(m);
 	}
 	VM_OBJECT_WUNLOCK(obj);
 }
 
 /*
  * Grow the number of kernel page table entries.  Not supported by this
  * implementation: any call panics, and addr is ignored.
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 
 	panic("pmap_growkernel: can't grow kernel");
 }
 
 /*
  * Tear down the mapping of va in pm that is described by tp.  The
  * signature matches the callback handed to tsb_foreach(); pm2 is unused.
  *
  * Returns 0 if PMAP_REMOVE_DONE(pm) holds after the removal, which callers
  * use as the signal to stop iterating, and 1 otherwise.  The tte list
  * global lock must be write-locked.
  */
 int
 pmap_remove_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
     vm_offset_t va)
 {
 	vm_page_t m;
 	u_long data;
 
 	rw_assert(&tte_list_global_lock, RA_WLOCKED);
 	/* Fetch the old TTE data and clear it in a single atomic operation. */
 	data = atomic_readandclear_long(&tp->tte_data);
 	if ((data & TD_FAKE) == 0) {
 		m = PHYS_TO_VM_PAGE(TD_PA(data));
 		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
 		if ((data & TD_WIRED) != 0)
 			pm->pm_stats.wired_count--;
 		if ((data & TD_PV) != 0) {
 			/* Carry the modify and reference bits over to the page. */
 			if ((data & TD_W) != 0)
 				vm_page_dirty(m);
 			if ((data & TD_REF) != 0)
 				vm_page_aflag_set(m, PGA_REFERENCED);
 			if (TAILQ_EMPTY(&m->md.tte_list))
 				vm_page_aflag_clear(m, PGA_WRITEABLE);
 			pm->pm_stats.resident_count--;
 		}
 		pmap_cache_remove(m, va);
 	}
 	TTE_ZERO(tp);
 	if (PMAP_REMOVE_DONE(pm))
 		return (0);
 	return (1);
 }
 
 /*
  * Remove the given range of addresses from the specified map.
  *
  * Ranges larger than PMAP_TSB_THRESH are handled by walking the TSB and
  * demapping the whole context; smaller ones are handled page by page
  * followed by a range demap.
  */
 void
 pmap_remove(pmap_t pm, vm_offset_t start, vm_offset_t end)
 {
 	struct tte *tte;
 	vm_offset_t cur;
 
 	CTR3(KTR_PMAP, "pmap_remove: ctx=%#lx start=%#lx end=%#lx",
 	    pm->pm_context[curcpu], start, end);
 	if (PMAP_REMOVE_DONE(pm))
 		return;
 	rw_wlock(&tte_list_global_lock);
 	PMAP_LOCK(pm);
 	if (end - start <= PMAP_TSB_THRESH) {
 		for (cur = start; cur < end; cur += PAGE_SIZE) {
 			tte = tsb_tte_lookup(pm, cur);
 			if (tte == NULL)
 				continue;
 			/* A zero return says there is nothing left to do. */
 			if (pmap_remove_tte(pm, NULL, tte, cur) == 0)
 				break;
 		}
 		tlb_range_demap(pm, start, end - 1);
 	} else {
 		tsb_foreach(pm, NULL, start, end, pmap_remove_tte);
 		tlb_context_demap(pm);
 	}
 	PMAP_UNLOCK(pm);
 	rw_wunlock(&tte_list_global_lock);
 }
 
 /*
  * Remove every TD_PV mapping of the managed page m, carrying the reference
  * and modify bits of each mapping over to the page before the TTE is
  * zeroed.  Entries on the page's TTE list without TD_PV are skipped.
  */
 void
 pmap_remove_all(vm_page_t m)
 {
 	struct pmap *pm;
 	struct tte *tpn;
 	struct tte *tp;
 	vm_offset_t va;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_remove_all: page %p is not managed", m));
 	rw_wlock(&tte_list_global_lock);
 	for (tp = TAILQ_FIRST(&m->md.tte_list); tp != NULL; tp = tpn) {
 		/* Fetch the successor first: tp is unlinked further down. */
 		tpn = TAILQ_NEXT(tp, tte_link);
 		if ((tp->tte_data & TD_PV) == 0)
 			continue;
 		pm = TTE_GET_PMAP(tp);
 		va = TTE_GET_VA(tp);
 		PMAP_LOCK(pm);
 		if ((tp->tte_data & TD_WIRED) != 0)
 			pm->pm_stats.wired_count--;
 		if ((tp->tte_data & TD_REF) != 0)
 			vm_page_aflag_set(m, PGA_REFERENCED);
 		if ((tp->tte_data & TD_W) != 0)
 			vm_page_dirty(m);
 		/* Invalidate the entry before demapping it from the TLB. */
 		tp->tte_data &= ~TD_V;
 		tlb_page_demap(pm, va);
 		TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
 		pm->pm_stats.resident_count--;
 		pmap_cache_remove(m, va);
 		TTE_ZERO(tp);
 		PMAP_UNLOCK(pm);
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_wunlock(&tte_list_global_lock);
 }
 
 /*
  * Revoke write access from the mapping described by tp by clearing TD_SW
  * and TD_W.  The signature matches the callback handed to tsb_foreach();
  * pm2 and va are unused.  Always returns 1.  The pmap lock must be held.
  */
 static int
 pmap_protect_tte(struct pmap *pm, struct pmap *pm2, struct tte *tp,
     vm_offset_t va)
 {
 	u_long data;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pm, MA_OWNED);
 	/*
 	 * The returned value is tested for TD_W below, i.e. it is used as
 	 * the TTE data from before the bits were cleared.
 	 */
 	data = atomic_clear_long(&tp->tte_data, TD_SW | TD_W);
 	if ((data & (TD_PV | TD_W)) == (TD_PV | TD_W)) {
 		m = PHYS_TO_VM_PAGE(TD_PA(data));
 		vm_page_dirty(m);
 	}
 	return (1);
 }
 
 /*
  * Apply the requested protection to the range [sva, eva) of this map.
  * Dropping read access removes the mappings, keeping write access is a
  * no-op, and anything else strips write permission from every mapping in
  * the range.
  */
 void
 pmap_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	struct tte *tte;
 	vm_offset_t cur;
 
 	CTR4(KTR_PMAP, "pmap_protect: ctx=%#lx sva=%#lx eva=%#lx prot=%#lx",
 	    pm->pm_context[curcpu], sva, eva, prot);
 
 	/* Without read access the mapping cannot stay at all. */
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pm, sva, eva);
 		return;
 	}
 
 	/* Write access is still granted, so there is nothing to revoke. */
 	if ((prot & VM_PROT_WRITE) != 0)
 		return;
 
 	PMAP_LOCK(pm);
 	if (eva - sva <= PMAP_TSB_THRESH) {
 		for (cur = sva; cur < eva; cur += PAGE_SIZE) {
 			tte = tsb_tte_lookup(pm, cur);
 			if (tte != NULL)
 				pmap_protect_tte(pm, NULL, tte, cur);
 		}
 		tlb_range_demap(pm, sva, eva - 1);
 	} else {
 		tsb_foreach(pm, NULL, sva, eva, pmap_protect_tte);
 		tlb_context_demap(pm);
 	}
 	PMAP_UNLOCK(pm);
 }
 
 /*
  * Enter a mapping of physical page m at virtual address va in the target
  * pmap with the requested protection, wiring it down if asked to.
  *
  * This is the locking wrapper around pmap_enter_locked(): it takes the
  * tte list global lock and the pmap lock, and hands back whatever the
  * worker returns.
  */
 int
 pmap_enter(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     u_int flags, int8_t psind)
 {
 	int status;
 
 	rw_wlock(&tte_list_global_lock);
 	PMAP_LOCK(pm);
 	status = pmap_enter_locked(pm, va, m, prot, flags, psind);
 	rw_wunlock(&tte_list_global_lock);
 	PMAP_UNLOCK(pm);
 	return (status);
 }
 
 /*
  * Map the given physical page at the specified virtual address in the
  * target pmap with the protection requested.  If specified the page
  * will be wired down.
  *
  * The tte list global lock must be write-locked and the pmap lock must be
  * held.  The psind argument is unused.  Always returns KERN_SUCCESS.
  */
 static int
 pmap_enter_locked(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     u_int flags, int8_t psind __unused)
 {
 	struct tte *tp;
 	vm_paddr_t pa;
 	vm_page_t real;
 	u_long data;
 	boolean_t wired;
 
 	rw_assert(&tte_list_global_lock, RA_WLOCKED);
 	PMAP_LOCK_ASSERT(pm, MA_OWNED);
 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
 		VM_OBJECT_ASSERT_LOCKED(m->object);
 	PMAP_STATS_INC(pmap_nenter);
 	pa = VM_PAGE_TO_PHYS(m);
 	wired = (flags & PMAP_ENTER_WIRED) != 0;
 
 	/*
 	 * If this is a fake page from the device_pager, but it covers actual
 	 * physical memory, convert to the real backing page.
 	 */
 	if ((m->flags & PG_FICTITIOUS) != 0) {
 		real = vm_phys_paddr_to_vm_page(pa);
 		if (real != NULL)
 			m = real;
 	}
 
 	CTR6(KTR_PMAP,
 	    "pmap_enter_locked: ctx=%p m=%p va=%#lx pa=%#lx prot=%#x wired=%d",
 	    pm->pm_context[curcpu], m, va, pa, prot, wired);
 
 	/*
 	 * If there is an existing mapping, and the physical address has not
 	 * changed, this must be a protection or wiring change.
 	 */
 	if ((tp = tsb_tte_lookup(pm, va)) != NULL && TTE_GET_PA(tp) == pa) {
 		CTR0(KTR_PMAP, "pmap_enter_locked: update");
 		PMAP_STATS_INC(pmap_nenter_update);
 
 		/*
 		 * Wiring change, just update stats.
 		 */
 		if (wired) {
 			if ((tp->tte_data & TD_WIRED) == 0) {
 				tp->tte_data |= TD_WIRED;
 				pm->pm_stats.wired_count++;
 			}
 		} else {
 			if ((tp->tte_data & TD_WIRED) != 0) {
 				tp->tte_data &= ~TD_WIRED;
 				pm->pm_stats.wired_count--;
 			}
 		}
 
 		/*
 		 * Save the old bits and clear the ones we're interested in.
 		 */
 		data = tp->tte_data;
 		tp->tte_data &= ~(TD_EXEC | TD_SW | TD_W);
 
 		/*
 		 * Grant write permission if requested.  Otherwise write
 		 * permission is being turned off, so sense the modify status
 		 * from the saved bits.
 		 */
 		if ((prot & VM_PROT_WRITE) != 0) {
 			tp->tte_data |= TD_SW;
 			if (wired)
 				tp->tte_data |= TD_W;
 			if ((m->oflags & VPO_UNMANAGED) == 0)
 				vm_page_aflag_set(m, PGA_WRITEABLE);
 		} else if ((data & TD_W) != 0)
 			vm_page_dirty(m);
 
 		/*
 		 * If we're turning on execute permissions, flush the icache.
 		 */
 		if ((prot & VM_PROT_EXECUTE) != 0) {
 			if ((data & TD_EXEC) == 0)
 				icache_page_inval(pa);
 			tp->tte_data |= TD_EXEC;
 		}
 
 		/*
 		 * Delete the old mapping.
 		 */
 		tlb_page_demap(pm, TTE_GET_VA(tp));
 	} else {
 		/*
 		 * If there is an existing mapping, but it's for a different
 		 * physical address, delete the old mapping.
 		 */
 		if (tp != NULL) {
 			CTR0(KTR_PMAP, "pmap_enter_locked: replace");
 			PMAP_STATS_INC(pmap_nenter_replace);
 			pmap_remove_tte(pm, NULL, tp, va);
 			tlb_page_demap(pm, va);
 		} else {
 			CTR0(KTR_PMAP, "pmap_enter_locked: new");
 			PMAP_STATS_INC(pmap_nenter_new);
 		}
 
 		/*
 		 * Now set up the data and install the new mapping.
 		 */
 		data = TD_V | TD_8K | TD_PA(pa);
 		if (pm == kernel_pmap)
 			data |= TD_P;
 		if ((prot & VM_PROT_WRITE) != 0) {
 			data |= TD_SW;
 			if ((m->oflags & VPO_UNMANAGED) == 0)
 				vm_page_aflag_set(m, PGA_WRITEABLE);
 		}
 		if (prot & VM_PROT_EXECUTE) {
 			data |= TD_EXEC;
 			icache_page_inval(pa);
 		}
 
 		/*
 		 * If it's wired, update stats.  We also don't need reference
 		 * or modify tracking for wired mappings, so set the bits now.
 		 */
 		if (wired) {
 			pm->pm_stats.wired_count++;
 			data |= TD_REF | TD_WIRED;
 			if ((prot & VM_PROT_WRITE) != 0)
 				data |= TD_W;
 		}
 
 		tsb_tte_enter(pm, m, va, TS_8K, data);
 	}
 
 	return (KERN_SUCCESS);
 }
 
 /*
  * Maps a run of resident pages that belong to one object.  The run
  * starts with m_start, which is mapped at virtual address start; every
  * following page is mapped at start plus its pindex distance from
  * m_start.  The run ends with the last page whose virtual address still
  * lies below end.  Virtual pages between start and end without a
  * resident page at the corresponding offset are simply left unmapped.
  *
  * Only read and execute permission are passed on; the mappings are never
  * wired.
  */
 void
 pmap_enter_object(pmap_t pm, vm_offset_t start, vm_offset_t end,
     vm_page_t m_start, vm_prot_t prot)
 {
 	vm_pindex_t diff, psize;
 	vm_page_t m;
 
 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
 
 	psize = atop(end - start);
 	rw_wlock(&tte_list_global_lock);
 	PMAP_LOCK(pm);
 	for (m = m_start; m != NULL; m = TAILQ_NEXT(m, listq)) {
 		diff = m->pindex - m_start->pindex;
 		if (diff >= psize)
 			break;
 		pmap_enter_locked(pm, start + ptoa(diff), m, prot &
 		    (VM_PROT_READ | VM_PROT_EXECUTE), 0, 0);
 	}
 	rw_wunlock(&tte_list_global_lock);
 	PMAP_UNLOCK(pm);
 }
 
 /*
  * Enter a single unwired mapping of page m at va, granting at most read
  * and execute permission.  Takes the tte list global lock and the pmap
  * lock around the call to pmap_enter_locked().
  */
 void
 pmap_enter_quick(pmap_t pm, vm_offset_t va, vm_page_t m, vm_prot_t prot)
 {
 	vm_prot_t limited;
 
 	limited = prot & (VM_PROT_READ | VM_PROT_EXECUTE);
 	rw_wlock(&tte_list_global_lock);
 	PMAP_LOCK(pm);
 	pmap_enter_locked(pm, va, m, limited, 0, 0);
 	rw_wunlock(&tte_list_global_lock);
 	PMAP_UNLOCK(pm);
 }
 
 /*
  * No mappings are created here.  The function only asserts that the
  * object is write-locked and that it is of type OBJT_DEVICE or OBJT_SG;
  * pm, addr, pindex and size are otherwise unused.
  */
 void
 pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
 	    ("pmap_object_init_pt: non-device object"));
 }
 
 /*
  * Clear TD_WIRED in the mapping described by tp and decrement the wired
  * count of pm.  Panics if the mapping is not wired.  The signature matches
  * the callback handed to tsb_foreach(); pm2 and va are unused.  Always
  * returns 1.  The pmap lock must be held.
  */
 static int
 pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp, vm_offset_t va)
 {
 
 	PMAP_LOCK_ASSERT(pm, MA_OWNED);
 	if ((tp->tte_data & TD_WIRED) == 0)
 		panic("pmap_unwire_tte: tp %p is missing TD_WIRED", tp);
 	atomic_clear_long(&tp->tte_data, TD_WIRED);
 	pm->pm_stats.wired_count--;
 	return (1);
 }
 
 /*
  * Drop the wired attribute from all mappings in [sva, eva) of the given
  * pmap.  Every valid mapping in that range is required to be wired;
  * invalid mappings cannot be wired and are therefore skipped.
  *
  * The wired attribute lives only in the translation table entry and is
  * not a hardware feature, so no TLB entries have to be invalidated.
  */
 void
 pmap_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
 {
 	struct tte *tte;
 	vm_offset_t cur;
 
 	PMAP_LOCK(pm);
 	if (eva - sva <= PMAP_TSB_THRESH) {
 		for (cur = sva; cur < eva; cur += PAGE_SIZE) {
 			tte = tsb_tte_lookup(pm, cur);
 			if (tte != NULL)
 				pmap_unwire_tte(pm, NULL, tte, cur);
 		}
 	} else
 		tsb_foreach(pm, NULL, sva, eva, pmap_unwire_tte);
 	PMAP_UNLOCK(pm);
 }
 
 /*
  * Duplicate the mapping described by tp into dst_pmap at va, unless the
  * TTE is marked TD_FAKE or dst_pmap already maps va.  The copy is entered
  * with TD_PV, TD_REF, TD_SW, TD_CV and TD_W stripped from the data.
  * Always returns 1; src_pmap is not consulted.
  */
 static int
 pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
     vm_offset_t va)
 {
 	u_long data;
 	vm_page_t pg;
 
 	if ((tp->tte_data & TD_FAKE) != 0 ||
 	    tsb_tte_lookup(dst_pmap, va) != NULL)
 		return (1);
 	data = tp->tte_data &
 	    ~(TD_PV | TD_REF | TD_SW | TD_CV | TD_W);
 	pg = PHYS_TO_VM_PAGE(TTE_GET_PA(tp));
 	tsb_tte_enter(dst_pmap, pg, va, TS_8K, data);
 	return (1);
 }
 
 /*
  * Copy the mappings for [src_addr, src_addr + len) from src_pmap into
  * dst_pmap.  Nothing is done unless dst_addr equals src_addr.
  */
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr,
     vm_size_t len, vm_offset_t src_addr)
 {
 	struct tte *tp;
 	vm_offset_t va;
 
 	if (dst_addr != src_addr)
 		return;
 	rw_wlock(&tte_list_global_lock);
 	/* Take the two pmap locks in ascending address order. */
 	if (dst_pmap < src_pmap) {
 		PMAP_LOCK(dst_pmap);
 		PMAP_LOCK(src_pmap);
 	} else {
 		PMAP_LOCK(src_pmap);
 		PMAP_LOCK(dst_pmap);
 	}
 	/*
 	 * Large ranges walk the source TSB and demap the whole destination
 	 * context; small ranges go page by page and demap just the range.
 	 */
 	if (len > PMAP_TSB_THRESH) {
 		tsb_foreach(src_pmap, dst_pmap, src_addr, src_addr + len,
 		    pmap_copy_tte);
 		tlb_context_demap(dst_pmap);
 	} else {
 		for (va = src_addr; va < src_addr + len; va += PAGE_SIZE)
 			if ((tp = tsb_tte_lookup(src_pmap, va)) != NULL)
 				pmap_copy_tte(src_pmap, dst_pmap, tp, va);
 		tlb_range_demap(dst_pmap, src_addr, src_addr + len - 1);
 	}
 	rw_wunlock(&tte_list_global_lock);
 	PMAP_UNLOCK(src_pmap);
 	PMAP_UNLOCK(dst_pmap);
 }
 
 /*
  * Zero the whole of page m.  How the page is accessed depends on its
  * cache color:
  * - color matches that of the physical address (or colors are ignored):
  *   through the direct map;
  * - color is -1: through physical addresses with ASI_PHYS_USE_EC;
  * - any other color: through a temporary kernel mapping picked by color.
  */
 void
 pmap_zero_page(vm_page_t m)
 {
 	struct tte *tp;
 	vm_offset_t va;
 	vm_paddr_t pa;
 
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_zero_page: fake page"));
 	PMAP_STATS_INC(pmap_nzero_page);
 	pa = VM_PAGE_TO_PHYS(m);
 	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
 		PMAP_STATS_INC(pmap_nzero_page_c);
 		va = TLB_PHYS_TO_DIRECT(pa);
 		cpu_block_zero((void *)va, PAGE_SIZE);
 	} else if (m->md.color == -1) {
 		PMAP_STATS_INC(pmap_nzero_page_nc);
 		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
 	} else {
 		PMAP_STATS_INC(pmap_nzero_page_oc);
 		/* The temporary mapping slot is used under the kernel pmap lock. */
 		PMAP_LOCK(kernel_pmap);
 		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
 		tp = tsb_kvtotte(va);
 		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
 		tp->tte_vpn = TV_VPN(va, TS_8K);
 		cpu_block_zero((void *)va, PAGE_SIZE);
 		tlb_page_demap(kernel_pmap, va);
 		PMAP_UNLOCK(kernel_pmap);
 	}
 }
 
 /*
  * Zero 'size' bytes of page m starting at byte offset 'off'; off + size
  * must not exceed PAGE_SIZE.  The access path is chosen by the page's
  * cache color in the same way as in pmap_zero_page(): direct map,
  * physical access via ASI_PHYS_USE_EC for color -1, or a temporary kernel
  * mapping picked by color.
  */
 void
 pmap_zero_page_area(vm_page_t m, int off, int size)
 {
 	struct tte *tp;
 	vm_offset_t va;
 	vm_paddr_t pa;
 
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_zero_page_area: fake page"));
 	KASSERT(off + size <= PAGE_SIZE, ("pmap_zero_page_area: bad off/size"));
 	PMAP_STATS_INC(pmap_nzero_page_area);
 	pa = VM_PAGE_TO_PHYS(m);
 	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
 		PMAP_STATS_INC(pmap_nzero_page_area_c);
 		va = TLB_PHYS_TO_DIRECT(pa);
 		bzero((void *)(va + off), size);
 	} else if (m->md.color == -1) {
 		PMAP_STATS_INC(pmap_nzero_page_area_nc);
 		aszero(ASI_PHYS_USE_EC, pa + off, size);
 	} else {
 		PMAP_STATS_INC(pmap_nzero_page_area_oc);
 		/* The temporary mapping slot is used under the kernel pmap lock. */
 		PMAP_LOCK(kernel_pmap);
 		va = pmap_temp_map_1 + (m->md.color * PAGE_SIZE);
 		tp = tsb_kvtotte(va);
 		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
 		tp->tte_vpn = TV_VPN(va, TS_8K);
 		bzero((void *)(va + off), size);
 		tlb_page_demap(kernel_pmap, va);
 		PMAP_UNLOCK(kernel_pmap);
 	}
 }
 
 /*
  * Zero the whole of page m.  Follows the same three-way choice by cache
  * color as pmap_zero_page(), except that the temporary mapping comes from
  * pmap_idle_map and the kernel pmap lock is not taken.
  */
 void
 pmap_zero_page_idle(vm_page_t m)
 {
 	struct tte *tp;
 	vm_offset_t va;
 	vm_paddr_t pa;
 
 	KASSERT((m->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_zero_page_idle: fake page"));
 	PMAP_STATS_INC(pmap_nzero_page_idle);
 	pa = VM_PAGE_TO_PHYS(m);
 	if (dcache_color_ignore != 0 || m->md.color == DCACHE_COLOR(pa)) {
 		PMAP_STATS_INC(pmap_nzero_page_idle_c);
 		va = TLB_PHYS_TO_DIRECT(pa);
 		cpu_block_zero((void *)va, PAGE_SIZE);
 	} else if (m->md.color == -1) {
 		PMAP_STATS_INC(pmap_nzero_page_idle_nc);
 		aszero(ASI_PHYS_USE_EC, pa, PAGE_SIZE);
 	} else {
 		PMAP_STATS_INC(pmap_nzero_page_idle_oc);
 		/* NOTE(review): no lock here; presumably the slot is private to the idle zeroing path -- confirm. */
 		va = pmap_idle_map + (m->md.color * PAGE_SIZE);
 		tp = tsb_kvtotte(va);
 		tp->tte_data = TD_V | TD_8K | TD_PA(pa) | TD_CP | TD_CV | TD_W;
 		tp->tte_vpn = TV_VPN(va, TS_8K);
 		cpu_block_zero((void *)va, PAGE_SIZE);
 		tlb_page_demap(kernel_pmap, va);
 	}
 }
 
 /*
  * Copy the contents of page msrc to page mdst.  Each page is accessed
  * according to its cache color: through the direct map when its color
  * matches that of its physical address, through physical addresses with
  * ASI_PHYS_USE_EC when its color is -1, and through a temporary kernel
  * mapping picked by color otherwise.  The branches below cover the
  * combinations of these cases for source and destination.
  */
 void
 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
 {
 	vm_offset_t vdst;
 	vm_offset_t vsrc;
 	vm_paddr_t pdst;
 	vm_paddr_t psrc;
 	struct tte *tp;
 
 	KASSERT((mdst->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_copy_page: fake dst page"));
 	KASSERT((msrc->flags & PG_FICTITIOUS) == 0,
 	    ("pmap_copy_page: fake src page"));
 	PMAP_STATS_INC(pmap_ncopy_page);
 	pdst = VM_PAGE_TO_PHYS(mdst);
 	psrc = VM_PAGE_TO_PHYS(msrc);
 	if (dcache_color_ignore != 0 ||
 	    (msrc->md.color == DCACHE_COLOR(psrc) &&
 	    mdst->md.color == DCACHE_COLOR(pdst))) {
 		/* Both pages can go through the direct map. */
 		PMAP_STATS_INC(pmap_ncopy_page_c);
 		vdst = TLB_PHYS_TO_DIRECT(pdst);
 		vsrc = TLB_PHYS_TO_DIRECT(psrc);
 		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
 	} else if (msrc->md.color == -1 && mdst->md.color == -1) {
 		/* Both pages have color -1: physical to physical copy. */
 		PMAP_STATS_INC(pmap_ncopy_page_nc);
 		ascopy(ASI_PHYS_USE_EC, psrc, pdst, PAGE_SIZE);
 	} else if (msrc->md.color == -1) {
 		/* Source is read physically; destination is read via a VA. */
 		if (mdst->md.color == DCACHE_COLOR(pdst)) {
 			PMAP_STATS_INC(pmap_ncopy_page_dc);
 			vdst = TLB_PHYS_TO_DIRECT(pdst);
 			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
 			    PAGE_SIZE);
 		} else {
 			PMAP_STATS_INC(pmap_ncopy_page_doc);
 			PMAP_LOCK(kernel_pmap);
 			vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
 			tp = tsb_kvtotte(vdst);
 			tp->tte_data =
 			    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
 			tp->tte_vpn = TV_VPN(vdst, TS_8K);
 			ascopyfrom(ASI_PHYS_USE_EC, psrc, (void *)vdst,
 			    PAGE_SIZE);
 			tlb_page_demap(kernel_pmap, vdst);
 			PMAP_UNLOCK(kernel_pmap);
 		}
 	} else if (mdst->md.color == -1) {
 		/* Destination is written physically; source is read via a VA. */
 		if (msrc->md.color == DCACHE_COLOR(psrc)) {
 			PMAP_STATS_INC(pmap_ncopy_page_sc);
 			vsrc = TLB_PHYS_TO_DIRECT(psrc);
 			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
 			    PAGE_SIZE);
 		} else {
 			PMAP_STATS_INC(pmap_ncopy_page_soc);
 			PMAP_LOCK(kernel_pmap);
 			vsrc = pmap_temp_map_1 + (msrc->md.color * PAGE_SIZE);
 			tp = tsb_kvtotte(vsrc);
 			tp->tte_data =
 			    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
 			tp->tte_vpn = TV_VPN(vsrc, TS_8K);
 			ascopyto((void *)vsrc, ASI_PHYS_USE_EC, pdst,
 			    PAGE_SIZE);
 			tlb_page_demap(kernel_pmap, vsrc);
 			PMAP_UNLOCK(kernel_pmap);
 		}
 	} else {
 		/*
 		 * Remaining combinations: both pages go through temporary
 		 * mappings, the destination in pmap_temp_map_1 and the source
 		 * in pmap_temp_map_2.
 		 */
 		PMAP_STATS_INC(pmap_ncopy_page_oc);
 		PMAP_LOCK(kernel_pmap);
 		vdst = pmap_temp_map_1 + (mdst->md.color * PAGE_SIZE);
 		tp = tsb_kvtotte(vdst);
 		tp->tte_data =
 		    TD_V | TD_8K | TD_PA(pdst) | TD_CP | TD_CV | TD_W;
 		tp->tte_vpn = TV_VPN(vdst, TS_8K);
 		vsrc = pmap_temp_map_2 + (msrc->md.color * PAGE_SIZE);
 		tp = tsb_kvtotte(vsrc);
 		tp->tte_data =
 		    TD_V | TD_8K | TD_PA(psrc) | TD_CP | TD_CV | TD_W;
 		tp->tte_vpn = TV_VPN(vsrc, TS_8K);
 		cpu_block_copy((void *)vsrc, (void *)vdst, PAGE_SIZE);
 		tlb_page_demap(kernel_pmap, vdst);
 		tlb_page_demap(kernel_pmap, vsrc);
 		PMAP_UNLOCK(kernel_pmap);
 	}
 }
 
 /*
  * File-scope flag with no initializer, so it starts out as 0.
  * NOTE(review): presumably consulted by machine-independent code to decide
  * whether unmapped buffers may be used on this platform -- confirm.
  */
 int unmapped_buf_allowed;
 
 /*
  * Multi-page copy between two page arrays is not implemented on this
  * platform; any call panics.  None of the arguments are examined.
  */
 void
 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
     vm_offset_t b_offset, int xfersize)
 {
 
 	panic("pmap_copy_pages: not implemented");
 }
 
 /*
  * Report whether pmap pm owns one of the first 16 PV-tracked TTEs linked
  * to page m.  The search is bounded on purpose: it only has to return
  * TRUE for a small subset of pmaps for page aging to work, and the limit
  * may be tuned up or down in the future.
  */
 boolean_t
 pmap_page_exists_quick(pmap_t pm, vm_page_t m)
 {
 	struct tte *tte;
 	boolean_t found;
 	int examined;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_page_exists_quick: page %p is not managed", m));
 	found = FALSE;
 	examined = 0;
 	rw_wlock(&tte_list_global_lock);
 	for (tte = TAILQ_FIRST(&m->md.tte_list); tte != NULL;
 	    tte = TAILQ_NEXT(tte, tte_link)) {
 		/* TTEs without TD_PV are neither tested nor counted. */
 		if ((tte->tte_data & TD_PV) != 0) {
 			if (TTE_GET_PMAP(tte) == pm) {
 				found = TRUE;
 				break;
 			}
 			examined++;
 			if (examined >= 16)
 				break;
 		}
 	}
 	rw_wunlock(&tte_list_global_lock);
 	return (found);
 }
 
 /*
  * Count the mappings of page m that are both PV-tracked (TD_PV) and wired
  * (TD_WIRED).  An unmanaged page always yields 0 without taking the TTE
  * list lock.
  */
 int
 pmap_page_wired_mappings(vm_page_t m)
 {
 	struct tte *tte;
 	int nwired;
 
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (0);
 	nwired = 0;
 	rw_wlock(&tte_list_global_lock);
 	TAILQ_FOREACH(tte, &m->md.tte_list, tte_link) {
 		/* Both bits have to be set for the mapping to count. */
 		if ((tte->tte_data & (TD_PV | TD_WIRED)) !=
 		    (TD_PV | TD_WIRED))
 			continue;
 		nwired++;
 	}
 	rw_wunlock(&tte_list_global_lock);
 	return (nwired);
 }
 
 /*
  * Remove all pages from specified address space, this aids process exit
  * speeds.  This is much faster than pmap_remove in the case of running down
  * an entire address space.  Only works for the current pmap.
  *
  * NOTE: the above describes the interface contract only; this
  * implementation has an empty body and does nothing with pm.
  */
 void
 pmap_remove_pages(pmap_t pm)
 {
 
 }
 
 /*
  * Returns TRUE if at least one TTE on page m's list is PV-tracked, i.e.
  * the page has a managed mapping.  Unmanaged pages always yield FALSE.
  */
 boolean_t
 pmap_page_is_mapped(vm_page_t m)
 {
 	struct tte *tte;
 	boolean_t mapped;
 
 	if ((m->oflags & VPO_UNMANAGED) != 0)
 		return (FALSE);
 	mapped = FALSE;
 	rw_wlock(&tte_list_global_lock);
 	for (tte = TAILQ_FIRST(&m->md.tte_list); tte != NULL;
 	    tte = TAILQ_NEXT(tte, tte_link)) {
 		if ((tte->tte_data & TD_PV) == 0)
 			continue;
 		/* The first managed mapping settles the answer. */
 		mapped = TRUE;
 		break;
 	}
 	rw_wunlock(&tte_list_global_lock);
 	return (mapped);
 }
 
 /*
  * Return a count of reference bits for a page, clearing those bits.
  * It is not necessary for every reference bit to be cleared, but it
  * is necessary that 0 only be returned when there are truly no
  * reference bits set.
  *
  * The page must be managed.  Each TTE visited is moved to the tail of
  * the page's TTE list, and the walk stops after one pass over the list
  * or as soon as a fifth referenced mapping has been counted.
  *
  * XXX: The exact number of bits to check and clear is a matter that
  * should be tested and standardized at some point in the future for
  * optimal aging of shared pages.
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	struct tte *tpf;
 	struct tte *tpn;
 	struct tte *tp;
 	u_long data;
 	int count;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_ts_referenced: page %p is not managed", m));
 	count = 0;
 	rw_wlock(&tte_list_global_lock);
 	if ((tp = TAILQ_FIRST(&m->md.tte_list)) != NULL) {
 		/* Remember the original head so a full pass can be detected. */
 		tpf = tp;
 		do {
 			/*
 			 * Fetch the successor before tp is rotated to the
 			 * tail of the list.
 			 */
 			tpn = TAILQ_NEXT(tp, tte_link);
 			TAILQ_REMOVE(&m->md.tte_list, tp, tte_link);
 			TAILQ_INSERT_TAIL(&m->md.tte_list, tp, tte_link);
 			/*
 			 * A `continue' inside do-while jumps to the loop
 			 * condition, so skipped entries still advance to tpn.
 			 */
 			if ((tp->tte_data & TD_PV) == 0)
 				continue;
 			/*
 			 * NOTE(review): the value returned here is tested
 			 * for TD_REF, so it is presumably the contents from
 			 * before the clear -- confirm against this
 			 * platform's atomic_clear_long().
 			 */
 			data = atomic_clear_long(&tp->tte_data, TD_REF);
 			if ((data & TD_REF) != 0 && ++count > 4)
 				break;
 		} while ((tp = tpn) != NULL && tp != tpf);
 	}
 	rw_wunlock(&tte_list_global_lock);
 	return (count);
 }
 
 /*
  * Return TRUE if any PV-tracked TTE of page m has TD_W set.  The page must
  * be managed and its object must be write locked.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 	struct tte *tte;
 	boolean_t dirty;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_is_modified: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	dirty = FALSE;
 
 	/*
 	 * PGA_WRITEABLE cannot be set concurrently while the object is
 	 * locked unless the page is exclusive busied.  Hence the TTE list
 	 * only needs scanning when the page is exclusive busied or the flag
 	 * is set; otherwise no TTE can have TD_W set.
 	 */
 	if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) {
 		rw_wlock(&tte_list_global_lock);
 		TAILQ_FOREACH(tte, &m->md.tte_list, tte_link) {
 			if ((tte->tte_data & TD_PV) != 0 &&
 			    (tte->tte_data & TD_W) != 0) {
 				dirty = TRUE;
 				break;
 			}
 		}
 		rw_wunlock(&tte_list_global_lock);
 	}
 	return (dirty);
 }
 
 /*
  *	pmap_is_prefaultable:
  *
  *	Return whether or not the specified virtual address is eligible
  *	for prefault, which is the case exactly when tsb_tte_lookup()
  *	finds no TTE for the address in the given pmap.
  */
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 {
 	boolean_t absent;
 
 	PMAP_LOCK(pmap);
 	if (tsb_tte_lookup(pmap, addr) == NULL)
 		absent = TRUE;
 	else
 		absent = FALSE;
 	PMAP_UNLOCK(pmap);
 	return (absent);
 }
 
 /*
  * Return TRUE if any PV-tracked TTE of managed page m has TD_REF set,
  * i.e. the page was referenced through at least one physical map.
  */
 boolean_t
 pmap_is_referenced(vm_page_t m)
 {
 	struct tte *tte;
 	boolean_t referenced;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_is_referenced: page %p is not managed", m));
 	referenced = FALSE;
 	rw_wlock(&tte_list_global_lock);
 	for (tte = TAILQ_FIRST(&m->md.tte_list); tte != NULL;
 	    tte = TAILQ_NEXT(tte, tte_link)) {
 		/* Only managed mappings are considered. */
 		if ((tte->tte_data & TD_PV) != 0 &&
 		    (tte->tte_data & TD_REF) != 0) {
 			referenced = TRUE;
 			break;
 		}
 	}
 	rw_wunlock(&tte_list_global_lock);
 	return (referenced);
 }
 
 /*
  * This function is advisory.  This implementation has an empty body: the
  * pmap, the [sva, eva) range and the advice value are all ignored.
  */
 void
 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
 {
 }
 
 /*
  * Clear TD_W in every PV-tracked TTE of page m and demap each mapping
  * that actually had the bit set.  The page must be managed, must not be
  * exclusive busied, and its object must be write locked.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	struct tte *tte;
 	u_long old;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_clear_modify: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 	KASSERT(!vm_page_xbusied(m),
 	    ("pmap_clear_modify: page %p is exclusive busied", m));
 
 	/*
 	 * With the object locked and the page not exclusive busied,
 	 * PGA_WRITEABLE cannot be set concurrently.  A page without
 	 * PGA_WRITEABLE has no TTE with TD_W set, so there is nothing to do.
 	 */
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&tte_list_global_lock);
 	for (tte = TAILQ_FIRST(&m->md.tte_list); tte != NULL;
 	    tte = TAILQ_NEXT(tte, tte_link)) {
 		if ((tte->tte_data & TD_PV) != 0) {
 			old = atomic_clear_long(&tte->tte_data, TD_W);
 			/* Demap only if the bit was set beforehand. */
 			if ((old & TD_W) != 0)
 				tlb_page_demap(TTE_GET_PMAP(tte),
 				    TTE_GET_VA(tte));
 		}
 	}
 	rw_wunlock(&tte_list_global_lock);
 }
 
 /*
  * Clear TD_SW and TD_W in every PV-tracked TTE of page m.  A mapping that
  * had TD_W set causes the page to be marked dirty and the mapping to be
  * demapped.  PGA_WRITEABLE is cleared on the page afterwards.  The page
  * must be managed and its object must be write locked.
  */
 void
 pmap_remove_write(vm_page_t m)
 {
 	struct tte *tte;
 	u_long old;
 
 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 	    ("pmap_remove_write: page %p is not managed", m));
 	VM_OBJECT_ASSERT_WLOCKED(m->object);
 
 	/*
 	 * While the object is locked, another thread can only set
 	 * PGA_WRITEABLE if the page is exclusive busied.  So when the page
 	 * is not exclusive busied and the flag is clear, no page table
 	 * entries need updating.
 	 */
 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	rw_wlock(&tte_list_global_lock);
 	for (tte = TAILQ_FIRST(&m->md.tte_list); tte != NULL;
 	    tte = TAILQ_NEXT(tte, tte_link)) {
 		if ((tte->tte_data & TD_PV) == 0)
 			continue;
 		old = atomic_clear_long(&tte->tte_data, TD_SW | TD_W);
 		if ((old & TD_W) == 0)
 			continue;
 		vm_page_dirty(m);
 		tlb_page_demap(TTE_GET_PMAP(tte), TTE_GET_VA(tte));
 	}
 	vm_page_aflag_clear(m, PGA_WRITEABLE);
 	rw_wunlock(&tte_list_global_lock);
 }
 
 /*
  * Residency query for addr in pmap pm.  Not implemented yet: the function
  * always returns 0 and never reads pm or addr nor writes *locked_pa.
  */
 int
 pmap_mincore(pmap_t pm, vm_offset_t addr, vm_paddr_t *locked_pa)
 {
 
 	/* TODO; */
 	return (0);
 }
 
 /*
  * Activate a user pmap.  The pmap must be activated before its address space
  * can be accessed in any way.
  *
  * The pmap is the one belonging to td's process vmspace.  The whole
  * sequence runs inside a critical section: a TLB context number is
  * taken from the per-CPU allocator, recorded in the pmap, the pmap is
  * marked active on this CPU, and the MMU TSB and context registers are
  * loaded.
  */
 void
 pmap_activate(struct thread *td)
 {
 	struct vmspace *vm;
 	struct pmap *pm;
 	int context;
 
 	critical_enter();
 	vm = td->td_proc->p_vmspace;
 	pm = vmspace_pmap(vm);
 
 	/*
 	 * Take the next free context number.  When the per-CPU counter has
 	 * reached tlb_ctx_max, flush the user TLB entries and start over
 	 * from tlb_ctx_min.
 	 */
 	context = PCPU_GET(tlb_ctx);
 	if (context == PCPU_GET(tlb_ctx_max)) {
 		tlb_flush_user();
 		context = PCPU_GET(tlb_ctx_min);
 	}
 	PCPU_SET(tlb_ctx, context + 1);
 
 	/* Record the context for this CPU and mark the pmap active here. */
 	pm->pm_context[curcpu] = context;
 #ifdef SMP
 	CPU_SET_ATOMIC(PCPU_GET(cpuid), &pm->pm_active);
 	atomic_store_acq_ptr((uintptr_t *)PCPU_PTR(pmap), (uintptr_t)pm);
 #else
 	CPU_SET(PCPU_GET(cpuid), &pm->pm_active);
 	PCPU_SET(pmap, pm);
 #endif
 
 	/*
 	 * Load the pmap's TSB into the D-MMU and I-MMU TSB registers, then
 	 * write the new context number into AA_DMMU_PCXR while keeping the
 	 * bits selected by TLB_CXR_PGSZ_MASK from the current value.
 	 * NOTE(review): AA_DMMU_PCXR is presumably the primary context
 	 * register, with the flush() making the write take effect --
 	 * confirm against the CPU manual.
 	 */
 	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
 	stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb);
 	stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) &
 	    TLB_CXR_PGSZ_MASK) | context);
 	flush(KERNBASE);
 	critical_exit();
 }
 
 /*
  * Instruction cache synchronization hook for the range [va, va + sz) of
  * pmap pm.  This implementation has an empty body and ignores all of its
  * arguments.
  */
 void
 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
 {
 
 }
 
 /*
  * Increase the starting virtual address of the given mapping if a
  * different alignment might result in more superpage mappings.
  *
  * NOTE: this implementation has an empty body, so *addr is always left
  * unchanged.
  */
 void
 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
     vm_offset_t *addr, vm_size_t size)
 {
 
 }
Index: stable/10/sys/sparc64/sparc64/swtch.S
===================================================================
--- stable/10/sys/sparc64/sparc64/swtch.S	(revision 293852)
+++ stable/10/sys/sparc64/sparc64/swtch.S	(revision 293853)
@@ -1,346 +1,344 @@
 /*-
  * Copyright (c) 2001 Jake Burkholder.
  * Copyright (c) 2011 Marius Strobl <marius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <machine/asm.h>
 __FBSDID("$FreeBSD$");
 
 #include <machine/asmacros.h>
 #include <machine/asi.h>
 #include <machine/fsr.h>
 #include <machine/ktr.h>
 #include <machine/pcb.h>
 #include <machine/tstate.h>
 
 #include "assym.s"
 #include "opt_sched.h"
 
 	.register	%g2, #ignore
 	.register	%g3, #ignore
 
 /*
  * void cpu_throw(struct thread *old, struct thread *new)
  *
  * Opens a new register window, flushes the windows with flushw and then
  * branches to .Lsw1 in cpu_switch, i.e. past the code that saves the
  * floating point state, frame pointer and program counter.  The branch
  * delay slot zeroes %i2, which cpu_switch treats as its mtx argument, so
  * cpu_switch's later "cmp %i2, %g0" test finds no mutex to store.
  */
 ENTRY(cpu_throw)
 	save	%sp, -CCFSZ, %sp
 	flushw
 	ba	%xcc, .Lsw1
 	 mov	%g0, %i2
 END(cpu_throw)
 
 /*
  * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
  *
  * After the save below the arguments are in %i0 (old), %i1 (new) and
  * %i2 (mtx).  cpu_throw enters at .Lsw1 with %i2 set to zero.
  */
 ENTRY(cpu_switch)
 	save	%sp, -CCFSZ, %sp
 
 	/*
 	 * If the current thread was using floating point in the kernel, save
 	 * its context.  The userland floating point context has already been
 	 * saved in that case.
 	 */
 	rd	%fprs, %l2
 	andcc	%l2, FPRS_FEF, %g0
 	bz,a,pt	%xcc, 1f
 	 nop
 	call	savefpctx
 	 add	PCB_REG, PCB_KFP, %o0
 	ba,a,pt	%xcc, 2f
 	 nop
 
 	/*
 	 * If the current thread was using floating point in userland, save
 	 * its context.
 	 */
 1:	sub	PCB_REG, TF_SIZEOF, %l2
 	ldx	[%l2 + TF_FPRS], %l3
 	andcc	%l3, FPRS_FEF, %g0
 	bz,a,pt	%xcc, 2f
 	 nop
 	call	savefpctx
 	 add	PCB_REG, PCB_UFP, %o0
 	andn	%l3, FPRS_FEF, %l3
 	stx	%l3, [%l2 + TF_FPRS]
 
 	ldx	[PCB_REG + PCB_FLAGS], %l3
 	or	%l3, PCB_FEF, %l3
 	stx	%l3, [PCB_REG + PCB_FLAGS]
 
 	/*
 	 * Flush the windows out to the stack and save the current frame
 	 * pointer and program counter.
 	 */
 2:	flushw
 	wrpr	%g0, 0, %cleanwin
 	stx	%fp, [PCB_REG + PCB_SP]
 	stx	%i7, [PCB_REG + PCB_PC]
 
 	/*
 	 * Load the new thread's frame pointer and program counter, and set
 	 * the current thread and pcb.
 	 */
 .Lsw1:
 #if KTR_COMPILE & KTR_PROC
 	CATR(KTR_PROC, "cpu_switch: new td=%p pc=%#lx fp=%#lx"
 	    , %g1, %g2, %g3, 8, 9, 10)
 	stx	%i1, [%g1 + KTR_PARM1]
 	ldx	[%i1 + TD_PCB], %g2
 	ldx	[%g2 + PCB_PC], %g3
 	stx	%g3, [%g1 + KTR_PARM2]
 	ldx	[%g2 + PCB_SP], %g3
 	stx	%g3, [%g1 + KTR_PARM3]
 10:
 #endif
 	ldx	[%i1 + TD_PCB], %l0
 
 	stx	%i1, [PCPU(CURTHREAD)]
 	stx	%l0, [PCPU(CURPCB)]
 
 	/*
 	 * The new pcb pointer is written to PCB_REG once under
 	 * PSTATE_NORMAL and once under PSTATE_ALT before returning to
 	 * PSTATE_KERNEL.  NOTE(review): presumably PCB_REG is a global
 	 * register that exists separately in each global register set
 	 * selected by %pstate -- confirm.
 	 */
 	wrpr	%g0, PSTATE_NORMAL, %pstate
 	mov	%l0, PCB_REG
 	wrpr	%g0, PSTATE_ALT, %pstate
 	mov	%l0, PCB_REG
 	wrpr	%g0, PSTATE_KERNEL, %pstate
 
 	ldx	[PCB_REG + PCB_SP], %fp
 	ldx	[PCB_REG + PCB_PC], %i7
 	sub	%fp, CCFSZ, %sp
 
 	/*
 	 * Point to the pmaps of the new process, and of the last non-kernel
 	 * process to run.
 	 */
 	ldx	[%i1 + TD_PROC], %l1
 	ldx	[PCPU(PMAP)], %l2
 	ldx	[%l1 + P_VMSPACE], %i5
 	add	%i5, VM_PMAP, %l1
 
 #if KTR_COMPILE & KTR_PROC
 	CATR(KTR_PROC, "cpu_switch: new pmap=%p old pmap=%p"
 	    , %g1, %g2, %g3, 8, 9, 10)
 	stx	%l1, [%g1 + KTR_PARM1]
 	stx	%l2, [%g1 + KTR_PARM2]
 10:
 #endif
 
 	/*
 	 * If they are the same we are done.
 	 */
 	cmp	%l2, %l1
 	be,a,pn	%xcc, 8f
 	 nop
 
 	/*
 	 * If the new process is a kernel thread we can just leave the old
 	 * context active and avoid recycling its context number.
 	 */
 	SET(vmspace0, %i4, %i3)
 	cmp	%i5, %i3
 	be,a,pn	%xcc, 8f
 	 nop
 
 	/*
 	 * If there was no non-kernel pmap, don't try to deactivate it.
 	 * The delay slot loads this CPU's id into %l3 on both paths.
 	 */
 	brz,pn	%l2, 3f
 	 lduw	[PCPU(CPUID)], %l3
 
 	/*
 	 * Mark the pmap of the last non-kernel vmspace to run as no longer
 	 * active on this CPU.
 	 *
 	 * The sequence turns the CPU id in %l3 into a location inside
 	 * pm_active: %l6 = cpuid / _NCPUBITS selects the word, %l4 becomes
 	 * PM_ACTIVE + (word << PTR_SHIFT), and %l5 ends up as
 	 * 1 << (cpuid - word * _NCPUBITS), the bit mask within that word.
 	 */
 	mov	_NCPUBITS, %l5
-	mov	%g0, %y
-	udiv	%l3, %l5, %l6
+	udivx	%l3, %l5, %l6
 	srl	%l6, 0, %l4
 	sllx	%l4, PTR_SHIFT, %l4
 	add	%l4, PM_ACTIVE, %l4
 	smul	%l6, %l5, %l5
 	sub	%l3, %l5, %l5
 	mov	1, %l6
 	sllx	%l6, %l5, %l5
 #ifdef SMP
 	add	%l2, %l4, %l4
 	membar	#LoadStore | #StoreStore
 	ATOMIC_CLEAR_LONG(%l4, %l6, %l7, %l5)
 #else
 	ldx	[%l2 + %l4], %l6
 	andn	%l6, %l5, %l6
 	stx	%l6, [%l2 + %l4]
 #endif
 
 	/*
 	 * Take away its context number.
 	 */
 	sllx	%l3, INT_SHIFT, %l3
 	add	%l2, PM_CONTEXT, %l4
 	mov	-1, %l5
 	stw	%l5, [%l3 + %l4]
 
 	/*
 	 * If a mutex was passed in %i2, store it into the old thread's
 	 * td_lock field; a zero %i2 (the cpu_throw case) skips the store.
 	 */
 3:	cmp	%i2, %g0
 	be,pn	%xcc, 4f
 	 add	%i0, TD_LOCK, %l4
 #if defined(SCHED_ULE) && defined(SMP)
 	membar	#LoadStore | #StoreStore
 	ATOMIC_STORE_LONG(%l4, %l6, %l7, %i2)
 #else
 	stx	%i2, [%l4]
 #endif
 
 	/*
 	 * Find a new TLB context.  If we've run out we have to flush all
 	 * user mappings from the TLB and reset the context numbers.
 	 * tlb_flush_user is called indirectly through the pointer stored
 	 * at that symbol; the delay slot of the call reloads %i3 with the
 	 * minimum context number.
 	 */
 4:	lduw	[PCPU(TLB_CTX)], %i3
 	lduw	[PCPU(TLB_CTX_MAX)], %i4
 	cmp	%i3, %i4
 	bne,a,pt %xcc, 5f
 	 nop
 	SET(tlb_flush_user, %i5, %i4)
 	ldx	[%i4], %i5
 	call	%i5
 	 lduw	[PCPU(TLB_CTX_MIN)], %i3
 
 	/*
 	 * Advance next free context.
 	 */
 5:	add	%i3, 1, %i4
 	stw	%i4, [PCPU(TLB_CTX)]
 
 	/*
 	 * Set the new context number in the pmap.
 	 */
 	lduw	[PCPU(CPUID)], %l3
 	sllx	%l3, INT_SHIFT, %i4
 	add	%l1, PM_CONTEXT, %i5
 	stw	%i3, [%i4 + %i5]
 
 	/*
 	 * Mark the pmap as active on this CPU.  The word offset and bit
 	 * mask are derived from the CPU id exactly as in the deactivation
 	 * sequence above.
 	 */
 	mov	_NCPUBITS, %l5
-	mov	%g0, %y
-	udiv	%l3, %l5, %l6
+	udivx	%l3, %l5, %l6
 	srl	%l6, 0, %l4
 	sllx	%l4, PTR_SHIFT, %l4
 	add	%l4, PM_ACTIVE, %l4
 	smul	%l6, %l5, %l5
 	sub	%l3, %l5, %l5
 	mov	1, %l6
 	sllx	%l6, %l5, %l5
 #ifdef SMP
 	add	%l1, %l4, %l4
 	ATOMIC_SET_LONG(%l4, %l6, %l7, %l5)
 #else
 	ldx	[%l1 + %l4], %l6
 	or	%l6, %l5, %l6
 	stx	%l6, [%l1 + %l4]
 #endif
 
 	/*
 	 * Make note of the change in pmap.
 	 */
 #ifdef SMP
 	PCPU_ADDR(PMAP, %l4)
 	ATOMIC_STORE_LONG(%l4, %l5, %l6, %l1)
 #else
 	stx	%l1, [PCPU(PMAP)]
 #endif
 
 	/*
 	 * Fiddle the hardware bits.  Set the TSB registers and install the
 	 * new context number in the CPU.
 	 */
 	ldx	[%l1 + PM_TSB], %i4
 	mov	AA_DMMU_TSB, %i5
 	stxa	%i4, [%i5] ASI_DMMU
 	mov	AA_IMMU_TSB, %i5
 	stxa	%i4, [%i5] ASI_IMMU
 	setx	TLB_CXR_PGSZ_MASK, %i5, %i4
 	mov	AA_DMMU_PCXR, %i5
 	ldxa	[%i5] ASI_DMMU, %l1
 	and	%l1, %i4, %l1
 	or	%i3, %l1, %i3
 	sethi	%hi(KERNBASE), %i4
 	stxa	%i3, [%i5] ASI_DMMU
 	flush	%i4
 
 	/*
 	 * With SCHED_ULE on SMP, spin for as long as the value loaded from
 	 * the new thread's td_lock field equals the value SET() produced
 	 * for blocked_lock.
 	 */
 6:
 #if defined(SCHED_ULE) && defined(SMP)
 	SET(blocked_lock, %l2, %l1)
 	add	%i1, TD_LOCK, %l2
 7:
 	ATOMIC_LOAD_LONG(%l2, %l3)
 	cmp	%l1, %l3
 	be,a,pn	%xcc, 7b
 	 nop
 #endif
 
 	/*
 	 * Done, return and load the new process's window from the stack.
 	 */
 	ret
 	 restore
 
 	/*
 	 * Reached when no pmap switch is needed.  The old thread's td_lock
 	 * is still updated from %i2 if a mutex was passed, then control
 	 * rejoins the common exit at 6.
 	 */
 8:	cmp	%i2, %g0
 	be,pn	%xcc, 6b
 	 add	%i0, TD_LOCK, %l4
 #if defined(SCHED_ULE) && defined(SMP)
 	membar	#LoadStore | #StoreStore
 	ATOMIC_STORE_LONG(%l4, %l6, %l7, %i2)
 	ba,pt	%xcc, 6b
 	 nop
 #else
 	ba,pt	%xcc, 6b
 	 stx	%i2, [%l4]
 #endif
 END(cpu_switch)
 
 /*
  * Save the caller's context into the structure addressed by the first
  * argument (%i0 after the save).  The register windows are flushed,
  * savefpctx is called on the area at offset PCB_UFP, and the frame
  * pointer and return address are stored at PCB_SP and PCB_PC.  Returns 0
  * via the restore in the delay slot.
  * NOTE(review): the argument is presumably a struct pcb * -- confirm
  * against the C prototype.
  */
 ENTRY(savectx)
 	save	%sp, -CCFSZ, %sp
 	flushw
 	call	savefpctx
 	 add	%i0, PCB_UFP, %o0
 	stx	%fp, [%i0 + PCB_SP]
 	stx	%i7, [%i0 + PCB_PC]
 	ret
 	 restore %g0, 0, %o0
 END(savectx)
 
 /*
  * void savefpctx(uint32_t *);
  *
  * Stores the floating point registers to the buffer in %o0 as four
  * consecutive 64-byte chunks starting at %f0, %f16, %f32 and %f48, using
  * stda through ASI_BLK_S.  FPRS_FEF is set in %fprs before the stores and
  * %fprs is written with 0 in the delay slot of the return, after a
  * membar #Sync.
  * NOTE(review): the block-store ASI presumably requires a 64-byte aligned
  * buffer -- confirm against the CPU manual.
  */
 ENTRY(savefpctx)
 	wr	%g0, FPRS_FEF, %fprs
 	wr	%g0, ASI_BLK_S, %asi
 	stda	%f0, [%o0 + (0 * 64)] %asi
 	stda	%f16, [%o0 + (1 * 64)] %asi
 	stda	%f32, [%o0 + (2 * 64)] %asi
 	stda	%f48, [%o0 + (3 * 64)] %asi
 	membar	#Sync
 	retl
 	 wr	%g0, 0, %fprs
 END(savefpctx)
Index: stable/10
===================================================================
--- stable/10	(revision 293852)
+++ stable/10	(revision 293853)

Property changes on: stable/10
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r292943,292960