Index: head/sys/i386/i386/apic_vector.s
===================================================================
--- head/sys/i386/i386/apic_vector.s	(revision 323721)
+++ head/sys/i386/i386/apic_vector.s	(revision 323722)
@@ -1,320 +1,314 @@
/*-
 * Copyright (c) 1989, 1990 William F. Jolitz.
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: vector.s, 386BSD 0.1 unknown origin
 * $FreeBSD$
 */

/*
 * Interrupt entry points for external interrupts triggered by I/O APICs
 * as well as IPI handlers.
 */

#include "opt_smp.h"

#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include <x86/apicreg.h>

#include "assym.s"

	.text
	SUPERALIGN_TEXT
	/* End Of Interrupt to APIC */
as_lapic_eoi:
	cmpl	$0,x2apic_mode
	jne	1f
	movl	lapic_map,%eax
	movl	$0,LA_EOI(%eax)
	ret
1:
	movl	$MSR_APIC_EOI,%ecx
	xorl	%eax,%eax
	xorl	%edx,%edx
	wrmsr
	ret

/*
 * I/O Interrupt Entry Point.  Rather than having one entry point for
 * each interrupt source, we use one entry point for each 32-bit word
 * in the ISR.  The handler determines the highest bit set in the ISR,
 * translates that into a vector, and passes the vector to the
 * lapic_handle_intr() function.
 */
#define	ISR_VEC(index, vec_name)					\
	.text ;								\
	SUPERALIGN_TEXT ;						\
IDTVEC(vec_name) ;							\
	PUSH_FRAME ;							\
	SET_KERNEL_SREGS ;						\
	cld ;								\
	FAKE_MCOUNT(TF_EIP(%esp)) ;					\
	cmpl	$0,x2apic_mode ;					\
	je	1f ;							\
	movl	$(MSR_APIC_ISR0 + index),%ecx ;				\
	rdmsr ;								\
	jmp	2f ;							\
1: ;									\
	movl	lapic_map, %edx ;	/* pointer to local APIC */	\
	movl	LA_ISR + 16 * (index)(%edx), %eax ;	/* load ISR */	\
2: ;									\
	bsrl	%eax, %eax ;	/* index of highest set bit in ISR */	\
	jz	3f ;							\
	addl	$(32 * index),%eax ;					\
	pushl	%esp ;							\
	pushl	%eax ;		/* pass the IRQ */			\
	call	lapic_handle_intr ;					\
	addl	$8, %esp ;	/* discard parameter */			\
3: ;									\
	MEXITCOUNT ;							\
	jmp	doreti
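As a gloss on the ISR_VEC macro above: each stub inspects one 32-bit word of the
local APIC's In-Service Register (via MSR_APIC_ISR0 + index in x2APIC mode, or
the memory-mapped LA_ISR array otherwise) and turns the highest set bit into a
vector number for lapic_handle_intr().  A small self-contained C sketch of just
that arithmetic, with a made-up isr_word value (fls() as in BSD <strings.h>);
this is an editor's illustration, not code from the commit:

    #include <stdio.h>
    #include <strings.h>	/* fls() */

    /* Mirror of the ISR_VEC arithmetic: one stub per 32-bit ISR word. */
    static int
    isr_word_to_vector(int index, unsigned int isr_word)
    {
    	if (isr_word == 0)
    		return (-1);		/* the 3f branch: nothing pending */
    	/* bsrl %eax,%eax: index of the highest set bit. */
    	return (32 * index + fls(isr_word) - 1);
    }

    int
    main(void)
    {
    	/* Bit 5 of ISR word 1 pending -> vector 37 (0x25). */
    	printf("vector %d\n", isr_word_to_vector(1, 1u << 5));
    	return (0);
    }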
/*
 * Handle "spurious INTerrupts".
 * Notes:
 *  This is different than the "spurious INTerrupt" generated by an
 *  8259 PIC for missing INTs.  See the APIC documentation for details.
 *  This routine should NOT do an 'EOI' cycle.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(spuriousint)

	/* No EOI cycle used here */

	iret

	ISR_VEC(1, apic_isr1)
	ISR_VEC(2, apic_isr2)
	ISR_VEC(3, apic_isr3)
	ISR_VEC(4, apic_isr4)
	ISR_VEC(5, apic_isr5)
	ISR_VEC(6, apic_isr6)
	ISR_VEC(7, apic_isr7)

/*
 * Local APIC periodic timer handler.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(timerint)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld
	FAKE_MCOUNT(TF_EIP(%esp))
	pushl	%esp
	call	lapic_handle_timer
	add	$4, %esp
	MEXITCOUNT
	jmp	doreti

/*
 * Local APIC CMCI handler.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(cmcint)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld
	FAKE_MCOUNT(TF_EIP(%esp))
	call	lapic_handle_cmc
	MEXITCOUNT
	jmp	doreti

/*
 * Local APIC error interrupt handler.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(errorint)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld
	FAKE_MCOUNT(TF_EIP(%esp))
	call	lapic_handle_error
	MEXITCOUNT
	jmp	doreti

#ifdef XENHVM
/*
 * Xen event channel upcall interrupt handler.
 * Only used when the hypervisor supports direct vector callbacks.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(xen_intr_upcall)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld
	FAKE_MCOUNT(TF_EIP(%esp))
	pushl	%esp
	call	xen_intr_handle_upcall
	add	$4, %esp
	MEXITCOUNT
	jmp	doreti
#endif

#ifdef SMP
/*
 * Global address space TLB shootdown.
 */
	.text
	SUPERALIGN_TEXT
invltlb_ret:
	call	as_lapic_eoi
-	POP_FRAME
-	iret
+	jmp	doreti

	SUPERALIGN_TEXT
IDTVEC(invltlb)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld

	call	invltlb_handler

	jmp	invltlb_ret

/*
 * Single page TLB shootdown
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(invlpg)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld

	call	invlpg_handler

	jmp	invltlb_ret

/*
 * Page range TLB shootdown.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(invlrng)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld

	call	invlrng_handler

	jmp	invltlb_ret

/*
 * Invalidate cache.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(invlcache)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld

	call	invlcache_handler

	jmp	invltlb_ret

/*
 * Handler for IPIs sent via the per-cpu IPI bitmap.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(ipi_intr_bitmap_handler)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld

	call	as_lapic_eoi

	FAKE_MCOUNT(TF_EIP(%esp))

	call	ipi_bitmap_handler
	MEXITCOUNT
	jmp	doreti

/*
 * Executed by a CPU when it receives an IPI_STOP from another CPU.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(cpustop)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld

	call	as_lapic_eoi
	call	cpustop_handler
+	jmp	doreti

-	POP_FRAME
-	iret
-
/*
 * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(cpususpend)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld

	call	as_lapic_eoi
	call	cpususpend_handler
+	jmp	doreti

-	POP_FRAME
-	jmp	doreti_iret
-
/*
 * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
 *
 * - Calls the generic rendezvous action function.
 */
	.text
	SUPERALIGN_TEXT
IDTVEC(rendezvous)
	PUSH_FRAME
	SET_KERNEL_SREGS
	cld
#ifdef COUNT_IPIS
	movl	PCPU(CPUID), %eax
	movl	ipi_rendezvous_counts(,%eax,4), %eax
	incl	(%eax)
#endif
	call	smp_rendezvous_action

	call	as_lapic_eoi
-	POP_FRAME
-	iret
+	jmp	doreti
#endif /* SMP */

Index: head/sys/i386/i386/db_trace.c
===================================================================
--- head/sys/i386/i386/db_trace.c	(revision 323721)
+++ head/sys/i386/i386/db_trace.c	(revision 323722)
@@ -1,766 +1,765 @@
/*-
 * Mach Operating System
 * Copyright (c) 1991,1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
* * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static db_varfcn_t db_esp; static db_varfcn_t db_frame; static db_varfcn_t db_frame_seg; static db_varfcn_t db_gs; static db_varfcn_t db_ss; /* * Machine register set. */ #define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x) struct db_variable db_regs[] = { { "cs", DB_OFFSET(tf_cs), db_frame_seg }, { "ds", DB_OFFSET(tf_ds), db_frame_seg }, { "es", DB_OFFSET(tf_es), db_frame_seg }, { "fs", DB_OFFSET(tf_fs), db_frame_seg }, { "gs", NULL, db_gs }, { "ss", NULL, db_ss }, { "eax", DB_OFFSET(tf_eax), db_frame }, { "ecx", DB_OFFSET(tf_ecx), db_frame }, { "edx", DB_OFFSET(tf_edx), db_frame }, { "ebx", DB_OFFSET(tf_ebx), db_frame }, { "esp", NULL, db_esp }, { "ebp", DB_OFFSET(tf_ebp), db_frame }, { "esi", DB_OFFSET(tf_esi), db_frame }, { "edi", DB_OFFSET(tf_edi), db_frame }, { "eip", DB_OFFSET(tf_eip), db_frame }, { "efl", DB_OFFSET(tf_eflags), db_frame }, }; struct db_variable *db_eregs = db_regs + nitems(db_regs); static __inline int get_esp(struct trapframe *tf) { return (TF_HAS_STACKREGS(tf) ? tf->tf_esp : (intptr_t)&tf->tf_esp); } static int db_frame(struct db_variable *vp, db_expr_t *valuep, int op) { int *reg; if (kdb_frame == NULL) return (0); reg = (int *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep); if (op == DB_VAR_GET) *valuep = *reg; else *reg = *valuep; return (1); } static int db_frame_seg(struct db_variable *vp, db_expr_t *valuep, int op) { struct trapframe_vm86 *tfp; int off; uint16_t *reg; if (kdb_frame == NULL) return (0); off = (intptr_t)vp->valuep; if (kdb_frame->tf_eflags & PSL_VM) { tfp = (void *)kdb_frame; switch ((intptr_t)vp->valuep) { case (intptr_t)DB_OFFSET(tf_cs): reg = (uint16_t *)&tfp->tf_cs; break; case (intptr_t)DB_OFFSET(tf_ds): reg = (uint16_t *)&tfp->tf_vm86_ds; break; case (intptr_t)DB_OFFSET(tf_es): reg = (uint16_t *)&tfp->tf_vm86_es; break; case (intptr_t)DB_OFFSET(tf_fs): reg = (uint16_t *)&tfp->tf_vm86_fs; break; } } else reg = (uint16_t *)((uintptr_t)kdb_frame + off); if (op == DB_VAR_GET) *valuep = *reg; else *reg = *valuep; return (1); } static int db_esp(struct db_variable *vp, db_expr_t *valuep, int op) { if (kdb_frame == NULL) return (0); if (op == DB_VAR_GET) *valuep = get_esp(kdb_frame); else if (TF_HAS_STACKREGS(kdb_frame)) kdb_frame->tf_esp = *valuep; return (1); } static int db_gs(struct db_variable *vp, db_expr_t *valuep, int op) { struct trapframe_vm86 *tfp; if (kdb_frame != NULL && kdb_frame->tf_eflags & PSL_VM) { tfp = (void *)kdb_frame; if (op == DB_VAR_GET) *valuep = tfp->tf_vm86_gs; else tfp->tf_vm86_gs = *valuep; return (1); } if (op == DB_VAR_GET) *valuep = rgs(); else load_gs(*valuep); return (1); } static int db_ss(struct db_variable *vp, db_expr_t *valuep, int op) { if (kdb_frame == NULL) return (0); if (op == DB_VAR_GET) *valuep = TF_HAS_STACKREGS(kdb_frame) ? 
	    kdb_frame->tf_ss : rss();
	else if (TF_HAS_STACKREGS(kdb_frame))
		kdb_frame->tf_ss = *valuep;
	return (1);
}

#define NORMAL		0
#define	TRAP		1
#define	INTERRUPT	2
#define	SYSCALL		3
#define	DOUBLE_FAULT	4
#define	TRAP_INTERRUPT	5
#define	TRAP_TIMERINT	6

static void db_nextframe(struct i386_frame **, db_addr_t *, struct thread *);
static int db_numargs(struct i386_frame *);
static void db_print_stack_entry(const char *, int, char **, int *, db_addr_t,
    void *);
static void decode_syscall(int, struct thread *);

static const char * watchtype_str(int type);
int  i386_set_watch(int watchnum, unsigned int watchaddr, int size, int access,
    struct dbreg *d);
int  i386_clr_watch(int watchnum, struct dbreg *d);

/*
 * Figure out how many arguments were passed into the frame at "fp".
 */
static int
db_numargs(fp)
	struct i386_frame *fp;
{
	char   *argp;
	int	inst;
	int	args;

	argp = (char *)db_get_value((int)&fp->f_retaddr, 4, FALSE);
	/*
	 * XXX etext is wrong for LKMs.  We should attempt to interpret
	 * the instruction at the return address in all cases.  This
	 * may require better fault handling.
	 */
	if (argp < btext || argp >= etext) {
		args = -1;
	} else {
retry:
		inst = db_get_value((int)argp, 4, FALSE);
		if ((inst & 0xff) == 0x59)	/* popl %ecx */
			args = 1;
		else if ((inst & 0xffff) == 0xc483)	/* addl $Ibs, %esp */
			args = ((inst >> 16) & 0xff) / 4;
		else if ((inst & 0xf8ff) == 0xc089) {	/* movl %eax, %Reg */
			argp += 2;
			goto retry;
		} else
			args = -1;
	}
	return (args);
}

static void
db_print_stack_entry(name, narg, argnp, argp, callpc, frame)
	const char *name;
	int narg;
	char **argnp;
	int *argp;
	db_addr_t callpc;
	void *frame;
{
	int n = narg >= 0 ? narg : 5;

	db_printf("%s(", name);
	while (n) {
		if (argnp)
			db_printf("%s=", *argnp++);
		db_printf("%r", db_get_value((int)argp, 4, FALSE));
		argp++;
		if (--n != 0)
			db_printf(",");
	}
	if (narg < 0)
		db_printf(",...");
	db_printf(") at ");
	db_printsym(callpc, DB_STGY_PROC);
	if (frame != NULL)
		db_printf("/frame 0x%r", (register_t)frame);
	db_printf("\n");
}

static void
decode_syscall(int number, struct thread *td)
{
	struct proc *p;
	c_db_sym_t sym;
	db_expr_t diff;
	sy_call_t *f;
	const char *symname;

	db_printf(" (%d", number);
	p = (td != NULL) ? td->td_proc : NULL;
	if (p != NULL && 0 <= number && number < p->p_sysent->sv_size) {
		f = p->p_sysent->sv_table[number].sy_call;
		sym = db_search_symbol((db_addr_t)f, DB_STGY_ANY, &diff);
		if (sym != DB_SYM_NULL && diff == 0) {
			db_symbol_values(sym, &symname, NULL);
			db_printf(", %s, %s", p->p_sysent->sv_name, symname);
		}
	}
	db_printf(")");
}
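A note on db_numargs() above: since the i386 caller pops its own arguments, the
function infers the argument count by decoding the instruction at the caller's
return address (popl %ecx implies one argument; addl $N,%esp implies N/4).  A
standalone C sketch of the same byte-pattern matching, fed a hand-assembled
example instead of live kernel text (illustration only, not from the commit):

    #include <stdio.h>

    /*
     * Decode the argument count the way db_numargs() does, given the
     * first 4 bytes at a return address (little-endian, as on x86).
     */
    static int
    numargs_from_bytes(unsigned int inst)
    {
    	if ((inst & 0xff) == 0x59)		/* popl %ecx */
    		return (1);
    	if ((inst & 0xffff) == 0xc483)		/* addl $imm8, %esp */
    		return (((inst >> 16) & 0xff) / 4);
    	return (-1);				/* unknown: print "..." */
    }

    int
    main(void)
    {
    	/* 83 c4 0c = addl $12,%esp -> three 4-byte arguments. */
    	printf("%d args\n", numargs_from_bytes(0x000cc483));
    	return (0);
    }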
/*
 * Figure out the next frame up in the call stack.
 */
static void
db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td)
{
	struct trapframe *tf;
	int frame_type;
	int eip, esp, ebp;
	db_expr_t offset;
	c_db_sym_t sym;
	const char *name;

	eip = db_get_value((int) &(*fp)->f_retaddr, 4, FALSE);
	ebp = db_get_value((int) &(*fp)->f_frame, 4, FALSE);

	/*
	 * Figure out frame type.  We look at the address just before
	 * the saved instruction pointer as the saved EIP is after the
	 * call function, and if the function being called is marked as
	 * dead (such as panic() at the end of dblfault_handler()), then
	 * the instruction at the saved EIP will be part of a different
	 * function (syscall() in this example) rather than the one that
	 * actually made the call.
	 */
	frame_type = NORMAL;
	sym = db_search_symbol(eip - 1, DB_STGY_ANY, &offset);
	db_symbol_values(sym, &name, NULL);
	if (name != NULL) {
		if (strcmp(name, "calltrap") == 0 ||
		    strcmp(name, "fork_trampoline") == 0)
			frame_type = TRAP;
		else if (strncmp(name, "Xatpic_intr", 11) == 0 ||
		    strncmp(name, "Xapic_isr", 9) == 0)
			frame_type = INTERRUPT;
-		else if (strcmp(name, "Xlcall_syscall") == 0 ||
-		    strcmp(name, "Xint0x80_syscall") == 0)
+		else if (strcmp(name, "Xint0x80_syscall") == 0)
			frame_type = SYSCALL;
		else if (strcmp(name, "dblfault_handler") == 0)
			frame_type = DOUBLE_FAULT;
		/* XXX: These are interrupts with trap frames. */
		else if (strcmp(name, "Xtimerint") == 0)
			frame_type = TRAP_TIMERINT;
		else if (strcmp(name, "Xcpustop") == 0 ||
		    strcmp(name, "Xrendezvous") == 0 ||
		    strcmp(name, "Xipi_intr_bitmap_handler") == 0)
			frame_type = TRAP_INTERRUPT;
	}

	/*
	 * Normal frames need no special processing.
	 */
	if (frame_type == NORMAL) {
		*ip = (db_addr_t) eip;
		*fp = (struct i386_frame *) ebp;
		return;
	}

	db_print_stack_entry(name, 0, 0, 0, eip, &(*fp)->f_frame);

	/*
	 * For a double fault, we have to snag the values from the
	 * previous TSS since a double fault uses a task gate to
	 * switch to a known good state.
	 */
	if (frame_type == DOUBLE_FAULT) {
		esp = PCPU_GET(common_tss.tss_esp);
		eip = PCPU_GET(common_tss.tss_eip);
		ebp = PCPU_GET(common_tss.tss_ebp);
		db_printf(
		    "--- trap 0x17, eip = %#r, esp = %#r, ebp = %#r ---\n",
		    eip, esp, ebp);
		*ip = (db_addr_t) eip;
		*fp = (struct i386_frame *) ebp;
		return;
	}

	/*
	 * Point to base of trapframe which is just above the
	 * current frame.
	 */
	if (frame_type == INTERRUPT)
		tf = (struct trapframe *)((int)*fp + 16);
	else if (frame_type == TRAP_INTERRUPT)
		tf = (struct trapframe *)((int)*fp + 8);
	else
		tf = (struct trapframe *)((int)*fp + 12);

	if (INKERNEL((int) tf)) {
		esp = get_esp(tf);
		eip = tf->tf_eip;
		ebp = tf->tf_ebp;
		switch (frame_type) {
		case TRAP:
			db_printf("--- trap %#r", tf->tf_trapno);
			break;
		case SYSCALL:
			db_printf("--- syscall");
			decode_syscall(tf->tf_eax, td);
			break;
		case TRAP_TIMERINT:
		case TRAP_INTERRUPT:
		case INTERRUPT:
			db_printf("--- interrupt");
			break;
		default:
			panic("The moon has moved again.");
		}
		db_printf(", eip = %#r, esp = %#r, ebp = %#r ---\n", eip,
		    esp, ebp);
	}
	*ip = (db_addr_t) eip;
	*fp = (struct i386_frame *) ebp;
}

static int
db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame,
    db_addr_t pc, register_t sp, int count)
{
	struct i386_frame *actframe;
#define	MAXNARG	16
	char *argnames[MAXNARG], **argnp = NULL;
	const char *name;
	int *argp;
	db_expr_t offset;
	c_db_sym_t sym;
	int instr, narg;
	boolean_t first;

	if (db_segsize(tf) == 16) {
		db_printf(
"--- 16-bit%s, cs:eip = %#x:%#x, ss:esp = %#x:%#x, ebp = %#x, tf = %p ---\n",
		    (tf->tf_eflags & PSL_VM) ? " (vm86)" : "",
		    tf->tf_cs, tf->tf_eip,
		    TF_HAS_STACKREGS(tf) ? tf->tf_ss : rss(),
		    TF_HAS_STACKREGS(tf) ? tf->tf_esp : (intptr_t)&tf->tf_esp,
		    tf->tf_ebp, tf);
		return (0);
	}

	/*
	 * If an indirect call via an invalid pointer caused a trap,
	 * %pc contains the invalid address while the return address
	 * of the unlucky caller has been saved by CPU on the stack
	 * just before the trap frame.  In this case, try to recover
	 * the caller's address so that the first frame is assigned
	 * to the right spot in the right function, for that is where
	 * the failure actually happened.
	 *
	 * This trick depends on the fault address stashed in tf_err
	 * by trap_fatal() before entering KDB.
	 */
	if (kdb_frame && pc == kdb_frame->tf_err) {
		/*
		 * Find where the trap frame actually ends.
* It won't contain tf_esp or tf_ss unless crossing rings. */ if (TF_HAS_STACKREGS(kdb_frame)) instr = (int)(kdb_frame + 1); else instr = (int)&kdb_frame->tf_esp; pc = db_get_value(instr, 4, FALSE); } if (count == -1) count = 1024; first = TRUE; while (count-- && !db_pager_quit) { sym = db_search_symbol(pc, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); /* * Attempt to determine a (possibly fake) frame that gives * the caller's pc. It may differ from `frame' if the * current function never sets up a standard frame or hasn't * set one up yet or has just discarded one. The last two * cases can be guessed fairly reliably for code generated * by gcc. The first case is too much trouble to handle in * general because the amount of junk on the stack depends * on the pc (the special handling of "calltrap", etc. in * db_nextframe() works because the `next' pc is special). */ actframe = frame; if (first) { first = FALSE; if (sym == C_DB_SYM_NULL && sp != 0) { /* * If a symbol couldn't be found, we've probably * jumped to a bogus location, so try and use * the return address to find our caller. */ db_print_stack_entry(name, 0, 0, 0, pc, NULL); pc = db_get_value(sp, 4, FALSE); if (db_search_symbol(pc, DB_STGY_PROC, &offset) == C_DB_SYM_NULL) break; continue; } else if (tf != NULL) { instr = db_get_value(pc, 4, FALSE); if ((instr & 0xffffff) == 0x00e58955) { /* pushl %ebp; movl %esp, %ebp */ actframe = (void *)(get_esp(tf) - 4); } else if ((instr & 0xffff) == 0x0000e589) { /* movl %esp, %ebp */ actframe = (void *)get_esp(tf); if (tf->tf_ebp == 0) { /* Fake frame better. */ frame = actframe; } } else if ((instr & 0xff) == 0x000000c3) { /* ret */ actframe = (void *)(get_esp(tf) - 4); } else if (offset == 0) { /* Probably an assembler symbol. */ actframe = (void *)(get_esp(tf) - 4); } } else if (strcmp(name, "fork_trampoline") == 0) { /* * Don't try to walk back on a stack for a * process that hasn't actually been run yet. */ db_print_stack_entry(name, 0, 0, 0, pc, actframe); break; } } argp = &actframe->f_arg0; narg = MAXNARG; if (sym != NULL && db_sym_numargs(sym, &narg, argnames)) { argnp = argnames; } else { narg = db_numargs(frame); } db_print_stack_entry(name, narg, argnp, argp, pc, actframe); if (actframe != frame) { /* `frame' belongs to caller. */ pc = (db_addr_t) db_get_value((int)&actframe->f_retaddr, 4, FALSE); continue; } db_nextframe(&frame, &pc, td); if (INKERNEL((int)pc) && !INKERNEL((int) frame)) { sym = db_search_symbol(pc, DB_STGY_ANY, &offset); db_symbol_values(sym, &name, NULL); db_print_stack_entry(name, 0, 0, 0, pc, frame); break; } if (!INKERNEL((int) frame)) { break; } } return (0); } void db_trace_self(void) { struct i386_frame *frame; db_addr_t callpc; register_t ebp; __asm __volatile("movl %%ebp,%0" : "=r" (ebp)); frame = (struct i386_frame *)ebp; callpc = (db_addr_t)db_get_value((int)&frame->f_retaddr, 4, FALSE); frame = frame->f_frame; db_backtrace(curthread, NULL, frame, callpc, 0, -1); } int db_trace_thread(struct thread *thr, int count) { struct pcb *ctx; struct trapframe *tf; ctx = kdb_thr_ctx(thr); tf = thr == kdb_thread ? 
kdb_frame : NULL; return (db_backtrace(thr, tf, (struct i386_frame *)ctx->pcb_ebp, ctx->pcb_eip, ctx->pcb_esp, count)); } int i386_set_watch(watchnum, watchaddr, size, access, d) int watchnum; unsigned int watchaddr; int size; int access; struct dbreg *d; { int i, len; if (watchnum == -1) { for (i = 0; i < 4; i++) if (!DBREG_DR7_ENABLED(d->dr[7], i)) break; if (i < 4) watchnum = i; else return (-1); } switch (access) { case DBREG_DR7_EXEC: size = 1; /* size must be 1 for an execution breakpoint */ /* fall through */ case DBREG_DR7_WRONLY: case DBREG_DR7_RDWR: break; default: return (-1); } /* * we can watch a 1, 2, or 4 byte sized location */ switch (size) { case 1: len = DBREG_DR7_LEN_1; break; case 2: len = DBREG_DR7_LEN_2; break; case 4: len = DBREG_DR7_LEN_4; break; default: return (-1); } /* clear the bits we are about to affect */ d->dr[7] &= ~DBREG_DR7_MASK(watchnum); /* set drN register to the address, N=watchnum */ DBREG_DRX(d, watchnum) = watchaddr; /* enable the watchpoint */ d->dr[7] |= DBREG_DR7_SET(watchnum, len, access, DBREG_DR7_GLOBAL_ENABLE); return (watchnum); } int i386_clr_watch(watchnum, d) int watchnum; struct dbreg *d; { if (watchnum < 0 || watchnum >= 4) return (-1); d->dr[7] &= ~DBREG_DR7_MASK(watchnum); DBREG_DRX(d, watchnum) = 0; return (0); } int db_md_set_watchpoint(addr, size) db_expr_t addr; db_expr_t size; { struct dbreg d; int avail, i, wsize; fill_dbregs(NULL, &d); avail = 0; for(i = 0; i < 4; i++) { if (!DBREG_DR7_ENABLED(d.dr[7], i)) avail++; } if (avail * 4 < size) return (-1); for (i = 0; i < 4 && (size > 0); i++) { if (!DBREG_DR7_ENABLED(d.dr[7], i)) { if (size > 2) wsize = 4; else wsize = size; i386_set_watch(i, addr, wsize, DBREG_DR7_WRONLY, &d); addr += wsize; size -= wsize; } } set_dbregs(NULL, &d); return(0); } int db_md_clr_watchpoint(addr, size) db_expr_t addr; db_expr_t size; { struct dbreg d; int i; fill_dbregs(NULL, &d); for(i = 0; i < 4; i++) { if (DBREG_DR7_ENABLED(d.dr[7], i)) { if ((DBREG_DRX((&d), i) >= addr) && (DBREG_DRX((&d), i) < addr+size)) i386_clr_watch(i, &d); } } set_dbregs(NULL, &d); return(0); } static const char * watchtype_str(type) int type; { switch (type) { case DBREG_DR7_EXEC : return "execute"; break; case DBREG_DR7_RDWR : return "read/write"; break; case DBREG_DR7_WRONLY : return "write"; break; default : return "invalid"; break; } } void db_md_list_watchpoints(void) { struct dbreg d; int i, len, type; fill_dbregs(NULL, &d); db_printf("\nhardware watchpoints:\n"); db_printf(" watch status type len address\n"); db_printf(" ----- -------- ---------- --- ----------\n"); for (i = 0; i < 4; i++) { if (DBREG_DR7_ENABLED(d.dr[7], i)) { type = DBREG_DR7_ACCESS(d.dr[7], i); len = DBREG_DR7_LEN(d.dr[7], i); db_printf(" %-5d %-8s %10s %3d ", i, "enabled", watchtype_str(type), len + 1); db_printsym((db_addr_t)DBREG_DRX(&d, i), DB_STGY_ANY); db_printf("\n"); } else { db_printf(" %-5d disabled\n", i); } } db_printf("\ndebug register values:\n"); for (i = 0; i < 8; i++) if (i != 4 && i != 5) db_printf(" dr%d 0x%08x\n", i, DBREG_DRX(&d, i)); db_printf("\n"); } Index: head/sys/i386/i386/exception.s =================================================================== --- head/sys/i386/i386/exception.s (revision 323721) +++ head/sys/i386/i386/exception.s (revision 323722) @@ -1,489 +1,479 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. 
Joseph Koshy under
 * sponsorship from the FreeBSD Foundation and Google, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_apic.h"
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"

#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>

#include "assym.s"

#define	SEL_RPL_MASK	0x0003
#define	GSEL_KPL	0x0020	/* GSEL(GCODE_SEL, SEL_KPL) */

#ifdef KDTRACE_HOOKS
	.bss
	.globl	dtrace_invop_jump_addr
	.align	4
	.type	dtrace_invop_jump_addr, @object
	.size	dtrace_invop_jump_addr, 4
dtrace_invop_jump_addr:
	.zero	4
	.globl	dtrace_invop_calltrap_addr
	.align	4
	.type	dtrace_invop_calltrap_addr, @object
	.size	dtrace_invop_calltrap_addr, 4
dtrace_invop_calltrap_addr:
	.zero	8
#endif
	.text
#ifdef HWPMC_HOOKS
ENTRY(start_exceptions)
#endif

/*****************************************************************************/
/* Trap handling                                                             */
/*****************************************************************************/
/*
 * Trap and fault vector routines.
 *
 * Most traps are 'trap gates', SDT_SYS386TGT.  A trap gate pushes state on
 * the stack that mostly looks like an interrupt, but does not disable
 * interrupts.  A few of the traps we use are interrupt gates,
 * SDT_SYS386IGT, which are nearly the same thing except interrupts are
 * disabled on entry.
 *
 * The cpu will push a certain amount of state onto the kernel stack for
 * the current process.  The amount of state depends on the type of trap
 * and whether the trap crossed rings or not.  See i386/include/frame.h.
 * At the very least the current EFLAGS (status register, which includes
 * the interrupt disable state prior to the trap), the code segment register,
 * and the return instruction pointer are pushed by the cpu.  The cpu
 * will also push an 'error' code for certain traps.  We push a dummy
 * error code for those traps where the cpu doesn't in order to maintain
 * a consistent frame.  We also push a contrived 'trap number'.
 *
 * The cpu does not push the general registers, we must do that, and we
 * must restore them prior to calling 'iret'.  The cpu adjusts the %cs and
 * %ss segment registers, but does not mess with %ds, %es, or %fs.  Thus we
 * must load them with appropriate values for supervisor mode operation.
 */

	MCOUNT_LABEL(user)
	MCOUNT_LABEL(btrap)

#define	TRAP(a)		pushl $(a) ; jmp alltraps
+#define	TRAP_NOEN(a)	pushl $(a) ; jmp alltraps_noen

IDTVEC(div)
	pushl $0; TRAP(T_DIVIDE)
IDTVEC(dbg)
-	pushl $0; TRAP(T_TRCTRAP)
+	pushl $0; TRAP_NOEN(T_TRCTRAP)
IDTVEC(nmi)
-	pushl $0; TRAP(T_NMI)
+	pushl $0; TRAP_NOEN(T_NMI)
IDTVEC(bpt)
-	pushl $0; TRAP(T_BPTFLT)
+	pushl $0; TRAP_NOEN(T_BPTFLT)
IDTVEC(dtrace_ret)
	pushl $0; TRAP(T_DTRACE_RET)
IDTVEC(ofl)
	pushl $0; TRAP(T_OFLOW)
IDTVEC(bnd)
	pushl $0; TRAP(T_BOUND)
#ifndef KDTRACE_HOOKS
IDTVEC(ill)
	pushl $0; TRAP(T_PRIVINFLT)
#endif
IDTVEC(dna)
	pushl $0; TRAP(T_DNA)
IDTVEC(fpusegm)
	pushl $0; TRAP(T_FPOPFLT)
IDTVEC(tss)
	TRAP(T_TSSFLT)
IDTVEC(missing)
	TRAP(T_SEGNPFLT)
IDTVEC(stk)
	TRAP(T_STKFLT)
IDTVEC(prot)
	TRAP(T_PROTFLT)
IDTVEC(page)
-	TRAP(T_PAGEFLT)
+	TRAP_NOEN(T_PAGEFLT)
IDTVEC(mchk)
	pushl $0; TRAP(T_MCHK)
IDTVEC(rsvd)
	pushl $0; TRAP(T_RESERVED)
IDTVEC(fpu)
	pushl $0; TRAP(T_ARITHTRAP)
IDTVEC(align)
	TRAP(T_ALIGNFLT)
IDTVEC(xmm)
	pushl $0; TRAP(T_XMMFLT)

+	SUPERALIGN_TEXT
+	.globl	alltraps_noen
+alltraps_noen:
+	pushal
+	pushl	$0
+	movw	%ds,(%esp)
+	pushl	$0
+	movw	%es,(%esp)
+	pushl	$0
+	movw	%fs,(%esp)
+	SET_KERNEL_SREGS
+	cld
+	FAKE_MCOUNT(TF_EIP(%esp))
+	jmp	calltrap
+
	/*
	 * All traps except ones for syscalls jump to alltraps.  If
	 * interrupts were enabled when the trap occurred, then interrupts
	 * are enabled now if the trap was through a trap gate, else
	 * disabled if the trap was through an interrupt gate.  Note that
	 * int0x80_syscall is a trap gate.  Interrupt gates are used by
	 * page faults, non-maskable interrupts, debug and breakpoint
	 * exceptions.
	 */
	SUPERALIGN_TEXT
	.globl	alltraps
	.type	alltraps,@function
alltraps:
	pushal
	pushl	$0
	movw	%ds,(%esp)
	pushl	$0
	movw	%es,(%esp)
	pushl	$0
	movw	%fs,(%esp)

alltraps_with_regs_pushed:
	SET_KERNEL_SREGS
+	sti
	cld
	FAKE_MCOUNT(TF_EIP(%esp))

calltrap:
	pushl	%esp
	call	trap
	add	$4, %esp

	/*
	 * Return via doreti to handle ASTs.
	 */
	MEXITCOUNT
	jmp	doreti

/*
 * Privileged instruction fault.
 */
#ifdef KDTRACE_HOOKS
	SUPERALIGN_TEXT
IDTVEC(ill)
	/*
	 * Check if a DTrace hook is registered.  The default (data) segment
	 * cannot be used for this since %ds is not known good until we
	 * verify that the entry was from kernel mode.
	 */
	cmpl	$0,%ss:dtrace_invop_jump_addr
	je	norm_ill

	/*
	 * Check if this is a user fault.  If so, just handle it as a normal
	 * trap.
	 */
	cmpl	$GSEL_KPL, 4(%esp)	/* Check the code segment */
	jne	norm_ill
	testl	$PSL_VM, 8(%esp)	/* and vm86 mode. */
	jnz	norm_ill

	/*
	 * This is a kernel instruction fault that might have been caused
	 * by a DTrace provider.
	 */
	pushal
	cld

	/*
	 * Set our jump address for the jump back in the event that
	 * the exception wasn't caused by DTrace at all.
	 */
	movl	$norm_ill, dtrace_invop_calltrap_addr

	/* Jump to the code hooked in by DTrace. */
	jmpl	*dtrace_invop_jump_addr

	/*
	 * Process the instruction fault in the normal way.
	 */
norm_ill:
	pushl	$0
	TRAP(T_PRIVINFLT)
#endif
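For context on TRAP_NOEN above: vectors such as debug, NMI, breakpoint, and page
fault are dispatched through interrupt gates, so the CPU itself clears IF;
alltraps_noen deliberately omits the sti that alltraps now performs once the
kernel segment registers are safe.  A hedged fragment showing how such a vector
is installed with the setidt() helper from i386/machdep.c (the actual call
sites are in the machdep.c hunk this excerpt truncates, so treat the arguments
as illustrative, kernel context assumed):

    	/* Interrupt gate: the CPU clears IF; alltraps_noen keeps it clear. */
    	setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
    	    GSEL(GCODE_SEL, SEL_KPL));
    	/* Trap gate: IF is left as-is; alltraps performs an explicit sti. */
    	setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL,
    	    GSEL(GCODE_SEL, SEL_KPL));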
/*
- * Call gate entry for syscalls (lcall 7,0).
- * This is used by FreeBSD 1.x a.out executables and "old" NetBSD executables.
- *
- * The intersegment call has been set up to specify one dummy parameter.
- * This leaves a place to put eflags so that the call frame can be
- * converted to a trap frame. Note that the eflags is (semi-)bogusly
- * pushed into (what will be) tf_err and then copied later into the
- * final spot. It has to be done this way because esp can't be just
- * temporarily altered for the pushfl - an interrupt might come in
- * and clobber the saved cs/eip.
- */
-	SUPERALIGN_TEXT
-IDTVEC(lcall_syscall)
-	pushfl				/* save eflags */
-	popl	8(%esp)			/* shuffle into tf_eflags */
-	pushl	$7			/* sizeof "lcall 7,0" */
-	pushl	$0			/* tf_trapno */
-	pushal
-	pushl	$0
-	movw	%ds,(%esp)
-	pushl	$0
-	movw	%es,(%esp)
-	pushl	$0
-	movw	%fs,(%esp)
-	SET_KERNEL_SREGS
-	cld
-	FAKE_MCOUNT(TF_EIP(%esp))
-	pushl	%esp
-	call	syscall
-	add	$4, %esp
-	MEXITCOUNT
-	jmp	doreti
-
-/*
 * Trap gate entry for syscalls (int 0x80).
 * This is used by FreeBSD ELF executables, "new" NetBSD executables, and all
 * Linux executables.
 *
 * Even though the name says 'int0x80', this is actually a trap gate, not an
 * interrupt gate.  Thus interrupts are enabled on entry just as they are for
 * a normal syscall.
 */
	SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
	pushl	$2			/* sizeof "int 0x80" */
	pushl	$0			/* tf_trapno */
	pushal
	pushl	$0
	movw	%ds,(%esp)
	pushl	$0
	movw	%es,(%esp)
	pushl	$0
	movw	%fs,(%esp)
	SET_KERNEL_SREGS
+	sti
	cld
	FAKE_MCOUNT(TF_EIP(%esp))
	pushl	%esp
	call	syscall
	add	$4, %esp
	MEXITCOUNT
	jmp	doreti

ENTRY(fork_trampoline)
	pushl	%esp			/* trapframe pointer */
	pushl	%ebx			/* arg1 */
	pushl	%esi			/* function */
	call	fork_exit
	addl	$12,%esp
	/* cut from syscall */

	/*
	 * Return via doreti to handle ASTs.
	 */
	MEXITCOUNT
	jmp	doreti

/*
 * To efficiently implement classification of trap and interrupt handlers
 * for profiling, there must be only trap handlers between the labels btrap
 * and bintr, and only interrupt handlers between the labels bintr and
 * eintr.  This is implemented (partly) by including files that contain
 * some of the handlers.  Before including the files, set up a normal asm
 * environment so that the included files don't need to know that they are
 * included.
 */

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
MCOUNT_LABEL(bintr)

#ifdef DEV_ATPIC
#include <i386/i386/atpic_vector.s>
#endif

#if defined(DEV_APIC) && defined(DEV_ATPIC)
	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#endif

#ifdef DEV_APIC
#include <i386/i386/apic_vector.s>
#endif

	.data
	.p2align 4
	.text
	SUPERALIGN_TEXT
#include <i386/i386/vm86bios.s>

	.text
MCOUNT_LABEL(eintr)

/*
 * void doreti(struct trapframe)
 *
 * Handle return from interrupts, traps and syscalls.
 */
	.text
	SUPERALIGN_TEXT
	.type	doreti,@function
	.globl	doreti
doreti:
	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
doreti_next:
	/*
	 * Check if ASTs can be handled now.  ASTs cannot be safely
	 * processed when returning from an NMI.
	 */
	cmpb	$T_NMI,TF_TRAPNO(%esp)
#ifdef HWPMC_HOOKS
	je	doreti_nmi
#else
-	je	doreti_exit
+	je	doreti_notvm86
#endif
	/*
	 * PSL_VM must be checked first since segment registers only
	 * have an RPL in non-VM86 mode.
	 * ASTs can not be handled now if we are in a vm86 call.
	 */
	testl	$PSL_VM,TF_EFLAGS(%esp)
	jz	doreti_notvm86
	movl	PCPU(CURPCB),%ecx
	testl	$PCB_VM86CALL,PCB_FLAGS(%ecx)
	jz	doreti_ast
	jmp	doreti_exit

doreti_notvm86:
	testb	$SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
-	jz	doreti_exit		/* can't handle ASTs now if not */
+	jz	doreti_nosegs		/* can't handle ASTs now if not */

doreti_ast:
	/*
	 * Check for ASTs atomically with returning.  Disabling CPU
	 * interrupts provides sufficient locking even in the SMP case,
	 * since we will be informed of any new ASTs by an IPI.
	 */
	cli
	movl	PCPU(CURTHREAD),%eax
	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
	je	doreti_exit
	sti
	pushl	%esp			/* pass a pointer to the trapframe */
	call	ast
	add	$4,%esp
	jmp	doreti_ast
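The doreti_ast loop above is the heart of return-to-user processing and reads
more naturally in C.  A rough equivalent, assuming the usual curthread/td_flags
names and a hypothetical frame pointer standing in for the trapframe the asm
passes on the stack (illustration only, not the kernel's actual code):

    	/* Rough C rendition of the doreti_ast loop (illustration). */
    	for (;;) {
    		disable_intr();			/* cli */
    		if ((curthread->td_flags &
    		    (TDF_ASTPENDING | TDF_NEEDRESCHED)) == 0)
    			break;			/* je doreti_exit */
    		enable_intr();			/* sti */
    		ast(frame);			/* frame: the trapframe */
    	}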
/*
 * doreti_exit:	pop registers, iret.
 *
 *	The segment register pop is a special case, since it may
 *	fault if (for example) a sigreturn specifies bad segment
 *	registers.  The fault is handled in trap.c.
 */
doreti_exit:
	MEXITCOUNT

	.globl	doreti_popl_fs
doreti_popl_fs:
	popl	%fs
	.globl	doreti_popl_es
doreti_popl_es:
	popl	%es
	.globl	doreti_popl_ds
doreti_popl_ds:
	popl	%ds
+	jmp	doreti_iret_popal
+
+doreti_nosegs:
+	MEXITCOUNT
+	addl	$12,%esp
+doreti_iret_popal:
	popal
	addl	$8,%esp
	.globl	doreti_iret
doreti_iret:
	iret

	/*
	 * doreti_iret_fault and friends.  Alternative return code for
	 * the case where we get a fault in the doreti_exit code
	 * above.  trap() (i386/i386/trap.c) catches this specific
	 * case, sends the process a signal and continues in the
	 * corresponding place in the code below.
	 */
	ALIGN_TEXT
	.globl	doreti_iret_fault
doreti_iret_fault:
	subl	$8,%esp
	pushal
	pushl	$0
	movw	%ds,(%esp)
	.globl	doreti_popl_ds_fault
doreti_popl_ds_fault:
	pushl	$0
	movw	%es,(%esp)
	.globl	doreti_popl_es_fault
doreti_popl_es_fault:
	pushl	$0
	movw	%fs,(%esp)
	.globl	doreti_popl_fs_fault
doreti_popl_fs_fault:
	sti
	movl	$0,TF_ERR(%esp)	/* XXX should be the error code */
	movl	$T_PROTFLT,TF_TRAPNO(%esp)
	jmp	alltraps_with_regs_pushed

#ifdef HWPMC_HOOKS
doreti_nmi:
	/*
	 * Since we are returning from an NMI, check if the current trap
	 * was from user mode and if so whether the current thread
	 * needs a user call chain capture.
	 */
	testb	$SEL_RPL_MASK,TF_CS(%esp)
-	jz	doreti_exit
+	jz	doreti_nosegs
	movl	PCPU(CURTHREAD),%eax	/* curthread present? */
	orl	%eax,%eax
	jz	doreti_exit
	testl	$TDP_CALLCHAIN,TD_PFLAGS(%eax) /* flagged for capture? */
	jz	doreti_exit
	/*
	 * Take the processor out of NMI mode by executing a fake "iret".
	 */
	pushfl
	pushl	%cs
	pushl	$outofnmi
	iret
outofnmi:
	/*
	 * Call the callchain capture hook after turning interrupts back on.
	 */
	movl	pmc_hook,%ecx
	orl	%ecx,%ecx
	jz	doreti_exit
	pushl	%esp			/* frame pointer */
	pushl	$PMC_FN_USER_CALLCHAIN	/* command */
	movl	PCPU(CURTHREAD),%eax
	pushl	%eax			/* curthread */
	sti
	call	*%ecx
	addl	$12,%esp
	jmp	doreti_ast

ENTRY(end_exceptions)
#endif

Index: head/sys/i386/i386/locore.s
===================================================================
--- head/sys/i386/i386/locore.s	(revision 323721)
+++ head/sys/i386/i386/locore.s	(revision 323722)
@@ -1,866 +1,907 @@
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ * * originally from: locore.s, by William F. Jolitz * * Substantially rewritten by David Greenman, Rod Grimes, * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp * and many others. */ #include "opt_bootp.h" #include "opt_compat.h" #include "opt_nfsroot.h" #include "opt_pmap.h" #include #include #include #include #include #include #include #include "assym.s" /* * XXX * * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. Perhaps newer versions * will have more pleasant appearance. */ /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). */ .globl PTmap,PTD,PTDpde .set PTmap,(PTDPTDI << PDRSHIFT) .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) /* * Compiled KERNBASE location and the kernel load address */ .globl kernbase .set kernbase,KERNBASE .globl kernload .set kernload,KERNLOAD /* * Globals */ .data ALIGN_DATA /* just to be sure */ .space 0x2000 /* space for tmpstk - temporary stack */ tmpstk: .globl bootinfo bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ .globl KERNend KERNend: .long 0 /* phys addr end of kernel (just after bss) */ physfree: .long 0 /* phys addr of next free page */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ #if defined(PAE) || defined(PAE_TABLES) .globl IdlePDPT IdlePDPT: .long 0 /* phys addr of kernel PDPT */ #endif .globl KPTmap KPTmap: .long 0 /* address of kernel page tables */ .globl KPTphys KPTphys: .long 0 /* phys addr of kernel page tables */ .globl proc0kstack proc0kstack: .long 0 /* address of proc 0 kstack space */ p0kpa: .long 0 /* phys addr of proc0's STACK */ vm86phystk: .long 0 /* PA of vm86/bios stack */ .globl vm86paddr, vm86pa vm86paddr: .long 0 /* address of vm86 region */ vm86pa: .long 0 /* phys addr of vm86 region */ /********************************************************************** * * Some handy macros * */ #define R(foo) ((foo)-KERNBASE) #define ALLOCPAGES(foo) \ movl R(physfree), %esi ; \ movl $((foo)*PAGE_SIZE), %eax ; \ addl %esi, %eax ; \ movl %eax, R(physfree) ; \ movl %esi, %edi ; \ movl $((foo)*PAGE_SIZE),%ecx ; \ xorl %eax,%eax ; \ cld ; \ rep ; \ stosb /* * fillkpt * eax = page frame address * ebx = index into page table * ecx = how many pages to map * base = base address of page dir/table * prot = protection bits */ #define fillkpt(base, prot) \ shll $PTESHIFT,%ebx ; \ addl base,%ebx ; \ orl $PG_V,%eax ; \ orl prot,%eax ; \ 1: movl %eax,(%ebx) ; \ addl $PAGE_SIZE,%eax ; /* increment physical address */ \ addl $PTESIZE,%ebx ; /* next pte */ \ loop 1b /* * fillkptphys(prot) * eax = physical address * ecx = how many pages to map * prot = protection bits */ #define fillkptphys(prot) \ movl %eax, %ebx ; \ shrl $PAGE_SHIFT, %ebx ; \ fillkpt(R(KPTphys), prot) .text /********************************************************************** * * This is where the 
bootblocks start us, set the ball rolling... * */ NON_GPROF_ENTRY(btext) /* Tell the bios to warmboot next time */ movw $0x1234,0x472 /* Set up a real frame in case the double return in newboot is executed. */ pushl %ebp movl %esp, %ebp /* Don't trust what the BIOS gives for eflags. */ pushl $PSL_KERNEL popfl /* * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap * to set %cs, %ds, %es and %ss. */ mov %ds, %ax mov %ax, %fs mov %ax, %gs /* * Clear the bss. Not all boot programs do it, and it is our job anyway. * * XXX we don't check that there is memory for our bss and page tables * before using it. * * Note: we must be careful to not overwrite an active gdt or idt. They * inactive from now until we switch to new ones, since we don't load any * more segment registers or permit interrupts until after the switch. */ movl $R(end),%ecx movl $R(edata),%edi subl %edi,%ecx xorl %eax,%eax cld rep stosb call recover_bootinfo /* Get onto a stack that we can trust. */ /* * XXX this step is delayed in case recover_bootinfo needs to return via * the old stack, but it need not be, since recover_bootinfo actually * returns via the old frame. */ movl $R(tmpstk),%esp call identify_cpu call create_pagetables /* * If the CPU has support for VME, turn it on. */ testl $CPUID_VME, R(cpu_feature) jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* Now enable paging */ #if defined(PAE) || defined(PAE_TABLES) movl R(IdlePDPT), %eax movl %eax, %cr3 movl %cr4, %eax orl $CR4_PAE, %eax movl %eax, %cr4 #else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ #endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high virtualized address */ ret /* now running relocated at KERNBASE where the system is linked to run */ begin: /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ /* * Only use bottom page for init386(). init386() calculates the * PCB + FPU save area size and returns the true top of stack. */ leal PAGE_SIZE(%eax),%esp xorl %ebp,%ebp /* mark end of frames */ pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ /* * Clean up the stack in a way that db_numargs() understands, so * that backtraces in ddb don't underrun the stack. Traps for * inaccessible memory are more fatal than usual this early. */ addl $4,%esp /* Switch to true top of stack. */ movl %eax,%esp call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ addl $0,%esp /* for db_numargs() again */ /* * Signal trampoline, copied to top of user stack */ NON_GPROF_ENTRY(sigcode) calll *SIGF_HANDLER(%esp) leal SIGF_UC(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC_EFLAGS(%eax) jne 1f mov UC_GS(%eax),%gs /* restore %gs */ 1: movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #ifdef COMPAT_FREEBSD4 ALIGN_TEXT freebsd4_sigcode: calll *SIGF_HANDLER(%esp) leal SIGF_UC4(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC4_EFLAGS(%eax) jne 1f mov UC4_GS(%eax),%gs /* restore %gs */ 1: movl $344,%eax /* 4.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. 
 */
	int	$0x80			/* enter kernel with args */
					/* on stack */
1:
	jmp	1b
#endif

#ifdef COMPAT_43
	ALIGN_TEXT
osigcode:
	call	*SIGF_HANDLER(%esp)	/* call signal handler */
	lea	SIGF_SC(%esp),%eax	/* get sigcontext */
	pushl	%eax
	testl	$PSL_VM,SC_PS(%eax)
	jne	9f
	mov	SC_GS(%eax),%gs		/* restore %gs */
9:
	movl	$103,%eax		/* 3.x SYS_sigreturn */
	pushl	%eax			/* junk to fake return addr. */
	int	$0x80			/* enter kernel with args */
0:	jmp	0b
+
+/*
+ * The lcall $7,$0 handler cannot use the call gate that does an
+ * inter-privilege transition.  The reason is that the call gate
+ * does not disable interrupts, and, before the kernel segment registers
+ * are loaded, we would have a window where the ring 0 code is
+ * executed with the wrong segments.
+ *
+ * Instead, set LDT descriptor 0 as code segment, which reflects
+ * the lcall $7,$0 back to ring 3 trampoline.  The trampoline sets up
+ * the frame for int $0x80.
+ */
+	ALIGN_TEXT
+lcall_tramp:
+	cmpl	$SYS_vfork,%eax
+	je	1f
+	pushl	%ebp
+	movl	%esp,%ebp
+	pushl	0x24(%ebp)	/* arg 6 */
+	pushl	0x20(%ebp)
+	pushl	0x1c(%ebp)
+	pushl	0x18(%ebp)
+	pushl	0x14(%ebp)
+	pushl	0x10(%ebp)	/* arg 1 */
+	subl	$4,%esp	/* gap */
+	int	$0x80
+	leavel
+	lretl
+1:
+	/*
+	 * vfork handling is special and relies on the libc stub saving
+	 * the return ip in %ecx.  Also, we assume that the call was done
+	 * with ucode32 selector in %cs.
+	 */
+	int	$0x80
+	movl	$0x33,4(%esp)	/* GUCODE32_SEL | SEL_UPL */
+	movl	%ecx,(%esp)
+	lretl
#endif /* COMPAT_43 */

	ALIGN_TEXT
esigcode:

	.data
	.globl	szsigcode
szsigcode:
	.long	esigcode-sigcode
#ifdef COMPAT_FREEBSD4
	.globl	szfreebsd4_sigcode
szfreebsd4_sigcode:
	.long	esigcode-freebsd4_sigcode
#endif
#ifdef COMPAT_43
	.globl	szosigcode
szosigcode:
	.long	esigcode-osigcode
+	.globl	szlcallcode
+szlcallcode:
+	.long	esigcode-lcall_tramp
#endif
	.text
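To see the new trampoline from userspace: an old-style binary loads the syscall
number into %eax and executes lcall $7,$0; LDT descriptor 7 now lands in
lcall_tramp, which rebuilds the arguments into an int $0x80 frame (note the
fake near-return slot that the trampoline's 0x10(%ebp) indexing expects, since
historically the lcall lived inside a libc stub that had itself been called).
A hedged inline-asm sketch of that calling convention; it assumes SYS_write ==
4 on i386 and would only run on a kernel that installs this COMPAT_43 LDT
trampoline:

    #include <unistd.h>

    int
    main(void)
    {
    	int ret;

    	__asm __volatile(
    	    "pushl	$13\n\t"	/* length of the message */
    	    "pushl	%1\n\t"		/* buffer */
    	    "pushl	$1\n\t"		/* fd 1: stdout */
    	    "pushl	$0\n\t"		/* fake near-return slot */
    	    "movl	$4, %%eax\n\t"	/* SYS_write */
    	    "lcall	$7, $0\n\t"	/* through the LDT trampoline */
    	    "addl	$16, %%esp"
    	    : "=a" (ret)
    	    : "r" ("hello, world\n")
    	    : "memory", "cc");
    	return (ret != 13);
    }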
/**********************************************************************
 *
 * Recover the bootinfo passed to us from the boot program
 *
 */
recover_bootinfo:
	/*
	 * This code is called in different ways depending on what loaded
	 * and started the kernel.  This is used to detect how we get the
	 * arguments from the other code and what we do with them.
	 *
	 * Old disk boot blocks:
	 *	(*btext)(howto, bootdev, cyloffset, esym);
	 *	[return address == 0, and can NOT be returned to]
	 *	[cyloffset was not supported by the FreeBSD boot code
	 *	 and always passed in as 0]
	 *	[esym is also known as total in the boot code, and
	 *	 was never properly supported by the FreeBSD boot code]
	 *
	 * Old diskless netboot code:
	 *	(*btext)(0,0,0,0,&nfsdiskless,0,0,0);
	 *	[return address != 0, and can NOT be returned to]
	 *	If we are being booted by this code it will NOT work,
	 *	so we are just going to halt if we find this case.
	 *
	 * New uniform boot code:
	 *	(*btext)(howto, bootdev, 0, 0, 0, &bootinfo)
	 *	[return address != 0, and can be returned to]
	 *
	 * There may seem to be a lot of wasted arguments in here, but
	 * that is so the newer boot code can still load very old kernels
	 * and old boot code can load new kernels.
	 */

	/*
	 * The old style disk boot blocks fake a frame on the stack and
	 * did an lret to get here.  The frame on the stack has a return
	 * address of 0.
	 */
	cmpl	$0,4(%ebp)
	je	olddiskboot

	/*
	 * We have some form of return address, so this is either the
	 * old diskless netboot code, or the new uniform code.  That can
	 * be detected by looking at the 5th argument, if it is 0
	 * we are being booted by the new uniform boot code.
	 */
	cmpl	$0,24(%ebp)
	je	newboot

	/*
	 * Seems we have been loaded by the old diskless boot code, we
	 * don't stand a chance of running as the diskless structure
	 * changed considerably between the two, so just halt.
	 */
	hlt

	/*
	 * We have been loaded by the new uniform boot code.
	 * Let's check the bootinfo version, and if we do not understand
	 * it we return to the loader with a status of 1 to indicate this error
	 */
newboot:
	movl	28(%ebp),%ebx		/* &bootinfo.version */
	movl	BI_VERSION(%ebx),%eax
	cmpl	$1,%eax			/* We only understand version 1 */
	je	1f
	movl	$1,%eax			/* Return status */
	leave
	/*
	 * XXX this returns to our caller's caller (as is required) since
	 * we didn't set up a frame and our caller did.
	 */
	ret

1:
	/*
	 * If we have a kernelname copy it in
	 */
	movl	BI_KERNELNAME(%ebx),%esi
	cmpl	$0,%esi
	je	2f			/* No kernelname */
	movl	$MAXPATHLEN,%ecx	/* Brute force!!! */
	movl	$R(kernelname),%edi
	cmpb	$'/',(%esi)		/* Make sure it starts with a slash */
	je	1f
	movb	$'/',(%edi)
	incl	%edi
	decl	%ecx
1:
	cld
	rep
	movsb
2:
	/*
	 * Determine the size of the boot loader's copy of the bootinfo
	 * struct.  This is impossible to do properly because old versions
	 * of the struct don't contain a size field and there are 2 old
	 * versions with the same version number.
	 */
	movl	$BI_ENDCOMMON,%ecx	/* prepare for sizeless version */
	testl	$RB_BOOTINFO,8(%ebp)	/* bi_size (and bootinfo) valid? */
	je	got_bi_size		/* no, sizeless version */
	movl	BI_SIZE(%ebx),%ecx
got_bi_size:

	/*
	 * Copy the common part of the bootinfo struct
	 */
	movl	%ebx,%esi
	movl	$R(bootinfo),%edi
	cmpl	$BOOTINFO_SIZE,%ecx
	jbe	got_common_bi_size
	movl	$BOOTINFO_SIZE,%ecx
got_common_bi_size:
	cld
	rep
	movsb

#ifdef NFS_ROOT
#ifndef BOOTP_NFSV3
	/*
	 * If we have a nfs_diskless structure copy it in
	 */
	movl	BI_NFS_DISKLESS(%ebx),%esi
	cmpl	$0,%esi
	je	olddiskboot
	movl	$R(nfs_diskless),%edi
	movl	$NFSDISKLESS_SIZE,%ecx
	cld
	rep
	movsb
	movl	$R(nfs_diskless_valid),%edi
	movl	$1,(%edi)
#endif
#endif

	/*
	 * The old style disk boot.
	 *	(*btext)(howto, bootdev, cyloffset, esym);
	 * Note that the newer boot code just falls into here to pick
	 * up howto and bootdev, cyloffset and esym are no longer used
	 */
olddiskboot:
	movl	8(%ebp),%eax
	movl	%eax,R(boothowto)
	movl	12(%ebp),%eax
	movl	%eax,R(bootdev)

	ret

/**********************************************************************
 *
 * Identify the CPU and initialize anything special about it
 *
 */
identify_cpu:

	/* Try to toggle alignment check flag; does not exist on 386. */
	pushfl
	popl	%eax
	movl	%eax,%ecx
	orl	$PSL_AC,%eax
	pushl	%eax
	popfl
	pushfl
	popl	%eax
	xorl	%ecx,%eax
	andl	$PSL_AC,%eax
	pushl	%ecx
	popfl

	testl	%eax,%eax
	jnz	try486

	/* NexGen CPU does not have alignment check flag. */
	pushfl
	movl	$0x5555, %eax
	xorl	%edx, %edx
	movl	$2, %ecx
	clc
	divl	%ecx
	jz	trynexgen
	popfl
	movl	$CPU_386,R(cpu)
	jmp	3f

trynexgen:
	popfl
	movl	$CPU_NX586,R(cpu)
	movl	$0x4778654e,R(cpu_vendor)	# store vendor string
	movl	$0x72446e65,R(cpu_vendor+4)
	movl	$0x6e657669,R(cpu_vendor+8)
	movl	$0,R(cpu_vendor+12)
	jmp	3f

try486:	/* Try to toggle identification flag; does not exist on early 486s. */
	pushfl
	popl	%eax
	movl	%eax,%ecx
	xorl	$PSL_ID,%eax
	pushl	%eax
	popfl
	pushfl
	popl	%eax
	xorl	%ecx,%eax
	andl	$PSL_ID,%eax
	pushl	%ecx
	popfl

	testl	%eax,%eax
	jnz	trycpuid
	movl	$CPU_486,R(cpu)

	/*
	 * Check Cyrix CPU
	 * Cyrix CPUs do not change the undefined flags following
	 * execution of the divide instruction which divides 5 by 2.
	 *
	 * Note: CPUID is enabled on M2, so it passes another way.
	 */
	pushfl
	movl	$0x5555, %eax
	xorl	%edx, %edx
	movl	$2, %ecx
	clc
	divl	%ecx
	jnc	trycyrix
	popfl
	jmp	3f		/* You may use Intel CPU.
*/ trycyrix: popfl /* * IBM Bluelighting CPU also doesn't change the undefined flags. * Because IBM doesn't disclose the information for Bluelighting * CPU, we couldn't distinguish it from Cyrix's (including IBM * brand of Cyrix CPUs). */ movl $0x69727943,R(cpu_vendor) # store vendor string movl $0x736e4978,R(cpu_vendor+4) movl $0x64616574,R(cpu_vendor+8) jmp 3f trycpuid: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid # cpuid 0 movl %eax,R(cpu_high) # highest capability movl %ebx,R(cpu_vendor) # store vendor string movl %edx,R(cpu_vendor+4) movl %ecx,R(cpu_vendor+8) movb $0,R(cpu_vendor+12) movl $1,%eax cpuid # cpuid 1 movl %eax,R(cpu_id) # store cpu_id movl %ebx,R(cpu_procinfo) # store cpu_procinfo movl %edx,R(cpu_feature) # store cpu_feature movl %ecx,R(cpu_feature2) # store cpu_feature2 rorl $8,%eax # extract family type andl $15,%eax cmpl $5,%eax jae 1f /* less than Pentium; must be 486 */ movl $CPU_486,R(cpu) jmp 3f 1: /* a Pentium? */ cmpl $5,%eax jne 2f movl $CPU_586,R(cpu) jmp 3f 2: /* Greater than Pentium...call it a Pentium Pro */ movl $CPU_686,R(cpu) 3: ret /********************************************************************** * * Create the first page directory and its page tables. * */ create_pagetables: /* Find end of kernel image (rounded up to a page boundary). */ movl $R(_end),%esi /* Include symbols, if any. */ movl R(bootinfo+BI_ESYMTAB),%edi testl %edi,%edi je over_symalloc movl %edi,%esi movl $KERNBASE,%edi addl %edi,R(bootinfo+BI_SYMTAB) addl %edi,R(bootinfo+BI_ESYMTAB) over_symalloc: /* If we are told where the end of the kernel space is, believe it. */ movl R(bootinfo+BI_KERNEND),%edi testl %edi,%edi je no_kernend movl %edi,%esi no_kernend: addl $PDRMASK,%esi /* Play conservative for now, and */ andl $~PDRMASK,%esi /* ... wrap to next 4M. */ movl %esi,R(KERNend) /* save end of kernel */ movl %esi,R(physfree) /* next free page is at end of kernel */ /* Allocate Kernel Page Tables */ ALLOCPAGES(NKPT) movl %esi,R(KPTphys) addl $(KERNBASE-(KPTDI<<(PDRSHIFT-PAGE_SHIFT+PTESHIFT))),%esi movl %esi,R(KPTmap) /* Allocate Page Table Directory */ #if defined(PAE) || defined(PAE_TABLES) /* XXX only need 32 bytes (easier for now) */ ALLOCPAGES(1) movl %esi,R(IdlePDPT) #endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) /* Allocate KSTACK */ ALLOCPAGES(TD0_KSTACK_PAGES) movl %esi,R(p0kpa) addl $KERNBASE, %esi movl %esi, R(proc0kstack) ALLOCPAGES(1) /* vm86/bios stack */ movl %esi,R(vm86phystk) ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ movl %esi,R(vm86pa) addl $KERNBASE, %esi movl %esi, R(vm86paddr) /* * Enable PSE and PGE. */ #ifndef DISABLE_PSE testl $CPUID_PSE, R(cpu_feature) jz 1f movl $PG_PS, R(pseflag) movl %cr4, %eax orl $CR4_PSE, %eax movl %eax, %cr4 1: #endif #ifndef DISABLE_PG_G testl $CPUID_PGE, R(cpu_feature) jz 2f movl $PG_G, R(pgeflag) movl %cr4, %eax orl $CR4_PGE, %eax movl %eax, %cr4 2: #endif /* * Initialize page table pages mapping physical address zero through the * end of the kernel. All of the page table entries allow read and write * access. Write access to the first physical page is required by bios32 * calls, and write access to the first 1 MB of physical memory is required * by ACPI for implementing suspend and resume. We do this even * if we've enabled PSE above, we'll just switch the corresponding kernel * PDEs before we turn on paging. * * XXX: We waste some pages here in the PSE case! */ xorl %eax, %eax movl R(KERNend),%ecx shrl $PAGE_SHIFT,%ecx fillkptphys($PG_RW) /* Map page table pages. 
*/ movl R(KPTphys),%eax movl $NKPT,%ecx fillkptphys($PG_RW) /* Map page directory. */ #if defined(PAE) || defined(PAE_TABLES) movl R(IdlePDPT), %eax movl $1, %ecx fillkptphys($PG_RW) #endif movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) /* Map proc0's KSTACK in the physical way ... */ movl R(p0kpa), %eax movl $(TD0_KSTACK_PAGES), %ecx fillkptphys($PG_RW) /* Map ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkptphys($PG_RW) /* Map space for the vm86 region */ movl R(vm86phystk), %eax movl $4, %ecx fillkptphys($PG_RW) /* Map page 0 into the vm86 page table */ movl $0, %eax movl $0, %ebx movl $1, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) /* ...likewise for the ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) /* * Create an identity mapping for low physical memory, including the kernel. * The part of this mapping that covers the first 1 MB of physical memory * becomes a permanent part of the kernel's address space. The rest of this * mapping is destroyed in pmap_bootstrap(). Ordinarily, the same page table * pages are shared by the identity mapping and the kernel's native mapping. * However, the permanent identity mapping cannot contain PG_G mappings. * Thus, if the kernel is loaded within the permanent identity mapping, that * page table page must be duplicated and not shared. * * N.B. Due to errata concerning large pages and physical address zero, * a PG_PS mapping is not used. */ movl R(KPTphys), %eax xorl %ebx, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) #if KERNLOAD < (1 << PDRSHIFT) testl $PG_G, R(pgeflag) jz 1f ALLOCPAGES(1) movl %esi, %edi movl R(IdlePTD), %eax movl (%eax), %esi movl %edi, (%eax) movl $PAGE_SIZE, %ecx cld rep movsb 1: #endif /* * For the non-PSE case, install PDEs for PTs covering the KVA. * For the PSE case, do the same, but clobber the ones corresponding * to the kernel (from btext to KERNend) with 4M (2M for PAE) ('PS') * PDEs immediately after. */ movl R(KPTphys), %eax movl $KPTDI, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) cmpl $0,R(pseflag) je done_pde movl R(KERNend), %ecx movl $KERNLOAD, %eax subl %eax, %ecx shrl $PDRSHIFT, %ecx movl $(KPTDI+(KERNLOAD/(1 << PDRSHIFT))), %ebx shll $PDESHIFT, %ebx addl R(IdlePTD), %ebx orl $(PG_V|PG_RW|PG_PS), %eax 1: movl %eax, (%ebx) addl $(1 << PDRSHIFT), %eax addl $PDESIZE, %ebx loop 1b done_pde: /* install a pde recursively mapping page directory as a page table */ movl R(IdlePTD), %eax movl $PTDPTDI, %ebx movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) #if defined(PAE) || defined(PAE_TABLES) movl R(IdlePTD), %eax xorl %ebx, %ebx movl $NPGPTD, %ecx fillkpt(R(IdlePDPT), $0x0) #endif ret #ifdef XENHVM /* Xen Hypercall page */ .text .p2align PAGE_SHIFT, 0x90 /* Hypercall_page needs to be PAGE aligned */ NON_GPROF_ENTRY(hypercall_page) .skip 0x1000, 0x90 /* Fill with "nop"s */ #endif Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 323721) +++ head/sys/i386/i386/machdep.c (revision 323722) @@ -1,3088 +1,3094 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern register_t init386(int first); extern void dblfault_handler(void); static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END (nitems(phys_avail) - 2) #define DUMP_AVAIL_ARRAY_END (nitems(dump_avail) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; /* Default init_ops implementation. */ struct init_ops init_ops = { .early_clock_source_init = i8254_init, .early_delay = i8254_delay, #ifdef DEV_APIC .msi_init = msi_init, #endif }; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. 
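 * (The smbios.memory.enabled value parsed below is in kilobytes, hence
 * the << 10 to convert to bytes; e.g. a reported 4194304, that is 4 GB
 * expressed in KB, yields memsize = 4294967296.  Example value only.)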
*/ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 
1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. 
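 * (With SA_SIGINFO the user handler uses the three-argument POSIX
 * prototype, roughly:
 *
 *	void handler(int sig, siginfo_t *info, void *ucp);
 *
 * so the frame built here passes pointers to sf_si and sf_uc instead of
 * the old code/address pair.  Sketch for illustration only.)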
*/ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. 
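 * (A segment descriptor splits its 32-bit base into an 8-bit sd_hibase
 * and a 24-bit sd_lobase, so the reassembly below is, for example,
 * 0x12 << 24 | 0x345678 == 0x12345678.  Example values are made up.)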
*/ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned int)sp & ~0x3F); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; if (regs->tf_eip == 0) regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. 
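 * The checks below reduce to EFL_SECURE(), which refuses any change to
 * privileged or reserved eflags bits, and CS_SECURE(), which refuses a
 * privileged %cs selector; either failure rejects the frame with EINVAL.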
* * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. 
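 * (PSL_VIF is the vm86 virtual interrupt-enable flag and PSL_VIP the
 * virtual-interrupt-pending flag; both set at once means the vm86 task
 * has a virtual interrupt to deliver, which is reported as SIGBUS here.)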
*/ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. 
*/ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) { uprintf( "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = imgp->ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. 
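 * (Stale dr0-dr3 breakpoints left armed through dr7 would otherwise
 * keep firing at the old image's addresses inside the new one.)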
*/ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); static char bootmethod[16] = "BIOS"; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. 
* * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit 
interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif - IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); + IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. 
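 * Run from the ddb prompt as "show sysregs"; the sibling command below,
 * "show dbregs", dumps the hardware debug registers the same way.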
*/ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016llx\n", rxcr(0)); if (amd_feature & (AMDID_NX | AMDID_LM)) db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t0x%016llx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); if ((cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR)); if (cpu_feature & CPUID_PAT) db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%08x\n", rdr0()); db_printf("dr1\t0x%08x\n", rdr1()); db_printf("dr2\t0x%08x\n", rdr2()); db_printf("dr3\t0x%08x\n", rdr3()); db_printf("dr6\t0x%08x\n", rdr6()); db_printf("dr7\t0x%08x\n", rdr7()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); #ifndef PAE if (base > 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(length / 1024)); return (1); } #endif /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. 
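 * (physmap[] holds base/end pairs.  As a made-up example, inserting the
 * region [0xa0000, 0xf0000) into { 0x0, 0x9f000, 0x100000, 0x7fe0000 }
 * finds insert_idx 2, shifts the second pair up, and leaves
 * { 0x0, 0x9f000, 0xa0000, 0xf0000, 0x100000, 0x7fe0000 }.)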
*/ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); return (add_physmap_entry(smap->base, smap->length, physmap, physmap_idxp)); } static void add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap, int *physmap_idxp) { struct bios_smap *smap, *smapend; u_int32_t smapsize; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, physmap_idxp)) break; } static void basemem_setup(void) { vm_paddr_t pa; pt_entry_t *pte; int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size, physmem_tunable; int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase; caddr_t kmdp; has_smap = 0; #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this. 
*/ physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. */ physmap_idx = 0; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { add_smap_entries(smapbase, physmap, &physmap_idx); has_smap = 1; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. */ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. 
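 * (extmem is in KB, so the test below fires for values strictly between
 * 15360 and 16384; e.g. a reported 15872K, 15.5 MB, is clamped down to
 * 15360K.  Example value only.)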
*/ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. * * This is especially confusing when it is much larger than the * memory size and is displayed as "realmem". */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR3; full = FALSE; /* * block out kernel memory as not available. 
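 * (KERNLOAD..first covers the kernel image plus the early allocations
 * made before this point, so those pages are withheld from phys_avail[]
 * but still recorded as dumpable via the do_dump_avail path below.)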
*/ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } static void i386_kdb_init(void) { #ifdef DDB db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab); #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } register_t init386(int first) { - struct gate_descriptor *gdp; +#ifdef COMPAT_43 + struct segment_descriptor *gdp; +#endif int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; struct xstate_hdr *xhdr; int late_console; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = TD0_KSTACK_PAGES; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. 
*/ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (bootinfo.bi_envp != 0) init_static_kenv((char *)bootinfo.bi_envp + KERNBASE, 0); else init_static_kenv(NULL, 0); identify_hypervisor(); /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* Non-late cninit() and printf() can be moved up to here. */ /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < nitems(ldt_segs); x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) - setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, + setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, + setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL + setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); - setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_NP, 
&IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, + setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS - setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, + setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. */ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); finishidentcpu(); /* Final stage of CPU initialization */ - setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, + setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #if defined(PAE) || defined(PAE_TABLES) dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); /* Initialize the tss (except for the final esp0) early for vm86. 
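 *
 * Editor's note on the SDT_SYS386TGT -> SDT_SYS386IGT changes above: a
 * 386 trap gate leaves PSL_I untouched on entry, while an interrupt
 * gate clears it, so the handler runs with interrupts masked until it
 * re-enables them itself.  A minimal sketch of installing such a gate,
 * assuming the setidt() signature from md_var.h; "examplevec" is a
 * hypothetical vector name:
 */
#if 0	/* editor's illustrative sketch, not part of this commit */
extern inthand_t IDTVEC(examplevec);

/* Interrupt gate: the CPU clears PSL_I before entering the handler. */
setidt(IDT_DE, &IDTVEC(examplevec), SDT_SYS386IGT, SEL_KPL,
    GSEL(GCODE_SEL, SEL_KPL));
#endif
/*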
*/ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* Initialize the PIC early for vm86 calls. */ #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif /* * The console and kdb should be initialized even earlier than here, * but some console drivers don't work until after getmemsize(). * Default to late console initialization to support these drivers. * This loses mainly printf()s in getmemsize() and early debugging. */ late_console = 1; TUNABLE_INT_FETCH("debug.late_console", &late_console); if (!late_console) { cninit(); i386_kdb_init(); } vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ if (late_console) cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); if (late_console) i386_kdb_init(); msgbufinit(msgbufp, msgbufsize); npxinit(true); /* * Set up thread0 pcb after npxinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); thread0.td_pcb->pcb_save = get_pcb_user_save_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } PCPU_SET(curpcb, thread0.td_pcb); /* Move esp0 in the tss to its final place. */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16); gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; /* clear busy bit */ ltr(gsel_tss); - /* make a call gate to reenter kernel with */ - gdp = &ldt[LSYS5CALLS_SEL].gd; - - x = (int) &IDTVEC(lcall_syscall); - gdp->gd_looffset = x; - gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); - gdp->gd_stkcpy = 1; - gdp->gd_type = SDT_SYS386CGT; - gdp->gd_dpl = SEL_UPL; - gdp->gd_p = 1; - gdp->gd_hioffset = x >> 16; +#ifdef COMPAT_43 + /* + * Make a code descriptor to emulate lcall $7,$0 with int + * $0x80. sd_hibase and sd_lobase are set after the sigtramp + * base in the shared table is known. 
+ */ + gdp = &ldt[LSYS5CALLS_SEL].sd; + gdp->sd_type = SDT_MEMERA; + gdp->sd_dpl = SEL_UPL; + gdp->sd_p = 1; + gdp->sd_def32 = 1; + gdp->sd_gran = 1; + gdp->sd_lolimit = 0xffff; + gdp->sd_hilimit = 0xf; +#endif /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #if defined(PAE) || defined(PAE_TABLES) thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((register_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; pcb->pcb_gs = rgs(); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); npxgetregs(td); if (cpu_fxsr) npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm, (struct save87 *)fpregs); else bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { if (cpu_fxsr) npx_set_fpregs_xmm((struct save87 *)fpregs, &get_pcb_user_save_td(td)->sv_xmm); else bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87, sizeof(*fpregs)); npxuserinited(td); return (0); } /* * Get machine context. 
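 *
 * Editor's note: get_mcontext() below derives the %fs/%gs base from
 * the split descriptor fields.  A standalone sketch of that
 * recombination (sd_lobase carries base bits 0..23, sd_hibase bits
 * 24..31); the helper name is hypothetical:
 */
#if 0	/* editor's illustrative sketch, not part of this commit */
static uint32_t
sd_base_sketch(const struct segment_descriptor *sdp)
{
	/* Recombine the 24-bit low and 8-bit high base fields. */
	return ((uint32_t)sdp->sd_hibase << 24 | sdp->sd_lobase);
}
#endif
/*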
*/ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; char *xfpustate; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; mcp->mc_ownedfp = npxgetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(union savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { union savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. 
*/ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { fpstate = (union savefpu *)&mcp->mc_fpstate; if (cpu_fxsr) fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; error = npxsetregs(td, fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); if (PCPU_GET(fpcurthread) == td) npxdrop(); /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
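 *
 * Editor's note: set_dbregs() above rejects the 0x02 access and length
 * encodings because they produce undefined behaviour.  DR7 packs, for
 * breakpoint i, the local/global enable bits at 2*i and 2*i + 1 and
 * 2-bit access/length fields starting at bit 16 + 4*i.  A hedged
 * sketch of the decoding the DBREG_DR7_* macros perform (hypothetical
 * helper names):
 */
#if 0	/* editor's illustrative sketch, not part of this commit */
static int
dr7_enabled_sketch(uint32_t dr7, int i)
{
	/* Active if either the local or the global enable bit is set. */
	return ((dr7 & (0x3 << (i * 2))) != 0);
}

static int
dr7_access_sketch(uint32_t dr7, int i)
{
	/* 00 = execute, 01 = write, 10 = undefined, 11 = read/write. */
	return ((dr7 >> (16 + i * 4)) & 0x3);
}
#endif
/*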
 */
int
user_dbreg_trap(void)
{
	u_int32_t dr7, dr6;	/* debug registers dr6 and dr7 */
	u_int32_t bp;		/* breakpoint bits extracted from dr6 */
	int nbp;		/* number of breakpoints that triggered */
	caddr_t addr[4];	/* breakpoint addresses */
	int i;

	dr7 = rdr7();
	if ((dr7 & 0x000000ff) == 0) {
		/*
		 * All GE and LE bits in the dr7 register are zero,
		 * thus the trap couldn't have been caused by the
		 * hardware debug registers.
		 */
		return 0;
	}

	nbp = 0;
	dr6 = rdr6();
	bp = dr6 & 0x0000000f;

	if (!bp) {
		/*
		 * None of the breakpoint bits are set, meaning this
		 * trap was not caused by any of the debug registers.
		 */
		return 0;
	}

	/*
	 * At least one of the breakpoints was hit; check to see
	 * which ones and whether any of them are user space addresses.
	 */

	if (bp & 0x01) {
		addr[nbp++] = (caddr_t)rdr0();
	}
	if (bp & 0x02) {
		addr[nbp++] = (caddr_t)rdr1();
	}
	if (bp & 0x04) {
		addr[nbp++] = (caddr_t)rdr2();
	}
	if (bp & 0x08) {
		addr[nbp++] = (caddr_t)rdr3();
	}

	for (i = 0; i < nbp; i++) {
		if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) {
			/*
			 * addr[i] is in user space
			 */
			return nbp;
		}
	}

	/*
	 * None of the breakpoints are in user space.
	 */
	return 0;
}

#ifdef KDB

/*
 * Provide inb() and outb() as functions.  They are normally only available
 * as inline functions, thus cannot be called from the debugger.
 */

/* silence compiler warnings */
u_char inb_(u_short);
void outb_(u_short, u_char);

u_char
inb_(u_short port)
{
	return inb(port);
}

void
outb_(u_short port, u_char data)
{
	outb(port, data);
}

#endif /* KDB */
Index: head/sys/i386/i386/trap.c
===================================================================
--- head/sys/i386/i386/trap.c	(revision 323721)
+++ head/sys/i386/i386/trap.c	(revision 323722)
@@ -1,1110 +1,1091 @@
/*-
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); /* * 386 Trap and System call handling */ #include "opt_clock.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_stack.h" #include "opt_trap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , page_fault, all); PMC_SOFT_DEFINE( , , page_fault, read); PMC_SOFT_DEFINE( , , page_fault, write); #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef KDTRACE_HOOKS #include #endif void trap(struct trapframe *frame); void syscall(struct trapframe *frame); static int trap_pfault(struct trapframe *, int, vm_offset_t); static void trap_fatal(struct trapframe *, vm_offset_t); void dblfault_handler(void); -extern inthand_t IDTVEC(lcall_syscall); - #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "", /* 7 unused */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ "SIMD floating-point exception", /* 29 T_XMMFLT */ "reserved (unknown) fault", /* 30 T_RESERVED */ "", /* 31 unused (reserved) */ "DTrace pid return trap", /* 32 T_DTRACE_RET */ }; #if defined(I586_CPU) && !defined(NO_F00F_HACK) int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif static int prot_fault_translation = 0; SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, "Print debugging information on trap signal to ctty"); /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. 
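 *
 * Editor's note: a minimal sketch of how the trap_msg[] table above is
 * consulted, with the same bounds rule trap_fatal() applies; the
 * helper name is hypothetical:
 */
#if 0	/* editor's illustrative sketch, not part of this commit */
static const char *
trap_msg_sketch(u_int type)
{
	/* Entries past MAX_TRAP_MSG have no symbolic description. */
	return (type <= MAX_TRAP_MSG ? trap_msg[type] : "UNKNOWN");
}
#endif
/*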
*/ void trap(struct trapframe *frame) { #ifdef KDTRACE_HOOKS struct reg regs; #endif ksiginfo_t ksi; struct thread *td; struct proc *p; #ifdef KDB register_t dr6; #endif int signo, ucode; u_int type; register_t addr; vm_offset_t eva; #ifdef POWERFAIL_NMI static int lastalert = 0; #endif td = curthread; p = td->td_proc; signo = 0; ucode = 0; addr = 0; VM_CNT_INC(v_trap); type = frame->tf_trapno; #ifdef SMP /* Handler for NMI IPIs used for stopping CPUs. */ if (type == T_NMI && ipi_nmi_handler() == 0) return; #endif /* SMP */ #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif if (type == T_RESERVED) { trap_fatal(frame, 0); return; } if (type == T_NMI) { #ifdef HWPMC_HOOKS /* * CPU PMCs interrupt using an NMI so we check for that first. * If the HWPMC module is active, 'pmc_hook' will point to * the function to be called. A non-zero return value from the * hook means that the NMI was consumed by it and that we can * return immediately. */ if (pmc_intr != NULL && (*pmc_intr)(PCPU_GET(cpuid), frame) != 0) return; #endif #ifdef STACK if (stack_nmi_handler(frame) != 0) return; #endif } if (type == T_MCHK) { mca_intr(); return; } #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. */ if ((type == T_PROTFLT || type == T_PAGEFLT) && dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type)) return; #endif if ((frame->tf_eflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled * interrupts and then trapped. Enabling interrupts * now is wrong, but it is better than running with * interrupts disabled until they are accidentally * enabled later. */ if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) uprintf( "pid %ld (%s): trap %d with interrupts disabled\n", (long)curproc->p_pid, curthread->td_name, type); else if (type != T_NMI && type != T_BPTFLT && type != T_TRCTRAP && frame->tf_eip != (int)cpu_switch_load_gs) { /* * XXX not quite right, since this may be for a * multiple fault in user mode. */ printf("kernel trap %d with interrupts disabled\n", type); /* * Page faults need interrupts disabled until later, * and we shouldn't enable interrupts while holding * a spin lock. */ if (type != T_PAGEFLT && td->td_md.md_spinlock_count == 0) enable_intr(); } } eva = 0; if (type == T_PAGEFLT) { /* * For some Cyrix CPUs, %cr2 is clobbered by * interrupts. This problem is worked around by using * an interrupt gate for the pagefault handler. We * are finally ready to read %cr2 and conditionally * reenable interrupts. If we hold a spin lock, then * we must not reenable interrupts. This might be a * spurious page fault. 
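 *
 * Editor's note: %cr2 latches the faulting linear address; it must be
 * read before interrupts are re-enabled, or a nested page fault could
 * overwrite it.  A sketch of the accessor (the real rcr2() lives in
 * machine/cpufunc.h):
 */
#if 0	/* editor's illustrative sketch, not part of this commit */
static __inline u_int
rcr2_sketch(void)
{
	u_int data;

	__asm __volatile("movl %%cr2,%0" : "=r" (data));
	return (data);
}
#endif
/*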
*/ eva = rcr2(); if (td->td_md.md_spinlock_count == 0) enable_intr(); } if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) { /* user trap */ td->td_pticks = 0; td->td_frame = frame; addr = frame->tf_eip; if (td->td_cowgen != p->p_cowgen) thread_cow_update(td); switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ signo = SIGILL; ucode = ILL_PRVOPC; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ enable_intr(); #ifdef KDTRACE_HOOKS if (type == T_BPTFLT) { fill_frame_regs(frame, ®s); if (dtrace_pid_probe_ptr != NULL && dtrace_pid_probe_ptr(®s) == 0) return; } #endif user_trctrap_out: frame->tf_eflags &= ~PSL_T; signo = SIGTRAP; ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT); break; case T_ARITHTRAP: /* arithmetic trap */ ucode = npxtrap_x87(); if (ucode == -1) return; signo = SIGFPE; break; /* * The following two traps can happen in vm86 mode, * and, if so, we want to handle them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto user_trctrap_out; } if (signo == 0) goto user; break; } signo = SIGBUS; ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR; break; case T_SEGNPFLT: /* segment not present fault */ signo = SIGBUS; ucode = BUS_ADRERR; break; case T_TSSFLT: /* invalid TSS fault */ signo = SIGBUS; ucode = BUS_OBJERR; break; case T_ALIGNFLT: signo = SIGBUS; ucode = BUS_ADRALN; break; case T_DOUBLEFLT: /* double fault */ default: signo = SIGBUS; ucode = BUS_OBJERR; break; case T_PAGEFLT: /* page fault */ signo = trap_pfault(frame, TRUE, eva); #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (signo == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. */ type = frame->tf_trapno = T_PRIVINFLT; /* Proceed as in that case. */ ucode = ILL_PRVOPC; signo = SIGILL; break; } #endif if (signo == -1) return; if (signo == 0) goto user; if (signo == SIGSEGV) ucode = SEGV_MAPERR; else if (prot_fault_translation == 0) { /* * Autodetect. This check also covers * the images without the ABI-tag ELF * note. */ if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && p->p_osrel >= P_OSREL_SIGSEGV) { signo = SIGSEGV; ucode = SEGV_ACCERR; } else { signo = SIGBUS; ucode = BUS_PAGE_FAULT; } } else if (prot_fault_translation == 1) { /* * Always compat mode. */ signo = SIGBUS; ucode = BUS_PAGE_FAULT; } else { /* * Always SIGSEGV mode. 
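 *
 * Editor's summary of the machdep.prot_fault_translation modes
 * handled above:
 *	0 - autodetect: FreeBSD binaries with p_osrel >=
 *	    P_OSREL_SIGSEGV get SIGSEGV, older ones SIGBUS
 *	1 - always SIGBUS (historical compatibility)
 *	2 - always SIGSEGV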
*/ signo = SIGSEGV; ucode = SEGV_ACCERR; } addr = eva; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV; signo = SIGFPE; break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF; signo = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_FLTSUB; signo = SIGFPE; break; case T_DNA: KASSERT(PCB_USER_FPU(td->td_pcb), ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) return; uprintf("pid %d killed due to lack of floating point\n", p->p_pid); signo = SIGKILL; ucode = 0; break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = ILL_COPROC; signo = SIGILL; break; case T_XMMFLT: /* SIMD floating-point exception */ ucode = npxtrap_sse(); if (ucode == -1) return; signo = SIGFPE; break; #ifdef KDTRACE_HOOKS case T_DTRACE_RET: enable_intr(); fill_frame_regs(frame, ®s); if (dtrace_return_probe_ptr != NULL) dtrace_return_probe_ptr(®s); return; #endif } } else { /* kernel trap */ KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(frame, FALSE, eva); return; case T_DNA: if (PCB_USER_FPU(td->td_pcb)) panic("Unregistered use of FPU in kernel"); if (npxdna()) return; break; case T_ARITHTRAP: /* arithmetic trap */ case T_XMMFLT: /* SIMD floating-point exception */ case T_FPOPFLT: /* FPU operand fetch fault */ /* * XXXKIB for now disable any FPU traps in kernel * handler registration seems to be overkill */ trap_fatal(frame, 0); return; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. */ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ if (frame->tf_eflags & PSL_VM) { signo = vm86_emulate((struct vm86frame *)frame); if (signo == SIGTRAP) { type = T_TRCTRAP; load_dr6(rdr6() | 0x4000); goto kernel_trctrap; } if (signo != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)frame); return; } /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ if (curpcb->pcb_flags & PCB_VM86CALL) break; /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame->tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; #if 0 PROC_LOCK(p); kern_psignal(p, SIGBUS); PROC_UNLOCK(p); #endif return; } if (td->td_intr_nesting_level != 0) break; /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. 
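 *
 * Editor's note: the checks below implement an %eip fixup pattern:
 * when the fault hit a known-fragile instruction in the return path,
 * execution is redirected to a matching recovery stub instead of
 * panicking.  A sketch of the idiom, with hypothetical labels standing
 * in for the doreti_* pairs:
 */
#if 0	/* editor's illustrative sketch, not part of this commit */
extern void fragile_insn(void);		/* label on the instruction */
extern void fragile_insn_fault(void);	/* its recovery stub */

if (frame->tf_eip == (int)fragile_insn) {
	frame->tf_eip = (int)fragile_insn_fault;	/* resume at stub */
	return;
}
#endif
/*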
*/ if (frame->tf_eip == (int)doreti_iret) { frame->tf_eip = (int)doreti_iret_fault; return; } if (type == T_STKFLT) break; if (frame->tf_eip == (int)doreti_popl_ds) { frame->tf_eip = (int)doreti_popl_ds_fault; return; } if (frame->tf_eip == (int)doreti_popl_es) { frame->tf_eip = (int)doreti_popl_es_fault; return; } if (frame->tf_eip == (int)doreti_popl_fs) { frame->tf_eip = (int)doreti_popl_fs_fault; return; } if (curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return; } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. */ if (frame->tf_eflags & PSL_NT) { frame->tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* trace trap */ kernel_trctrap: - if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) { - /* - * We've just entered system mode via the - * syscall lcall. Continue single stepping - * silently until the syscall handler has - * saved the flags. - */ - return; - } - if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) { - /* - * The syscall handler has now saved the - * flags. Stop single stepping it. - */ - frame->tf_eflags &= ~PSL_T; - return; - } /* * Ignore debug register trace traps due to * accesses in the user's address space, which * can happen under several conditions such as * if a user sets a watchpoint on a buffer and * then passes that buffer to a system call. * We still want to get TRCTRAPS for addresses * in kernel space because that is useful when * debugging the kernel. */ if (user_dbreg_trap() && !(curpcb->pcb_flags & PCB_VM86CALL)) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & ~0xf); return; } /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ case T_BPTFLT: /* * If KDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef KDB /* XXX %dr6 is not quite reentrant. */ dr6 = rdr6(); load_dr6(dr6 & ~0x4000); if (kdb_trap(type, dr6, frame)) return; #endif break; #ifdef DEV_ISA case T_NMI: #ifdef POWERFAIL_NMI if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(880, hz); lastalert = time_second; } return; #else /* !POWERFAIL_NMI */ nmi_handle_intr(type, frame); return; #endif /* POWERFAIL_NMI */ #endif /* DEV_ISA */ } trap_fatal(frame, eva); return; } /* Translate fault for emulators (e.g. 
Linux) */ if (*p->p_sysent->sv_transtrap != NULL) signo = (*p->p_sysent->sv_transtrap)(signo, type); ksiginfo_init_trap(&ksi); ksi.ksi_signo = signo; ksi.ksi_code = ucode; ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = type; if (uprintf_signal) { uprintf("pid %d comm %s: signal %d err %x code %d type %d " "addr 0x%x esp 0x%08x eip 0x%08x " "<%02x %02x %02x %02x %02x %02x %02x %02x>\n", p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type, addr, frame->tf_esp, frame->tf_eip, fubyte((void *)(frame->tf_eip + 0)), fubyte((void *)(frame->tf_eip + 1)), fubyte((void *)(frame->tf_eip + 2)), fubyte((void *)(frame->tf_eip + 3)), fubyte((void *)(frame->tf_eip + 4)), fubyte((void *)(frame->tf_eip + 5)), fubyte((void *)(frame->tf_eip + 6)), fubyte((void *)(frame->tf_eip + 7))); } KASSERT((read_eflags() & PSL_I) != 0, ("interrupts disabled")); trapsignal(td, &ksi); user: userret(td, frame); KASSERT(PCB_USER_FPU(td->td_pcb), ("Return from trap with kernel FPU ctx leaked")); } static int trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) { struct thread *td; struct proc *p; vm_offset_t va; vm_map_t map; int rv; vm_prot_t ftype; td = curthread; p = td->td_proc; rv = 0; if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) { /* * Due to both processor errata and lazy TLB invalidation when * access restrictions are removed from virtual pages, memory * accesses that are allowed by the physical mapping layer may * nonetheless cause one spurious page fault per virtual page. * When the thread is executing a "no faulting" section that * is bracketed by vm_fault_{disable,enable}_pagefaults(), * every page fault is treated as a spurious page fault, * unless it accesses the same virtual address as the most * recent page fault within the same "no faulting" section. */ if (td->td_md.md_spurflt_addr != eva || (td->td_pflags & TDP_RESETSPUR) != 0) { /* * Do nothing to the TLB. A stale TLB entry is * flushed automatically by a page fault. */ td->td_md.md_spurflt_addr = eva; td->td_pflags &= ~TDP_RESETSPUR; return (0); } } else { /* * If we get a page fault while in a critical section, then * it is most likely a fatal kernel page fault. The kernel * is already going to panic trying to get a sleep lock to * do the VM lookup, so just consider it a fatal trap so the * kernel can print out a useful trap message and even get * to the debugger. * * If we get a page fault while holding a non-sleepable * lock, then it is most likely a fatal kernel page fault. * If WITNESS is enabled, then it's going to whine about * bogus LORs with various VM locks, so just skip to the * fatal trap handling directly. */ if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) { trap_fatal(frame, eva); return (-1); } } va = trunc_page(eva); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return (-2); #endif if (usermode) goto nogo; map = kernel_map; } else { map = &p->p_vmspace->vm_map; /* * When accessing a user-space address, kernel must be * ready to accept the page fault, and provide a * handling routine. 
Since accessing the address * without the handler is a bug, do not try to handle * it normally, and panic immediately. */ if (!usermode && (td->td_intr_nesting_level != 0 || curpcb->pcb_onfault == NULL)) { trap_fatal(frame, eva); return (-1); } } /* * If the trap was caused by errant bits in the PTE then panic. */ if (frame->tf_err & PGEX_RSV) { trap_fatal(frame, eva); return (-1); } /* * PGEX_I is defined only if the execute disable bit capability is * supported and enabled. */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; #if defined(PAE) || defined(PAE_TABLES) else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; #endif else ftype = VM_PROT_READ; /* Fault in the page. */ rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { PMC_SOFT_CALL_TF( , , page_fault, all, frame); if (ftype == VM_PROT_READ) PMC_SOFT_CALL_TF( , , page_fault, read, frame); else PMC_SOFT_CALL_TF( , , page_fault, write, frame); } #endif return (0); } nogo: if (!usermode) { if (td->td_intr_nesting_level == 0 && curpcb->pcb_onfault != NULL) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame, eva); return (-1); } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } static void trap_fatal(frame, eva) struct trapframe *frame; vm_offset_t eva; { int code, ss, esp; u_int type; struct soft_segment_descriptor softseg; char *msg; code = frame->tf_err; type = frame->tf_trapno; sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); if (type <= MAX_TRAP_MSG) msg = trap_msg[type]; else msg = "UNKNOWN"; printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg, frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", #if defined(PAE) || defined(PAE_TABLES) pg_nx != 0 ? (code & PGEX_I ? " instruction" : " data") : #endif "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? 
"protection violation" : "page not present"); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if (TF_HAS_STACKREGS(frame)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = %d (%s)\n", curproc->p_pid, curthread->td_name); #ifdef KDB if (debugger_on_panic || kdb_active) { frame->tf_err = eva; /* smuggle fault address to ddb */ if (kdb_trap(type, 0, frame)) { frame->tf_err = code; /* restore error code */ return; } frame->tf_err = code; /* restore error code */ } #endif printf("trap number = %d\n", type); if (type <= MAX_TRAP_MSG) panic("%s", trap_msg[type]); else panic("unknown/reserved trap"); } /* * Double fault handler. Called when a fault occurs while writing * a frame for a trap/exception onto the stack. This usually occurs * when the stack overflows (such is the case with infinite recursion, * for example). * * XXX Note that the current PTD gets replaced by IdlePTD when the * task switch occurs. This means that the stack that was active at * the time of the double fault is not available at unless * the machine was idle when the double fault occurred. The downside * of this is that "trace " in ddb won't work. */ void dblfault_handler() { #ifdef KDTRACE_HOOKS if (dtrace_doubletrap_func != NULL) (*dtrace_doubletrap_func)(); #endif printf("\nFatal double fault:\n"); printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip)); printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp)); printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp)); #ifdef SMP /* two separate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", PCPU_GET(cpuid)); printf("apic id = %02x\n", PCPU_GET(apic_id)); #endif panic("double fault"); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; struct trapframe *frame; struct syscall_args *sa; caddr_t params; long tmp; int error; p = td->td_proc; frame = td->td_frame; sa = &td->td_sa; params = (caddr_t)frame->tf_esp + sizeof(int); sa->code = frame->tf_eax; /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (sa->code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(int); } else if (sa->code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. 
*/ error = fueword(params, &tmp); if (error == -1) return (EFAULT); sa->code = tmp; params += sizeof(quad_t); } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; if (params != NULL && sa->narg != 0) error = copyin(params, (caddr_t)sa->args, (u_int)(sa->narg * sizeof(int))); else error = 0; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; } return (error); } #include "../../kern/subr_syscall.c" /* * syscall - system call request C handler. A system call is * essentially treated as a trap by reusing the frame layout. */ void syscall(struct trapframe *frame) { struct thread *td; register_t orig_tf_eflags; int error; ksiginfo_t ksi; #ifdef DIAGNOSTIC if (!(TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0)) { panic("syscall"); /* NOT REACHED */ } #endif orig_tf_eflags = frame->tf_eflags; td = curthread; td->td_frame = frame; error = syscallenter(td); /* * Traced syscall. */ if ((orig_tf_eflags & PSL_T) && !(orig_tf_eflags & PSL_VM)) { frame->tf_eflags &= ~PSL_T; ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGTRAP; ksi.ksi_code = TRAP_TRACE; ksi.ksi_addr = (void *)frame->tf_eip; trapsignal(td, &ksi); } KASSERT(PCB_USER_FPU(td->td_pcb), ("System call %s returning with kernel FPU ctx leaked", syscallname(td->td_proc, td->td_sa.code))); KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td), ("System call %s returning with mangled pcb_save", syscallname(td->td_proc, td->td_sa.code))); syscallret(td, error); } Index: head/sys/i386/include/md_var.h =================================================================== --- head/sys/i386/include/md_var.h (revision 323721) +++ head/sys/i386/include/md_var.h (revision 323722) @@ -1,73 +1,74 @@ /*- * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ #include extern u_int cyrix_did; #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif #ifdef COMPAT_FREEBSD4 extern int szfreebsd4_sigcode; #endif #ifdef COMPAT_43 extern int szosigcode; +extern int szlcallcode; #endif extern uint32_t *vm_page_dump; struct segment_descriptor; union savefpu; void bcopyb(const void *from, void *to, size_t len); void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs)); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds)); void doreti_popl_ds_fault(void) __asm(__STRING(doreti_popl_ds_fault)); void doreti_popl_es(void) __asm(__STRING(doreti_popl_es)); void doreti_popl_es_fault(void) __asm(__STRING(doreti_popl_es_fault)); void doreti_popl_fs(void) __asm(__STRING(doreti_popl_fs)); void doreti_popl_fs_fault(void) __asm(__STRING(doreti_popl_fs_fault)); void fill_based_sd(struct segment_descriptor *sdp, uint32_t base); void i686_pagezero(void *addr); void sse2_pagezero(void *addr); void init_AMD_Elan_sc520(void); vm_paddr_t kvtop(void *addr); void panicifcpuunsupported(void); void ppro_reenable_apic(void); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec); union savefpu *get_pcb_user_save_td(struct thread *td); union savefpu *get_pcb_user_save_pcb(struct pcb *pcb); #endif /* !_MACHINE_MD_VAR_H_ */ Index: head/sys/i386/isa/npx.c =================================================================== --- head/sys/i386/isa/npx.c (revision 323721) +++ head/sys/i386/isa/npx.c (revision 323722) @@ -1,1428 +1,1428 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fp_divide_by_0(void); void frstor(caddr_t addr); void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void ldmxcsr(u_int csr); void stmxcsr(u_int *csr); void xrstor(char *addr, uint64_t mask); void xsave(char *addr, uint64_t mask); void xsaveopt(char *addr, uint64_t mask); #endif /* __GNUCLIKE_ASM && !lint */ #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) CTASSERT(sizeof(union savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. 
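 *
 * Editor's note: together with the pcb-size assertion above, this lets
 * code locate the user FPU save area directly past the pcb, e.g. the
 * equivalent of (union savefpu *)((char *)pcb + sizeof(struct pcb)),
 * while still honoring the 64-byte XSAVE alignment rule.
 */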
*/ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm)); static void fpu_clean_state(void); static void fpusave(union savefpu *); static void fpurstor(union savefpu *); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); int use_xsave; uint64_t xsave_mask; static uma_zone_t fpu_save_area_zone; static union savefpu *npx_initialstate; struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; static int use_xsaveopt; static volatile u_int npx_traps_while_probing; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Determine if an FPU is present and how to use it. */ static int npx_probe(void) { struct gate_descriptor save_idt_npxtrap; u_short control, status; /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; return (1); } save_idt_npxtrap = idt[IDT_MF]; - setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, + setidt(IDT_MF, probetrap, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ stop_emulating(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. */ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. */ hw_float = 1; goto cleanup; } printf( "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. */ printf("WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float); } /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. 
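 *
 * Editor's note: CPUID leaf 0xd, sub-leaf 0 reports the supported
 * state bits in EAX (low 32) and EDX (high 32), so the mask below is
 * assembled as ((uint64_t)cp[3] << 32) | cp[0]; X87 and SSE are then
 * forced on, since the kernel cannot run without them.
 */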
*/ static void npxinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) use_xsaveopt = 1; } /* * Calculate the fpu save area size. */ static void npxinit_bsp2(void) { u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else cpu_max_ext_state_size = sizeof(union savefpu); } /* * Initialize floating point unit. */ void npxinit(bool bsp) { static union savefpu dummy; register_t saveintr; u_int mxcsr; u_short control; if (bsp) { if (!npx_probe()) return; npxinit_bsp1(); } if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } /* * XCR0 shall be set up before CPU can report the save area size. */ if (bsp) npxinit_bsp2(); /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. fpusave() initializes * the fpu. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); if (cpu_fxsr) fninit(); else fnsave(&dummy); control = __INITIAL_NPXCW__; fldcw(control); if (cpu_fxsr) { mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); } start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void npxinitstate(void *arg __unused) { register_t saveintr; int cp[4], i, max_ext_n; if (!hw_float) return; npx_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); saveintr = intr_disable(); stop_emulating(); fpusave(npx_initialstate); if (cpu_fxsr) { if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM * registers or x87 registers (MM/ST). The fpusave * call dumped the garbage contained in the registers * after reset to the initial state saved. Clear XMM * and x87 registers file image to make the startup * program state and signal handler XMM/x87 register * content predictable. */ bzero(npx_initialstate->sv_xmm.sv_fp, sizeof(npx_initialstate->sv_xmm.sv_fp)); bzero(npx_initialstate->sv_xmm.sv_xmm, sizeof(npx_initialstate->sv_xmm.sv_xmm)); } else bzero(npx_initialstate->sv_87.sv_ac, sizeof(npx_initialstate->sv_87.sv_ac)); /* * Create a table describing the layout of the CPU Extended * Save Area. 
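 *
 * Worked example (typical AVX-capable CPU; the numbers are
 * illustrative, not from any particular part): the legacy fxsave
 * image occupies bytes 0-511 and the xstate header bytes 512-575,
 * so CPUID.(EAX=0xd,ECX=2) commonly reports the YMM component at
 * offset 576 with size 256, for 832 bytes total, which is what the
 * CTASSERT on sizeof(struct savefpu_ymm) above checks.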
*/ if (use_xsave) { if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); start_emulating(); intr_restore(saveintr); } SYSINIT(npxinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, npxinitstate, NULL); /* * Free coprocessor (if we have it). */ void npxexit(struct thread *td) { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, NULL); } critical_exit(); #ifdef NPX_DEBUG if (hw_float) { u_int masked_exceptions; masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). */ if (masked_exceptions & 0x0d) log(LOG_ERR, "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", td->td_proc->p_pid, td->td_proc->p_comm, masked_exceptions); } #endif } int npxformat(void) { if (!hw_float) return (_MC_FPFMT_NODEV); if (cpu_fxsr) return (_MC_FPFMT_XMM); return (_MC_FPFMT_387); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choice but to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * The FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values performs these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be preserved. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand.
* 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ 
| UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int npxtrap_x87(void) { u_short control, status; if (!hw_float) { printf( "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. 
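 *
 * Once fetched, the two words index fpetable[] via
 * (status & ((~control & 0x3f) | 0x40)).  Worked example: with the
 * divide-by-zero trap unmasked (control bit 2 clear) and a pending
 * DZ exception (status bit 2 set), the index is 0x04, whose entry
 * is FPE_FLTDIV, the si_code that will accompany SIGFPE.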
*/ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } int npxtrap_sse(void) { u_int mxcsr; if (!hw_float) { printf( "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; int npxdna(void) { if (!hw_float) return (0); critical_enter(); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); critical_exit(); return (1); } if (PCPU_GET(fpcurthread) != NULL) { printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("npxdna"); } stop_emulating(); /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); if (cpu_fxsr) fpu_clean_state(); if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * npx_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); fpurstor(curpcb->pcb_save); if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(curpcb->pcb_initial_npxcw); curpcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(curpcb)) curpcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { fpurstor(curpcb->pcb_save); } critical_exit(); return (1); } /* * Wrapper for fpusave() called from context switch routines. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. */ void npxsave(addr) union savefpu *addr; { stop_emulating(); if (use_xsaveopt) xsaveopt((char *)addr, xsave_mask); else fpusave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } /* * Unconditionally save the current co-processor state across suspend and * resume. */ void npxsuspend(union savefpu *addr) { register_t cr0; if (!hw_float) return; if (PCPU_GET(fpcurthread) == NULL) { bcopy(npx_initialstate, addr, cpu_max_ext_state_size); return; } cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } void npxresume(union savefpu *addr) { register_t cr0; if (!hw_float) return; cr0 = rcr0(); npxinit(false); stop_emulating(); fpurstor(addr); load_cr0(cr0); } void npxdrop(void) { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. 
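 *
 * Sketch of the failure mode avoided (hypothetical pre-fxsr part):
 * a thread leaves an unmasked divide-by-zero latched, ownership is
 * dropped, and the next frstor would then deliver the stale
 * exception in the kernel's restore path rather than in the user
 * code that raised it.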
*/ if (!cpu_fxsr) fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int npxgetregs(struct thread *td) { struct pcb *pcb; uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i; int owned; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw); npxuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread)) { fpusave(get_pcb_user_save_pcb(pcb)); if (!cpu_fxsr) /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } critical_exit(); if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) + offsetof(struct xstate_hdr, xstate_bv)); if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)npx_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } return (owned); } void npxuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } int npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(union savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } int npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; int error; if (!hw_float) return (ENXIO); pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { error = npxsetxstate(td, xfpustate, xfpustate_size); if (error != 0) { critical_exit(); return (error); } if (!cpu_fxsr) fnclex(); /* As in npxdrop(). 
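Discard any exception state still latched in hardware before the user-supplied state is loaded, so that the fpurstor() below cannot trap on stale bits.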
*/ bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurstor(get_pcb_user_save_td(td)); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); error = npxsetxstate(td, xfpustate, xfpustate_size); if (error != 0) return (error); bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); npxuserinited(td); } return (0); } static void fpusave(addr) union savefpu *addr; { if (use_xsave) xsave((char *)addr, xsave_mask); else if (cpu_fxsr) fxsave(addr); else fnsave(addr); } static void npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87) { struct env87 *penv_87; struct envxmm *penv_xmm; int i; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers and tags */ penv_87->en_tw = 0xffff; for (i = 0; i < 8; ++i) { sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; if ((penv_xmm->en_tw & (1 << i)) != 0) /* zero and special are set as valid */ penv_87->en_tw &= ~(3 << i * 2); } } void npx_fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87) { bzero(sv_87, sizeof(*sv_87)); npx_fill_fpregs_xmm1(sv_xmm, sv_87); } void npx_set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm) { struct env87 *penv_87; struct envxmm *penv_xmm; int i; penv_87 = &sv_87->sv_env; penv_xmm = &sv_xmm->sv_env; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* * FPU registers and tags. * Abridged / Full translation (values in binary), see FXSAVE spec. * 0 11 * 1 00, 01, 10 */ penv_xmm->en_tw = 0; for (i = 0; i < 8; ++i) { sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; if ((penv_87->en_tw & (3 << i * 2)) != (3 << i * 2)) penv_xmm->en_tw |= 1 << i; } } void npx_get_fsave(void *addr) { struct thread *td; union savefpu *sv; td = curthread; npxgetregs(td); sv = get_pcb_user_save_td(td); if (cpu_fxsr) npx_fill_fpregs_xmm1(&sv->sv_xmm, addr); else bcopy(sv, addr, sizeof(struct env87) + sizeof(struct fpacc87[8])); } int npx_set_fsave(void *addr) { union savefpu sv; int error; bzero(&sv, sizeof(sv)); if (cpu_fxsr) npx_set_fpregs_xmm(addr, &sv.sv_xmm); else bcopy(addr, &sv, sizeof(struct env87) + sizeof(struct fpacc87[8])); error = npxsetregs(curthread, &sv, NULL, 0); return (error); } /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. 
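 *
 * Note that the ffree of %st(7) guarantees a free stack slot, so the
 * fld below cannot itself fault with a stack overflow; the load is
 * what replaces the stale FIP/FDP/opcode values described above.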
*/ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } static void fpurstor(union savefpu *addr) { if (use_xsave) xrstor((char *)addr, xsave_mask); else if (cpu_fxsr) fxrstor(addr); else frstor(addr); } #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_NPXINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 #define FPU_KERN_CTX_INUSE 0x04 struct fpu_kern_ctx { union savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx")); /* XXXKIB clear the memory ? 
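 * One conservative sketch (the allocation size is not recorded in
 * the ctx, so it would need to be recomputed as in
 * fpu_kern_alloc_ctx()):
 *
 *	explicit_bzero(ctx, sizeof(struct fpu_kern_ctx) +
 *	    XSAVE_AREA_ALIGN + cpu_max_ext_state_size);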
*/ free(ctx, M_FPUKERN_CTX); } static union savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((union savefpu *)p); } int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("using inuse ctx")); if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE; return (0); } pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = FPU_KERN_CTX_INUSE; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; return (0); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0, ("leaving not inuse ctx")); ctx->flags &= ~FPU_KERN_CTX_INUSE; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); pcb->pcb_save = ctx->prev; if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; pcb->pcb_flags &= ~PCB_KERNNPX; } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); curpcb->pcb_flags |= PCB_KERNNPX; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX) != 0); } /* * FPU save area alloc/free/init utility routines */ union savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, 0)); } void fpu_save_area_free(union savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(union savefpu *fsa) { bcopy(npx_initialstate, fsa, cpu_max_ext_state_size); } Index: head/sys/kern/imgact_aout.c =================================================================== --- head/sys/kern/imgact_aout.c (revision 323721) +++ head/sys/kern/imgact_aout.c (revision 323722) @@ -1,339 +1,356 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); +#include "opt_compat.h" + #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #include #include #include #include #endif static int exec_aout_imgact(struct image_params *imgp); static int aout_fixup(register_t **stack_base, struct image_params *imgp); #if defined(__i386__) struct sysentvec aout_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = aout_fixup, .sv_sendsig = sendsig, .sv_sigcode = sigcode, .sv_szsigcode = &szsigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; #elif defined(__amd64__) #define AOUT32_USRSTACK 0xbfc00000 #define AOUT32_PS_STRINGS \ (AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings)) #define AOUT32_MINUSER FREEBSD32_MINUSER extern const char *freebsd32_syscallnames[]; extern u_long ia32_maxssiz; struct sysentvec aout_sysvec = { .sv_size = FREEBSD32_SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = aout_fixup, .sv_sendsig = ia32_sendsig, .sv_sigcode = ia32_sigcode, .sv_szsigcode = &sz_ia32_sigcode, .sv_name = "FreeBSD a.out", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = IA32_PAGE_SIZE, .sv_minuser = AOUT32_MINUSER, .sv_maxuser = AOUT32_USRSTACK, .sv_usrstack = AOUT32_USRSTACK, .sv_psstrings = AOUT32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = ia32_setregs, .sv_fixlimit = ia32_fixlimit, .sv_maxssiz = &ia32_maxssiz, .sv_flags = SV_ABI_FREEBSD | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = ia32_set_syscall_retval, .sv_fetch_syscall_args = ia32_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, }; #else #error "Port me" #endif static int aout_fixup(register_t **stack_base, struct image_params *imgp) { *(char **)stack_base -= sizeof(uint32_t); return (suword32(*stack_base, imgp->args->argc)); } static int exec_aout_imgact(struct image_params *imgp) { const struct exec *a_out = (const struct exec *) imgp->image_header; struct vmspace *vmspace; vm_map_t map; vm_object_t object; vm_offset_t text_end, data_end; unsigned long virtual_offset; unsigned long 
file_offset; unsigned long bss_size; int error; /* * Linux and *BSD binaries look very much alike, * only the machine id is different: * 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI. * NetBSD is in network byte order.. ugh. */ if (((a_out->a_midmag >> 16) & 0xff) != 0x86 && ((a_out->a_midmag >> 16) & 0xff) != 0 && ((((int)ntohl(a_out->a_midmag)) >> 16) & 0xff) != 0x86) return -1; /* * Set file/virtual offset based on a.out variant. * We do two cases: host byte order and network byte order * (for NetBSD compatibility) */ switch ((int)(a_out->a_midmag & 0xffff)) { case ZMAGIC: virtual_offset = 0; if (a_out->a_text) { file_offset = PAGE_SIZE; } else { /* Bill's "screwball mode" */ file_offset = 0; } break; case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; /* Pass PS_STRINGS for BSD/OS binaries only. */ if (N_GETMID(*a_out) == MID_ZERO) imgp->ps_strings = aout_sysvec.sv_psstrings; break; default: /* NetBSD compatibility */ switch ((int)(ntohl(a_out->a_midmag) & 0xffff)) { case ZMAGIC: case QMAGIC: virtual_offset = PAGE_SIZE; file_offset = 0; break; default: return (-1); } } bss_size = roundup(a_out->a_bss, PAGE_SIZE); /* * Check various fields in header for validity/bounds. */ if (/* entry point must lay with text region */ a_out->a_entry < virtual_offset || a_out->a_entry >= virtual_offset + a_out->a_text || /* text and data size must each be page rounded */ a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK #ifdef __amd64__ || /* overflows */ virtual_offset + a_out->a_text + a_out->a_data + bss_size > UINT_MAX #endif ) return (-1); /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > imgp->attr->va_size) return (EFAULT); /* * text/data/bss must not exceed limits */ PROC_LOCK(imgp->proc); if (/* text can't exceed maximum text size */ a_out->a_text > maxtsiz || /* data + bss can't exceed rlimit */ a_out->a_data + bss_size > lim_cur_proc(imgp->proc, RLIMIT_DATA) || racct_set(imgp->proc, RACCT_DATA, a_out->a_data + bss_size) != 0) { PROC_UNLOCK(imgp->proc); return (ENOMEM); } PROC_UNLOCK(imgp->proc); /* * Avoid a possible deadlock if the current address space is destroyed * and that address space maps the locked vnode. In the common case, * the locked vnode's v_usecount is decremented but remains greater * than zero. Consequently, the vnode lock is not needed by vrele(). * However, in cases where the vnode lock is external, such as nullfs, * v_usecount may become zero. 
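 *
 * Hence the bracket that follows, shown here for clarity:
 *
 *	VOP_UNLOCK(imgp->vp, 0);
 *	error = exec_new_vmspace(imgp, &aout_sysvec);
 *	vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
 *
 * which tears the old address space down without the vnode lock held.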
*/ VOP_UNLOCK(imgp->vp, 0); /* * Destroy old process VM and create a new one (with a new stack) */ error = exec_new_vmspace(imgp, &aout_sysvec); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (error) return (error); /* * The vm space can be changed by exec_new_vmspace */ vmspace = imgp->proc->p_vmspace; object = imgp->object; map = &vmspace->vm_map; vm_map_lock(map); vm_object_reference(object); text_end = virtual_offset + a_out->a_text; error = vm_map_insert(map, object, file_offset, virtual_offset, text_end, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } data_end = text_end + a_out->a_data; if (a_out->a_data) { vm_object_reference(object); error = vm_map_insert(map, object, file_offset + a_out->a_text, text_end, data_end, VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); if (error) { vm_map_unlock(map); vm_object_deallocate(object); return (error); } } if (bss_size) { error = vm_map_insert(map, NULL, 0, data_end, data_end + bss_size, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { vm_map_unlock(map); return (error); } } vm_map_unlock(map); /* Fill in process VM information */ vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT; vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset; vmspace->vm_daddr = (caddr_t) (uintptr_t) (virtual_offset + a_out->a_text); /* Fill in image_params */ imgp->interpreted = 0; imgp->entry_addr = a_out->a_entry; imgp->proc->p_sysent = &aout_sysvec; return (0); } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw aout_execsw = { exec_aout_imgact, "a.out" }; EXEC_SET(aout, aout_execsw); + +#if defined(__i386__) && defined(COMPAT_43) +static void +exec_init_lcall(void *arg __unused) +{ + struct segment_descriptor *gdp; + u_int lcall_addr; + + gdp = &ldt[LSYS5CALLS_SEL].sd; + lcall_addr = aout_sysvec.sv_psstrings - szlcallcode; + gdp->sd_hibase = lcall_addr >> 24; + gdp->sd_lobase = lcall_addr; +} +SYSINIT(aout, SI_SUB_EXEC + 1, SI_ORDER_ANY, exec_init_lcall, NULL); +#endif
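
/*
 * Illustrative note on exec_init_lcall() above (the address shown is
 * an example, not from a real build): an i386 segment descriptor
 * keeps its 32-bit base split across two fields, so for, say,
 * lcall_addr = 0xbfbfdff0:
 *
 *	gdp->sd_hibase = lcall_addr >> 24;	bits 31..24, here 0xbf
 *	gdp->sd_lobase = lcall_addr;		bits 23..0, via the
 *						24-bit bitfield
 */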