Index: head/sys/amd64/amd64/genassym.c
===================================================================
--- head/sys/amd64/amd64/genassym.c	(revision 82308)
+++ head/sys/amd64/amd64/genassym.c	(revision 82309)
@@ -1,206 +1,208 @@
 /*-
  * Copyright (c) 1982, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)genassym.c	5.11 (Berkeley) 5/10/91
  * $FreeBSD$
  */
 
+#include "opt_upages.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/assym.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/resourcevar.h>
 /* XXX */
 #ifdef KTR_PERCPU
 #include <sys/ktr.h>
 #endif
 #include <machine/bootinfo.h>
 #include <machine/tss.h>
 #include <sys/vmmeter.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <sys/user.h>
 #include <net/if.h>
 #include <netinet/in.h>
 #include <nfs/nfsv2.h>
 #include <nfs/rpcv2.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsdiskless.h>
 #ifdef SMP
 #include <machine/apic.h>
 #endif
 #include <machine/cpu.h>
 #include <machine/sigframe.h>
 #include <machine/globaldata.h>
 #include <machine/vm86.h>
 
 /*
  * Values published through ASSYM(name, value).
  * NOTE(review): ASSYM() is provided by <sys/assym.h> (not visible here);
  * presumably each entry exposes the value to assembly sources -- confirm.
  */
 
 /* Field offsets within struct proc, struct vmspace and struct pmap. */
 ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
 ASSYM(P_ADDR, offsetof(struct proc, p_addr));
 ASSYM(P_INTR_NESTING_LEVEL, offsetof(struct proc, p_intr_nesting_level));
 ASSYM(P_SFLAG, offsetof(struct proc, p_sflag));
 ASSYM(P_STAT, offsetof(struct proc, p_stat));
 ASSYM(P_WCHAN, offsetof(struct proc, p_wchan));
 
 /* Constants re-exported under their own names. */
 ASSYM(PS_ASTPENDING, PS_ASTPENDING);
 ASSYM(PS_NEEDRESCHED, PS_NEEDRESCHED);
 
 /*
  * SSLEEP/SRUN constants, field offsets within struct vmmeter, and further
  * compile-time constants re-exported under their own names.
  */
 ASSYM(SSLEEP, SSLEEP);
 ASSYM(SRUN, SRUN);
 ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap));
 ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall));
 ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
 ASSYM(UPAGES, UPAGES);
 ASSYM(PAGE_SIZE, PAGE_SIZE);
 ASSYM(NPTEPG, NPTEPG);
 ASSYM(NPDEPG, NPDEPG);
 ASSYM(PDESIZE, PDESIZE);
 ASSYM(PTESIZE, PTESIZE);
 ASSYM(PAGE_SHIFT, PAGE_SHIFT);
 ASSYM(PAGE_MASK, PAGE_MASK);
 ASSYM(PDRSHIFT, PDRSHIFT);
 ASSYM(USRSTACK, USRSTACK);
 ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
 ASSYM(KERNBASE, KERNBASE);
 ASSYM(MCLBYTES, MCLBYTES);
 /*
  * Field offsets within struct pcb, plus the tss_esp0 offset within
  * struct i386tss.
  */
 ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
 ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi));
 ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi));
 ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp));
 ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp));
 ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx));
 ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip));
 ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0));
 
 /* Note: the exported name PCB_USERLDT maps to the field pcb_ldt. */
 ASSYM(PCB_USERLDT, offsetof(struct pcb, pcb_ldt));
 ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs));
 ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
 ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
 ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
 ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
 ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
 ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
 /* PCB_DBREGS is a constant re-exported as-is, not a field offset. */
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
 
 /*
  * PCB_SAVEFPU is the offset of pcb_save; the two *_SIZE entries are the
  * sizes of union savefpu and struct save87 respectively.
  */
 ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
 ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
 ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
 ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 
 /* The total size of struct pcb is only exported for SMP kernels. */
 #ifdef SMP
 ASSYM(PCB_SIZE, sizeof(struct pcb));
 #endif
 
 /* Field offsets within struct trapframe. */
 ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno));
 ASSYM(TF_ERR, offsetof(struct trapframe, tf_err));
 ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
 ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags));
 /*
  * Signal frame offsets.  SIGF_SC is taken from struct osigframe, while
  * SIGF_HANDLER and SIGF_UC are taken from struct sigframe.  The SC_*
  * entries are offsets within struct osigcontext and the UC_* entries are
  * offsets within ucontext_t (through its uc_mcontext member).
  */
 ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
 ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc));
 ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc));
 ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps));
 ASSYM(SC_FS, offsetof(struct osigcontext, sc_fs));
 ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs));
 ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno));
 ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags));
 ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs));
 /* Error numbers and MAXPATHLEN re-exported under their own names. */
 ASSYM(ENOENT, ENOENT);
 ASSYM(EFAULT, EFAULT);
 ASSYM(ENAMETOOLONG, ENAMETOOLONG);
 ASSYM(MAXPATHLEN, MAXPATHLEN);
 /*
  * Size of struct bootinfo and offsets of its fields, plus the size of
  * struct nfs_diskless.
  */
 ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo));
 ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version));
 ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname));
 ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless));
 ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon));
 ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless));
 ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size));
 ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab));
 ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
 ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
 /* Size of struct globaldata and offsets of its fields. */
 ASSYM(GD_SIZEOF, sizeof(struct globaldata));
 ASSYM(GD_PRVSPACE, offsetof(struct globaldata, gd_prvspace));
 ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
 ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc));
 ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc));
 ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
 ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss));
 ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
 ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
 ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
 ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
 ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
 
 /* XXX */
 /* The KTR-related globaldata offsets exist only when KTR_PERCPU is defined. */
 #ifdef KTR_PERCPU
 ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx));
 ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf));
 ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data));
 #endif
 
 ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid));
 
 /* Field offsets within struct LAPIC; exported for SMP kernels only. */
 #ifdef SMP
 ASSYM(LA_VER, offsetof(struct LAPIC, version));
 ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
 ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
 ASSYM(LA_SVR, offsetof(struct LAPIC, svr));
 ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo));
 ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
 #endif
 
 /* Selector values built with GSEL(index, SEL_KPL). */
 ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
 ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
 ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
 
 /*
  * BC32SEL is also built with GSEL(); GPROC0_SEL is exported as the raw
  * constant, without GSEL().  VM86_FRAMESIZE is sizeof(struct vm86frame).
  */
 ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
 ASSYM(GPROC0_SEL, GPROC0_SEL);
 ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
 
 /* Field offsets within struct mtx (MTX_RECURSECNT maps to mtx_recurse). */
 ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
 ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse));
 ASSYM(MTX_SAVECRIT, offsetof(struct mtx, mtx_savecrit));
Index: head/sys/amd64/amd64/machdep.c
===================================================================
--- head/sys/amd64/amd64/machdep.c	(revision 82308)
+++ head/sys/amd64/amd64/machdep.c	(revision 82309)
@@ -1,2530 +1,2534 @@
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  * $FreeBSD$
  */
 
 #include "opt_atalk.h"
 #include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_ipx.h"
 #include "opt_isa.h"
 #include "opt_maxmem.h"
 #include "opt_msgbuf.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
+#include "opt_upages.h"
 /* #include "opt_userconfig.h" */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/reboot.h>
 #include <sys/smp.h>
 #include <sys/callout.h>
 #include <sys/msgbuf.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <sys/lock.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
 #include <sys/user.h>
 #include <sys/exec.h>
 #include <sys/cons.h>
 
 #include <ddb/ddb.h>
 
 #include <net/netisr.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/reg.h>
 #include <machine/clock.h>
 #include <machine/specialreg.h>
 #include <machine/bootinfo.h>
 #include <machine/md_var.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
 #include <machine/globals.h>
 #ifdef PERFMON
 #include <machine/perfmon.h>
+#endif
+#ifdef SMP
+#include <machine/privatespace.h>
 #endif
 
 #include <i386/isa/icu.h>
 #include <i386/isa/intr_machdep.h>
 #include <isa/rtc.h>
 #include <machine/vm86.h>
 #include <sys/ptrace.h>
 #include <machine/sigframe.h>
 
 /* Routines declared here by hand rather than through a header. */
 extern void init386 __P((int first));
 extern void dblfault_handler __P((void));
 
 extern void printcpuinfo(void);	/* XXX header file */
 extern void earlysetcpuclass(void);	/* same header file */
 extern void finishidentcpu(void);
 extern void panicifcpuunsupported(void);
 extern void initializecpu(void);
 
 /*
  * CS_SECURE(cs): true when ISPL(cs) equals SEL_UPL.
  * EFL_SECURE(ef, oef): true when ef and oef differ only in bits that are
  * set in PSL_USERCHANGE.
  */
 #define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
 static void cpu_startup __P((void *));
 #ifdef CPU_ENABLE_SSE
 static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
 static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
 #endif /* CPU_ENABLE_SSE */
 /* Registers cpu_startup() with SYSINIT at SI_SUB_CPU / SI_ORDER_FIRST. */
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
 /*
  * Selectors that the signal delivery and return code in this file stores
  * into the user trap frame's segment registers.
  */
 int	_udatasel, _ucodesel;
 u_int	atdevbase;
 
 /* Read-only debug sysctls; present only when SWTCH_OPTIM_STATS is defined. */
 #if defined(SWTCH_OPTIM_STATS)
 extern int swtch_optim_stats;
 SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
 	CTLFLAG_RD, &swtch_optim_stats, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
 	CTLFLAG_RD, &tlb_flush_count, 0, "");
 #endif
 
 /* machdep.ispc98: read-only; 1 when built with PC98 defined, otherwise 0. */
 #ifdef PC98
 static int	ispc98 = 1;
 #else
 static int	ispc98 = 0;
 #endif
 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
 
 /*
  * physmem is passed through ctob() and compared against page counts by the
  * sysctl handlers in this file.
  * NOTE(review): presumably a count of physical pages -- confirm.
  */
 int physmem = 0;
 int cold = 1;
 
 #ifdef COMPAT_43
 static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code));
 #endif
 
 /*
  * Sysctl handler for hw.physmem (registered just below).
  * Passes ctob(physmem) to sysctl_handle_int() and returns that function's
  * result unchanged.
  * NOTE(review): the value is handled as an int; presumably ctob() converts
  * a page count to bytes, so large memory sizes may not fit -- confirm.
  */
 static int
 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
 {
 	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
 	return (error);
 }
 
 /* Read-only integer node; the format string is "IU". */
 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_physmem, "IU", "");
 
 /*
  * Sysctl handler for hw.usermem (registered just below).
  * Reports ctob(physmem - cnt.v_wire_count) through sysctl_handle_int() and
  * returns that function's result unchanged.
  */
 static int
 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
 {
 	int error = sysctl_handle_int(oidp, 0,
 		ctob(physmem - cnt.v_wire_count), req);
 	return (error);
 }
 
 /* Read-only integer node; the format string is "IU". */
 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_usermem, "IU", "");
 
 /*
  * Sysctl handler for hw.availpages (registered just below).
  * Reports i386_btop(avail_end - avail_start) through sysctl_handle_int()
  * and returns that function's result unchanged.
  */
 static int
 sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
 {
 	int error = sysctl_handle_int(oidp, 0,
 		i386_btop(avail_end - avail_start), req);
 	return (error);
 }
 
 /* Read-only integer node with a dynamically assigned OID; format "I". */
 SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_availpages, "I", "");
 
 /* Maxmem is printed by cpu_startup() as ptoa(Maxmem) ("real memory"). */
 int Maxmem = 0;
 long dumplo;
 
 /*
  * Pairs of addresses describing memory chunks: cpu_startup() walks the
  * array two entries at a time, treating [indx] as the start and [indx + 1]
  * as the end, and stops when an end entry is 0.
  */
 vm_offset_t phys_avail[10];
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
 
 struct kva_md_info kmi;
 
 static struct trapframe proc0_tf;
 /* A statically allocated struct globaldata exists only on non-SMP kernels. */
 #ifndef SMP
 static struct globaldata __globaldata;
 #endif
 
 struct mtx sched_lock;
 struct mtx Giant;
 
 /*
  * cpu_startup: startup hook registered elsewhere in this file through
  * SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL).
  *
  * dummy: SYSINIT argument; it is never referenced in the body.
  *
  * In order, the routine calls the CPU identification helpers and
  * startrtclock(), prints the "real memory" line (and, when bootverbose is
  * set, each phys_avail chunk), calls vm_ksubmap_init(&kmi), optionally
  * runs userconfig(), prints the "avail memory" line, calls bufinit() and
  * vm_pager_bufferinit(), registers GLOBALDATA and, on non-SMP kernels
  * only, calls cpu_setregs().
  */
 static void
 cpu_startup(dummy)
 	void *dummy;
 {
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	earlysetcpuclass();
 	startrtclock();
 	printcpuinfo();
 	panicifcpuunsupported();
 #ifdef PERFMON
 	perfmon_init();
 #endif
 	printf("real memory  = %u (%uK bytes)\n", ptoa(Maxmem),
 	    ptoa(Maxmem) / 1024);
 	/*
 	 * Display any holes after the first chunk of extended memory.
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		/* Entries come in (start, end) pairs; an end of 0 terminates. */
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
 			unsigned int size1;
 
 			size1 = phys_avail[indx + 1] - phys_avail[indx];
 			printf("0x%08x - 0x%08x, %u bytes (%u pages)\n",
 			    phys_avail[indx], phys_avail[indx + 1] - 1, size1,
 			    size1 / PAGE_SIZE);
 		}
 	}
 
 	vm_ksubmap_init(&kmi);
 
 	/*
 	 * Everything from here to the matching #endif is compiled out.  It
 	 * also uses names (v, firstaddr, size, i, physmem_est, ...) that are
 	 * not declared in this function.
 	 */
 #if 0
 	/*
 	 * Calculate callout wheel size
 	 */
 	for (callwheelsize = 1, callwheelbits = 0;
 	     callwheelsize < ncallout;
 	     callwheelsize <<= 1, ++callwheelbits)
 		;
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Allocate space for system data structures.
 	 * The first available kernel virtual address is in "v".
 	 * As pages of kernel virtual memory are allocated, "v" is incremented.
 	 * As pages of memory are allocated and cleared,
 	 * "firstaddr" is incremented.
 	 * An index into the kernel page table corresponding to the
 	 * virtual memory address maintained in "v" is kept in "mapaddr".
 	 */
 
 	/*
 	 * Make two passes.  The first pass calculates how much memory is
 	 * needed and allocates it.  The second pass assigns virtual
 	 * addresses to the various data structures.
 	 */
 	firstaddr = 0;
 again:
 	v = (caddr_t)firstaddr;
 
 #define	valloc(name, type, num) \
 	    (name) = (type *)v; v = (caddr_t)((name)+(num))
 #define	valloclim(name, type, num, lim) \
 	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
 
 	valloc(callout, struct callout, ncallout);
 	valloc(callwheel, struct callout_tailq, callwheelsize);
 
 	/*
 	 * Discount the physical memory larger than the size of kernel_map
 	 * to avoid eating up all of KVA space.
 	 */
 	if (kernel_map->first_free == NULL) {
 		printf("Warning: no free entries in kernel_map.\n");
 		physmem_est = physmem;
 	} else {
 		physmem_est = min(physmem, btoc(kernel_map->max_offset -
 		    kernel_map->min_offset));
 	}
 
 	/*
 	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
 	 * For the first 64MB of ram nominally allocate sufficient buffers to
 	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
 	 * buffers to cover 1/20 of our ram over 64MB.  When auto-sizing
 	 * the buffer cache we limit the eventual kva reservation to
 	 * maxbcache bytes.
 	 *
 	 * factor represents the 1/4 x ram conversion.
 	 */
 	if (nbuf == 0) {
 		int factor = 4 * BKVASIZE / PAGE_SIZE;
 
 		nbuf = 50;
 		if (physmem_est > 1024)
 			nbuf += min((physmem_est - 1024) / factor,
 			    16384 / factor);
 		if (physmem_est > 16384)
 			nbuf += (physmem_est - 16384) * 2 / (factor * 5);
 
 		if (maxbcache && nbuf > maxbcache / BKVASIZE)
 			nbuf = maxbcache / BKVASIZE;
 	}
 
 	/*
 	 * Do not allow the buffer_map to be more than 1/2 the size of the
 	 * kernel_map.
 	 */
 	if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / 
 	    (BKVASIZE * 2)) {
 		nbuf = (kernel_map->max_offset - kernel_map->min_offset) / 
 		    (BKVASIZE * 2);
 		printf("Warning: nbufs capped at %d\n", nbuf);
 	}
 
 	nswbuf = max(min(nbuf/4, 256), 16);
 
 	valloc(swbuf, struct buf, nswbuf);
 	valloc(buf, struct buf, nbuf);
 	v = bufhashinit(v);
 
 	/*
 	 * End of first pass, size has been calculated so allocate memory
 	 */
 	if (firstaddr == 0) {
 		size = (vm_size_t)(v - firstaddr);
 		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
 		if (firstaddr == 0)
 			panic("startup: no room for tables");
 		goto again;
 	}
 
 	/*
 	 * End of second pass, addresses have been assigned
 	 */
 	if ((vm_size_t)(v - firstaddr) != size)
 		panic("startup: table size inconsistency");
 
 	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
 			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
 	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
 				(nbuf*BKVASIZE));
 	buffer_map->system_map = 1;
 	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
 				(nswbuf*MAXPHYS) + pager_map_size);
 	pager_map->system_map = 1;
 	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 				(16*(ARG_MAX+(PAGE_SIZE*3))));
 
 	/*
 	 * XXX: Mbuf system machine-specific initializations should
 	 *      go here, if anywhere.
 	 */
 
 	/*
 	 * Initialize callouts
 	 */
 	SLIST_INIT(&callfree);
 	for (i = 0; i < ncallout; i++) {
 		callout_init(&callout[i], 0);
 		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
 	}
 
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&callwheel[i]);
 	}
 
 	mtx_init(&callout_lock, "callout", MTX_SPIN | MTX_RECURSE);
 #endif
 
 #if defined(USERCONFIG)
 	userconfig();
 	cninit();		/* the preferred console may have changed */
 #endif
 
 	printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count),
 	    ptoa(cnt.v_free_count) / 1024);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 
 	globaldata_register(GLOBALDATA);
 #ifndef SMP
 	/* For SMP, we delay the cpu_setregs() until after SMP startup. */
 	cpu_setregs();
 #endif
 }
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * at top to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  */
 #ifdef COMPAT_43
 /*
  * osendsig: deliver a signal to curproc using the old frame layout
  * (struct osigframe / struct osigcontext).
  *
  * catcher: handler address stored in the frame.
  * sig:     signal number; it may be translated through the process's
  *          sv_sigtbl before being stored in the frame.
  * mask:    signal mask saved into the context in old format (SIG2OSIG).
  * code:    value stored as si_code (SA_SIGINFO case) or as the second
  *          handler argument (old-style case).
  *
  * The frame is assembled in the local `sf', copied out to the user address
  * `fp', and the trap frame is then rewritten so that user execution resumes
  * at PS_STRINGS - szosigcode with esp pointing at the new frame.  When the
  * frame address is unusable the process is sent SIGILL instead.
  */
 static void
 osendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct osigframe sf;
 	struct osigframe *fp;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	regs = p->p_frame;
 	/* Computed before the frame is placed, from the current user esp. */
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the very top of the alternate stack. */
 		fp = (struct osigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct osigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/*
 		 * The cast binds before the subtraction, so this is one
 		 * whole osigframe below the current user stack pointer.
 		 */
 		fp = (struct osigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *fp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)fp) == 0 ||
 	    !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		/*
 		 * Reset SIGILL to the default action and remove it from the
 		 * ignore, catch and mask sets before posting it.
 		 */
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		psignal(p, SIGILL);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	/* Pointers stored in the frame refer to the user copy at fp. */
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
 		sf.sf_siginfo.si_signo = sig;
 		sf.sf_siginfo.si_code = code;
 		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_arg2 = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/* Save most if not all of trap frame. */
 	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
 	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
 	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
 	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
 	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
 	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
 	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
 	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
 	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
 	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
 	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
 	/* %gs is obtained with rgs() rather than from the trap frame. */
 	sf.sf_siginfo.si_sc.sc_gs = rgs();
 	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
 
 	/* Build the signal context to be used by osigreturn(). */
 	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
 	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
 	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
 	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
 	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
 	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
 	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
 		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
 		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
 		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
 
 		/*
 		 * Without VME, VIF/VIP in the saved flags are taken from
 		 * vm86->vm86_eflags instead of the trap frame.
 		 */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_siginfo.si_sc.sc_ps =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/* See sendsig() for comments. */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(p, SIGILL);
 		/* NOTREACHED */
 	}
 
 	/*
 	 * Redirect the user context: esp at the new frame, eip szosigcode
 	 * bytes below PS_STRINGS, and the user code/data selectors in the
 	 * segment registers (%gs is loaded directly with load_gs()).
 	 */
 	regs->tf_esp = (int)fp;
 	regs->tf_eip = PS_STRINGS - szosigcode;
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 }
 #endif
 
 /*
  * sendsig: deliver a signal to curproc using struct sigframe (with an
  * embedded ucontext and siginfo).
  *
  * catcher: handler address stored in the frame.
  * sig:     signal number; it may be translated through the process's
  *          sv_sigtbl before being stored in the frame.
  * mask:    signal mask saved in the frame's uc_sigmask.
  * code:    value stored as si_code (SA_SIGINFO case) or passed in place of
  *          the siginfo pointer (old-style case).
  *
  * With COMPAT_43, signals that are members of ps_osigset are handed to
  * osendsig() instead.  Otherwise the frame is assembled in the local `sf',
  * copied out to the user address `sfp', and the trap frame is rewritten so
  * that user execution resumes at PS_STRINGS - *sv_szsigcode with esp
  * pointing at the new frame.  When the frame address is unusable the
  * process is sent SIGILL instead.
  */
 void
 sendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct sigframe sf;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct sigframe *sfp;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 #ifdef COMPAT_43
 	/* The process lock is dropped before delegating to osendsig(). */
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		PROC_UNLOCK(p);
 		osendsig(catcher, sig, mask, code);
 		return;
 	}
 #endif
 	regs = p->p_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/*
 	 * Copies the whole trap frame into the mcontext starting at mc_fs.
 	 * NOTE(review): this assumes the mcontext members from mc_fs onward
 	 * are laid out exactly like struct trapframe -- confirm.
 	 */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the very top of the alternate stack. */
 		sfp = (struct sigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct sigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/*
 		 * The cast binds before the subtraction, so this is one
 		 * whole sigframe below the current user stack pointer.
 		 */
 		sfp = (struct sigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *sfp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)sfp) == 0 ||
 	    !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 #ifdef DEBUG
 		printf("process %d has trashed its stack\n", p->p_pid);
 #endif
 		PROC_LOCK(p);
 		/*
 		 * Reset SIGILL to the default action and remove it from the
 		 * ignore, catch and mask sets before posting it.
 		 */
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		psignal(p, SIGILL);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	/* Pointers stored in the frame refer to the user copy at sfp. */
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill siginfo structure. */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = code;
 		sf.sf_si.si_addr = (void *)regs->tf_err;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		/*
 		 * Without VME, VIF/VIP in the saved flags are taken from
 		 * vm86->vm86_eflags instead of the trap frame.
 		 */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * We should never have PSL_T set when returning from vm86
 		 * mode.  It may be set here if we deliver a signal before
 		 * getting to vm86 mode, so turn it off.
 		 *
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(p, SIGILL);
 		/* NOTREACHED */
 	}
 
 	/*
 	 * Redirect the user context: esp at the new frame, eip
 	 * *sv_szsigcode bytes below PS_STRINGS, and the user code/data
 	 * selectors in the trap frame's segment registers.
 	 * NOTE(review): osendsig() additionally calls load_gs(_udatasel) at
 	 * this point; no %gs reload happens here -- confirm this is intended.
 	 */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  */
 #ifdef COMPAT_43
 /*
  * osigreturn: restore the user context from an old-format signal context
  * (struct osigcontext) supplied by the caller.
  *
  * p:   process whose trap frame (p->p_frame) is rewritten.
  * uap: syscall arguments; uap->sigcntxp is the user address of the context.
  *
  * Returns EFAULT when the context is not readable, EINVAL when the request
  * is rejected (vm86 state not set up, disallowed eflags change, or a %cs
  * that fails CS_SECURE), and EJUSTRETURN once the trap frame, signal mask
  * and on-stack flag have been restored.
  *
  * NOTE(review): after the useracc() check the context is read by
  * dereferencing the user pointer directly, field by field, rather than
  * from a kernel copy -- confirm that this is acceptable here.
  */
 int
 osigreturn(p, uap)
 	struct proc *p;
 	struct osigreturn_args /* {
 		struct osigcontext *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	struct osigcontext *scp;
 	int eflags;
 
 	regs = p->p_frame;
 	scp = uap->sigcntxp;
 	if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ))
 		return (EFAULT);
 	eflags = scp->sc_ps;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		/*
 		 * NOTE(review): there is no return after trapsignal(), so
 		 * the restore below still runs in this case -- confirm.
 		 */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Only the bits in VME_USERCHANGE (with VME) or
 		 * VM_USERCHANGE (without) are taken from the user-supplied
 		 * flags; the rest come from the current trap frame, and
 		 * PSL_VM is always set.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * The context's segment values go to the vm86 slots; the
 		 * regular ds/es/fs slots get the user data selector.
 		 */
 		tf->tf_vm86_ds = scp->sc_ds;
 		tf->tf_vm86_es = scp->sc_es;
 		tf->tf_vm86_fs = scp->sc_fs;
 		tf->tf_vm86_gs = scp->sc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		if (!CS_SECURE(scp->sc_cs)) {
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 		regs->tf_ds = scp->sc_ds;
 		regs->tf_es = scp->sc_es;
 		regs->tf_fs = scp->sc_fs;
 	}
 
 	/* Restore remaining registers. */
 	regs->tf_eax = scp->sc_eax;
 	regs->tf_ebx = scp->sc_ebx;
 	regs->tf_ecx = scp->sc_ecx;
 	regs->tf_edx = scp->sc_edx;
 	regs->tf_esi = scp->sc_esi;
 	regs->tf_edi = scp->sc_edi;
 	regs->tf_cs = scp->sc_cs;
 	regs->tf_ss = scp->sc_ss;
 	regs->tf_isp = scp->sc_isp;
 
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	/* Bit 0 of sc_onstack selects whether SS_ONSTACK is set or cleared. */
 	if (scp->sc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	/*
 	 * Install the saved old-format mask, then apply SIG_CANTMASK() to
 	 * the result.
 	 */
 	SIGSETOLD(p->p_sigmask, scp->sc_mask);
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	regs->tf_ebp = scp->sc_fp;
 	regs->tf_esp = scp->sc_sp;
 	regs->tf_eip = scp->sc_pc;
 	/* eflags was validated or merged above, depending on PSL_VM. */
 	regs->tf_eflags = eflags;
 	return (EJUSTRETURN);
 }
 #endif
 
 /*
  * sigreturn: return from a signal handler using a ucontext_t.
  *
  *	p	process whose saved trapframe (p->p_frame) is rewritten
  *	uap	syscall arguments; uap->sigcntxp is the user address of the
  *		context to restore from
  *
  * With COMPAT_43, a context whose sc_trapno field holds 0x01d516 is
  * handed to osigreturn() instead.  Returns EFAULT if the context is not
  * readable, EINVAL if vm86 mode is requested without an initialized vm86
  * area or if the requested eflags or %cs fail the EFL_SECURE/CS_SECURE
  * checks, and EJUSTRETURN on success.
  */
 int
 sigreturn(p, uap)
 	struct proc *p;
 	struct sigreturn_args /* {
 		ucontext_t *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	int cs, eflags;
 
 	ucp = uap->sigcntxp;
 #ifdef COMPAT_43
 	if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ))
 		return (EFAULT);
 	/*
 	 * NOTE(review): 0x01d516 is presumably a marker stamped into
 	 * old-style contexts by the old sendsig path -- confirm there.
 	 */
 	if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516)
 		return (osigreturn(p, (struct osigreturn_args *)uap));
 	/*
 	 * Since ucp is not an osigcontext but a ucontext_t, we have to
 	 * check again if all of it is accessible.  A ucontext_t is
 	 * much larger, so instead of just checking for the pointer
 	 * being valid for the size of an osigcontext, now check for
 	 * it being valid for a whole, new-style ucontext_t.
 	 */
 #endif
 	if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ))
 		return (EFAULT);
 
 	regs = p->p_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		/* Returning to vm86 mode: the frame is viewed as a vm86 frame. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		/* Note: the signal is posted but processing continues below. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Only the bits in the *_USERCHANGE mask are taken from the
 		 * user-supplied eflags; all others come from the current frame.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * assumes the mcontext fields starting at mc_fs have the same
 		 * layout as struct trapframe -- TODO confirm against the
 		 * mcontext definition.
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		/* The copy above overwrote tf_eflags; install the merged value. */
 		tf->tf_eflags = eflags;
 		/*
 		 * Move the user's segment values into the vm86 slots and load
 		 * the ordinary slots with _udatasel.
 		 */
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 			printf("sigreturn: eflags = 0x%x\n", eflags);
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			printf("sigreturn: cs = 0x%x\n", cs);
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 
 		/*
 		 * Both checks passed: take the whole register set from the
 		 * context (same layout assumption as in the vm86 branch).
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 	/* The signal-stack flag and signal mask are updated under the proc lock. */
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	p->p_sigmask = ucp->uc_sigmask;
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	return (EJUSTRETURN);
 }
 
 /*
  * Machine dependent boot() routine
  *
  * I haven't seen anything to put here yet
  * Possibly some stuff might be grafted back here from boot()
  */
 void
 cpu_boot(int howto)
 {
 	/* Deliberately a no-op: there is no machine-dependent boot work yet. */
 	(void)howto;
 }
 
 /*
  * Shutdown the CPU as much as possible
  */
 void
 cpu_halt(void)
 {
 	/* Never returns: re-issue hlt each time the CPU wakes up. */
 	while (1) {
 		__asm__ ("hlt");
 	}
 }
 
 /*
  * Hook to idle the CPU when possible.  This currently only works in
  * the !SMP case, as there is no clean way to ensure that a CPU will be
  * woken when there is work available for it.
  */
 /*
  * Run-time switch read by cpu_idle(): non-zero (the default) lets the
  * idle hook execute hlt, zero disables it.  Exported read/write as the
  * cpu_idle_hlt integer under the _machdep sysctl node.
  */
 static int	cpu_idle_hlt = 1;
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 
 /*
  * Note that we have to be careful here to avoid a race between checking
  * procrunnable() and actually halting.  If we don't do this, we may waste
  * the time between calling hlt and the next interrupt even though there
  * is a runnable process.
  */
 void
 cpu_idle(void)
 {
 #ifndef SMP
 	/* Halting in the idle loop can be turned off through the sysctl. */
 	if (!cpu_idle_hlt)
 		return;
 
 	/*
 	 * Look for runnable work with interrupts disabled so that nothing
 	 * can become runnable between the check and the halt.
 	 */
 	disable_intr();
 	if (procrunnable()) {
 		enable_intr();
 		return;
 	}
 
 	/*
 	 * Nothing to run: re-enable interrupts and halt.  Keep these two
 	 * statements adjacent, exactly as in the original sequence.
 	 */
 	enable_intr();
 	__asm __volatile("hlt");
 #endif
 }
 
 /*
  * Clear registers on exec
  */
 /*
  * setregs: reset the machine state of process p for a freshly exec'd
  * image.
  *
  *	p		process being exec'd
  *	entry		value loaded into the saved %eip
  *	stack		value loaded into the saved %esp
  *	ps_strings	value loaded into the saved %ebx
  *
  * Frees any user LDT, zeroes the trapframe and refills it with user
  * selectors, resets %gs, clears in-use debug registers, clears the
  * FP_SOFTFP flag, sets CR0_MP | CR0_TS in %cr0 and zeroes p_retval[1].
  */
 void
 setregs(p, entry, stack, ps_strings)
 	struct proc *p;
 	u_long entry;
 	u_long stack;
 	u_long ps_strings;
 {
 	struct trapframe *regs = p->p_frame;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 
 	/* A private LDT from the old image must not survive the exec. */
 	if (pcb->pcb_ldt)
 		user_ldt_free(pcb);
   
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_eip = entry;
 	regs->tf_esp = stack;
 	/*
 	 * NOTE(review): the frame was zeroed just above, so
 	 * (regs->tf_eflags & PSL_T) is always 0 here and the result is
 	 * plain PSL_USER.  Confirm whether the trace flag was meant to
 	 * be carried across exec.
 	 */
 	regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_cs = _ucodesel;
 
 	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 	regs->tf_ebx = ps_strings;
 
 	/* reset %gs as well */
 	/*
 	 * %gs is not part of the trapframe: load it directly when this is
 	 * the current pcb, otherwise store the value in the pcb.
 	 */
 	if (pcb == PCPU_GET(curpcb))
 		load_gs(_udatasel);
 	else
 		pcb->pcb_gs = _udatasel;
 
         /*
          * Reset the hardware debug registers if they were in use.
          * They won't have any meaning for the newly exec'd process.  
          */
         if (pcb->pcb_flags & PCB_DBREGS) {
                 pcb->pcb_dr0 = 0;
                 pcb->pcb_dr1 = 0;
                 pcb->pcb_dr2 = 0;
                 pcb->pcb_dr3 = 0;
                 pcb->pcb_dr6 = 0;
                 pcb->pcb_dr7 = 0;
                 if (pcb == PCPU_GET(curpcb)) {
 		        /*
 			 * Clear the debug registers on the running
 			 * CPU, otherwise they will end up affecting
 			 * the next process we switch to.
 			 */
 		        reset_dbregs();
                 }
                 pcb->pcb_flags &= ~PCB_DBREGS;
         }
 
 	/*
 	 * Initialize the math emulator (if any) for the current process.
 	 * Actually, just clear the bit that says that the emulator has
 	 * been initialized.  Initialization is delayed until the process
 	 * traps to the emulator (if it is done at all) mainly because
 	 * emulators don't provide an entry point for initialization.
 	 */
 	/* Same pcb as the local `pcb' above, reached through p->p_addr. */
 	p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;
 
 	/*
 	 * Arrange to trap the next npx or `fwait' instruction (see npx.c
 	 * for why fwait must be trapped at least if there is an npx or an
 	 * emulator).  This is mainly to handle the case where npx0 is not
 	 * configured, since the npx routines normally set up the trap
 	 * otherwise.  It should be done only at boot time, but doing it
 	 * here allows modifying `npx_exists' for testing the emulator on
 	 * systems with an npx.
 	 */
 	load_cr0(rcr0() | CR0_MP | CR0_TS);
 
 #ifdef DEV_NPX
 	/* Initialize the npx (if any) for the current process. */
 	npxinit(__INITIAL_NPXCW__);
 #endif
 
 	/*
 	 * XXX - Linux emulator
 	 * Make sure edx is 0x0 on entry. Linux binaries depend
 	 * on it.
 	 */
 	p->p_retval[1] = 0;
 }
 
 void
 cpu_setregs(void)
 {
 	unsigned int wanted;
 
 	/*
 	 * Collect every %cr0 bit to turn on, then apply them in one write:
 	 * CR0_NE is also done by npxinit(), and CR0_MP | CR0_TS are set
 	 * again at every execve().
 	 */
 	wanted = CR0_NE | CR0_MP | CR0_TS;
 #ifndef I386_CPU
 	wanted |= CR0_WP | CR0_AM;
 #endif
 	load_cr0(rcr0() | wanted);
 
 	/* Start out with the user data selector in %gs. */
 	load_gs(_udatasel);
 }
 
 static int
 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
 		req);
 	if (!error && req->newptr)
 		resettodr();
 	return (error);
 }
 
 /*
  * Nodes registered under _machdep.  All carry an empty description
  * string.
  */
 
 /* adjkerntz: read/write int routed through sysctl_machdep_adjkerntz(). */
 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
 	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
 
 /* disable_rtc_set: read/write int backed directly by the variable. */
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
 	CTLFLAG_RW, &disable_rtc_set, 0, "");
 
 /* bootinfo: read-only export of the bootinfo structure. */
 SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
 	CTLFLAG_RD, &bootinfo, bootinfo, "");
 
 /* wall_cmos_clock: read/write int backed directly by the variable. */
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
 /*
  * Initialize 386 and configure to run kernel
  */
 
 /*
  * Initialize segments & interrupt table
  */
 
 /* Selector of the default LDT; init386() sets it from GLDT_SEL. */
 int _default_ldt;
 union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
 /* Static storage that the idt pointer below initially refers to. */
 static struct gate_descriptor idt0[NIDT];
 struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
 union descriptor ldt[NLDT];		/* local descriptor table */
 #ifdef SMP
 /* table descriptors - used to load tables by microp */
 /* (in the !SMP case init386() declares these as locals instead) */
 struct region_descriptor r_gdt, r_idt;
 #endif
 
 int private_tss;			/* flag indicating private tss */
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 extern int has_f00f_bug;
 #endif
 
 /*
  * TSS referenced by the GPANIC_SEL entry of gdt_segs[]; init386()
  * installs IDT vector 8 as a task gate to that selector.
  */
 static struct i386tss dblfault_tss;
 /* presumably the stack used while running on dblfault_tss -- confirm */
 static char dblfault_stack[PAGE_SIZE];
 
 extern  struct user *proc0paddr;
 
 
 /* software prototypes -- in more palatable form */
 /*
  * One soft_segment_descriptor per GDT slot, in G*_SEL order (the slot
  * number is given in each entry's leading comment).  init386() patches
  * the limits of GCODE_SEL/GDATA_SEL and the base/limit of GPRIV_SEL and
  * GPROC0_SEL, then converts every entry into gdt[] with ssdtosd().
  * Bases marked "overwritten" are placeholders filled in elsewhere.
  */
 struct soft_segment_descriptor gdt_segs[] = {
 /* GNULL_SEL	0 Null Descriptor */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GCODE_SEL	1 Code Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GDATA_SEL	2 Data Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPRIV_SEL	3 SMP Per-Processor Private Data Descriptor */
 {	0x0,			/* segment base address (set in init386) */
 	0xfffff,		/* length (replaced in init386) */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPROC0_SEL	4 Proc 0 Tss Descriptor */
 {
 	0x0,			/* segment base address (set in init386) */
 	sizeof(struct i386tss)-1,/* length - one struct i386tss */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GLDT_SEL	5 LDT Descriptor */
 {	(int) ldt,		/* segment base address  */
 	sizeof(ldt)-1,		/* length - the whole ldt[] array */
 	SDT_SYSLDT,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GUSERLDT_SEL	6 User LDT Descriptor per process */
 {	(int) ldt,		/* segment base address  */
 	(512 * sizeof(union descriptor)-1),		/* length */
 	SDT_SYSLDT,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GTGATE_SEL	7 Null Descriptor - Placeholder */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 {	0x400,			/* segment base address */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPANIC_SEL	9 Panic Tss Descriptor */
 {	(int) &dblfault_tss,	/* segment base address  */
 	sizeof(struct i386tss)-1,/* length - one struct i386tss */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Prototype entries for the default LDT.  init386() sets the limits of
  * the LUCODE_SEL and LUDATA_SEL entries to cover addresses below
  * VM_MAXUSER_ADDRESS and then converts every entry into ldt[] with
  * ssdtosd().
  */
 static struct soft_segment_descriptor ldt_segs[] = {
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Code Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length (replaced in init386) */
 	SDT_MEMERA,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Data Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length (replaced in init386) */
 	SDT_MEMRWA,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Fill in IDT slot `idx' as a present gate of type `typ' with privilege
  * level `dpl' that refers to handler `func' through selector `selec'.
  */
 void
 setidt(idx, func, typ, dpl, selec)
 	int idx;
 	inthand_t *func;
 	int typ;
 	int dpl;
 	int selec;
 {
 	struct gate_descriptor *gate = &idt[idx];
 	int handler = (int)func;
 
 	/* The handler address is split across the low and high offset fields. */
 	gate->gd_looffset = handler;
 	gate->gd_selector = selec;
 	gate->gd_stkcpy = 0;
 	gate->gd_xx = 0;
 	gate->gd_type = typ;
 	gate->gd_dpl = dpl;
 	gate->gd_p = 1;
 	gate->gd_hioffset = handler >> 16;
 }
 
 /* Build the symbol name of an interrupt/trap entry point: X<name>. */
 #define	IDTVEC(name)	__CONCAT(X,name)
 
 /*
  * Entry points defined outside this file; init386() installs several
  * of them into the IDT with setidt().
  */
 extern inthand_t
 	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 
 /*
  * Convert the segment descriptor `sd' into its software form `ssd':
  * the split base and limit fields are joined, the rest is copied.
  */
 void
 sdtossd(sd, ssd)
 	struct segment_descriptor *sd;
 	struct soft_segment_descriptor *ssd;
 {
 	/* Attribute fields carry over one-to-one. */
 	ssd->ssd_type = sd->sd_type;
 	ssd->ssd_dpl = sd->sd_dpl;
 	ssd->ssd_p = sd->sd_p;
 	ssd->ssd_def32 = sd->sd_def32;
 	ssd->ssd_gran = sd->sd_gran;
 
 	/* Base: high part shifted up by 24 bits, joined with the low part. */
 	ssd->ssd_base = sd->sd_lobase | (sd->sd_hibase << 24);
 
 	/* Limit: high part shifted up by 16 bits, joined with the low part. */
 	ssd->ssd_limit = sd->sd_lolimit | (sd->sd_hilimit << 16);
 }
 
 /* Number of vm_offset_t slots in physmap[]: 8 base/bound pairs. */
 #define PHYSMAP_SIZE	(2 * 8)
 
 /*
  * Populate the (physmap) array with base/bound pairs describing the
  * available physical memory in the system, then test this memory and
  * build the phys_avail array describing the actually-available memory.
  *
  * If we cannot accurately determine the physical memory map, then use
  * value from the 0xE801 call, and failing that, the RTC.
  *
  * Total memory size may be set by the kernel environment variable
  * hw.physmem or the compile-time define MAXMEM.
  *
  *	first	passed on to pmap_bootstrap(); pages at physical
  *		addresses in [0x100000, first) are treated as kernel
  *		memory and left out of phys_avail[]
  *
  * On return Maxmem, physmem, phys_avail[] and avail_end are set, and
  * round_page(MSGBUF_SIZE) bytes have been trimmed off the end of the
  * last phys_avail[] chunk for the message buffer.
  *
  * physmap_idx is always the index of the base slot of the last valid
  * pair, so physmap[physmap_idx + 1] is the highest bound recorded.
  */
 static void
 getmemsize(int first)
 {
 	int i, physmap_idx, pa_indx;
 	u_int basemem, extmem;
 	struct vm86frame vmf;
 	struct vm86context vmc;
 	vm_offset_t pa, physmap[PHYSMAP_SIZE];
 	pt_entry_t pte;
 	const char *cp;
 	struct bios_smap *smap;
 
 	bzero(&vmf, sizeof(struct vm86frame));
 	bzero(physmap, sizeof(physmap));
 
 	/*
 	 * Perform "base memory" related probes & setup
 	 */
 	vm86_intcall(0x12, &vmf);
 	basemem = vmf.vmf_ax;
 	if (basemem > 640) {
 		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 			basemem);
 		basemem = 640;
 	}
 
 	/*
 	 * XXX if biosbasemem is now < 640, there is a `hole'
 	 * between the end of base memory and the start of
 	 * ISA memory.  The hole may be empty or it may
 	 * contain BIOS code or data.  Map it read/write so
 	 * that the BIOS can write to it.  (Memory from 0 to
 	 * the physical end of the kernel is mapped read-only
 	 * to begin with and then parts of it are remapped.
 	 * The parts that aren't remapped form holes that
 	 * remain read-only and are unused by the kernel.
 	 * The base memory area is below the physical end of
 	 * the kernel and right now forms a read-only hole.
 	 * The part of it from PAGE_SIZE to
 	 * (trunc_page(biosbasemem * 1024) - 1) will be
 	 * remapped and used by the kernel later.)
 	 *
 	 * This code is similar to the code used in
 	 * pmap_mapdev, but since no memory needs to be
 	 * allocated we simply change the mapping.
 	 */
 	for (pa = trunc_page(basemem * 1024);
 	     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
 		pte = (pt_entry_t)vtopte(pa + KERNBASE);
 		*pte = pa | PG_RW | PG_V;
 	}
 
 	/*
 	 * if basemem != 640, map pages r/w into vm86 page table so 
 	 * that the bios can scribble on it.
 	 */
 	pte = (pt_entry_t)vm86paddr;
 	for (i = basemem / 4; i < 160; i++)
 		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 
 	/*
 	 * map page 1 R/W into the kernel page table so we can use it
 	 * as a buffer.  The kernel will unmap this page later.
 	 */
 	pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT));
 	*pte = (1 << PAGE_SHIFT) | PG_RW | PG_V;
 
 	/*
 	 * get memory map with INT 15:E820
 	 */
 	vmc.npages = 0;
 	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
 	vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 
 	physmap_idx = 0;
 	vmf.vmf_ebx = 0;
 	do {
 		vmf.vmf_eax = 0xE820;
 		vmf.vmf_edx = SMAP_SIG;
 		vmf.vmf_ecx = sizeof(struct bios_smap);
 		i = vm86_datacall(0x15, &vmf, &vmc);
 		if (i || vmf.vmf_eax != SMAP_SIG)
 			break;
 		if (boothowto & RB_VERBOSE)
 			printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n",
 				smap->type,
 				*(u_int32_t *)((char *)&smap->base + 4),
 				(u_int32_t)smap->base,
 				*(u_int32_t *)((char *)&smap->length + 4),
 				(u_int32_t)smap->length);
 
 		/* Only entries of type 0x01 are recorded. */
 		if (smap->type != 0x01)
 			goto next_run;
 
 		if (smap->length == 0)
 			goto next_run;
 
 		if (smap->base >= 0xffffffff) {
 			printf("%uK of memory above 4GB ignored\n",
 			    (u_int)(smap->length / 1024));
 			goto next_run;
 		}
 
 		/* Reject a region that starts below any bound already recorded. */
 		for (i = 0; i <= physmap_idx; i += 2) {
 			if (smap->base < physmap[i + 1]) {
 				if (boothowto & RB_VERBOSE)
 					printf(
 	"Overlapping or non-montonic memory region, ignoring second region\n");
 				goto next_run;
 			}
 		}
 
 		/* A region that abuts the last one simply extends it. */
 		if (smap->base == physmap[physmap_idx + 1]) {
 			physmap[physmap_idx + 1] += smap->length;
 			goto next_run;
 		}
 
 		physmap_idx += 2;
 		if (physmap_idx == PHYSMAP_SIZE) {
 			printf(
 		"Too many segments in the physical address map, giving up\n");
 			/*
 			 * Step back to the last pair that was actually
 			 * filled in: everything below indexes
 			 * physmap[physmap_idx + 1], which would otherwise
 			 * lie beyond the end of the array.
 			 */
 			physmap_idx -= 2;
 			break;
 		}
 		physmap[physmap_idx] = smap->base;
 		physmap[physmap_idx + 1] = smap->base + smap->length;
 next_run:
 		;	/* a label must be followed by a statement */
 	} while (vmf.vmf_ebx != 0);
 
 	if (physmap[1] != 0)
 		goto physmap_done;
 
 	/*
 	 * If we failed above, try memory map with INT 15:E801
 	 */
 	vmf.vmf_ax = 0xE801;
 	if (vm86_intcall(0x15, &vmf) == 0) {
 		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 	} else {
 #if 0
 		vmf.vmf_ah = 0x88;
 		vm86_intcall(0x15, &vmf);
 		extmem = vmf.vmf_ax;
 #else
 		/*
 		 * Prefer the RTC value for extended memory.
 		 */
 		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 #endif
 	}
 
 	/*
 	 * Special hack for chipsets that still remap the 384k hole when
 	 * there's 16MB of memory - this really confuses people that
 	 * are trying to use bus mastering ISA controllers with the
 	 * "16MB limit"; they only have 16MB, but the remapping puts
 	 * them beyond the limit.
 	 *
 	 * If extended memory is between 15-16MB (16-17MB phys address range),
 	 *	chop it to 15MB.
 	 */
 	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 		extmem = 15 * 1024;
 
 	physmap[0] = 0;
 	physmap[1] = basemem * 1024;
 	physmap_idx = 2;
 	physmap[physmap_idx] = 0x100000;
 	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 
 physmap_done:
 	/*
 	 * Now, physmap contains a map of physical memory.
 	 */
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	physmap[1] = mp_bootaddress(physmap[1] / 1024);
 
 	/* look for the MP hardware - needed for apic addresses */
 	i386_mp_probe();
 #endif
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".  We may adjust this 
 	 * based on ``hw.physmem'' and the results of the memory test.
 	 */
 	Maxmem = atop(physmap[physmap_idx + 1]);
 
 #ifdef MAXMEM
 	Maxmem = MAXMEM / 4;
 #endif
 
 	/*
 	 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
 	 * for the appropriate modifiers.  This overrides MAXMEM.
 	 */
 	if ((cp = getenv("hw.physmem")) != NULL) {
 		u_int64_t AllowMem, sanity;
 		char *ep;
 
 		sanity = AllowMem = strtouq(cp, &ep, 0);
 		if ((ep != cp) && (*ep != 0)) {
 			/* Each suffix multiplies by 1024 once per case it passes. */
 			switch(*ep) {
 			case 'g':
 			case 'G':
 				AllowMem <<= 10;
 				/* FALLTHROUGH */
 			case 'm':
 			case 'M':
 				AllowMem <<= 10;
 				/* FALLTHROUGH */
 			case 'k':
 			case 'K':
 				AllowMem <<= 10;
 				break;
 			default:
 				AllowMem = sanity = 0;
 			}
 			/* A scaled value below the parsed number means overflow. */
 			if (AllowMem < sanity)
 				AllowMem = 0;
 		}
 		if (AllowMem == 0)
 			printf("Ignoring invalid memory size of '%s'\n", cp);
 		else
 			Maxmem = atop(AllowMem);
 	}
 
 	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 	    (boothowto & RB_VERBOSE))
 		printf("Physical memory use set to %uK\n", Maxmem * 4);
 
 	/*
 	 * If Maxmem has been increased beyond what the system has detected,
 	 * extend the last memory segment to the new limit.
 	 */ 
 	if (atop(physmap[physmap_idx + 1]) < Maxmem)
 		physmap[physmap_idx + 1] = ptoa(Maxmem);
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap(first, 0);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
 	pa_indx = 0;
 	phys_avail[pa_indx++] = physmap[0];
 	phys_avail[pa_indx] = physmap[0];
 #if 0
 	pte = (pt_entry_t)vtopte(KERNBASE);
 #else
 	pte = (pt_entry_t)CMAP1;
 #endif
 
 	/*
 	 * physmap is in bytes, so when converting to page boundaries,
 	 * round up the start address and round down the end address.
 	 */
 	for (i = 0; i <= physmap_idx; i += 2) {
 		vm_offset_t end;
 
 		end = ptoa(Maxmem);
 		if (physmap[i + 1] < end)
 			end = trunc_page(physmap[i + 1]);
 		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 			int tmp, page_bad;
 #if 0
 			int *ptr = 0;
 #else
 			int *ptr = (int *)CADDR1;
 #endif
 
 			/*
 			 * block out kernel memory as not available.
 			 */
 			if (pa >= 0x100000 && pa < first)
 				continue;
 	
 			page_bad = FALSE;
 
 			/*
 			 * map page into kernel: valid, read/write,non-cacheable
 			 */
 			*pte = pa | PG_V | PG_RW | PG_N;
 			invltlb();
 
 			tmp = *(int *)ptr;
 			/*
 			 * Test for alternating 1's and 0's
 			 */
 			*(volatile int *)ptr = 0xaaaaaaaa;
 			if (*(volatile int *)ptr != 0xaaaaaaaa) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for alternating 0's and 1's
 			 */
 			*(volatile int *)ptr = 0x55555555;
 			if (*(volatile int *)ptr != 0x55555555) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 1's
 			 */
 			*(volatile int *)ptr = 0xffffffff;
 			if (*(volatile int *)ptr != 0xffffffff) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 0's
 			 */
 			*(volatile int *)ptr = 0x0;
 			if (*(volatile int *)ptr != 0x0) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Restore original value.
 			 */
 			*(int *)ptr = tmp;
 
 			/*
 			 * Adjust array of valid/good pages.
 			 */
 			if (page_bad == TRUE) {
 				continue;
 			}
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 * If we're also doing a speculative memory
 			 * test and we at or past the end, bump up Maxmem
 			 * so that we keep going. The first bad page
 			 * will terminate the loop.
 			 */
 			if (phys_avail[pa_indx] == pa) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 					printf(
 		"Too many holes in the physical address space, giving up\n");
 					pa_indx--;
 					break;
 				}
 				phys_avail[pa_indx++] = pa;	/* start */
 				phys_avail[pa_indx] = pa + PAGE_SIZE;	/* end */
 			}
 			physmem++;
 		}
 	}
 	/* Tear down the temporary test mapping. */
 	*pte = 0;
 	invltlb();
 
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 
 	avail_end = phys_avail[pa_indx];
 }
 
 void
 init386(first)
 	int first;
 {
 	struct gate_descriptor *gdp;
 	int gsel_tss, metadata_missing, off, x;
 #ifndef SMP
 	/* table descriptors - used to load tables by microp */
 	struct region_descriptor r_gdt, r_idt;
 #endif
 
 	proc0.p_addr = proc0paddr;
 
 	atdevbase = ISA_HOLE_START + KERNBASE;
 
 	metadata_missing = 0;
 	if (bootinfo.bi_modulep) {
 		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 		preload_bootstrap_relocate(KERNBASE);
 	} else {
 		metadata_missing = 1;
 	}
 	if (bootinfo.bi_envp)
 		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
 	/* Init basic tunables, hz etc */
 	init_param();
 
 	/*
 	 * make gdt memory segments, the code segment goes up to end of the
 	 * page with etext in it, the data segment goes to the end of
 	 * the address space
 	 */
 	/*
 	 * XXX text protection is temporarily (?) disabled.  The limit was
 	 * i386_btop(round_page(etext)) - 1.
 	 */
 	gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
 #ifdef SMP
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		atop(sizeof(struct privatespace) - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[0].globaldata.gd_common_tss;
 	SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0].globaldata;
 #else
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		atop(sizeof(struct globaldata) - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &__globaldata;
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &__globaldata.gd_common_tss;
 	__globaldata.gd_prvspace = &__globaldata;
 #endif
 
 	for (x = 0; x < NGDT; x++) {
 #ifdef BDE_DEBUGGER
 		/* avoid overwriting db entries with APM ones */
 		if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL)
 			continue;
 #endif
 		ssdtosd(&gdt_segs[x], &gdt[x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base =  (int) gdt;
 	lgdt(&r_gdt);
 
 	/* setup curproc so that mutexes work */
 	PCPU_SET(curproc, &proc0);
 	PCPU_SET(spinlocks, NULL);
 
 	LIST_INIT(&proc0.p_contested);
 
 	/*
 	 * Initialize mutexes.
 	 */
 	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
 	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
 	mtx_init(&proc0.p_mtx, "process lock", MTX_DEF);
 	mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE);
 #ifdef SMP
 	mtx_init(&imen_mtx, "imen", MTX_SPIN);
 #endif
 	mtx_lock(&Giant);
 
 	/* make ldt memory segments */
 	/*
 	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
 	 * should be spelled ...MAX_USER...
 	 */
 	ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 	ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
 		ssdtosd(&ldt_segs[x], &ldt[x].sd);
 
 	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/* exceptions */
 	for (x = 0; x < NIDT; x++)
 		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
 		    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(1, &IDTVEC(dbg),  SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
  	setidt(3, &IDTVEC(bpt),  SDT_SYS386IGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL
 	    , GSEL(GCODE_SEL, SEL_KPL));
 	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(14, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
  	setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	r_idt.rd_base = (int) idt;
 	lidt(&r_idt);
 
 	/*
 	 * Initialize the console before we print anything out.
 	 */
 	cninit();
 
 	if (metadata_missing)
 		printf("WARNING: loader(8) metadata is missing!\n");
 
 #ifdef DEV_ISA
 	isa_defaultirq();
 #endif
 
 #ifdef DDB
 	kdb_init();
 	if (boothowto & RB_KDB)
 		Debugger("Boot flags requested debugger");
 #endif
 
 	finishidentcpu();	/* Final stage of CPU initialization */
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	initializecpu();	/* Initialize CPU registers */
 
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
 	PCPU_SET(common_tss.tss_esp0,
 	    (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16);
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	private_tss = 0;
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	ltr(gsel_tss);
 
 	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
 	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_cr3 = (int)IdlePTD;
 	dblfault_tss.tss_eip = (int)dblfault_handler;
 	dblfault_tss.tss_eflags = PSL_KERNEL;
 	dblfault_tss.tss_ds = dblfault_tss.tss_es =
 	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 
 	vm86_initialize();
 	getmemsize(first);
 
 	/* now running on new page tables, configured,and u/iom is accessible */
 
 	/* Map the message buffer. */
 	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
 		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
 
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 
 	/* make a call gate to reenter kernel with */
 	gdp = &ldt[LSYS5CALLS_SEL].gd;
 
 	x = (int) &IDTVEC(lcall_syscall);
 	gdp->gd_looffset = x;
 	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 	gdp->gd_stkcpy = 1;
 	gdp->gd_type = SDT_SYS386CGT;
 	gdp->gd_dpl = SEL_UPL;
 	gdp->gd_p = 1;
 	gdp->gd_hioffset = x >> 16;
 
 	/* XXX does this work? */
 	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 	ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
 
 	/* transfer to user mode */
 
 	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
 	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
 
 	/* setup proc 0's pcb */
 	proc0.p_addr->u_pcb.pcb_flags = 0;
 	proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
 	proc0.p_addr->u_pcb.pcb_ext = 0;
 	proc0.p_frame = &proc0_tf;
 }
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 static void f00f_hack(void *unused);
 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 
 /*
  * SYSINIT hook that relocates the IDT when has_f00f_bug is set.
  *
  * Two pages are allocated from kernel_map and the IDT is copied so that
  * its first seven 8-byte entries occupy the end of the first page, with
  * the remainder following in the next page.  The copy is loaded with
  * lidt() and the first page is then made read-only.  Every failure
  * panics.  The argument is unused.
  */
 static void
 f00f_hack(void *unused) {
 	struct gate_descriptor *new_idt;
 #ifndef SMP
 	/*
 	 * Only declared locally for non-SMP builds; with SMP the name
 	 * presumably resolves to a file-scope r_idt -- TODO confirm.
 	 */
 	struct region_descriptor r_idt;
 #endif
 	vm_offset_t tmp;
 
 	/* Nothing to do unless the CPU has been flagged as affected. */
 	if (!has_f00f_bug)
 		return;
 
 	GIANT_REQUIRED;
 
 	printf("Intel Pentium detected, installing workaround for F00F bug\n");
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 
 	/* Two pages, so the table can straddle a page boundary. */
 	tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
 	if (tmp == 0)
 		panic("kmem_alloc returned 0");
 	if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0)
 		panic("kmem_alloc returned non-page-aligned memory");
 	/* Put the first seven entries in the lower page */
 	new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8));
 	bcopy(idt, new_idt, sizeof(idt0));
 	/* Switch the CPU to the copy before publishing it through idt. */
 	r_idt.rd_base = (int)new_idt;
 	lidt(&r_idt);
 	idt = new_idt;
 	/* Write-protect the lower page, i.e. the first seven entries. */
 	if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
 			   VM_PROT_READ, FALSE) != KERN_SUCCESS)
 		panic("vm_map_protect failed");
 	return;
 }
 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 
 int
 ptrace_set_pc(p, addr)
 	struct proc *p;
 	unsigned long addr;
 {
 	p->p_frame->tf_eip = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(p)
 	struct proc *p;
 {
 	p->p_frame->tf_eflags |= PSL_T;
 	return (0);
 }
 
 /*
  * Copy the register state of process p into *regs.  Everything comes
  * from the saved trap frame except %gs, which is read from the pcb.
  * Always returns 0.
  */
 int
 fill_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *frame = p->p_frame;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 
 	/* Segment registers. */
 	regs->r_gs = pcb->pcb_gs;
 	regs->r_fs = frame->tf_fs;
 	regs->r_es = frame->tf_es;
 	regs->r_ds = frame->tf_ds;
 	regs->r_cs = frame->tf_cs;
 	regs->r_ss = frame->tf_ss;
 
 	/* General purpose registers. */
 	regs->r_eax = frame->tf_eax;
 	regs->r_ebx = frame->tf_ebx;
 	regs->r_ecx = frame->tf_ecx;
 	regs->r_edx = frame->tf_edx;
 	regs->r_esi = frame->tf_esi;
 	regs->r_edi = frame->tf_edi;
 	regs->r_ebp = frame->tf_ebp;
 	regs->r_esp = frame->tf_esp;
 
 	/* Instruction pointer and flags. */
 	regs->r_eip = frame->tf_eip;
 	regs->r_eflags = frame->tf_eflags;
 
 	return (0);
 }
 
 /*
  * Install the register state in *regs into process p.
  *
  * The request is refused with EINVAL, and nothing is modified, unless
  * both the EFL_SECURE() check on the new eflags and the CS_SECURE()
  * check on the new %cs pass.  Otherwise the trap frame is overwritten,
  * %gs is stored in the pcb, and 0 is returned.
  */
 int
 set_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *frame = p->p_frame;
 	struct pcb *pcb;
 
 	if (!(EFL_SECURE(regs->r_eflags, frame->tf_eflags) &&
 	    CS_SECURE(regs->r_cs)))
 		return (EINVAL);
 
 	/* Segment registers held in the trap frame. */
 	frame->tf_fs = regs->r_fs;
 	frame->tf_es = regs->r_es;
 	frame->tf_ds = regs->r_ds;
 	frame->tf_cs = regs->r_cs;
 	frame->tf_ss = regs->r_ss;
 
 	/* General purpose registers. */
 	frame->tf_eax = regs->r_eax;
 	frame->tf_ebx = regs->r_ebx;
 	frame->tf_ecx = regs->r_ecx;
 	frame->tf_edx = regs->r_edx;
 	frame->tf_esi = regs->r_esi;
 	frame->tf_edi = regs->r_edi;
 	frame->tf_ebp = regs->r_ebp;
 	frame->tf_esp = regs->r_esp;
 
 	/* Instruction pointer and flags. */
 	frame->tf_eip = regs->r_eip;
 	frame->tf_eflags = regs->r_eflags;
 
 	/* %gs is kept in the pcb rather than the trap frame. */
 	pcb = &p->p_addr->u_pcb;
 	pcb->pcb_gs = regs->r_gs;
 
 	return (0);
 }
 
 #ifdef CPU_ENABLE_SSE
 /*
  * Convert a savexmm image into the save87 layout: copy the environment
  * fields one by one, then the eight accumulators and sv_ex_sw.
  */
 static void
 fill_fpregs_xmm(sv_xmm, sv_87)
 	struct savexmm *sv_xmm;
 	struct save87 *sv_87;
 {
 	struct envxmm *src_env = &sv_xmm->sv_env;
 	struct env87 *dst_env = &sv_87->sv_env;
 	int reg;
 
 	/* FPU control/status */
 	dst_env->en_cw = src_env->en_cw;
 	dst_env->en_sw = src_env->en_sw;
 	dst_env->en_tw = src_env->en_tw;
 	dst_env->en_fip = src_env->en_fip;
 	dst_env->en_fcs = src_env->en_fcs;
 	dst_env->en_opcode = src_env->en_opcode;
 	dst_env->en_foo = src_env->en_foo;
 	dst_env->en_fos = src_env->en_fos;
 
 	/* FPU registers */
 	for (reg = 0; reg < 8; reg++)
 		sv_87->sv_ac[reg] = sv_xmm->sv_fp[reg].fp_acc;
 
 	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
 }
 
 /*
  * Convert a save87 image into the savexmm layout: copy the environment
  * fields one by one, then the eight accumulators and sv_ex_sw.
  */
 static void
 set_fpregs_xmm(sv_87, sv_xmm)
 	struct save87 *sv_87;
 	struct savexmm *sv_xmm;
 {
 	struct env87 *src_env = &sv_87->sv_env;
 	struct envxmm *dst_env = &sv_xmm->sv_env;
 	int reg;
 
 	/* FPU control/status */
 	dst_env->en_cw = src_env->en_cw;
 	dst_env->en_sw = src_env->en_sw;
 	dst_env->en_tw = src_env->en_tw;
 	dst_env->en_fip = src_env->en_fip;
 	dst_env->en_fcs = src_env->en_fcs;
 	dst_env->en_opcode = src_env->en_opcode;
 	dst_env->en_foo = src_env->en_foo;
 	dst_env->en_fos = src_env->en_fos;
 
 	/* FPU registers */
 	for (reg = 0; reg < 8; reg++)
 		sv_xmm->sv_fp[reg].fp_acc = sv_87->sv_ac[reg];
 
 	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
 }
 #endif /* CPU_ENABLE_SSE */
 
 int
 fill_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
 						(struct save87 *)fpregs);
 		return (0);
 	}
 #endif /* CPU_ENABLE_SSE */
 	bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
 	return (0);
 }
 
 int
 set_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		set_fpregs_xmm((struct save87 *)fpregs,
 					   &p->p_addr->u_pcb.pcb_save.sv_xmm);
 		return (0);
 	}
 #endif /* CPU_ENABLE_SSE */
 	bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
 	return (0);
 }
 
 /*
  * Fill *dbregs with debug register contents.
  *
  * With p == NULL the values are read from the CPU's debug registers;
  * otherwise they come from the copies saved in p's pcb, with dr4 and dr5
  * reported as zero.  Always returns 0.
  */
 int
 fill_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *pcb;
 
 	if (p != NULL) {
 		pcb = &p->p_addr->u_pcb;
 		dbregs->dr0 = pcb->pcb_dr0;
 		dbregs->dr1 = pcb->pcb_dr1;
 		dbregs->dr2 = pcb->pcb_dr2;
 		dbregs->dr3 = pcb->pcb_dr3;
 		/* The pcb keeps no copy of dr4/dr5. */
 		dbregs->dr4 = 0;
 		dbregs->dr5 = 0;
 		dbregs->dr6 = pcb->pcb_dr6;
 		dbregs->dr7 = pcb->pcb_dr7;
 		return (0);
 	}
 
 	/* No process given: sample the hardware registers. */
 	dbregs->dr0 = rdr0();
 	dbregs->dr1 = rdr1();
 	dbregs->dr2 = rdr2();
 	dbregs->dr3 = rdr3();
 	dbregs->dr4 = rdr4();
 	dbregs->dr5 = rdr5();
 	dbregs->dr6 = rdr6();
 	dbregs->dr7 = rdr7();
 	return (0);
 }
 
 int
 set_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *pcb;
 	int i;
 	u_int32_t mask1, mask2;
 
 	if (p == NULL) {
 		load_dr0(dbregs->dr0);
 		load_dr1(dbregs->dr1);
 		load_dr2(dbregs->dr2);
 		load_dr3(dbregs->dr3);
 		load_dr4(dbregs->dr4);
 		load_dr5(dbregs->dr5);
 		load_dr6(dbregs->dr6);
 		load_dr7(dbregs->dr7);
 	}
 	else {
 		/*
 		 * Don't let an illegal value for dr7 get set.	Specifically,
 		 * check for undefined settings.  Setting these bit patterns
 		 * result in undefined behaviour and can lead to an unexpected
 		 * TRCTRAP.
 		 */
 		for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
 		     i++, mask1 <<= 2, mask2 <<= 2)
 			if ((dbregs->dr7 & mask1) == mask2)
 				return (EINVAL);
 		
 		pcb = &p->p_addr->u_pcb;
 		
 		/*
 		 * Don't let a process set a breakpoint that is not within the
 		 * process's address space.  If a process could do this, it
 		 * could halt the system by setting a breakpoint in the kernel
 		 * (if ddb was enabled).  Thus, we need to check to make sure
 		 * that no breakpoints are being enabled for addresses outside
 		 * process's address space, unless, perhaps, we were called by
 		 * uid 0.
 		 *
 		 * XXX - what about when the watched area of the user's
 		 * address space is written into from within the kernel
 		 * ... wouldn't that still cause a breakpoint to be generated
 		 * from within kernel mode?
 		 */
 
 		if (suser(p) != 0) {
 			if (dbregs->dr7 & 0x3) {
 				/* dr0 is enabled */
 				if (dbregs->dr0 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<2)) {
 				/* dr1 is enabled */
 				if (dbregs->dr1 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<4)) {
 				/* dr2 is enabled */
 				if (dbregs->dr2 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<6)) {
 				/* dr3 is enabled */
 				if (dbregs->dr3 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 		}
 
 		pcb->pcb_dr0 = dbregs->dr0;
 		pcb->pcb_dr1 = dbregs->dr1;
 		pcb->pcb_dr2 = dbregs->dr2;
 		pcb->pcb_dr3 = dbregs->dr3;
 		pcb->pcb_dr6 = dbregs->dr6;
 		pcb->pcb_dr7 = dbregs->dr7;
 
 		pcb->pcb_flags |= PCB_DBREGS;
 	}
 
 	return (0);
 }
 
 /*
  * Return > 0 if a hardware breakpoint has been hit, and the
  * breakpoint was in user space.  Return 0, otherwise.
  */
 int
 user_dbreg_trap(void)
 {
         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
         int nbp;            /* number of breakpoints that triggered */
         caddr_t addr[4];    /* breakpoint addresses */
         int i;
         
         dr7 = rdr7();
         if ((dr7 & 0x000000ff) == 0) {
                 /*
                  * all GE and LE bits in the dr7 register are zero,
                  * thus the trap couldn't have been caused by the
                  * hardware debug registers
                  */
                 return 0;
         }
 
         nbp = 0;
         dr6 = rdr6();
         bp = dr6 & 0x0000000f;
 
         if (!bp) {
                 /*
                  * None of the breakpoint bits are set meaning this
                  * trap was not caused by any of the debug registers
                  */
                 return 0;
         }
 
         /*
          * at least one of the breakpoints were hit, check to see
          * which ones and if any of them are user space addresses
          */
 
         if (bp & 0x01) {
                 addr[nbp++] = (caddr_t)rdr0();
         }
         if (bp & 0x02) {
                 addr[nbp++] = (caddr_t)rdr1();
         }
         if (bp & 0x04) {
                 addr[nbp++] = (caddr_t)rdr2();
         }
         if (bp & 0x08) {
                 addr[nbp++] = (caddr_t)rdr3();
         }
 
         for (i=0; i<nbp; i++) {
                 if (addr[i] <
                     (caddr_t)VM_MAXUSER_ADDRESS) {
                         /*
                          * addr[i] is in user space
                          */
                         return nbp;
                 }
         }
 
         /*
          * None of the breakpoints are in user space.
          */
         return 0;
 }
 
 
 #ifndef DDB
 /*
  * Stand-in used when the kernel is built without DDB: it only prints
  * a notice containing msg.
  */
 void
 Debugger(const char *msg)
 {
 	printf("Debugger(\"%s\") called.\n", msg);
 }
 #endif /* no DDB */
 
 #include <sys/disklabel.h>
 
 /*
  * Determine the size of the transfer, and make sure it is
  * within the boundaries of the partition. Adjust transfer
  * if needed, and signal errors or early completion.
  *
  * bp     - the request; bio_blkno is relative to the partition selected
  *          by dkpart(bp->bio_dev).
  * lp     - disk label describing the partitions.
  * wlabel - nonzero when writing over the label sector is permitted.
  *
  * Returns:
  *    1  the request is valid; bio_pblkno has been set and bio_bcount
  *       may have been truncated to fit the partition.
  *    0  the request starts exactly at the end of the partition (EOF);
  *       bio_resid is set to bio_bcount.
  *   -1  error; bio_error is set (EROFS or EINVAL) and BIO_ERROR is
  *       raised in bio_flags.
  */
 int
 bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
 {
         struct partition *p = lp->d_partitions + dkpart(bp->bio_dev);
         int labelsect = lp->d_partitions[0].p_offset;
         int maxsz = p->p_size,
                 sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 
         /* overwriting disk label ? */
         /* XXX should also protect bootstrap in first 8K */
         if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect &&
 #if LABELSECTOR != 0
             bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
 #endif
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 
 #if     defined(DOSBBSECTOR) && defined(notyet)
         /* overwriting master boot record? */
         if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR &&
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 #endif
 
         /*
          * A negative block number can never lie inside the partition.
          * It must be rejected here: the truncation below computes
          * maxsz - bio_blkno, which for a negative bio_blkno is positive
          * and would enlarge the transfer instead of failing it.
          */
         if (bp->bio_blkno < 0) {
                 bp->bio_error = EINVAL;
                 goto bad;
         }
 
         /* beyond partition? */
         if (bp->bio_blkno + sz > maxsz) {
                 /* if exactly at end of disk, return an EOF */
                 if (bp->bio_blkno == maxsz) {
                         bp->bio_resid = bp->bio_bcount;
                         return(0);
                 }
                 /* or truncate if part of it fits */
                 sz = maxsz - bp->bio_blkno;
                 if (sz <= 0) {
                         bp->bio_error = EINVAL;
                         goto bad;
                 }
                 bp->bio_bcount = sz << DEV_BSHIFT;
         }
 
         /* translate to an absolute block number on the disk */
         bp->bio_pblkno = bp->bio_blkno + p->p_offset;
         return(1);
 
 bad:
         bp->bio_flags |= BIO_ERROR;
         return(-1);
 }
 
 #ifdef DDB
 
 /*
  * Provide inb() and outb() as functions.  They are normally only
  * available as macros calling inlined functions, thus cannot be
  * called inside DDB.
  *
  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
  */
 
 #undef inb
 #undef outb
 
 /* silence compiler warnings */
 u_char inb(u_int);
 void outb(u_int, u_char);
 
 /*
  * Out-of-line inb(): execute the "inb" instruction with port in %dx and
  * return the byte delivered in %al.
  */
 u_char
 inb(u_int port)
 {
 	u_char	data;
 	/*
 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
 	 * %edx, while gcc generates inferior code (movw instead of movl)
 	 * if we tell it to load (u_short) port.
 	 */
 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
 	return (data);
 }
 
 /*
  * Out-of-line outb(): execute the "outb" instruction with port in %dx
  * and data in %al.
  */
 void
 outb(u_int port, u_char data)
 {
 	u_char	al;
 	/*
 	 * Use an unnecessary assignment to help gcc's register allocator.
 	 * This makes a large difference for gcc-1.40 and a tiny difference
 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
 	 * best results.  gcc-2.6.0 can't handle this.
 	 */
 	al = data;
 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
 }
 
 #endif /* DDB */
Index: head/sys/amd64/amd64/mp_machdep.c
===================================================================
--- head/sys/amd64/amd64/mp_machdep.c	(revision 82308)
+++ head/sys/amd64/amd64/mp_machdep.c	(revision 82309)
@@ -1,2440 +1,2442 @@
 /*
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_cpu.h"
+#include "opt_upages.h"
 
 #ifdef SMP
 #include <machine/smptests.h>
 #else
 #error
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
 #include <sys/dkstat.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 #include <machine/apic.h>
 #include <machine/atomic.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/mpapic.h>
 #include <machine/psl.h>
 #include <machine/segments.h>
 #include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
 #include <machine/tss.h>
 #include <machine/specialreg.h>
 #include <machine/globaldata.h>
+#include <machine/privatespace.h>
 
 #if defined(APIC_IO)
 #include <machine/md_var.h>		/* setidt() */
 #include <i386/isa/icu.h>		/* IPIs */
 #include <i386/isa/intr_machdep.h>	/* IPIs */
 #endif	/* APIC_IO */
 
 #if defined(TEST_DEFAULT_CONFIG)
 #define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
 #else
 #define MPFPS_MPFB1	mpfps->mpfb1
 #endif  /* TEST_DEFAULT_CONFIG */
 
 #define WARMBOOT_TARGET		0
 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
 
 #ifdef PC98
 #define BIOS_BASE		(0xe8000)
 #define BIOS_SIZE		(0x18000)
 #else
 #define BIOS_BASE		(0xf0000)
 #define BIOS_SIZE		(0x10000)
 #endif
 #define BIOS_COUNT		(BIOS_SIZE/4)
 
 #define CMOS_REG		(0x70)
 #define CMOS_DATA		(0x71)
 #define BIOS_RESET		(0x0f)
 #define BIOS_WARM		(0x0a)
 
 #define PROCENTRY_FLAG_EN	0x01
 #define PROCENTRY_FLAG_BP	0x02
 #define IOAPICENTRY_FLAG_EN	0x01
 
 
 /*
  * MP Floating Pointer Structure.
  *
  * mpfps_t is a pointer to this structure.  The field order and widths
  * presumably mirror the table laid out by the Intel MP specification,
  * so they should not be rearranged -- TODO confirm against the spec.
  */
 typedef struct MPFPS {
 	char    signature[4];
 	void   *pap;
 	u_char  length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  mpfb1;		/* read through the MPFPS_MPFB1 macro */
 	u_char  mpfb2;
 	u_char  mpfb3;
 	u_char  mpfb4;
 	u_char  mpfb5;
 }      *mpfps_t;
 
 /*
  * MP Configuration Table Header.
  *
  * mpcth_t is a pointer to this structure.  The layout presumably
  * follows the Intel MP specification -- TODO confirm against the spec.
  */
 typedef struct MPCTH {
 	char    signature[4];
 	u_short base_table_length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  oem_id[8];
 	u_char  product_id[12];
 	void   *oem_table_pointer;
 	u_short oem_table_size;
 	u_short entry_count;
 	void   *apic_address;
 	u_short extended_table_length;
 	u_char  extended_table_checksum;
 	u_char  reserved;
 }      *mpcth_t;
 
 
 /*
  * Processor entry; proc_entry_ptr is a pointer to this structure.
  * Presumably one record type of the MP configuration table -- TODO
  * confirm against the MP specification.
  */
 typedef struct PROCENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  cpu_flags;	/* presumably PROCENTRY_FLAG_* bits; verify */
 	u_long  cpu_signature;
 	u_long  feature_flags;
 	u_long  reserved1;
 	u_long  reserved2;
 }      *proc_entry_ptr;
 
 /*
  * Bus entry; bus_entry_ptr is a pointer to this structure.
  * Presumably one record type of the MP configuration table -- TODO
  * confirm against the MP specification.
  */
 typedef struct BUSENTRY {
 	u_char  type;
 	u_char  bus_id;
 	char    bus_type[6];
 }      *bus_entry_ptr;
 
 /*
  * I/O APIC entry; io_apic_entry_ptr is a pointer to this structure.
  * Presumably one record type of the MP configuration table -- TODO
  * confirm against the MP specification.
  */
 typedef struct IOAPICENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  apic_flags;	/* presumably IOAPICENTRY_FLAG_EN; verify */
 	void   *apic_address;
 }      *io_apic_entry_ptr;
 
 /*
  * Interrupt entry; int_entry_ptr is a pointer to this structure.  The
  * field names pair a source bus/irq with a destination APIC/pin.
  * Presumably one record type of the MP configuration table -- TODO
  * confirm against the MP specification.
  */
 typedef struct INTENTRY {
 	u_char  type;
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 }      *int_entry_ptr;
 
 /*
  * Descriptions of MP basetable entries: a type code together with a
  * length and a printable name.  (NOTE(review): length is presumably the
  * size in bytes of that entry type -- confirm at the table's
  * initializer.)
  */
 typedef struct BASETABLE_ENTRY {
 	u_char  type;
 	u_char  length;
 	char    name[16];
 }       basetable_entry;
 
 /*
  * this code MUST be enabled here and in mpboot.s.
  * it follows the very early stages of AP boot by placing values in CMOS ram.
  * it NORMALLY will never be needed and thus the primitive method for enabling.
  *
 #define CHECK_POINTS
  */
 
 #if defined(CHECK_POINTS) && !defined(PC98)
 #define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
 
 #define CHECK_INIT(D);				\
 	CHECK_WRITE(0x34, (D));			\
 	CHECK_WRITE(0x35, (D));			\
 	CHECK_WRITE(0x36, (D));			\
 	CHECK_WRITE(0x37, (D));			\
 	CHECK_WRITE(0x38, (D));			\
 	CHECK_WRITE(0x39, (D));
 
 #define CHECK_PRINT(S);				\
 	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
 	   (S),					\
 	   CHECK_READ(0x34),			\
 	   CHECK_READ(0x35),			\
 	   CHECK_READ(0x36),			\
 	   CHECK_READ(0x37),			\
 	   CHECK_READ(0x38),			\
 	   CHECK_READ(0x39));
 
 #else				/* CHECK_POINTS */
 
 #define CHECK_INIT(D)
 #define CHECK_PRINT(S)
 
 #endif				/* CHECK_POINTS */
 
 /*
  * Values to send to the POST hardware.
  */
 #define MP_BOOTADDRESS_POST	0x10
 #define MP_PROBE_POST		0x11
 #define MPTABLE_PASS1_POST	0x12
 
 #define MP_START_POST		0x13
 #define MP_ENABLE_POST		0x14
 #define MPTABLE_PASS2_POST	0x15
 
 #define START_ALL_APS_POST	0x16
 #define INSTALL_AP_TRAMP_POST	0x17
 #define START_AP_POST		0x18
 
 #define MP_ANNOUNCE_POST	0x19
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
 int	current_postcode;
 
 /** XXX FIXME: what system files declare these??? */
 extern struct region_descriptor r_gdt, r_idt;
 
 int	bsp_apic_ready = 0;	/* flags useability of BSP apic */
 int	mp_naps;		/* # of Applications processors */
 int	mp_nbusses;		/* # of busses */
 int	mp_napics;		/* # of IO APICs */
 int	boot_cpu_id;		/* designated BSP */
 vm_offset_t cpu_apic_address;
 vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
 extern	int nkpt;
 
 u_int32_t cpu_apic_versions[MAXCPU];
 u_int32_t *io_apic_versions;
 
 #ifdef APIC_INTR_REORDER
 struct {
 	volatile int *location;
 	int bit;
 } apic_isrbit_location[32];
 #endif
 
 struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
 
 /*
  * APIC ID logical/physical mapping structures.
  * We oversize these to simplify boot-time config.
  */
 int     cpu_num_to_apic_id[NAPICID];
 int     io_num_to_apic_id[NAPICID];
 int     apic_id_to_logical[NAPICID];
 
 
 /* AP uses this during bootstrap.  Do not staticize.  */
 char *bootSTK;
 static int bootAP;
 
 /* Hotwire a 0->4MB V==P mapping */
 extern pt_entry_t *KPTphys;
 
 /* SMP page table page */
 extern pt_entry_t *SMPpt;
 
 struct pcb stoppcbs[MAXCPU];
 
 int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
 
 /*
  * Local data and functions.
  */
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready = 0;
 
 static int	mp_capable;
 static u_int	boot_address;
 static u_int	base_memory;
 
 static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
 static mpfps_t	mpfps;
 static int	search_for_sig(u_int32_t target, int count);
 static void	mp_enable(u_int boot_addr);
 
 static void	mptable_pass1(void);
 static int	mptable_pass2(void);
 static void	default_mp_table(int type);
 static void	fix_mp_table(void);
 static void	setup_apic_irq_mapping(void);
 static void	init_locks(void);
 static int	start_all_aps(u_int boot_addr);
 static void	install_ap_tramp(u_int boot_addr);
 static int	start_ap(int logicalCpu, u_int boot_addr);
 void		ap_init(void);
 static int	apic_int_is_bus_type(int intr, int bus_type);
 static void	release_aps(void *dummy);
 
 /*
  * initialize all the SMP locks
  */
 
 /* critical region around IO APIC, apic_imen */
 struct mtx		imen_mtx;
 
 /* lock region used by kernel profiling */
 int	mcount_lock;
 
 #ifdef USE_COMLOCK
 /* locks com (tty) data/hardware accesses: a FASTINTR() */
 struct mtx		com_mtx;
 #endif /* USE_COMLOCK */
 
 /*
  * Initialize the SMP spin mutexes owned by this file.  At present that
  * is only com_mtx, and only when USE_COMLOCK is defined; otherwise the
  * function does nothing.
  */
 static void
 init_locks(void)
 {
 
 #ifdef USE_COMLOCK
 	mtx_init(&com_mtx, "com", MTX_SPIN);
 #endif /* USE_COMLOCK */
 }
 
 /*
  * Calculate usable address in base memory for AP trampoline code.
  *
  * basemem is the amount of base memory in kilobytes.  The byte count is
  * recorded in base_memory; the result, also stored in boot_address, is
  * the 4k boundary at or below the top of base memory, moved down one
  * more page when fewer than bootMP_size bytes remain above it.
  */
 u_int
 mp_bootaddress(u_int basemem)
 {
 	u_int page_base;
 
 	POSTCODE(MP_BOOTADDRESS_POST);
 
 	/* convert to bytes */
 	base_memory = basemem * 1024;
 
 	/* round down to 4k boundary */
 	page_base = base_memory & ~0xfff;
 
 	/* not enough room above the boundary: lower by 4k */
 	if ((base_memory - page_base) < bootMP_size)
 		page_base -= 4096;
 
 	boot_address = page_base;
 	return page_base;
 }
 
 
 /*
  * Look for an Intel MP spec table (ie, SMP capable hardware).
  */
 void
 i386_mp_probe(void)
 {
 	int     x;
 	u_long  segment;
 	u_int32_t target;
 
 	POSTCODE(MP_PROBE_POST);
 
 	/* see if EBDA exists */
 	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
 		/* search first 1K of EBDA */
 		target = (u_int32_t) (segment << 4);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	} else {
 		/* last 1K of base memory, effective 'top of base' passed in */
 		target = (u_int32_t) (base_memory - 0x400);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	}
 
 	/* search the BIOS */
 	target = (u_int32_t) BIOS_BASE;
 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
 		goto found;
 
 	/* nothing found */
 	mpfps = (mpfps_t)0;
 	mp_capable = 0;
 	return;
 
 found:
 	/* calculate needed resources */
 	mpfps = (mpfps_t)x;
 	mptable_pass1();
 
 	/* flag fact that we are running multiple processors */
 	mp_capable = 1;
 }
 
 /*
  * Report whether MP hardware was found: returns mp_capable (presumably
  * set by an earlier i386_mp_probe() call -- verify the call order).
  * As a side effect all_cpus is set to 1.
  */
 int
 cpu_mp_probe(void)
 {
 	/*
 	 * Record BSP in CPU map
 	 * This is done here so that MBUF init code works correctly.
 	 */
 	all_cpus = 1;
 
 	return (mp_capable);
 }
 
 /*
  * Initialize the SMP hardware and the APIC and start up the AP's.
  * Panics when no MP capable hardware has been detected.
  */
 void
 cpu_mp_start(void)
 {
 	POSTCODE(MP_START_POST);
 
 	/* an MP capable motherboard is mandatory here */
 	if (!mp_capable)
 		panic("MP hardware not found!");
 	else
 		mp_enable(boot_address);
 
 	cpu_setregs();
 }
 
 
 /*
  * Print various information about the SMP system hardware and setup:
  * one line for the BSP, one per AP, and, when APIC_IO is configured,
  * one per IO APIC (otherwise a warning that APIC I/O is disabled).
  */
 void
 cpu_mp_announce(void)
 {
 	int     unit;
 
 	POSTCODE(MP_ANNOUNCE_POST);
 
 	/* the boot processor is always cpu0 */
 	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
 	printf(", version: 0x%08x", cpu_apic_versions[0]);
 	printf(", at 0x%08x\n", cpu_apic_address);
 
 	/* application processors are numbered 1..mp_naps */
 	unit = 1;
 	while (unit <= mp_naps) {
 		printf(" cpu%d (AP):  apic id: %2d", unit, CPU_TO_ID(unit));
 		printf(", version: 0x%08x", cpu_apic_versions[unit]);
 		printf(", at 0x%08x\n", cpu_apic_address);
 		unit++;
 	}
 
 #if defined(APIC_IO)
 	unit = 0;
 	while (unit < mp_napics) {
 		printf(" io%d (APIC): apic id: %2d", unit, IO_TO_ID(unit));
 		printf(", version: 0x%08x", io_apic_versions[unit]);
 		printf(", at 0x%08x\n", io_apic_address[unit]);
 		unit++;
 	}
 #else
 	printf(" Warning: APIC I/O disabled\n");
 #endif	/* APIC_IO */
 }
 
 /*
  * AP cpu's call this to sync up protected mode.
  *
  * The CPU's index is taken from bootAP.  The function builds this CPU's
  * slice of the gdt array, loads the GDT, IDT and LDT, sets up the
  * per-CPU TSS and loads the task register, then calls pmap_set_opt().
  */
 void
 init_secondary(void)
 {
 	int	gsel_tss;
 	int	x, myid = bootAP;
 
 	/* Point the private and TSS segments at this CPU's private space. */
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
 	SMP_prvspace[myid].globaldata.gd_prvspace =
 		&SMP_prvspace[myid].globaldata;
 
 	/* Each CPU owns NGDT consecutive descriptors in gdt[]. */
 	for (x = 0; x < NGDT; x++) {
 		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base = (int) &gdt[myid * NGDT];
 	lgdt(&r_gdt);			/* does magic intra-segment return */
 
 	lidt(&r_idt);
 
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/* Set up this CPU's TSS descriptor and load the task register. */
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
 	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	ltr(gsel_tss);
 
 	pmap_set_opt();
 }
 
 
 #if defined(APIC_IO)
 /*
  * Final configuration of the BSP's local APIC:
  *  - disable 'pic mode'.
  *  - disable 'virtual wire mode'.
  *  - enable NMI.
  *
  * When bootverbose is set, the resulting state is dumped through
  * apic_dump().
  */
 void
 bsp_apic_configure(void)
 {
 	u_char		byte;
 	u_int32_t	temp;
 
 	/* leave 'pic mode' if necessary */
 	if (picmode) {
 		outb(0x22, 0x70);	/* select IMCR */
 		byte = inb(0x23);	/* current contents */
 		byte |= 0x01;		/* mask external INTR */
 		outb(0x23, byte);	/* disconnect 8259s/NMI */
 	}
 
 	/* mask lint0 (the 8259 'virtual wire' connection) */
 	temp = lapic.lvt_lint0;
 	temp |= APIC_LVT_M;		/* set the mask */
 	lapic.lvt_lint0 = temp;
 
         /* setup lint1 to handle NMI */
         temp = lapic.lvt_lint1;
         temp &= ~APIC_LVT_M;		/* clear the mask */
         lapic.lvt_lint1 = temp;
 
 	if (bootverbose)
 		apic_dump("bsp_apic_configure()");
 }
 #endif  /* APIC_IO */
 
 
 /*******************************************************************
  * local functions and data
  */
 
 /*
  * start the SMP system
  *
  * Runs the second MP table pass, applies the default configuration when
  * the table asks for one, cleans up the parsed data, programs the IO
  * APICs and IPI vectors (APIC_IO only), initializes the SMP locks and
  * finally starts the application processors.
  *
  * boot_addr is handed unchanged to start_all_aps().
  */
 static void
 mp_enable(u_int boot_addr)
 {
 	int     x;
 #if defined(APIC_IO)
 	int     apic;
 	u_int   ux;
 #endif	/* APIC_IO */
 
 	POSTCODE(MP_ENABLE_POST);
 
 	/* turn on 4MB of V == P addressing so we can get to MP table */
 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
 	invltlb();
 
 	/* examine the MP table for needed info, uses physical addresses */
 	x = mptable_pass2();
 
 	/* remove the temporary V == P mapping again */
 	*(int *)PTD = 0;
 	invltlb();
 
 	/* can't process default configs till the CPU APIC is pmapped */
 	if (x)
 		default_mp_table(x);	/* x is the default configuration type */
 
 	/* post scan cleanup */
 	fix_mp_table();
 	setup_apic_irq_mapping();
 
 #if defined(APIC_IO)
 
 	/* fill the LOGICAL io_apic_versions table */
 	for (apic = 0; apic < mp_napics; ++apic) {
 		ux = io_apic_read(apic, IOAPIC_VER);
 		io_apic_versions[apic] = ux;
 		io_apic_set_id(apic, IO_TO_ID(apic));
 	}
 
 	/* program each IO APIC in the system */
 	for (apic = 0; apic < mp_napics; ++apic)
 		if (io_apic_setup(apic) < 0)
 			panic("IO APIC setup failure");
 
 	/* install a 'Spurious INTerrupt' vector */
 	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for TLB invalidation */
 	setidt(XINVLTLB_OFFSET, Xinvltlb,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forwarding hardclock() */
 	setidt(XHARDCLOCK_OFFSET, Xhardclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for forwarding statclock() */
 	setidt(XSTATCLOCK_OFFSET, Xstatclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for all-CPU rendezvous */
 	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forcing an additional software trap */
 	setidt(XCPUAST_OFFSET, Xcpuast,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for CPU stop/restart */
 	setidt(XCPUSTOP_OFFSET, Xcpustop,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 #if defined(TEST_TEST1)
 	/* install a "fake hardware INTerrupt" vector */
 	setidt(XTEST1_OFFSET, Xtest1,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 #endif  /** TEST_TEST1 */
 
 #endif	/* APIC_IO */
 
 	/* initialize all SMP locks */
 	init_locks();
 
 	/* start each Application Processor */
 	start_all_aps(boot_addr);
 }
 
 
 /*
  * Look for the MP spec signature.
  *
  * 'target' is a physical address that is read through the KERNBASE
  * mapping; 'count' is the length of the range in 32-bit words.  Every
  * 4th word (each 16-byte boundary) is compared against MP_SIG.
  * Returns the physical byte address of the first match, or -1.
  */
 
 /* string defined by the Intel MP Spec as identifying the MP table */
 #define MP_SIG		0x5f504d5f	/* _MP_ */
 #define NEXT(X)		((X) += 4)
 static int
 search_for_sig(u_int32_t target, int count)
 {
 	u_int32_t *base = (u_int32_t *) (KERNBASE + target);
 	int     word = 0;
 
 	while (word < count) {
 		if (base[word] == MP_SIG) {
 			/* turn the word index back into a byte offset */
 			return (target + (word * sizeof(u_int32_t)));
 		}
 		NEXT(word);
 	}
 
 	return -1;
 }
 
 
 /*
  * Description of each MP base table entry type, indexed by the entry's
  * type byte (0..4).  The second field is the entry length in bytes; the
  * table walkers read it as .length to step from one entry to the next.
  * The string names the entry type.
  */
 static basetable_entry basetable_entry_types[] =
 {
 	{0, 20, "Processor"},
 	{1, 8, "Bus"},
 	{2, 8, "I/O APIC"},
 	{3, 8, "I/O INT"},
 	{4, 8, "Local INT"}
 };
 
 /* One parsed bus entry: the bus ID from the MP table and its decoded type. */
 typedef struct BUSDATA {
 	u_char  bus_id;		/* 0xff until filled in by the second pass */
 	enum busTypes bus_type;
 }       bus_datum;
 
 /* One parsed interrupt entry (source bus/IRQ -> destination APIC/pin). */
 typedef struct INTDATA {
 	u_char  int_type;	/* 0 = standard INT, 3 = ExtInt, 0xff = unused/not connected */
 	u_short int_flags;	/* bits 0-1: polarity, bits 2-3: trigger mode */
 	u_char  src_bus_id;
 	u_char  src_bus_irq;	/* for PCI: bits 2-6 = device, bits 0-1 = INT line */
 	u_char  dst_apic_id;	/* physical IO APIC ID */
 	u_char  dst_apic_int;	/* pin on that IO APIC */
 	u_char	int_vector;	/* assigned IRQ, 0xff while unassigned */
 }       io_int, local_int;
 
 /* Maps a bus type name string (as matched by lookup_bus_type()) to its type code. */
 typedef struct BUSTYPENAME {
 	u_char  type;
 	char    name[7];	/* NUL-terminated; the longest name used below has 6 characters */
 }       bus_type_name;
 
 /*
  * Bus name -> bus type table searched by lookup_bus_type(), which scans
  * the first MAX_BUSTYPE entries and returns the type of the first entry
  * whose name matches.  "---" entries are placeholders mapped to
  * UNKNOWN_BUSTYPE.
  *
  * NOTE(review): "MCA" appears twice; with a first-match lookup the second
  * entry is never reached -- confirm whether that slot was meant for a
  * different bus type.
  */
 static bus_type_name bus_type_table[] =
 {
 	{CBUS, "CBUS"},
 	{CBUSII, "CBUSII"},
 	{EISA, "EISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{ISA, "ISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{PCI, "PCI"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{XPRESS, "XPRESS"},
 	{UNKNOWN_BUSTYPE, "---"}
 };
 /*
  * from MP spec v1.4, table 5-1
  *
  * One row per default configuration type; row (type - 1) describes
  * default type 'type' (1..7).  Column 0 is the number of busses, columns
  * 1/2 are the ID and type of the first bus, columns 3/4 those of the
  * second bus (255 where there is no second bus).
  */
 static int default_data[7][5] =
 {
 /*   nbus, id0, type0, id1, type1 */
 	{1, 0, ISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, MCA, 255, 255},
 	{2, 0, ISA, 1, PCI},
 	{2, 0, EISA, 1, PCI},
 	{2, 0, MCA, 1, PCI}
 };
 
 
 /* the bus data: mp_nbusses entries, allocated in mptable_pass2() */
 static bus_datum *bus_data;
 
 /*
  * the IO INT data, one entry per possible APIC INTerrupt
  * (nintrs + 1 entries are allocated in mptable_pass2(); the spare slot is
  * used by fix_mp_table() when it has to add a missing ExtInt entry)
  */
 static io_int  *io_apic_ints;
 
 /* number of valid entries in io_apic_ints[] */
 static int nintrs;
 
 /*
  * Per-entry helpers used by the second MP table pass.  The *_entry()
  * functions return 1 when the entry was recorded in slot 'cpu'/'bus'/
  * 'apic'/'intr' (so the caller advances that index) and 0 otherwise.
  */
 static int processor_entry	__P((proc_entry_ptr entry, int cpu));
 static int bus_entry		__P((bus_entry_ptr entry, int bus));
 static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
 static int int_entry		__P((int_entry_ptr entry, int intr));
 static int lookup_bus_type	__P((char *name));
 
 
 /*
  * 1st pass on motherboard's Intel MP specification table.
  *
  * Only counts things; nothing is allocated here.
  *
  * initializes:
  *	mp_ncpus = 1
  *
  * determines:
  *	cpu_apic_address (common to all CPUs)
  *	io_apic_address[N]
  *	mp_naps		(number of APs, i.e. enabled CPUs minus the BSP)
  *	mp_nbusses
  *	mp_napics
  *	nintrs
  *
  * Panics when the configuration table header is missing, when an entry
  * of unknown type is found, or when the table lists more enabled IO
  * APICs than io_apic_address[] has initialized slots (NAPICID).
  */
 static void
 mptable_pass1(void)
 {
 	int	x;
 	mpcth_t	cth;
 	int	totalSize;
 	void*	position;
 	int	count;
 	int	type;
 
 	POSTCODE(MPTABLE_PASS1_POST);
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		io_apic_address[x] = ~0;	/* IO APIC address table */
 	}
 
 	/* init everything to empty */
 	mp_naps = 0;
 	mp_nbusses = 0;
 	mp_napics = 0;
 	nintrs = 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0) {
 		/* use default addresses */
 		cpu_apic_address = DEFAULT_APIC_BASE;
 		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
 
 		/* fill in with defaults */
 		mp_naps = 2;		/* includes BSP */
 		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
 #if defined(APIC_IO)
 		mp_napics = 1;
 		nintrs = 16;
 #endif	/* APIC_IO */
 	}
 	else {
 		if ((cth = mpfps->pap) == 0)
 			panic("MP Configuration Table Header MISSING!");
 
 		cpu_apic_address = (vm_offset_t) cth->apic_address;
 
 		/* walk the table, recording info of interest */
 		totalSize = cth->base_table_length - sizeof(struct MPCTH);
 		position = (u_char *) cth + sizeof(struct MPCTH);
 		count = cth->entry_count;
 
 		while (count--) {
 			switch (type = *(u_char *) position) {
 			case 0: /* processor_entry */
 				if (((proc_entry_ptr)position)->cpu_flags
 					& PROCENTRY_FLAG_EN)
 					++mp_naps;
 				break;
 			case 1: /* bus_entry */
 				++mp_nbusses;
 				break;
 			case 2: /* io_apic_entry */
 				if (((io_apic_entry_ptr)position)->apic_flags
 					& IOAPICENTRY_FLAG_EN) {
 					/*
 					 * Refuse to write past the NAPICID
 					 * slots initialized above.
 					 */
 					if (mp_napics >= NAPICID)
 						panic("too many IO APICs in MP table");
 					io_apic_address[mp_napics++] =
 					    (vm_offset_t)((io_apic_entry_ptr)
 						position)->apic_address;
 				}
 				break;
 			case 3: /* IO int_entry */
 				++nintrs;
 				break;
 			case 4:	/* local int_entry, not counted */
 				break;
 			default:
 				panic("mpfps Base Table HOSED!");
 				/* NOTREACHED */
 			}
 
 			/*
 			 * Step to the next entry.  The pointer is advanced
 			 * with plain arithmetic rather than the old
 			 * "(u_char *)position += n" cast-as-lvalue form,
 			 * which is not valid C.
 			 */
 			totalSize -= basetable_entry_types[type].length;
 			position = (u_char *)position +
 			    basetable_entry_types[type].length;
 		}
 	}
 
 	/* qualify the numbers */
 	if (mp_naps > MAXCPU) {
 		printf("Warning: only using %d of %d available CPUs!\n",
 			MAXCPU, mp_naps);
 		mp_naps = MAXCPU;
 	}
 
 	/*
 	 * Count the BSP.
 	 * This is also used as a counter while starting the APs.
 	 */
 	mp_ncpus = 1;
 
 	--mp_naps;	/* subtract the BSP */
 }
 
 
 /*
  * 2nd pass on motherboard's Intel MP specification table.
  *
  * Allocates io_apic_versions, ioapic, io_apic_ints and bus_data using the
  * counts gathered by the first pass, maps each IO APIC page through
  * SMPpt[], and then records every table entry.
  *
  * sets:
  *	boot_cpu_id
  *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
  *	CPU_TO_ID(N), logical CPU to APIC ID table
  *	IO_TO_ID(N), logical IO to APIC ID table
  *	bus_data[N]
  *	io_apic_ints[N]
  *	picmode
  *
  * returns:
  *	0 when a real configuration table was parsed,
  *	otherwise the default configuration type (MPFPS_MPFB1); in that
  *	case the tables are only cleared and the caller fills them in.
  */
 static int
 mptable_pass2(void)
 {
 	int     x;
 	mpcth_t cth;
 	int     totalSize;
 	void*   position;
 	int     count;
 	int     type;
 	int     apic, bus, cpu, intr;
 	int	i, j;
 	int	pgeflag;
 
 	POSTCODE(MPTABLE_PASS2_POST);
 
 	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
 
 	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	/* one spare io_int slot beyond nintrs, see fix_mp_table() */
 	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
 	    M_DEVBUF, M_WAITOK);
 
 	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
 
 	/*
 	 * Give every IO APIC a virtual address.  Page table slots are taken
 	 * downwards from SMPpt[NPTEPG-2]; IO APICs that live in the same
 	 * physical page share one slot.
 	 */
 	for (i = 0; i < mp_napics; i++) {
 		for (j = 0; j < mp_napics; j++) {
 			/* same page frame as a previous IO apic? */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
 			    (io_apic_address[i] & PG_FRAME)) {
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 			/* use this slot if available */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
 				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
 				    pgeflag | (io_apic_address[i] & PG_FRAME));
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 		}
 	}
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
 		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
 		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
 	}
 
 	/* clear bus data table */
 	for (x = 0; x < mp_nbusses; ++x)
 		bus_data[x].bus_id = 0xff;
 
 	/* clear IO APIC INT table */
 	for (x = 0; x < (nintrs + 1); ++x) {
 		io_apic_ints[x].int_type = 0xff;
 		io_apic_ints[x].int_vector = 0xff;
 	}
 
 	/* setup the cpu/apic mapping arrays */
 	boot_cpu_id = -1;
 
 	/* record whether PIC or virtual-wire mode */
 	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0)
 		return MPFPS_MPFB1;	/* return default configuration type */
 
 	if ((cth = mpfps->pap) == 0)
 		panic("MP Configuration Table Header MISSING!");
 
 	/* walk the table, recording info of interest */
 	totalSize = cth->base_table_length - sizeof(struct MPCTH);
 	position = (u_char *) cth + sizeof(struct MPCTH);
 	count = cth->entry_count;
 	apic = bus = intr = 0;
 	cpu = 1;				/* pre-count the BSP */
 
 	while (count--) {
 		/* each helper returns 1 when it consumed the current slot */
 		switch (type = *(u_char *) position) {
 		case 0:
 			if (processor_entry(position, cpu))
 				++cpu;
 			break;
 		case 1:
 			if (bus_entry(position, bus))
 				++bus;
 			break;
 		case 2:
 			if (io_apic_entry(position, apic))
 				++apic;
 			break;
 		case 3:
 			if (int_entry(position, intr))
 				++intr;
 			break;
 		case 4:
 			/* int_entry(position); */
 			break;
 		default:
 			panic("mpfps Base Table HOSED!");
 			/* NOTREACHED */
 		}
 
 		/*
 		 * Step to the next entry.  Plain pointer arithmetic replaces
 		 * the old "(u_char *) position += n" cast-as-lvalue form,
 		 * which is not valid C.
 		 */
 		totalSize -= basetable_entry_types[type].length;
 		position = (u_char *)position +
 		    basetable_entry_types[type].length;
 	}
 
 	if (boot_cpu_id == -1)
 		panic("NO BSP found!");
 
 	/* report fact that its NOT a default configuration */
 	return 0;
 }
 
 
 void
 assign_apic_irq(int apic, int intpin, int irq)
 {
 	int x;
 	
 	if (int_to_apicintpin[irq].ioapic != -1)
 		panic("assign_apic_irq: inconsistent table");
 	
 	int_to_apicintpin[irq].ioapic = apic;
 	int_to_apicintpin[irq].int_pin = intpin;
 	int_to_apicintpin[irq].apic_address = ioapic[apic];
 	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 	
 	for (x = 0; x < nintrs; x++) {
 		if ((io_apic_ints[x].int_type == 0 || 
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector == 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
 		    io_apic_ints[x].dst_apic_int == intpin)
 			io_apic_ints[x].int_vector = irq;
 	}
 }
 
 /*
  * Undo assign_apic_irq() for 'irq'.
  *
  * Releases the int_to_apicintpin[] slot (which must currently be in use,
  * otherwise this panics) and marks every INT/ExtInt table entry that
  * targets the released APIC pin as unassigned again (int_vector = 0xff).
  */
 void
 revoke_apic_irq(int irq)
 {
 	int x;
 	int oldapic;
 	int oldintpin;
 	
 	if (int_to_apicintpin[irq].ioapic == -1)
 		panic("revoke_apic_irq: inconsistent table");
 	
 	/* remember the pin before the slot is wiped */
 	oldapic = int_to_apicintpin[irq].ioapic;
 	oldintpin = int_to_apicintpin[irq].int_pin;
 
 	int_to_apicintpin[irq].ioapic = -1;
 	int_to_apicintpin[irq].int_pin = 0;
 	int_to_apicintpin[irq].apic_address = NULL;
 	int_to_apicintpin[irq].redirindex = 0;
 	
 	for (x = 0; x < nintrs; x++) {
 		/*
 		 * Match entries that currently HAVE a vector.  Testing for
 		 * "== 0xff" here (as the assign path does) would only ever
 		 * rewrite 0xff with 0xff and leave the assignment in place.
 		 */
 		if ((io_apic_ints[x].int_type == 0 || 
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector != 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 		    io_apic_ints[x].dst_apic_int == oldintpin)
 			io_apic_ints[x].int_vector = 0xff;
 	}
 }
 
 
 static void
 allocate_apic_irq(int intr)
 {
 	int apic;
 	int intpin;
 	int irq;
 	
 	if (io_apic_ints[intr].int_vector != 0xff)
 		return;		/* Interrupt handler already assigned */
 	
 	if (io_apic_ints[intr].int_type != 0 &&
 	    (io_apic_ints[intr].int_type != 3 ||
 	     (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 	      io_apic_ints[intr].dst_apic_int == 0)))
 		return;		/* Not INT or ExtInt on != (0, 0) */
 	
 	irq = 0;
 	while (irq < APIC_INTMAPSIZE &&
 	       int_to_apicintpin[irq].ioapic != -1)
 		irq++;
 	
 	if (irq >= APIC_INTMAPSIZE)
 		return;		/* No free interrupt handlers */
 	
 	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 	intpin = io_apic_ints[intr].dst_apic_int;
 	
 	assign_apic_irq(apic, intpin, irq);
 	io_apic_setup_intpin(apic, intpin);
 }
 
 
 static void
 swap_apic_id(int apic, int oldid, int newid)
 {
 	int x;
 	int oapic;
 	
 
 	if (oldid == newid)
 		return;			/* Nothing to do */
 	
 	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 	       apic, oldid, newid);
 	
 	/* Swap physical APIC IDs in interrupt entries */
 	for (x = 0; x < nintrs; x++) {
 		if (io_apic_ints[x].dst_apic_id == oldid)
 			io_apic_ints[x].dst_apic_id = newid;
 		else if (io_apic_ints[x].dst_apic_id == newid)
 			io_apic_ints[x].dst_apic_id = oldid;
 	}
 	
 	/* Swap physical APIC IDs in IO_TO_ID mappings */
 	for (oapic = 0; oapic < mp_napics; oapic++)
 		if (IO_TO_ID(oapic) == newid)
 			break;
 	
 	if (oapic < mp_napics) {
 		printf("Changing APIC ID for IO APIC #%d from "
 		       "%d to %d in MP table\n",
 		       oapic, newid, oldid);
 		IO_TO_ID(oapic) = oldid;
 	}
 	IO_TO_ID(apic) = newid;
 }
 
 
 /*
  * Rebuild the physical-APIC-ID -> logical-number table (ID_TO_IO) from
  * the CPU_TO_ID and IO_TO_ID tables.  IDs outside 0..NAPICID-1 are
  * skipped.
  */
 static void
 fix_id_to_io_mapping(void)
 {
 	int id;
 	int cpu;
 	int apic;
 
 	/* start from an empty table */
 	for (id = 0; id < NAPICID; id++) {
 		ID_TO_IO(id) = -1;
 	}
 
 	/* logical CPUs 0..mp_naps (BSP plus the APs) */
 	for (cpu = 0; cpu <= mp_naps; cpu++) {
 		if (CPU_TO_ID(cpu) < NAPICID) {
 			ID_TO_IO(CPU_TO_ID(cpu)) = cpu;
 		}
 	}
 
 	/* logical IO APICs */
 	for (apic = 0; apic < mp_napics; apic++) {
 		if (IO_TO_ID(apic) < NAPICID) {
 			ID_TO_IO(IO_TO_ID(apic)) = apic;
 		}
 	}
 }
 
 
 /*
  * Return the lowest physical APIC ID that is used by neither a CPU
  * (logical CPUs 0..mp_naps) nor an IO APIC, or NAPICID when every ID is
  * taken.
  */
 static int
 first_free_apic_id(void)
 {
 	int candidate;
 	int i;
 	int taken;
 
 	for (candidate = 0; candidate < NAPICID; candidate++) {
 		taken = 0;
 
 		for (i = 0; i <= mp_naps && !taken; i++) {
 			if (CPU_TO_ID(i) == candidate)
 				taken = 1;
 		}
 		for (i = 0; i < mp_napics && !taken; i++) {
 			if (IO_TO_ID(i) == candidate)
 				taken = 1;
 		}
 
 		if (!taken)
 			return candidate;
 	}
 
 	/* the loop ran to completion, so this is NAPICID */
 	return candidate;
 }
 
 
 /*
  * Decide whether physical APIC ID 'id' may be used for logical IO APIC
  * 'apic'.  Returns 1 when the ID is in range, is not used by any CPU and
  * is not used by an IO APIC with a lower logical number; 0 otherwise.
  */
 static int
 io_apic_id_acceptable(int apic, int id)
 {
 	int limit;		/* number of lower-numbered IO APICs to check */
 	int i;
 
 	if (id >= NAPICID)
 		return 0;	/* Out of range */
 
 	i = 0;
 	while (i <= mp_naps) {
 		if (CPU_TO_ID(i) == id)
 			return 0;	/* Conflict with CPU */
 		i++;
 	}
 
 	limit = (apic < mp_napics) ? apic : mp_napics;
 	for (i = 0; i < limit; i++) {
 		if (IO_TO_ID(i) == id)
 			return 0;	/* Conflict with other APIC */
 	}
 
 	return 1;		/* ID is acceptable for IO APIC */
 }
 
 
 /*
  * Post-process the data gathered from the Intel MP specification table:
  *  - renumber a mis-numbered single PCI bus so that it becomes bus 0,
  *  - resolve physical APIC ID conflicts for the IO APICs and rebuild the
  *    ID_TO_IO table,
  *  - add the 8259 ExtInt entry for (IO APIC 0, pin 0) when it is missing.
  */
 static void
 fix_mp_table(void)
 {
 	int	x;
 	int	id;
 	int	bus_0 = 0;	/* Stop GCC warning */
 	int	bus_pci = 0;	/* Stop GCC warning */
 	int	num_pci_bus;
 	int	apic;		/* IO APIC unit number */
 	int     freeid;		/* Free physical APIC ID */
 	int	physid;		/* Current physical IO APIC ID */
 
 	/*
 	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 	 * did it wrong.  The MP spec says that when more than 1 PCI bus
 	 * exists the BIOS must begin with bus entries for the PCI bus and use
 	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
 	 * exists the BIOS can choose to ignore this ordering, and indeed many
 	 * MP motherboards do ignore it.  This causes a problem when the PCI
 	 * sub-system makes requests of the MP sub-system based on PCI bus
 	 * numbers.	So here we look for the situation and renumber the
 	 * busses and associated INTs in an effort to "make it right".
 	 */
 
 	/* find bus 0, PCI bus, count the number of PCI busses */
 	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 		if (bus_data[x].bus_id == 0) {
 			bus_0 = x;
 		}
 		if (bus_data[x].bus_type == PCI) {
 			++num_pci_bus;
 			bus_pci = x;
 		}
 	}
 	/*
 	 * bus_0 == slot of bus with ID of 0
 	 * bus_pci == slot of last PCI bus encountered
 	 */
 
 	/* check the 1 PCI bus case for sanity */
 	/* if it is number 0 all is well */
 	if (num_pci_bus == 1 &&
 	    bus_data[bus_pci].bus_id != 0) {
 		
 		/* mis-numbered, swap with whichever bus uses slot 0 */
 
 		/* swap the bus entry types */
 		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 		bus_data[bus_0].bus_type = PCI;
 
 		/* swap each relevant INTerrupt entry */
 		id = bus_data[bus_pci].bus_id;
 		for (x = 0; x < nintrs; ++x) {
 			if (io_apic_ints[x].src_bus_id == id) {
 				io_apic_ints[x].src_bus_id = 0;
 			}
 			else if (io_apic_ints[x].src_bus_id == 0) {
 				io_apic_ints[x].src_bus_id = id;
 			}
 		}
 	}
 
 	/* Assign IO APIC IDs.
 	 * 
 	 * First try the existing ID. If a conflict is detected, try
 	 * the ID in the MP table.  If a conflict is still detected, find
 	 * a free id.
 	 *
 	 * We cannot use the ID_TO_IO table before all conflicts have been
 	 * resolved and the table has been corrected.
 	 */
 	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 		
 		/* First try to use the value set by the BIOS */
 		physid = io_apic_get_id(apic);
 		if (io_apic_id_acceptable(apic, physid)) {
 			if (IO_TO_ID(apic) != physid)
 				swap_apic_id(apic, IO_TO_ID(apic), physid);
 			continue;
 		}
 
 		/* Then check if the value in the MP table is acceptable */
 		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 			continue;
 
 		/* Last resort, find a free APIC ID and use it */
 		freeid = first_free_apic_id();
 		if (freeid >= NAPICID)
 			panic("No free physical APIC IDs found");
 		
 		if (io_apic_id_acceptable(apic, freeid)) {
 			swap_apic_id(apic, IO_TO_ID(apic), freeid);
 			continue;
 		}
 		panic("Free physical APIC ID not usable");
 	}
 	/* IDs are final now, so the reverse table can be rebuilt */
 	fix_id_to_io_mapping();
 
 	/* detect and fix broken Compaq MP table */
 	if (apic_int_type(0, 0) == -1) {
 		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 		/* uses the spare slot allocated beyond nintrs */
 		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
 		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
 		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
 		nintrs++;
 	}
 }
 
 
 /* Assign low level interrupt handlers */
 static void
 setup_apic_irq_mapping(void)
 {
 	int	x;
 	int	int_vector;
 
 	/* Clear array */
 	for (x = 0; x < APIC_INTMAPSIZE; x++) {
 		int_to_apicintpin[x].ioapic = -1;
 		int_to_apicintpin[x].int_pin = 0;
 		int_to_apicintpin[x].apic_address = NULL;
 		int_to_apicintpin[x].redirindex = 0;
 	}
 
 	/* First assign ISA/EISA interrupts */
 	for (x = 0; x < nintrs; x++) {
 		int_vector = io_apic_ints[x].src_bus_irq;
 		if (int_vector < APIC_INTMAPSIZE &&
 		    io_apic_ints[x].int_vector == 0xff && 
 		    int_to_apicintpin[int_vector].ioapic == -1 &&
 		    (apic_int_is_bus_type(x, ISA) ||
 		     apic_int_is_bus_type(x, EISA)) &&
 		    io_apic_ints[x].int_type == 0) {
 			assign_apic_irq(ID_TO_IO(io_apic_ints[x].dst_apic_id), 
 					io_apic_ints[x].dst_apic_int,
 					int_vector);
 		}
 	}
 
 	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
 	for (x = 0; x < nintrs; x++) {
 		if (io_apic_ints[x].dst_apic_int == 0 &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(0) &&
 		    io_apic_ints[x].int_vector == 0xff && 
 		    int_to_apicintpin[0].ioapic == -1 &&
 		    io_apic_ints[x].int_type == 3) {
 			assign_apic_irq(0, 0, 0);
 			break;
 		}
 	}
 	/* PCI interrupt assignment is deferred */
 }
 
 
 /*
  * Record one processor entry of the MP table.
  *
  * 'cpu' is the logical CPU number an AP would receive.  Returns 1 when
  * the entry was stored as that AP, 0 when it was disabled, was the BSP
  * (always logical CPU 0) or did not fit below MAXCPU.  Panics when the
  * APIC ID is not below NAPICID.
  */
 static int
 processor_entry(proc_entry_ptr entry, int cpu)
 {
 	/* disabled processors are ignored */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_EN) == 0)
 		return 0;
 
 	if (entry->apic_id >= NAPICID)
 		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 
 	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
 		/* the BSP: it has already been counted */
 		boot_cpu_id = entry->apic_id;
 		CPU_TO_ID(0) = entry->apic_id;
 		ID_TO_CPU(entry->apic_id) = 0;
 		return 0;
 	}
 
 	/* an AP: only record it while there is room */
 	if (cpu >= MAXCPU)
 		return 0;
 
 	CPU_TO_ID(cpu) = entry->apic_id;
 	ID_TO_CPU(entry->apic_id) = cpu;
 	return 1;
 }
 
 
 /*
  * Record one bus entry of the MP table in bus_data[bus].
  *
  * The bus type string (at most 6 characters, ended by a space) is
  * translated with lookup_bus_type(); an unknown name panics.
  * Always returns 1.
  */
 static int
 bus_entry(bus_entry_ptr entry, int bus)
 {
 	char    name[8];
 	char    ch;
 	int     len;
 	int     type;
 
 	/* copy the name up to the first space */
 	len = 0;
 	while (len < 6) {
 		ch = entry->bus_type[len];
 		if (ch == ' ')
 			break;
 		name[len] = ch;
 		++len;
 	}
 	name[len] = '\0';
 
 	type = lookup_bus_type(name);
 	if (type == UNKNOWN_BUSTYPE)
 		panic("unknown bus type: '%s'", name);
 
 	bus_data[bus].bus_id = entry->bus_id;
 	bus_data[bus].bus_type = type;
 
 	return 1;
 }
 
 
 /*
  * Record one IO APIC entry of the MP table as logical IO APIC 'apic'.
  * Returns 0 for a disabled IO APIC (nothing recorded), 1 otherwise.
  */
 static int
 io_apic_entry(io_apic_entry_ptr entry, int apic)
 {
 	if ((entry->apic_flags & IOAPICENTRY_FLAG_EN) == 0) {
 		return 0;
 	}
 
 	IO_TO_ID(apic) = entry->apic_id;
 
 	/* the reverse table only covers IDs below NAPICID */
 	if (entry->apic_id < NAPICID) {
 		ID_TO_IO(entry->apic_id) = apic;
 	}
 
 	return 1;
 }
 
 
 static int
 lookup_bus_type(char *name)
 {
 	int     x;
 
 	for (x = 0; x < MAX_BUSTYPE; ++x)
 		if (strcmp(bus_type_table[x].name, name) == 0)
 			return bus_type_table[x].type;
 
 	return UNKNOWN_BUSTYPE;
 }
 
 
 /*
  * Record one IO interrupt entry of the MP table in io_apic_ints[intr].
  *
  * A destination APIC ID of 255 means the signal goes to all IO APICs;
  * in that case the first IO APIC whose maximum redirection entry is at
  * least the destination pin is chosen, and 255 is kept when none
  * qualifies.  Always returns 1.
  */
 static int
 int_entry(int_entry_ptr entry, int intr)
 {
 	io_int *rec = &io_apic_ints[intr];
 	int apic;
 	int dst;
 
 	rec->int_type = entry->int_type;
 	rec->int_flags = entry->int_flags;
 	rec->src_bus_id = entry->src_bus_id;
 	rec->src_bus_irq = entry->src_bus_irq;
 
 	dst = entry->dst_apic_id;
 	if (entry->dst_apic_id == 255) {
 		/* Select an IO APIC with sufficient number of interrupt pins */
 		apic = 0;
 		while (apic < mp_napics) {
 			if (((io_apic_read(apic, IOAPIC_VER) &
 			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
 			    entry->dst_apic_int)
 				break;
 			apic++;
 		}
 		if (apic < mp_napics)
 			dst = IO_TO_ID(apic);
 	}
 	rec->dst_apic_id = dst;
 	rec->dst_apic_int = entry->dst_apic_int;
 
 	return 1;
 }
 
 
 /*
  * Return 1 when the source bus of interrupt entry 'intr' is a bus of
  * type 'bus_type', 0 otherwise.
  */
 static int
 apic_int_is_bus_type(int intr, int bus_type)
 {
 	int     idx = 0;
 
 	while (idx < mp_nbusses) {
 		if (bus_data[idx].bus_id == io_apic_ints[intr].src_bus_id &&
 		    (int) bus_data[idx].bus_type == bus_type) {
 			return 1;
 		}
 		++idx;
 	}
 
 	return 0;
 }
 
 
 /*
  * Given a traditional ISA INT mask, return an APIC mask.
  *
  * Only the lowest set bit of isa_mask is looked at.  Returns 0 when the
  * mask is empty or the ISA IRQ has no assigned APIC IRQ.
  */
 u_int
 isa_apic_mask(u_int isa_mask)
 {
 	int bitpos;
 	int irq;
 
 #if defined(SKIP_IRQ15_REDIRECT)
 	if (isa_mask == (1 << 15)) {
 		printf("skipping ISA IRQ15 redirect\n");
 		return isa_mask;
 	}
 #endif  /* SKIP_IRQ15_REDIRECT */
 
 	/* ffs() is 1-based and yields 0 for an empty mask */
 	bitpos = ffs(isa_mask);
 	if (bitpos == 0)
 		return 0;
 
 	/* look for the APIC connection of the zero-based ISA IRQ */
 	irq = isa_apic_irq(bitpos - 1);
 
 	return (irq == -1) ? 0 : (1 << irq);
 }
 
 
 /*
  * Determine which APIC pin an ISA/EISA INT is attached to.
  */
 #define INTTYPE(I)	(io_apic_ints[(I)].int_type)
 #define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
 #define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
 #define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 
 #define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
 int
 isa_apic_irq(int isa_irq)
 {
 	int     intr;
 
 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
 		if (INTTYPE(intr) == 0) {		/* standard INT */
 			if (SRCBUSIRQ(intr) == isa_irq) {
 				if (apic_int_is_bus_type(intr, ISA) ||
 			            apic_int_is_bus_type(intr, EISA)) {
 					if (INTIRQ(intr) == 0xff)
 						return -1; /* unassigned */
 					return INTIRQ(intr);	/* found */
 				}
 			}
 		}
 	}
 	return -1;					/* NOT found */
 }
 
 
 /*
  * Determine which APIC pin a PCI INT is attached to.
  */
 #define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
 #define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 #define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
 int
 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
 {
 	int     intr;
 
 	--pciInt;					/* zero based */
 
 	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
 		if ((INTTYPE(intr) == 0)		/* standard INT */
 		    && (SRCBUSID(intr) == pciBus)
 		    && (SRCBUSDEVICE(intr) == pciDevice)
 		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
 			if (apic_int_is_bus_type(intr, PCI)) {
 				if (INTIRQ(intr) == 0xff)
 					allocate_apic_irq(intr);
 				if (INTIRQ(intr) == 0xff)
 					return -1;	/* unassigned */
 				return INTIRQ(intr);	/* exact match */
 			}
 
 	return -1;					/* NOT found */
 }
 
 int
 next_apic_irq(int irq) 
 {
 	int intr, ointr;
 	int bus, bustype;
 
 	bus = 0;
 	bustype = 0;
 	for (intr = 0; intr < nintrs; intr++) {
 		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
 			continue;
 		bus = SRCBUSID(intr);
 		bustype = apic_bus_type(bus);
 		if (bustype != ISA &&
 		    bustype != EISA &&
 		    bustype != PCI)
 			continue;
 		break;
 	}
 	if (intr >= nintrs) {
 		return -1;
 	}
 	for (ointr = intr + 1; ointr < nintrs; ointr++) {
 		if (INTTYPE(ointr) != 0)
 			continue;
 		if (bus != SRCBUSID(ointr))
 			continue;
 		if (bustype == PCI) {
 			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
 				continue;
 			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
 				continue;
 		}
 		if (bustype == ISA || bustype == EISA) {
 			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 				continue;
 		}
 		if (INTPIN(intr) == INTPIN(ointr))
 			continue;
 		break;
 	}
 	if (ointr >= nintrs) {
 		return -1;
 	}
 	return INTIRQ(ointr);
 }
 #undef SRCBUSLINE
 #undef SRCBUSDEVICE
 #undef SRCBUSID
 #undef SRCBUSIRQ
 
 #undef INTPIN
 #undef INTIRQ
 #undef INTAPIC
 #undef INTTYPE
 
 
 /*
  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
  *
  * XXX FIXME:
  *  Exactly what this means is unclear at this point.  It is a solution
  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
  *  option.
  *
  * No hardware is touched yet: the function only logs (when bootverbose
  * is set) and returns -1 when built with READY, 0 otherwise.
  */
 int
 undirect_isa_irq(int rirq)
 {
 	int	result;
 
 #if defined(READY)
 	if (bootverbose) {
 		printf("Freeing redirected ISA irq %d.\n", rirq);
 	}
 	/** FIXME: tickle the MB redirector chip */
 	result = -1;
 #else
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 	}
 	result = 0;
 #endif  /* READY */
 
 	return result;
 }
 
 
 /*
  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
  *
  * No hardware is touched yet: the function only logs (when bootverbose
  * is set) and returns -1 when built with READY, 0 otherwise.
  */
 int
 undirect_pci_irq(int rirq)
 {
 	int	result;
 
 #if defined(READY)
 	if (bootverbose) {
 		printf("Freeing redirected PCI irq %d.\n", rirq);
 	}
 
 	/** FIXME: tickle the MB redirector chip */
 	result = -1;
 #else
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 		       rirq);
 	}
 	result = 0;
 #endif  /* READY */
 
 	return result;
 }
 
 
 /*
  * given a bus ID, return:
  *  the bus type if found
  *  -1 if NOT found
  */
 int
 apic_bus_type(int id)
 {
 	int     x;
 
 	for (x = 0; x < mp_nbusses; ++x)
 		if (bus_data[x].bus_id == id)
 			return bus_data[x].bus_type;
 
 	return -1;
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the associated src bus ID if found
  *  -1 if NOT found
  */
 int
 apic_src_bus_id(int apic, int pin)
 {
 	int     x;
 
 	/* search each of the possible INTerrupt sources */
 	for (x = 0; x < nintrs; ++x)
 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 		    (pin == io_apic_ints[x].dst_apic_int))
 			return (io_apic_ints[x].src_bus_id);
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the associated src bus IRQ if found
  *  -1 if NOT found
  */
 int
 apic_src_bus_irq(int apic, int pin)
 {
 	int     x;
 
 	for (x = 0; x < nintrs; x++)
 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 		    (pin == io_apic_ints[x].dst_apic_int))
 			return (io_apic_ints[x].src_bus_irq);
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the associated INTerrupt type if found
  *  -1 if NOT found
  */
 int
 apic_int_type(int apic, int pin)
 {
 	int     x;
 
 	/* search each of the possible INTerrupt sources */
 	for (x = 0; x < nintrs; ++x)
 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 		    (pin == io_apic_ints[x].dst_apic_int))
 			return (io_apic_ints[x].int_type);
 
 	return -1;		/* NOT found */
 }
 
 int 
 apic_irq(int apic, int pin)
 {
 	int x;
 	int res;
 
 	for (x = 0; x < nintrs; ++x)
 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 		    (pin == io_apic_ints[x].dst_apic_int)) {
 			res = io_apic_ints[x].int_vector;
 			if (res == 0xff)
 				return -1;
 			if (apic != int_to_apicintpin[res].ioapic)
 				panic("apic_irq: inconsistent table");
 			if (pin != int_to_apicintpin[res].int_pin)
 				panic("apic_irq inconsistent table (2)");
 			return res;
 		}
 	return -1;
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the associated trigger mode if found
  *  -1 if NOT found
  */
 int
 apic_trigger(int apic, int pin)
 {
 	int     x;
 
 	/* search each of the possible INTerrupt sources */
 	for (x = 0; x < nintrs; ++x)
 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 		    (pin == io_apic_ints[x].dst_apic_int))
 			return ((io_apic_ints[x].int_flags >> 2) & 0x03);
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the associated 'active' level if found
  *  -1 if NOT found
  */
 int
 apic_polarity(int apic, int pin)
 {
 	int     x;
 
 	/* search each of the possible INTerrupt sources */
 	for (x = 0; x < nintrs; ++x)
 		if ((apic == ID_TO_IO(io_apic_ints[x].dst_apic_id)) &&
 		    (pin == io_apic_ints[x].dst_apic_int))
 			return (io_apic_ints[x].int_flags & 0x03);
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * set data according to MP defaults
  *
  * 'type' is the default configuration number; 1 through 7 are accepted and
  * anything else panics.  Fills in the BSP/AP APIC ID mappings and the two
  * bus_data[] entries and, when APIC_IO is defined, the IO APIC ID mapping
  * plus io_apic_ints[0..15].
  *
  * FIXME: probably not complete yet...
  */
 static void
 default_mp_table(int type)
 {
 	int     ap_cpu_id;
 #if defined(APIC_IO)
 	int     io_apic_id;
 	int     pin;
 #endif	/* APIC_IO */
 
 #if 0
 	printf("  MP default config type: %d\n", type);
 	switch (type) {
 	case 1:
 		printf("   bus: ISA, APIC: 82489DX\n");
 		break;
 	case 2:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 3:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 4:
 		printf("   bus: MCA, APIC: 82489DX\n");
 		break;
 	case 5:
 		printf("   bus: ISA+PCI, APIC: Integrated\n");
 		break;
 	case 6:
 		printf("   bus: EISA+PCI, APIC: Integrated\n");
 		break;
 	case 7:
 		printf("   bus: MCA+PCI, APIC: Integrated\n");
 		break;
 	default:
 		printf("   future type\n");
 		break;
 		/* NOTREACHED */
 	}
 #endif	/* 0 */
 
 	/*
 	 * The BSP's ID is read from its own local APIC; the single AP is
 	 * given whichever of IDs 0/1 the BSP is not using (1 if the BSP is
 	 * 0, otherwise 0).
 	 */
 	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
 	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
 
 	/* BSP */
 	CPU_TO_ID(0) = boot_cpu_id;
 	ID_TO_CPU(boot_cpu_id) = 0;
 
 	/* one and only AP */
 	CPU_TO_ID(1) = ap_cpu_id;
 	ID_TO_CPU(ap_cpu_id) = 1;
 
 #if defined(APIC_IO)
 	/* one and only IO APIC */
 	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
 
 	/*
 	 * sanity check, refer to MP spec section 3.6.6, last paragraph
 	 * necessary as some hardware isn't properly setting up the IO APIC
 	 *
 	 * The ID is forced to 2 when it reads back as 0, 1 or 15 (or as
 	 * anything other than 2 with REALLY_ANAL_IOAPICID_VALUE).
 	 */
 #if defined(REALLY_ANAL_IOAPICID_VALUE)
 	if (io_apic_id != 2) {
 #else
 	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
 #endif	/* REALLY_ANAL_IOAPICID_VALUE */
 		io_apic_set_id(0, 2);
 		io_apic_id = 2;
 	}
 	IO_TO_ID(0) = io_apic_id;
 	ID_TO_IO(io_apic_id) = 0;
 #endif	/* APIC_IO */
 
 	/* fill out bus entries (default_data[] row is type - 1) */
 	switch (type) {
 	case 1:
 	case 2:
 	case 3:
 	case 4:
 	case 5:
 	case 6:
 	case 7:
 		bus_data[0].bus_id = default_data[type - 1][1];
 		bus_data[0].bus_type = default_data[type - 1][2];
 		bus_data[1].bus_id = default_data[type - 1][3];
 		bus_data[1].bus_type = default_data[type - 1][4];
 		break;
 
 	/* NOTE: the MCA types 4 and 7 are accepted by the cases above */
 	default:		/* illegal/reserved */
 		panic("BAD default MP config: %d", type);
 		/* NOTREACHED */
 	}
 
 #if defined(APIC_IO)
 	/* general cases from MP v1.4, table 5-2 */
 	for (pin = 0; pin < 16; ++pin) {
 		io_apic_ints[pin].int_type = 0;
 		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
 		io_apic_ints[pin].src_bus_id = 0;
 		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
 		io_apic_ints[pin].dst_apic_id = io_apic_id;
 		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
 	}
 
 	/* special cases from MP v1.4, table 5-2 */
 	if (type == 2) {
 		io_apic_ints[2].int_type = 0xff;	/* N/C */
 		io_apic_ints[13].int_type = 0xff;	/* N/C */
 #if !defined(APIC_MIXED_MODE)
 		/** FIXME: ??? */
 		panic("sorry, can't support type 2 default yet");
 #endif	/* APIC_MIXED_MODE */
 	}
 	else
 		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
 
 	/* pin 0: unconnected for type 7, otherwise marked int_type 3 */
 	if (type == 7)
 		io_apic_ints[0].int_type = 0xff;	/* N/C */
 	else
 		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
 #endif	/* APIC_IO */
 }
 
 
 /*
  * start each AP in our list
  *
  * 'boot_addr' is the base-memory address the AP trampoline is installed at
  * (see install_ap_tramp()) and from which the warm-boot segment and the
  * STARTUP vector are derived.  For each of the mp_naps APs a private data
  * page and an idle stack are allocated and wired into SMPpt[] before
  * start_ap() is attempted.  The BIOS warm-start vector (and, except on
  * PC98, the CMOS reset code) is saved on entry and restored afterwards.
  *
  * Returns mp_ncpus - 1.
  */
 static int
 start_all_aps(u_int boot_addr)
 {
 	int     x, i, pg;
 	u_char  mpbiosreason;
 	u_long  mpbioswarmvec;
 	struct globaldata *gd;
 	char *stack;
 	uintptr_t kptbase;
 
 	POSTCODE(START_ALL_APS_POST);
 
 	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
 
 	/* initialize BSP's local APIC */
 	apic_initialize();
 	bsp_apic_ready = 1;
 
 	/* install the AP 1st level boot code */
 	install_ap_tramp(boot_addr);
 
 
 	/* save the current value of the warm-start vector */
 	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
 #ifndef PC98
 	/* mpbiosreason is only read and written in the !PC98 case */
 	outb(CMOS_REG, BIOS_RESET);
 	mpbiosreason = inb(CMOS_DATA);
 #endif
 
 	/* set up temporary P==V mapping for AP boot */
 	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
 	kptbase = (uintptr_t)(void *)KPTphys;
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
 		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
 	invltlb();
 
 	/* start each AP */
 	for (x = 1; x <= mp_naps; ++x) {
 
 		/* This is a bit verbose, it will go away soon.  */
 
 		/* first page of AP's private space */
 		pg = x * i386_btop(sizeof(struct privatespace));
 
 		/* allocate a new private data page */
 		/* NOTE(review): the kmem_alloc() results are used unchecked */
 		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
 
 		/* wire it into the private page table page */
 		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
 
 		/* allocate and set up an idle stack data page */
 		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
 		for (i = 0; i < UPAGES; i++)
 			SMPpt[pg + 1 + i] = (pt_entry_t)
 			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 		/* prime data page for it to use */
 		gd->gd_cpuid = x;
 		globaldata_register(gd);
 
 		/* setup a vector to our boot code */
 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
 #ifndef PC98
 		outb(CMOS_REG, BIOS_RESET);
 		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 #endif
 
 		/* tell the AP which stack top and logical cpu number to use */
 		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
 		bootAP = x;
 
 		/* attempt to start the Application Processor */
 		CHECK_INIT(99);	/* setup checkpoints */
 		if (!start_ap(x, boot_addr)) {
 			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
 			CHECK_PRINT("trace");	/* show checkpoints */
 			/* better panic as the AP may be running loose */
 			printf("panic y/n? [y] ");
 			if (cngetc() != 'n')
 				panic("bye-bye");
 		}
 		CHECK_PRINT("trace");		/* show checkpoints */
 
 		/* record its version info */
 		cpu_apic_versions[x] = cpu_apic_versions[0];
 
 		all_cpus |= (1 << x);		/* record AP in CPU map */
 	}
 
 	/* build our map of 'other' CPUs */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	/* fill in our (BSP) APIC version */
 	cpu_apic_versions[0] = lapic.version;
 
 	/* restore the warmstart vector */
 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
 #ifndef PC98
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, mpbiosreason);
 #endif
 
 	/*
 	 * Set up the idle context for the BSP.  Similar to above except
 	 * that some was done by locore, some by pmap.c and some is implicit
 	 * because the BSP is cpu#0 and the page is initially zero, and also
 	 * because we can refer to variables by name on the BSP..
 	 */
 
 	/* Allocate and setup BSP idle stack */
 	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
 	for (i = 0; i < UPAGES; i++)
 		SMPpt[1 + i] = (pt_entry_t)
 		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 	/* tear down the temporary P==V mapping installed above */
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = 0;
 	pmap_set_opt();
 
 	/* number of APs actually started */
 	return mp_ncpus - 1;
 }
 
 
 /*
  * load the 1st level AP boot code into base memory.
  */
 
 /* targets for relocation */
 extern void bigJump(void);
 extern void bootCodeSeg(void);
 extern void bootDataSeg(void);
 extern void MPentry(void);
 extern u_int MP_GDT;
 extern u_int mp_gdtbase;
 
 /*
  * Copy the bootMP trampoline to KERNBASE + boot_addr and patch the copy so
  * that it works from its new location:
  *  - the lgdt operand at mp_gdtbase is set to MP_GDT's relocated address,
  *  - the 32-bit operand one byte past bigJump is set to MPentry - KERNBASE,
  *  - the low 24 bits of boot_addr are stored at bootCodeSeg and at
  *    bootDataSeg (16 bits followed by 8 bits).
  */
 static void
 install_ap_tramp(u_int boot_addr)
 {
 	int     x;
 	/* bootMP_size is a symbol; its address holds the byte count */
 	int     size = *(int *) ((u_long) & bootMP_size);
 	u_char *src = (u_char *) ((u_long) bootMP);
 	u_char *dst = (u_char *) boot_addr + KERNBASE;
 	u_int   boot_base = (u_int) bootMP;
 	u_int8_t *dst8;
 	u_int16_t *dst16;
 	u_int32_t *dst32;
 
 	POSTCODE(INSTALL_AP_TRAMP_POST);
 
 	/* byte-wise copy of the trampoline into base memory */
 	for (x = 0; x < size; ++x)
 		*dst++ = *src++;
 
 	/*
 	 * modify addresses in code we just moved to basemem. unfortunately we
 	 * need fairly detailed info about mpboot.s for this to work.  changes
 	 * to mpboot.s might require changes here.
 	 */
 
 	/* boot code is located in KERNEL space */
 	dst = (u_char *) boot_addr + KERNBASE;
 
 	/* modify the lgdt arg */
 	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
 	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
 
 	/* modify the ljmp target for MPentry() */
 	/* the +1 presumably skips the opcode byte at bigJump -- confirm in mpboot.s */
 	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
 	*dst32 = ((u_int) MPentry - KERNBASE);
 
 	/* modify the target for boot code segment */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 
 	/* modify the target for boot data segment */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 }
 
 
 /*
  * this function starts the AP (application processor) identified
  * by 'logical_cpu'; the physical APIC ID is looked up with CPU_TO_ID().
  * It does quite a "song and dance" to accomplish this.  This is
  * necessary because of the nuances of the different hardware we might
  * encounter.  It ain't pretty, but it seems to work.
  *
  * 'boot_addr' supplies the STARTUP IPI vector ((boot_addr >> 12) & 0xff).
  *
  * Returns 1 once mp_ncpus is seen to increase, or 0 if that has not
  * happened by the time the APIC timer (armed with 5000000) reads zero.
  */
 static int
 start_ap(int logical_cpu, u_int boot_addr)
 {
 	int     physical_cpu;
 	int     vector;
 	int     cpus;
 	u_long  icr_lo, icr_hi;
 
 	POSTCODE(START_AP_POST);
 
 	/* get the PHYSICAL APIC ID# */
 	physical_cpu = CPU_TO_ID(logical_cpu);
 
 	/* calculate the vector */
 	vector = (boot_addr >> 12) & 0xff;
 
 	/* used as a watchpoint to signal AP startup */
 	cpus = mp_ncpus;
 
 	/*
 	 * first we do an INIT/RESET IPI.  This INIT IPI might be run,
 	 * resetting and running the target CPU.  OR this INIT IPI might be
 	 * latched (P5 bug), CPU waiting for STARTUP IPI.  OR this INIT IPI
 	 * might be ignored.
 	 */
 
 	/* setup the address for the target AP */
 	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
 	icr_hi |= (physical_cpu << 24);
 	lapic.icr_hi = icr_hi;
 
 	/* do an INIT IPI: assert RESET */
 	/* only the top 12 bits of icr_lo are kept for all the IPIs below */
 	icr_lo = lapic.icr_lo & 0xfff00000;
 	lapic.icr_lo = icr_lo | 0x0000c500;
 
 	/* wait for pending status end */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/* do an INIT IPI: deassert RESET */
 	lapic.icr_lo = icr_lo | 0x00008500;
 
 	/* wait for pending status end */
 	u_sleep(10000);		/* wait ~10mS */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/*
 	 * next we do a STARTUP IPI: the previous INIT IPI might still be
 	 * latched, (P5 bug) this 1st STARTUP would then terminate
 	 * immediately, and the previously started INIT IPI would continue. OR
 	 * the previous INIT IPI has already run. and this STARTUP IPI will
 	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 	 * will run.
 	 */
 
 	/* do a STARTUP IPI */
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/*
 	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/* wait for it to start */
 	set_apic_timer(5000000);/* == 5 seconds */
 	while (read_apic_timer())
 		if (mp_ncpus > cpus)
 			return 1;	/* return SUCCESS */
 
 	return 0;		/* return FAILURE */
 }
 
 /*
  * Ask every other CPU to flush its TLB by sending IPI_INVLTLB.
  * Without APIC_IO this function does nothing.
  *
  * XXX: Needs to handshake and wait for completion before proceeding.
  */
 void
 smp_invltlb(void)
 {
 #if defined(APIC_IO)
 	/* stay quiet until both smp_started and invltlb_ok are set */
 	if (!smp_started || !invltlb_ok)
 		return;
 
 	ipi_all_but_self(IPI_INVLTLB);
 #endif	/* APIC_IO */
 }
 
 /*
  * Invalidate the TLB entry for 'addr' on this CPU with the invlpg
  * instruction, then call smp_invltlb() to notify the other CPUs.
  */
 void
 invlpg(u_int addr)
 {
 	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 /*
  * Flush this CPU's TLB by reloading %cr3 with its current value, then
  * call smp_invltlb() to notify the other CPUs.
  */
 void
 invltlb(void)
 {
 	u_long  temp;	/* scratch register for the %cr3 round trip */
 
 	/*
 	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
 	 * inlined.
 	 */
 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 
 /*
  * This is called once the rest of the system is up and running and we're
  * ready to let the AP's out of the pen.
  */
 extern void	enable_sse(void);
 
 /*
  * Per-AP initialization.  The AP spins until aps_ready is set, then
  * performs its setup under ap_boot_mtx, so only one AP at a time runs the
  * locked section.  The AP that brings smp_cpus up to mp_ncpus sets
  * invltlb_ok, smp_started and smp_active.  After dropping the mutex every
  * AP waits for smp_started, enables interrupts, takes sched_lock and
  * calls cpu_throw(); the trailing panic() is only reached if that call
  * returns.
  */
 void
 ap_init(void)
 {
 	u_int	apic_id;
 
 	/* spin until all the AP's are ready */
 	while (!aps_ready)
 		/* spin */ ;
 
 	/*
 	 * Set curproc to our per-cpu idleproc so that mutexes have
 	 * something unique to lock with.
 	 */
 	PCPU_SET(curproc, PCPU_GET(idleproc));
 	PCPU_SET(spinlocks, NULL);
 
 	/* lock against other AP's that are waking up */
 	mtx_lock_spin(&ap_boot_mtx);
 
 	/* BSP may have changed PTD while we're waiting for the lock */
 	cpu_invltlb();
 
 	smp_cpus++;
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 	lidt(&r_idt);
 #endif
 
 	/* Build our map of 'other' CPUs. */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
 
 	/* set up CPU registers and state */
 	cpu_setregs();
 
 	/* set up FPU state on the AP */
 	npxinit(__INITIAL_NPXCW__);
 
 	/* set up SSE registers */
 	enable_sse();
 
 	/* A quick check from sanity claus */
 	/*
 	 * NOTE(review): this uses a literal 0x0f000000 mask where other
 	 * code in this file uses APIC_ID_MASK -- confirm they agree.
 	 */
 	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
 	if (PCPU_GET(cpuid) != apic_id) {
 		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
 		printf("SMP: apic_id = %d\n", apic_id);
 		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
 		panic("cpuid mismatch! boom!!");
 	}
 
 	/* Init local apic for irq's */
 	apic_initialize();
 
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
 	/*
 	 * Activate smp_invltlb, although strictly speaking, this isn't
 	 * quite correct yet.  We should have a bitfield for cpus willing
 	 * to accept TLB flush IPI's or something and sync them.
 	 */
 	if (smp_cpus == mp_ncpus) {
 		invltlb_ok = 1;
 		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
 		smp_active = 1;	 /* historic */
 	}
 
 	/* let other AP's wake up now */
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* wait until all the AP's are up */
 	while (smp_started == 0)
 		; /* nothing */
 
 	/* initialize this CPU's switchtime and switchticks */
 	microuptime(PCPU_PTR(switchtime));
 	PCPU_SET(switchticks, ticks);
 
 	/* ok, now grab sched_lock and enter the scheduler */
 	enable_intr();
 	mtx_lock_spin(&sched_lock);
 	cpu_throw();	/* doesn't return */
 
 	panic("scheduler returned us to ap_init");
 }
 
 /*
  * For statclock, we send an IPI to all CPU's to have them call this
  * function.
  *
  * Calls statclock_process() for curproc with the PC and user-mode flag
  * taken from 'frame', holding sched_lock around the call.
  */
 void
 forwarded_statclock(struct trapframe frame)
 {
 
 	mtx_lock_spin(&sched_lock);
 	statclock_process(curproc, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
 	mtx_unlock_spin(&sched_lock);
 }
 
 void
 forward_statclock(void)
 {
 	int map;
 
 	CTR0(KTR_SMP, "forward_statclock");
 
 	if (!smp_started || !invltlb_ok || cold || panicstr)
 		return;
 
 	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 	if (map != 0)
 		ipi_selected(map, IPI_STATCLOCK);
 }
 
 /*
  * For each hardclock(), we send an IPI to all other CPU's to have them
  * execute this function.  It would be nice to reduce contention on
  * sched_lock if we could simply peek at the CPU to determine the user/kernel
  * state and call hardclock_process() on the CPU receiving the clock interrupt
  * and then just use a simple IPI to handle any ast's if needed.
  *
  * Calls hardclock_process() for curproc with the user-mode flag taken
  * from 'frame', holding sched_lock around the call.
  */
 void
 forwarded_hardclock(struct trapframe frame)
 {
 
 	mtx_lock_spin(&sched_lock);
 	hardclock_process(curproc, TRAPF_USERMODE(&frame));
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Send IPI_HARDCLOCK to every other CPU that is not currently stopped.
  */
 void
 forward_hardclock(void)
 {
 	u_int targets;
 
 	CTR0(KTR_SMP, "forward_hardclock");
 
 	/* forward only when SMP is fully up and we are neither cold nor panicking */
 	if (smp_started && invltlb_ok && !cold && !panicstr) {
 		targets = PCPU_GET(other_cpus) & ~stopped_cpus;
 		if (targets != 0)
 			ipi_selected(targets, IPI_HARDCLOCK);
 	}
 }
 
 #ifdef APIC_INTR_REORDER
 /*
  *	Maintain mapping from softintr vector to isr bit in local apic.
  *
  * Records in apic_isrbit_location[intr] the word (an offset from
  * lapic.isr0) and the bit mask that correspond to 'vector'.
  * Panics if 'intr' is outside 0..31 or 'vector' is outside
  * ICU_OFFSET..255.
  */
 void
 set_lapic_isrloc(int intr, int vector)
 {
 	/* apic_isrbit_location[] has 32 entries: valid indices are 0..31 */
 	if (intr < 0 || intr >= 32)
 		panic("set_apic_isrloc: bad intr argument: %d",intr);
 	if (vector < ICU_OFFSET || vector > 255)
 		panic("set_apic_isrloc: bad vector argument: %d",vector);
 	/* 32 vectors per word; consecutive words are 4 elements apart */
 	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
 	/* shift an unsigned 1 so that bit 31 does not overflow a signed int */
 	apic_isrbit_location[intr].bit = (int)(1U << (vector & 31));
 }
 #endif
 
 /*
  * send an IPI to a set of cpus.
  *
  * 'cpus' is a bitmask of target CPUs and 'ipi' the IPI to deliver; both
  * are passed through to selected_apic_ipi() with fixed delivery mode.
  */
 void
 ipi_selected(u_int32_t cpus, u_int ipi)
 {
 
 	/* __func__ is not a string literal, so pass it as a %s argument */
 	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs, including myself
  */
 void
 ipi_all(u_int ipi)
 {
 
 	/* __func__ is not a string literal, so pass it as a %s argument */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs EXCEPT myself
  */
 void
 ipi_all_but_self(u_int ipi)
 {
 
 	/* __func__ is not a string literal, so pass it as a %s argument */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to myself
  */
 void
 ipi_self(u_int ipi)
 {
 
 	/* __func__ is not a string literal, so pass it as a %s argument */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * Set aps_ready to 1 with release semantics.  Registered below as a
  * SYSINIT at SI_SUB_SMP / SI_ORDER_FIRST; 'dummy' is unused.
  */
 void
 release_aps(void *dummy __unused)
 {
 	atomic_store_rel_int(&aps_ready, 1);
 }
 
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
Index: head/sys/amd64/amd64/mptable.c
===================================================================
--- head/sys/amd64/amd64/mptable.c	(revision 82308)
+++ head/sys/amd64/amd64/mptable.c	(revision 82309)
@@ -1,2440 +1,2442 @@
 /*
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_cpu.h"
+#include "opt_upages.h"
 
 #ifdef SMP
 #include <machine/smptests.h>
 #else
 #error
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
 #include <sys/dkstat.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 #include <machine/apic.h>
 #include <machine/atomic.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/mpapic.h>
 #include <machine/psl.h>
 #include <machine/segments.h>
 #include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
 #include <machine/tss.h>
 #include <machine/specialreg.h>
 #include <machine/globaldata.h>
+#include <machine/privatespace.h>
 
 #if defined(APIC_IO)
 #include <machine/md_var.h>		/* setidt() */
 #include <i386/isa/icu.h>		/* IPIs */
 #include <i386/isa/intr_machdep.h>	/* IPIs */
 #endif	/* APIC_IO */
 
 #if defined(TEST_DEFAULT_CONFIG)
 #define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
 #else
 #define MPFPS_MPFB1	mpfps->mpfb1
 #endif  /* TEST_DEFAULT_CONFIG */
 
 #define WARMBOOT_TARGET		0
 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
 
 #ifdef PC98
 #define BIOS_BASE		(0xe8000)
 #define BIOS_SIZE		(0x18000)
 #else
 #define BIOS_BASE		(0xf0000)
 #define BIOS_SIZE		(0x10000)
 #endif
 #define BIOS_COUNT		(BIOS_SIZE/4)
 
 #define CMOS_REG		(0x70)
 #define CMOS_DATA		(0x71)
 #define BIOS_RESET		(0x0f)
 #define BIOS_WARM		(0x0a)
 
 #define PROCENTRY_FLAG_EN	0x01
 #define PROCENTRY_FLAG_BP	0x02
 #define IOAPICENTRY_FLAG_EN	0x01
 
 
 /*
  * MP Floating Pointer Structure
  *
  * NOTE(review): this is overlaid on firmware-provided memory (mpfps is set
  * from the address returned by search_for_sig()), so the field order and
  * sizes presumably must match the Intel MP spec layout -- do not reorder.
  */
 typedef struct MPFPS {
 	char    signature[4];	/* search_for_sig() looks for MP_SIG, "_MP_" */
 	void   *pap;
 	u_char  length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  mpfb1;		/* read through the MPFPS_MPFB1 macro */
 	u_char  mpfb2;
 	u_char  mpfb3;
 	u_char  mpfb4;
 	u_char  mpfb5;
 }      *mpfps_t;
 
 /*
  * MP Configuration Table Header
  *
  * NOTE(review): presumably overlaid on the firmware table, so the pointer
  * members assume pointers are the width the table uses -- confirm before
  * building this for a different pointer size.
  */
 typedef struct MPCTH {
 	char    signature[4];
 	u_short base_table_length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  oem_id[8];
 	u_char  product_id[12];
 	void   *oem_table_pointer;
 	u_short oem_table_size;
 	u_short entry_count;
 	void   *apic_address;
 	u_short extended_table_length;
 	u_char  extended_table_checksum;
 	u_char  reserved;
 }      *mpcth_t;
 
 
 /*
  * Processor entry of the MP base table.
  *
  * NOTE(review): basetable_entry_types[] lists "Processor" entries as 20
  * bytes long; this struct is 20 bytes only where u_long is 4 bytes wide.
  */
 typedef struct PROCENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  cpu_flags;	/* presumably PROCENTRY_FLAG_EN / _BP bits */
 	u_long  cpu_signature;
 	u_long  feature_flags;
 	u_long  reserved1;
 	u_long  reserved2;
 }      *proc_entry_ptr;
 
 /* Bus entry of the MP base table (8 bytes). */
 typedef struct BUSENTRY {
 	u_char  type;
 	u_char  bus_id;
 	char    bus_type[6];	/* fixed 6-byte name field */
 }      *bus_entry_ptr;
 
 /*
  * I/O APIC entry of the MP base table.
  *
  * NOTE(review): basetable_entry_types[] lists "I/O APIC" entries as 8
  * bytes long; this struct is 8 bytes only where pointers are 4 bytes wide.
  */
 typedef struct IOAPICENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  apic_flags;	/* presumably IOAPICENTRY_FLAG_EN */
 	void   *apic_address;
 }      *io_apic_entry_ptr;
 
 /*
  * Interrupt entry of the MP base table (8 bytes).  The member names
  * match those of struct INTDATA, apart from INTDATA's extra int_vector.
  */
 typedef struct INTENTRY {
 	u_char  type;
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 }      *int_entry_ptr;
 
 /*
  * descriptions of MP basetable entries
  * (element type of basetable_entry_types[]: entry type code, entry length
  * and a printable name)
  */
 typedef struct BASETABLE_ENTRY {
 	u_char  type;
 	u_char  length;
 	char    name[16];
 }       basetable_entry;
 
 /*
  * this code MUST be enabled here and in mpboot.s.
  * it follows the very early stages of AP boot by placing values in CMOS ram.
  * it NORMALLY will never be needed and thus the primitive method for enabling.
  *
 #define CHECK_POINTS
  */
 
 #if defined(CHECK_POINTS) && !defined(PC98)
 /* access one CMOS byte: select register A, then read or write the data port */
 #define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
 
 /*
  * Set the six checkpoint bytes (CMOS 0x34-0x39) to D.  Wrapped in
  * do/while(0) so that "CHECK_INIT(x);" is a single statement; note that
  * D is evaluated once per byte.
  */
 #define CHECK_INIT(D)				\
 	do {					\
 		CHECK_WRITE(0x34, (D));		\
 		CHECK_WRITE(0x35, (D));		\
 		CHECK_WRITE(0x36, (D));		\
 		CHECK_WRITE(0x37, (D));		\
 		CHECK_WRITE(0x38, (D));		\
 		CHECK_WRITE(0x39, (D));		\
 	} while (0)
 
 /* print the six checkpoint bytes, prefixed by the string S */
 #define CHECK_PRINT(S)				\
 	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
 	   (S),					\
 	   CHECK_READ(0x34),			\
 	   CHECK_READ(0x35),			\
 	   CHECK_READ(0x36),			\
 	   CHECK_READ(0x37),			\
 	   CHECK_READ(0x38),			\
 	   CHECK_READ(0x39))
 
 #else				/* CHECK_POINTS */
 
 /* checkpoints disabled: both macros expand to nothing */
 #define CHECK_INIT(D)
 #define CHECK_PRINT(S)
 
 #endif				/* CHECK_POINTS */
 
 /*
  * Values to send to the POST hardware.
  */
 #define MP_BOOTADDRESS_POST	0x10
 #define MP_PROBE_POST		0x11
 #define MPTABLE_PASS1_POST	0x12
 
 #define MP_START_POST		0x13
 #define MP_ENABLE_POST		0x14
 #define MPTABLE_PASS2_POST	0x15
 
 #define START_ALL_APS_POST	0x16
 #define INSTALL_AP_TRAMP_POST	0x17
 #define START_AP_POST		0x18
 
 #define MP_ANNOUNCE_POST	0x19
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
 int	current_postcode;
 
 /** XXX FIXME: what system files declare these??? */
 extern struct region_descriptor r_gdt, r_idt;
 
 int	bsp_apic_ready = 0;	/* flags useability of BSP apic */
 int	mp_naps;		/* # of Applications processors */
 int	mp_nbusses;		/* # of busses */
 int	mp_napics;		/* # of IO APICs */
 int	boot_cpu_id;		/* designated BSP */
 vm_offset_t cpu_apic_address;
 vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
 extern	int nkpt;
 
 u_int32_t cpu_apic_versions[MAXCPU];
 u_int32_t *io_apic_versions;
 
 #ifdef APIC_INTR_REORDER
 struct {
 	volatile int *location;
 	int bit;
 } apic_isrbit_location[32];
 #endif
 
 struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
 
 /*
  * APIC ID logical/physical mapping structures.
  * We oversize these to simplify boot-time config.
  */
 int     cpu_num_to_apic_id[NAPICID];
 int     io_num_to_apic_id[NAPICID];
 int     apic_id_to_logical[NAPICID];
 
 
 /* AP uses this during bootstrap.  Do not staticize.  */
 char *bootSTK;
 static int bootAP;
 
 /* Hotwire a 0->4MB V==P mapping */
 extern pt_entry_t *KPTphys;
 
 /* SMP page table page */
 extern pt_entry_t *SMPpt;
 
 struct pcb stoppcbs[MAXCPU];
 
 int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
 
 /*
  * Local data and functions.
  */
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready = 0;
 
 static int	mp_capable;
 static u_int	boot_address;
 static u_int	base_memory;
 
 static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
 static mpfps_t	mpfps;
 static int	search_for_sig(u_int32_t target, int count);
 static void	mp_enable(u_int boot_addr);
 
 static void	mptable_pass1(void);
 static int	mptable_pass2(void);
 static void	default_mp_table(int type);
 static void	fix_mp_table(void);
 static void	setup_apic_irq_mapping(void);
 static void	init_locks(void);
 static int	start_all_aps(u_int boot_addr);
 static void	install_ap_tramp(u_int boot_addr);
 static int	start_ap(int logicalCpu, u_int boot_addr);
 void		ap_init(void);
 static int	apic_int_is_bus_type(int intr, int bus_type);
 static void	release_aps(void *dummy);
 
 /*
  * initialize all the SMP locks
  */
 
 /* critical region around IO APIC, apic_imen */
 struct mtx		imen_mtx;
 
 /* lock region used by kernel profiling */
 int	mcount_lock;
 
 #ifdef USE_COMLOCK
 /* locks com (tty) data/hardware accesses: a FASTINTR() */
 struct mtx		com_mtx;
 #endif /* USE_COMLOCK */
 
 /*
  * Initialize the SMP locks owned by this file.  Only com_mtx is set up
  * here, and only when USE_COMLOCK is defined; otherwise this is empty.
  */
 static void
 init_locks(void)
 {
 
 #ifdef USE_COMLOCK
 	mtx_init(&com_mtx, "com", MTX_SPIN);
 #endif /* USE_COMLOCK */
 }
 
 /*
  * Calculate usable address in base memory for AP trampoline code.
  *
  * 'basemem' is the amount of base memory in kilobytes.  Records the byte
  * count in base_memory and the chosen address in boot_address, and
  * returns boot_address.
  */
 u_int
 mp_bootaddress(u_int basemem)
 {
 	POSTCODE(MP_BOOTADDRESS_POST);
 
 	base_memory = basemem * 1024;	/* convert to bytes */
 
 	boot_address = base_memory & ~0xfff;	/* round down to 4k boundary */
 	/* space between that boundary and the top of base memory */
 	if ((base_memory - boot_address) < bootMP_size)
 		boot_address -= 4096;	/* not enough, lower by 4k */
 
 	return boot_address;
 }
 
 
 /*
  * Look for an Intel MP spec table (ie, SMP capable hardware).
  */
 void
 i386_mp_probe(void)
 {
 	int     x;
 	u_long  segment;
 	u_int32_t target;
 
 	POSTCODE(MP_PROBE_POST);
 
 	/* see if EBDA exists */
 	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
 		/* search first 1K of EBDA */
 		target = (u_int32_t) (segment << 4);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	} else {
 		/* last 1K of base memory, effective 'top of base' passed in */
 		target = (u_int32_t) (base_memory - 0x400);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	}
 
 	/* search the BIOS */
 	target = (u_int32_t) BIOS_BASE;
 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
 		goto found;
 
 	/* nothing found */
 	mpfps = (mpfps_t)0;
 	mp_capable = 0;
 	return;
 
 found:
 	/* calculate needed resources */
 	mpfps = (mpfps_t)x;
 	mptable_pass1();
 
 	/* flag fact that we are running multiple processors */
 	mp_capable = 1;
 }
 
 /*
  * Report whether MP hardware was found: returns mp_capable.  As a side
  * effect all_cpus is reset to 1, i.e. only the BSP's bit is set.
  */
 int
 cpu_mp_probe(void)
 {
 	/*
 	 * Record BSP in CPU map
 	 * This is done here so that MBUF init code works correctly.
 	 */
 	all_cpus = 1;
 
 	return (mp_capable);
 }
 
 /*
  * Initialize the SMP hardware and the APIC and start up the AP's.
  * Panics when no MP capable hardware was detected.
  */
 void
 cpu_mp_start(void)
 {
 	POSTCODE(MP_START_POST);
 
 	/* an MP capable motherboard is required */
 	if (!mp_capable)
 		panic("MP hardware not found!");
 	else
 		mp_enable(boot_address);
 
 	cpu_setregs();
 }
 
 
 /*
  * Print various information about the SMP system hardware and setup.
  *
  * One line is printed for the BSP, one per AP (1..mp_naps) and, with
  * APIC_IO, one per IO APIC; without APIC_IO a warning is printed instead.
  */
 void
 cpu_mp_announce(void)
 {
 	int     x;
 
 	POSTCODE(MP_ANNOUNCE_POST);
 
 	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
 	printf(", version: 0x%08x", cpu_apic_versions[0]);
 	printf(", at 0x%08x\n", cpu_apic_address);
 	/* every AP line reports the same cpu_apic_address as the BSP line */
 	for (x = 1; x <= mp_naps; ++x) {
 		printf(" cpu%d (AP):  apic id: %2d", x, CPU_TO_ID(x));
 		printf(", version: 0x%08x", cpu_apic_versions[x]);
 		printf(", at 0x%08x\n", cpu_apic_address);
 	}
 
 #if defined(APIC_IO)
 	for (x = 0; x < mp_napics; ++x) {
 		printf(" io%d (APIC): apic id: %2d", x, IO_TO_ID(x));
 		printf(", version: 0x%08x", io_apic_versions[x]);
 		printf(", at 0x%08x\n", io_apic_address[x]);
 	}
 #else
 	printf(" Warning: APIC I/O disabled\n");
 #endif	/* APIC_IO */
 }
 
 /*
  * AP cpu's call this to sync up protected mode.
  *
  * The calling AP's logical id is taken from bootAP.  Builds this CPU's
  * slice of gdt[] (NGDT entries starting at myid * NGDT) with the private
  * space and TSS bases pointing into SMP_prvspace[myid], then loads the
  * GDT, IDT, LDT and task register and calls pmap_set_opt().
  */
 void
 init_secondary(void)
 {
 	int	gsel_tss;
 	int	x, myid = bootAP;
 
 	/* point the per-CPU segments at this CPU's private space */
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
 	SMP_prvspace[myid].globaldata.gd_prvspace =
 		&SMP_prvspace[myid].globaldata;
 
 	/* convert the soft descriptors into this CPU's GDT slice */
 	for (x = 0; x < NGDT; x++) {
 		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base = (int) &gdt[myid * NGDT];
 	lgdt(&r_gdt);			/* does magic intra-segment return */
 
 	lidt(&r_idt);
 
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/* set up and load this CPU's TSS */
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
 	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	ltr(gsel_tss);
 
 	pmap_set_opt();
 }
 
 
 #if defined(APIC_IO)
 /*
  * Final configuration of the BSP's local APIC:
  *  - disable 'pic mode'.
  *  - disable 'virtual wire mode'.
  *  - enable NMI.
  *
  * The IMCR is only touched when 'picmode' is set.  With bootverbose the
  * resulting APIC state is dumped via apic_dump().
  */
 void
 bsp_apic_configure(void)
 {
 	u_char		byte;
 	u_int32_t	temp;
 
 	/* leave 'pic mode' if necessary */
 	if (picmode) {
 		outb(0x22, 0x70);	/* select IMCR */
 		byte = inb(0x23);	/* current contents */
 		byte |= 0x01;		/* mask external INTR */
 		outb(0x23, byte);	/* disconnect 8259s/NMI */
 	}
 
 	/* mask lint0 (the 8259 'virtual wire' connection) */
 	temp = lapic.lvt_lint0;
 	temp |= APIC_LVT_M;		/* set the mask */
 	lapic.lvt_lint0 = temp;
 
         /* setup lint1 to handle NMI */
         temp = lapic.lvt_lint1;
         temp &= ~APIC_LVT_M;		/* clear the mask */
         lapic.lvt_lint1 = temp;
 
 	if (bootverbose)
 		apic_dump("bsp_apic_configure()");
 }
 #endif  /* APIC_IO */
 
 
 /*******************************************************************
  * local functions and data
  */
 
 /*
  * start the SMP system
  *
  * Parses the MP table under a temporary V == P mapping, applies a default
  * configuration when mptable_pass2() returns a non-zero type, fixes up the
  * tables, programs the IO APICs and installs the IPI vectors (APIC_IO
  * only), initializes the SMP locks and finally starts the APs using the
  * trampoline address 'boot_addr'.
  */
 static void
 mp_enable(u_int boot_addr)
 {
 	int     x;
 #if defined(APIC_IO)
 	int     apic;
 	u_int   ux;
 #endif	/* APIC_IO */
 
 	POSTCODE(MP_ENABLE_POST);
 
 	/* turn on 4MB of V == P addressing so we can get to MP table */
 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
 	invltlb();
 
 	/* examine the MP table for needed info, uses physical addresses */
 	x = mptable_pass2();
 
 	/* remove the temporary mapping again */
 	*(int *)PTD = 0;
 	invltlb();
 
 	/* can't process default configs till the CPU APIC is pmapped */
 	if (x)
 		default_mp_table(x);
 
 	/* post scan cleanup */
 	fix_mp_table();
 	setup_apic_irq_mapping();
 
 #if defined(APIC_IO)
 
 	/* fill the LOGICAL io_apic_versions table */
 	for (apic = 0; apic < mp_napics; ++apic) {
 		ux = io_apic_read(apic, IOAPIC_VER);
 		io_apic_versions[apic] = ux;
 		io_apic_set_id(apic, IO_TO_ID(apic));
 	}
 
 	/* program each IO APIC in the system */
 	for (apic = 0; apic < mp_napics; ++apic)
 		if (io_apic_setup(apic) < 0)
 			panic("IO APIC setup failure");
 
 	/* install a 'Spurious INTerrupt' vector */
 	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for TLB invalidation */
 	setidt(XINVLTLB_OFFSET, Xinvltlb,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forwarding hardclock() */
 	setidt(XHARDCLOCK_OFFSET, Xhardclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for forwarding statclock() */
 	setidt(XSTATCLOCK_OFFSET, Xstatclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for all-CPU rendezvous */
 	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forcing an additional software trap */
 	setidt(XCPUAST_OFFSET, Xcpuast,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for CPU stop/restart */
 	setidt(XCPUSTOP_OFFSET, Xcpustop,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 #if defined(TEST_TEST1)
 	/* install a "fake hardware INTerrupt" vector */
 	setidt(XTEST1_OFFSET, Xtest1,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 #endif  /** TEST_TEST1 */
 
 #endif	/* APIC_IO */
 
 	/* initialize all SMP locks */
 	init_locks();
 
 	/* start each Application Processor */
 	/* (the count returned by start_all_aps() is not used here) */
 	start_all_aps(boot_addr);
 }
 
 
 /*
  * look for the MP spec signature
  */
 
 /* string defined by the Intel MP Spec as identifying the MP table */
 #define MP_SIG		0x5f504d5f	/* _MP_ */
 #define NEXT(X)		((X) += 4)
 /*
  * Scan 'count' 32-bit words starting at physical address 'target'
  * (accessed through KERNBASE) for MP_SIG.  Only every fourth word, i.e.
  * every 16 bytes, is examined.  Returns the physical byte address of the
  * match, or -1 if there is none.
  */
 static int
 search_for_sig(u_int32_t target, int count)
 {
 	u_int32_t *words = (u_int32_t *) (KERNBASE + target);
 	int     idx = 0;
 
 	while (idx < count) {
 		if (words[idx] == MP_SIG) {
 			/* make array index a byte index */
 			return (target + (idx * sizeof(u_int32_t)));
 		}
 		idx += 4;
 	}
 
 	return -1;
 }
 
 
 /*
  * Per-type description of the MP base table entries, indexed by the
  * entry type byte (0..4).  The table walkers in mptable_pass1() and
  * mptable_pass2() use the .length member to step to the next entry.
  * NOTE(review): the three initializers look like {type, length, name};
  * confirm against the basetable_entry declaration.
  */
 static basetable_entry basetable_entry_types[] =
 {
 	{0, 20, "Processor"},
 	{1, 8, "Bus"},
 	{2, 8, "I/O APIC"},
 	{3, 8, "I/O INT"},
 	{4, 8, "Local INT"}
 };
 
 /* One recorded bus: the ID taken from the MP table and its decoded type. */
 typedef struct BUSDATA {
 	u_char  bus_id;			/* 0xff while the slot is unused */
 	enum busTypes bus_type;
 }       bus_datum;
 
 /*
  * One recorded interrupt assignment entry.  The code in this file treats
  * int_type 0 as a standard INT, 3 as ExtInt and 0xff as "not connected",
  * and uses int_vector == 0xff to mean "no IRQ assigned yet".
  */
 typedef struct INTDATA {
 	u_char  int_type;
 	u_short int_flags;	/* bits 0-1: polarity, bits 2-3: trigger mode */
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;	/* physical APIC ID of the destination */
 	u_char  dst_apic_int;	/* pin on the destination APIC */
 	u_char	int_vector;	/* assigned IRQ number, 0xff if unassigned */
 }       io_int, local_int;
 
 /* Maps a bus type code to the name string matched by lookup_bus_type(). */
 typedef struct BUSTYPENAME {
 	u_char  type;
 	char    name[7];	/* up to 6 characters plus the terminating NUL */
 }       bus_type_name;
 
 /*
  * Bus type names recognised by lookup_bus_type(), which compares the name
  * from an MP table bus entry against the first MAX_BUSTYPE rows.  Rows
  * holding "---" are placeholders that map to UNKNOWN_BUSTYPE.
  * NOTE(review): MCA is listed twice (rows 3 and 6); the second row can
  * never be the first match -- confirm whether a different bus type was
  * intended there.
  */
 static bus_type_name bus_type_table[] =
 {
 	{CBUS, "CBUS"},
 	{CBUSII, "CBUSII"},
 	{EISA, "EISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{ISA, "ISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{PCI, "PCI"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{XPRESS, "XPRESS"},
 	{UNKNOWN_BUSTYPE, "---"}
 };
 /*
  * from MP spec v1.4, table 5-1
  *
  * Bus layout of the seven 'default' configurations.  Row (type - 1)
  * describes default configuration 'type'; column 0 is the number of
  * busses and is what mptable_pass1() stores in mp_nbusses.  The 255
  * values fill the unused second bus of the single-bus configurations.
  */
 static int default_data[7][5] =
 {
 /*   nbus, id0, type0, id1, type1 */
 	{1, 0, ISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, MCA, 255, 255},
 	{2, 0, ISA, 1, PCI},
 	{2, 0, EISA, 1, PCI},
 	{2, 0, MCA, 1, PCI}
 };
 
 
 /* the bus data; mp_nbusses entries, allocated in mptable_pass2() */
 static bus_datum *bus_data;
 
 /*
  * the IO INT data, one entry per possible APIC INTerrupt;
  * allocated in mptable_pass2() with nintrs + 1 entries
  */
 static io_int  *io_apic_ints;
 
 /* number of valid entries in io_apic_ints[] */
 static int nintrs;
 
 /* per-entry-type handlers used while walking the MP table in pass 2 */
 static int processor_entry	__P((proc_entry_ptr entry, int cpu));
 static int bus_entry		__P((bus_entry_ptr entry, int bus));
 static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
 static int int_entry		__P((int_entry_ptr entry, int intr));
 static int lookup_bus_type	__P((char *name));
 
 
 /*
  * 1st pass on motherboard's Intel MP specification table.
  *
  * Only counts things, so that mptable_pass2() knows how much memory to
  * allocate before it records the details.
  *
  * initializes:
  *	mp_ncpus = 1
  *
  * determines:
  *	cpu_apic_address (common to all CPUs)
  *	io_apic_address[N]
  *	mp_naps
  *	mp_nbusses
  *	mp_napics
  *	nintrs
  *
  * Panics if the configuration table header is missing, if an entry of
  * unknown type is met, if the default configuration type has no row in
  * default_data[], or if more IO APICs are enabled than io_apic_address[]
  * is cleared for (NAPICID).
  */
 static void
 mptable_pass1(void)
 {
 	int	x;
 	mpcth_t	cth;
 	void*	position;
 	int	count;
 	int	type;
 
 	POSTCODE(MPTABLE_PASS1_POST);
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		io_apic_address[x] = ~0;	/* IO APIC address table */
 	}
 
 	/* init everything to empty */
 	mp_naps = 0;
 	mp_nbusses = 0;
 	mp_napics = 0;
 	nintrs = 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0) {
 		/* refuse types that would index past the end of default_data[] */
 		if (MPFPS_MPFB1 >
 		    (int)(sizeof(default_data) / sizeof(default_data[0])))
 			panic("BAD default MP config: %d", MPFPS_MPFB1);
 
 		/* use default addresses */
 		cpu_apic_address = DEFAULT_APIC_BASE;
 		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
 
 		/* fill in with defaults */
 		mp_naps = 2;		/* includes BSP */
 		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
 #if defined(APIC_IO)
 		mp_napics = 1;
 		nintrs = 16;
 #endif	/* APIC_IO */
 	}
 	else {
 		if ((cth = mpfps->pap) == 0)
 			panic("MP Configuration Table Header MISSING!");
 
 		cpu_apic_address = (vm_offset_t) cth->apic_address;
 
 		/* walk the table, recording info of interest */
 		position = (u_char *) cth + sizeof(struct MPCTH);
 		count = cth->entry_count;
 
 		while (count--) {
 			switch (type = *(u_char *) position) {
 			case 0: /* processor_entry */
 				if (((proc_entry_ptr)position)->cpu_flags
 					& PROCENTRY_FLAG_EN)
 					++mp_naps;
 				break;
 			case 1: /* bus_entry */
 				++mp_nbusses;
 				break;
 			case 2: /* io_apic_entry */
 				if (((io_apic_entry_ptr)position)->apic_flags
 					& IOAPICENTRY_FLAG_EN) {
 					/* do not run off the end of the table */
 					if (mp_napics >= NAPICID)
 						panic("Too many IO APICs in MP table (max %d)",
 						    NAPICID);
 					io_apic_address[mp_napics++] =
 					    (vm_offset_t)((io_apic_entry_ptr)
 						position)->apic_address;
 				}
 				break;
 			case 3: /* int_entry */
 				++nintrs;
 				break;
 			case 4:	/* local int_entry, not recorded */
 				break;
 			default:
 				panic("mpfps Base Table HOSED!");
 				/* NOTREACHED */
 			}
 
 			/*
 			 * Step to the next entry.  A cast expression is not an
 			 * lvalue in standard C, so assign the advanced pointer
 			 * back instead of using "(u_char *)position += n".
 			 */
 			position = (u_char *) position +
 			    basetable_entry_types[type].length;
 		}
 	}
 
 	/* qualify the numbers */
 	if (mp_naps > MAXCPU) {
 		printf("Warning: only using %d of %d available CPUs!\n",
 			MAXCPU, mp_naps);
 		mp_naps = MAXCPU;
 	}
 
 	/*
 	 * Count the BSP.
 	 * This is also used as a counter while starting the APs.
 	 */
 	mp_ncpus = 1;
 
 	--mp_naps;	/* subtract the BSP */
 }
 
 
 /*
  * 2nd pass on motherboard's Intel MP specification table.
  *
  * Allocates the tables sized by mptable_pass1(), maps each IO APIC into
  * the SMP private page table, then walks the MP table again and records
  * the contents of every entry.
  *
  * sets:
  *	boot_cpu_id
  *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
  *	CPU_TO_ID(N), logical CPU to APIC ID table
  *	IO_TO_ID(N), logical IO to APIC ID table
  *	bus_data[N]
  *	io_apic_ints[N]
  *
  * Returns the default configuration type (non-zero) when the BIOS asks
  * for a 'default' configuration, otherwise 0 after the table was parsed.
  * Panics if the table header is missing, an unknown entry type is met,
  * or no boot processor entry was found.
  */
 static int
 mptable_pass2(void)
 {
 	int     x;
 	mpcth_t cth;
 	void*   position;
 	int     count;
 	int     type;
 	int     apic, bus, cpu, intr;
 	int	i, j;
 	int	pgeflag;
 
 	POSTCODE(MPTABLE_PASS2_POST);
 
 	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
 
 	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	/* one spare slot: fix_mp_table() may append a missing ExtInt entry */
 	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
 	    M_DEVBUF, M_WAITOK);
 
 	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
 
 	/*
 	 * Give every IO APIC a virtual address.  Page table slots are taken
 	 * downwards from NPTEPG-2; IO APICs sharing a page frame share a slot.
 	 */
 	for (i = 0; i < mp_napics; i++) {
 		for (j = 0; j < mp_napics; j++) {
 			/* same page frame as a previous IO apic? */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
 			    (io_apic_address[i] & PG_FRAME)) {
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 			/* use this slot if available */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
 				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
 				    pgeflag | (io_apic_address[i] & PG_FRAME));
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 		}
 	}
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
 		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
 		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
 	}
 
 	/* clear bus data table */
 	for (x = 0; x < mp_nbusses; ++x)
 		bus_data[x].bus_id = 0xff;
 
 	/* clear IO APIC INT table */
 	for (x = 0; x < (nintrs + 1); ++x) {
 		io_apic_ints[x].int_type = 0xff;
 		io_apic_ints[x].int_vector = 0xff;
 	}
 
 	/* setup the cpu/apic mapping arrays */
 	boot_cpu_id = -1;
 
 	/* record whether PIC or virtual-wire mode */
 	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0)
 		return MPFPS_MPFB1;	/* return default configuration type */
 
 	if ((cth = mpfps->pap) == 0)
 		panic("MP Configuration Table Header MISSING!");
 
 	/* walk the table, recording info of interest */
 	position = (u_char *) cth + sizeof(struct MPCTH);
 	count = cth->entry_count;
 	apic = bus = intr = 0;
 	cpu = 1;				/* pre-count the BSP */
 
 	while (count--) {
 		/* each handler returns non-zero when it consumed a table slot */
 		switch (type = *(u_char *) position) {
 		case 0:
 			if (processor_entry(position, cpu))
 				++cpu;
 			break;
 		case 1:
 			if (bus_entry(position, bus))
 				++bus;
 			break;
 		case 2:
 			if (io_apic_entry(position, apic))
 				++apic;
 			break;
 		case 3:
 			if (int_entry(position, intr))
 				++intr;
 			break;
 		case 4:
 			/* local interrupt entries are not recorded */
 			break;
 		default:
 			panic("mpfps Base Table HOSED!");
 			/* NOTREACHED */
 		}
 
 		/*
 		 * Step to the next entry.  A cast expression is not an lvalue
 		 * in standard C, so assign the advanced pointer back instead
 		 * of using "(u_char *)position += n".
 		 */
 		position = (u_char *) position +
 		    basetable_entry_types[type].length;
 	}
 
 	if (boot_cpu_id == -1)
 		panic("NO BSP found!");
 
 	/* report fact that its NOT a default configuration */
 	return 0;
 }
 
 
 /*
  * Bind IRQ number 'irq' to pin 'intpin' of logical IO APIC 'apic'.
  *
  * Fills in the int_to_apicintpin[] slot for the IRQ and stamps the IRQ
  * into every still-unassigned INT/ExtInt entry of io_apic_ints[] that
  * targets the same APIC pin.  Panics if the IRQ slot is already in use.
  */
 void
 assign_apic_irq(int apic, int intpin, int irq)
 {
 	int idx;
 
 	if (int_to_apicintpin[irq].ioapic != -1)
 		panic("assign_apic_irq: inconsistent table");
 
 	int_to_apicintpin[irq].ioapic = apic;
 	int_to_apicintpin[irq].int_pin = intpin;
 	int_to_apicintpin[irq].apic_address = ioapic[apic];
 	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 
 	for (idx = 0; idx < nintrs; idx++) {
 		/* only INT (0) and ExtInt (3) entries take part */
 		if (io_apic_ints[idx].int_type != 0 &&
 		    io_apic_ints[idx].int_type != 3)
 			continue;
 		/* skip entries that already carry an IRQ */
 		if (io_apic_ints[idx].int_vector != 0xff)
 			continue;
 		if (io_apic_ints[idx].dst_apic_id != IO_TO_ID(apic))
 			continue;
 		if (io_apic_ints[idx].dst_apic_int != intpin)
 			continue;
 
 		io_apic_ints[idx].int_vector = irq;
 	}
 }
 
 /*
  * Undo assign_apic_irq() for IRQ number 'irq'.
  *
  * Releases the int_to_apicintpin[] slot and marks every INT/ExtInt entry
  * of io_apic_ints[] that targets the previously bound APIC pin as
  * unassigned again (int_vector = 0xff).  Panics if the IRQ was not
  * assigned.
  */
 void
 revoke_apic_irq(int irq)
 {
 	int x;
 	int oldapic;
 	int oldintpin;
 
 	if (int_to_apicintpin[irq].ioapic == -1)
 		panic("revoke_apic_irq: inconsistent table");
 
 	/* remember the binding before it is wiped */
 	oldapic = int_to_apicintpin[irq].ioapic;
 	oldintpin = int_to_apicintpin[irq].int_pin;
 
 	int_to_apicintpin[irq].ioapic = -1;
 	int_to_apicintpin[irq].int_pin = 0;
 	int_to_apicintpin[irq].apic_address = NULL;
 	int_to_apicintpin[irq].redirindex = 0;
 
 	for (x = 0; x < nintrs; x++) {
 		/*
 		 * Match entries that currently HAVE a vector.  Testing for
 		 * == 0xff here (as assign_apic_irq() does) would only ever
 		 * select already-unassigned entries and make this loop a
 		 * no-op.
 		 */
 		if ((io_apic_ints[x].int_type == 0 ||
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector != 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 		    io_apic_ints[x].dst_apic_int == oldintpin)
 			io_apic_ints[x].int_vector = 0xff;
 	}
 }
 
 
 /*
  * Give io_apic_ints[intr] an IRQ number if it does not have one yet.
  *
  * Only standard INT entries, and ExtInt entries that are not on pin 0 of
  * logical IO APIC 0, are eligible.  The first free int_to_apicintpin[]
  * slot is used; nothing happens when all slots are taken.
  */
 static void
 allocate_apic_irq(int intr)
 {
 	int target_apic;
 	int target_pin;
 	int slot;
 
 	/* Interrupt handler already assigned */
 	if (io_apic_ints[intr].int_vector != 0xff)
 		return;
 
 	if (io_apic_ints[intr].int_type != 0) {
 		/* neither INT nor ExtInt */
 		if (io_apic_ints[intr].int_type != 3)
 			return;
 		/* ExtInt on (apic 0, pin 0) is not handled here */
 		if (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 		    io_apic_ints[intr].dst_apic_int == 0)
 			return;
 	}
 
 	/* look for the first unused IRQ slot */
 	for (slot = 0; slot < APIC_INTMAPSIZE; slot++) {
 		if (int_to_apicintpin[slot].ioapic == -1)
 			break;
 	}
 	if (slot >= APIC_INTMAPSIZE)
 		return;		/* No free interrupt handlers */
 
 	target_apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 	target_pin = io_apic_ints[intr].dst_apic_int;
 
 	assign_apic_irq(target_apic, target_pin, slot);
 	io_apic_setup_intpin(target_apic, target_pin);
 }
 
 
 static void
 swap_apic_id(int apic, int oldid, int newid)
 {
 	int x;
 	int oapic;
 	
 
 	if (oldid == newid)
 		return;			/* Nothing to do */
 	
 	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 	       apic, oldid, newid);
 	
 	/* Swap physical APIC IDs in interrupt entries */
 	for (x = 0; x < nintrs; x++) {
 		if (io_apic_ints[x].dst_apic_id == oldid)
 			io_apic_ints[x].dst_apic_id = newid;
 		else if (io_apic_ints[x].dst_apic_id == newid)
 			io_apic_ints[x].dst_apic_id = oldid;
 	}
 	
 	/* Swap physical APIC IDs in IO_TO_ID mappings */
 	for (oapic = 0; oapic < mp_napics; oapic++)
 		if (IO_TO_ID(oapic) == newid)
 			break;
 	
 	if (oapic < mp_napics) {
 		printf("Changing APIC ID for IO APIC #%d from "
 		       "%d to %d in MP table\n",
 		       oapic, newid, oldid);
 		IO_TO_ID(oapic) = oldid;
 	}
 	IO_TO_ID(apic) = newid;
 }
 
 
 /*
  * Rebuild the physical-APIC-ID to logical-number table from scratch,
  * using the CPU_TO_ID() entries for logical CPUs 0..mp_naps and the
  * IO_TO_ID() entries for logical IO APICs 0..mp_napics-1.
  */
 static void
 fix_id_to_io_mapping(void)
 {
 	int n;
 
 	/* start with every physical ID unmapped */
 	n = 0;
 	while (n < NAPICID) {
 		ID_TO_IO(n) = -1;
 		n++;
 	}
 
 	/* logical CPUs */
 	n = 0;
 	while (n <= mp_naps) {
 		if (CPU_TO_ID(n) < NAPICID) {
 			ID_TO_IO(CPU_TO_ID(n)) = n;
 		}
 		n++;
 	}
 
 	/* logical IO APICs */
 	n = 0;
 	while (n < mp_napics) {
 		if (IO_TO_ID(n) < NAPICID) {
 			ID_TO_IO(IO_TO_ID(n)) = n;
 		}
 		n++;
 	}
 }
 
 
 /*
  * Return the lowest physical APIC ID that is used by neither a CPU
  * (logical 0..mp_naps) nor an IO APIC (logical 0..mp_napics-1).
  * Returns NAPICID when every ID is taken.
  */
 static int
 first_free_apic_id(void)
 {
 	int candidate;
 	int n;
 	int taken;
 
 	for (candidate = 0; candidate < NAPICID; candidate++) {
 		taken = 0;
 
 		/* claimed by a CPU? */
 		for (n = 0; !taken && n <= mp_naps; n++) {
 			if (CPU_TO_ID(n) == candidate)
 				taken = 1;
 		}
 
 		/* claimed by an IO APIC? */
 		for (n = 0; !taken && n < mp_napics; n++) {
 			if (IO_TO_ID(n) == candidate)
 				taken = 1;
 		}
 
 		if (!taken)
 			return candidate;
 	}
 
 	/* here candidate == NAPICID: nothing was free */
 	return candidate;
 }
 
 
 /*
  * Decide whether physical APIC ID 'id' may be used for logical IO APIC
  * 'apic'.  Returns 1 when it may, 0 when the ID is out of range, belongs
  * to a CPU, or is already used by a lower-numbered IO APIC.
  */
 static int
 io_apic_id_acceptable(int apic, int id)
 {
 	int n;
 
 	/* Out of range */
 	if (id >= NAPICID)
 		return 0;
 
 	/* Conflict with CPU */
 	n = 0;
 	while (n <= mp_naps) {
 		if (CPU_TO_ID(n) == id)
 			return 0;
 		n++;
 	}
 
 	/* Conflict with other APIC; only those before 'apic' are checked */
 	n = 0;
 	while (n < mp_napics && n < apic) {
 		if (IO_TO_ID(n) == id)
 			return 0;
 		n++;
 	}
 
 	/* ID is acceptable for IO APIC */
 	return 1;
 }
 
 
 /*
  * Repair known inconsistencies in the data recorded from the MP table.
  *
  * Three fix-ups are applied, in this order:
  *  1. if there is exactly one PCI bus and it is not numbered 0, exchange
  *     it with the bus that is, including the interrupt entries;
  *  2. give every IO APIC a physical APIC ID that does not clash with a
  *     CPU or an earlier IO APIC, then rebuild the ID_TO_IO table;
  *  3. append an ExtInt entry for (IO APIC 0, pin 0) if none exists.
  *
  * Panics if no usable physical APIC ID can be found for an IO APIC.
  */
 static void
 fix_mp_table(void)
 {
 	int	x;
 	int	id;
 	int	bus_0 = 0;	/* Stop GCC warning */
 	int	bus_pci = 0;	/* Stop GCC warning */
 	int	num_pci_bus;
 	int	apic;		/* IO APIC unit number */
 	int     freeid;		/* Free physical APIC ID */
 	int	physid;		/* Current physical IO APIC ID */
 
 	/*
 	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 	 * did it wrong.  The MP spec says that when more than 1 PCI bus
 	 * exists the BIOS must begin with bus entries for the PCI bus and use
 	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
 	 * exists the BIOS can choose to ignore this ordering, and indeed many
 	 * MP motherboards do ignore it.  This causes a problem when the PCI
 	 * sub-system makes requests of the MP sub-system based on PCI bus
 	 * numbers.  So here we look for the situation and renumber the
 	 * busses and associated INTs in an effort to "make it right".
 	 */
 
 	/* find bus 0, PCI bus, count the number of PCI busses */
 	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 		if (bus_data[x].bus_id == 0) {
 			bus_0 = x;
 		}
 		if (bus_data[x].bus_type == PCI) {
 			++num_pci_bus;
 			bus_pci = x;
 		}
 	}
 	/*
 	 * bus_0 == slot of bus with ID of 0
 	 * bus_pci == slot of last PCI bus encountered
 	 */
 
 	/* check the 1 PCI bus case for sanity */
 	/* if it is number 0 all is well */
 	if (num_pci_bus == 1 &&
 	    bus_data[bus_pci].bus_id != 0) {
 		
 		/* mis-numbered, swap with whichever bus uses ID 0 */
 
 		/* swap the bus entry types; the bus IDs stay where they are */
 		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 		bus_data[bus_0].bus_type = PCI;
 
 		/* swap each relevant INTerrupt entry */
 		id = bus_data[bus_pci].bus_id;
 		for (x = 0; x < nintrs; ++x) {
 			if (io_apic_ints[x].src_bus_id == id) {
 				io_apic_ints[x].src_bus_id = 0;
 			}
 			else if (io_apic_ints[x].src_bus_id == 0) {
 				io_apic_ints[x].src_bus_id = id;
 			}
 		}
 	}
 
 	/* Assign IO APIC IDs.
 	 * 
 	 * First try the existing ID. If a conflict is detected, try
 	 * the ID in the MP table.  If a conflict is still detected, find
 	 * a free id.
 	 *
 	 * We cannot use the ID_TO_IO table before all conflicts have been
 	 * resolved and the table has been corrected.
 	 */
 	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 		
 		/* First try to use the value set by the BIOS */
 		physid = io_apic_get_id(apic);
 		if (io_apic_id_acceptable(apic, physid)) {
 			/* make the recorded MP table ID agree with it */
 			if (IO_TO_ID(apic) != physid)
 				swap_apic_id(apic, IO_TO_ID(apic), physid);
 			continue;
 		}
 
 		/* Then check if the value in the MP table is acceptable */
 		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 			continue;
 
 		/* Last resort, find a free APIC ID and use it */
 		freeid = first_free_apic_id();
 		if (freeid >= NAPICID)
 			panic("No free physical APIC IDs found");
 		
 		if (io_apic_id_acceptable(apic, freeid)) {
 			swap_apic_id(apic, IO_TO_ID(apic), freeid);
 			continue;
 		}
 		panic("Free physical APIC ID not usable");
 	}
 	/* IO_TO_ID is final now; rebuild the reverse mapping from it */
 	fix_id_to_io_mapping();
 
 	/* detect and fix broken Compaq MP table */
 	if (apic_int_type(0, 0) == -1) {
 		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 		/* mptable_pass2() allocated one spare slot for this entry */
 		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
 		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
 		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
 		nintrs++;
 	}
 }
 
 
 /*
  * Build the initial IRQ number to (IO APIC, pin) table.
  *
  * ISA and EISA interrupts keep their bus IRQ number.  If IRQ 0 is still
  * free afterwards and an ExtInt entry exists on pin 0 of IO APIC 0, that
  * pin gets IRQ 0.  PCI interrupts are assigned later, on demand.
  */
 static void
 setup_apic_irq_mapping(void)
 {
 	int	n;
 	int	irq;
 
 	/* Clear array */
 	for (n = 0; n < APIC_INTMAPSIZE; n++) {
 		int_to_apicintpin[n].ioapic = -1;
 		int_to_apicintpin[n].int_pin = 0;
 		int_to_apicintpin[n].apic_address = NULL;
 		int_to_apicintpin[n].redirindex = 0;
 	}
 
 	/* First assign ISA/EISA interrupts */
 	for (n = 0; n < nintrs; n++) {
 		irq = io_apic_ints[n].src_bus_irq;
 
 		/* must fit in the table before the table is indexed */
 		if (irq >= APIC_INTMAPSIZE)
 			continue;
 		if (io_apic_ints[n].int_vector != 0xff)
 			continue;
 		if (int_to_apicintpin[irq].ioapic != -1)
 			continue;
 		if (!apic_int_is_bus_type(n, ISA) &&
 		    !apic_int_is_bus_type(n, EISA))
 			continue;
 		if (io_apic_ints[n].int_type != 0)
 			continue;
 
 		assign_apic_irq(ID_TO_IO(io_apic_ints[n].dst_apic_id),
 				io_apic_ints[n].dst_apic_int,
 				irq);
 	}
 
 	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
 	for (n = 0; n < nintrs; n++) {
 		if (io_apic_ints[n].dst_apic_int != 0)
 			continue;
 		if (io_apic_ints[n].dst_apic_id != IO_TO_ID(0))
 			continue;
 		if (io_apic_ints[n].int_vector != 0xff)
 			continue;
 		if (int_to_apicintpin[0].ioapic != -1)
 			continue;
 		if (io_apic_ints[n].int_type != 3)
 			continue;
 
 		assign_apic_irq(0, 0, 0);
 		break;
 	}
 
 	/* PCI interrupt assignment is deferred */
 }
 
 
 /*
  * Record one processor entry of the MP table.
  *
  * 'cpu' is the logical number to give the processor if it is an enabled
  * application processor.  Returns 1 when that number was used, 0 when
  * the entry is disabled, describes the boot processor (always logical
  * CPU 0), or there is no room for another CPU.  Panics on an APIC ID
  * outside 0..NAPICID-1.
  */
 static int
 processor_entry(proc_entry_ptr entry, int cpu)
 {
 	/* disabled processors are ignored */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_EN) == 0)
 		return 0;
 
 	if (entry->apic_id >= NAPICID)
 		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 
 	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
 		/* the boot processor: it has already been counted */
 		boot_cpu_id = entry->apic_id;
 		CPU_TO_ID(0) = entry->apic_id;
 		ID_TO_CPU(entry->apic_id) = 0;
 		return 0;
 	}
 
 	/* an application processor, accepted only below the CPU limit */
 	if (cpu >= MAXCPU)
 		return 0;
 
 	CPU_TO_ID(cpu) = entry->apic_id;
 	ID_TO_CPU(entry->apic_id) = cpu;
 	return 1;
 }
 
 
 /*
  * Record one bus entry of the MP table in bus_data[bus].
  *
  * The bus type string in the entry is at most 6 characters and is cut
  * at the first blank before being looked up.  Always returns 1; panics
  * when the bus type is not known.
  */
 static int
 bus_entry(bus_entry_ptr entry, int bus)
 {
 	char    name[8];
 	char    ch;
 	int     len;
 	int     type;
 
 	/* copy the type string up to the first blank */
 	len = 0;
 	while (len < 6) {
 		ch = entry->bus_type[len];
 		if (ch == ' ')
 			break;
 		name[len] = ch;
 		len++;
 	}
 	name[len] = '\0';
 
 	/* encode the name into an index */
 	type = lookup_bus_type(name);
 	if (type == UNKNOWN_BUSTYPE)
 		panic("unknown bus type: '%s'", name);
 
 	bus_data[bus].bus_id = entry->bus_id;
 	bus_data[bus].bus_type = type;
 
 	return 1;
 }
 
 
 /*
  * Record one IO APIC entry of the MP table as logical IO APIC 'apic'.
  * Returns 1 when the entry was recorded, 0 when the IO APIC is disabled.
  */
 static int
 io_apic_entry(io_apic_entry_ptr entry, int apic)
 {
 	if ((entry->apic_flags & IOAPICENTRY_FLAG_EN) == 0) {
 		return 0;
 	}
 
 	IO_TO_ID(apic) = entry->apic_id;
 
 	/* the reverse table only covers IDs below NAPICID */
 	if (entry->apic_id < NAPICID) {
 		ID_TO_IO(entry->apic_id) = apic;
 	}
 
 	return 1;
 }
 
 
 /*
  * Translate a bus type name into its type code by searching the first
  * MAX_BUSTYPE rows of bus_type_table[].  Returns UNKNOWN_BUSTYPE when
  * the name is not listed.
  */
 static int
 lookup_bus_type(char *name)
 {
 	int     row;
 
 	row = 0;
 	while (row < MAX_BUSTYPE) {
 		if (strcmp(bus_type_table[row].name, name) == 0) {
 			return bus_type_table[row].type;
 		}
 		++row;
 	}
 
 	return UNKNOWN_BUSTYPE;
 }
 
 
 /*
  * Record one IO interrupt entry of the MP table in io_apic_ints[intr].
  *
  * A destination APIC ID of 255 means the signal goes to all IO APICs;
  * in that case the first IO APIC whose maximum redirection entry is at
  * least the requested pin is recorded, or 255 is kept if there is none.
  * Always returns 1.
  */
 static int
 int_entry(int_entry_ptr entry, int intr)
 {
 	io_int *rec;
 	int unit;
 
 	rec = &io_apic_ints[intr];
 
 	rec->int_type = entry->int_type;
 	rec->int_flags = entry->int_flags;
 	rec->src_bus_id = entry->src_bus_id;
 	rec->src_bus_irq = entry->src_bus_irq;
 
 	if (entry->dst_apic_id != 255) {
 		rec->dst_apic_id = entry->dst_apic_id;
 	} else {
 		/* Select an IO APIC with sufficient number of interrupt pins */
 		unit = 0;
 		while (unit < mp_napics) {
 			if (((io_apic_read(unit, IOAPIC_VER) &
 			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
 			    entry->dst_apic_int)
 				break;
 			unit++;
 		}
 
 		if (unit < mp_napics)
 			rec->dst_apic_id = IO_TO_ID(unit);
 		else
 			rec->dst_apic_id = entry->dst_apic_id;
 	}
 
 	rec->dst_apic_int = entry->dst_apic_int;
 
 	return 1;
 }
 
 
 /*
  * Return 1 if the source bus of io_apic_ints[intr] is recorded in
  * bus_data[] with type 'bus_type', otherwise 0.
  */
 static int
 apic_int_is_bus_type(int intr, int bus_type)
 {
 	int     n;
 
 	for (n = 0; n < mp_nbusses; ++n) {
 		if (bus_data[n].bus_id != io_apic_ints[intr].src_bus_id)
 			continue;
 		if ((int) bus_data[n].bus_type == bus_type)
 			return 1;
 	}
 
 	return 0;
 }
 
 
 /*
  * Given a traditional ISA INT mask, return an APIC mask.
  *
  * Only the lowest set bit of 'isa_mask' is considered.  It is translated
  * with isa_apic_irq() and the result is returned as a single-bit mask.
  * Returns 0 when 'isa_mask' is empty, when the ISA IRQ has no APIC
  * connection, or when the translated value does not fit in the mask.
  */
 u_int
 isa_apic_mask(u_int isa_mask)
 {
 	int isa_irq;
 	int apic_pin;
 
 #if defined(SKIP_IRQ15_REDIRECT)
 	if (isa_mask == (1 << 15)) {
 		printf("skipping ISA IRQ15 redirect\n");
 		return isa_mask;
 	}
 #endif  /* SKIP_IRQ15_REDIRECT */
 
 	isa_irq = ffs(isa_mask);		/* find its bit position */
 	if (isa_irq == 0)			/* doesn't exist */
 		return 0;
 	--isa_irq;				/* make it zero based */
 
 	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
 	if (apic_pin == -1)
 		return 0;
 
 	/*
 	 * A shift count outside the width of the mask is undefined, so
 	 * treat such a value like "no connection".
 	 */
 	if (apic_pin < 0 || apic_pin >= (int)(sizeof(u_int) * 8))
 		return 0;
 
 	/* unsigned shift: bit 31 must not overflow a signed int */
 	return (1U << apic_pin);		/* convert pin# to a mask */
 }
 
 
 /*
  * Determine which APIC pin an ISA/EISA INT is attached to.
  */
 /* shorthand accessors for the fields of io_apic_ints[I] */
 #define INTTYPE(I)	(io_apic_ints[(I)].int_type)
 #define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
 #define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
 /* logical IO APIC number of the entry's destination APIC */
 #define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 
 #define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
 int
 isa_apic_irq(int isa_irq)
 {
 	int     intr;
 
 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
 		if (INTTYPE(intr) == 0) {		/* standard INT */
 			if (SRCBUSIRQ(intr) == isa_irq) {
 				if (apic_int_is_bus_type(intr, ISA) ||
 			            apic_int_is_bus_type(intr, EISA)) {
 					if (INTIRQ(intr) == 0xff)
 						return -1; /* unassigned */
 					return INTIRQ(intr);	/* found */
 				}
 			}
 		}
 	}
 	return -1;					/* NOT found */
 }
 
 
 /*
  * Determine which APIC pin a PCI INT is attached to.
  */
 /* source bus ID of io_apic_ints[I] */
 #define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
 /* for PCI sources: bits 2-6 of src_bus_irq are taken as the device number */
 #define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 /* for PCI sources: bits 0-1 of src_bus_irq are taken as the interrupt line */
 #define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
 int
 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
 {
 	int     intr;
 
 	--pciInt;					/* zero based */
 
 	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
 		if ((INTTYPE(intr) == 0)		/* standard INT */
 		    && (SRCBUSID(intr) == pciBus)
 		    && (SRCBUSDEVICE(intr) == pciDevice)
 		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
 			if (apic_int_is_bus_type(intr, PCI)) {
 				if (INTIRQ(intr) == 0xff)
 					allocate_apic_irq(intr);
 				if (INTIRQ(intr) == 0xff)
 					return -1;	/* unassigned */
 				return INTIRQ(intr);	/* exact match */
 			}
 
 	return -1;					/* NOT found */
 }
 
 int
 next_apic_irq(int irq) 
 {
 	int intr, ointr;
 	int bus, bustype;
 
 	bus = 0;
 	bustype = 0;
 	for (intr = 0; intr < nintrs; intr++) {
 		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
 			continue;
 		bus = SRCBUSID(intr);
 		bustype = apic_bus_type(bus);
 		if (bustype != ISA &&
 		    bustype != EISA &&
 		    bustype != PCI)
 			continue;
 		break;
 	}
 	if (intr >= nintrs) {
 		return -1;
 	}
 	for (ointr = intr + 1; ointr < nintrs; ointr++) {
 		if (INTTYPE(ointr) != 0)
 			continue;
 		if (bus != SRCBUSID(ointr))
 			continue;
 		if (bustype == PCI) {
 			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
 				continue;
 			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
 				continue;
 		}
 		if (bustype == ISA || bustype == EISA) {
 			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 				continue;
 		}
 		if (INTPIN(intr) == INTPIN(ointr))
 			continue;
 		break;
 	}
 	if (ointr >= nintrs) {
 		return -1;
 	}
 	return INTIRQ(ointr);
 }
 /* the io_apic_ints[] accessor macros are not needed past this point */
 #undef SRCBUSLINE
 #undef SRCBUSDEVICE
 #undef SRCBUSID
 #undef SRCBUSIRQ
 
 #undef INTPIN
 #undef INTIRQ
 #undef INTAPIC
 #undef INTTYPE
 
 
 /*
  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
  *
  * XXX FIXME:
  *  Exactly what this means is unclear at this point.  It is a solution
  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
  *  option.
  *
  * Nothing is reprogrammed yet: a message is printed when booting verbosely
  * and 0 is returned (-1 when built with READY defined).
  */
 int
 undirect_isa_irq(int rirq)
 {
 #if !defined(READY)
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 	}
 	return 0;
 #else
 	if (bootverbose) {
 		printf("Freeing redirected ISA irq %d.\n", rirq);
 	}
 	/** FIXME: tickle the MB redirector chip */
 	return -1;
 #endif  /* !READY */
 }
 
 
 /*
  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt.
  *
  * Nothing is reprogrammed yet: a message is printed when booting verbosely
  * and 0 is returned (-1 when built with READY defined).
  */
 int
 undirect_pci_irq(int rirq)
 {
 #if !defined(READY)
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 		       rirq);
 	}
 	return 0;
 #else
 	if (bootverbose) {
 		printf("Freeing redirected PCI irq %d.\n", rirq);
 	}
 
 	/** FIXME: tickle the MB redirector chip */
 	return -1;
 #endif  /* !READY */
 }
 
 
 /*
  * Map a bus ID to its bus type.
  *
  * Returns the type of the first bus_data[] entry carrying 'id',
  * or -1 if no recorded bus has that ID.
  */
 int
 apic_bus_type(int id)
 {
 	int     n;
 
 	n = 0;
 	while (n < mp_nbusses) {
 		if (bus_data[n].bus_id == id) {
 			return bus_data[n].bus_type;
 		}
 		++n;
 	}
 
 	return -1;
 }
 
 
 /*
  * Map a LOGICAL APIC# and pin# to the ID of the bus driving that pin.
  *
  * Returns the src_bus_id of the first matching interrupt entry,
  * or -1 if no entry targets the pin.
  */
 int
 apic_src_bus_id(int apic, int pin)
 {
 	int     n;
 
 	/* walk every recorded INTerrupt source */
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 		return (io_apic_ints[n].src_bus_id);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * Map a LOGICAL APIC# and pin# to the bus IRQ driving that pin.
  *
  * Returns the src_bus_irq of the first matching interrupt entry,
  * or -1 if no entry targets the pin.
  */
 int
 apic_src_bus_irq(int apic, int pin)
 {
 	int     n;
 
 	for (n = 0; n < nintrs; n++) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 		return (io_apic_ints[n].src_bus_irq);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * Map a LOGICAL APIC# and pin# to the INTerrupt type wired to that pin.
  *
  * Returns the int_type of the first matching interrupt entry,
  * or -1 if no entry targets the pin.
  */
 int
 apic_int_type(int apic, int pin)
 {
 	int     n;
 
 	/* walk every recorded INTerrupt source */
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 		return (io_apic_ints[n].int_type);
 	}
 
 	return -1;		/* NOT found */
 }
 
 /*
  * Map a LOGICAL APIC# and pin# to the IRQ number assigned to that pin.
  *
  * Only the first interrupt entry targeting the pin is consulted.
  * Returns -1 if there is no such entry or it has no IRQ assigned.
  * Panics if int_to_apicintpin[] disagrees with the entry.
  */
 int 
 apic_irq(int apic, int pin)
 {
 	int n;
 	int irq;
 
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 
 		irq = io_apic_ints[n].int_vector;
 		if (irq == 0xff)
 			return -1;
 
 		/* both tables must describe the same binding */
 		if (apic != int_to_apicintpin[irq].ioapic)
 			panic("apic_irq: inconsistent table");
 		if (pin != int_to_apicintpin[irq].int_pin)
 			panic("apic_irq inconsistent table (2)");
 		return irq;
 	}
 
 	return -1;
 }
 
 
 /*
  * Map a LOGICAL APIC# and pin# to the trigger mode of that pin.
  *
  * Returns bits 2-3 of int_flags of the first matching interrupt entry,
  * or -1 if no entry targets the pin.
  */
 int
 apic_trigger(int apic, int pin)
 {
 	int     n;
 
 	/* walk every recorded INTerrupt source */
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 		return ((io_apic_ints[n].int_flags >> 2) & 0x03);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * Map a LOGICAL APIC# and pin# to the 'active' level of that pin.
  *
  * Returns bits 0-1 of int_flags of the first matching interrupt entry,
  * or -1 if no entry targets the pin.
  */
 int
 apic_polarity(int apic, int pin)
 {
 	int     n;
 
 	/* walk every recorded INTerrupt source */
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 		return (io_apic_ints[n].int_flags & 0x03);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * set data according to MP defaults
  * FIXME: probably not complete yet...
  *
  * 'type' is the default configuration type (1..7) reported by the BIOS.
  * Fills in the CPU and IO APIC ID mappings for a two-CPU system with one
  * IO APIC, the bus_data[] entries from default_data[], and (with APIC_IO)
  * the 16 interrupt entries of io_apic_ints[].  Panics on any other type.
  */
 static void
 default_mp_table(int type)
 {
 	int     ap_cpu_id;
 #if defined(APIC_IO)
 	int     io_apic_id;
 	int     pin;
 #endif	/* APIC_IO */
 
 #if 0
 	printf("  MP default config type: %d\n", type);
 	switch (type) {
 	case 1:
 		printf("   bus: ISA, APIC: 82489DX\n");
 		break;
 	case 2:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 3:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 4:
 		printf("   bus: MCA, APIC: 82489DX\n");
 		break;
 	case 5:
 		printf("   bus: ISA+PCI, APIC: Integrated\n");
 		break;
 	case 6:
 		printf("   bus: EISA+PCI, APIC: Integrated\n");
 		break;
 	case 7:
 		printf("   bus: MCA+PCI, APIC: Integrated\n");
 		break;
 	default:
 		printf("   future type\n");
 		break;
 		/* NOTREACHED */
 	}
 #endif	/* 0 */
 
 	/* the BSP is whoever runs this; the AP gets the other of IDs 0 and 1 */
 	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
 	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
 
 	/* BSP */
 	CPU_TO_ID(0) = boot_cpu_id;
 	ID_TO_CPU(boot_cpu_id) = 0;
 
 	/* one and only AP */
 	CPU_TO_ID(1) = ap_cpu_id;
 	ID_TO_CPU(ap_cpu_id) = 1;
 
 #if defined(APIC_IO)
 	/* one and only IO APIC */
 	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
 
 	/*
 	 * sanity check, refer to MP spec section 3.6.6, last paragraph
 	 * necessary as some hardware isn't properly setting up the IO APIC
 	 */
 #if defined(REALLY_ANAL_IOAPICID_VALUE)
 	if (io_apic_id != 2) {
 #else
 	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
 #endif	/* REALLY_ANAL_IOAPICID_VALUE */
 		io_apic_set_id(0, 2);
 		io_apic_id = 2;
 	}
 	IO_TO_ID(0) = io_apic_id;
 	ID_TO_IO(io_apic_id) = 0;
 #endif	/* APIC_IO */
 
 	/* fill out bus entries */
 	switch (type) {
 	case 1:
 	case 2:
 	case 3:
 	case 4:
 	case 5:
 	case 6:
 	case 7:
 		bus_data[0].bus_id = default_data[type - 1][1];
 		bus_data[0].bus_type = default_data[type - 1][2];
 		/*
 		 * bus_data[] is allocated with default_data[type - 1][0]
 		 * entries (see mptable_pass1/mptable_pass2), which is only 1
 		 * for types 1-4.  Writing the second entry there would run
 		 * past the end of the allocation.
 		 */
 		if (default_data[type - 1][0] > 1) {
 			bus_data[1].bus_id = default_data[type - 1][3];
 			bus_data[1].bus_type = default_data[type - 1][4];
 		}
 		break;
 
 	default:		/* illegal/reserved */
 		panic("BAD default MP config: %d", type);
 		/* NOTREACHED */
 	}
 
 #if defined(APIC_IO)
 	/* general cases from MP v1.4, table 5-2 */
 	for (pin = 0; pin < 16; ++pin) {
 		io_apic_ints[pin].int_type = 0;
 		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
 		io_apic_ints[pin].src_bus_id = 0;
 		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
 		io_apic_ints[pin].dst_apic_id = io_apic_id;
 		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
 	}
 
 	/* special cases from MP v1.4, table 5-2 */
 	if (type == 2) {
 		io_apic_ints[2].int_type = 0xff;	/* N/C */
 		io_apic_ints[13].int_type = 0xff;	/* N/C */
 #if !defined(APIC_MIXED_MODE)
 		/** FIXME: ??? */
 		panic("sorry, can't support type 2 default yet");
 #endif	/* APIC_MIXED_MODE */
 	}
 	else
 		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
 
 	if (type == 7)
 		io_apic_ints[0].int_type = 0xff;	/* N/C */
 	else
 		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
 #endif	/* APIC_IO */
 }
 
 
 /*
  * start each AP in our list
  *
  * Initializes the BSP's local APIC, installs the AP trampoline at
  * 'boot_addr', and then, for each of the mp_naps APs, allocates a private
  * data page and an idle stack, points the warm-start vector at the
  * trampoline and calls start_ap().  Afterwards the warm-start vector is
  * restored and the BSP idle stack is set up.
  *
  * Returns mp_ncpus - 1.
  */
 static int
 start_all_aps(u_int boot_addr)
 {
 	int     x, i, pg;
 	u_char  mpbiosreason;
 	u_long  mpbioswarmvec;
 	struct globaldata *gd;
 	char *stack;
 	uintptr_t kptbase;
 
 	POSTCODE(START_ALL_APS_POST);
 
 	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
 
 	/* initialize BSP's local APIC */
 	apic_initialize();
 	bsp_apic_ready = 1;
 
 	/* install the AP 1st level boot code */
 	install_ap_tramp(boot_addr);
 
 
 	/* save the current value of the warm-start vector */
 	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
 #ifndef PC98
 	/* mpbiosreason is only read and written in the !PC98 case */
 	outb(CMOS_REG, BIOS_RESET);
 	mpbiosreason = inb(CMOS_DATA);
 #endif
 
 	/* set up temporary P==V mapping for AP boot */
 	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
 	kptbase = (uintptr_t)(void *)KPTphys;
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
 		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
 	invltlb();
 
 	/* start each AP */
 	for (x = 1; x <= mp_naps; ++x) {
 
 		/* This is a bit verbose, it will go away soon.  */
 
 		/* first page of AP's private space */
 		pg = x * i386_btop(sizeof(struct privatespace));
 
 		/* allocate a new private data page */
 		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
 
 		/* wire it into the private page table page */
 		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
 
 		/* allocate and set up an idle stack data page */
 		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
 		for (i = 0; i < UPAGES; i++)
 			SMPpt[pg + 1 + i] = (pt_entry_t)
 			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 		/* prime data page for it to use */
 		gd->gd_cpuid = x;
 		globaldata_register(gd);
 
 		/* setup a vector to our boot code */
 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
 #ifndef PC98
 		outb(CMOS_REG, BIOS_RESET);
 		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 #endif
 
 		/* top of this AP's idle stack, and the id of the AP being booted */
 		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
 		bootAP = x;
 
 		/* attempt to start the Application Processor */
 		CHECK_INIT(99);	/* setup checkpoints */
 		if (!start_ap(x, boot_addr)) {
 			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
 			CHECK_PRINT("trace");	/* show checkpoints */
 			/* better panic as the AP may be running loose */
 			/* any key other than 'n' panics */
 			printf("panic y/n? [y] ");
 			if (cngetc() != 'n')
 				panic("bye-bye");
 		}
 		CHECK_PRINT("trace");		/* show checkpoints */
 
 		/*
 		 * record its version info
 		 *
 		 * NOTE(review): this copies cpu_apic_versions[0], which this
 		 * function only assigns after the loop (below) -- confirm
 		 * that slot 0 is already populated by earlier code.
 		 */
 		cpu_apic_versions[x] = cpu_apic_versions[0];
 
 		all_cpus |= (1 << x);		/* record AP in CPU map */
 	}
 
 	/* build our map of 'other' CPUs */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	/* fill in our (BSP) APIC version */
 	cpu_apic_versions[0] = lapic.version;
 
 	/* restore the warmstart vector */
 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
 #ifndef PC98
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, mpbiosreason);
 #endif
 
 	/*
 	 * Set up the idle context for the BSP.  Similar to above except
 	 * that some was done by locore, some by pmap.c and some is implicit
 	 * because the BSP is cpu#0 and the page is initially zero, and also
 	 * because we can refer to variables by name on the BSP..
 	 */
 
 	/* Allocate and setup BSP idle stack */
 	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
 	for (i = 0; i < UPAGES; i++)
 		SMPpt[1 + i] = (pt_entry_t)
 		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 	/* tear down the temporary P==V mapping installed above */
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = 0;
 	pmap_set_opt();
 
 	/* number of APs actually started */
 	return mp_ncpus - 1;
 }
 
 
 /*
  * load the 1st level AP boot code into base memory.
  *
  * Copies bootMP_size bytes of the bootMP image to boot_addr (accessed
  * through KERNBASE) and then patches the copy at the locations named by
  * the symbols declared below.
  */
 
 /* targets for relocation */
 extern void bigJump(void);
 extern void bootCodeSeg(void);
 extern void bootDataSeg(void);
 extern void MPentry(void);
 extern u_int MP_GDT;
 extern u_int mp_gdtbase;
 
 static void
 install_ap_tramp(u_int boot_addr)
 {
 	int     x;
 	int     size = *(int *) ((u_long) & bootMP_size);
 	u_char *src = (u_char *) ((u_long) bootMP);
 	u_char *dst = (u_char *) boot_addr + KERNBASE;
 	u_int   boot_base = (u_int) bootMP;
 	u_int8_t *dst8;
 	u_int16_t *dst16;
 	u_int32_t *dst32;
 
 	POSTCODE(INSTALL_AP_TRAMP_POST);
 
 	/* byte-wise copy of the boot image to its destination */
 	for (x = 0; x < size; ++x)
 		*dst++ = *src++;
 
 	/*
 	 * modify addresses in code we just moved to basemem. unfortunately we
 	 * need fairly detailed info about mpboot.s for this to work.  changes
 	 * to mpboot.s might require changes here.
 	 */
 
 	/* boot code is located in KERNEL space */
 	/* (dst was advanced by the copy loop, so reset it to the start) */
 	dst = (u_char *) boot_addr + KERNBASE;
 
 	/* modify the lgdt arg */
 	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
 	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
 
 	/* modify the ljmp target for MPentry() */
 	/* the 32-bit target is written one byte past the bigJump label */
 	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
 	*dst32 = ((u_int) MPentry - KERNBASE);
 
 	/* modify the target for boot code segment */
 	/* low 16 bits of boot_addr, followed by bits 16-23 in the next byte */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 
 	/* modify the target for boot data segment */
 	/* same 16 + 8 bit layout as the code segment patch above */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 }
 
 
 /*
  * this function starts the AP (application processor) identified
  * by the logical cpu number 'logical_cpu' (translated to its physical
  * APIC ID with CPU_TO_ID()).  It does quite a "song and dance"
  * to accomplish this.  This is necessary because of the nuances
  * of the different hardware we might encounter.  It ain't pretty,
  * but it seems to work.
  *
  * Returns 1 if mp_ncpus increased before the APIC timer armed at the end
  * ran down, 0 otherwise.
  */
 static int
 start_ap(int logical_cpu, u_int boot_addr)
 {
 	int     physical_cpu;
 	int     vector;
 	int     cpus;
 	u_long  icr_lo, icr_hi;
 
 	POSTCODE(START_AP_POST);
 
 	/* get the PHYSICAL APIC ID# */
 	physical_cpu = CPU_TO_ID(logical_cpu);
 
 	/* calculate the vector */
 	/* (the page number of boot_addr, truncated to 8 bits) */
 	vector = (boot_addr >> 12) & 0xff;
 
 	/* used as a watchpoint to signal AP startup */
 	cpus = mp_ncpus;
 
 	/*
 	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
 	 * and running the target CPU. OR this INIT IPI might be latched (P5
 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 	 * ignored.
 	 */
 
 	/* setup the address for the target AP */
 	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
 	icr_hi |= (physical_cpu << 24);
 	lapic.icr_hi = icr_hi;
 
 	/* do an INIT IPI: assert RESET */
 	/* icr_lo keeps only the top 12 bits; it is reused for every IPI below */
 	icr_lo = lapic.icr_lo & 0xfff00000;
 	lapic.icr_lo = icr_lo | 0x0000c500;
 
 	/* wait for pending status end */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/* do an INIT IPI: deassert RESET */
 	lapic.icr_lo = icr_lo | 0x00008500;
 
 	/* wait for pending status end */
 	u_sleep(10000);		/* wait ~10mS */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/*
 	 * next we do a STARTUP IPI: the previous INIT IPI might still be
 	 * latched, (P5 bug) this 1st STARTUP would then terminate
 	 * immediately, and the previously started INIT IPI would continue. OR
 	 * the previous INIT IPI has already run. and this STARTUP IPI will
 	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 	 * will run.
 	 */
 
 	/* do a STARTUP IPI */
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/*
 	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/* wait for it to start */
 	/* poll mp_ncpus until the APIC timer reads zero */
 	set_apic_timer(5000000);/* == 5 seconds */
 	while (read_apic_timer())
 		if (mp_ncpus > cpus)
 			return 1;	/* return SUCCESS */
 
 	return 0;		/* return FAILURE */
 }
 
 /*
  * Flush the TLB on all other CPU's
  *
  * Sends IPI_INVLTLB to every CPU except the caller, but only once both
  * smp_started and invltlb_ok are set; without APIC_IO this is a no-op.
  *
  * XXX: Needs to handshake and wait for completion before proceeding.
  */
 void
 smp_invltlb(void)
 {
 #if defined(APIC_IO)
 	if (smp_started && invltlb_ok)
 		ipi_all_but_self(IPI_INVLTLB);
 #endif  /* APIC_IO */
 }
 
 /*
  * Invalidate the local TLB entry for 'addr' with the invlpg instruction,
  * then ask the other CPUs to flush via smp_invltlb().
  */
 void
 invlpg(u_int addr)
 {
 	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 /*
  * Flush the local TLB by reloading %cr3 with its current value, then ask
  * the other CPUs to flush via smp_invltlb().
  */
 void
 invltlb(void)
 {
 	u_long  temp;
 
 	/*
 	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
 	 * inlined.
 	 */
 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 
 /*
  * This is called once the rest of the system is up and running and we're
  * ready to let the AP's out of the pen.
  *
  * Each AP spins until aps_ready is set, then performs its per-CPU setup
  * one AP at a time under ap_boot_mtx, waits for smp_started and finally
  * enters the scheduler through cpu_throw().
  */
 extern void	enable_sse(void);
 
 void
 ap_init(void)
 {
 	u_int	apic_id;
 
 	/* spin until all the AP's are ready */
 	while (!aps_ready)
 		/* spin */ ;
 
 	/*
 	 * Set curproc to our per-cpu idleproc so that mutexes have
 	 * something unique to lock with.
 	 */
 	PCPU_SET(curproc, PCPU_GET(idleproc));
 	PCPU_SET(spinlocks, NULL);
 
 	/* lock against other AP's that are waking up */
 	mtx_lock_spin(&ap_boot_mtx);
 
 	/* BSP may have changed PTD while we're waiting for the lock */
 	cpu_invltlb();
 
 	/* count this AP; compared against mp_ncpus below */
 	smp_cpus++;
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 	lidt(&r_idt);
 #endif
 
 	/* Build our map of 'other' CPUs. */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
 
 	/* set up CPU registers and state */
 	cpu_setregs();
 
 	/* set up FPU state on the AP */
 	npxinit(__INITIAL_NPXCW__);
 
 	/* set up SSE registers */
 	enable_sse();
 
 	/* A quick check from sanity claus */
 	/*
 	 * Bits 24-27 of lapic.id index apic_id_to_logical[]; the result must
 	 * match the cpuid stored in our per-CPU data.
 	 */
 	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
 	if (PCPU_GET(cpuid) != apic_id) {
 		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
 		printf("SMP: apic_id = %d\n", apic_id);
 		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
 		panic("cpuid mismatch! boom!!");
 	}
 
 	/* Init local apic for irq's */
 	apic_initialize();
 
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
 	/*
 	 * Activate smp_invltlb, although strictly speaking, this isn't
 	 * quite correct yet.  We should have a bitfield for cpus willing
 	 * to accept TLB flush IPI's or something and sync them.
 	 */
 	/* only the last CPU to get here flips the global flags */
 	if (smp_cpus == mp_ncpus) {
 		invltlb_ok = 1;
 		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
 		smp_active = 1;	 /* historic */
 	}
 
 	/* let other AP's wake up now */
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* wait until all the AP's are up */
 	while (smp_started == 0)
 		; /* nothing */
 
 	microuptime(PCPU_PTR(switchtime));
 	PCPU_SET(switchticks, ticks);
 
 	/* ok, now grab sched_lock and enter the scheduler */
 	enable_intr();
 	mtx_lock_spin(&sched_lock);
 	cpu_throw();	/* doesn't return */
 
 	/* only reached if cpu_throw() unexpectedly returns */
 	panic("scheduler returned us to ap_init");
 }
 
 /*
  * Receiving side of the statclock IPI: every CPU that gets the IPI runs
  * this to do its own statclock processing under sched_lock.
  */
 void
 forwarded_statclock(struct trapframe frame)
 {
 	struct trapframe *tf;
 
 	tf = &frame;
 
 	mtx_lock_spin(&sched_lock);
 	statclock_process(curproc, TRAPF_PC(tf), TRAPF_USERMODE(tf));
 	mtx_unlock_spin(&sched_lock);
 }
 
 void
 forward_statclock(void)
 {
 	int map;
 
 	CTR0(KTR_SMP, "forward_statclock");
 
 	if (!smp_started || !invltlb_ok || cold || panicstr)
 		return;
 
 	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 	if (map != 0)
 		ipi_selected(map, IPI_STATCLOCK);
 }
 
 /*
  * Receiving side of the hardclock IPI.  forward_hardclock() sends the IPI
  * to all other CPUs on each hardclock(), and each of them ends up here.
  * It would be nice to reduce contention on sched_lock if we could simply
  * peek at the CPU to determine the user/kernel state and call
  * hardclock_process() on the CPU receiving the clock interrupt, and then
  * just use a simple IPI to handle any ast's if needed.
  */
 void
 forwarded_hardclock(struct trapframe frame)
 {
 	struct trapframe *tf;
 
 	tf = &frame;
 
 	mtx_lock_spin(&sched_lock);
 	hardclock_process(curproc, TRAPF_USERMODE(tf));
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Send IPI_HARDCLOCK to every other CPU that is not stopped.  Nothing is
  * sent before SMP is fully up, while cold, or during a panic.
  */
 void 
 forward_hardclock(void)
 {
 	u_int targets;
 
 	CTR0(KTR_SMP, "forward_hardclock");
 
 	if (!(smp_started && invltlb_ok) || cold || panicstr)
 		return;
 
 	targets = PCPU_GET(other_cpus) & ~stopped_cpus;
 	if (targets == 0)
 		return;
 
 	ipi_selected(targets, IPI_HARDCLOCK);
 }
 
 #ifdef APIC_INTR_REORDER
 /*
  *	Maintain mapping from softintr vector to isr bit in local apic.
  *
  *	Records, for interrupt 'intr', which word following lapic.isr0 and
  *	which bit within that word correspond to 'vector'.  Panics on an
  *	out-of-range argument.
  */
 void
 set_lapic_isrloc(int intr, int vector)
 {
 	/*
 	 * NOTE(review): this accepts intr == 32; if apic_isrbit_location[]
 	 * has 32 entries the test should be 'intr >= 32' -- confirm against
 	 * the array's declaration.
 	 */
 	if (intr < 0 || intr > 32)
 		panic("set_lapic_isrloc: bad intr argument: %d",intr);
 	if (vector < ICU_OFFSET || vector > 255)
 		panic("set_lapic_isrloc: bad vector argument: %d",vector);
 	/* vector>>5 selects the 32-bit group; <<2 scales to the register spacing */
 	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
 	apic_isrbit_location[intr].bit = (1<<(vector & 31));
 }
 #endif
 
 /*
  * send an IPI to a set of cpus.
  *
  * 'cpus' is a bitmask of target CPUs and 'ipi' the IPI to deliver; both
  * are passed to selected_apic_ipi() with fixed delivery mode.
  */
 void
 ipi_selected(u_int32_t cpus, u_int ipi)
 {
 
 	/*
 	 * __func__ is an identifier, not a string literal, so it cannot be
 	 * pasted onto the format string; pass it as a %s argument instead.
 	 */
 	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs, including myself
  */
 void
 ipi_all(u_int ipi)
 {
 
 	/*
 	 * __func__ is an identifier, not a string literal, so it cannot be
 	 * pasted onto the format string; pass it as a %s argument instead.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED); 
 }
 
 /*
  * send the IPI 'ipi' to all CPUs EXCEPT myself
  */
 void
 ipi_all_but_self(u_int ipi)
 {
 
 	/*
 	 * __func__ is an identifier, not a string literal, so it cannot be
 	 * pasted onto the format string; pass it as a %s argument instead.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED); 
 }
 
 /*
  * send the IPI 'ipi' to myself
  */
 void
 ipi_self(u_int ipi)
 {
 
 	/*
 	 * __func__ is an identifier, not a string literal, so it cannot be
 	 * pasted onto the format string; pass it as a %s argument instead.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED); 
 }
 
 /*
  * Set aps_ready (with a release store), which is the flag the APs spin
  * on at the top of ap_init().  The argument is unused.
  */
 void
 release_aps(void *dummy __unused)
 {
 	atomic_store_rel_int(&aps_ready, 1);
 }
 
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
Index: head/sys/amd64/amd64/pmap.c
===================================================================
--- head/sys/amd64/amd64/pmap.c	(revision 82308)
+++ head/sys/amd64/amd64/pmap.c	(revision 82309)
@@ -1,3397 +1,3398 @@
 /*
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  * $FreeBSD$
  */
 
 /*
  *	Manages physical address maps.
  *
  *	In addition to hardware address maps, this
  *	module is called upon to provide software-use-only
  *	maps which may or may not be stored in the same
  *	form as hardware maps.  These pseudo-maps are
  *	used to store intermediate results from copy
  *	operations to and from address spaces.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include "opt_disable_pse.h"
 #include "opt_pmap.h"
 #include "opt_msgbuf.h"
+#include "opt_upages.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mman.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sx.h>
 #include <sys/user.h>
 #include <sys/vmmeter.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_zone.h>
 
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 #if defined(SMP) || defined(APIC_IO)
 #include <machine/smp.h>
 #include <machine/apic.h>
 #include <machine/segments.h>
 #include <machine/tss.h>
 #include <machine/globaldata.h>
 #endif /* SMP || APIC_IO */
 
 #define PMAP_KEEP_PDIRS
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC 200
 #endif
 
 #if defined(DIAGNOSTIC)
 #define PMAP_DIAGNOSTIC
 #endif
 
 #define MINPV 2048
 
 #if !defined(PMAP_DIAGNOSTIC)
 #define PMAP_INLINE __inline
 #else
 #define PMAP_INLINE
 #endif
 
 /*
  * Get PDEs and PTEs for user/kernel address space
  */
 #define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
 #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
 
 #define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
 #define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
 #define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
 #define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
 #define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
 
 #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
 #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
 
 /*
  * Given a map and a machine independent protection code,
  * convert to a vax protection code.
  */
 #define pte_prot(m, p)	(protection_codes[p])
 static int protection_codes[8];
 
 static struct pmap kernel_pmap_store;
 pmap_t kernel_pmap;
 LIST_HEAD(pmaplist, pmap);
 struct pmaplist allpmaps;
 
 vm_offset_t avail_start;	/* PA of first available physical page */
 vm_offset_t avail_end;		/* PA of last available physical page */
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
 static int pgeflag;		/* PG_G or-in */
 static int pseflag;		/* PG_PS or-in */
 
 static vm_object_t kptobj;
 
 static int nkpt;
 vm_offset_t kernel_vm_end;
 
 /*
  * Data for the pv entry allocation mechanism
  */
 static vm_zone_t pvzone;
 static struct vm_zone pvzone_store;
 static struct vm_object pvzone_obj;
 static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
 static int pmap_pagedaemon_waken = 0;
 static struct pv_entry *pvinit;
 
 /*
  * All those kernel PT submaps that BSD is so fond of
  */
 pt_entry_t *CMAP1 = 0;
 static pt_entry_t *CMAP2, *ptmmap;
 caddr_t CADDR1 = 0, ptvmmap = 0;
 static caddr_t CADDR2;
 static pt_entry_t *msgbufmap;
 struct msgbuf *msgbufp=0;
 
 /*
  * Crashdump maps.
  */
 static pt_entry_t *pt_crashdumpmap;
 static caddr_t crashdumpmap;
 
 #ifdef SMP
 extern pt_entry_t *SMPpt;
 #endif
 static pt_entry_t *PMAP1 = 0;
 static unsigned *PADDR1 = 0;
 
 static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
 static unsigned * get_ptbase __P((pmap_t pmap));
 static pv_entry_t get_pv_entry __P((void));
 static void	i386_protection_init __P((void));
 static __inline void	pmap_changebit __P((vm_page_t m, int bit, boolean_t setem));
 
 static void	pmap_remove_all __P((vm_page_t m));
 static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
 				      vm_page_t m, vm_page_t mpte));
 static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
 					vm_offset_t sva));
 static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
 static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m,
 					vm_offset_t va));
 static boolean_t pmap_testbit __P((vm_page_t m, int bit));
 static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
 		vm_page_t mpte, vm_page_t m));
 
 static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
 
 static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
 static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
 static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
 static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
 static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
 
 static unsigned pdir4mb;
 
 /*
  *	Routine:	pmap_pte
  *	Function:
  *		Return a pointer to the page table entry that maps 'va'
  *		in 'pmap'.  For a PG_PS (large page) mapping the page
  *		directory entry itself is returned.  Returns 0 when the
  *		pmap is null or the directory entry is empty.
  */
 
 PMAP_INLINE unsigned *
 pmap_pte(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	unsigned *pde;
 
 	if (!pmap)
 		return (0);
 
 	pde = (unsigned *) pmap_pde(pmap, va);
 
 	/* large page: the directory entry is the mapping */
 	if (*pde & PG_PS)
 		return pde;
 
 	/* nothing mapped at this directory slot */
 	if (*pde == 0)
 		return (0);
 
 	return get_ptbase(pmap) + i386_btop(va);
 }
 
 /*
  * Round the kernel virtual free pointer up to the next NBPDR (4MB)
  * boundary when the CPU reports PSE and PSE has not been compiled out,
  * so that a large page can cover much of the kernel (.text, .data, .bss).
  * Otherwise the address is returned unchanged.
  */
 static vm_offset_t
 pmap_kmem_choose(vm_offset_t addr)
 {
 #ifndef DISABLE_PSE
 	if (cpu_feature & CPUID_PSE)
 		return ((addr + (NBPDR - 1)) & ~(NBPDR - 1));
 #endif
 	return (addr);
 }
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  *
  *	On the i386 this is called after mapping has already been enabled
  *	and just syncs the pmap module with what has already been done.
  *	[We can't call it easily with mapping off since the kernel is not
  *	mapped with PA == VA, hence we would have to relocate every address
  *	from the linked base (virtual) address "KERNBASE" to the actual
  *	(physical) address starting relative to 0]
  *
  *	'firstaddr' becomes avail_start and seeds virtual_avail; 'loadaddr'
  *	is not referenced in the body.
  */
 void
 pmap_bootstrap(firstaddr, loadaddr)
 	vm_offset_t firstaddr;
 	vm_offset_t loadaddr;
 {
 	vm_offset_t va;
 	pt_entry_t *pte;
 	int i;
 
 	avail_start = firstaddr;
 
 	/*
 	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
 	 * large. It should instead be correctly calculated in locore.s and
 	 * not based on 'first' (which is a physical address, not a virtual
 	 * address, for the start of unused physical memory). The kernel
 	 * page tables are NOT double mapped and thus should not be included
 	 * in this calculation.
 	 */
 	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
 	virtual_avail = pmap_kmem_choose(virtual_avail);
 
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
 	/*
 	 * Initialize protection array.
 	 */
 	i386_protection_init();
 
 	/*
 	 * The kernel's pmap is statically allocated so we don't have to use
 	 * pmap_create, which is unlikely to work correctly at this part of
 	 * the boot sequence (XXX and which no longer exists).
 	 */
 	kernel_pmap = &kernel_pmap_store;
 
 	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
 	kernel_pmap->pm_count = 1;
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
 	LIST_INIT(&allpmaps);
 	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
 	nkpt = NKPT;
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
 	 * mapping of pages.
 	 */
 	/*
 	 * SYSMAP(c, p, v, n): set v to the current va (cast to c) and p to
 	 * the current pte, then advance both by n pages/entries.
 	 */
 #define	SYSMAP(c, p, v, n)	\
 	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
 
 	va = virtual_avail;
 	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
 
 	/*
 	 * CMAP1/CMAP2 are used for zeroing and copying pages.
 	 */
 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
 
 	/*
 	 * Crashdump maps.
 	 */
 	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
 
 	/*
 	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
 	 * XXX ptmmap is not used.
 	 */
 	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
 
 	/*
 	 * msgbufp is used to map the system message buffer.
 	 * XXX msgbufmap is not used.
 	 */
 	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
 	       atop(round_page(MSGBUF_SIZE)))
 
 	/*
 	 * ptemap is used for pmap_pte_quick
 	 */
 	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
 
 	/* everything below 'va' is now reserved */
 	virtual_avail = va;
 
 	*(int *) CMAP1 = *(int *) CMAP2 = 0;
 	/* clear the first NKPT page directory entries */
 	for (i = 0; i < NKPT; i++)
 		PTD[i] = 0;
 
 	pgeflag = 0;
 #if !defined(SMP)			/* XXX - see also mp_machdep.c */
 	if (cpu_feature & CPUID_PGE) {
 		pgeflag = PG_G;
 	}
 #endif
 	
 /*
  * Initialize the 4MB page size flag
  */
 	pseflag = 0;
 /*
  * The 4MB page version of the initial
  * kernel page mapping.
  */
 	pdir4mb = 0;
 
 #if !defined(DISABLE_PSE)
 	if (cpu_feature & CPUID_PSE) {
 		unsigned ptditmp;
 		/*
 		 * Note that we have enabled PSE mode
 		 */
 		pseflag = PG_PS;
 		/* build a large-page PDE from the PTE that maps KERNBASE */
 		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
 		ptditmp &= ~(NBPDR - 1);
 		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
 		pdir4mb = ptditmp;
 
 #if !defined(SMP)
 		/*
 		 * Enable the PSE mode.
 		 */
 		load_cr4(rcr4() | CR4_PSE);
 
 		/*
 		 * We can do the mapping here for the single processor
 		 * case.  We simply ignore the old page table page from
 		 * now on.
 		 */
 		/*
 		 * For SMP, we still need 4K pages to bootstrap APs,
 		 * PSE will be enabled as soon as all APs are up.
 		 * (In the SMP build this is done by pmap_set_opt().)
 		 */
 		PTD[KPTDI] = (pd_entry_t) ptditmp;
 		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
 		invltlb();
 #endif
 	}
 #endif
 
 #ifdef SMP
 	if (cpu_apic_address == 0)
 		panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
 
 	/* local apic is mapped on last page */
 	SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
 	    (cpu_apic_address & PG_FRAME));
 #endif
 
 	invltlb();
 }
 
 #ifdef SMP
 /*
  * Set 4mb pdir for mp startup
  *
  * When pmap_bootstrap() selected PSE (pseflag) and the CPU reports it,
  * turn on CR4_PSE on the calling CPU.  On cpu 0 only, also install the
  * saved large-page directory entry (pdir4mb) at KPTDI and flush the local
  * TLB.
  */
 void
 pmap_set_opt(void)
 {
 	if (pseflag && (cpu_feature & CPUID_PSE)) {
 		load_cr4(rcr4() | CR4_PSE);
 		if (pdir4mb && PCPU_GET(cpuid) == 0) {	/* only on BSP */
 			/* update both PTD and the kernel pmap's directory */
 			kernel_pmap->pm_pdir[KPTDI] =
 			    PTD[KPTDI] = (pd_entry_t)pdir4mb;
 			cpu_invltlb();
 		}
 	}
 }
 #endif
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  *	pmap_init has been enhanced to support in a fairly consistent
  *	way, discontiguous physical memory.
  *
  *	phys_start and phys_end are not referenced in the body.
  */
 void
 pmap_init(phys_start, phys_end)
 	vm_offset_t phys_start, phys_end;
 {
 	int i;
 	int initial_pvs;
 
 	/*
 	 * object for kernel page table pages
 	 */
 	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
 
 	/*
 	 * Allocate memory for random pmap data structures.  Includes the
 	 * pv_head_table.
 	 */
 
 	for(i = 0; i < vm_page_array_size; i++) {
 		vm_page_t m;
 
 		m = &vm_page_array[i];
 		TAILQ_INIT(&m->md.pv_list);
 		m->md.pv_list_count = 0;
 	}
 
 	/*
 	 * init the pv free list
 	 */
 	/* one pv entry per page, but never fewer than MINPV */
 	initial_pvs = vm_page_array_size;
 	if (initial_pvs < MINPV)
 		initial_pvs = MINPV;
 	pvzone = &pvzone_store;
 	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
 		initial_pvs * sizeof (struct pv_entry));
 	/*
 	 * Hand the zone every entry that was allocated above.  Passing
 	 * vm_page_array_size here would ignore the MINPV floor and leave
 	 * the extra memory unused.
 	 */
 	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
 	    initial_pvs);
 
 	/*
 	 * Now it is safe to enable pv_table recording.
 	 */
 	pmap_initialized = TRUE;
 }
 
 /*
  * Second-stage setup of the pv_entry zone.  Computes the maximum number
  * of pv entries (tunable via vm.pmap.shpgperproc) and a high water mark
  * at nine tenths of that, so that the system can recover from excessive
  * numbers of pv entries, then finishes initializing the zone.
  */
 void
 pmap_init2()
 {
 	int pages_per_proc;
 
 	pages_per_proc = PMAP_SHPGPERPROC;
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &pages_per_proc);
 
 	pv_entry_max = pages_per_proc * maxproc + vm_page_array_size;
 	pv_entry_high_water = (pv_entry_max / 10) * 9;
 
 	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
 }
 
 
 /***************************************************
  * Low level helper routines.....
  ***************************************************/
 
 #if defined(PMAP_DIAGNOSTIC)
 
 /*
  * Diagnostic check: returns 1 when the entry has PG_M set while PG_RW is
  * clear (a modified but non-writeable page, which should never happen),
  * and 0 otherwise.
  */
 static int
 pmap_nw_modified(pt_entry_t ptea)
 {
 	int bits = (int) ptea & (PG_M|PG_RW);
 
 	return (bits == PG_M);
 }
 #endif
 
 
 /*
  * Tell whether the modified bit should be tested for 'va'.  Addresses
  * inside [kmi.clean_sva, kmi.clean_eva) are exempt and yield 0; every
  * other address yields 1.
  */
 static PMAP_INLINE int
 pmap_track_modified(vm_offset_t va)
 {
 	return (va < kmi.clean_sva || va >= kmi.clean_eva);
 }
 
 /*
  * Invalidate the TLB entry for a single page.  Builds with I386_CPU
  * defined flush the whole TLB instead of using invlpg().
  */
 static PMAP_INLINE void
 invltlb_1pg(vm_offset_t va)
 {
 #ifdef I386_CPU
 	invltlb();
 #else
 	invlpg(va);
 #endif
 }
 
 /*
  * Invalidate the TLB entry for 'va' wherever 'pmap' is active.
  *
  * SMP: invalidate the page locally if this CPU's bit is set in
  * pm_active, and call smp_invltlb() if any other CPU's bit is set.
  * UP: invalidate the page if pm_active is non-zero.
  */
 static __inline void
 pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va)
 {
 #if defined(SMP)
 	if (pmap->pm_active & (1 << PCPU_GET(cpuid)))
 		cpu_invlpg((void *)va);
 	if (pmap->pm_active & PCPU_GET(other_cpus))
 		smp_invltlb();
 #else
 	if (pmap->pm_active)
 		invltlb_1pg(va);
 #endif
 }
 
 /*
  * Flush the whole TLB wherever 'pmap' is active.
  *
  * SMP: flush locally if this CPU's bit is set in pm_active, and call
  * smp_invltlb() if any other CPU's bit is set.
  * UP: flush if pm_active is non-zero.
  */
 static __inline void
 pmap_TLB_invalidate_all(pmap_t pmap)
 {
 #if defined(SMP)
 	if (pmap->pm_active & (1 << PCPU_GET(cpuid)))
 		cpu_invltlb();
 	if (pmap->pm_active & PCPU_GET(other_cpus))
 		smp_invltlb();
 #else
 	if (pmap->pm_active)
 		invltlb();
 #endif
 }
 
 /*
  * Return the base of the page table window through which 'pmap' can be
  * accessed: PTmap for the kernel pmap or the current address space,
  * APTmap otherwise.  In the latter case APTDpde is repointed at the
  * pmap's page directory (and the local TLB flushed) if it does not
  * already refer to it.
  */
 static unsigned *
 get_ptbase(pmap)
 	pmap_t pmap;
 {
 	/* frame of this pmap's recursive page directory entry */
 	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 
 	/* are we current address space or kernel? */
 	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
 		return (unsigned *) PTmap;
 	}
 	/* otherwise, we are alternate address space */
 	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
 		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
 #if defined(SMP)
 		/* The page directory is not shared between CPUs */
 		cpu_invltlb();
 #else
 		invltlb();
 #endif
 	}
 	return (unsigned *) APTmap;
 }
 
 /*
  * Fast variant of pmap_pte, intended for pv list scans, which frequently
  * cross pmaps.  For a pmap that is neither the kernel's nor the current
  * one, the page table page is mapped through the single PMAP1/PADDR1
  * slot and only that one page is invalidated, which avoids the
  * coarse-grained full TLB flushes a scan would otherwise cause.
  * Returns 0 when the page directory entry for 'va' is empty.
  */
 
 static unsigned * 
 pmap_pte_quick(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	unsigned pde, ptframe, ptidx, selfframe;
 
 	pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT];
 	if (pde == 0)
 		return (0);
 
 	selfframe = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 	ptidx = i386_btop(va);
 
 	/* kernel pmap or current address space: use the PTmap window */
 	if (pmap == kernel_pmap ||
 	    selfframe == (((unsigned) PTDpde) & PG_FRAME))
 		return (unsigned *) PTmap + ptidx;
 
 	/* remap the PADDR1 window only if it shows a different page */
 	ptframe = pde & PG_FRAME;
 	if (((* (unsigned *) PMAP1) & PG_FRAME) != ptframe) {
 		* (unsigned *) PMAP1 = ptframe | PG_RW | PG_V;
 		invltlb_1pg((vm_offset_t) PADDR1);
 	}
 	return PADDR1 + (ptidx & (NPTEPG - 1));
 }
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Return the physical address that 'va' maps to in 'pmap',
  *		or 0 if the pmap is null or its page directory entry for
  *		'va' is empty.
  */
 vm_offset_t 
 pmap_extract(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t pde;
 	unsigned *ptep;
 
 	if (!pmap)
 		return 0;
 
 	pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT];
 	if (pde == 0)
 		return 0;
 
 	/* large page: frame from the PDE, offset from the low bits of va */
 	if ((pde & PG_PS) != 0)
 		return ((pde & ~(NBPDR - 1)) | (va & (NBPDR - 1)));
 
 	ptep = get_ptbase(pmap) + i386_btop(va);
 	return ((*ptep & PG_FRAME) | (va & PAGE_MASK));
 }
 
 /***************************************************
  * Low level mapping routines.....
  ***************************************************/
 
 /*
  * add a wired page to the kva
  *
  * Writes a valid, writable PTE (plus pgeflag) for 'pa' at 'va',
  * replacing whatever was there, and unconditionally invalidates that
  * page's TLB entry through invltlb_1pg().
  */
 PMAP_INLINE void 
 pmap_kenter(va, pa)
 	vm_offset_t va;
 	register vm_offset_t pa;
 {
 	register unsigned *pte;
 	unsigned npte;
 
 	npte = pa | PG_RW | PG_V | pgeflag;
 	pte = (unsigned *)vtopte(va);
 	*pte = npte;
 	/* always invalidate, whether or not an old mapping existed */
 	invltlb_1pg(va);	/* XXX what about SMP? */
 }
 
 /*
  * remove a page from the kernel pagetables
  *
  * Zeroes the PTE for 'va' and invalidates that page's TLB entry.
  */
 PMAP_INLINE void
 pmap_kremove(va)
 	vm_offset_t va;
 {
 	register unsigned *pte;
 
 	pte = (unsigned *)vtopte(va);
 	*pte = 0;
 	invltlb_1pg(va);	/* XXX what about SMP? */
 }
 
 /*
  *	Map the physical range [start, end) into kernel virtual address
  *	space, one page at a time, beginning at the address in '*virt'.
  *
  *	The generic contract lets an architecture with a direct-mapped
  *	region return an address inside it and leave '*virt' alone.  This
  *	implementation always maps at '*virt', advances '*virt' past the
  *	last page it entered, and returns the original '*virt'.  'prot'
  *	is not consulted.
  */
 vm_offset_t
 pmap_map(virt, start, end, prot)
 	vm_offset_t *virt;
 	vm_offset_t start;
 	vm_offset_t end;
 	int prot;
 {
 	vm_offset_t first = *virt;
 	vm_offset_t cur;
 
 	for (cur = first; start < end; cur += PAGE_SIZE, start += PAGE_SIZE)
 		pmap_kenter(cur, start);
 
 	*virt = cur;
 	return (first);
 }
 
 
 /*
  * Enter 'count' pages from the array 'm' at consecutive kernel virtual
  * addresses starting at 'va'.  Intended for temporary kernel mappings
  * that need no modify/reference tracking; existing mappings in the
  * range are simply overwritten.  The pages *must* be wired.
  */
 void
 pmap_qenter(va, m, count)
 	vm_offset_t va;
 	vm_page_t *m;
 	int count;
 {
 	vm_offset_t tva = va;
 	int i;
 
 	for (i = 0; i < count; i++, tva += PAGE_SIZE)
 		pmap_kenter(tva, VM_PAGE_TO_PHYS(m[i]));
 }
 
 /*
  * Zap 'count' consecutive kernel mappings starting at 'va'.  Meant
  * only for temporary mappings.  Each pte is cleared and the local TLB
  * entry flushed; on SMP a single smp_invltlb() follows the loop.
  */
 void
 pmap_qremove(va, count)
 	vm_offset_t va;
 	int count;
 {
 	vm_offset_t end_va = va + count*PAGE_SIZE;
 
 	for (; va < end_va; va += PAGE_SIZE) {
 		unsigned *ptep = (unsigned *)vtopte(va);
 
 		*ptep = 0;
 #ifdef SMP
 		cpu_invlpg((void *)va);
 #else
 		invltlb_1pg(va);
 #endif
 	}
 #ifdef SMP
 	smp_invltlb();
 #endif
 }
 
 /*
  * Look up the page at 'pindex' in 'object'.  The lookup is repeated
  * for as long as a page is found and vm_page_sleep_busy() returns
  * non-zero for it.  Returns the page, or NULL if the object has none
  * at that index.
  */
 static vm_page_t
 pmap_page_lookup(object, pindex)
 	vm_object_t object;
 	vm_pindex_t pindex;
 {
 	vm_page_t m;
 
 	do {
 		m = vm_page_lookup(object, pindex);
 	} while (m && vm_page_sleep_busy(m, FALSE, "pplookp"));
 
 	return m;
 }
 
 /*
  * Create the UPAGES for a new process.
  * This routine directly affects the fork perf for a process.
  *
  * p->p_upages_obj and p->p_addr are reused when already set; otherwise
  * a VM object and a UPAGES-page kernel virtual range are allocated.
  * Each of the UPAGES pages is grabbed from the object, wired, and
  * entered directly into the kernel page table.  Panics if the kernel
  * virtual range cannot be allocated.
  */
 void
 pmap_new_proc(p)
 	struct proc *p;
 {
 #ifdef I386_CPU
 	int updateneeded;
 #endif
 	int i;
 	vm_object_t upobj;
 	vm_page_t m;
 	struct user *up;
 	unsigned *ptek, oldpte;
 
 	/*
 	 * allocate object for the upages
 	 */
 	if ((upobj = p->p_upages_obj) == NULL) {
 		upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
 		p->p_upages_obj = upobj;
 	}
 
 	/* get a kernel virtual address for the UPAGES for this proc */
 	if ((up = p->p_addr) == NULL) {
 		up = (struct user *) kmem_alloc_nofault(kernel_map,
 				UPAGES * PAGE_SIZE);
 		if (up == NULL)
 			panic("pmap_new_proc: u_map allocation failed");
 		p->p_addr = up;
 	}
 
 	/*
 	 * ptek is the pte for the first upage; the loop below indexes
 	 * ptek + i for page i (assumes the ptes for this range are
 	 * contiguous -- TODO confirm against vtopte()).
 	 */
 	ptek = (unsigned *) vtopte((vm_offset_t) up);
 
 #ifdef I386_CPU
 	updateneeded = 0;
 #endif
 	for(i=0;i<UPAGES;i++) {
 		/*
 		 * Get a kernel stack page
 		 */
 		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
 		/*
 		 * Wire the page
 		 */
 		m->wire_count++;
 		cnt.v_wire_count++;
 
 		/* Remember the old pte so a flush is issued only if needed. */
 		oldpte = *(ptek + i);
 		/*
 		 * Enter the page into the kernel address space.
 		 */
 		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
 		if (oldpte) {
 			/*
 			 * The slot held a non-zero pte before.  With I386_CPU
 			 * the flush is deferred to one invltlb() after the
 			 * loop; otherwise the single page is invalidated now.
 			 */
 #ifdef I386_CPU
 			updateneeded = 1;
 #else
 			invlpg((vm_offset_t) up + i * PAGE_SIZE);
 #endif
 		}
 
 		vm_page_wakeup(m);
 		vm_page_flag_clear(m, PG_ZERO);
 		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
 		m->valid = VM_PAGE_BITS_ALL;
 	}
 #ifdef I386_CPU
 	/* Deferred flush for every slot that was overwritten above. */
 	if (updateneeded)
 		invltlb();
 #endif
 }
 
 /*
  * Dispose the UPAGES for a process that has exited.
  * This routine directly impacts the exit perf of a process.
  *
  * For each upage: the page is looked up in p->p_upages_obj (panic if
  * it is missing), marked busy, its kernel pte is cleared, and the page
  * is unwired and freed.  Without I386_CPU every page is invalidated
  * individually; with I386_CPU one full invltlb() follows the loop.
  */
 void
 pmap_dispose_proc(p)
 	struct proc *p;
 {
 	int i;
 	vm_object_t upobj;
 	vm_page_t m;
 	unsigned *ptek;
 
 	upobj = p->p_upages_obj;
 
 	/* pte of the first upage; ptek + i is used for page i below. */
 	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
 	for(i=0;i<UPAGES;i++) {
 
 		if ((m = vm_page_lookup(upobj, i)) == NULL)
 			panic("pmap_dispose_proc: upage already missing???");
 
 		vm_page_busy(m);
 
 		/* The previous pte value is not needed; just clear the slot. */
 		*(ptek + i) = 0;
 #ifndef I386_CPU
 		invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
 #endif
 		vm_page_unwire(m, 0);
 		vm_page_free(m);
 	}
 #ifdef I386_CPU
 	invltlb();
 #endif
 }
 
 /*
  * Allow the UPAGES for a process to be prejudicially paged out.
  *
  * Every upage is marked dirty, unwired, and its kernel mapping removed.
  * Panics if a page is missing from p->p_upages_obj.
  */
 void
 pmap_swapout_proc(p)
 	struct proc *p;
 {
 	vm_object_t upobj = p->p_upages_obj;
 	vm_page_t m;
 	int i;
 
 	for (i = 0; i < UPAGES; i++) {
 		m = vm_page_lookup(upobj, i);
 		if (m == NULL)
 			panic("pmap_swapout_proc: upage already missing???");
 
 		/* let the upage be paged */
 		vm_page_dirty(m);
 		vm_page_unwire(m, 0);
 		pmap_kremove((vm_offset_t) p->p_addr + PAGE_SIZE * i);
 	}
 }
 
 /*
  * Bring the UPAGES for a specified process back in.
  *
  * For each upage: grab the page from p->p_upages_obj, map it at its
  * slot in p->p_addr, read it in through the pager if it is not fully
  * valid, then wire it and call vm_page_wakeup().  Panics if the pager
  * does not return VM_PAGER_OK.
  */
 void
 pmap_swapin_proc(p)
 	struct proc *p;
 {
 	int i,rv;
 	vm_object_t upobj;
 	vm_page_t m;
 
 	upobj = p->p_upages_obj;
 	for(i=0;i<UPAGES;i++) {
 
 		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
 		/* Map the page first; its contents are filled in below. */
 		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
 			VM_PAGE_TO_PHYS(m));
 
 		if (m->valid != VM_PAGE_BITS_ALL) {
 			rv = vm_pager_get_pages(upobj, &m, 1, 0);
 			if (rv != VM_PAGER_OK)
 				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
 			/*
 			 * Re-fetch the page after the pager call.
 			 * NOTE(review): the result is dereferenced without a
 			 * NULL check, unlike the lookups in pmap_dispose_proc
 			 * and pmap_swapout_proc -- confirm it cannot fail here.
 			 */
 			m = vm_page_lookup(upobj, i);
 			m->valid = VM_PAGE_BITS_ALL;
 		}
 
 		vm_page_wire(m);
 		vm_page_wakeup(m);
 		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
 	}
 }
 
 /***************************************************
  * Page table page management routines.....
  ***************************************************/
 
 /*
  * Slow path of pmap_unwire_pte_hold(): called for a page table page
  * whose hold count has been seen at zero.  If it is still zero once the
  * page is no longer busy, the page is unmapped from the page directory,
  * its wire count is dropped, and it is freed when that count reaches
  * zero.
  *
  * Returns 1 if the page table page was unmapped, 0 if it is held again.
  */
 static int 
 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
 
 	/*
 	 * Repeat for as long as vm_page_sleep_busy() returns non-zero
 	 * (presumably: it had to sleep on a busy page -- confirm).
 	 */
 	while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
 		;
 
 	/* Re-check: the hold count may have changed while waiting above. */
 	if (m->hold_count == 0) {
 		vm_offset_t pteva;
 		/*
 		 * unmap the page table page
 		 */
 		pmap->pm_pdir[m->pindex] = 0;
 		--pmap->pm_stats.resident_count;
 		/* Only when this pmap's directory is the one currently loaded. */
 		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
 			(((unsigned) PTDpde) & PG_FRAME)) {
 			/*
 			 * Invalidate the recursive-map address of the page
 			 * table page so the cleared entry takes effect
 			 * immediately.
 			 */
 			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
 			pmap_TLB_invalidate(pmap, pteva);
 		}
 
 		/* Do not leave the lookup hint pointing at a page being dropped. */
 		if (pmap->pm_ptphint == m)
 			pmap->pm_ptphint = NULL;
 
 		/*
 		 * If the page is finally unwired, simply free it.
 		 */
 		--m->wire_count;
 		if (m->wire_count == 0) {
 
 			vm_page_flash(m);
 			vm_page_busy(m);
 			vm_page_free_zero(m);
 			--cnt.v_wire_count;
 		}
 		return 1;
 	}
 	return 0;
 }
 
 /*
  * Drop one hold on page table page 'm'.  When the hold count reaches
  * zero the slow path is run and its result returned; otherwise 0.
  */
 static PMAP_INLINE int
 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
 {
 	vm_page_unhold(m);
 	return (m->hold_count == 0) ? _pmap_unwire_pte_hold(pmap, m) : 0;
 }
 
 /*
  * Called after a page table entry has been removed: release the hold
  * that mapping had on its page table page.  Addresses at or above
  * UPT_MIN_ADDRESS are ignored (returns 0).  When 'mpte' is NULL the
  * page table page is found through the pmap's hint or, failing that,
  * a lookup in pm_pteobj (which also refreshes the hint).
  *
  * Returns the result of pmap_unwire_pte_hold().
  */
 static int
 pmap_unuse_pt(pmap, va, mpte)
 	pmap_t pmap;
 	vm_offset_t va;
 	vm_page_t mpte;
 {
 	if (va >= UPT_MIN_ADDRESS)
 		return 0;
 
 	if (mpte == NULL) {
 		unsigned ptepindex = (va >> PDRSHIFT);
 		vm_page_t hint = pmap->pm_ptphint;
 
 		if (hint && (hint->pindex == ptepindex)) {
 			mpte = hint;
 		} else {
 			mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 			pmap->pm_ptphint = mpte;
 		}
 	}
 
 	return pmap_unwire_pte_hold(pmap, mpte);
 }
 
 void
 pmap_pinit0(pmap)
 	struct pmap *pmap;
 {
 	pmap->pm_pdir =
 		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
 	pmap->pm_count = 1;
 	pmap->pm_active = 0;
 	pmap->pm_ptphint = NULL;
 	TAILQ_INIT(&pmap->pm_pvlist);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
  *
  * pm_pdir (kernel virtual space for the page directory) and pm_pteobj
  * are allocated only if still NULL, so they may be carried over from an
  * earlier use.  A page directory page is grabbed at index PTDPTDI of
  * pm_pteobj, wired, mapped at pm_pdir, filled with the current kernel
  * page directory entries and given its self-referential entry.
  */
 void
 pmap_pinit(pmap)
 	register struct pmap *pmap;
 {
 	vm_page_t ptdpg;
 
 	/*
 	 * No need to allocate page table space yet but we do need a valid
 	 * page directory table.
 	 */
 	if (pmap->pm_pdir == NULL)
 		pmap->pm_pdir =
 			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 
 	/*
 	 * allocate object for the ptes
 	 */
 	if (pmap->pm_pteobj == NULL)
 		pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
 
 	/*
 	 * allocate the page directory page
 	 */
 	ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI,
 			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
 	/* The wire count is set outright, not incremented. */
 	ptdpg->wire_count = 1;
 	++cnt.v_wire_count;
 
 
 	vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
 	ptdpg->valid = VM_PAGE_BITS_ALL;
 
 	/* Map the directory page, then zero it unless it is flagged PG_ZERO. */
 	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
 	if ((ptdpg->flags & PG_ZERO) == 0)
 		bzero(pmap->pm_pdir, PAGE_SIZE);
 
 	/*
 	 * Go on the allpmaps list before copying the kernel entries:
 	 * pmap_growkernel() installs new kernel pdes into every pmap on
 	 * that list.
 	 */
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	/* Wire in kernel global address entries. */
 	/* XXX copies current process, does not fill in MPPTDI */
 	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
 #ifdef SMP
 	/* On SMP the MPPTDI entry is copied separately from the current PTD. */
 	pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
 #endif
 
 	/* install self-referential address mapping entry */
 	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
 		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
 
 	pmap->pm_count = 1;
 	pmap->pm_active = 0;
 	pmap->pm_ptphint = NULL;
 	TAILQ_INIT(&pmap->pm_pvlist);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
 
 /*
  * Historical hook for wiring the kernel global address entries into a
  * pmap after its vmspace was attached to the process but before the
  * pmap was activated (to avoid a race with pmap_growkernel).
  *
  * It is now an empty stub kept only for remaining callers; pmap_pinit()
  * copies the kernel page directory entries itself.
  */
 void
 pmap_pinit2(pmap)
 	struct pmap *pmap;
 {
 	/* XXX: Remove this stub when no longer called */
 }
 
 /*
  * Free one page 'p' belonging to pmap's pm_pteobj while the pmap is
  * being released: clear its page directory slot, and for the page
  * directory page itself also wipe the kernel entries and drop the
  * kernel mapping of pm_pdir.
  *
  * Returns 1 when the page was freed, 0 when vm_page_sleep_busy()
  * returned non-zero and nothing was done (the caller retries).
  * Panics if the page still has a non-zero hold count.
  */
 static int
 pmap_release_free_page(pmap, p)
 	struct pmap *pmap;
 	vm_page_t p;
 {
 	unsigned *pde = (unsigned *) pmap->pm_pdir;
 	/*
 	 * This code optimizes the case of freeing non-busy
 	 * page-table pages.  Those pages are zero now, and
 	 * might as well be placed directly into the zero queue.
 	 */
 	if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
 		return 0;
 
 	vm_page_busy(p);
 
 	/*
 	 * Remove the page table page from the processes address space.
 	 */
 	pde[p->pindex] = 0;
 	pmap->pm_stats.resident_count--;
 
 	if (p->hold_count)  {
 		panic("pmap_release: freeing held page table page");
 	}
 	/*
 	 * Page directory pages need to have the kernel
 	 * stuff cleared, so they can go into the zero queue also.
 	 */
 	if (p->pindex == PTDPTDI) {
 		bzero(pde + KPTDI, nkpt * PTESIZE);
 #ifdef SMP
 		pde[MPPTDI] = 0;
 #endif
 		pde[APTDPTDI] = 0;
 		/* After this, 'pde' must not be dereferenced again. */
 		pmap_kremove((vm_offset_t) pmap->pm_pdir);
 	}
 
 	/* Invalidate the lookup hint if it refers to this page's index. */
 	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
 		pmap->pm_ptphint = NULL;
 
 	p->wire_count--;
 	cnt.v_wire_count--;
 	vm_page_free_zero(p);
 	return 1;
 }
 
 /*
  * Slow path of page table page allocation: called when the page
  * directory entry for 'ptepindex' is empty.  Grabs (or creates) the
  * page at that index in pm_pteobj, wires and holds it, installs it in
  * the page directory, makes sure it is zeroed, and records it as the
  * pmap's page table hint.
  *
  * Returns the page table page with one additional hold.
  */
 static vm_page_t
 _pmap_allocpte(pmap, ptepindex)
 	pmap_t	pmap;
 	unsigned ptepindex;
 {
 	vm_offset_t pteva, ptepa;
 	vm_page_t m;
 
 	/*
 	 * Find or fabricate a new pagetable page
 	 */
 	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
 			VM_ALLOC_ZERO | VM_ALLOC_RETRY);
 
 	KASSERT(m->queue == PQ_NONE,
 		("_pmap_allocpte: %p->queue != PQ_NONE", m));
 
 	/* The global wired-page count moves only on a 0 -> 1 transition. */
 	if (m->wire_count == 0)
 		cnt.v_wire_count++;
 	m->wire_count++;
 
 	/*
 	 * Increment the hold count for the page table page
 	 * (denoting a new mapping.)
 	 */
 	m->hold_count++;
 
 	/*
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
 	 */
 
 	pmap->pm_stats.resident_count++;
 
 	ptepa = VM_PAGE_TO_PHYS(m);
 	pmap->pm_pdir[ptepindex] =
 		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 
 	/*
 	 * Set the page table hint
 	 */
 	pmap->pm_ptphint = m;
 
 	/*
 	 * Try to use the new mapping, but if we cannot, then
 	 * do it with the routine that maps the page explicitly.
 	 *
 	 * Zeroing is skipped when the page is already flagged PG_ZERO.
 	 * If this pmap's directory is the one currently loaded, the page
 	 * is reachable at its recursive-map address and zeroed through it;
 	 * otherwise it is zeroed by physical address.
 	 */
 	if ((m->flags & PG_ZERO) == 0) {
 		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
 			(((unsigned) PTDpde) & PG_FRAME)) {
 			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
 			bzero((caddr_t) pteva, PAGE_SIZE);
 		} else {
 			pmap_zero_page(ptepa);
 		}
 	}
 
 	m->valid = VM_PAGE_BITS_ALL;
 	vm_page_flag_clear(m, PG_ZERO);
 	vm_page_flag_set(m, PG_MAPPED);
 	vm_page_wakeup(m);
 
 	return m;
 }
 
 static vm_page_t
 pmap_allocpte(pmap, va)
 	pmap_t	pmap;
 	vm_offset_t va;
 {
 	unsigned ptepindex;
 	vm_offset_t ptepa;
 	vm_page_t m;
 
 	/*
 	 * Calculate pagetable page index
 	 */
 	ptepindex = va >> PDRSHIFT;
 
 	/*
 	 * Get the page directory entry
 	 */
 	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
 
 	/*
 	 * This supports switching from a 4MB page to a
 	 * normal 4K page.
 	 */
 	if (ptepa & PG_PS) {
 		pmap->pm_pdir[ptepindex] = 0;
 		ptepa = 0;
 		invltlb();
 	}
 
 	/*
 	 * If the page table page is mapped, we just increment the
 	 * hold count, and activate it.
 	 */
 	if (ptepa) {
 		/*
 		 * In order to get the page table page, try the
 		 * hint first.
 		 */
 		if (pmap->pm_ptphint &&
 			(pmap->pm_ptphint->pindex == ptepindex)) {
 			m = pmap->pm_ptphint;
 		} else {
 			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 			pmap->pm_ptphint = m;
 		}
 		m->hold_count++;
 		return m;
 	}
 	/*
 	 * Here if the pte page isn't mapped, or if it has been deallocated.
 	 */
 	return _pmap_allocpte(pmap, ptepindex);
 }
 
 
 /***************************************************
 * Pmap allocation/deallocation routines.
  ***************************************************/
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  *
  * The pmap is taken off the allpmaps list and every page left in
  * pm_pteobj is freed.  The page directory page (index PTDPTDI) is
  * freed last.  If the object's generation changes while a page could
  * not be freed, the whole scan restarts.
  */
 void
 pmap_release(pmap)
 	register struct pmap *pmap;
 {
 	vm_page_t p,n,ptdpg;
 	vm_object_t object = pmap->pm_pteobj;
 	int curgeneration;
 
 #if defined(DIAGNOSTIC)
 	if (object->ref_count != 1)
 		panic("pmap_release: pteobj reference count != 1");
 #endif
 	
 	ptdpg = NULL;
 	LIST_REMOVE(pmap, pm_list);
 retry:
 	curgeneration = object->generation;
 	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
 		/* Fetch the successor first: 'p' may be freed below. */
 		n = TAILQ_NEXT(p, listq);
 		if (p->pindex == PTDPTDI) {
 			ptdpg = p;
 			continue;
 		}
 		for (;;) {
 			/*
 			 * Stop as soon as the page has been freed; calling
 			 * pmap_release_free_page() again would touch a page
 			 * that no longer belongs to us and never terminate.
 			 */
 			if (pmap_release_free_page(pmap, p))
 				break;
 			/*
 			 * The free attempt did nothing.  If the page list
 			 * changed meanwhile, 'p' and 'n' are stale: rescan.
 			 * Otherwise simply try the same page again.
 			 */
 			if (object->generation != curgeneration)
 				goto retry;
 		}
 	}
 
 	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
 		goto retry;
 }
 
 /*
  * Sysctl handler for vm.kvm_size: reports
  * VM_MAX_KERNEL_ADDRESS - KERNBASE.
  */
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long span;
 
 	span = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 	return sysctl_handle_long(oidp, &span, 0, req);
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
     0, 0, kvm_size, "IU", "Size of KVM");
 
 /*
  * Sysctl handler for vm.kvm_free: reports
  * VM_MAX_KERNEL_ADDRESS - kernel_vm_end.
  */
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long avail;
 
 	avail = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 	return sysctl_handle_long(oidp, &avail, 0, req);
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
     0, 0, kvm_free, "IU", "Amount of KVM free");
 
 /*
  * grow the number of kernel page table entries, if needed
  *
  * Extends kernel_vm_end until it reaches 'addr' rounded up to the next
  * multiple of PAGE_SIZE * NPTEPG (the span of one page table page).
  * For every directory slot in that range that is still empty, a page
  * is allocated from kptobj, wired, zeroed and installed both in the
  * current page directory and in every pmap on the allpmaps list.
  * Runs at splhigh; panics if no page can be allocated.
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	struct pmap *pmap;
 	int s;
 	vm_offset_t ptppaddr;
 	vm_page_t nkpg;
 	pd_entry_t newpdir;
 
 	s = splhigh();
 	/*
 	 * First call: derive kernel_vm_end and nkpt by counting the
 	 * directory entries already populated from KERNBASE upward.
 	 */
 	if (kernel_vm_end == 0) {
 		kernel_vm_end = KERNBASE;
 		nkpt = 0;
 		while (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 			nkpt++;
 		}
 	}
 	/*
 	 * Note: this always advances to the *next* boundary, even when
 	 * 'addr' is already aligned.
 	 */
 	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 	while (kernel_vm_end < addr) {
 		/* Slot already has a page table: just step past it. */
 		if (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 			continue;
 		}
 
 		/*
 		 * This index is bogus, but out of the way
 		 */
 		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
 		if (!nkpg)
 			panic("pmap_growkernel: no memory to grow kernel");
 
 		nkpt++;
 
 		vm_page_wire(nkpg);
 		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 		pmap_zero_page(ptppaddr);
 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 		pdir_pde(PTD, kernel_vm_end) = newpdir;
 
 		/* Propagate the new kernel pde to every registered pmap. */
 		LIST_FOREACH(pmap, &allpmaps, pm_list) {
 			*pmap_pde(pmap, kernel_vm_end) = newpdir;
 		}
 		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 	}
 	splx(s);
 }
 
 /*
  *	Drop one reference on the given physical map.  A NULL pmap is
  *	ignored.  When the last reference goes away the map's resources
  *	are released -- and the kernel then panics, because destroying
  *	a pmap is not implemented.  Should only be called if the map
  *	contains no valid mappings.
  */
 void
 pmap_destroy(pmap)
 	register pmap_t pmap;
 {
 	if (pmap == NULL)
 		return;
 
 	if (--pmap->pm_count != 0)
 		return;
 
 	pmap_release(pmap);
 	panic("destroying a pmap is not yet implemented");
 }
 
 /*
  *	Take an additional reference on the specified pmap.
  *	A NULL pmap is ignored.
  */
 void
 pmap_reference(pmap)
 	pmap_t pmap;
 {
 	if (pmap == NULL)
 		return;
 
 	pmap->pm_count++;
 }
 
 /***************************************************
 * page management routines.
  ***************************************************/
 
 /*
  * Return a pv_entry to pvzone and account for it in pv_entry_count.
  */
 static PMAP_INLINE void
 free_pv_entry(pv)
 	pv_entry_t pv;
 {
 	--pv_entry_count;
 	zfree(pvzone, pv);
 }
 
 /*
  * Hand out a new pv_entry from pvzone.
  * The zone allocator is used rather than malloc because allocations
  * may happen at interrupt time.
  *
  * When a high-water mark is configured and the running count rises
  * above it, the page daemon is woken up (once, until
  * pmap_pagedaemon_waken is cleared again).
  */
 static pv_entry_t
 get_pv_entry(void)
 {
 	pv_entry_count++;
 
 	if (pv_entry_high_water != 0 && pmap_pagedaemon_waken == 0 &&
 	    pv_entry_count > pv_entry_high_water) {
 		pmap_pagedaemon_waken = 1;
 		wakeup (&vm_pages_needed);
 	}
 
 	return zalloc(pvzone);
 }
 
 /*
  * Last-resort reclamation of pv entries: drastic, but it can save the
  * system in a pinch.  Does nothing unless get_pv_entry() has raised
  * pmap_pagedaemon_waken.  Otherwise every page in vm_page_array that is
  * not wired, held or busy has all of its mappings removed.  A warning
  * is printed for the first five invocations that do work.
  */
 void
 pmap_collect()
 {
 	static int warningdone=0;
 	vm_page_t m;
 	int i;
 
 	if (pmap_pagedaemon_waken == 0)
 		return;
 
 	if (warningdone < 5) {
 		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
 		warningdone++;
 	}
 
 	for (i = 0; i < vm_page_array_size; i++) {
 		m = &vm_page_array[i];
 		if (m->wire_count == 0 && m->hold_count == 0 &&
 		    m->busy == 0 && (m->flags & PG_BUSY) == 0)
 			pmap_remove_all(m);
 	}
 
 	pmap_pagedaemon_waken = 0;
 }
 	
 
 /*
  * Find the pv entry that records the mapping of page 'm' at 'va' in
  * 'pmap', unlink it from both the page's and the pmap's pv lists, and
  * free it.  Whichever list is expected to be shorter is searched: the
  * page's list when it has fewer entries than the pmap has resident
  * pages, the pmap's list otherwise.
  *
  * Returns the result of pmap_unuse_pt() for the mapping, or 0 if no
  * matching entry was found.
  */
 
 static int
 pmap_remove_entry(pmap, m, va)
 	struct pmap *pmap;
 	vm_page_t m;
 	vm_offset_t va;
 {
 	pv_entry_t pv;
 	int rtval = 0;
 	int ipl;
 
 	ipl = splvm();
 
 	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 		/* Walk the page's list; entries of other pmaps are skipped. */
 		pv = TAILQ_FIRST(&m->md.pv_list);
 		while (pv != NULL &&
 		    !(pmap == pv->pv_pmap && va == pv->pv_va))
 			pv = TAILQ_NEXT(pv, pv_list);
 	} else {
 		/* Walk the pmap's own list; only the address must match. */
 		pv = TAILQ_FIRST(&pmap->pm_pvlist);
 		while (pv != NULL && va != pv->pv_va)
 			pv = TAILQ_NEXT(pv, pv_plist);
 	}
 
 	if (pv != NULL) {
 		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
 
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		m->md.pv_list_count--;
 		/* Last mapping of the page gone: clear its mapping flags. */
 		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 
 		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 		free_pv_entry(pv);
 	}
 
 	splx(ipl);
 	return rtval;
 }
 
 /*
  * Record a new mapping of page 'm' at 'va' in 'pmap': allocate a pv
  * entry, remember the page table page 'mpte' that backs the mapping,
  * and append the entry to both the pmap's and the page's pv lists.
  */
 static void
 pmap_insert_entry(pmap, va, mpte, m)
 	pmap_t pmap;
 	vm_offset_t va;
 	vm_page_t mpte;
 	vm_page_t m;
 {
 	pv_entry_t pv;
 	int ipl;
 
 	ipl = splvm();
 
 	pv = get_pv_entry();
 	pv->pv_pmap = pmap;
 	pv->pv_va = va;
 	pv->pv_ptem = mpte;
 
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 	m->md.pv_list_count++;
 	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 
 	splx(ipl);
 }
 
 /*
  * pmap_remove_pte: unmap the single page whose pte is at 'ptq'.
  *
  * The pte is atomically read and cleared, the pmap statistics are
  * updated, and for managed pages the modified/referenced state is
  * passed on to the vm_page before its pv entry is removed.
  *
  * Returns whatever pmap_remove_entry() (managed pages) or
  * pmap_unuse_pt() (unmanaged pages) returns.
  */
 static int
 pmap_remove_pte(pmap, ptq, va)
 	struct pmap *pmap;
 	unsigned *ptq;
 	vm_offset_t va;
 {
 	unsigned oldpte;
 	vm_page_t m;
 
 	oldpte = atomic_readandclear_int(ptq);
 	if (oldpte & PG_W)
 		pmap->pm_stats.wired_count -= 1;
 	/*
 	 * Machines that don't support invlpg, also don't support
 	 * PG_G.
 	 */
 	if (oldpte & PG_G)
 		invlpg(va);
 	pmap->pm_stats.resident_count -= 1;
 
 	/* Unmanaged page: only the page table page hold needs dropping. */
 	if ((oldpte & PG_MANAGED) == 0)
 		return pmap_unuse_pt(pmap, va, NULL);
 
 	m = PHYS_TO_VM_PAGE(oldpte);
 	if (oldpte & PG_M) {
 #if defined(PMAP_DIAGNOSTIC)
 		if (pmap_nw_modified((pt_entry_t) oldpte)) {
 			printf(
 	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
 			    va, oldpte);
 		}
 #endif
 		if (pmap_track_modified(va))
 			vm_page_dirty(m);
 	}
 	if (oldpte & PG_A)
 		vm_page_flag_set(m, PG_REFERENCED);
 
 	return pmap_remove_entry(pmap, m, va);
 }
 
 /*
  * Remove the mapping of a single page at 'va' from 'pmap', if there
  * is one, and invalidate its TLB entry.
  */
 static void
 pmap_remove_page(pmap, va)
 	struct pmap *pmap;
 	register vm_offset_t va;
 {
 	register unsigned *ptq;
 
 	/* No page table for this address: nothing can be mapped here. */
 	if (*pmap_pde(pmap, va) == 0)
 		return;
 
 	/* Locate the pte through a local view of this pmap's page tables. */
 	ptq = get_ptbase(pmap) + i386_btop(va);
 	if (*ptq == 0)
 		return;
 
 	(void) pmap_remove_pte(pmap, ptq, va);
 	pmap_TLB_invalidate(pmap, va);
 }
 
 /*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  *
  *	A NULL pmap, or one with no resident pages, is a no-op.  A
  *	single-page range that is not covered by a 4MB mapping takes a
  *	short cut through pmap_remove_page().  Otherwise the range is
  *	walked one page table at a time and the TLB is invalidated once
  *	at the end if anything was removed.
  */
 void
 pmap_remove(pmap, sva, eva)
 	struct pmap *pmap;
 	register vm_offset_t sva;
 	register vm_offset_t eva;
 {
 	register unsigned *ptbase;
 	vm_offset_t pdnxt;
 	vm_offset_t ptpaddr;
 	vm_offset_t sindex, eindex;
 	int anyvalid;
 
 	if (pmap == NULL)
 		return;
 
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	/*
 	 * special handling of removing one page.  a very
 	 * common operation and easy to short circuit some
 	 * code.
 	 */
 	if (((sva + PAGE_SIZE) == eva) && 
 		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 		pmap_remove_page(pmap, sva);
 		return;
 	}
 
 	anyvalid = 0;
 
 	/*
 	 * Get a local virtual address for the mappings that are being
 	 * worked with.
 	 */
 	ptbase = get_ptbase(pmap);
 
 	/* From here on the range is tracked as page indices, not addresses. */
 	sindex = i386_btop(sva);
 	eindex = i386_btop(eva);
 
 	for (; sindex < eindex; sindex = pdnxt) {
 		unsigned pdirindex;
 
 		/*
 		 * Calculate index for next page table.
 		 */
 		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
 		/* Nothing left mapped in this pmap: stop early. */
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		/*
 		 * NOTE(review): divides a page index by NPDEPG to obtain the
 		 * directory index; presumably NPDEPG == NPTEPG -- confirm.
 		 */
 		pdirindex = sindex / NPDEPG;
 		/* 4MB mapping: drop the whole directory entry in one go. */
 		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 			pmap->pm_pdir[pdirindex] = 0;
 			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 			anyvalid++;
 			continue;
 		}
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (pdnxt > eindex) {
 			pdnxt = eindex;
 		}
 
 		for ( ;sindex != pdnxt; sindex++) {
 			vm_offset_t va;
 			if (ptbase[sindex] == 0) {
 				continue;
 			}
 			va = i386_ptob(sindex);
 			
 			anyvalid++;
 			/*
 			 * A non-zero return means the page table page itself
 			 * was unmapped (see _pmap_unwire_pte_hold), so the
 			 * rest of this page table must not be scanned.
 			 */
 			if (pmap_remove_pte(pmap,
 				ptbase + sindex, va))
 				break;
 		}
 	}
 
 	if (anyvalid)
 		pmap_TLB_invalidate_all(pmap);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  *
  *		This version drains the page's pv list directly:
  *		each entry's pte is cleared, the statistics and the
  *		page's referenced/dirty state are updated, the TLB
  *		entry is invalidated and the pv entry is freed.
  */
 
 static void
 pmap_remove_all(m)
 	vm_page_t m;
 {
 	register pv_entry_t pv;
 	register unsigned *pte, tpte;
 	int s;
 
 #if defined(PMAP_DIAGNOSTIC)
 	/*
 	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
 	 * pages!
 	 */
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
 		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m));
 	}
 #endif
 
 	s = splvm();
 	/* Always take the head: each pass unlinks the entry it handled. */
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pv->pv_pmap->pm_stats.resident_count--;
 
 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 
 		/* Fetch and clear in one step so no status bit is lost. */
 		tpte = atomic_readandclear_int(pte);
 		if (tpte & PG_W)
 			pv->pv_pmap->pm_stats.wired_count--;
 
 		if (tpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if (tpte & PG_M) {
 #if defined(PMAP_DIAGNOSTIC)
 			if (pmap_nw_modified((pt_entry_t) tpte)) {
 				printf(
 	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
 				    pv->pv_va, tpte);
 			}
 #endif
 			if (pmap_track_modified(pv->pv_va))
 				vm_page_dirty(m);
 		}
 		pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 
 		/* Unlink from both lists, then release the page table hold. */
 		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		m->md.pv_list_count--;
 		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
 		free_pv_entry(pv);
 	}
 
 	/* No mappings remain, so the page is neither mapped nor writeable. */
 	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 
 	splx(s);
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  *
  *	A NULL pmap is ignored.  A request without read access removes
  *	the mappings outright; a request that still includes write
  *	access is a no-op.  Otherwise every mapping in [sva, eva) is
  *	made read-only, and for managed pages the accessed/modified
  *	bits are handed to the vm_page before being cleared.  The TLB
  *	is invalidated once at the end if any entry changed.
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	register unsigned *ptbase;
 	vm_offset_t pdnxt, ptpaddr;
 	vm_pindex_t sindex, eindex;
 	int anychanged;
 
 	if (pmap == NULL)
 		return;
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 	if (prot & VM_PROT_WRITE)
 		return;
 
 	anychanged = 0;
 
 	ptbase = get_ptbase(pmap);
 
 	/* From here on the range is tracked as page indices. */
 	sindex = i386_btop(sva);
 	eindex = i386_btop(eva);
 
 	for (; sindex < eindex; sindex = pdnxt) {
 
 		unsigned pdirindex;
 
 		/* Index of the first page covered by the next page table. */
 		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
 
 		pdirindex = sindex / NPDEPG;
 		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 			/*
 			 * 4MB page: write-protect the directory entry in
 			 * place.  The mapping stays resident, so the
 			 * resident count must not be touched here.
 			 */
 			*(unsigned *) &pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
 			anychanged++;
 			continue;
 		}
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/* Do not run past the end of the requested range. */
 		if (pdnxt > eindex) {
 			pdnxt = eindex;
 		}
 
 		for (; sindex != pdnxt; sindex++) {
 
 			unsigned pbits;
 			vm_page_t m;
 
 			pbits = ptbase[sindex];
 
 			if (pbits & PG_MANAGED) {
 				m = NULL;
 				if (pbits & PG_A) {
 					m = PHYS_TO_VM_PAGE(pbits);
 					vm_page_flag_set(m, PG_REFERENCED);
 					pbits &= ~PG_A;
 				}
 				if (pbits & PG_M) {
 					if (pmap_track_modified(i386_ptob(sindex))) {
 						/* Reuse the page found above, if any. */
 						if (m == NULL)
 							m = PHYS_TO_VM_PAGE(pbits);
 						vm_page_dirty(m);
 						pbits &= ~PG_M;
 					}
 				}
 			}
 
 			pbits &= ~PG_RW;
 
 			/* Write back only if something actually changed. */
 			if (pbits != ptbase[sindex]) {
 				ptbase[sindex] = pbits;
 				anychanged = 1;
 			}
 		}
 	}
 	if (anychanged)
 		pmap_TLB_invalidate_all(pmap);
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  *
  *	A NULL pmap is ignored.  'va' is truncated to a page boundary.
  *	Three cases are handled: the same physical page is already
  *	mapped at 'va' (protection/wiring change only), a different page
  *	is mapped there (it is removed first), or nothing is mapped.
  *	Panics if no page table covers 'va' or if 'va' lies inside a
  *	4MB mapping.
  */
 void
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	   boolean_t wired)
 {
 	vm_offset_t pa;
 	register unsigned *pte;
 	vm_offset_t opa;
 	vm_offset_t origpte, newpte;
 	vm_page_t mpte;
 
 	if (pmap == NULL)
 		return;
 
 	va &= PG_FRAME;
 #ifdef PMAP_DIAGNOSTIC
 	if (va > VM_MAX_KERNEL_ADDRESS)
 		panic("pmap_enter: toobig");
 	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
 #endif
 
 	mpte = NULL;
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 * This also takes one hold on the page table page (mpte).
 	 */
 	if (va < UPT_MIN_ADDRESS) {
 		mpte = pmap_allocpte(pmap, va);
 	}
 #if 0 && defined(PMAP_DIAGNOSTIC)
 	else {
 		vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va);
 		if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) { 
 			panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
 				pmap->pm_pdir[PTDPTDI], origpte, va);
 		}
 		if (smp_active) {
 			pdeaddr = (vm_offset_t *) IdlePTDS[PCPU_GET(cpuid)];
 			if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
 				if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr))
 					printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
 				printf("cpuid: %d, pdeaddr: 0x%x\n", PCPU_GET(cpuid), pdeaddr);
 				panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
 					pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
 			}
 		}
 	}
 #endif
 
 	pte = pmap_pte(pmap, va);
 
 	/*
 	 * Page Directory table entry not valid, we need a new PT page
 	 */
 	if (pte == NULL) {
 		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
 			(void *)pmap->pm_pdir[PTDPTDI], va);
 	}
 
 	pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
 	origpte = *(vm_offset_t *)pte;
 	opa = origpte & PG_FRAME;
 
 	if (origpte & PG_PS)
 		panic("pmap_enter: attempted pmap_enter on 4MB page");
 
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (origpte && (opa == pa)) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if (wired && ((origpte & PG_W) == 0))
 			pmap->pm_stats.wired_count++;
 		else if (!wired && (origpte & PG_W))
 			pmap->pm_stats.wired_count--;
 
 #if defined(PMAP_DIAGNOSTIC)
 		if (pmap_nw_modified((pt_entry_t) origpte)) {
 			printf(
 	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
 			    va, origpte);
 		}
 #endif
 
 		/*
 		 * Remove extra pte reference
 		 * (the mapping already existed, so the hold taken by
 		 * pmap_allocpte above is surplus).
 		 */
 		if (mpte)
 			mpte->hold_count--;
 
 		/*
 		 * Fast path: write access requested on a valid mapping.
 		 * Only PG_RW is added, and only if it was missing.
 		 * NOTE(review): this path returns without updating PG_W in
 		 * the pte even when wired_count was adjusted above --
 		 * confirm that is intended.
 		 */
 		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
 			if ((origpte & PG_RW) == 0) {
 				*pte |= PG_RW;
 #ifdef SMP
 				cpu_invlpg((void *)va);
 				if (pmap->pm_active & PCPU_GET(other_cpus))
 					smp_invltlb();
 #else
 				invltlb_1pg(va);
 #endif
 			}
 			return;
 		}
 
 		/*
 		 * We might be turning off write access to the page,
 		 * so we go ahead and sense modify status.
 		 */
 		if (origpte & PG_MANAGED) {
 			if ((origpte & PG_M) && pmap_track_modified(va)) {
 				vm_page_t om;
 				om = PHYS_TO_VM_PAGE(opa);
 				vm_page_dirty(om);
 			}
 			/* Carry the managed bit over into the new pte. */
 			pa |= PG_MANAGED;
 		}
 		goto validate;
 	} 
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		int err;
 		/*
 		 * A non-zero result means the page table page was released
 		 * along with the old mapping, which must not happen here.
 		 */
 		err = pmap_remove_pte(pmap, pte, va);
 		if (err)
 			panic("pmap_enter: pte vanished, va: 0x%x", va);
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory. Note that we
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
 	if (pmap_initialized && 
 	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
 		pmap_insert_entry(pmap, va, mpte, m);
 		pa |= PG_MANAGED;
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 	if (wired)
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
 
 	if (wired)
 		newpte |= PG_W;
 	/* Addresses below the page table area get the user bit. */
 	if (va < UPT_MIN_ADDRESS)
 		newpte |= PG_U;
 	if (pmap == kernel_pmap)
 		newpte |= pgeflag;
 
 	/*
 	 * if the mapping or permission bits are different, we need
 	 * to update the pte.
 	 * The accessed/modified bits of the old pte are ignored in the
 	 * comparison, and the new pte is always written with PG_A set.
 	 */
 	if ((origpte & ~(PG_M|PG_A)) != newpte) {
 		*pte = newpte | PG_A;
 		/*if (origpte)*/ {
 #ifdef SMP
 			cpu_invlpg((void *)va);
 			if (pmap->pm_active & PCPU_GET(other_cpus))
 				smp_invltlb();
 #else
 			invltlb_1pg(va);
 #endif
 		}
 	}
 }
 
 /*
  * pmap_enter_quick: fast-path entry of a read-only user mapping.
  *
  * Maps page m at va with PG_V | PG_U (plus PG_MANAGED when the page is
  * neither fictitious nor unmanaged); PG_RW is never set here.  mpte is a
  * hint: the page table page returned by a previous call, which is reused
  * when its pindex matches the page table page needed for va.
  *
  * Returns the page table page used for va (NULL when va is at or above
  * UPT_MIN_ADDRESS), or 0 when a pte is already present at va, in which
  * case nothing is entered.
  *
  * this code makes some *MAJOR* assumptions:
  * 1. Current pmap & pmap exists.
  * 2. Not wired.
  * 3. Read access.
  * 4. No page table pages.
  * 5. Tlbflush is deferred to calling procedure.
  * 6. Page IS managed.
  * but is *MUCH* faster than pmap_enter...
  */
 
 static vm_page_t
 pmap_enter_quick(pmap, va, m, mpte)
 	register pmap_t pmap;
 	vm_offset_t va;
 	vm_page_t m;
 	vm_page_t mpte;
 {
 	unsigned *pte;
 	vm_offset_t pa;
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < UPT_MIN_ADDRESS) {
 		unsigned ptepindex;
 		vm_offset_t ptepa;
 
 		/*
 		 * Calculate pagetable page index
 		 */
 		ptepindex = va >> PDRSHIFT;
 		if (mpte && (mpte->pindex == ptepindex)) {
 			/* The caller's hint is the right page table page. */
 			mpte->hold_count++;
 		} else {
 retry:
 			/*
 			 * Get the page directory entry
 			 */
 			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
 
 			/*
 			 * If the page table page is mapped, we just increment
 			 * the hold count, and activate it.
 			 */
 			if (ptepa) {
 				if (ptepa & PG_PS)
 					panic("pmap_enter_quick: unexpected mapping into 4MB page");
 				/* Try the pmap's cached hint before a lookup. */
 				if (pmap->pm_ptphint &&
 					(pmap->pm_ptphint->pindex == ptepindex)) {
 					mpte = pmap->pm_ptphint;
 				} else {
 					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 					pmap->pm_ptphint = mpte;
 				}
 				/*
 				 * The lookup came back empty: re-read the page
 				 * directory entry and try again.
 				 * NOTE(review): presumably pmap_page_lookup can
 				 * sleep and the entry may change meanwhile --
 				 * confirm; nothing here bounds the retries.
 				 */
 				if (mpte == NULL)
 					goto retry;
 				mpte->hold_count++;
 			} else {
 				/* No page table page yet: allocate one. */
 				mpte = _pmap_allocpte(pmap, ptepindex);
 			}
 		}
 	} else {
 		/* No page table page is tracked for this address range. */
 		mpte = NULL;
 	}
 
 	/*
 	 * This call to vtopte makes the assumption that we are
 	 * entering the page into the current pmap.  In order to support
 	 * quick entry into any pmap, one would likely use pmap_pte_quick.
 	 * But that isn't as quick as vtopte.
 	 */
 	pte = (unsigned *)vtopte(va);
 	if (*pte) {
 		/*
 		 * Something is already mapped here; drop the hold taken
 		 * above and report that no page table page is carried over.
 		 */
 		if (mpte)
 			pmap_unwire_pte_hold(pmap, mpte);
 		return 0;
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory. Note that we
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 * NOTE(review): no spl call is visible in this function; presumably
 	 * pmap_insert_entry or the caller handles it -- confirm.
 	 */
 	if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 		pmap_insert_entry(pmap, va, mpte, m);
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 
 	pa = VM_PAGE_TO_PHYS(m);
 
 	/*
 	 * Now validate mapping with RO protection
 	 * (PG_MANAGED is set only for pages that got a pv entry above).
 	 */
 	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 		*pte = pa | PG_V | PG_U;
 	else
 		*pte = pa | PG_V | PG_U | PG_MANAGED;
 
 	return mpte;
 }
 
 /*
  * Establish a temporary kernel mapping of physical address pa in slot i
  * of the crashdumpmap window (slot i lives i pages above the window
  * base).  The value returned is always the base of the window, not the
  * address of the slot.  This is only intended to be used for panic dumps.
  */
 void *
 pmap_kenter_temporary(vm_offset_t pa, int i)
 {
 	vm_offset_t slot_va;
 
 	slot_va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 	pmap_kenter(slot_va, pa);
 
 	return ((void *)crashdumpmap);
 }
 
 /*
  * Page-count threshold used by pmap_object_init_pt when "limit" is set:
  * preloading is skipped if both the request and the object's resident
  * page count exceed it.
  */
 #define MAX_INIT_PT (96)
 /*
  * pmap_object_init_pt preloads the ptes for a given object
  * into the specified pmap.  This eliminates the blast of soft
  * faults on process startup and immediately after an mmap.
  *
  * pmap/addr	destination pmap and starting virtual address
  * object/pindex	source object and the page index matching addr
  * size		length of the range in bytes
  * limit		when non-zero, apply the MAX_INIT_PT cut-off
  *
  * Two paths exist: device objects with superpage-aligned addr and size
  * are mapped with PG_PS page directory entries; vnode objects have their
  * already-resident, fully valid pages entered via pmap_enter_quick.
  * Any other object type is ignored.
  */
 void
 pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
 	pmap_t pmap;
 	vm_offset_t addr;
 	vm_object_t object;
 	vm_pindex_t pindex;
 	vm_size_t size;
 	int limit;
 {
 	vm_offset_t tmpidx;
 	int psize;
 	vm_page_t p, mpte;
 	int objpgs;
 
 	if (pmap == NULL || object == NULL)
 		return;
 
 	/*
 	 * This code maps large physical mmap regions into the
 	 * processor address space.  Note that some shortcuts
 	 * are taken, but the code works.
 	 */
 	if (pseflag &&
 		(object->type == OBJT_DEVICE) &&
 		((addr & (NBPDR - 1)) == 0) &&
 		((size & (NBPDR - 1)) == 0) ) {
 		int i;
 		vm_page_t m[1];
 		unsigned int ptepindex;
 		int npdes;
 		vm_offset_t ptepa;
 
 		/* Leave an existing page directory entry alone. */
 		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
 			return;
 
 retry:
 		/* Find the first page; wait and look again if it is busy. */
 		p = vm_page_lookup(object, pindex);
 		if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
 			goto retry;
 
 		if (p == NULL) {
 			/* Not resident: allocate it and ask the pager for it. */
 			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 			if (p == NULL)
 				return;
 			m[0] = p;
 
 			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 				vm_page_free(p);
 				return;
 			}
 
 			p = vm_page_lookup(object, pindex);
 			vm_page_wakeup(p);
 		}
 
 		/* The physical address must be superpage-aligned as well. */
 		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1)) {
 			return;
 		}
 
 		p->valid = VM_PAGE_BITS_ALL;
 
 		/*
 		 * Write one PG_PS page directory entry per NBPDR of the
 		 * range, walking physical memory contiguously from ptepa.
 		 */
 		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
 		npdes = size >> PDRSHIFT;
 		for(i=0;i<npdes;i++) {
 			pmap->pm_pdir[ptepindex] =
 				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
 			ptepa += NBPDR;
 			ptepindex += 1;
 		}
 		vm_page_flag_set(p, PG_MAPPED);
 		invltlb();
 		return;
 	}
 
 	psize = i386_btop(size);
 
 	/*
 	 * Only vnode objects are preloaded, and with "limit" set a large
 	 * request against an object with many resident pages is skipped.
 	 */
 	if ((object->type != OBJT_VNODE) ||
 		(limit && (psize > MAX_INIT_PT) &&
 			(object->resident_page_count > MAX_INIT_PT))) {
 		return;
 	}
 
 	/* Clip the request to the end of the object. */
 	if (psize + pindex > object->size) {
 		if (object->size < pindex)
 			return;		  
 		psize = object->size - pindex;
 	}
 
 	mpte = NULL;
 	/*
 	 * if we are processing a major portion of the object, then scan the
 	 * entire thing.
 	 */
 	if (psize > (object->resident_page_count >> 2)) {
 		objpgs = psize;
 
 		for (p = TAILQ_FIRST(&object->memq);
 		    ((objpgs > 0) && (p != NULL));
 		    p = TAILQ_NEXT(p, listq)) {
 
 			/* Skip resident pages outside [pindex, pindex + psize). */
 			tmpidx = p->pindex;
 			if (tmpidx < pindex) {
 				continue;
 			}
 			tmpidx -= pindex;
 			if (tmpidx >= psize) {
 				continue;
 			}
 			/*
 			 * Map only fully valid pages that are neither busy
 			 * nor fictitious.  A page on the cache queue is
 			 * deactivated first; the page is held busy across
 			 * the pmap_enter_quick call.
 			 */
 			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 				(p->busy == 0) &&
 			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 				if ((p->queue - p->pc) == PQ_CACHE)
 					vm_page_deactivate(p);
 				vm_page_busy(p);
 				mpte = pmap_enter_quick(pmap, 
 					addr + i386_ptob(tmpidx), p, mpte);
 				vm_page_flag_set(p, PG_MAPPED);
 				vm_page_wakeup(p);
 			}
 			/* Every in-range page counts, mapped or not. */
 			objpgs -= 1;
 		}
 	} else {
 		/*
 		 * else lookup the pages one-by-one.
 		 */
 		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
 			p = vm_page_lookup(object, tmpidx + pindex);
 			/* Same eligibility test and sequence as the scan above. */
 			if (p &&
 			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 				(p->busy == 0) &&
 			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 				if ((p->queue - p->pc) == PQ_CACHE)
 					vm_page_deactivate(p);
 				vm_page_busy(p);
 				mpte = pmap_enter_quick(pmap, 
 					addr + i386_ptob(tmpidx), p, mpte);
 				vm_page_flag_set(p, PG_MAPPED);
 				vm_page_wakeup(p);
 			}
 		}
 	}
 	return;
 }
 
 /*
  * pmap_prefault provides a quick way of clustering
  * pagefaults into a process's address space.  It is a "cousin"
  * of pmap_object_init_pt, except it runs at page fault time instead
  * of mmap time.
  */
 /* Number of pages behind (PFBAK) and ahead (PFFOR) of a fault to consider. */
 #define PFBAK 4
 #define PFFOR 4
 #define PAGEORDER_SIZE (PFBAK+PFFOR)
 
 /*
  * Byte offsets, relative to the faulting address, of the neighbouring
  * pages pmap_prefault() tries, nearest first and alternating behind/ahead.
  * The table must hold exactly PAGEORDER_SIZE entries because
  * pmap_prefault() indexes it from 0 to PAGEORDER_SIZE - 1; every
  * initializer except the last therefore needs its trailing comma
  * (without it two neighbouring entries fuse into a single expression and
  * the table comes up one element short).
  */
 static int pmap_prefault_pageorder[] = {
 	-PAGE_SIZE, PAGE_SIZE,
 	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
 	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
 	-4 * PAGE_SIZE, 4 * PAGE_SIZE
 };
 
 /*
  * Try to map already-resident pages around the faulting address addra.
  *
  * pmap	must be the current process's pmap, otherwise nothing is done
  * addra	faulting virtual address
  * entry	map entry containing addra; supplies the address bounds, the
  *		backing object and the object offset
  *
  * Candidate addresses come from pmap_prefault_pageorder[], PAGEORDER_SIZE
  * of them.  The first candidate whose page is not resident ends the whole
  * scan.
  */
 void
 pmap_prefault(pmap, addra, entry)
 	pmap_t pmap;
 	vm_offset_t addra;
 	vm_map_entry_t entry;
 {
 	int i;
 	vm_offset_t starta;
 	vm_offset_t addr;
 	vm_pindex_t pindex;
 	vm_page_t m, mpte;
 	vm_object_t object;
 
 	/* vtopte() below is only used against the current address space. */
 	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace)))
 		return;
 
 	object = entry->object.vm_object;
 
 	/*
 	 * Lowest address to consider: PFBAK pages below the fault, clipped
 	 * to the start of the entry.  starta > addra means the subtraction
 	 * wrapped around, so fall back to 0.
 	 */
 	starta = addra - PFBAK * PAGE_SIZE;
 	if (starta < entry->start) {
 		starta = entry->start;
 	} else if (starta > addra) {
 		starta = 0;
 	}
 
 	mpte = NULL;
 	for (i = 0; i < PAGEORDER_SIZE; i++) {
 		vm_object_t lobject;
 		unsigned *pte;
 
 		/*
 		 * A candidate beyond addra + PFFOR pages is replaced by 0,
 		 * which the range check below then rejects unless starta
 		 * is 0 as well.
 		 */
 		addr = addra + pmap_prefault_pageorder[i];
 		if (addr > addra + (PFFOR * PAGE_SIZE))
 			addr = 0;
 
 		if (addr < starta || addr >= entry->end)
 			continue;
 
 		/* Skip addresses with no page directory entry. */
 		if ((*pmap_pde(pmap, addr)) == NULL) 
 			continue;
 
 		/* Skip addresses that already have a pte. */
 		pte = (unsigned *) vtopte(addr);
 		if (*pte)
 			continue;
 
 		/*
 		 * Look the page up in the entry's object and, while it is
 		 * missing, descend through backing objects of OBJT_DEFAULT
 		 * objects, adjusting pindex by each backing offset.  A
 		 * backing offset that is not page aligned stops the descent.
 		 */
 		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
 		lobject = object;
 		for (m = vm_page_lookup(lobject, pindex);
 		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
 		    lobject = lobject->backing_object) {
 			if (lobject->backing_object_offset & PAGE_MASK)
 				break;
 			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
 			m = vm_page_lookup(lobject->backing_object, pindex);
 		}
 
 		/*
 		 * give-up when a page is not in memory
 		 */
 		if (m == NULL)
 			break;
 
 		/*
 		 * Map only fully valid pages that are neither busy nor
 		 * fictitious; a page on the cache queue is deactivated
 		 * first, and the page is held busy across the enter.
 		 */
 		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 			(m->busy == 0) &&
 		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 
 			if ((m->queue - m->pc) == PQ_CACHE) {
 				vm_page_deactivate(m);
 			}
 			vm_page_busy(m);
 			mpte = pmap_enter_quick(pmap, addr, m, mpte);
 			vm_page_flag_set(m, PG_MAPPED);
 			vm_page_wakeup(m);
 		}
 	}
 }
 
 /*
  *	Routine:	pmap_change_wiring
  *	Function:	Set or clear the wired attribute of the mapping of
  *			va in pmap, keeping the pmap's wired_count
  *			statistic in step with the change.
  *	In/out conditions:
  *			The mapping must already exist in the pmap.
  *			A NULL pmap is ignored.
  */
 void
 pmap_change_wiring(pmap, va, wired)
 	register pmap_t pmap;
 	vm_offset_t va;
 	boolean_t wired;
 {
 	unsigned *ptep;
 
 	if (pmap == NULL)
 		return;
 
 	ptep = pmap_pte(pmap, va);
 
 	/* Adjust the statistic only when the wired state really changes. */
 	if (wired) {
 		if (!pmap_pte_w(ptep))
 			pmap->pm_stats.wired_count++;
 	} else if (pmap_pte_w(ptep)) {
 		pmap->pm_stats.wired_count--;
 	}
 
 	/*
 	 * The wired attribute is not a hardware characteristic, so the
 	 * TLB does not have to be invalidated after updating it.
 	 */
 	pmap_pte_set_w(ptep, wired);
 }
 
 
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
  *
  *	Nothing is copied unless dst_addr equals src_addr and src_pmap's
  *	page directory is the one mapped through PTDpde.  Only ptes marked
  *	PG_MANAGED are copied, with PG_M and PG_A cleared in the copy;
  *	4MB (PG_PS) page directory entries are copied whole.
  */
 
 void
 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
 	pmap_t dst_pmap, src_pmap;
 	vm_offset_t dst_addr;
 	vm_size_t len;
 	vm_offset_t src_addr;
 {
 	vm_offset_t addr;
 	vm_offset_t end_addr = src_addr + len;
 	vm_offset_t pdnxt;
 	unsigned src_frame, dst_frame;
 	vm_page_t m;
 
 	if (dst_addr != src_addr)
 		return;
 
 	/* The source ptes are read with vtopte(), i.e. through PTDpde. */
 	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
 	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
 		return;
 	}
 
 	/*
 	 * The destination ptes are written with avtopte(); point APTDpde
 	 * at the destination page directory if it is not there already,
 	 * and flush the TLB after changing it.
 	 */
 	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
 	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
 		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
 #if defined(SMP)
 		/* The page directory is not shared between CPUs */
 		cpu_invltlb();
 #else
 		invltlb();
 #endif
 	}
 
 	/* Walk the range one page-directory entry's worth at a time. */
 	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
 		unsigned *src_pte, *dst_pte;
 		vm_page_t dstmpte, srcmpte;
 		vm_offset_t srcptepaddr;
 		unsigned ptepindex;
 
 		if (addr >= UPT_MIN_ADDRESS)
 			panic("pmap_copy: invalid to pmap_copy page tables\n");
 
 		/*
 		 * Don't let optional prefaulting of pages make us go
 		 * way below the low water mark of free pages or way
 		 * above high water mark of used pv entries.
 		 */
 		if (cnt.v_free_count < cnt.v_free_reserved ||
 		    pv_entry_count > pv_entry_high_water)
 			break;
 		
 		/* Start of the next page-directory-sized region. */
 		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
 		ptepindex = addr >> PDRSHIFT;
 
 		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
 		if (srcptepaddr == 0)
 			continue;
 			
 		/* 4MB page: copy the directory entry if the slot is free. */
 		if (srcptepaddr & PG_PS) {
 			if (dst_pmap->pm_pdir[ptepindex] == 0) {
 				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
 				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 			}
 			continue;
 		}
 
 		/*
 		 * Skip the region when the source page table page is
 		 * missing, holds no mappings, or is busy.
 		 */
 		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
 		if ((srcmpte == NULL) ||
 			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
 			continue;
 
 		if (pdnxt > end_addr)
 			pdnxt = end_addr;
 
 		src_pte = (unsigned *) vtopte(addr);
 		dst_pte = (unsigned *) avtopte(addr);
 		while (addr < pdnxt) {
 			unsigned ptetemp;
 			ptetemp = *src_pte;
 			/*
 			 * we only virtual copy managed pages
 			 */
 			if ((ptetemp & PG_MANAGED) != 0) {
 				/*
 				 * We have to check after allocpte for the
 				 * pte still being around...  allocpte can
 				 * block.
 				 */
 				dstmpte = pmap_allocpte(dst_pmap, addr);
 				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
 					/*
 					 * Clear the modified and
 					 * accessed (referenced) bits
 					 * during the copy.
 					 */
 					m = PHYS_TO_VM_PAGE(ptetemp);
 					*dst_pte = ptetemp & ~(PG_M | PG_A);
 					dst_pmap->pm_stats.resident_count++;
 					pmap_insert_entry(dst_pmap, addr,
 						dstmpte, m);
 	 			} else {
 					/* Nothing entered: give back the hold. */
 					pmap_unwire_pte_hold(dst_pmap, dstmpte);
 				}
 				/*
 				 * Leave this region once the destination page
 				 * table page holds at least as many mappings
 				 * as the source one.
 				 * NOTE(review): dstmpte is read here after the
 				 * unwire above -- confirm it cannot have been
 				 * freed by pmap_unwire_pte_hold.
 				 */
 				if (dstmpte->hold_count >= srcmpte->hold_count)
 					break;
 			}
 			addr += PAGE_SIZE;
 			src_pte++;
 			dst_pte++;
 		}
 	}
 }	
 
 /*
  *	Routine:	pmap_kernel
  *	Function:
  *		Returns the physical map handle for the kernel,
  *		i.e. the value of the global kernel_pmap.
  */
 pmap_t
 pmap_kernel()
 {
 	return (kernel_pmap);
 }
 
 /*
  *	pmap_zero_page zeros the specified hardware page by mapping 
  *	the page into KVM and using bzero to clear its contents.
  *
  *	phys is the physical address of the page; only its PG_FRAME bits
  *	are used.  The page is mapped temporarily through the CMAP2 pte
  *	at virtual address CADDR2.  Panics if CMAP2 is already in use.
  */
 void
 pmap_zero_page(phys)
 	vm_offset_t phys;
 {
 
 	/* A non-zero CMAP2 means the temporary mapping slot is occupied. */
 	if (*(int *) CMAP2)
 		panic("pmap_zero_page: CMAP2 busy");
 
 	/*
 	 * Install a writable mapping of the page, pre-marked accessed and
 	 * modified, and drop any stale TLB entry for CADDR2.
 	 */
 	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 	invltlb_1pg((vm_offset_t)CADDR2);
 
 	/* Kernels built with I686_CPU use i686_pagezero() on 686-class CPUs. */
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686)
 		i686_pagezero(CADDR2);
 	else
 #endif
 		bzero(CADDR2, PAGE_SIZE);
 	/* Release the slot. */
 	*(int *) CMAP2 = 0;
 }
 
 /*
  *	pmap_zero_page_area zeros part of the specified hardware page by
  *	mapping the page into KVM and using bzero to clear size bytes
  *	starting off bytes into the page.
  *
  *	phys is the physical address of the page; only its PG_FRAME bits
  *	are used.  The page is mapped temporarily through the CMAP2 pte
  *	at virtual address CADDR2.  Panics if CMAP2 is already in use.
  *
  *	off and size may not cover an area beyond a single hardware page.
  */
 void
 pmap_zero_page_area(phys, off, size)
 	vm_offset_t phys;
 	int off;
 	int size;
 {
 
 	/*
 	 * A non-zero CMAP2 means the temporary mapping slot is occupied.
 	 * The message names this function so that a panic here is not
 	 * mistaken for one raised by pmap_zero_page().
 	 */
 	if (*(int *) CMAP2)
 		panic("pmap_zero_page_area: CMAP2 busy");
 
 	/*
 	 * Install a writable mapping of the page, pre-marked accessed and
 	 * modified, and drop any stale TLB entry for CADDR2.
 	 */
 	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 	invltlb_1pg((vm_offset_t)CADDR2);
 
 	/*
 	 * i686_pagezero() is only used when the request covers the whole
 	 * page; any partial request goes through bzero().
 	 */
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
 		i686_pagezero(CADDR2);
 	else
 #endif
 		bzero((char *)CADDR2 + off, size);
 	/* Release the slot. */
 	*(int *) CMAP2 = 0;
 }
 
 /*
  *	pmap_copy_page copies the specified (machine independent)
  *	page by mapping the page into virtual memory and using
  *	bcopy to copy the page, one machine dependent page at a
  *	time.
  *
  *	src and dst are physical addresses; only their PG_FRAME bits are
  *	used.  The source is mapped through CMAP1 at CADDR1 and the
  *	destination through CMAP2 at CADDR2.  Panics if either slot is
  *	already in use.
  */
 void
 pmap_copy_page(src, dst)
 	vm_offset_t src;
 	vm_offset_t dst;
 {
 
 	if (*(int *) CMAP1)
 		panic("pmap_copy_page: CMAP1 busy");
 	if (*(int *) CMAP2)
 		panic("pmap_copy_page: CMAP2 busy");
 
 	/* Source mapping has no PG_RW; destination is writable. */
 	*(int *) CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
 	*(int *) CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
 	/*
 	 * Kernels built with I386_CPU flush the whole TLB; otherwise only
 	 * the two temporary addresses are invalidated.
 	 */
 #ifdef I386_CPU
 	invltlb();
 #else
 	invlpg((u_int)CADDR1);
 	invlpg((u_int)CADDR2);
 #endif
 
 	bcopy(CADDR1, CADDR2, PAGE_SIZE);
 
 	/* Release both slots. */
 	*(int *) CMAP1 = 0;
 	*(int *) CMAP2 = 0;
 }
 
 
 /*
  *	Routine:	pmap_pageable
  *	Function:
  *		Make the specified pages (by pmap, offset)
  *		pageable (or not) as requested.
  *
  *		A page which is not pageable may not take
  *		a fault; therefore, its page table entry
  *		must remain valid for the duration.
  *
  *		This routine is merely advisory; pmap_enter
  *		will specify that these pages are to be wired
  *		down (or not) as appropriate.
  *
  *		This implementation does nothing: the body is empty and
  *		all four arguments are ignored.
  */
 void
 pmap_pageable(pmap, sva, eva, pageable)
 	pmap_t pmap;
 	vm_offset_t sva, eva;
 	boolean_t pageable;
 {
 	/* Intentionally empty. */
 }
 
 /*
  * this routine returns true if a physical page resides
  * in the given pmap.
  */
 boolean_t
 pmap_page_exists(pmap, m)
 	pmap_t pmap;
 	vm_page_t m;
 {
 	register pv_entry_t pv;
 	int s;
 
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 		return FALSE;
 
 	s = splvm();
 
 	/*
 	 * Not found, check current mappings returning immediately if found.
 	 */
 	for (pv = TAILQ_FIRST(&m->md.pv_list);
 		pv;
 		pv = TAILQ_NEXT(pv, pv_list)) {
 		if (pv->pv_pmap == pmap) {
 			splx(s);
 			return TRUE;
 		}
 	}
 	splx(s);
 	return (FALSE);
 }
 
 /*
  * When defined, pmap_remove_pages refuses any pmap other than the current
  * process's and locates ptes with vtopte() instead of pmap_pte_quick().
  */
 #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 /*
  * Remove all pages from specified address space
  * this aids process exit speeds.  Also, this code
  * is special cased for current process only, but
  * can have the more generic (and slightly slower)
  * mode enabled.  This is much faster than pmap_remove
  * in the case of running down an entire address space.
  *
  * Works from the pmap's own pv list: every entry whose va lies in
  * [sva, eva) is torn down, except mappings marked wired (PG_W), which
  * are left in place.
  */
 void
 pmap_remove_pages(pmap, sva, eva)
 	pmap_t pmap;
 	vm_offset_t sva, eva;
 {
 	unsigned *pte, tpte;
 	pv_entry_t pv, npv;
 	int s;
 	vm_page_t m;
 
 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) {
 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 		return;
 	}
 #endif
 
 	s = splvm();
 	/*
 	 * npv is always fetched before pv is unlinked or freed, so the walk
 	 * survives removal of the current entry.
 	 */
 	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
 		pv;
 		pv = npv) {
 
 		/* Entries outside the requested range are left alone. */
 		if (pv->pv_va >= eva || pv->pv_va < sva) {
 			npv = TAILQ_NEXT(pv, pv_plist);
 			continue;
 		}
 
 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 		pte = (unsigned *)vtopte(pv->pv_va);
 #else
 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 #endif
 		tpte = *pte;
 
 /*
  * We cannot remove wired pages from a process' mapping at this time
  */
 		if (tpte & PG_W) {
 			npv = TAILQ_NEXT(pv, pv_plist);
 			continue;
 		}
 		*pte = 0;
 
 		m = PHYS_TO_VM_PAGE(tpte);
 
 		KASSERT(m < &vm_page_array[vm_page_array_size],
 			("pmap_remove_pages: bad tpte %x", tpte));
 
 		pv->pv_pmap->pm_stats.resident_count--;
 
 		/*
 		 * Propagate the hardware modified bit to the vm_page_t.
 		 */
 		if (tpte & PG_M) {
 			vm_page_dirty(m);
 		}
 
 
 		/* Unlink the entry from the pmap's list and the page's list. */
 		npv = TAILQ_NEXT(pv, pv_plist);
 		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 
 		m->md.pv_list_count--;
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		/* That was the page's last mapping. */
 		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
 			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 		}
 
 		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
 		free_pv_entry(pv);
 	}
 	splx(s);
 	/* One TLB flush for the whole pmap once all ptes are cleared. */
 	pmap_TLB_invalidate_all(pmap);
 }
 
 /*
  * pmap_testbit reports whether any mapping of page m has one of the
  * given pte bits set.  FALSE is returned before the pmap module is
  * initialized, for fictitious pages and for pages with no mappings.
  * note that the testbit/changebit routines are inline,
  * and a lot of things compile-time evaluate.
  */
 static boolean_t
 pmap_testbit(m, bit)
 	vm_page_t m;
 	int bit;
 {
 	pv_entry_t pv;
 	unsigned *pte;
 	boolean_t hit;
 	int s;
 
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 		return FALSE;
 
 	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 		return FALSE;
 
 	hit = FALSE;
 	s = splvm();
 
 	for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL;
 	    pv = TAILQ_NEXT(pv, pv_list)) {
 
 		/*
 		 * When the accessed or modified bit is being tested,
 		 * mappings at addresses for which pmap_track_modified()
 		 * says no are treated as never referenced/modified.
 		 */
 		if ((bit & (PG_A|PG_M)) && !pmap_track_modified(pv->pv_va))
 			continue;
 
 #if defined(PMAP_DIAGNOSTIC)
 		if (!pv->pv_pmap) {
 			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
 			continue;
 		}
 #endif
 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 		if (*pte & bit) {
 			/* One hit is enough. */
 			hit = TRUE;
 			break;
 		}
 	}
 
 	splx(s);
 	return (hit);
 }
 
 /*
  * this routine is used to modify bits in ptes
  *
  * For every mapping of page m, set (setem true) or clear (setem false)
  * "bit" in the pte and invalidate the TLB entry when the pte was written.
  * Nothing is done before the pmap module is initialized or for
  * fictitious pages.
  */
 static __inline void
 pmap_changebit(m, bit, setem)
 	vm_page_t m;
 	int bit;
 	boolean_t setem;
 {
 	register pv_entry_t pv;
 	register unsigned *pte;
 	int s;
 
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 		return;
 
 	s = splvm();
 
 	/*
 	 * Loop over all current mappings setting/clearing as apropos.  If
 	 * setting RO do we need to clear the VAC?
 	 */
 	for (pv = TAILQ_FIRST(&m->md.pv_list);
 		pv;
 		pv = TAILQ_NEXT(pv, pv_list)) {
 
 		/*
 		 * don't write protect pager mappings
 		 */
 		if (!setem && (bit == PG_RW)) {
 			if (!pmap_track_modified(pv->pv_va))
 				continue;
 		}
 
 #if defined(PMAP_DIAGNOSTIC)
 		if (!pv->pv_pmap) {
 			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
 			continue;
 		}
 #endif
 
 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 
 		if (setem) {
 			/* Setting: always write the pte and invalidate. */
 			*(int *)pte |= bit;
 			pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 		} else {
 			/* Clearing: touch the pte only if the bit is set. */
 			vm_offset_t pbits = *(vm_offset_t *)pte;
 			if (pbits & bit) {
 				if (bit == PG_RW) {
 					/*
 					 * Write-protecting: record a pending
 					 * modification on the page first, then
 					 * clear PG_M together with PG_RW.
 					 */
 					if (pbits & PG_M) {
 						vm_page_dirty(m);
 					}
 					*(int *)pte = pbits & ~(PG_M|PG_RW);
 				} else {
 					*(int *)pte = pbits & ~bit;
 				}
 				pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 			}
 		}
 	}
 	splx(s);
 }
 
 /*
  *      pmap_page_protect:
  *
  *      Lower the permission for all mappings to a given page.
  *
  *      A request that still includes write access changes nothing.
  *      Without write but with read or execute access, every mapping is
  *      made read-only; with no access at all, every mapping is removed.
  */
 void
 pmap_page_protect(vm_page_t m, vm_prot_t prot)
 {
 	/* Write access requested: there is nothing to lower. */
 	if (prot & VM_PROT_WRITE)
 		return;
 
 	if (prot & (VM_PROT_READ | VM_PROT_EXECUTE))
 		pmap_changebit(m, PG_RW, FALSE);
 	else
 		pmap_remove_all(m);
 }
 
 /*
  * Convert the page number ppn to an address by applying i386_ptob().
  */
 vm_offset_t
 pmap_phys_address(ppn)
 	int ppn;
 {
 	return (i386_ptob(ppn));
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return the count of reference bits for a page, clearing all of them.
  *
  *	The scan stops early once the count exceeds 4, so mappings that
  *	were not reached keep their PG_A bit.  Returns 0 before the pmap
  *	module is initialized and for fictitious pages.
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	register pv_entry_t pv, pvf, pvn;
 	unsigned *pte;
 	int s;
 	int rtval = 0;
 
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 		return (rtval);
 
 	s = splvm();
 
 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 
 		/* Remember where the walk started so it can stop there. */
 		pvf = pv;
 
 		do {
 			/* Fetch the successor before pv is moved. */
 			pvn = TAILQ_NEXT(pv, pv_list);
 
 			/* Rotate each visited entry to the tail of the list. */
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 
 			/*
 			 * Mappings that pmap_track_modified() rejects are not
 			 * examined; "continue" jumps to the loop condition
 			 * below, which advances pv to pvn.
 			 */
 			if (!pmap_track_modified(pv->pv_va))
 				continue;
 
 			pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 
 			if (pte && (*pte & PG_A)) {
 				*pte &= ~PG_A;
 
 				pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 
 				rtval++;
 				if (rtval > 4) {
 					break;
 				}
 			}
 		} while ((pv = pvn) != NULL && pv != pvf);
 	}
 	splx(s);
 
 	return (rtval);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Return whether or not the specified physical page was modified
  *	in any physical maps.
  *
  *	Implemented as pmap_testbit(m, PG_M), so the result is TRUE when
  *	some mapping examined by pmap_testbit has PG_M set in its pte.
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 	return pmap_testbit(m, PG_M);
 }
 
 /*
  *	Clear the modify bits on the specified physical page.
  *
  *	Implemented as pmap_changebit(m, PG_M, FALSE), which clears PG_M in
  *	the pte of each mapping of the page that has it set.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 	pmap_changebit(m, PG_M, FALSE);
 }
 
 /*
  *	pmap_clear_reference:
  *
  *	Clear the reference bit on the specified physical page.
  *
  *	Implemented as pmap_changebit(m, PG_A, FALSE), which clears PG_A in
  *	the pte of each mapping of the page that has it set.
  */
 void
 pmap_clear_reference(vm_page_t m)
 {
 	pmap_changebit(m, PG_A, FALSE);
 }
 
 /*
  * Miscellaneous support routines follow
  */
 
 /*
  * Fill protection_codes[] with the pte protection bits for each of the
  * eight VM_PROT_* combinations: PG_RW for every combination that includes
  * VM_PROT_WRITE, 0 for all others.
  *
  * Entries are stored sequentially through kp, one per matching case.
  * NOTE(review): this assumes the eight case labels are exactly the values
  * 0..7, i.e. that VM_PROT_READ/WRITE/EXECUTE are the three low bits --
  * confirm against the VM headers.  There is no default case, so a value
  * of prot with no matching label would store nothing and leave kp behind.
  */
 static void
 i386_protection_init()
 {
 	register int *kp, prot;
 
 	kp = protection_codes;
 	for (prot = 0; prot < 8; prot++) {
 		switch (prot) {
 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
 			/*
 			 * Read access is also 0. There isn't any execute bit,
 			 * so just make it readable.
 			 */
 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
 			/* No write access: no extra pte bits. */
 			*kp++ = 0;
 			break;
 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
 			/* Any combination with write access is read/write. */
 			*kp++ = PG_RW;
 			break;
 		}
 	}
 }
 
 /*
  * Map a run of physical pages into kernel virtual address space and
  * return the kernel address that corresponds to pa (the sub-page offset
  * of pa is preserved in the result).  Meant for mapping device memory,
  * NOT real memory.  Panics if no kernel virtual memory can be obtained.
  */
 void *
 pmap_mapdev(pa, size)
 	vm_offset_t pa;
 	vm_size_t size;
 {
 	vm_offset_t base_va, cur_va, pgoff;
 	unsigned *pte;
 
 	/* Widen the request to whole pages around [pa, pa + size). */
 	pgoff = pa & PAGE_MASK;
 	size = roundup(pgoff + size, PAGE_SIZE);
 
 	GIANT_REQUIRED;
 
 	base_va = kmem_alloc_pageable(kernel_map, size);
 	if (!base_va)
 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 
 	/* Write one read/write kernel pte per page of the range. */
 	pa &= PG_FRAME;
 	cur_va = base_va;
 	while (size > 0) {
 		pte = (unsigned *)vtopte(cur_va);
 		*pte = pa | PG_RW | PG_V | pgeflag;
 		cur_va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 		size -= PAGE_SIZE;
 	}
 	invltlb();
 
 	return ((void *)(base_va + pgoff));
 }
 
 /*
  * Release kernel virtual memory covering [va, va + size): the range is
  * widened to whole pages and handed to kmem_free() on kernel_map.
  */
 void
 pmap_unmapdev(va, size)
 	vm_offset_t va;
 	vm_size_t size;
 {
 	vm_offset_t page_va, pgoff;
 
 	pgoff = va & PAGE_MASK;
 	page_va = va & PG_FRAME;
 	size = roundup(pgoff + size, PAGE_SIZE);
 
 	kmem_free(kernel_map, page_va, size);
 }
 
 /*
  * perform the pmap work for mincore
  *
  * Returns a mask of MINCORE_* flags describing the mapping of addr in
  * pmap: 0 when there is no pte or the pte is zero, MINCORE_INCORE alone
  * for a mapping that is not PG_MANAGED, and otherwise MINCORE_INCORE plus
  * the modified/referenced flags derived from the pte and from the page.
  */
 int
 pmap_mincore(pmap, addr)
 	pmap_t pmap;
 	vm_offset_t addr;
 {
 	unsigned *ptep, pte;
 	vm_offset_t pa;
 	vm_page_t m;
 	int val;
 
 	ptep = pmap_pte(pmap, addr);
 	if (ptep == 0)
 		return 0;
 
 	pte = *ptep;
 	if (pte == 0)
 		return 0;
 
 	val = MINCORE_INCORE;
 	if ((pte & PG_MANAGED) == 0)
 		return val;
 
 	pa = pte & PG_FRAME;
 	m = PHYS_TO_VM_PAGE(pa);
 
 	if (pte & PG_M) {
 		/* Modified by us */
 		val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 	} else if (m->dirty || pmap_is_modified(m)) {
 		/* Modified by someone */
 		val |= MINCORE_MODIFIED_OTHER;
 	}
 
 	if (pte & PG_A) {
 		/* Referenced by us */
 		val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 	} else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
 		/* Referenced by someone; remember that on the page. */
 		val |= MINCORE_REFERENCED_OTHER;
 		vm_page_flag_set(m, PG_REFERENCED);
 	}
 
 	return val;
 }
 
 /*
  * Make the address space of process p current: mark its pmap active (on
  * this CPU for SMP kernels), store the physical address of its page
  * directory in the process's pcb and load that value into %cr3.
  */
 void
 pmap_activate(struct proc *p)
 {
 	pmap_t	pmap = vmspace_pmap(p->p_vmspace);
 
 #if defined(SMP)
 	pmap->pm_active |= 1 << PCPU_GET(cpuid);
 #else
 	pmap->pm_active |= 1;
 #endif
 #if defined(SWTCH_OPTIM_STATS)
 	tlb_flush_count++;
 #endif
 	p->p_addr->u_pcb.pcb_cr3 = vtophys(pmap->pm_pdir);
 	load_cr3(p->p_addr->u_pcb.pcb_cr3);
 }
 
 /*
  * Suggest a mapping address for obj.  For a device object of at least
  * NBPDR bytes the hint is rounded up to the next NBPDR boundary; in every
  * other case (including a NULL obj) addr is returned unchanged.
  */
 vm_offset_t
 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 {
 
 	if ((obj != NULL) && (size >= NBPDR) && (obj->type == OBJT_DEVICE))
 		addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
 
 	return addr;
 }
 
 
 #if defined(PMAP_DEBUG)
 /*
  * Debugging aid: print the valid user ptes of the process with the given
  * pid, two per output line, together with the hold count, wire count and
  * flags of the page each one maps.  Returns the number of ptes printed.
  * The walk ends as soon as an address at or above VM_MIN_KERNEL_ADDRESS
  * is reached.  allproc_lock is held shared for the duration.
  *
  * The return type is spelled out explicitly (implicit int is not valid
  * in C99 and later).
  */
 int
 pmap_pid_dump(int pid)
 {
 	pmap_t pmap;
 	struct proc *p;
 	int npte = 0;
 	int index;
 
 	sx_slock(&allproc_lock);
 	LIST_FOREACH(p, &allproc, p_list) {
 		if (p->p_pid != pid)
 			continue;
 
 		if (p->p_vmspace) {
 			int i,j;
 			index = 0;
 			pmap = vmspace_pmap(p->p_vmspace);
 			for(i=0;i<1024;i++) {
 				pd_entry_t *pde;
 				unsigned *pte;
 				/*
 				 * Shift as unsigned: i << PDRSHIFT in signed
 				 * arithmetic overflows for the upper half of
 				 * the page directory.
 				 */
 				unsigned base = (unsigned)i << PDRSHIFT;
 				
 				pde = &pmap->pm_pdir[i];
 				if (pde && pmap_pde_v(pde)) {
 					for(j=0;j<1024;j++) {
 						unsigned va = base + (j << PAGE_SHIFT);
 						/* Kernel addresses end the dump. */
 						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 							if (index) {
 								index = 0;
 								printf("\n");
 							}
 							sx_sunlock(&allproc_lock);
 							return npte;
 						}
 						pte = pmap_pte_quick( pmap, va);
 						if (pte && pmap_pte_v(pte)) {
 							vm_offset_t pa;
 							vm_page_t m;
 							pa = *(int *)pte;
 							m = PHYS_TO_VM_PAGE(pa);
 							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 								va, pa, m->hold_count, m->wire_count, m->flags);
 							npte++;
 							/* index counts entries on the current line. */
 							index++;
 							if (index >= 2) {
 								index = 0;
 								printf("\n");
 							} else {
 								printf(" ");
 							}
 						}
 					}
 				}
 			}
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	return npte;
 }
 #endif
 
 #if defined(DEBUG)
 
 static void	pads __P((pmap_t pm));
 void		pmap_pvdump __P((vm_offset_t pa));
 
 /*
  * Print the valid ptes of pmap pm as "va:pte" pairs.  Nothing is printed
  * for the kernel pmap.
  */
 static void
 pads(pm)
 	pmap_t pm;
 {
 	unsigned pdi, pti, va;
 	unsigned *ptep;
 
 	if (pm == kernel_pmap)
 		return;
 
 	for (pdi = 0; pdi < 1024; pdi++) {
 		/* Skip empty page directory slots. */
 		if (!pm->pm_pdir[pdi])
 			continue;
 
 		for (pti = 0; pti < 1024; pti++) {
 			va = (pdi << PDRSHIFT) + (pti << PAGE_SHIFT);
 
 			/* Stay inside the address range that belongs to pm. */
 			if (pm == kernel_pmap) {
 				if (va < KERNBASE)
 					continue;
 			} else if (va > UPT_MAX_ADDRESS) {
 				continue;
 			}
 
 			ptep = pmap_pte_quick(pm, va);
 			if (pmap_pte_v(ptep))
 				printf("%x:%x ", va, *(int *) ptep);
 		}
 	}
 
 }
 
 void
 pmap_pvdump(pa)
 	vm_offset_t pa;
 {
 	register pv_entry_t pv;
 	vm_page_t m;
 
 	printf("pa %x", pa);
 	m = PHYS_TO_VM_PAGE(pa);
 	for (pv = TAILQ_FIRST(&m->md.pv_list);
 		pv;
 		pv = TAILQ_NEXT(pv, pv_list)) {
 #ifdef used_to_be
 		printf(" -> pmap %p, va %x, flags %x",
 		    (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
 #endif
 		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
 		pads(pv->pv_pmap);
 	}
 	printf(" ");
 }
 #endif
Index: head/sys/amd64/amd64/sys_machdep.c
===================================================================
--- head/sys/amd64/amd64/sys_machdep.c	(revision 82308)
+++ head/sys/amd64/amd64/sys_machdep.c	(revision 82309)
@@ -1,535 +1,537 @@
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
  * $FreeBSD$
  *
  */
 
+#include "opt_upages.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysproto.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/pcb_ext.h>	/* pcb.h included by sys/user.h */
 #include <machine/sysarch.h>
 
 #include <vm/vm_kern.h>		/* for kernel_map */
 
 /* Upper bound on the number of descriptors in a user LDT. */
 #define MAX_LD 8192
 /* LDT sizes are rounded in units of this many descriptors. */
 #define LD_PER_PAGE 512
 /*
  * Table length needed to hold descriptor index `num': (num + LD_PER_PAGE)
  * rounded down to a multiple of LD_PER_PAGE.  The argument is
  * parenthesized so that expression arguments expand correctly.
  */
 #define NEW_MAX_LD(num)  (((num) + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
 /* NEW_MAX_LD(num) scaled by 8 (shift left by 3). */
 #define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
 
 
 
 static int i386_get_ldt	__P((struct proc *, char *));
 static int i386_set_ldt	__P((struct proc *, char *));
 static int i386_get_ioperm	__P((struct proc *, char *));
 static int i386_set_ioperm	__P((struct proc *, char *));
 #ifdef SMP
 static void set_user_ldt_rv	__P((struct pcb *));
 #endif
 
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Argument block for sysarch(); only defined here when <sys/sysproto.h>
  * has not already provided it.
  */
 struct sysarch_args {
 	int op;		/* operation selector switched on by sysarch() */
 	char *parms;	/* pointer handed unchanged to the selected handler */
 };
 #endif
 
 /*
  * Machine-dependent system call dispatcher: route uap->op to the matching
  * handler with uap->parms and return that handler's result.  Unknown
  * operations yield EOPNOTSUPP.
  */
 int
 sysarch(struct proc *p, struct sysarch_args *uap)
 {
 
 	switch (uap->op) {
 	case I386_GET_LDT:
 		return (i386_get_ldt(p, uap->parms));
 	case I386_SET_LDT:
 		return (i386_set_ldt(p, uap->parms));
 	case I386_GET_IOPERM:
 		return (i386_get_ioperm(p, uap->parms));
 	case I386_SET_IOPERM:
 		return (i386_set_ioperm(p, uap->parms));
 	case I386_VM86:
 		return (vm86_sysarch(p, uap->parms));
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * Allocate a pcb extension for process p and attach it to p's pcb.
  *
  * The extension occupies ctob(IOPAGES + 1) bytes of kernel_map and holds a
  * TSS (ext_tss), its segment descriptor (ext_tssd), the i/o map
  * (ext_iomap) and the vm86 interrupt map (vm86_intmap).
  *
  * Returns ENOMEM if the allocation fails, 0 otherwise.  The caller must be
  * running as p and p must not already have an extension (both are
  * KASSERTed).
  */
 int
 i386_extend_pcb(struct proc *p)
 {
 	int i, offset;
 	u_long *addr;
 	struct pcb_ext *ext;
 	/* Template descriptor; base and limit are adjusted below. */
 	struct soft_segment_descriptor ssd = {
 		0,			/* segment base address (overwritten) */
 		ctob(IOPAGES + 1) - 1,	/* length */
 		SDT_SYS386TSS,		/* segment type */
 		0,			/* priority level */
 		1,			/* descriptor present */
 		0, 0,
 		0,			/* default 32 size */
 		0			/* granularity */
 	};
 
 	ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
 	if (ext == 0)
 		return (ENOMEM);
 	/* Only the structure itself is zeroed; the maps are filled below. */
 	bzero(ext, sizeof(struct pcb_ext)); 
 	/* Ring-0 stack: 16 bytes below the top of the process's U area. */
 	ext->ext_tss.tss_esp0 = (unsigned)p->p_addr + ctob(UPAGES) - 16;
 	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
 	/*
 	 * The last byte of the i/o map must be followed by an 0xff byte.
 	 * We arbitrarily allocate 16 bytes here, to keep the starting
 	 * address on a doubleword boundary.
 	 */
 	offset = PAGE_SIZE - 16;
 	/* tss_ioopt holds the i/o map offset, relative to the TSS, in its upper 16 bits. */
 	ext->ext_tss.tss_ioopt = 
 	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
 	ext->ext_iomap = (caddr_t)ext + offset;
 	/* The vm86 interrupt map sits in the 32 bytes just before the i/o map. */
 	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;
 
 	/* Set every bit from the start of the interrupt map to the end of the allocation. */
 	addr = (u_long *)ext->ext_vm86.vm86_intmap;
 	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
 		*addr++ = ~0;
 
 	/* The descriptor covers from the TSS to the end of the allocation. */
 	ssd.ssd_base = (unsigned)&ext->ext_tss;
 	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
 	ssdtosd(&ssd, &ext->ext_tssd);
 
 	KASSERT(p == curproc, ("giving a TSS to non-curproc"));
 	KASSERT(p->p_addr->u_pcb.pcb_ext == 0, ("already have a TSS!"));
 	/* Publish the extension and request a reschedule under sched_lock. */
 	mtx_lock_spin(&sched_lock);
 	p->p_addr->u_pcb.pcb_ext = ext;
 	
 	/* switch to the new TSS after syscall completes */
 	p->p_sflag |= PS_NEEDRESCHED;
 	mtx_unlock_spin(&sched_lock);
 
 	return 0;
 }
 
 static int
 i386_set_ioperm(p, args)
 	struct proc *p;
 	char *args;
 {
 	int i, error;
 	struct i386_ioperm_args ua;
 	char *iomap;
 
 	if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
 		return (error);
 
 	if ((error = suser(p)) != 0)
 		return (error);
 	if (securelevel > 0)
 		return (EPERM);
 	/*
 	 * XXX 
 	 * While this is restricted to root, we should probably figure out
 	 * whether any other driver is using this i/o address, as so not to
 	 * cause confusion.  This probably requires a global 'usage registry'.
 	 */
 
 	if (p->p_addr->u_pcb.pcb_ext == 0)
 		if ((error = i386_extend_pcb(p)) != 0)
 			return (error);
 	iomap = (char *)p->p_addr->u_pcb.pcb_ext->ext_iomap;
 
 	if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY)
 		return (EINVAL);
 
 	for (i = ua.start; i < ua.start + ua.length; i++) {
 		if (ua.enable) 
 			iomap[i >> 3] &= ~(1 << (i & 7));
 		else
 			iomap[i >> 3] |= (1 << (i & 7));
 	}
 	return (error);
 }
 
 static int
 i386_get_ioperm(p, args)
 	struct proc *p;
 	char *args;
 {
 	int i, state, error;
 	struct i386_ioperm_args ua;
 	char *iomap;
 
 	if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
 		return (error);
 	if (ua.start >= IOPAGES * PAGE_SIZE * NBBY)
 		return (EINVAL);
 
 	if (p->p_addr->u_pcb.pcb_ext == 0) {
 		ua.length = 0;
 		goto done;
 	}
 
 	iomap = (char *)p->p_addr->u_pcb.pcb_ext->ext_iomap;
 
 	i = ua.start;
 	state = (iomap[i >> 3] >> (i & 7)) & 1;
 	ua.enable = !state;
 	ua.length = 1;
 
 	for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
 		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
 			break;
 		ua.length++;
 	}
 			
 done:
 	error = copyout(&ua, args, sizeof(struct i386_ioperm_args));
 	return (error);
 }
 
 /*
  * Update the GDT entry pointing to the LDT to point to the LDT of the
  * current process.
  *
  * This must be called with sched_lock held.  Unfortunately, we can't use a
  * mtx_assert() here because cpu_switch() calls this function after changing
  * curproc but before sched_lock's owner is updated in mi_switch().
  */   
 void
 set_user_ldt(struct pcb *pcb)
 {
 	struct pcb_ldt *pcb_ldt;
 
 	pcb_ldt = pcb->pcb_ldt;
 #ifdef SMP
 	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
 #else
 	gdt[GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
 #endif
 	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
 	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
 }
 
 #ifdef SMP
 /*
  * Rendezvous callback: reload the user LDT, under sched_lock, but only on
  * the CPU whose current pcb is `pcb'.
  */
 static void
 set_user_ldt_rv(struct pcb *pcb)
 {
 
 	if (pcb == PCPU_GET(curpcb)) {
 		mtx_lock_spin(&sched_lock);
 		set_user_ldt(pcb);
 		mtx_unlock_spin(&sched_lock);
 	}
 }
 #endif
 
 /*
  * Allocate a new LDT of NEW_MAX_LD(len) descriptors and seed it from
  * pcb's current LDT, or from the default ldt[] when pcb has none.  The
  * new table is returned to the caller; it is not installed in pcb here.
  *
  * Must be called with either sched_lock free or held but not recursed.
  * If it does not return NULL, it will return with it owned.
  * NULL is returned, with sched_lock not held, when the descriptor memory
  * cannot be allocated.
  */
 struct pcb_ldt *
 user_ldt_alloc(struct pcb *pcb, int len)
 {
 	struct pcb_ldt *pcb_ldt, *new_ldt;
 
 	/* Drop sched_lock around the M_WAITOK allocations below. */
 	if (mtx_owned(&sched_lock))
 		mtx_unlock_spin(&sched_lock);
 	mtx_assert(&sched_lock, MA_NOTOWNED);
 	MALLOC(new_ldt, struct pcb_ldt *, sizeof(struct pcb_ldt),
 		M_SUBPROC, M_WAITOK);
 
 	/* From here on `len' is the rounded table length in descriptors. */
 	new_ldt->ldt_len = len = NEW_MAX_LD(len);
 	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
 		len * sizeof(union descriptor));
 	if (new_ldt->ldt_base == NULL) {
 		FREE(new_ldt, M_SUBPROC);
 		return NULL;
 	}
 	new_ldt->ldt_refcnt = 1;
 	new_ldt->ldt_active = 0;
 
 	/* Taken here and still held on the successful return. */
 	mtx_lock_spin(&sched_lock);
 	/* Build the descriptor for the new table via the shared gdt_segs[] entry. */
 	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
 	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
 	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);
 
 	if ((pcb_ldt = pcb->pcb_ldt)) {
 		/* Copy no more than the old table actually holds. */
 		if (len > pcb_ldt->ldt_len)
 			len = pcb_ldt->ldt_len;
 		bcopy(pcb_ldt->ldt_base, new_ldt->ldt_base,
 			len * sizeof(union descriptor));
 	} else {
 		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
 	}
 	return new_ldt;
 }
 
 /*
  * Must be called either with sched_lock free or held but not recursed.
  * If pcb->pcb_ldt is not NULL, it will return with sched_lock released.
  */
 void
 user_ldt_free(struct pcb *pcb)
 {
 	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
 
 	if (pcb_ldt == NULL)
 		return;
 
 	if (!mtx_owned(&sched_lock))
 		mtx_lock_spin(&sched_lock);
 	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
 	if (pcb == PCPU_GET(curpcb)) {
 		lldt(_default_ldt);
 		PCPU_SET(currentldt, _default_ldt);
 	}
 
 	pcb->pcb_ldt = NULL;
 	if (--pcb_ldt->ldt_refcnt == 0) {
 		mtx_unlock_spin(&sched_lock);
 		kmem_free(kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
 			pcb_ldt->ldt_len * sizeof(union descriptor));
 		FREE(pcb_ldt, M_SUBPROC);
 	} else
 		mtx_unlock_spin(&sched_lock);
 }
 
 static int
 i386_get_ldt(p, args)
 	struct proc *p;
 	char *args;
 {
 	int error = 0;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
 	int nldt, num;
 	union descriptor *lp;
 	struct i386_ldt_args ua, *uap = &ua;
 
 	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
 		return(error);
 
 #ifdef	DEBUG
 	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
 	    uap->start, uap->num, (void *)uap->descs);
 #endif
 
 	/* verify range of LDTs exist */
 	if ((uap->start < 0) || (uap->num <= 0))
 		return(EINVAL);
 
 	if (pcb_ldt) {
 		nldt = pcb_ldt->ldt_len;
 		num = min(uap->num, nldt);
 		lp = &((union descriptor *)(pcb_ldt->ldt_base))[uap->start];
 	} else {
 		nldt = sizeof(ldt)/sizeof(ldt[0]);
 		num = min(uap->num, nldt);
 		lp = &ldt[uap->start];
 	}
 	if (uap->start > nldt)
 		return(EINVAL);
 
 	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
 	if (!error)
 		p->p_retval[0] = num;
 
 	return(error);
 }
 
 static int
 i386_set_ldt(p, args)
 	struct proc *p;
 	char *args;
 {
 	int error = 0, i, n;
 	int largest_ld;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
 	struct i386_ldt_args ua, *uap = &ua;
 	caddr_t old_ldt_base;
 	int old_ldt_len;
 	critical_t savecrit;
 
 	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
 		return(error);
 
 #ifdef	DEBUG
 	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
 	    uap->start, uap->num, (void *)uap->descs);
 #endif
 
 	/* verify range of descriptors to modify */
 	if ((uap->start < 0) || (uap->start >= MAX_LD) || (uap->num < 0) ||
 		(uap->num > MAX_LD))
 	{
 		return(EINVAL);
 	}
 	largest_ld = uap->start + uap->num - 1;
 	if (largest_ld >= MAX_LD)
 		return(EINVAL);
 
 	/* allocate user ldt */
 	if (!pcb_ldt || largest_ld >= pcb_ldt->ldt_len) {
 		struct pcb_ldt *new_ldt = user_ldt_alloc(pcb, largest_ld);
 		if (new_ldt == NULL)
 			return ENOMEM;
 		if (pcb_ldt) {
 			old_ldt_base = pcb_ldt->ldt_base;
 			old_ldt_len = pcb_ldt->ldt_len;
 			pcb_ldt->ldt_sd = new_ldt->ldt_sd;
 			pcb_ldt->ldt_base = new_ldt->ldt_base;
 			pcb_ldt->ldt_len = new_ldt->ldt_len;
 			mtx_unlock_spin(&sched_lock);
 			kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
 				old_ldt_len * sizeof(union descriptor));
 			FREE(new_ldt, M_SUBPROC);
 #ifndef SMP
 			mtx_lock_spin(&sched_lock);
 #endif
 		} else {
 			pcb->pcb_ldt = pcb_ldt = new_ldt;
 #ifdef SMP
 			mtx_unlock_spin(&sched_lock);
 #endif
 		}
 #ifdef SMP
 		/* signal other cpus to reload ldt */
 		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, NULL, pcb);
 #else
 		set_user_ldt(pcb);
 		mtx_unlock_spin(&sched_lock);
 #endif
 	}
 
 	/* Check descriptors for access violations */
 	for (i = 0, n = uap->start; i < uap->num; i++, n++) {
 		union descriptor desc, *dp;
 		dp = &uap->descs[i];
 		error = copyin(dp, &desc, sizeof(union descriptor));
 		if (error)
 			return(error);
 
 		switch (desc.sd.sd_type) {
 		case SDT_SYSNULL:	/* system null */ 
 			desc.sd.sd_p = 0;
 			break;
 		case SDT_SYS286TSS: /* system 286 TSS available */
 		case SDT_SYSLDT:    /* system local descriptor table */
 		case SDT_SYS286BSY: /* system 286 TSS busy */
 		case SDT_SYSTASKGT: /* system task gate */
 		case SDT_SYS286IGT: /* system 286 interrupt gate */
 		case SDT_SYS286TGT: /* system 286 trap gate */
 		case SDT_SYSNULL2:  /* undefined by Intel */ 
 		case SDT_SYS386TSS: /* system 386 TSS available */
 		case SDT_SYSNULL3:  /* undefined by Intel */
 		case SDT_SYS386BSY: /* system 386 TSS busy */
 		case SDT_SYSNULL4:  /* undefined by Intel */ 
 		case SDT_SYS386IGT: /* system 386 interrupt gate */
 		case SDT_SYS386TGT: /* system 386 trap gate */
 		case SDT_SYS286CGT: /* system 286 call gate */ 
 		case SDT_SYS386CGT: /* system 386 call gate */
 			/* I can't think of any reason to allow a user proc
 			 * to create a segment of these types.  They are
 			 * for OS use only.
 			 */
 			return EACCES;
 			/*NOTREACHED*/
 
 		/* memory segment types */
 		case SDT_MEMEC:   /* memory execute only conforming */
 		case SDT_MEMEAC:  /* memory execute only accessed conforming */
 		case SDT_MEMERC:  /* memory execute read conforming */
 		case SDT_MEMERAC: /* memory execute read accessed conforming */
 			 /* Must be "present" if executable and conforming. */
 			if (desc.sd.sd_p == 0)
 				return (EACCES);
 			break;
 		case SDT_MEMRO:   /* memory read only */
 		case SDT_MEMROA:  /* memory read only accessed */
 		case SDT_MEMRW:   /* memory read write */
 		case SDT_MEMRWA:  /* memory read write accessed */
 		case SDT_MEMROD:  /* memory read only expand dwn limit */
 		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
 		case SDT_MEMRWD:  /* memory read write expand dwn limit */  
 		case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
 		case SDT_MEME:    /* memory execute only */ 
 		case SDT_MEMEA:   /* memory execute only accessed */
 		case SDT_MEMER:   /* memory execute read */
 		case SDT_MEMERA:  /* memory execute read accessed */
 			break;
 		default:
 			return(EINVAL);
 			/*NOTREACHED*/
 		}
 
 		/* Only user (ring-3) descriptors may be present. */
 		if ((desc.sd.sd_p != 0) && (desc.sd.sd_dpl != SEL_UPL))
 			return (EACCES);
 	}
 
 	/* Fill in range */
 	savecrit = critical_enter();
 	error = copyin(uap->descs, 
 	    &((union descriptor *)(pcb_ldt->ldt_base))[uap->start],
 	    uap->num * sizeof(union descriptor));
 	if (!error)
 		p->p_retval[0] = uap->start;
 	critical_exit(savecrit);
 
 	return(error);
 }
Index: head/sys/amd64/amd64/vm_machdep.c
===================================================================
--- head/sys/amd64/amd64/vm_machdep.c	(revision 82308)
+++ head/sys/amd64/amd64/vm_machdep.c	(revision 82309)
@@ -1,587 +1,588 @@
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
  * Copyright (c) 1989, 1990 William Jolitz
  * Copyright (c) 1994 John Dyson
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department, and William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
  * $FreeBSD$
  */
 
 #include "opt_npx.h"
 #ifdef PC98
 #include "opt_pc98.h"
 #endif
 #include "opt_reset.h"
 #include "opt_isa.h"
+#include "opt_upages.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/mutex.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/pcb_ext.h>
 #include <machine/vm86.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <sys/lock.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 
 #include <sys/user.h>
 
 #ifdef PC98
 #include <pc98/pc98/pc98.h>
 #else
 #include <i386/isa/isa.h>
 #endif
 
 static void	cpu_reset_real __P((void));
 #ifdef SMP
 static void	cpu_reset_proxy __P((void));
 static u_int	cpu_reset_proxyid;
 static volatile u_int	cpu_reset_proxy_active;
 #endif
 extern int	_ucodesel, _udatasel;
 
 /*
  * Quick version of vm_fault: touch the byte at user address v so that
  * the page gets faulted in.  The byte is read with fubyte(); when prot
  * includes VM_PROT_WRITE the value read is also stored back with
  * subyte().  Returns the result of the last of those calls.
  */
 int
 vm_fault_quick(caddr_t v, int prot)
 {
 
 	if ((prot & VM_PROT_WRITE) == 0)
 		return (fubyte(v));
 	return (subyte(v, fubyte(v)));
 }
 
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
  * is ready to run and return to user mode.
  */
 /*
  * Machine-dependent part of fork.
  *
  * p1 is the parent, p2 the child, and flags are the rfork-style RF*
  * flags.  Without RFPROC no child is set up; the only work done is to
  * give p1 a private copy of its LDT when RFMEM is also clear and the LDT
  * is shared.  With RFPROC the parent's pcb is copied into the child, the
  * child gets a fresh trap frame at the top of its U area, its saved
  * kernel registers are pointed at fork_trampoline(), and the LDT is
  * either shared (RFMEM) or copied.
  */
 void
 cpu_fork(p1, p2, flags)
 	register struct proc *p1, *p2;
 	int flags;
 {
 	struct pcb *pcb2;
 #ifdef DEV_NPX
 	int savecrit;
 #endif
 
 	if ((flags & RFPROC) == 0) {
 		if ((flags & RFMEM) == 0) {
 			/* unshare user LDT */
 			struct pcb *pcb1 = &p1->p_addr->u_pcb;
 			struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;
 			if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
 				pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len);
 				if (pcb_ldt == NULL)
 					panic("could not copy LDT");
 				pcb1->pcb_ldt = pcb_ldt;
 				set_user_ldt(pcb1);
 				/*
 				 * NOTE(review): user_ldt_free() works on
 				 * pcb1->pcb_ldt, which at this point is the
 				 * copy just installed rather than the LDT
 				 * that was shared -- confirm this is the
 				 * intended object to release.
 				 */
 				user_ldt_free(pcb1);
 			}
 		}
 		return;
 	}
 
 	/* Ensure that p1's pcb is up to date. */
 #ifdef DEV_NPX
 	if (p1 == curproc)
 		p1->p_addr->u_pcb.pcb_gs = rgs();
 	savecrit = critical_enter();
 	if (PCPU_GET(npxproc) == p1)
 		npxsave(&p1->p_addr->u_pcb.pcb_save);
 	critical_exit(savecrit);
 #endif
 
 	/* Copy p1's pcb. */
 	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
 	pcb2 = &p2->p_addr->u_pcb;
 
 	/*
 	 * Create a new fresh stack for the new process.
 	 * Copy the trap frame for the return to user mode as if from a
 	 * syscall.  This copies most of the user mode register values.
 	 */
 	/* The frame is the last whole trapframe below (top of U area - 16). */
 	p2->p_frame = (struct trapframe *)
 			   ((int)p2->p_addr + UPAGES * PAGE_SIZE - 16) - 1;
 	bcopy(p1->p_frame, p2->p_frame, sizeof(struct trapframe));
 
 	p2->p_frame->tf_eax = 0;		/* Child returns zero */
 	p2->p_frame->tf_eflags &= ~PSL_C;	/* success */
 	p2->p_frame->tf_edx = 1;
 
 	/*
 	 * Set registers for trampoline to user mode.  Leave space for the
 	 * return address on stack.  These are the kernel mode register values.
 	 */
 	pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
 	pcb2->pcb_edi = 0;
 	pcb2->pcb_esi = (int)fork_return;	/* fork_trampoline argument */
 	pcb2->pcb_ebp = 0;
 	pcb2->pcb_esp = (int)p2->p_frame - sizeof(void *);
 	pcb2->pcb_ebx = (int)p2;		/* fork_trampoline argument */
 	pcb2->pcb_eip = (int)fork_trampoline;
 	/*-
 	 * pcb2->pcb_dr*:	cloned above.
 	 * pcb2->pcb_ldt:	duplicated below, if necessary.
 	 * pcb2->pcb_savefpu:	cloned above.
 	 * pcb2->pcb_flags:	cloned above.
 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
 	 * pcb2->pcb_gs:	cloned above.
 	 * pcb2->pcb_ext:	cleared below.
 	 */
 
 	/*
 	 * XXX don't copy the i/o pages.  this should probably be fixed.
 	 */
 	pcb2->pcb_ext = 0;
 
         /* Copy the LDT, if necessary. */
 	mtx_lock_spin(&sched_lock);
         if (pcb2->pcb_ldt != 0) {
 		if (flags & RFMEM) {
 			/* Shared address space: share the LDT as well. */
 			pcb2->pcb_ldt->ldt_refcnt++;
 		} else {
 			/*
 			 * user_ldt_alloc() drops sched_lock while it
 			 * allocates and holds it again when it returns a
 			 * table.
 			 */
 			pcb2->pcb_ldt = user_ldt_alloc(pcb2,
 				pcb2->pcb_ldt->ldt_len);
 			if (pcb2->pcb_ldt == NULL)
 				panic("could not copy LDT");
 		}
         }
 	mtx_unlock_spin(&sched_lock);
 
 	/*
 	 * Now, cpu_switch() can schedule the new process.
 	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
 	 * containing the return address when exiting cpu_switch.
 	 * This will normally be to fork_trampoline(), which will have
 	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
 	 * will set up a stack to call fork_return(p, frame); to complete
 	 * the return to user-mode.
 	 */
 }
 
 /*
  * Intercept the return address from a freshly forked process that has NOT
  * been scheduled yet.
  *
  * This is needed to make kernel threads stay in kernel mode.
  */
 void
 cpu_set_fork_handler(p, func, arg)
 	struct proc *p;
 	void (*func) __P((void *));
 	void *arg;
 {
 	/*
 	 * Note that the trap frame follows the args, so the function
 	 * is really called like this:  func(arg, frame);
 	 */
 	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
 	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
 }
 
 /*
  * Machine-dependent part of process exit.
  *
  * Releases p's FPU state (DEV_NPX), pcb extension, LDT and hardware
  * debug registers, then marks p a zombie, wakes its parent and switches
  * away with cpu_throw().  Does not return.
  */
 void
 cpu_exit(p)
 	register struct proc *p;
 {
 	struct pcb *pcb = &p->p_addr->u_pcb; 
 
 #ifdef DEV_NPX
 	npxexit(p);
 #endif
 	if (pcb->pcb_ext != 0) {
 	        /* 
 		 * XXX do we need to move the TSS off the allocated pages 
 		 * before freeing them?  (not done here)
 		 */
 		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
 		    ctob(IOPAGES + 1));
 		pcb->pcb_ext = 0;
 	}
 	if (pcb->pcb_ldt)
 		user_ldt_free(pcb);
         if (pcb->pcb_flags & PCB_DBREGS) {
                 /*
                  * disable all hardware breakpoints
                  */
                 reset_dbregs();
                 pcb->pcb_flags &= ~PCB_DBREGS;
         }
 	PROC_LOCK(p);
 	mtx_lock_spin(&sched_lock);
 	/* Release Giant once per level of recursion, without switching. */
 	while (mtx_owned(&Giant))
 		mtx_unlock_flags(&Giant, MTX_NOSWITCH);
 
 	/*
 	 * We have to wait until after releasing all locks before
 	 * changing p_stat.  If we block on a mutex then we will be
 	 * back at SRUN when we resume and our parent will never
 	 * harvest us.
 	 */
 	p->p_stat = SZOMB;
 
 	wakeup(p->p_pptr);
 	PROC_UNLOCK_NOSWITCH(p);
 
 	cnt.v_swtch++;
 	cpu_throw();
 	/* Reached only if cpu_throw() returns. */
 	panic("cpu_exit");
 }
 
 /*
  * Release what is left of an exited process p: its per-process pmap
  * resources and then its vmspace.  Giant must be held.
  */
 void
 cpu_wait(struct proc *p)
 {
 
 	GIANT_REQUIRED;
 
 	/* Per-process resources go first ... */
 	pmap_dispose_proc(p);
 
 	/* ... followed by the address space itself. */
 	vmspace_free(p->p_vmspace);
 }
 
 /*
  * Dump the machine specific header information at the start of a core dump.
  */
 int
 cpu_coredump(p, vp, cred)
 	struct proc *p;
 	struct vnode *vp;
 	struct ucred *cred;
 {
 	int error;
 	caddr_t tempuser;
 
 	tempuser = malloc(ctob(UPAGES), M_TEMP, M_WAITOK | M_ZERO);
 	if (!tempuser)
 		return EINVAL;
 	
 	bcopy(p->p_addr, tempuser, sizeof(struct user));
 	bcopy(p->p_frame,
 	      tempuser + ((caddr_t) p->p_frame - (caddr_t) p->p_addr),
 	      sizeof(struct trapframe));
 
 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t) tempuser, 
 			ctob(UPAGES),
 			(off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, 
 			cred, (int *)NULL, p);
 
 	free(tempuser, M_TEMP);
 	
 	return error;
 }
 
 #ifdef notyet
 /*
  * Placeholder for a kernel-stack red zone; the body is intentionally
  * empty and the function is only compiled under "notyet".
  */
 static void
 setredzone(pte, vaddr)
 	u_short *pte;
 	caddr_t vaddr;
 {
 /*
  * Eventually do this by setting up an expand-down stack segment for the
  * ss0: selector, allowing stack access down to the top of u.  This means,
  * though, that protection violations need to be handled through a double
  * fault exception that must do an integral task switch to a known good
  * context, within which a dump can be taken.  A sensible scheme might be
  * to save the initial context used by sched (that has physical memory
  * mapped 1:1 at the bottom) and take the dump while still in mapped mode.
  */
 }
 #endif
 
 /*
  * Translate a kernel virtual address to the physical address reported
  * by pmap_kextract().  Panics if the translation is zero.
  */
 u_long
 kvtop(void *addr)
 {
 	vm_offset_t pa = pmap_kextract((vm_offset_t)addr);
 
 	if (pa == 0)
 		panic("kvtop: zero page frame");
 	return ((int)pa);
 }
 
 /*
  * Map an IO request into kernel virtual address space.
  *
  * All requests are (re)mapped into kernel VA space.
  * Notice that we use b_bufsize for the size of the buffer
  * to be mapped.  b_bcount might be modified by the driver.
  */
 /*
  * Map the pages backing bp->b_data into kernel virtual address space
  * starting at bp->b_saveaddr, holding each page.  On return b_data points
  * at the kernel mapping (keeping the original offset within the page)
  * and b_saveaddr holds the original b_data.  Requires Giant and a B_PHYS
  * buffer.
  */
 void
 vmapbuf(bp)
 	register struct buf *bp;
 {
 	register caddr_t addr, v, kva;
 	vm_offset_t pa;
 
 	GIANT_REQUIRED;
 
 	if ((bp->b_flags & B_PHYS) == 0)
 		panic("vmapbuf");
 
 	/* addr walks the source pages, v the kernel addresses they are entered at. */
 	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
 	    addr < bp->b_data + bp->b_bufsize;
 	    addr += PAGE_SIZE, v += PAGE_SIZE) {
 		/*
 		 * Do the vm_fault if needed; do the copy-on-write thing
 		 * when reading stuff off device into memory.
 		 */
 		vm_fault_quick(addr,
 			(bp->b_iocmd == BIO_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
 		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
 		if (pa == 0)
 			panic("vmapbuf: page not present");
 		/* Hold the page, then enter it at v. */
 		vm_page_hold(PHYS_TO_VM_PAGE(pa));
 		pmap_kenter((vm_offset_t) v, pa);
 	}
 
 	/* Swap: b_saveaddr keeps the original address for vunmapbuf(). */
 	kva = bp->b_saveaddr;
 	bp->b_saveaddr = bp->b_data;
 	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
 }
 
 /*
  * Free the io map PTEs associated with this IO operation.
  * We also invalidate the TLB entries and restore the original b_addr.
  */
 void
 vunmapbuf(bp)
 	register struct buf *bp;
 {
 	register caddr_t addr;
 	vm_offset_t pa;
 
 	GIANT_REQUIRED;
 
 	if ((bp->b_flags & B_PHYS) == 0)
 		panic("vunmapbuf");
 
 	for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
 	    addr < bp->b_data + bp->b_bufsize;
 	    addr += PAGE_SIZE) {
 		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
 		pmap_kremove((vm_offset_t) addr);
 		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
 	}
 
 	bp->b_data = bp->b_saveaddr;
 }
 
 /*
  * Force reset the processor by invalidating the entire address space!
  */
 
 #ifdef SMP
 /*
  * Restart function installed by cpu_reset() for the CPU that performs
  * the reset on behalf of the requesting CPU: announce ourselves, wait
  * for the requester to acknowledge, stop the requester and reset.
  */
 static void
 cpu_reset_proxy(void)
 {
 
 	cpu_reset_proxy_active = 1;
 	/* Wait for other cpu to see that we've started */
 	while (cpu_reset_proxy_active == 1)
 		continue;
 	stop_cpus(1 << cpu_reset_proxyid);
 	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
 	DELAY(1000000);
 	cpu_reset_real();
 }
 #endif
 
 /*
  * Reset the machine.
  *
  * Without SMP, or when SMP is not active, this simply calls
  * cpu_reset_real().  Otherwise all other running CPUs are stopped first;
  * CPU 0 then resets directly, while any other CPU hands the reset to
  * CPU 0 through cpu_reset_proxy() and spins.
  */
 void
 cpu_reset()
 {
 #ifdef SMP
 	if (smp_active == 0) {
 		cpu_reset_real();
 		/* NOTREACHED */
 	} else {
 
 		u_int map;
 		int cnt;
 		printf("cpu_reset called on cpu#%d\n", PCPU_GET(cpuid));
 
 		/* Other CPUs that have not been stopped already. */
 		map = PCPU_GET(other_cpus) & ~ stopped_cpus;
 
 		if (map != 0) {
 			printf("cpu_reset: Stopping other CPUs\n");
 			stop_cpus(map);		/* Stop all other CPUs */
 		}
 
 		if (PCPU_GET(cpuid) == 0) {
 			DELAY(1000000);
 			cpu_reset_real();
 			/* NOTREACHED */
 		} else {
 			/* We are not BSP (CPU #0) */
 
 			cpu_reset_proxyid = PCPU_GET(cpuid);
 			cpustop_restartfunc = cpu_reset_proxy;
 			cpu_reset_proxy_active = 0;
 			printf("cpu_reset: Restarting BSP\n");
 			started_cpus = (1<<0);		/* Restart CPU #0 */
 
 			/* Bounded busy-wait for cpu_reset_proxy() to set the flag to 1. */
 			cnt = 0;
 			while (cpu_reset_proxy_active == 0 && cnt < 10000000)
 				cnt++;	/* Wait for BSP to announce restart */
 			if (cpu_reset_proxy_active == 0)
 				printf("cpu_reset: Failed to restart BSP\n");
 			enable_intr();
 			/* Acknowledge: releases the wait loop in cpu_reset_proxy(). */
 			cpu_reset_proxy_active = 2;
 
 			while (1);
 			/* NOTREACHED */
 		}
 	}
 #else
 	cpu_reset_real();
 #endif
 }
 
 /*
  * Perform the actual reset on the calling CPU.
  *
  * On PC98 the CPU reset port is used.  Otherwise a keyboard-controller
  * reset is attempted (unless BROKEN_KEYBOARD_RESET is defined).  If the
  * machine is still running afterwards, the page directory is zeroed and
  * the TLB flushed to force a shutdown.  Does not return.
  */
 static void
 cpu_reset_real()
 {
 
 #ifdef PC98
 	/*
 	 * Attempt to do a CPU reset via CPU reset port.
 	 */
 	disable_intr();
 	if ((inb(0x35) & 0xa0) != 0xa0) {
 		outb(0x37, 0x0f);		/* SHUT0 = 0. */
 		outb(0x37, 0x0b);		/* SHUT1 = 0. */
 	}
 	outb(0xf0, 0x00);		/* Reset. */
 #else
 	/*
 	 * Attempt to do a CPU reset via the keyboard controller,
 	 * do not turn off the GateA20, as any machine that fails
 	 * to do the reset here would then end up in no man's land.
 	 */
 
 #if !defined(BROKEN_KEYBOARD_RESET)
 	outb(IO_KBD + 4, 0xFE);
 	DELAY(500000);	/* wait 0.5 sec to see if that did it */
 	printf("Keyboard reset did not work, attempting CPU shutdown\n");
 	DELAY(1000000);	/* wait 1 sec for printf to complete */
 #endif
 #endif /* PC98 */
 	/* force a shutdown by unmapping entire address space ! */
 	bzero((caddr_t) PTD, PAGE_SIZE);
 
 	/* "good night, sweet prince .... <THUNK!>" */
 	invltlb();
 	/* NOTREACHED */
 	while(1);
 }
 
 /*
  * Try to grow p's stack to cover address sp.  Returns 1 when
  * vm_map_growstack() reports KERN_SUCCESS and 0 otherwise.
  */
 int
 grow_stack(struct proc *p, u_int sp)
 {
 
 	return ((vm_map_growstack(p, sp) == KERN_SUCCESS) ? 1 : 0);
 }
 
 /*
  * Software interrupt handler for queued VM system processing: runs
  * busdma_swi() when busdma work is pending.  The argument is unused.
  */
 void
 swi_vm(void *dummy)
 {
 
 	if (busdma_swi_pending == 0)
 		return;
 	busdma_swi();
 }
 
 /*
  * Tell whether this address is in some physical memory region.
  * Currently used by the kernel coredump code in order to avoid
  * dumping the ``ISA memory hole'' which could cause indefinite hangs,
  * or other unpredictable behaviour.
  */
 
 /*
  * Return 1 if addr is taken to be ordinary physical memory and 0 if it
  * falls in a known hole (currently only the ISA hole, and only when
  * DEV_ISA is configured).
  */
 int
 is_physical_memory(vm_offset_t addr)
 {
 
 #ifdef DEV_ISA
 	/* The ISA ``memory hole'' spans 0xa0000 up to, not including, 0x100000. */
 	if (0xa0000 <= addr && addr < 0x100000)
 		return (0);
 #endif
 
 	/*
 	 * Tests for other known memory-mapped devices (PCI?) would be
 	 * added here.
 	 */
 
 	return (1);
 }
Index: head/sys/amd64/conf/GENERIC
===================================================================
--- head/sys/amd64/conf/GENERIC	(revision 82308)
+++ head/sys/amd64/conf/GENERIC	(revision 82309)
@@ -1,235 +1,237 @@
 #
 # GENERIC -- Generic kernel configuration file for FreeBSD/i386
 #
 # For more information on this file, please read the handbook section on
 # Kernel Configuration Files:
 #
 #    http://www.FreeBSD.org/handbook/kernelconfig-config.html
 #
 # The handbook is also available locally in /usr/share/doc/handbook
 # if you've installed the doc distribution, otherwise always see the
 # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
 # latest information.
 #
 # An exhaustive list of options and more detailed explanations of the
 # device lines is also present in the NOTES configuration file. If you are
 # in doubt as to the purpose or necessity of a line, check first in NOTES.
 #
 # $FreeBSD$
 
 machine		i386
 cpu		I486_CPU
 cpu		I586_CPU
 cpu		I686_CPU
 ident		GENERIC
 maxusers	32
 
 #To statically compile in device wiring instead of /boot/device.hints
-#hints		"GENERIC.hints"		#Default places to look for devices.
+hints		"GENERIC.hints"		#Default places to look for devices.
 
 makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 
 options 	MATH_EMULATE		#Support for x87 emulation
 options 	INET			#InterNETworking
 options 	INET6			#IPv6 communications protocols
 options 	FFS			#Berkeley Fast Filesystem
 options 	SOFTUPDATES		#Enable FFS soft updates support
 options 	MD_ROOT			#MD is a potential root device
 options 	NFS			#Network Filesystem
 options 	NFS_ROOT		#NFS usable as root device, NFS required
 options 	MSDOSFS			#MSDOS Filesystem
 options 	CD9660			#ISO 9660 Filesystem
 options 	PROCFS			#Process filesystem
 options 	COMPAT_43		#Compatible with BSD 4.3 [KEEP THIS!]
 options 	SCSI_DELAY=15000	#Delay (in ms) before probing SCSI
 options 	UCONSOLE		#Allow users to grab the console
 #options 	USERCONFIG		#boot -c editor
 #options 	VISUAL_USERCONFIG	#visual boot -c editor
 options 	KTRACE			#ktrace(1) support
 options 	SYSVSHM			#SYSV-style shared memory
 options 	SYSVMSG			#SYSV-style message queues
 options 	SYSVSEM			#SYSV-style semaphores
 options 	P1003_1B		#Posix P1003_1B real-time extensions
 options 	_KPOSIX_PRIORITY_SCHEDULING
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 
 # Debugging for use in -current
 options 	DDB
 options 	INVARIANTS
 options 	INVARIANT_SUPPORT
 options 	WITNESS
+options 	UPAGES=4
+options		CPU_ENABLE_SSE
 
 # To make an SMP kernel, the next two are needed
 #options 	SMP			# Symmetric MultiProcessor Kernel
 #options 	APIC_IO			# Symmetric (APIC) I/O
 
 device		isa
 device		eisa
 device		pci
 
 # Floppy drives
 device		fdc
 
 # ATA and ATAPI devices
 device		ata
 device		atadisk			# ATA disk drives
 device		atapicd			# ATAPI CDROM drives
 device		atapifd			# ATAPI floppy drives
 device		atapist			# ATAPI tape drives
 options 	ATA_STATIC_ID		#Static device numbering
 
 # SCSI Controllers
 device		ahb		# EISA AHA1742 family
 device		ahc		# AHA2940 and onboard AIC7xxx devices
 device		amd		# AMD 53C974 (Tekram DC-390(T))
 device		isp		# Qlogic family
 #device		ncr		# NCR/Symbios Logic
 device		sym		# NCR/Symbios Logic (newer chipsets + those of `ncr')
 
 device		adv		# Advansys SCSI adapters
 device		adw		# Advansys wide SCSI adapters
 device		aha		# Adaptec 154x SCSI adapters
 device		aic		# Adaptec 15[012]x SCSI adapters, AIC-6[23]60.
 device		bt		# Buslogic/Mylex MultiMaster SCSI adapters
 
 device		ncv		# NCR 53C500
 device		nsp		# Workbit Ninja SCSI-3
 device		stg		# TMC 18C30/18C50
 
 # RAID controllers interfaced to the SCSI subsystem
 device		asr		# DPT SmartRAID V, VI and Adaptec SCSI RAID
 device		dpt		# DPT Smartcache III, IV - See NOTES for options!
 device		mly		# Mylex AcceleRAID/eXtremeRAID
 
 # SCSI peripherals
 device		scbus		# SCSI bus (required)
 device		da		# Direct Access (disks)
 device		sa		# Sequential Access (tape etc)
 device		cd		# CD
 device		pass		# Passthrough device (direct SCSI access)
 
 # RAID controllers
 device		aac		# Adaptec FSA RAID
 device		amr		# AMI MegaRAID
 device		ida		# Compaq Smart RAID
 device		mlx		# Mylex DAC960 family
 device		twe		# 3ware ATA RAID
 
 # atkbdc0 controls both the keyboard and the PS/2 mouse
 device		atkbdc	1	# At keyboard controller
 device		atkbd		# at keyboard
 device		psm		# psm mouse
 
 device		vga		# VGA screen
 
 # splash screen/screen saver
 device		splash
 
 # syscons is the default console driver, resembling an SCO console
 device		sc	1
 
 # Enable this for the pcvt (VT220 compatible) console driver
 #device		vt
 #options 	XSERVER			# support for X server on a vt console
 #options 	FAT_CURSOR		# start with block cursor
 
 # Floating point support - do not disable.
 device		npx
 
 # Power management support (see NOTES for more options)
 device		apm
 # Add suspend/resume support for the i8254.
 device		pmtimer
 
 # PCCARD (PCMCIA) support
 device		card		# pccard bus
 device		pcic		# PCMCIA bridge
 
 # Serial (COM) ports
 device		sio		# 8250, 16[45]50 based serial ports
 
 # Parallel port
 device		ppc
 device		ppbus		# Parallel port bus (required)
 device		lpt		# Printer
 device		plip		# TCP/IP over parallel
 device		ppi		# Parallel port interface device
 #device		vpo		# Requires scbus and da
 
 
 # PCI Ethernet NICs.
 device		de		# DEC/Intel DC21x4x (``Tulip'')
 device		vx		# 3Com 3c590, 3c595 (``Vortex'')
 device		txp		# 3Com 3cR990 (``Typhoon'')
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
 device		miibus		# MII bus support
 device		dc		# DEC/Intel 21143 and various workalikes
 device		fxp		# Intel EtherExpress PRO/100B (82557, 82558)
 device		pcn		# AMD Am79C97x PCI 10/100 NICs
 device		rl		# RealTek 8129/8139
 device		sf		# Adaptec AIC-6915 (``Starfire'')
 device		sis		# Silicon Integrated Systems SiS 900/SiS 7016
 device		ste		# Sundance ST201 (D-Link DFE-550TX)
 device		tl		# Texas Instruments ThunderLAN
 device		tx		# SMC EtherPower II (83c170 ``EPIC'')
 device		vr		# VIA Rhine, Rhine II
 device		wb		# Winbond W89C840F
 device		wx		# Intel Gigabit Ethernet Card (``Wiseman'')
 device		xl		# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # ISA Ethernet NICs.  pccard nics included.
 device		cs		# Crystal Semiconductor CS89x0 NIC
 # 'device ed' requires 'device miibus'
 device		ed		# NE[12]000, SMC Ultra, 3c503, DS8390 cards
 device		ex		# Intel EtherExpress Pro/10 and Pro/10+
 device		ep		# Etherlink III based cards
 device		fe		# Fujitsu MB8696x based cards
 device		sn		# SMC's 9000 series of ethernet chips
 device		xe		# Xircom pccard ethernet
 
 # The probe order of these is presently determined by i386/isa/isa_compat.c.
 #device		ie
 #device		le
 device		lnc
 
 # Wireless NIC cards
 device		an		# Aironet 4500/4800 802.11 wireless NICs. 
 device		awi		# BayStack 660 and others
 device		wi		# WaveLAN/IEEE 802.11 wireless NICs. 
 #device		wl		# Older non 802.11 Wavelan wireless NIC.
 
 # Pseudo devices - the number indicates how many units to allocate.
 device		random		# Entropy device
 device		loop		# Network loopback
 device		ether		# Ethernet support
 device		sl		# Kernel SLIP
 device		ppp	1	# Kernel PPP
 device		tun		# Packet tunnel.
 device		pty		# Pseudo-ttys (telnet etc)
 device		md		# Memory "disks"
 device		gif		# IPv6 and IPv4 tunneling
 device		faith	1	# IPv6-to-IPv4 relaying (translation)
 
 # The `bpf' device enables the Berkeley Packet Filter.
 # Be aware of the administrative consequences of enabling this!
 device		bpf		# Berkeley packet filter
 
 # USB support
 device		uhci		# UHCI PCI->USB interface
 device		ohci		# OHCI PCI->USB interface
 device		usb		# USB Bus (required)
 #device		udbp		# USB Double Bulk Pipe devices
 device		ugen		# Generic
 device		uhid		# "Human Interface Devices"
 device		ukbd		# Keyboard
 device		ulpt		# Printer
 device		umass		# Disks/Mass storage - Requires scbus and da
 device		ums		# Mouse
 device		urio		# Diamond Rio 500 MP3 player
 device		uscanner	# Scanners
 # USB Ethernet, requires mii
 device		aue		# ADMtek USB ethernet
 device		cue		# CATC USB ethernet
 device		kue		# Kawasaki LSI USB ethernet
Index: head/sys/amd64/include/mptable.h
===================================================================
--- head/sys/amd64/include/mptable.h	(revision 82308)
+++ head/sys/amd64/include/mptable.h	(revision 82309)
@@ -1,2440 +1,2442 @@
 /*
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_cpu.h"
+#include "opt_upages.h"
 
 #ifdef SMP
 #include <machine/smptests.h>
 #else
 #error
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
 #include <sys/dkstat.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 #include <machine/apic.h>
 #include <machine/atomic.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/mpapic.h>
 #include <machine/psl.h>
 #include <machine/segments.h>
 #include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
 #include <machine/tss.h>
 #include <machine/specialreg.h>
 #include <machine/globaldata.h>
+#include <machine/privatespace.h>
 
 #if defined(APIC_IO)
 #include <machine/md_var.h>		/* setidt() */
 #include <i386/isa/icu.h>		/* IPIs */
 #include <i386/isa/intr_machdep.h>	/* IPIs */
 #endif	/* APIC_IO */
 
 #if defined(TEST_DEFAULT_CONFIG)
 #define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
 #else
 #define MPFPS_MPFB1	mpfps->mpfb1
 #endif  /* TEST_DEFAULT_CONFIG */
 
 #define WARMBOOT_TARGET		0
 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
 
 #ifdef PC98
 #define BIOS_BASE		(0xe8000)
 #define BIOS_SIZE		(0x18000)
 #else
 #define BIOS_BASE		(0xf0000)
 #define BIOS_SIZE		(0x10000)
 #endif
 #define BIOS_COUNT		(BIOS_SIZE/4)
 
 #define CMOS_REG		(0x70)
 #define CMOS_DATA		(0x71)
 #define BIOS_RESET		(0x0f)
 #define BIOS_WARM		(0x0a)
 
 #define PROCENTRY_FLAG_EN	0x01
 #define PROCENTRY_FLAG_BP	0x02
 #define IOAPICENTRY_FLAG_EN	0x01
 
 
 /*
  * MP Floating Pointer Structure.
  * search_for_sig() finds it by scanning memory for the MP_SIG ("_MP_")
  * word, and i386_mp_probe() stores the address it finds in mpfps.
  */
 typedef struct MPFPS {
 	char    signature[4];
 	void   *pap;		/* MP config table header, see mptable_pass1() */
 	u_char  length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  mpfb1;		/* nonzero: a default configuration type is in use */
 	u_char  mpfb2;		/* bit 0x80 set: PIC mode, clear: virtual wire mode */
 	u_char  mpfb3;
 	u_char  mpfb4;
 	u_char  mpfb5;
 }      *mpfps_t;
 
 /*
  * MP Configuration Table Header.
  * Reached through MPFPS.pap; the base table entries walked by
  * mptable_pass1() and mptable_pass2() start right after this header.
  */
 typedef struct MPCTH {
 	char    signature[4];
 	u_short base_table_length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  oem_id[8];
 	u_char  product_id[12];
 	void   *oem_table_pointer;
 	u_short oem_table_size;
 	u_short entry_count;	/* number of base table entries to walk */
 	void   *apic_address;	/* copied into cpu_apic_address by pass 1 */
 	u_short extended_table_length;
 	u_char  extended_table_checksum;
 	u_char  reserved;
 }      *mpcth_t;
 
 
 /*
  * Base table entry type 0: one per processor.
  * NOTE(review): basetable_entry_types lists this entry as 20 bytes,
  * which only matches this layout when u_long is 4 bytes wide -- confirm
  * before building for a target with a wider long.
  */
 typedef struct PROCENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  cpu_flags;	/* PROCENTRY_FLAG_EN is tested in mptable_pass1() */
 	u_long  cpu_signature;
 	u_long  feature_flags;
 	u_long  reserved1;
 	u_long  reserved2;
 }      *proc_entry_ptr;
 
 /* Base table entry type 1: one per bus; counted in mp_nbusses by pass 1. */
 typedef struct BUSENTRY {
 	u_char  type;
 	u_char  bus_id;
 	char    bus_type[6];	/* presumably a bus name string -- TODO confirm */
 }      *bus_entry_ptr;
 
 /* Base table entry type 2: one per I/O APIC. */
 typedef struct IOAPICENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  apic_flags;	/* IOAPICENTRY_FLAG_EN: entry is usable */
 	void   *apic_address;	/* recorded in io_apic_address[] by pass 1 */
 }      *io_apic_entry_ptr;
 
 /*
  * Base table interrupt entry.  Type 3 entries are counted in nintrs by
  * mptable_pass1() and handed to int_entry() by mptable_pass2(); type 4
  * entries are stepped over by both passes.
  */
 typedef struct INTENTRY {
 	u_char  type;
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 }      *int_entry_ptr;
 
 /*
  * descriptions of MP basetable entries: the entry type code, the size
  * of such an entry in bytes, and a printable name for it.
  */
 typedef struct BASETABLE_ENTRY {
 	u_char  type;
 	u_char  length;		/* used to step from one table entry to the next */
 	char    name[16];
 }       basetable_entry;
 
 /*
  * this code MUST be enabled here and in mpboot.s.
  * it follows the very early stages of AP boot by placing values in CMOS ram.
  * it NORMALLY will never be needed and thus the primitive method for enabling.
  *
 #define CHECK_POINTS
  */
 
 #if defined(CHECK_POINTS) && !defined(PC98)
 #define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
 
 #define CHECK_INIT(D);				\
 	CHECK_WRITE(0x34, (D));			\
 	CHECK_WRITE(0x35, (D));			\
 	CHECK_WRITE(0x36, (D));			\
 	CHECK_WRITE(0x37, (D));			\
 	CHECK_WRITE(0x38, (D));			\
 	CHECK_WRITE(0x39, (D));
 
 #define CHECK_PRINT(S);				\
 	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
 	   (S),					\
 	   CHECK_READ(0x34),			\
 	   CHECK_READ(0x35),			\
 	   CHECK_READ(0x36),			\
 	   CHECK_READ(0x37),			\
 	   CHECK_READ(0x38),			\
 	   CHECK_READ(0x39));
 
 #else				/* CHECK_POINTS */
 
 #define CHECK_INIT(D)
 #define CHECK_PRINT(S)
 
 #endif				/* CHECK_POINTS */
 
 /*
  * Values to send to the POST hardware.
  */
 #define MP_BOOTADDRESS_POST	0x10
 #define MP_PROBE_POST		0x11
 #define MPTABLE_PASS1_POST	0x12
 
 #define MP_START_POST		0x13
 #define MP_ENABLE_POST		0x14
 #define MPTABLE_PASS2_POST	0x15
 
 #define START_ALL_APS_POST	0x16
 #define INSTALL_AP_TRAMP_POST	0x17
 #define START_AP_POST		0x18
 
 #define MP_ANNOUNCE_POST	0x19
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
 int	current_postcode;
 
 /** XXX FIXME: what system files declare these??? */
 extern struct region_descriptor r_gdt, r_idt;
 
 int	bsp_apic_ready = 0;	/* flags usability of BSP apic */
 int	mp_naps;		/* # of Application processors */
 int	mp_nbusses;		/* # of busses */
 int	mp_napics;		/* # of IO APICs */
 int	boot_cpu_id;		/* designated BSP */
 vm_offset_t cpu_apic_address;
 vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
 extern	int nkpt;
 
 u_int32_t cpu_apic_versions[MAXCPU];
 u_int32_t *io_apic_versions;
 
 #ifdef APIC_INTR_REORDER
 struct {
 	volatile int *location;
 	int bit;
 } apic_isrbit_location[32];
 #endif
 
 struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
 
 /*
  * APIC ID logical/physical mapping structures.
  * We oversize these to simplify boot-time config.
  */
 int     cpu_num_to_apic_id[NAPICID];
 int     io_num_to_apic_id[NAPICID];
 int     apic_id_to_logical[NAPICID];
 
 
 /* AP uses this during bootstrap.  Do not staticize.  */
 char *bootSTK;
 static int bootAP;
 
 /* Hotwire a 0->4MB V==P mapping */
 extern pt_entry_t *KPTphys;
 
 /* SMP page table page */
 extern pt_entry_t *SMPpt;
 
 struct pcb stoppcbs[MAXCPU];
 
 int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
 
 /*
  * Local data and functions.
  */
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready = 0;
 
 static int	mp_capable;
 static u_int	boot_address;
 static u_int	base_memory;
 
 static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
 static mpfps_t	mpfps;
 static int	search_for_sig(u_int32_t target, int count);
 static void	mp_enable(u_int boot_addr);
 
 static void	mptable_pass1(void);
 static int	mptable_pass2(void);
 static void	default_mp_table(int type);
 static void	fix_mp_table(void);
 static void	setup_apic_irq_mapping(void);
 static void	init_locks(void);
 static int	start_all_aps(u_int boot_addr);
 static void	install_ap_tramp(u_int boot_addr);
 static int	start_ap(int logicalCpu, u_int boot_addr);
 void		ap_init(void);
 static int	apic_int_is_bus_type(int intr, int bus_type);
 static void	release_aps(void *dummy);
 
 /*
  * initialize all the SMP locks
  */
 
 /* critical region around IO APIC, apic_imen */
 struct mtx		imen_mtx;
 
 /* lock region used by kernel profiling */
 int	mcount_lock;
 
 #ifdef USE_COMLOCK
 /* locks com (tty) data/hardware accesses: a FASTINTR() */
 struct mtx		com_mtx;
 #endif /* USE_COMLOCK */
 
 /*
  * Set up the SMP locks initialized from this file.  Only the com (tty)
  * spin mutex is handled here, and only when USE_COMLOCK is defined;
  * without it the function body is empty.
  */
 static void
 init_locks(void)
 {
 
 #ifdef USE_COMLOCK
 	mtx_init(&com_mtx, "com", MTX_SPIN);
 #endif /* USE_COMLOCK */
 }
 
 /*
  * Work out where in base memory the AP trampoline code can live.
  *
  * basemem is the amount of base memory in kilobytes.  The byte count is
  * remembered in base_memory, and the chosen 4k-aligned address is both
  * stored in boot_address and returned.
  */
 u_int
 mp_bootaddress(u_int basemem)
 {
 	POSTCODE(MP_BOOTADDRESS_POST);
 
 	/* kilobytes -> bytes */
 	base_memory = basemem * 1024;
 
 	/* start with the 4k page that holds the top of base memory */
 	boot_address = base_memory & ~0xfff;
 
 	/* too little room above that boundary: fall back one more page */
 	if (bootMP_size > (base_memory - boot_address))
 		boot_address -= 4096;
 
 	return (boot_address);
 }
 
 
 /*
  * Look for an Intel MP spec table (ie, SMP capable hardware).
  */
 void
 i386_mp_probe(void)
 {
 	int     x;
 	u_long  segment;
 	u_int32_t target;
 
 	POSTCODE(MP_PROBE_POST);
 
 	/* see if EBDA exists */
 	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
 		/* search first 1K of EBDA */
 		target = (u_int32_t) (segment << 4);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	} else {
 		/* last 1K of base memory, effective 'top of base' passed in */
 		target = (u_int32_t) (base_memory - 0x400);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	}
 
 	/* search the BIOS */
 	target = (u_int32_t) BIOS_BASE;
 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
 		goto found;
 
 	/* nothing found */
 	mpfps = (mpfps_t)0;
 	mp_capable = 0;
 	return;
 
 found:
 	/* calculate needed resources */
 	mpfps = (mpfps_t)x;
 	mptable_pass1();
 
 	/* flag fact that we are running multiple processors */
 	mp_capable = 1;
 }
 
 /*
  * Report whether MP hardware was found.  Returns mp_capable, which
  * i386_mp_probe() sets to 1 on success and 0 otherwise.  As a side
  * effect all_cpus is set to 1 on every call.
  */
 int
 cpu_mp_probe(void)
 {
 	/*
 	 * Record BSP in CPU map
 	 * This is done here so that MBUF init code works correctly.
 	 */
 	all_cpus = 1;
 
 	return (mp_capable);
 }
 
 /*
  * Bring up the SMP hardware: enable the MP configuration found by the
  * probe (which also starts the AP's), then call cpu_setregs().
  * Panics when the probe did not find MP capable hardware.
  */
 void
 cpu_mp_start(void)
 {
 	POSTCODE(MP_START_POST);
 
 	/* without an MP capable motherboard there is nothing to start */
 	if (!mp_capable)
 		panic("MP hardware not found!");
 	else
 		mp_enable(boot_address);
 
 	cpu_setregs();
 }
 
 
 /*
  * Print a summary of the SMP hardware: one line for the BSP, one line
  * per AP, and one line per I/O APIC when APIC_IO is configured (or a
  * warning that APIC I/O is disabled when it is not).
  */
 void
 cpu_mp_announce(void)
 {
 	int     unit;
 
 	POSTCODE(MP_ANNOUNCE_POST);
 
 	/* the BSP is reported as cpu0 */
 	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
 	printf(", version: 0x%08x", cpu_apic_versions[0]);
 	printf(", at 0x%08x\n", cpu_apic_address);
 
 	/* the AP's occupy logical cpu numbers 1..mp_naps */
 	unit = 1;
 	while (unit <= mp_naps) {
 		printf(" cpu%d (AP):  apic id: %2d", unit, CPU_TO_ID(unit));
 		printf(", version: 0x%08x", cpu_apic_versions[unit]);
 		printf(", at 0x%08x\n", cpu_apic_address);
 		++unit;
 	}
 
 #if defined(APIC_IO)
 	unit = 0;
 	while (unit < mp_napics) {
 		printf(" io%d (APIC): apic id: %2d", unit, IO_TO_ID(unit));
 		printf(", version: 0x%08x", io_apic_versions[unit]);
 		printf(", at 0x%08x\n", io_apic_address[unit]);
 		++unit;
 	}
 #else
 	printf(" Warning: APIC I/O disabled\n");
 #endif	/* APIC_IO */
 }
 
 /*
  * AP cpu's call this to sync up protected mode.
  *
  * The CPU being started is identified by bootAP.  The function builds
  * that CPU's slice of the GDT, loads the GDT, IDT and default LDT, sets
  * up and loads the per-CPU TSS, and finishes with pmap_set_opt().
  */
 void
 init_secondary(void)
 {
 	int	gsel_tss;
 	int	x, myid = bootAP;
 
 	/* point the private and TSS segment templates at this CPU's data */
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
 	SMP_prvspace[myid].globaldata.gd_prvspace =
 		&SMP_prvspace[myid].globaldata;
 
 	/* convert the templates into this CPU's NGDT descriptors */
 	for (x = 0; x < NGDT; x++) {
 		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base = (int) &gdt[myid * NGDT];
 	lgdt(&r_gdt);			/* does magic intra-segment return */
 
 	lidt(&r_idt);
 
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/* fill in the per-CPU TSS and load the task register with it */
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
 	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	ltr(gsel_tss);
 
 	pmap_set_opt();
 }
 
 
 #if defined(APIC_IO)
 /*
  * Final configuration of the BSP's local APIC:
  *  - disable 'pic mode'.
  *  - disable 'virtual wire mode'.
  *  - enable NMI.
  */
 void
 bsp_apic_configure(void)
 {
 	u_char		byte;
 	u_int32_t	temp;
 
 	/* leave 'pic mode' if necessary */
 	if (picmode) {
 		/* read-modify-write of the IMCR through ports 0x22/0x23 */
 		outb(0x22, 0x70);	/* select IMCR */
 		byte = inb(0x23);	/* current contents */
 		byte |= 0x01;		/* mask external INTR */
 		outb(0x23, byte);	/* disconnect 8259s/NMI */
 	}
 
 	/* mask lint0 (the 8259 'virtual wire' connection) */
 	temp = lapic.lvt_lint0;
 	temp |= APIC_LVT_M;		/* set the mask */
 	lapic.lvt_lint0 = temp;
 
         /* setup lint1 to handle NMI */
         temp = lapic.lvt_lint1;
         temp &= ~APIC_LVT_M;		/* clear the mask */
         lapic.lvt_lint1 = temp;
 
 	/* with a verbose boot, dump the resulting APIC state */
 	if (bootverbose)
 		apic_dump("bsp_apic_configure()");
 }
 #endif  /* APIC_IO */
 
 
 /*******************************************************************
  * local functions and data
  */
 
 /*
  * start the SMP system
  *
  * Runs the second pass over the MP table under a temporary V == P
  * mapping, applies a default configuration when the table asks for one,
  * fixes up the collected data, programs the I/O APICs and installs the
  * IPI vectors (APIC_IO only), initializes the SMP locks and finally
  * starts the Application Processors.
  *
  * boot_addr is handed unchanged to start_all_aps().
  */
 static void
 mp_enable(u_int boot_addr)
 {
 	int     x;
 #if defined(APIC_IO)
 	int     apic;
 	u_int   ux;
 #endif	/* APIC_IO */
 
 	POSTCODE(MP_ENABLE_POST);
 
 	/* turn on 4MB of V == P addressing so we can get to MP table */
 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
 	invltlb();
 
 	/* examine the MP table for needed info, uses physical addresses */
 	x = mptable_pass2();
 
 	/* drop the temporary V == P mapping again */
 	*(int *)PTD = 0;
 	invltlb();
 
 	/* can't process default configs till the CPU APIC is pmapped */
 	if (x)
 		default_mp_table(x);
 
 	/* post scan cleanup */
 	fix_mp_table();
 	setup_apic_irq_mapping();
 
 #if defined(APIC_IO)
 
 	/* fill the LOGICAL io_apic_versions table */
 	for (apic = 0; apic < mp_napics; ++apic) {
 		ux = io_apic_read(apic, IOAPIC_VER);
 		io_apic_versions[apic] = ux;
 		io_apic_set_id(apic, IO_TO_ID(apic));
 	}
 
 	/* program each IO APIC in the system */
 	for (apic = 0; apic < mp_napics; ++apic)
 		if (io_apic_setup(apic) < 0)
 			panic("IO APIC setup failure");
 
 	/* install a 'Spurious INTerrupt' vector */
 	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for TLB invalidation */
 	setidt(XINVLTLB_OFFSET, Xinvltlb,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forwarding hardclock() */
 	setidt(XHARDCLOCK_OFFSET, Xhardclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for forwarding statclock() */
 	setidt(XSTATCLOCK_OFFSET, Xstatclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for all-CPU rendezvous */
 	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forcing an additional software trap */
 	setidt(XCPUAST_OFFSET, Xcpuast,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for CPU stop/restart */
 	setidt(XCPUSTOP_OFFSET, Xcpustop,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 #if defined(TEST_TEST1)
 	/* install a "fake hardware INTerrupt" vector */
 	setidt(XTEST1_OFFSET, Xtest1,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 #endif  /** TEST_TEST1 */
 
 #endif	/* APIC_IO */
 
 	/* initialize all SMP locks */
 	init_locks();
 
 	/* start each Application Processor */
 	start_all_aps(boot_addr);
 }
 
 
 /*
  * look for the MP spec signature
  */
 
 /* string defined by the Intel MP Spec as identifying the MP table */
 #define MP_SIG		0x5f504d5f	/* _MP_ */
 #define NEXT(X)		((X) += 4)
 /*
  * Scan count 32-bit words starting at physical address target (accessed
  * through the KERNBASE mapping) for the MP_SIG word.  Only every fourth
  * word, i.e. every 16 bytes, is examined.  Returns the physical byte
  * address of the match, or -1 when the signature is not present.
  */
 static int
 search_for_sig(u_int32_t target, int count)
 {
 	u_int32_t *words = (u_int32_t *) (KERNBASE + target);
 	int     idx;
 
 	idx = 0;
 	while (idx < count) {
 		if (words[idx] == MP_SIG) {
 			/* make array index a byte index */
 			return (target + (idx * sizeof(u_int32_t)));
 		}
 		idx += 4;
 	}
 
 	return (-1);
 }
 
 
 /*
  * Per-type description of the MP base table entries: {type, length in
  * bytes, name}.  mptable_pass1() and mptable_pass2() index this array
  * with an entry's type byte to find how far to advance to the next entry.
  */
 static basetable_entry basetable_entry_types[] =
 {
 	{0, 20, "Processor"},
 	{1, 8, "Bus"},
 	{2, 8, "I/O APIC"},
 	{3, 8, "I/O INT"},
 	{4, 8, "Local INT"}
 };
 
 /* One slot of bus_data[]; mptable_pass2() marks unused slots with id 0xff. */
 typedef struct BUSDATA {
 	u_char  bus_id;
 	enum busTypes bus_type;
 }       bus_datum;
 
 /*
  * One slot of io_apic_ints[].  mptable_pass2() resets int_type and
  * int_vector to 0xff; assign_apic_irq() later stores the irq number in
  * int_vector for slots matching its apic/intpin.
  */
 typedef struct INTDATA {
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 	u_char	int_vector;	/* 0xff while no irq has been assigned */
 }       io_int, local_int;
 
 /* Pairs a bus type code with its name string (6 characters plus NUL). */
 typedef struct BUSTYPENAME {
 	u_char  type;
 	char    name[7];
 }       bus_type_name;
 
 /*
  * Bus type codes and their name strings; "---" marks slots with no
  * known bus type.  Presumably searched by lookup_bus_type() -- confirm
  * against that function.
  * NOTE(review): MCA is listed twice (4th and 7th rows); confirm whether
  * one of the two rows was meant to be a different or unknown type.
  */
 static bus_type_name bus_type_table[] =
 {
 	{CBUS, "CBUS"},
 	{CBUSII, "CBUSII"},
 	{EISA, "EISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{ISA, "ISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{PCI, "PCI"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{XPRESS, "XPRESS"},
 	{UNKNOWN_BUSTYPE, "---"}
 };
 /*
  * from MP spec v1.4, table 5-1
  *
  * One row per default configuration type; mptable_pass1() selects the
  * row with (MPFPS_MPFB1 - 1) and reads column 0 as the bus count.
  * 255 fills the id1/type1 columns of the single-bus rows.
  */
 static int default_data[7][5] =
 {
 /*   nbus, id0, type0, id1, type1 */
 	{1, 0, ISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, MCA, 255, 255},
 	{2, 0, ISA, 1, PCI},
 	{2, 0, EISA, 1, PCI},
 	{2, 0, MCA, 1, PCI}
 };
 
 
 /* the bus data */
 static bus_datum *bus_data;
 
 /* the IO INT data, one entry per possible APIC INTerrupt */
 static io_int  *io_apic_ints;
 
 static int nintrs;
 
 static int processor_entry	__P((proc_entry_ptr entry, int cpu));
 static int bus_entry		__P((bus_entry_ptr entry, int bus));
 static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
 static int int_entry		__P((int_entry_ptr entry, int intr));
 static int lookup_bus_type	__P((char *name));
 
 
 /*
  * 1st pass on motherboard's Intel MP specification table.
  *
  * initializes:
  *	mp_ncpus = 1
  *
  * determines:
  *	cpu_apic_address (common to all CPUs)
  *	io_apic_address[N]
  *	mp_naps
  *	mp_nbusses
  *	mp_napics
  *	nintrs
  *
  * Panics if the configuration table header is missing, if an entry of
  * unknown type is met, or if the table lists more enabled I/O APICs
  * than io_apic_address[] has room for.
  */
 static void
 mptable_pass1(void)
 {
 	int	x;
 	mpcth_t	cth;
 	void*	position;
 	int	count;
 	int	type;
 
 	POSTCODE(MPTABLE_PASS1_POST);
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		io_apic_address[x] = ~0;	/* IO APIC address table */
 	}
 
 	/* init everything to empty */
 	mp_naps = 0;
 	mp_nbusses = 0;
 	mp_napics = 0;
 	nintrs = 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0) {
 		/* use default addresses */
 		cpu_apic_address = DEFAULT_APIC_BASE;
 		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
 
 		/* fill in with defaults */
 		mp_naps = 2;		/* includes BSP */
 		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
 #if defined(APIC_IO)
 		mp_napics = 1;
 		nintrs = 16;
 #endif	/* APIC_IO */
 	}
 	else {
 		if ((cth = mpfps->pap) == 0)
 			panic("MP Configuration Table Header MISSING!");
 
 		cpu_apic_address = (vm_offset_t) cth->apic_address;
 
 		/* walk the table, recording info of interest */
 		position = (u_char *) cth + sizeof(struct MPCTH);
 		count = cth->entry_count;
 
 		while (count--) {
 			switch (type = *(u_char *) position) {
 			case 0: /* processor_entry */
 				if (((proc_entry_ptr)position)->cpu_flags
 					& PROCENTRY_FLAG_EN)
 					++mp_naps;
 				break;
 			case 1: /* bus_entry */
 				++mp_nbusses;
 				break;
 			case 2: /* io_apic_entry */
 				if (((io_apic_entry_ptr)position)->apic_flags
 					& IOAPICENTRY_FLAG_EN) {
 					/* never write past io_apic_address[] */
 					if (mp_napics >= NAPICID)
 						panic("too many IO APICs in MP table!");
 					io_apic_address[mp_napics++] =
 					    (vm_offset_t)((io_apic_entry_ptr)
 						position)->apic_address;
 				}
 				break;
 			case 3: /* I/O interrupt entry */
 				++nintrs;
 				break;
 			case 4:	/* local interrupt entry, not recorded */
 				break;
 			default:
 				panic("mpfps Base Table HOSED!");
 				/* NOTREACHED */
 			}
 
 			/*
 			 * Step over this entry; its size depends on its
 			 * type.  The sum is assigned back instead of using
 			 * a cast on the left-hand side, which is not
 			 * standard C.
 			 */
 			position = (u_char *) position +
 			    basetable_entry_types[type].length;
 		}
 	}
 
 	/* qualify the numbers */
 	if (mp_naps > MAXCPU) {
 		printf("Warning: only using %d of %d available CPUs!\n",
 			MAXCPU, mp_naps);
 		mp_naps = MAXCPU;
 	}
 
 	/*
 	 * Count the BSP.
 	 * This is also used as a counter while starting the APs.
 	 */
 	mp_ncpus = 1;
 
 	--mp_naps;	/* subtract the BSP */
 }
 
 
 /*
  * 2nd pass on motherboard's Intel MP specification table.
  *
  * sets:
  *	boot_cpu_id
  *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
  *	CPU_TO_ID(N), logical CPU to APIC ID table
  *	IO_TO_ID(N), logical IO to APIC ID table
  *	bus_data[N]
  *	io_apic_ints[N]
  *
  * Also allocates io_apic_versions, ioapic, io_apic_ints and bus_data,
  * sized from the counts gathered by the first pass, and maps each I/O
  * APIC page through SMPpt.
  *
  * Returns 0 when the configuration table was walked, or the default
  * configuration type (MPFPS_MPFB1) when the table asks for one.
  * Panics if the table header is missing, if an entry of unknown type is
  * met, or if boot_cpu_id is still -1 after the walk.
  */
 static int
 mptable_pass2(void)
 {
 	int     x;
 	mpcth_t cth;
 	void*   position;
 	int     count;
 	int     type;
 	int     apic, bus, cpu, intr;
 	int	i, j;
 	int	pgeflag;
 
 	POSTCODE(MPTABLE_PASS2_POST);
 
 	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
 
 	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
 	    M_DEVBUF, M_WAITOK);
 
 	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
 
 	/*
 	 * Give every I/O APIC a virtual address: reuse the SMPpt slot that
 	 * already maps its page frame, or claim the first empty slot.
 	 */
 	for (i = 0; i < mp_napics; i++) {
 		for (j = 0; j < mp_napics; j++) {
 			/* same page frame as a previous IO apic? */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
 			    (io_apic_address[i] & PG_FRAME)) {
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 			/* use this slot if available */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
 				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
 				    pgeflag | (io_apic_address[i] & PG_FRAME));
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 		}
 	}
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
 		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
 		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
 	}
 
 	/* clear bus data table */
 	for (x = 0; x < mp_nbusses; ++x)
 		bus_data[x].bus_id = 0xff;
 
 	/* clear IO APIC INT table */
 	for (x = 0; x < (nintrs + 1); ++x) {
 		io_apic_ints[x].int_type = 0xff;
 		io_apic_ints[x].int_vector = 0xff;
 	}
 
 	/* setup the cpu/apic mapping arrays */
 	boot_cpu_id = -1;
 
 	/* record whether PIC or virtual-wire mode */
 	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0)
 		return MPFPS_MPFB1;	/* return default configuration type */
 
 	if ((cth = mpfps->pap) == 0)
 		panic("MP Configuration Table Header MISSING!");
 
 	/* walk the table, recording info of interest */
 	position = (u_char *) cth + sizeof(struct MPCTH);
 	count = cth->entry_count;
 	apic = bus = intr = 0;
 	cpu = 1;				/* pre-count the BSP */
 
 	while (count--) {
 		switch (type = *(u_char *) position) {
 		case 0:
 			if (processor_entry(position, cpu))
 				++cpu;
 			break;
 		case 1:
 			if (bus_entry(position, bus))
 				++bus;
 			break;
 		case 2:
 			if (io_apic_entry(position, apic))
 				++apic;
 			break;
 		case 3:
 			if (int_entry(position, intr))
 				++intr;
 			break;
 		case 4:
 			/* int_entry(position); */
 			break;
 		default:
 			panic("mpfps Base Table HOSED!");
 			/* NOTREACHED */
 		}
 
 		/*
 		 * Step over this entry; its size depends on its type.  The
 		 * sum is assigned back instead of using a cast on the
 		 * left-hand side, which is not standard C.
 		 */
 		position = (u_char *) position +
 		    basetable_entry_types[type].length;
 	}
 
 	if (boot_cpu_id == -1)
 		panic("NO BSP found!");
 
 	/* report fact that its NOT a default configuration */
 	return 0;
 }
 
 
 /*
  * Bind interrupt number 'irq' to pin 'intpin' of logical IO APIC 'apic'.
  *
  * The int_to_apicintpin[] slot for 'irq' is filled in, and 'irq' is then
  * recorded as the vector of every INT (type 0) or ExtINT (type 3) entry
  * in io_apic_ints[] that is still unassigned (0xff) and targets the same
  * APIC and pin.  Panics if the slot for 'irq' is already occupied.
  */
 void
 assign_apic_irq(int apic, int intpin, int irq)
 {
 	int i;
 
 	if (int_to_apicintpin[irq].ioapic != -1)
 		panic("assign_apic_irq: inconsistent table");
 
 	int_to_apicintpin[irq].ioapic = apic;
 	int_to_apicintpin[irq].int_pin = intpin;
 	int_to_apicintpin[irq].apic_address = ioapic[apic];
 	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 
 	for (i = 0; i < nintrs; i++) {
 		/* only INT and ExtINT entries are eligible */
 		if (io_apic_ints[i].int_type != 0 &&
 		    io_apic_ints[i].int_type != 3)
 			continue;
 		/* skip entries that already carry a vector */
 		if (io_apic_ints[i].int_vector != 0xff)
 			continue;
 		/* must point at exactly this APIC and pin */
 		if (io_apic_ints[i].dst_apic_id != IO_TO_ID(apic))
 			continue;
 		if (io_apic_ints[i].dst_apic_int != intpin)
 			continue;
 		io_apic_ints[i].int_vector = irq;
 	}
 }
 
 /*
  * Undo assign_apic_irq(): release interrupt number 'irq'.
  *
  * The int_to_apicintpin[] slot for 'irq' is reset to its "free" state
  * (ioapic == -1) and every INT (type 0) or ExtINT (type 3) entry in
  * io_apic_ints[] that targets the APIC/pin the irq was bound to, and
  * that currently carries a vector, is marked unassigned (0xff) again.
  * Panics if 'irq' is not currently assigned.
  */
 void
 revoke_apic_irq(int irq)
 {
 	int x;
 	int oldapic;
 	int oldintpin;
 
 	if (int_to_apicintpin[irq].ioapic == -1)
 		panic("revoke_apic_irq: inconsistent table");
 
 	/* remember the binding before wiping the slot */
 	oldapic = int_to_apicintpin[irq].ioapic;
 	oldintpin = int_to_apicintpin[irq].int_pin;
 
 	int_to_apicintpin[irq].ioapic = -1;
 	int_to_apicintpin[irq].int_pin = 0;
 	int_to_apicintpin[irq].apic_address = NULL;
 	int_to_apicintpin[irq].redirindex = 0;
 
 	for (x = 0; x < nintrs; x++) {
 		/*
 		 * Match entries that HAVE a vector; testing for 0xff here
 		 * would only ever rewrite 0xff over 0xff and leave the
 		 * assigned entries untouched.
 		 */
 		if ((io_apic_ints[x].int_type == 0 ||
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector != 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 		    io_apic_ints[x].dst_apic_int == oldintpin)
 			io_apic_ints[x].int_vector = 0xff;
 	}
 }
 
 
 /*
  * Pick the first free interrupt number for MP-table entry 'intr' and
  * program the corresponding IO APIC pin.
  *
  * Nothing is done when the entry already has a vector, when it is
  * neither an INT (type 0) nor an ExtINT (type 3) entry, when it is the
  * ExtINT entry on pin 0 of IO APIC 0, or when int_to_apicintpin[] has
  * no free slot left.
  */
 static void
 allocate_apic_irq(int intr)
 {
 	int type;
 	int slot;
 	int apic;
 	int intpin;
 
 	/* Interrupt handler already assigned */
 	if (io_apic_ints[intr].int_vector != 0xff)
 		return;
 
 	type = io_apic_ints[intr].int_type;
 	if (type != 0) {
 		/* Not INT: only ExtInt on != (0, 0) qualifies */
 		if (type != 3)
 			return;
 		if (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 		    io_apic_ints[intr].dst_apic_int == 0)
 			return;
 	}
 
 	/* look for the first unused interrupt number */
 	for (slot = 0; slot < APIC_INTMAPSIZE; slot++)
 		if (int_to_apicintpin[slot].ioapic == -1)
 			break;
 	if (slot >= APIC_INTMAPSIZE)
 		return;		/* No free interrupt handlers */
 
 	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 	intpin = io_apic_ints[intr].dst_apic_int;
 
 	assign_apic_irq(apic, intpin, slot);
 	io_apic_setup_intpin(apic, intpin);
 }
 
 
 /*
  * Give logical IO APIC 'apic' the physical ID 'newid' in the in-memory
  * copy of the MP table.
  *
  * Interrupt entries that referred to 'oldid' are switched to 'newid' and
  * vice versa.  If another IO APIC currently owns 'newid' in the IO_TO_ID
  * mapping it receives 'oldid' in exchange.  A message is printed for
  * each IO APIC whose ID changes.
  */
 static void
 swap_apic_id(int apic, int oldid, int newid)
 {
 	int i;
 	int other;
 
 	if (oldid == newid)
 		return;			/* Nothing to do */
 
 	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 	       apic, oldid, newid);
 
 	/* Exchange the two physical IDs in the interrupt entries */
 	for (i = 0; i < nintrs; i++) {
 		if (io_apic_ints[i].dst_apic_id == oldid) {
 			io_apic_ints[i].dst_apic_id = newid;
 			continue;
 		}
 		if (io_apic_ints[i].dst_apic_id == newid)
 			io_apic_ints[i].dst_apic_id = oldid;
 	}
 
 	/* Find the IO APIC (if any) that presently holds 'newid' */
 	other = 0;
 	while (other < mp_napics && IO_TO_ID(other) != newid)
 		other++;
 
 	if (other < mp_napics) {
 		printf("Changing APIC ID for IO APIC #%d from "
 		       "%d to %d in MP table\n",
 		       other, newid, oldid);
 		IO_TO_ID(other) = oldid;
 	}
 	IO_TO_ID(apic) = newid;
 }
 
 
 /*
  * Rebuild the physical-APIC-ID -> logical-number table (ID_TO_IO) from
  * scratch, using the CPU_TO_ID and IO_TO_ID tables as the source.
  * IO APIC entries are written last, so they win if an ID is shared.
  */
 static void
 fix_id_to_io_mapping(void)
 {
 	int n;
 
 	/* start with every physical ID unmapped */
 	n = 0;
 	while (n < NAPICID) {
 		ID_TO_IO(n) = -1;
 		n++;
 	}
 
 	/* logical CPUs 0..mp_naps */
 	n = 0;
 	while (n <= mp_naps) {
 		if (CPU_TO_ID(n) < NAPICID)
 			ID_TO_IO(CPU_TO_ID(n)) = n;
 		n++;
 	}
 
 	/* logical IO APICs 0..mp_napics-1 */
 	n = 0;
 	while (n < mp_napics) {
 		if (IO_TO_ID(n) < NAPICID)
 			ID_TO_IO(IO_TO_ID(n)) = n;
 		n++;
 	}
 }
 
 
 /*
  * Return the lowest physical APIC ID that is used by neither a CPU
  * (logical CPUs 0..mp_naps) nor an IO APIC (0..mp_napics-1).
  * When every ID is taken the returned value is NAPICID.
  */
 static int
 first_free_apic_id(void)
 {
 	int id;
 	int n;
 	int taken;
 
 	for (id = 0; id < NAPICID; id++) {
 		taken = 0;
 
 		/* claimed by a CPU? */
 		for (n = 0; n <= mp_naps && !taken; n++)
 			if (CPU_TO_ID(n) == id)
 				taken = 1;
 
 		/* claimed by an IO APIC? */
 		for (n = 0; n < mp_napics && !taken; n++)
 			if (IO_TO_ID(n) == id)
 				taken = 1;
 
 		if (!taken)
 			return (id);
 	}
 	return (id);
 }
 
 
 /*
  * Decide whether physical APIC ID 'id' may be used for logical IO APIC
  * 'apic'.
  *
  * Returns 1 when 'id' is below NAPICID, is not the ID of any CPU, and is
  * not the ID of an IO APIC with a lower logical number than 'apic';
  * returns 0 otherwise.
  */
 static int
 io_apic_id_acceptable(int apic, int id)
 {
 	int n;
 	int limit;
 
 	if (id >= NAPICID)
 		return (0);	/* Out of range */
 
 	/* every logical CPU, BSP included */
 	n = 0;
 	while (n <= mp_naps) {
 		if (CPU_TO_ID(n) == id)
 			return (0);	/* Conflict with CPU */
 		n++;
 	}
 
 	/* only IO APICs numbered below 'apic' are compared */
 	limit = (apic < mp_napics) ? apic : mp_napics;
 	for (n = 0; n < limit; n++) {
 		if (IO_TO_ID(n) == id)
 			return (0);	/* Conflict with other APIC */
 	}
 
 	return (1);		/* ID is acceptable for IO APIC */
 }
 
 
 /*
  * Repair known BIOS mistakes in the data collected from the MP table.
  *
  * Three fix-ups are applied, in this order:
  *  1. if exactly one PCI bus exists and its bus ID is not 0, exchange
  *     its bus type with that of the bus whose ID is 0 and swap the two
  *     bus IDs in the src_bus_id field of the interrupt entries;
  *  2. make sure every IO APIC has an acceptable physical APIC ID, then
  *     rebuild the ID_TO_IO table;
  *  3. append an ExtINT entry for pin 0 of IO APIC 0 if the table has no
  *     entry for that pin.
  */
 static void
 fix_mp_table(void)
 {
 	int	x;
 	int	id;
 	int	bus_0 = 0;	/* Stop GCC warning */
 	int	bus_pci = 0;	/* Stop GCC warning */
 	int	num_pci_bus;
 	int	apic;		/* IO APIC unit number */
 	int     freeid;		/* Free physical APIC ID */
 	int	physid;		/* Current physical IO APIC ID */
 
 	/*
 	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 	 * did it wrong.  The MP spec says that when more than 1 PCI bus
 	 * exists the BIOS must begin with bus entries for the PCI bus and use
 	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
 	 * exists the BIOS can choose to ignore this ordering, and indeed many
 	 * MP motherboards do ignore it.  This causes a problem when the PCI
 	 * sub-system makes requests of the MP sub-system based on PCI bus
 	 * numbers.  So here we look for the situation and renumber the
 	 * busses and associated INTs in an effort to "make it right".
 	 */
 
 	/* find bus 0, PCI bus, count the number of PCI busses */
 	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 		if (bus_data[x].bus_id == 0) {
 			bus_0 = x;
 		}
 		if (bus_data[x].bus_type == PCI) {
 			++num_pci_bus;
 			bus_pci = x;
 		}
 	}
 	/*
 	 * bus_0 == slot of bus with ID of 0
 	 * bus_pci == slot of last PCI bus encountered
 	 */
 
 	/* check the 1 PCI bus case for sanity */
 	/* if it is number 0 all is well */
 	if (num_pci_bus == 1 &&
 	    bus_data[bus_pci].bus_id != 0) {
 		
 		/* mis-numbered, swap with whichever bus uses slot 0 */
 
 		/* swap the bus entry types (the bus_id fields stay put) */
 		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 		bus_data[bus_0].bus_type = PCI;
 
 		/* swap each relevant INTerrupt entry */
 		id = bus_data[bus_pci].bus_id;
 		for (x = 0; x < nintrs; ++x) {
 			if (io_apic_ints[x].src_bus_id == id) {
 				io_apic_ints[x].src_bus_id = 0;
 			}
 			else if (io_apic_ints[x].src_bus_id == 0) {
 				io_apic_ints[x].src_bus_id = id;
 			}
 		}
 	}
 
 	/* Assign IO APIC IDs.
 	 * 
 	 * First try the existing ID. If a conflict is detected, try
 	 * the ID in the MP table.  If a conflict is still detected, find
 	 * a free id.
 	 *
 	 * We cannot use the ID_TO_IO table before all conflicts have been
 	 * resolved and the table has been corrected.
 	 */
 	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 		
 		/* First try to use the value set by the BIOS */
 		physid = io_apic_get_id(apic);
 		if (io_apic_id_acceptable(apic, physid)) {
 			/* make the MP table copy agree with the hardware */
 			if (IO_TO_ID(apic) != physid)
 				swap_apic_id(apic, IO_TO_ID(apic), physid);
 			continue;
 		}
 
 		/* Then check if the value in the MP table is acceptable */
 		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 			continue;
 
 		/* Last resort, find a free APIC ID and use it */
 		freeid = first_free_apic_id();
 		if (freeid >= NAPICID)
 			panic("No free physical APIC IDs found");
 		
 		if (io_apic_id_acceptable(apic, freeid)) {
 			swap_apic_id(apic, IO_TO_ID(apic), freeid);
 			continue;
 		}
 		panic("Free physical APIC ID not usable");
 	}
 	/* IO_TO_ID is now final; regenerate the reverse mapping */
 	fix_id_to_io_mapping();
 
 	/* detect and fix broken Compaq MP table */
 	if (apic_int_type(0, 0) == -1) {
 		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 		/* append a new entry in the slot just past the current end */
 		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
 		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
 		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
 		nintrs++;
 	}
 }
 
 
 /*
  * Build the initial interrupt-number -> (IO APIC, pin) map.
  *
  * int_to_apicintpin[] is cleared, then every unassigned ISA/EISA INT
  * entry is bound to the interrupt number equal to its source bus IRQ,
  * provided that number is in range and still free.  Finally, if
  * interrupt 0 is still free, it is bound to pin 0 of IO APIC 0 when an
  * unassigned ExtINT entry for that pin exists.  PCI entries are left
  * alone here.
  */
 static void
 setup_apic_irq_mapping(void)
 {
 	int	i;
 	int	irq;
 
 	/* Clear array */
 	i = 0;
 	while (i < APIC_INTMAPSIZE) {
 		int_to_apicintpin[i].ioapic = -1;
 		int_to_apicintpin[i].int_pin = 0;
 		int_to_apicintpin[i].apic_address = NULL;
 		int_to_apicintpin[i].redirindex = 0;
 		i++;
 	}
 
 	/* First assign ISA/EISA interrupts */
 	for (i = 0; i < nintrs; i++) {
 		irq = io_apic_ints[i].src_bus_irq;
 
 		/* range check must precede the table lookup below */
 		if (irq >= APIC_INTMAPSIZE)
 			continue;
 		if (io_apic_ints[i].int_vector != 0xff)
 			continue;
 		if (int_to_apicintpin[irq].ioapic != -1)
 			continue;
 		if (!apic_int_is_bus_type(i, ISA) &&
 		    !apic_int_is_bus_type(i, EISA))
 			continue;
 		if (io_apic_ints[i].int_type != 0)
 			continue;
 
 		assign_apic_irq(ID_TO_IO(io_apic_ints[i].dst_apic_id),
 				io_apic_ints[i].dst_apic_int,
 				irq);
 	}
 
 	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
 	for (i = 0; i < nintrs; i++) {
 		if (io_apic_ints[i].dst_apic_int != 0)
 			continue;
 		if (io_apic_ints[i].dst_apic_id != IO_TO_ID(0))
 			continue;
 		if (io_apic_ints[i].int_vector != 0xff)
 			continue;
 		if (int_to_apicintpin[0].ioapic != -1)
 			continue;
 		if (io_apic_ints[i].int_type != 3)
 			continue;
 
 		assign_apic_irq(0, 0, 0);
 		break;
 	}
 	/* PCI interrupt assignment is deferred */
 }
 
 
 /*
  * Record one processor entry of the MP table.
  *
  * 'cpu' is the logical number the entry would get if it is an AP.
  * Returns 1 when the entry was stored as a new AP (the caller should
  * then advance its counter), 0 when the entry is disabled, is the BSP
  * (stored as logical CPU 0), or there is no room left below MAXCPU.
  * Panics on an APIC ID outside 0..NAPICID-1.
  */
 static int
 processor_entry(proc_entry_ptr entry, int cpu)
 {
 	/* disabled processors are ignored */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_EN) == 0)
 		return (0);
 
 	if (entry->apic_id >= NAPICID)
 		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 
 	/* the boot processor always becomes logical CPU 0 */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_BP) != 0) {
 		boot_cpu_id = entry->apic_id;
 		CPU_TO_ID(0) = entry->apic_id;
 		ID_TO_CPU(entry->apic_id) = 0;
 		return (0);	/* its already been counted */
 	}
 
 	/* no room for another AP */
 	if (cpu >= MAXCPU)
 		return (0);
 
 	/* add another AP to list */
 	CPU_TO_ID(cpu) = entry->apic_id;
 	ID_TO_CPU(entry->apic_id) = cpu;
 	return (1);
 }
 
 
 /*
  * Record one bus entry of the MP table in bus_data[bus].
  *
  * The bus type string (at most 6 characters, ended early by a blank) is
  * translated to a type code with lookup_bus_type(); an unknown name
  * causes a panic.  Always returns 1.
  */
 static int
 bus_entry(bus_entry_ptr entry, int bus)
 {
 	char    name[8];
 	int     len;
 	int     type;
 
 	/* copy the name up to the first blank and terminate it */
 	len = 0;
 	while (len < 6 && entry->bus_type[len] != ' ') {
 		name[len] = entry->bus_type[len];
 		len++;
 	}
 	name[len] = '\0';
 
 	/* encode the name into an index */
 	type = lookup_bus_type(name);
 	if (type == UNKNOWN_BUSTYPE)
 		panic("unknown bus type: '%s'", name);
 
 	bus_data[bus].bus_id = entry->bus_id;
 	bus_data[bus].bus_type = type;
 
 	return (1);
 }
 
 
 /*
  * Record one IO APIC entry of the MP table as logical IO APIC 'apic'.
  *
  * Returns 0 without storing anything if the entry is not enabled,
  * 1 otherwise.  The reverse (ID -> logical) mapping is only written
  * when the APIC ID is below NAPICID.
  */
 static int
 io_apic_entry(io_apic_entry_ptr entry, int apic)
 {
 	if ((entry->apic_flags & IOAPICENTRY_FLAG_EN) == 0)
 		return (0);
 
 	IO_TO_ID(apic) = entry->apic_id;
 
 	/* keep out-of-range IDs from indexing past the table */
 	if (entry->apic_id < NAPICID) {
 		ID_TO_IO(entry->apic_id) = apic;
 	}
 
 	return (1);
 }
 
 
 /*
  * Translate a bus type name into its type code by searching
  * bus_type_table[] for an exact string match.
  * Returns UNKNOWN_BUSTYPE when the name is not in the table.
  */
 static int
 lookup_bus_type(char *name)
 {
 	int     idx;
 
 	idx = 0;
 	while (idx < MAX_BUSTYPE) {
 		if (strcmp(bus_type_table[idx].name, name) == 0)
 			return (bus_type_table[idx].type);
 		++idx;
 	}
 
 	return (UNKNOWN_BUSTYPE);
 }
 
 
 /*
  * Record one interrupt entry of the MP table in io_apic_ints[intr].
  *
  * A destination APIC ID of 255 means the signal goes to all IO APICs;
  * in that case the first IO APIC whose version register reports a
  * maximum redirection entry at least as large as the destination pin is
  * substituted.  If none qualifies the ID is stored unchanged.
  * Always returns 1.
  */
 static int
 int_entry(int_entry_ptr entry, int intr)
 {
 	int apic;
 
 	io_apic_ints[intr].int_type = entry->int_type;
 	io_apic_ints[intr].int_flags = entry->int_flags;
 	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
 	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
 	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
 
 	/* default: take the destination ID as given */
 	io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
 
 	if (entry->dst_apic_id == 255) {
 		/* This signal goes to all IO APICS.  Select an IO APIC
 		   with sufficient number of interrupt pins */
 		for (apic = 0; apic < mp_napics; apic++) {
 			if (((io_apic_read(apic, IOAPIC_VER) &
 			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
 			    entry->dst_apic_int) {
 				io_apic_ints[intr].dst_apic_id =
 				    IO_TO_ID(apic);
 				break;
 			}
 		}
 	}
 
 	return (1);
 }
 
 
 /*
  * Return 1 if the source bus of interrupt entry 'intr' is listed in
  * bus_data[] with type 'bus_type', 0 otherwise.
  */
 static int
 apic_int_is_bus_type(int intr, int bus_type)
 {
 	int     n;
 
 	for (n = 0; n < mp_nbusses; ++n) {
 		if (bus_data[n].bus_id != io_apic_ints[intr].src_bus_id)
 			continue;
 		if ((int) bus_data[n].bus_type == bus_type)
 			return (1);
 	}
 
 	return (0);
 }
 
 
 /*
  * Convert a traditional ISA interrupt mask into an APIC mask.
  *
  * Only the lowest set bit of 'isa_mask' is considered.  It is mapped
  * through isa_apic_irq() and the result is returned as a single-bit
  * mask; 0 is returned when the mask is empty or no mapping exists.
  */
 u_int
 isa_apic_mask(u_int isa_mask)
 {
 	int bit;
 	int mapped;
 
 #if defined(SKIP_IRQ15_REDIRECT)
 	if (isa_mask == (1 << 15)) {
 		printf("skipping ISA IRQ15 redirect\n");
 		return isa_mask;
 	}
 #endif  /* SKIP_IRQ15_REDIRECT */
 
 	/* ffs() is 1-based and yields 0 for an empty mask */
 	bit = ffs(isa_mask);
 	if (bit == 0)
 		return (0);
 
 	/* look for APIC connection of the zero-based IRQ */
 	mapped = isa_apic_irq(bit - 1);
 	if (mapped == -1)
 		return (0);
 
 	return (1 << mapped);			/* convert to a mask */
 }
 
 
 /*
  * Determine which APIC pin an ISA/EISA INT is attached to.
  */
 #define INTTYPE(I)	(io_apic_ints[(I)].int_type)
 #define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
 #define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
 #define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 
 #define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
 int
 isa_apic_irq(int isa_irq)
 {
 	int     intr;
 
 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
 		if (INTTYPE(intr) == 0) {		/* standard INT */
 			if (SRCBUSIRQ(intr) == isa_irq) {
 				if (apic_int_is_bus_type(intr, ISA) ||
 			            apic_int_is_bus_type(intr, EISA)) {
 					if (INTIRQ(intr) == 0xff)
 						return -1; /* unassigned */
 					return INTIRQ(intr);	/* found */
 				}
 			}
 		}
 	}
 	return -1;					/* NOT found */
 }
 
 
 /*
  * Determine which APIC pin a PCI INT is attached to.
  */
 #define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
 #define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 #define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
 int
 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
 {
 	int     intr;
 
 	--pciInt;					/* zero based */
 
 	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
 		if ((INTTYPE(intr) == 0)		/* standard INT */
 		    && (SRCBUSID(intr) == pciBus)
 		    && (SRCBUSDEVICE(intr) == pciDevice)
 		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
 			if (apic_int_is_bus_type(intr, PCI)) {
 				if (INTIRQ(intr) == 0xff)
 					allocate_apic_irq(intr);
 				if (INTIRQ(intr) == 0xff)
 					return -1;	/* unassigned */
 				return INTIRQ(intr);	/* exact match */
 			}
 
 	return -1;					/* NOT found */
 }
 
 int
 next_apic_irq(int irq) 
 {
 	int intr, ointr;
 	int bus, bustype;
 
 	bus = 0;
 	bustype = 0;
 	for (intr = 0; intr < nintrs; intr++) {
 		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
 			continue;
 		bus = SRCBUSID(intr);
 		bustype = apic_bus_type(bus);
 		if (bustype != ISA &&
 		    bustype != EISA &&
 		    bustype != PCI)
 			continue;
 		break;
 	}
 	if (intr >= nintrs) {
 		return -1;
 	}
 	for (ointr = intr + 1; ointr < nintrs; ointr++) {
 		if (INTTYPE(ointr) != 0)
 			continue;
 		if (bus != SRCBUSID(ointr))
 			continue;
 		if (bustype == PCI) {
 			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
 				continue;
 			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
 				continue;
 		}
 		if (bustype == ISA || bustype == EISA) {
 			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 				continue;
 		}
 		if (INTPIN(intr) == INTPIN(ointr))
 			continue;
 		break;
 	}
 	if (ointr >= nintrs) {
 		return -1;
 	}
 	return INTIRQ(ointr);
 }
 #undef SRCBUSLINE
 #undef SRCBUSDEVICE
 #undef SRCBUSID
 #undef SRCBUSIRQ
 
 #undef INTPIN
 #undef INTIRQ
 #undef INTAPIC
 #undef INTTYPE
 
 
 /*
  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
  *
  * Placeholder: nothing is reprogrammed.  A message is printed when
  * bootverbose is set; the return value is 0, or -1 when the kernel is
  * built with READY defined.
  *
  * XXX FIXME:
  *  Exactly what this means is unclear at this point.  It is a solution
  *  for motherboards that redirect the MBIRQ0 pin.  Generically a
  *  motherboard could route any of the ISA INTs to upper (>15) IRQ values.
  *  But most would NOT be redirected via MBIRQ0, thus "undirect()ing"
  *  them would NOT be an option.
  */
 int
 undirect_isa_irq(int rirq)
 {
 #if !defined(READY)
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 	}
 	return (0);
 #else
 	if (bootverbose) {
 		printf("Freeing redirected ISA irq %d.\n", rirq);
 	}
 	/** FIXME: tickle the MB redirector chip */
 	return (-1);
 #endif  /* !READY */
 }
 
 
 /*
  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt.
  *
  * Placeholder: nothing is reprogrammed.  A message is printed when
  * bootverbose is set; the return value is 0, or -1 when the kernel is
  * built with READY defined.
  */
 int
 undirect_pci_irq(int rirq)
 {
 #if !defined(READY)
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 		       rirq);
 	}
 	return (0);
 #else
 	if (bootverbose) {
 		printf("Freeing redirected PCI irq %d.\n", rirq);
 	}
 
 	/** FIXME: tickle the MB redirector chip */
 	return (-1);
 #endif  /* !READY */
 }
 
 
 /*
  * Map a bus ID to its bus type.
  *
  * Returns the bus_type of the first bus_data[] entry whose bus_id is
  * 'id', or -1 if no entry matches.
  */
 int
 apic_bus_type(int id)
 {
 	int     n;
 
 	n = 0;
 	while (n < mp_nbusses) {
 		if (bus_data[n].bus_id == id)
 			return (bus_data[n].bus_type);
 		++n;
 	}
 
 	return (-1);
 }
 
 
 /*
  * Source bus ID for a LOGICAL APIC# and pin#.
  *
  * Returns the src_bus_id of the first interrupt entry routed to that
  * APIC and pin, or -1 if there is none.
  */
 int
 apic_src_bus_id(int apic, int pin)
 {
 	int     n;
 
 	/* walk all INTerrupt sources, skipping those routed elsewhere */
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 		return (io_apic_ints[n].src_bus_id);
 	}
 
 	return (-1);		/* NOT found */
 }
 
 
 /*
  * Source bus IRQ for a LOGICAL APIC# and pin#.
  *
  * Returns the src_bus_irq of the first interrupt entry routed to that
  * APIC and pin, or -1 if there is none.
  */
 int
 apic_src_bus_irq(int apic, int pin)
 {
 	int     n;
 
 	n = 0;
 	while (n < nintrs) {
 		if (apic == ID_TO_IO(io_apic_ints[n].dst_apic_id) &&
 		    pin == io_apic_ints[n].dst_apic_int) {
 			return (io_apic_ints[n].src_bus_irq);
 		}
 		n++;
 	}
 
 	return (-1);		/* NOT found */
 }
 
 
 /*
  * INTerrupt type for a LOGICAL APIC# and pin#.
  *
  * Returns the int_type of the first interrupt entry routed to that
  * APIC and pin, or -1 if there is none.
  */
 int
 apic_int_type(int apic, int pin)
 {
 	int     n;
 
 	/* walk all INTerrupt sources, skipping those routed elsewhere */
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 		return (io_apic_ints[n].int_type);
 	}
 
 	return (-1);		/* NOT found */
 }
 
 /*
  * Interrupt number for a LOGICAL APIC# and pin#.
  *
  * The first interrupt entry routed to that APIC and pin decides the
  * result: -1 if it is unassigned (0xff), otherwise its int_vector after
  * checking that int_to_apicintpin[] agrees (panic if not).  Returns -1
  * when no entry matches.
  */
 int
 apic_irq(int apic, int pin)
 {
 	int n;
 	int irq;
 
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 
 		irq = io_apic_ints[n].int_vector;
 		if (irq == 0xff)
 			return (-1);
 
 		/* both tables must describe the same binding */
 		if (apic != int_to_apicintpin[irq].ioapic)
 			panic("apic_irq: inconsistent table");
 		if (pin != int_to_apicintpin[irq].int_pin)
 			panic("apic_irq inconsistent table (2)");
 		return (irq);
 	}
 	return (-1);
 }
 
 
 /*
  * Trigger mode for a LOGICAL APIC# and pin#.
  *
  * Returns bits 2-3 of int_flags of the first interrupt entry routed to
  * that APIC and pin, or -1 if there is none.
  */
 int
 apic_trigger(int apic, int pin)
 {
 	int     n;
 
 	n = 0;
 	while (n < nintrs) {
 		if (apic == ID_TO_IO(io_apic_ints[n].dst_apic_id) &&
 		    pin == io_apic_ints[n].dst_apic_int) {
 			return ((io_apic_ints[n].int_flags >> 2) & 0x03);
 		}
 		++n;
 	}
 
 	return (-1);		/* NOT found */
 }
 
 
 /*
  * 'Active' level (polarity) for a LOGICAL APIC# and pin#.
  *
  * Returns bits 0-1 of int_flags of the first interrupt entry routed to
  * that APIC and pin, or -1 if there is none.
  */
 int
 apic_polarity(int apic, int pin)
 {
 	int     n;
 
 	/* walk all INTerrupt sources, skipping those routed elsewhere */
 	for (n = 0; n < nintrs; ++n) {
 		if (apic != ID_TO_IO(io_apic_ints[n].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[n].dst_apic_int)
 			continue;
 		return (io_apic_ints[n].int_flags & 0x03);
 	}
 
 	return (-1);		/* NOT found */
 }
 
 
 /*
  * set data according to MP defaults
  * FIXME: probably not complete yet...
  *
  * 'type' is the MP default configuration number (1..7 are accepted,
  * anything else panics).  The function fills in the tables for a
  * two-CPU system: the BSP gets the ID read from the local APIC, the
  * single AP gets ID 1 (or 0 if the BSP has ID 1 or higher).  Two bus
  * entries are taken from default_data[type - 1].  With APIC_IO the one
  * IO APIC and interrupt entries for pins 0..15 are set up as well.
  */
 static void
 default_mp_table(int type)
 {
 	int     ap_cpu_id;
 #if defined(APIC_IO)
 	int     io_apic_id;
 	int     pin;
 #endif	/* APIC_IO */
 
 #if 0
 	printf("  MP default config type: %d\n", type);
 	switch (type) {
 	case 1:
 		printf("   bus: ISA, APIC: 82489DX\n");
 		break;
 	case 2:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 3:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 4:
 		printf("   bus: MCA, APIC: 82489DX\n");
 		break;
 	case 5:
 		printf("   bus: ISA+PCI, APIC: Integrated\n");
 		break;
 	case 6:
 		printf("   bus: EISA+PCI, APIC: Integrated\n");
 		break;
 	case 7:
 		printf("   bus: MCA+PCI, APIC: Integrated\n");
 		break;
 	default:
 		printf("   future type\n");
 		break;
 		/* NOTREACHED */
 	}
 #endif	/* 0 */
 
 	/* the ID field sits in the top byte of the local APIC ID register */
 	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
 	/* the AP takes whichever of IDs 0 and 1 the BSP is not using */
 	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
 
 	/* BSP */
 	CPU_TO_ID(0) = boot_cpu_id;
 	ID_TO_CPU(boot_cpu_id) = 0;
 
 	/* one and only AP */
 	CPU_TO_ID(1) = ap_cpu_id;
 	ID_TO_CPU(ap_cpu_id) = 1;
 
 #if defined(APIC_IO)
 	/* one and only IO APIC */
 	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
 
 	/*
 	 * sanity check, refer to MP spec section 3.6.6, last paragraph
 	 * necessary as some hardware isn't properly setting up the IO APIC
 	 *
 	 * NOTE: the two preprocessor branches below each supply the
 	 * opening line of the same if-block; exactly one is compiled.
 	 */
 #if defined(REALLY_ANAL_IOAPICID_VALUE)
 	if (io_apic_id != 2) {
 #else
 	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
 #endif	/* REALLY_ANAL_IOAPICID_VALUE */
 		/* force the IO APIC to ID 2 */
 		io_apic_set_id(0, 2);
 		io_apic_id = 2;
 	}
 	IO_TO_ID(0) = io_apic_id;
 	ID_TO_IO(io_apic_id) = 0;
 #endif	/* APIC_IO */
 
 	/* fill out bus entries */
 	switch (type) {
 	case 1:
 	case 2:
 	case 3:
 	case 4:
 	case 5:
 	case 6:
 	case 7:
 		/* columns 1..4 of default_data[] hold two (id, type) pairs */
 		bus_data[0].bus_id = default_data[type - 1][1];
 		bus_data[0].bus_type = default_data[type - 1][2];
 		bus_data[1].bus_id = default_data[type - 1][3];
 		bus_data[1].bus_type = default_data[type - 1][4];
 		break;
 
 	/*
 	 * NOTE(review): an older remark here said types 4 and 7 (MCA) were
 	 * NOT supported, yet both are accepted by the cases above.
 	 */
 	default:		/* illegal/reserved */
 		panic("BAD default MP config: %d", type);
 		/* NOTREACHED */
 	}
 
 #if defined(APIC_IO)
 	/* general cases from MP v1.4, table 5-2 */
 	for (pin = 0; pin < 16; ++pin) {
 		io_apic_ints[pin].int_type = 0;
 		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
 		io_apic_ints[pin].src_bus_id = 0;
 		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
 		io_apic_ints[pin].dst_apic_id = io_apic_id;
 		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
 	}
 
 	/* special cases from MP v1.4, table 5-2 */
 	if (type == 2) {
 		io_apic_ints[2].int_type = 0xff;	/* N/C */
 		io_apic_ints[13].int_type = 0xff;	/* N/C */
 #if !defined(APIC_MIXED_MODE)
 		/** FIXME: ??? */
 		panic("sorry, can't support type 2 default yet");
 #endif	/* APIC_MIXED_MODE */
 	}
 	else
 		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
 
 	/* pin 0: not connected for type 7, otherwise the 8259 ExtINT */
 	if (type == 7)
 		io_apic_ints[0].int_type = 0xff;	/* N/C */
 	else
 		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
 #endif	/* APIC_IO */
 }
 
 
 /*
  * start each AP in our list
  *
  * 'boot_addr' is where the AP trampoline code is installed and the
  * address the APs are started at.  For each logical CPU 1..mp_naps a
  * private data page and an idle stack are allocated and entered in
  * SMPpt[], the BIOS warm-start vector is pointed at the trampoline and
  * start_ap() is called.  The warm-start vector (and, except on PC98,
  * the CMOS reset code) is saved first and restored afterwards.
  *
  * Returns mp_ncpus - 1.
  */
 static int
 start_all_aps(u_int boot_addr)
 {
 	int     x, i, pg;
 	u_char  mpbiosreason;
 	u_long  mpbioswarmvec;
 	struct globaldata *gd;
 	char *stack;
 	uintptr_t kptbase;
 
 	POSTCODE(START_ALL_APS_POST);
 
 	/* serializes the APs against each other in ap_init() */
 	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
 
 	/* initialize BSP's local APIC */
 	apic_initialize();
 	bsp_apic_ready = 1;
 
 	/* install the AP 1st level boot code */
 	install_ap_tramp(boot_addr);
 
 
 	/* save the current value of the warm-start vector */
 	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
 #ifndef PC98
 	/* mpbiosreason is only read and written in the !PC98 sections */
 	outb(CMOS_REG, BIOS_RESET);
 	mpbiosreason = inb(CMOS_DATA);
 #endif
 
 	/* set up temporary P==V mapping for AP boot */
 	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
 	kptbase = (uintptr_t)(void *)KPTphys;
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
 		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
 	invltlb();
 
 	/* start each AP */
 	for (x = 1; x <= mp_naps; ++x) {
 
 		/* This is a bit verbose, it will go away soon.  */
 
 		/* first page of AP's private space */
 		pg = x * i386_btop(sizeof(struct privatespace));
 
 		/* allocate a new private data page */
 		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
 
 		/* wire it into the private page table page */
 		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
 
 		/* allocate and set up an idle stack data page */
 		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
 		for (i = 0; i < UPAGES; i++)
 			SMPpt[pg + 1 + i] = (pt_entry_t)
 			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 		/* prime data page for it to use */
 		gd->gd_cpuid = x;
 		globaldata_register(gd);
 
 		/* setup a vector to our boot code */
 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
 #ifndef PC98
 		outb(CMOS_REG, BIOS_RESET);
 		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 #endif
 
 		/* stack top and CPU number for the AP that is about to boot */
 		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
 		bootAP = x;
 
 		/* attempt to start the Application Processor */
 		CHECK_INIT(99);	/* setup checkpoints */
 		if (!start_ap(x, boot_addr)) {
 			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
 			CHECK_PRINT("trace");	/* show checkpoints */
 			/* better panic as the AP may be running loose */
 			printf("panic y/n? [y] ");
 			if (cngetc() != 'n')
 				panic("bye-bye");
 		}
 		CHECK_PRINT("trace");		/* show checkpoints */
 
 		/* record its version info */
 		cpu_apic_versions[x] = cpu_apic_versions[0];
 
 		/*
 		 * NOTE(review): this is reached even when start_ap() failed
 		 * and the operator answered 'n' above.
 		 */
 		all_cpus |= (1 << x);		/* record AP in CPU map */
 	}
 
 	/* build our map of 'other' CPUs */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	/* fill in our (BSP) APIC version */
 	cpu_apic_versions[0] = lapic.version;
 
 	/* restore the warmstart vector */
 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
 #ifndef PC98
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, mpbiosreason);
 #endif
 
 	/*
 	 * Set up the idle context for the BSP.  Similar to above except
 	 * that some was done by locore, some by pmap.c and some is implicit
 	 * because the BSP is cpu#0 and the page is initially zero, and also
 	 * because we can refer to variables by name on the BSP..
 	 */
 
 	/* Allocate and setup BSP idle stack */
 	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
 	for (i = 0; i < UPAGES; i++)
 		SMPpt[1 + i] = (pt_entry_t)
 		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 	/* tear down the temporary P==V mapping installed above */
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = 0;
 	pmap_set_opt();
 
 	/* number of APs actually started */
 	return mp_ncpus - 1;
 }
 
 
 /*
  * load the 1st level AP boot code into base memory.
  *
  * bootMP_size bytes starting at bootMP are copied to the kernel virtual
  * address boot_addr + KERNBASE, and four locations inside the copy are
  * then patched so that the code works from its new location.
  * NOTE(review): boot_addr is presumably the physical address of the
  * trampoline page - confirm against the caller.
  */
 
 /* targets for relocation */
 extern void bigJump(void);
 extern void bootCodeSeg(void);
 extern void bootDataSeg(void);
 extern void MPentry(void);
 extern u_int MP_GDT;
 extern u_int mp_gdtbase;
 
 static void
 install_ap_tramp(u_int boot_addr)
 {
 	int     x;
 	int     size = *(int *) ((u_long) & bootMP_size);
 	u_char *src = (u_char *) ((u_long) bootMP);
 	u_char *dst = (u_char *) boot_addr + KERNBASE;
 	u_int   boot_base = (u_int) bootMP;
 	u_int8_t *dst8;
 	u_int16_t *dst16;
 	u_int32_t *dst32;
 
 	POSTCODE(INSTALL_AP_TRAMP_POST);
 
 	/* byte-wise copy of the trampoline image */
 	for (x = 0; x < size; ++x)
 		*dst++ = *src++;
 
 	/*
 	 * modify addresses in code we just moved to basemem. unfortunately we
 	 * need fairly detailed info about mpboot.s for this to work.  changes
 	 * to mpboot.s might require changes here.
 	 *
 	 * Each patch location is found as (symbol - boot_base), i.e. the
 	 * symbol's offset within the original image, added to the start
 	 * of the copy.
 	 */
 
 	/* boot code is located in KERNEL space */
 	dst = (u_char *) boot_addr + KERNBASE;
 
 	/* modify the lgdt arg */
 	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
 	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
 
 	/* modify the ljmp target for MPentry() */
 	/* the 32-bit target is stored one byte past the bigJump label */
 	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
 	*dst32 = ((u_int) MPentry - KERNBASE);
 
 	/* modify the target for boot code segment */
 	/* low 16 bits of boot_addr, followed by bits 16..23 in the next byte */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 
 	/* modify the target for boot data segment */
 	/* same 16 + 8 bit layout as for the code segment above */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 }
 
 
 /*
  * this function starts the AP (application processor) with logical
  * CPU number 'logical_cpu'; its physical APIC ID is looked up with
  * CPU_TO_ID().  It does quite a "song and dance" to accomplish this.
  * This is necessary because of the nuances of the different hardware
  * we might encounter.  It ain't pretty, but it seems to work.
  *
  * 'boot_addr' is the address of the trampoline; bits 12..19 of it are
  * used as the vector of the STARTUP IPIs.
  *
  * Returns 1 if mp_ncpus increased before the APIC timer (armed for
  * about 5 seconds) ran out, 0 otherwise.
  */
 static int
 start_ap(int logical_cpu, u_int boot_addr)
 {
 	int     physical_cpu;
 	int     vector;
 	int     cpus;
 	u_long  icr_lo, icr_hi;
 
 	POSTCODE(START_AP_POST);
 
 	/* get the PHYSICAL APIC ID# */
 	physical_cpu = CPU_TO_ID(logical_cpu);
 
 	/* calculate the vector */
 	vector = (boot_addr >> 12) & 0xff;
 
 	/* used as a watchpoint to signal AP startup */
 	cpus = mp_ncpus;
 
 	/*
 	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
 	 * and running the target CPU. OR this INIT IPI might be latched (P5
 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 	 * ignored.
 	 */
 
 	/* setup the address for the target AP */
 	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
 	icr_hi |= (physical_cpu << 24);
 	lapic.icr_hi = icr_hi;
 
 	/* do an INIT IPI: assert RESET */
 	/* only the upper 12 bits of icr_lo are kept; reused for every IPI */
 	icr_lo = lapic.icr_lo & 0xfff00000;
 	lapic.icr_lo = icr_lo | 0x0000c500;
 
 	/* wait for pending status end */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/* do an INIT IPI: deassert RESET */
 	lapic.icr_lo = icr_lo | 0x00008500;
 
 	/* wait for pending status end */
 	u_sleep(10000);		/* wait ~10mS */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/*
 	 * next we do a STARTUP IPI: the previous INIT IPI might still be
 	 * latched, (P5 bug) this 1st STARTUP would then terminate
 	 * immediately, and the previously started INIT IPI would continue. OR
 	 * the previous INIT IPI has already run. and this STARTUP IPI will
 	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 	 * will run.
 	 */
 
 	/* do a STARTUP IPI */
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/*
 	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/* wait for it to start */
 	/* poll mp_ncpus until it grows or the timer reads zero */
 	set_apic_timer(5000000);/* == 5 seconds */
 	while (read_apic_timer())
 		if (mp_ncpus > cpus)
 			return 1;	/* return SUCCESS */
 
 	return 0;		/* return FAILURE */
 }
 
 /*
  * Ask every other CPU to flush its TLB.
  *
  * An IPI_INVLTLB is sent to all CPUs but the caller, provided the
  * kernel is built with APIC_IO and both smp_started and invltlb_ok are
  * set; otherwise this is a no-op.
  *
  * XXX: Needs to handshake and wait for completion before proceeding.
  */
 void
 smp_invltlb(void)
 {
 #if defined(APIC_IO)
 	/* too early: the other CPUs cannot take the IPI yet */
 	if (!smp_started || !invltlb_ok)
 		return;
 
 	ipi_all_but_self(IPI_INVLTLB);
 #endif  /* APIC_IO */
 }
 
 /*
  * Invalidate the TLB entry for address 'addr' on the current CPU with
  * the invlpg instruction, then call smp_invltlb() to notify the other
  * CPUs.
  */
 void
 invlpg(u_int addr)
 {
 	/* the "memory" clobber keeps the compiler from moving accesses across */
 	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 /*
  * Flush the TLB of the current CPU by reading %cr3 and writing the same
  * value back, then call smp_invltlb() to notify the other CPUs.
  */
 void
 invltlb(void)
 {
 	u_long  temp;	/* scratch register for the %cr3 round trip */
 
 	/*
 	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
 	 * inlined.
 	 */
 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 
 /*
  * Per-AP startup, run by each application processor.
  *
  * This is called once the rest of the system is up and running and we're
  * ready to let the AP's out of the pen.  Each AP spins until aps_ready is
  * set, then (serialized by ap_boot_mtx) initializes its own CPU state,
  * waits for smp_started, and finally enters the scheduler through
  * cpu_throw().  It is not expected to return.
  */
 /* Defined outside this function; called below to set up SSE on this AP. */
 extern void	enable_sse(void);
 
 void
 ap_init(void)
 {
 	u_int	apic_id;	/* logical cpu number looked up from the local APIC id */
 
 	/* spin until all the AP's are ready */
 	while (!aps_ready)
 		/* spin */ ;
 
 	/*
 	 * Set curproc to our per-cpu idleproc so that mutexes have
 	 * something unique to lock with.
 	 */
 	PCPU_SET(curproc, PCPU_GET(idleproc));
 	PCPU_SET(spinlocks, NULL);
 
 	/* lock against other AP's that are waking up */
 	mtx_lock_spin(&ap_boot_mtx);
 
 	/* BSP may have changed PTD while we're waiting for the lock */
 	cpu_invltlb();
 
 	/* count this AP; compared against mp_ncpus further down */
 	smp_cpus++;
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 	/* NOTE(review): presumably loads the IDT set up for the F00F workaround -- confirm */
 	lidt(&r_idt);
 #endif
 
 	/* Build our map of 'other' CPUs. */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
 
 	/* set up CPU registers and state */
 	cpu_setregs();
 
 	/* set up FPU state on the AP */
 	npxinit(__INITIAL_NPXCW__);
 
 	/* set up SSE registers */
 	enable_sse();
 
 	/*
 	 * A quick check from sanity claus: bits 24-27 of lapic.id are mapped
 	 * through apic_id_to_logical[] and must match this CPU's cpuid.
 	 */
 	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
 	if (PCPU_GET(cpuid) != apic_id) {
 		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
 		printf("SMP: apic_id = %d\n", apic_id);
 		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
 		panic("cpuid mismatch! boom!!");
 	}
 
 	/* Init local apic for irq's */
 	apic_initialize();
 
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
 	/*
 	 * Activate smp_invltlb, although strictly speaking, this isn't
 	 * quite correct yet.  We should have a bitfield for cpus willing
 	 * to accept TLB flush IPI's or something and sync them.
 	 */
 	/* only the AP that brings smp_cpus up to mp_ncpus flips the flags */
 	if (smp_cpus == mp_ncpus) {
 		invltlb_ok = 1;
 		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
 		smp_active = 1;	 /* historic */
 	}
 
 	/* let other AP's wake up now */
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* wait until all the AP's are up */
 	while (smp_started == 0)
 		; /* nothing */
 
 	/* initialize this CPU's switchtime/switchticks before scheduling */
 	microuptime(PCPU_PTR(switchtime));
 	PCPU_SET(switchticks, ticks);
 
 	/* ok, now grab sched_lock and enter the scheduler */
 	enable_intr();
 	mtx_lock_spin(&sched_lock);
 	cpu_throw();	/* doesn't return */
 
 	/* reached only if cpu_throw() unexpectedly returns */
 	panic("scheduler returned us to ap_init");
 }
 
 /*
  * For statclock, we send an IPI to all CPU's to have them call this
  * function.
  *
  * Runs statclock_process() for curproc while holding sched_lock, passing
  * the program counter and user-mode flag taken from the trap frame.
  */
 void
 forwarded_statclock(struct trapframe frame)
 {
 
 	mtx_lock_spin(&sched_lock);
 	statclock_process(curproc, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
 	mtx_unlock_spin(&sched_lock);
 }
 
 void
 forward_statclock(void)
 {
 	int map;
 
 	CTR0(KTR_SMP, "forward_statclock");
 
 	if (!smp_started || !invltlb_ok || cold || panicstr)
 		return;
 
 	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 	if (map != 0)
 		ipi_selected(map, IPI_STATCLOCK);
 }
 
 /*
  * For each hardclock(), we send an IPI to all other CPU's to have them
  * execute this function.  It would be nice to reduce contention on
  * sched_lock if we could simply peek at the CPU to determine the user/kernel
  * state and call hardclock_process() on the CPU receiving the clock interrupt
  * and then just use a simple IPI to handle any ast's if needed.
  *
  * Runs hardclock_process() for curproc while holding sched_lock, passing
  * the user-mode flag taken from the trap frame.
  */
 void
 forwarded_hardclock(struct trapframe frame)
 {
 
 	mtx_lock_spin(&sched_lock);
 	hardclock_process(curproc, TRAPF_USERMODE(&frame));
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Send IPI_HARDCLOCK to every other CPU that is not stopped.
  *
  * Nothing is sent while SMP is not fully started (smp_started and
  * invltlb_ok), while the system is still cold, or after a panic.
  */
 void
 forward_hardclock(void)
 {
 	u_int targets;
 
 	CTR0(KTR_SMP, "forward_hardclock");
 
 	if (smp_started && invltlb_ok && !cold && !panicstr) {
 		/* every CPU but this one, minus the stopped ones */
 		targets = PCPU_GET(other_cpus) & ~stopped_cpus;
 		if (targets != 0)
 			ipi_selected(targets, IPI_HARDCLOCK);
 	}
 }
 
 #ifdef APIC_INTR_REORDER
 /*
  *	Maintain mapping from softintr vector to isr bit in local apic.
  *
  *	For interrupt 'intr', record which local APIC ISR location
  *	(apic_isrbit_location[intr].location) and which bit mask (.bit)
  *	correspond to 'vector'.  Panics if either argument is out of range.
  */
 void
 set_lapic_isrloc(int intr, int vector)
 {
 	/*
 	 * NOTE(review): intr == 32 is accepted here; confirm that
 	 * apic_isrbit_location[] has 33 entries, otherwise this bound
 	 * should be ">= 32".
 	 */
 	if (intr < 0 || intr > 32)
 		panic("set_lapic_isrloc: bad intr argument: %d", intr);
 	if (vector < ICU_OFFSET || vector > 255)
 		panic("set_lapic_isrloc: bad vector argument: %d", vector);
 	/*
 	 * vector >> 5 picks the group of 32 vectors; << 2 scales that to an
 	 * element offset from isr0 (presumably because consecutive ISR
 	 * registers sit four elements apart -- verify against the lapic
 	 * layout).
 	 */
 	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
 	apic_isrbit_location[intr].bit = (1<<(vector & 31));
 }
 #endif
 
 /*
  * send an IPI to a set of cpus.
  *
  * 'cpus' is the set of target CPUs and 'ipi' the IPI to deliver; both are
  * handed to selected_apic_ipi() with fixed delivery mode.
  *
  * __func__ is passed to the trace record as a "%s" argument: it is a
  * predefined identifier, not a string literal, so it cannot be pasted
  * onto the format string.
  */
 void
 ipi_selected(u_int32_t cpus, u_int ipi)
 {
 
 	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs, including myself
  *
  * __func__ is passed to the trace record as a "%s" argument: it is a
  * predefined identifier, not a string literal, so it cannot be pasted
  * onto the format string.
  */
 void
 ipi_all(u_int ipi)
 {
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs EXCEPT myself
  *
  * __func__ is passed to the trace record as a "%s" argument: it is a
  * predefined identifier, not a string literal, so it cannot be pasted
  * onto the format string.
  */
 void
 ipi_all_but_self(u_int ipi)
 {
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to myself
  *
  * __func__ is passed to the trace record as a "%s" argument: it is a
  * predefined identifier, not a string literal, so it cannot be pasted
  * onto the format string.
  */
 void
 ipi_self(u_int ipi)
 {
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * Set aps_ready to 1, which ends the "while (!aps_ready)" spin at the top
  * of ap_init().  The argument is unused.
  */
 void
 release_aps(void *dummy __unused)
 {
 	/* atomic_store_rel_int: see atomic(9) for the ordering it provides */
 	atomic_store_rel_int(&aps_ready, 1);
 }
 
 /* Register release_aps() to run at SI_SUB_SMP, SI_ORDER_FIRST. */
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
Index: head/sys/amd64/include/pcpu.h
===================================================================
--- head/sys/amd64/include/pcpu.h	(revision 82308)
+++ head/sys/amd64/include/pcpu.h	(revision 82309)
@@ -1,99 +1,79 @@
 /*-
  * Copyright (c) Peter Wemm <peter@netplex.com.au>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_GLOBALDATA_H_
 #define _MACHINE_GLOBALDATA_H_
 
 #ifdef _KERNEL
 
 #include <machine/segments.h>
 #include <machine/tss.h>
 
 /* XXX */
 #ifdef KTR_PERCPU
 #include <sys/ktr.h>
 #endif
 
 /*
  * This structure maps out the global data that needs to be kept on a
  * per-cpu basis.  genassym uses this to generate offsets for the assembler
  * code, which also provides external symbols so that C can get at them as
  * though they were really globals.
  *
  * The SMP parts are setup in pmap.c and locore.s for the BSP, and
  * mp_machdep.c sets up the data for the AP's to "see" when they awake.
  * The reason for doing it via a struct is so that an array of pointers
  * to each CPU's data can be set up for things like "check curproc on all
  * other processors"
  */
 /*
  * Per-cpu data.  Fields are reached from C through the PCPU_GET/PCPU_SET/
  * PCPU_PTR accessors using the name without the gd_ prefix.
  */
 struct globaldata {
 	struct	globaldata *gd_prvspace;	/* self-reference */
 	struct	proc *gd_curproc;		/* current process */
 	struct	proc *gd_idleproc;		/* idle process */
 	struct	proc *gd_npxproc;		/* NOTE(review): presumably the process owning the npx (FPU) state -- confirm */
 	struct	pcb *gd_curpcb;			/* current pcb */
 	struct	timeval gd_switchtime;		/* filled in with microuptime(), e.g. by ap_init() */
 	struct	i386tss gd_common_tss;		/* TSS kept in this cpu's data */
 	int	gd_switchticks;			/* set from 'ticks', e.g. by ap_init() */
 	struct	segment_descriptor gd_common_tssd;	/* NOTE(review): presumably the descriptor for gd_common_tss -- confirm */
 	struct	segment_descriptor *gd_tss_gdt;	/* NOTE(review): presumably this cpu's TSS slot in the GDT -- confirm */
 	int	gd_currentldt;			/* NOTE(review): presumably the currently loaded LDT selector -- confirm */
 	u_int	gd_cpuid;			/* this cpu number */
 	u_int	gd_other_cpus;			/* all other cpus */
 	SLIST_ENTRY(globaldata) gd_allcpu;	/* singly-linked list linkage between globaldata structures */
 	struct	lock_list_entry *gd_spinlocks;	/* reset to NULL by ap_init(); NOTE(review): presumably spin locks held -- confirm */
 #ifdef KTR_PERCPU
 	volatile int	gd_ktr_idx;		/* Index into trace table */
 	char	*gd_ktr_buf;			/* NOTE(review): presumably points at gd_ktr_buf_data -- confirm */
 	char	gd_ktr_buf_data[KTR_SIZE];	/* per-cpu trace buffer storage */
 #endif
 };
 
-#ifdef SMP
-/*
- * This is the upper (0xff800000) address space layout that is per-cpu.
- * It is setup in locore.s and pmap.c for the BSP and in mp_machdep.c for
- * each AP.  genassym helps export this to the assembler code.
- */
-struct privatespace {
-	/* page 0 - data page */
-	struct	globaldata globaldata;
-	char	__filler0[PAGE_SIZE - sizeof(struct globaldata)];
-
-	/* page 1 - idle stack (UPAGES pages) */
-	char	idlestack[UPAGES * PAGE_SIZE];
-	/* page 1+UPAGES... */
-};
-
-extern struct privatespace SMP_prvspace[];
-
-#endif
-
 #endif	/* _KERNEL */
 
 #endif	/* ! _MACHINE_GLOBALDATA_H_ */
Index: head/sys/conf/NOTES
===================================================================
--- head/sys/conf/NOTES	(revision 82308)
+++ head/sys/conf/NOTES	(revision 82309)
@@ -1,2909 +1,2910 @@
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
 # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers',
 # 'makeoptions', 'hints' etc go into the kernel configuration that you
 # run config(8) with.
 #
 # Lines that begin with 'hints.' are NOT for config(8), they go into your
 # hints file.  See /boot/device.hints and/or the 'hints' config(8) directive.
 #
 # Please use ``make LINT'' to create an old-style LINT file if you want to
 # do kernel test-builds.
 #
 # $FreeBSD$
 #
 
 #
 # This directive is mandatory; it defines the architecture to be
 # configured for; in this case, the 386 family based IBM-PC and
 # compatibles.
 #
 machine		i386
 
 #
 # This is the ``identification'' of the kernel.  Usually this should
 # be the same as the name of your kernel.
 #
 ident		LINT
 
 #
 # The `maxusers' parameter controls the static sizing of a number of
 # internal system tables by a complicated formula defined in param.c.
 #
 maxusers	10
 
 #
 # We want LINT to cover profiling as well
 profile 	2
 
 #
 # The `makeoptions' parameter allows variables to be passed to the
 # generated Makefile in the build area.
 #
 # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS}
 # after most other flags.  Here we use it to inhibit use of non-optimal
 # gcc builtin functions (e.g., memcmp).
 #
 # DEBUG happens to be magic.
 # The following is equivalent to 'config -g KERNELNAME' and creates
 # 'kernel.debug' compiled with -g debugging as well as a normal
 # 'kernel'.  Use 'make install.debug' to install the debug kernel
 # but that isn't normally necessary as the debug symbols are not loaded
 # by the kernel and are not useful there anyway.
 #
 # KERNEL can be overridden so that you can change the default name of your
 # kernel.
 #
 makeoptions	CONF_CFLAGS=-fno-builtin  #Don't allow use of memcmp, etc.
 #makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 #makeoptions	KERNEL=foo		#Build kernel "foo" and install "/foo"
 
 #
 # Certain applications can grow to be larger than the 512M limit
 # that FreeBSD initially imposes.  Below are some options to
 # allow that limit to grow to 1GB, and can be increased further
 # by changing the parameters.  MAXDSIZ is the maximum that the
 # limit can be set to, and the DFLDSIZ is the default value for
 # the limit.  MAXSSIZ is the maximum that the stack limit can be
 # set to.  You might want to set the default lower than the max, 
 # and explicitly set the maximum with a shell command for processes
 # that regularly exceed the limit like INND.
 #
 options 	MAXDSIZ="(1024UL*1024*1024)"
 options 	MAXSSIZ="(128UL*1024*1024)"
 options 	DFLDSIZ="(1024UL*1024*1024)"
 
 #
 # BLKDEV_IOSIZE sets the default block size used in user block
 # device I/O.  Note that this value will be overridden by the label
 # when specifying a block device from a label with a non-0
 # partition blocksize.  The default is PAGE_SIZE.
 #
 options 	BLKDEV_IOSIZE=8192
 
 # Options for the VM subsystem
 options 	PQ_CACHESIZE=512	# color for 512k/16k cache
+options 	UPAGES=3		# number of 4k stack pages per process
 # Deprecated options supported for backwards compatibility
 #options 	PQ_NOOPT		# No coloring
 #options 	PQ_LARGECACHE		# color for 512k/16k cache
 #options 	PQ_HUGECACHE		# color for 1024k/16k cache
 #options 	PQ_MEDIUMCACHE		# color for 256k/16k cache
 #options 	PQ_NORMALCACHE		# color for 64k/16k cache
 
 # This allows you to actually store this configuration file into
 # the kernel binary itself, where it may be later read by saying:
 #    strings -n 3 /boot/kernel/kernel | sed -n 's/^___//p' > MYKERNEL
 #
 options 	INCLUDE_CONFIG_FILE     # Include this file in kernel
 
 #
 # The root device and filesystem type can be compiled in;
 # this provides a fallback option if the root device cannot
 # be correctly guessed by the bootstrap code, or an override if
 # the RB_DFLTROOT flag (-r) is specified when booting the kernel.
 #
 options 	ROOTDEVNAME=\"ufs:da0s2e\"
 
 
 #####################################################################
 # SMP OPTIONS:
 #
 # SMP enables building of a Symmetric MultiProcessor Kernel.
 # APIC_IO enables the use of the IO APIC for Symmetric I/O.
 #
 # Notes:
 #
 #  An SMP kernel will ONLY run on an Intel MP spec. qualified motherboard.
 #
 #  Be sure to disable 'cpu I386_CPU' && 'cpu I486_CPU' for SMP kernels.
 #
 #  Check the 'Rogue SMP hardware' section to see if additional options
 #   are required by your hardware.
 #
 
 # Mandatory:
 options 	SMP			# Symmetric MultiProcessor Kernel
 options 	APIC_IO			# Symmetric (APIC) I/O
 
 #
 # Rogue SMP hardware:
 #
 
 # Bridged PCI cards:
 #
 # The MP tables of most of the current generation MP motherboards
 #  do NOT properly support bridged PCI cards.  To use one of these
 #  cards you should refer to ???
 
 # SMP Debugging Options:
 #
 # MUTEX_DEBUG enables various extra assertions in the mutex code.
 # WITNESS enables the mutex witness code which detects deadlocks and cycles
 #         during locking operations.
 # WITNESS_DDB causes the witness code to drop into the kernel debugger if
 #	  a lock hierarchy violation occurs or if locks are held when going to
 #	  sleep.
 # WITNESS_SKIPSPIN disables the witness checks on spin mutexes.
 options 	MUTEX_DEBUG
 options 	WITNESS
 options 	WITNESS_DDB
 options 	WITNESS_SKIPSPIN
 
 
 #####################################################################
 # CPU OPTIONS
 
 #
 # You must specify at least one CPU (the one you intend to run on);
 # deleting the specification for CPUs you don't need to use may make
 # parts of the system run faster.
 # I386_CPU is mutually exclusive with the other CPU types.
 #
 #cpu		I386_CPU		
 cpu		I486_CPU
 cpu		I586_CPU		# aka Pentium(tm)
 cpu		I686_CPU		# aka Pentium Pro(tm)
 
 #
 # Options for CPU features.
 #
 # CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM
 # BlueLightning CPU.  It works only with Cyrix FPU, and this option
 # should not be used with Intel FPU.
 #
 # CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning
 # CPU if CPU supports it. The default is double-clock mode on
 # BlueLightning CPU box.
 #
 # CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1).
 #
 # CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct
 # mapped mode.  Default is 2-way set associative mode.
 #
 # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space
 # of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1.
 # Otherwise, the NO_LOCK bit of CCR1 is cleared.  (NOTE 3)
 #
 # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables
 # reorder).  This option should not be used if you use memory mapped
 # I/O device(s).
 #
 # CPU_ENABLE_SSE enables SSE/MMX2 instructions support.
 #
 # CPU_FASTER_5X86_FPU enables faster FPU exception handler.
 #
 # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
 # for i386 machines.
 #
 # CPU_IORT defines I/O clock delay time (NOTE 1).  Default values of
 # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively
 # (no clock delay).
 #
 # CPU_L2_LATENCY specifies the L2 cache latency value.  This option is used
 # only when CPU_PPRO2CELERON is defined and Mendocino Celeron is detected.
 # The default value is 5.
 #
 # CPU_LOOP_EN prevents flushing the prefetch buffer if the destination
 # of a jump is already present in the prefetch buffer on Cyrix 5x86(NOTE
 # 1).
 #
 # CPU_PPRO2CELERON enables L2 cache of Mendocino Celeron CPUs.  This option
 # is useful when you use Socket 8 to Socket 370 converter, because most Pentium
 # Pro BIOSs do not enable L2 cache of Mendocino Celeron CPUs.
 #
 # CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1).
 #
 # CPU_SUSP_HLT enables suspend on HALT.  If this option is set, CPU
 # enters suspend mode following execution of HALT instruction.
 #
 # CPU_WT_ALLOC enables write allocation on Cyrix 6x86/6x86MX and AMD
 # K5/K6/K6-2 cpus.
 #
 # CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache
 # flush at hold state.
 #
 # CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs
 # without cache flush at hold state, and (2) write-back CPU cache on
 # Cyrix 6x86 whose revision < 2.7 (NOTE 2).
 #
 # NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY
 # Pentiums) from locking up when a LOCK CMPXCHG8B instruction is
 # executed.  This option is only needed if I586_CPU is also defined,
 # and should be included for any non-Pentium CPU that defines it.
 #
 # NO_MEMORY_HOLE is an optimisation for systems with AMD K6 processors
 # which indicates that the 15-16MB range is *definitely* not being
 # occupied by an ISA memory hole.
 #
 # NOTE 1: The options CPU_BTB_EN, CPU_IORT, CPU_LOOP_EN and
 # CPU_RSTK_EN should not be used because of CPU bugs.
 # These options may crash your system.
 #
 # NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled
 # in write-through mode when revision < 2.7.  If revision of Cyrix
 # 6x86 >= 2.7, CPU cache is always enabled in write-back mode.
 #
 # NOTE 3: This option may cause failures for software that requires
 # locked cycles in order to operate correctly.
 #
 options 	CPU_BLUELIGHTNING_FPU_OP_CACHE
 options 	CPU_BLUELIGHTNING_3X
 options 	CPU_BTB_EN
 options 	CPU_DIRECT_MAPPED_CACHE
 options 	CPU_DISABLE_5X86_LSSER
 options 	CPU_ENABLE_SSE
 options 	CPU_FASTER_5X86_FPU
 options 	CPU_I486_ON_386
 options 	CPU_IORT
 options 	CPU_L2_LATENCY=5
 options 	CPU_LOOP_EN
 options 	CPU_PPRO2CELERON
 options 	CPU_RSTK_EN
 options 	CPU_SUSP_HLT
 options 	CPU_WT_ALLOC
 options 	CYRIX_CACHE_WORKS
 options 	CYRIX_CACHE_REALLY_WORKS
 #options 	NO_F00F_HACK
 
 #
 # A math emulator is mandatory if you wish to run on hardware which
 # does not have a floating-point processor.  Pick either the original,
 # bogus (but freely-distributable) math emulator, or a much more
 # fully-featured but GPL-licensed emulator taken from Linux.
 #
 options 	MATH_EMULATE		#Support for x87 emulation
 # Don't enable both of these in a real config.
 options 	GPL_MATH_EMULATE	#Support for x87 emulation via
 					#new math emulator
 
 
 #####################################################################
 # COMPATIBILITY OPTIONS                                             
 
 #
 # Implement system calls compatible with 4.3BSD and older versions of
 # FreeBSD.  You probably do NOT want to remove this as much current code
 # still relies on the 4.3 emulation.
 #
 options 	COMPAT_43
 
 #
 # These three options provide support for System V Interface
 # Definition-style interprocess communication, in the form of shared
 # memory, semaphores, and message queues, respectively.
 #
 options 	SYSVSHM
 options 	SYSVSEM
 options 	SYSVMSG
 
 
 #####################################################################
 # DEBUGGING OPTIONS
 
 #
 # Enable the kernel debugger.
 #
 options 	DDB
 
 #
 # Don't drop into DDB for a panic. Intended for unattended operation
 # where you may want to drop to DDB from the console, but still want
 # the machine to recover from a panic
 #
 options 	DDB_UNATTENDED
 
 #
 # If using GDB remote mode to debug the kernel, there's a non-standard
 # extension to the remote protocol that can be used to use the serial
 # port as both the debugging port and the system console.  It's non-
 # standard and you're on your own if you enable it.  See also the
 # "remotechat" variables in the FreeBSD specific version of gdb.
 #
 options 	GDB_REMOTE_CHAT
 
 #
 # KTRACE enables the system-call tracing facility ktrace(2).
 #
 options 	KTRACE			#kernel tracing
 
 #
 # KTR is a kernel tracing mechanism imported from BSD/OS.  Currently it
 # has no userland interface aside from a few sysctl's.  It is enabled with
 # the KTR option.  The KTR_EXTEND option causes trace events to be generated
 # as a string from snprintf rather than as a string and up to 5 argument
 # pointers.  KTR_ENTRIES defines the number of entries in the circular trace
 # buffer.  KTR_COMPILE defines the mask of events to compile into the kernel
 # as defined by the KTR_* constants in <sys/ktr.h>.  KTR_MASK defines the
 # initial value of the ktr_mask variable which determines at runtime what
 # events to trace.  KTR_CPUMASK determines which CPU's log events, with
 # bit X corresponding to cpu X.  KTR_VERBOSE enables dumping of KTR events
 # to the console by default.  This functionality can be toggled via the
 # debug.ktr_verbose sysctl and defaults to off if KTR_VERBOSE is not defined.
 #
 options 	KTR
 options 	KTR_EXTEND
 options 	KTR_ENTRIES=1024
 options 	KTR_COMPILE="(KTR_INTR|KTR_PROC)"
 options 	KTR_MASK=KTR_INTR
 options 	KTR_CPUMASK=0x3
 options 	KTR_VERBOSE
 
 #
 # The INVARIANTS option is used in a number of source files to enable
 # extra sanity checking of internal structures.  This support is not
 # enabled by default because of the extra time it would take to check
 # for these conditions, which can only occur as a result of
 # programming errors.
 #
 options 	INVARIANTS
 
 #
 # The INVARIANT_SUPPORT option makes us compile in support for
 # verifying some of the internal structures.  It is a prerequisite for
 # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be
 # called.  The intent is that you can set 'INVARIANTS' for single
 # source files (by changing the source file or specifying it on the
 # command line) if you have 'INVARIANT_SUPPORT' enabled.  Also, if you
 # wish to build a kernel module with 'INVARIANTS', then adding
 # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary
 # infrastructure without the added overhead.
 #
 options 	INVARIANT_SUPPORT
 
 #
 # The DIAGNOSTIC option is used to enable extra debugging information
 # from some parts of the kernel.  As this makes everything more noisy,
 # it is disabled by default.
 #
 options 	DIAGNOSTIC
 
 #
 # REGRESSION causes optional kernel interfaces necessary only for regression
 # testing to be enabled.  These interfaces may constitute security risks
 # when enabled, as they permit processes to easily modify aspects of the
 # run-time environment to reproduce unlikely or unusual (possibly normally
 # impossible) scenarios.
 #
 options		REGRESSION
 
 #
 # RESTARTABLE_PANICS allows one to continue from a panic as if it were
 # a call to the debugger via the Debugger() function instead.  It is only
 # useful if a kernel debugger is present.  To restart from a panic, reset
 # the panicstr variable to NULL and continue execution.  This option is
 # for development use only and should NOT be used in production systems
 # to "workaround" a panic.
 #
 options 	RESTARTABLE_PANICS
 
 #
 # PERFMON causes the driver for Pentium/Pentium Pro performance counters
 # to be compiled.  See perfmon(4) for more information.
 #
 options 	PERFMON
 
 
 #
 # This option lets some drivers co-exist that can't co-exist in a running
 # system.  This is used to be able to compile all kernel code in one go for
 # quality assurance purposes (like this file, which the option takes its name
 # from.)
 #
 options 	COMPILING_LINT
 
 
 # XXX - this doesn't belong here.
 # Allow ordinary users to take the console - this is useful for X.
 options 	UCONSOLE
 
 # XXX - this doesn't belong here either
 #options 	USERCONFIG		#boot -c editor
 #options 	INTRO_USERCONFIG	#imply -c and show intro screen
 #options 	VISUAL_USERCONFIG	#visual boot -c editor
 
 #####################################################################
 # NETWORKING OPTIONS
 
 #
 # Protocol families:
 #  Only the INET (Internet) family is officially supported in FreeBSD.
 #  Source code for the NS (Xerox Network Service) is provided for amusement
 #  value.
 #
 options 	INET			#Internet communications protocols
 options 	INET6			#IPv6 communications protocols
 options 	IPSEC			#IP security
 options 	IPSEC_ESP		#IP security (crypto; define w/ IPSEC)
 options 	IPSEC_DEBUG		#debug for IP security
 
 options 	IPX			#IPX/SPX communications protocols
 options 	IPXIP			#IPX in IP encapsulation (not available)
 options 	IPTUNNEL		#IP in IPX encapsulation (not available)
 
 options 	NCP			#NetWare Core protocol
 
 options 	NETATALK		#Appletalk communications protocols
 options 	NETATALKDEBUG		#Appletalk debugging
 
 # These are currently broken but are shipped due to interest.
 #options 	NS			#Xerox NS protocols
 #options 	NSIP			#XNS over IP
 
 # mchain library. It can be either loaded as KLD or compiled into kernel
 options 	LIBMCHAIN
 
 # netgraph(4). Enable the base netgraph code with the NETGRAPH option.
 # Individual node types can be enabled with the corresponding option
 # listed below; however, this is not strictly necessary as netgraph
 # will automatically load the corresponding KLD module if the node type
 # is not already compiled into the kernel. Each type below has a
 # corresponding man page, e.g., ng_async(8).
 options 	NETGRAPH		#netgraph(4) system
 options 	NETGRAPH_ASYNC
 options 	NETGRAPH_BPF
 options 	NETGRAPH_CISCO
 options 	NETGRAPH_ECHO
 options 	NETGRAPH_ETHER
 options 	NETGRAPH_FRAME_RELAY
 options 	NETGRAPH_HOLE
 options 	NETGRAPH_IFACE
 options 	NETGRAPH_KSOCKET
 options 	NETGRAPH_LMI
 # MPPC compression requires proprietary files (not included)
 #options 	NETGRAPH_MPPC_COMPRESSION
 options 	NETGRAPH_MPPC_ENCRYPTION
 options 	NETGRAPH_ONE2MANY
 options 	NETGRAPH_PPP
 options 	NETGRAPH_PPPOE
 options 	NETGRAPH_PPTPGRE
 options 	NETGRAPH_RFC1490
 options 	NETGRAPH_SOCKET
 options 	NETGRAPH_SPLIT
 options 	NETGRAPH_TEE
 options 	NETGRAPH_TTY
 options 	NETGRAPH_UI
 options 	NETGRAPH_VJC
 
 device		mn	# Munich32x/Falc54 Nx64kbit/sec cards.
 device		lmc	# tulip based LanMedia WAN cards
 device		musycc	# LMC/SBE LMC1504 quad T1/E1
 
 #
 # Network interfaces:
 #  The `loop' device is MANDATORY when networking is enabled.
 #  The `ether' device provides generic code to handle
 #  Ethernets; it is MANDATORY when a Ethernet device driver is
 #  configured or token-ring is enabled.
 #  The `fddi' device provides generic code to support FDDI.
 #  The `sppp' device serves a similar role for certain types
 #  of synchronous PPP links (like `cx', `ar').
 #  The `sl' device implements the Serial Line IP (SLIP) service.
 #  The `ppp' device implements the Point-to-Point Protocol.
 #  The `bpf' device enables the Berkeley Packet Filter.  Be
 #  aware of the legal and administrative consequences of enabling this
 #  option.  The number of devices determines the maximum number of
 #  simultaneous BPF client programs runnable.
 #  The `disc' device implements a minimal network interface,
 #  which throws away all packets sent and never receives any.  It is
 #  included for testing purposes.  This shows up as the `ds' interface.
 #  The `tap' device is a pty-like virtual Ethernet interface
 #  The `tun' device implements (user-)ppp and nos-tun
 #  The `gif' device implements IPv6 over IP4 tunneling,
 #  IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and
 #  IPv6 over IPv6 tunneling.
 #  The XBONEHACK option allows the same pair of addresses to be configured on
 #  multiple gif interfaces.
 #  The `faith' device captures packets sent to it and diverts them
 #  to the IPv4/IPv6 translation daemon.
 #  The `stf' device implements 6to4 encapsulation.
 #  The `ef' device provides support for multiple ethernet frame types
 #  specified via ETHER_* options. See ef(4) for details.
 #
 # The PPP_BSDCOMP option enables support for compress(1) style entire
 # packet compression, the PPP_DEFLATE is for zlib/gzip style compression.
 # PPP_FILTER enables code for filtering the ppp data stream and selecting
 # events for resetting the demand dial activity timer - requires bpf.
 # See pppd(8) for more details.
 #
 device		ether			#Generic Ethernet
 device		vlan	1		#VLAN support
 device		token			#Generic TokenRing
 device		fddi			#Generic FDDI
 device		sppp			#Generic Synchronous PPP
 device		loop	1		#Network loopback device
 device		bpf			#Berkeley packet filter
 device		disc			#Discard device (ds0, ds1, etc)
 device		tap			#Virtual Ethernet driver
 device		tun			#Tunnel driver (ppp(8), nos-tun(8))
 device		sl			#Serial Line IP
 device		ppp	2		#Point-to-point protocol
 options 	PPP_BSDCOMP		#PPP BSD-compress support
 options 	PPP_DEFLATE		#PPP zlib/deflate/gzip support
 options 	PPP_FILTER		#enable bpf filtering (needs bpf)
 
 device		ef			# Multiple ethernet frames support
 options 	ETHER_II		# enable Ethernet_II frame
 options 	ETHER_8023		# enable Ethernet_802.3 (Novell) frame
 options 	ETHER_8022		# enable Ethernet_802.2 frame
 options 	ETHER_SNAP		# enable Ethernet_802.2/SNAP frame
 
 # for IPv6
 device		gif			#IPv6 and IPv4 tunneling
 options 	XBONEHACK
 device		faith	1		#for IPv6 and IPv4 translation
 device		stf			#6to4 IPv6 over IPv4 encapsulation
 
 #
 # Internet family options:
 #
 # MROUTING enables the kernel multicast packet forwarder, which works
 # with mrouted(8).
 #
 # IPFIREWALL enables support for IP firewall construction, in
 # conjunction with the `ipfw' program.  IPFIREWALL_VERBOSE sends
 # logged packets to the system logger.  IPFIREWALL_VERBOSE_LIMIT
 # limits the number of times a matching entry can be logged.
 #
 # WARNING:  IPFIREWALL defaults to a policy of "deny ip from any to any"
 # and if you do not add other rules during startup to allow access,
 # YOU WILL LOCK YOURSELF OUT.  It is suggested that you set firewall_type=open
 # in /etc/rc.conf when first enabling this feature, then refining the
 # firewall rules in /etc/rc.firewall after you've tested that the new kernel
 # feature works properly.
 #
 # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to
 # allow everything.  Use with care, if a cracker can crash your
 # firewall machine, they can get to your protected machines.  However,
 # if you are using it as an as-needed filter for specific problems as
 # they arise, then this may be for you.  Changing the default to 'allow'
 # means that you won't get stuck if the kernel and /sbin/ipfw binary get
 # out of sync.
 #
 # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''
 #
 # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding
 # packets without touching the ttl).  This can be useful to hide firewalls
 # from traceroute and similar tools.
 #
 # TCPDEBUG enables code which keeps traces of the TCP state machine
 # for sockets with the SO_DEBUG option set, which can then be examined
 # using the trpt(8) utility.
 #
 options 	MROUTING		# Multicast routing
 options 	IPFIREWALL		#firewall
 options 	IPFIREWALL_VERBOSE	#print information about
 					# dropped packets
 options 	IPFIREWALL_FORWARD	#enable transparent proxy support
 options 	IPFIREWALL_VERBOSE_LIMIT=100	#limit verbosity
 options 	IPFIREWALL_DEFAULT_TO_ACCEPT	#allow everything by default
 options 	IPV6FIREWALL		#firewall for IPv6
 options 	IPV6FIREWALL_VERBOSE
 options 	IPV6FIREWALL_VERBOSE_LIMIT=100
 options 	IPV6FIREWALL_DEFAULT_TO_ACCEPT
 options 	IPDIVERT		#divert sockets
 options 	IPFILTER		#ipfilter support
 options 	IPFILTER_LOG		#ipfilter logging
 options 	IPFILTER_DEFAULT_BLOCK	#block all packets by default
 options 	IPSTEALTH		#support for stealth forwarding
 options 	TCPDEBUG
 
 # RANDOM_IP_ID causes the ID field in IP packets to be randomized
 # instead of incremented by 1 with each packet generated.  This
 # option closes a minor information leak which allows remote
 # observers to determine the rate of packet generation on the
 # machine by watching the counter.
 options		RANDOM_IP_ID
 
 # Statically Link in accept filters
 options		ACCEPT_FILTER_DATA
 options		ACCEPT_FILTER_HTTP
 
 # TCP_DROP_SYNFIN adds support for ignoring TCP packets with SYN+FIN. This
 # prevents nmap et al. from identifying the TCP/IP stack, but breaks support
 # for RFC1644 extensions and is not recommended for web servers.
 #
 options 	TCP_DROP_SYNFIN		#drop TCP packets with SYN+FIN
 
 # DUMMYNET enables the "dummynet" bandwidth limiter. You need
 # IPFIREWALL as well. See the dummynet(4) manpage for more info.
 # BRIDGE enables bridging between ethernet cards -- see bridge(4).
 # You can use IPFIREWALL and dummynet together with bridging.
 options 	DUMMYNET
 options 	BRIDGE
 
 #
 # ATM (HARP version) options
 #
 # ATM_CORE includes the base ATM functionality code.  This must be included
 #	for ATM support.
 #
 # ATM_IP includes support for running IP over ATM.
 #
 # At least one (and usually only one) of the following signalling managers
 # must be included (note that all signalling managers include PVC support):
 # ATM_SIGPVC includes support for the PVC-only signalling manager `sigpvc'.
 # ATM_SPANS includes support for the `spans' signalling manager, which runs
 #	the FORE Systems' proprietary SPANS signalling protocol.
 # ATM_UNI includes support for the `uni30' and `uni31' signalling managers,
 #	which run the ATM Forum UNI 3.x signalling protocols.
 #
 # The `hea' driver provides support for the Efficient Networks, Inc.
 # ENI-155p ATM PCI Adapter.
 #
 # The `hfa' driver provides support for the FORE Systems, Inc.
 # PCA-200E ATM PCI Adapter.
 #
 options 	ATM_CORE		#core ATM protocol family
 options 	ATM_IP			#IP over ATM support
 options 	ATM_SIGPVC		#SIGPVC signalling manager
 options 	ATM_SPANS		#SPANS signalling manager
 options 	ATM_UNI			#UNI signalling manager
 device		hea			#Efficient ENI-155p ATM PCI
 device		hfa			#FORE PCA-200E ATM PCI
 
 
 #####################################################################
 # FILESYSTEM OPTIONS
 
 #
 # Only the root, /usr, and /tmp filesystems need be statically
 # compiled; everything else will be automatically loaded at mount
 # time.  (Exception: the UFS family--- FFS --- cannot
 # currently be demand-loaded.)  Some people still prefer to statically
 # compile other filesystems as well.
 #
 # NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be
 # buggy, and WILL panic your system if you attempt to do anything with
 # them.  They are included here as an incentive for some enterprising
 # soul to sit down and fix them.
 #
 
 # One of these is mandatory:
 options 	FFS			#Fast filesystem
 options 	NFS			#Network File System
 
 # The rest are optional:
 #options 	NFS_NOSERVER		#Disable the NFS-server code.
 options 	CD9660			#ISO 9660 filesystem
 options 	FDESCFS			#File descriptor filesystem
 options 	HPFS			#OS/2 File system
 options 	MSDOSFS			#MS DOS File System (FAT, FAT32)
 options 	NTFS			#NT File System
 options 	NULLFS			#NULL filesystem
 options 	NWFS			#NetWare filesystem
 options 	PORTALFS		#Portal filesystem
 options 	PROCFS			#Process filesystem
 options 	PSEUDOFS		#Pseudo-filesystem framework
 options 	UMAPFS			#UID map filesystem
 options 	UNIONFS			#Union filesystem
 # options 	NODEVFS			#disable devices filesystem
 # The xFS_ROOT options REQUIRE the associated ``options xFS''
 options 	NFS_ROOT		#NFS usable as root device
 # This code enables IFS, an FFS which exports inodes as the namespace.
 # You can find details in src/sys/ufs/ifs/README .
 options		IFS
 
 # Soft updates is a technique for improving file system speed and
 # making abrupt shutdown less risky.
 #
 options 	SOFTUPDATES
 
 # Extended attributes allow additional data to be associated with files,
 # and is used for ACLs, Capabilities, and MAC labels.
 # See src/sys/ufs/ufs/README.extattr for more information.
 options		UFS_EXTATTR
 options		UFS_EXTATTR_AUTOSTART
 
 # Access Control List support for UFS filesystems.  The current ACL
 # implementation requires extended attribute support, UFS_EXTATTR,
 # for the underlying filesystem.
 # See src/sys/ufs/ufs/README.acls for more information.
 options 	UFS_ACL
 
 # Directory hashing improves the speed of operations on very large
 # directories at the expense of some memory.
 options		UFS_DIRHASH
 
 # Make space in the kernel for a root filesystem on a md device.
 # Define to the number of kilobytes to reserve for the filesystem.
 options 	MD_ROOT_SIZE=10
 
 # Make the md device a potential root device, either with preloaded
 # images of type mfs_root or md_root.
 options 	MD_ROOT
 
 # Allow this many swap-devices.
 #
 # In order to manage swap, the system must reserve bitmap space that
 # scales with the largest mounted swap device multiplied by NSWAPDEV, 
 # regardless of whether other swap devices exist or not.  So it
 # is not a good idea to make this value too large.
 options 	NSWAPDEV=5
 
 # Disk quotas are supported when this option is enabled.
 options 	QUOTA			#enable disk quotas
 
 # If you are running a machine just as a fileserver for PC and MAC
 # users, using SAMBA or Netatalk, you may consider setting this option
 # and keeping all those users' directories on a filesystem that is
 # mounted with the suiddir option. This gives new files the same
 # ownership as the directory (similar to group). It's a security hole
 # if you let these users run programs, so confine it to file-servers
 # (but it'll save you lots of headaches in those cases). Root owned
 # directories are exempt and X bits are cleared. The suid bit must be
 # set on the directory as well; see chmod(1).  PC owners can't see/set
 # ownerships so they keep getting their toes trodden on. This saves
 # you all the support calls as the filesystem it's used on will act as
 # they expect: "It's my dir so it must be my file".
 #
 options 	SUIDDIR
 
 # NFS options:
 options 	NFS_MINATTRTIMO=3	# VREG attrib cache timeout in sec
 options 	NFS_MAXATTRTIMO=60
 options 	NFS_MINDIRATTRTIMO=30	# VDIR attrib cache timeout in sec
 options 	NFS_MAXDIRATTRTIMO=60
 options 	NFS_GATHERDELAY=10	# Default write gather delay (msec)
 options 	NFS_UIDHASHSIZ=29	# Tune the size of nfssvc_sock with this
 options 	NFS_WDELAYHASHSIZ=16	# and with this
 options 	NFS_MUIDHASHSIZ=63	# Tune the size of nfsmount with this
 options 	NFS_DEBUG		# Enable NFS Debugging
 
 # Coda stuff:
 options 	CODA			#CODA filesystem.
 device		vcoda	4		#coda minicache <-> venus comm.
 
 #
 # Add support for the EXT2FS filesystem of Linux fame.  Be a bit
 # careful with this - the ext2fs code has a tendency to lag behind
 # changes and not be exercised very much, so mounting read/write could
 # be dangerous (and even mounting read only could result in panics.)
 #
 options 	EXT2FS
 
 # Use real implementations of the aio_* system calls.  There are numerous
 # stability issues in the current aio code that make it unsuitable for
 # inclusion on shell boxes.
 options 	VFS_AIO
 
 # Enable the UFS IO optimization code through the VM system.  This allows
 # the use of VM operations instead of copying operations when possible.
 # 
 # Even with this enabled, actual use of the code is still controlled by the
 # sysctl vfs.ioopt.  0 gives no optimization, 1 gives normal (use VM
 # operations if a request happens to fit), 2 gives aggressive optimization
 # (the operations are split to do as much as possible through the VM system.)
 #
 # Enabling this will probably not give an overall speedup except for
 # special workloads.
 options 	ENABLE_VFS_IOOPT
 
 # Cryptographically secure random number generator; /dev/[u]random
 device		random
 
 
 #####################################################################
 # POSIX P1003.1B
 
 # Real time extensions added in the 1993 Posix
 # P1003_1B: Infrastructure
 # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING
 # _KPOSIX_VERSION:             Version kernel is built for
 
 options 	P1003_1B
 options 	_KPOSIX_PRIORITY_SCHEDULING
 options 	_KPOSIX_VERSION=199309L
 
 
 #####################################################################
 # CLOCK OPTIONS
 
 # The granularity of operation is controlled by the kernel option HZ whose
 # default value (100) means a granularity of 10ms.  For an accurate simulation
 # of high data rates it might be necessary to reduce the timer granularity to
 # 1ms or less.  Consider, however, that some interfaces using programmed I/O
 # may require a considerable time to output packets.  So, reducing the
 # granularity too much might actually cause ticks to be missed thus reducing
 # the accuracy of operation.
 
 options 	HZ=100
 
 # Other clock options
 
 options 	CLK_CALIBRATION_LOOP
 options 	CLK_USE_I8254_CALIBRATION
 options 	CLK_USE_TSC_CALIBRATION
 
 
 #####################################################################
 # SCSI DEVICES
 
 # SCSI DEVICE CONFIGURATION
 
 # The SCSI subsystem consists of the `base' SCSI code, a number of
 # high-level SCSI device `type' drivers, and the low-level host-adapter
 # device drivers.  The host adapters are listed in the ISA and PCI
 # device configuration sections below.
 #
 # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so
 # that a given bus, target, and LUN always come on line as the same
 # device unit.  In earlier versions the unit numbers were assigned
 # in the order that the devices were probed on the SCSI bus.  This
 # means that if you removed a disk drive, you may have had to rewrite
 # your /etc/fstab file, and also that you had to be careful when adding
 # a new disk as it may have been probed earlier and moved your device
 # configuration around.
 
 # This old behavior is maintained as the default behavior.  The unit
 # assignment begins with the first non-wired down unit for a device
 # type.  For example, if you wire a disk as "da3" then the first
 # non-wired disk will be assigned da4.
 
 # The syntax for wiring down devices is:
 
 hint.scbus.0.at="ahc0"
 hint.scbus.1.at="ahc1"
 hint.scbus.1.bus="0"
 hint.scbus.3.at="ahc2"
 hint.scbus.3.bus="0"
 hint.scbus.2.at="ahc2"
 hint.scbus.2.bus="1"
 hint.da.0.at="scbus0"
 hint.da.0.target="0"
 hint.da.0.unit="0"
 hint.da.1.at="scbus3"
 hint.da.1.target="1"
 hint.da.2.at="scbus2"
 hint.da.2.target="3"
 hint.sa.1.at="scbus1"
 hint.sa.1.target="6"
 
 # "units" (SCSI logical unit number) that are not specified are
 # treated as if specified as LUN 0.
 
 # All SCSI devices allocate as many units as are required.
 
 # The ch driver drives SCSI Media Changer ("jukebox") devices.
 #
 # The da driver drives SCSI Direct Access ("disk") and Optical Media
 # ("WORM") devices.
 #
 # The sa driver drives SCSI Sequential Access ("tape") devices.
 #
 # The cd driver drives SCSI Read Only Direct Access ("cd") devices.
 #
 # The ses driver drives SCSI Environmental Services ("ses") and
 # SAF-TE ("SCSI Accessed Fault-Tolerant Enclosure") devices.
 #
 # The pt driver drives SCSI Processor devices.
 #
 # 
 # Target Mode support is provided here but also requires that a SIM
 # (SCSI Host Adapter Driver) provide support as well.
 #
 # The targ driver provides target mode support as a Processor type device.
 # It exists to give the minimal context necessary to respond to Inquiry
 # commands. There is a sample user application that shows how the rest
 # of the command support might be done in /usr/share/examples/scsi_target.
 #
 # The targbh driver provides target mode support and exists to respond
 # to incoming commands that do not otherwise have a logical unit assigned
 # to them.
 # 
 # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI
 # configuration as the "pass" driver.
 
 device		scbus		#base SCSI code
 device		ch		#SCSI media changers
 device		da		#SCSI direct access devices (aka disks)
 device		sa		#SCSI tapes
 device		cd		#SCSI CD-ROMs
 device		ses		#SCSI Environmental Services (and SAF-TE)
 device		pt		#SCSI processor 
 device		targ		#SCSI Target Mode Code
 device		targbh		#SCSI Target Mode Blackhole Device
 device		pass		#CAM passthrough driver
 
 # CAM OPTIONS:
 # debugging options:
 # -- NOTE --  If you specify one of the bus/target/lun options, you must
 #             specify them all!
 # CAMDEBUG: When defined enables debugging macros
 # CAM_DEBUG_BUS:  Debug the given bus.  Use -1 to debug all busses.
 # CAM_DEBUG_TARGET:  Debug the given target.  Use -1 to debug all targets.
 # CAM_DEBUG_LUN:  Debug the given lun.  Use -1 to debug all luns.
 # CAM_DEBUG_FLAGS:  OR together CAM_DEBUG_INFO, CAM_DEBUG_TRACE,
 #                   CAM_DEBUG_SUBTRACE, and CAM_DEBUG_CDB
 #
 # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds
 # CAM_NEW_TRAN_CODE: this is the new transport layer code that will be switched
 #			to soon
 # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions
 # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions
 # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter)
 #             queue after a bus reset, and the number of milliseconds to
 #             freeze the device queue after a bus device reset.
 options 	CAMDEBUG
 options 	CAM_DEBUG_BUS=-1
 options 	CAM_DEBUG_TARGET=-1
 options 	CAM_DEBUG_LUN=-1
 options 	CAM_DEBUG_FLAGS="CAM_DEBUG_INFO|CAM_DEBUG_TRACE|CAM_DEBUG_CDB"
 options 	CAM_MAX_HIGHPOWER=4
 options 	SCSI_NO_SENSE_STRINGS
 options 	SCSI_NO_OP_STRINGS
 options 	SCSI_DELAY=8000	# Be pessimistic about Joe SCSI device
 
 # Options for the CAM CDROM driver:
 # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN
 # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only
 #                           enforced if there is I/O waiting for another LUN
 # The compiled in defaults for these variables are 2 and 10 seconds,
 # respectively.
 #
 # These can also be changed on the fly with the following sysctl variables:
 # kern.cam.cd.changer.min_busy_seconds
 # kern.cam.cd.changer.max_busy_seconds
 #
 options 	CHANGER_MIN_BUSY_SECONDS=2
 options 	CHANGER_MAX_BUSY_SECONDS=10
 
 # Options for the CAM sequential access driver:
 # SA_IO_TIMEOUT: Timeout for read/write/wfm  operations, in minutes
 # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes
 # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes
 # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes
 # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT.
 options 	SA_IO_TIMEOUT="(4)"
 options 	SA_SPACE_TIMEOUT="(60)"
 options 	SA_REWIND_TIMEOUT="(2*60)"
 options 	SA_ERASE_TIMEOUT="(4*60)"
 options 	SA_1FM_AT_EOD
 
 # Optional timeout for the CAM processor target (pt) device
 # This is specified in seconds.  The default is 60 seconds.
 options 	SCSI_PT_DEFAULT_TIMEOUT="60"
 
 # Optional enable of doing SES passthrough on other devices (e.g., disks)
 #
 # Normally disabled because a lot of newer SCSI disks report themselves
 # as having SES capabilities, but this can then clot up attempts to
 # build a topology with the SES device that's on the box these drives
 # are in....
 options		SES_ENABLE_PASSTHROUGH
 
 
 #####################################################################
 # MISCELLANEOUS DEVICES AND OPTIONS
 
 # The `pty' device usually turns out to be ``effectively mandatory'',
 # as it is required for `telnetd', `rlogind', `screen', `emacs', and
 # `xterm', among others.
 
 device		pty		#Pseudo ttys
 device		speaker		#Play IBM BASIC-style noises out your speaker
 device		gzip		#Exec gzipped a.out's
 device		md		#Memory/malloc disk
 device		snp		#Snoop device - to look at pty/vty/etc..
 device		ccd	4	#Concatenated disk driver
 
 # Configuring Vinum into the kernel is not necessary, since the kld
 # module gets started automatically when vinum(8) starts.  This
 # device is also untested.  Use at your own risk.
 #
 # The option VINUMDEBUG must match the value set in CFLAGS
 # in src/sbin/vinum/Makefile.  Failure to do so will result in
 # the following message from vinum(8):
 #
 # Can't get vinum config: Invalid argument
 #
 # see vinum(4) for more reasons not to use these options.
 device		vinum		#Vinum concat/mirror/raid driver
 options 	VINUMDEBUG	#enable Vinum debugging hooks
 
 # Kernel side iconv library
 options 	LIBICONV
 
 # Size of the kernel message buffer.  Should be N * pagesize.
 options 	MSGBUF_SIZE=40960
 
 
 #####################################################################
 # HARDWARE BUS CONFIGURATION
 
 # ISA, EISA, MCA and PCI bus:
 
 #
 # Mandatory ISA devices: isa, npx
 #
 device		isa
 
 #
 # Options for `isa':
 #
 # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A
 # interrupt controller.  This saves about 0.7-1.25 usec for each interrupt.
 # This option breaks suspend/resume on some portables.
 #
 # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A
 # interrupt controller.  This saves about 0.7-1.25 usec for each interrupt.
 # Automatic EOI is documented not to work for the slave with the
 # original i8259A, but it works for some clones and some integrated
 # versions.
 #
 # MAXMEM specifies the amount of RAM on the machine; if this is not
 # specified, FreeBSD will first read the amount of memory from the CMOS
 # RAM, so the amount of memory will initially be limited to 64MB or 16MB
 # depending on the BIOS.  If the BIOS reports 64MB, a memory probe will
 # then attempt to detect the installed amount of RAM.  If this probe
 # fails to detect >64MB RAM you will have to use the MAXMEM option.
 # The amount is in kilobytes, so for a machine with 128MB of RAM, it would
 # be 131072 (128 * 1024).
 #
 # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to
 # reset the CPU for reboot.  This is needed on some systems with broken
 # keyboard controllers.
 
 options 	COMPAT_OLDISA	#Use ISA shims and glue for old drivers
 options 	AUTO_EOI_1
 #options 	AUTO_EOI_2
 
 options 	MAXMEM="(128*1024)"
 #options 	BROKEN_KEYBOARD_RESET
 
 # Enable support for the kernel PLL to use an external PPS signal,
 # under supervision of [x]ntpd(8)
 # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp
 
 options 	PPS_SYNC
 
 # If you see the "calcru: negative time of %ld usec for pid %d (%s)\n"
 # message you probably have some broken sw/hw which disables interrupts
 # for too long.  You can make the system more resistant to this by
 # choosing a high value for NTIMECOUNTER.  The default is 5; there
 # is no upper limit, but more than a couple of hundred is not productive.
 # A better strategy may be to sysctl -w kern.timecounter.method=1
 
 options 	NTIMECOUNTER=20
 
 # 
 # EISA bus
 #
 # The EISA bus device is `eisa'.  It provides auto-detection and
 # configuration support for all devices on the EISA bus.
 
 device		eisa
 
 # By default, only 10 EISA slots are probed, since the slot numbers
 # above clash with the configuration address space of the PCI subsystem,
 # and the EISA probe is not very smart about this.  This is sufficient
 # for most machines, but in particular the HP NetServer LC series comes
 # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11,
 # thus you need to bump this figure to 12 for them.
 options 	EISA_SLOTS=12
 
 #
 # MCA bus:
 #
 # The MCA bus device is `mca'.  It provides auto-detection and
 # configuration support for all devices on the MCA bus.
 # No hints are required for MCA.
 
 device		mca
 
 #
 # PCI bus & PCI options:
 #
 # The main PCI bus device is `pci'.  It provides auto-detection and
 # configuration support for all devices on the PCI bus, using either
 # configuration mode defined in the PCI specification.
 
 device		pci
 
 #
 # AGP GART support
 device		agp
 
 # PCI options
 #
 #options 	PCI_QUIET	#quiets PCI code on chipset settings
 
 
 #####################################################################
 # HARDWARE DEVICE CONFIGURATION
 
 # EISA support is available for some devices, so they can be auto-probed.
 # MicroChannel (MCA) support is available for some devices.
 # For ISA the required hints are listed.
 # EISA, MCA, PCI and pccard are self identifying buses, so no hints
 # are needed.
 
 #
 # Mandatory devices:
 #
 
 # The keyboard controller; it controls the keyboard and the PS/2 mouse.
 device		atkbdc	1
 hint.atkbdc.0.at="isa"
 hint.atkbdc.0.port="0x060"
 
 # The AT keyboard
 device		atkbd
 hint.atkbd.0.at="atkbdc"
 hint.atkbd.0.irq="1"
 
 # Options for atkbd:
 options 	ATKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	ATKBD_DFLT_KEYMAP="jp.106"
 
 # These options are valid for other keyboard drivers as well.
 options 	KBD_DISABLE_KEYMAP_LOAD	# refuse to load a keymap
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 
 # `flags' for atkbd:
 #       0x01    Force detection of keyboard, else we always assume a keyboard
 #       0x02    Don't reset keyboard, useful for some newer ThinkPads
 #       0x04    Old-style (XT) keyboard support, useful for older ThinkPads
 
 # PS/2 mouse
 device		psm
 hint.psm.0.at="atkbdc"
 hint.psm.0.irq="12"
 
 # Options for psm:
 options 	PSM_HOOKRESUME		#hook the system resume event, useful
 					#for some laptops
 options 	PSM_RESETAFTERSUSPEND	#reset the device at the resume event
 
 # The video card driver.
 device		vga
 hint.vga.0.at="isa"
 
 # Options for vga:
 # Try the following option if the mouse pointer is not drawn correctly
 # or font does not seem to be loaded properly.  May cause flicker on
 # some systems.
 options 	VGA_ALT_SEQACCESS
 
 # If you can dispense with some vga driver features, you may want to
 # use the following options to save some memory.
 #options 	VGA_NO_FONT_LOADING	# don't save/load font
 #options 	VGA_NO_MODE_CHANGE	# don't change video modes
 
 # Older video cards may require this option for proper operation.
 options 	VGA_SLOW_IOACCESS	# do byte-wide i/o's to TS and GDC regs
 
 # The following option probably won't work with the LCD displays.
 options 	VGA_WIDTH90		# support 90 column modes
 
 # To include support for VESA video modes
 options 	VESA
 
 options 	FB_DEBUG		# Frame buffer debugging
 options 	FB_INSTALL_CDEV		# install a CDEV entry in /dev
 
 # Splash screen at start up!  Screen savers require this too.
 device		splash
 
 # Various screen savers.
 device		apm_saver		# Requires APM
 device		blank_saver
 device		daemon_saver
 device		fade_saver
 device		fire_saver
 device		green_saver
 device		logo_saver
 device		rain_saver
 device		star_saver
 device		warp_saver
 
 # The pcvt console driver (vt220 compatible).
 device		vt
 hint.vt.0.at="isa"
 options 	XSERVER			# support for running an X server on vt
 options 	FAT_CURSOR		# start with block cursor
 # This PCVT option is for keyboards such as those used on really old ThinkPads
 options 	PCVT_SCANSET=2
 # Other PCVT options are documented in pcvt(4).
 options 	PCVT_24LINESDEF
 options 	PCVT_CTRL_ALT_DEL
 options 	PCVT_META_ESC
 options 	PCVT_NSCREENS=9
 options 	PCVT_PRETTYSCRNS
 options 	PCVT_SCREENSAVER
 options 	PCVT_USEKBDSEC
 options 	PCVT_VT220KEYB
 options 	PCVT_GREENSAVER
 
 # The syscons console driver (sco color console compatible).
 device		sc	1
 hint.sc.0.at="isa"
 options 	MAXCONS=16		# number of virtual consoles
 options 	SC_ALT_MOUSE_IMAGE	# simplified mouse cursor in text mode
 options 	SC_DFLT_FONT		# compile font in
 makeoptions	SC_DFLT_FONT=cp850
 options 	SC_DISABLE_DDBKEY	# disable `debug' key
 options 	SC_DISABLE_REBOOT	# disable reboot key sequence
 options 	SC_HISTORY_SIZE=200	# number of history buffer lines
 options 	SC_MOUSE_CHAR=0x3	# char code for text mode mouse cursor
 options 	SC_PIXEL_MODE		# add support for the raster text mode
 
 # The following options will let you change the default colors of syscons.
 options 	SC_NORM_ATTR="(FG_GREEN|BG_BLACK)"
 options 	SC_NORM_REV_ATTR="(FG_YELLOW|BG_GREEN)"
 options 	SC_KERNEL_CONS_ATTR="(FG_RED|BG_BLACK)"
 options 	SC_KERNEL_CONS_REV_ATTR="(FG_BLACK|BG_RED)"
 
 # If you have a two button mouse, you may want to add the following option
 # to use the right button of the mouse to paste text.
 options 	SC_TWOBUTTON_MOUSE
 
 # You can selectively disable features in syscons.
 options 	SC_NO_CUTPASTE
 options 	SC_NO_FONT_LOADING
 options 	SC_NO_HISTORY
 options 	SC_NO_SYSMOUSE
 
 # `flags' for sc
 #	0x80	Put the video card in the VESA 800x600 dots, 16 color mode
 #	0x100	Probe for a keyboard device periodically if one is not present
 
 # 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create
 # the /dev/3dfx0 device to work with glide implementations. This should get
 # linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as
 # the tdfx DRI module from XFree86 and is completely unrelated.
 #
 # To enable Linuxulator support, one must also include COMPAT_LINUX in the
 # config as well, or you will not have the dependencies. The other option
 # is to load both as modules.
 
 device 		tdfx			# Enable 3Dfx Voodoo support
 options 	TDFX_LINUX		# Enable Linuxulator support
 
 #
 # The Numeric Processing eXtension driver.  In addition to this, you
 # may configure a math emulator (see above).  If your machine has a
 # hardware FPU and the kernel configuration includes the npx device
 # *and* a math emulator compiled into the kernel, the hardware FPU
 # will be used, unless it is found to be broken or unless "flags" to
 # npx0 includes "0x08", which requests preference for the emulator.
 device		npx
 hint.npx.0.at="nexus"
 hint.npx.0.port="0x0F0"
 hint.npx.0.flags="0x0"
 hint.npx.0.irq="13"
 
 #
 # `flags' for npx0:
 #	0x01	don't use the npx registers to optimize bcopy.
 #	0x02	don't use the npx registers to optimize bzero.
 #	0x04	don't use the npx registers to optimize copyin or copyout.
 #	0x08	use emulator even if hardware FPU is available.
 # The npx registers are normally used to optimize copying and zeroing when
 # all of the following conditions are satisfied:
 #	I586_CPU is an option
 #	the cpu is an i586 (perhaps not a Pentium)
 #	the probe for npx0 succeeds
 #	INT 16 exception handling works.
 # Then copying and zeroing using the npx registers is normally 30-100% faster.
 # The flags can be used to control cases where it doesn't work or is slower.
 # Setting them at boot time using userconfig works right (the optimizations
 # are not used until later in the bootstrap when npx0 is attached).
 # Flag 0x08 automatically disables the i586 optimized routines.
 #
 
 #
 # ACPI support using the Intel ACPI Component Architecture reference
 # implementation.
 #
 # ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer
 # kernel environment variables to select initial debugging levels for the
 # Intel ACPICA code.  (Note that the Intel code must also have USE_DEBUGGER
 # defined when it is built).
 #
 device		acpica
 options		ACPI_DEBUG
 
 #
 # Optional devices:
 #
 
 #
 # SCSI host adapters:
 #
 # adv: All Narrow SCSI bus AdvanSys controllers.
 # adw: Second Generation AdvanSys controllers including the ADV940UW.
 # aha: Adaptec 154x/1535/1640
 # ahb: Adaptec 174x EISA controllers
 # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/
 #      19160x/29160x, aic7770/aic78xx
 # aic: Adaptec 6260/6360, APA-1460 (PC Card), NEC PC9801-100 (C-BUS)
 # amd: Support for the AMD 53C974 SCSI host adapter chip as found on devices
 #      such as the Tekram DC-390(T).
 # bt:  Most Buslogic controllers: including BT-445, BT-54x, BT-64x, BT-74x,
 #      BT-75x, BT-946, BT-948, BT-956, BT-958, SDC3211B, SDC3211F, SDC3222F
 # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters,
 #      ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2,
 #      ISP 12160 Ultra3 SCSI,
 #      Qlogic ISP 2100 and ISP 2200 Fibre Channel host adapters.
 # ispfw: Firmware module for Qlogic host adapters
 # ncr: NCR 53C810, 53C825 self-contained SCSI host adapters.
 # ncv: NCR 53C500 based SCSI host adapters.
 # nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters.
 # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors:
 #      53C810, 53C810A, 53C815, 53C825,  53C825A, 53C860, 53C875, 
 #      53C876, 53C885,  53C895, 53C895A, 53C896,  53C897, 53C1510D, 
 #      53C1010-33, 53C1010-66.
 # stg: TMC 18C30, 18C50 based SCSI host adapters.
 # wds: WD7000
 
 #
 # Note that the order is important in order for Buslogic ISA/EISA cards to be
 # probed correctly.
 #
 device		bt
 hint.bt.0.at="isa"
 hint.bt.0.port="0x330"
 device		adv
 hint.adv.0.at="isa"
 device		adw
 device		aha
 hint.aha.0.at="isa"
 device		aic
 hint.aic.0.at="isa"
 device		ahb
 device		ahc
 device		amd
 device		isp
 hint.isp.0.disable="1"
 hint.isp.0.role="3"
 hint.isp.0.prefer_iomap="1"
 hint.isp.0.prefer_memmap="1"
 hint.isp.0.fwload_disable="1"
 hint.isp.0.ignore_nvram="1"
 hint.isp.0.fullduplex="1"
 hint.isp.0.topology="lport"
 hint.isp.0.topology="nport"
 hint.isp.0.topology="lport-only"
 hint.isp.0.topology="nport-only"
 # we can't get u_int64_t types, nor can we get strings if it's got
 # a leading 0x, hence this silly dodge.
 hint.isp.0.portwnn="w50000000aaaa0000"
 hint.isp.0.nodewnn="w50000000aaaa0001"
 device		ispfw
 device		ncr
 device		ncv
 device		nsp
 device		sym
 device		stg
 hint.stg.0.at="isa"
 hint.stg.0.port="0x140"
 hint.stg.0.irq="11"
 device		wds
 hint.wds.0.at="isa"
 hint.wds.0.port="0x350"
 hint.wds.0.irq="11"
 hint.wds.0.drq="6"
 
 # The aic7xxx driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set. Unfortunately,
 # this doesn't work on some motherboards, which prevents it from being the
 # default.
 options 	AHC_ALLOW_MEMIO
 
 # Enable diagnostic sequencer code.
 options 	AHC_DEBUG_SEQUENCER
 
 # Dump the contents of the ahc controller configuration PROM.
 options 	AHC_DUMP_EEPROM
 
 # Bitmap of units to enable targetmode operations.
 options 	AHC_TMODE_ENABLE
 
 # The adw driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set.
 options 	ADW_ALLOW_MEMIO
 
 # Options used in dev/isp/ (Qlogic SCSI/FC driver).
 #
 #	ISP_TARGET_MODE		-	enable target mode operation
 #
 #options 	ISP_TARGET_MODE=1
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 #options 	SYM_SETUP_LP_PROBE_MAP	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d 
 #options 	SYM_SETUP_SCSI_DIFF	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 #options 	SYM_SETUP_PCI_PARITY	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 #options 	SYM_SETUP_MAX_LUN	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # The 'asr' driver provides support for current DPT/Adaptec SCSI RAID
 # controllers (SmartRAID V and VI and later).
 # These controllers require the CAM infrastructure.
 #
 device		asr
 
 # The 'dpt' driver provides support for old DPT controllers (http://www.dpt.com/).
 # These have hardware RAID-{0,1,5} support, and do multi-initiator I/O.
 # The DPT controllers are commonly re-licensed under other brand-names -
 # some controllers by Olivetti, Dec, HP, AT&T, SNI, AST, Alphatronic, NEC and
 # Compaq are actually DPT controllers.
 #
 # See src/sys/dev/dpt for debugging and other subtle options.
 #   DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various
 #                           instruments are enabled.  The tools in
 #                           /usr/sbin/dpt_* assume these to be enabled.
 #   DPT_HANDLE_TIMEOUTS     Normally device timeouts are handled by the DPT.
 #                           If you want the driver to handle timeouts, enable
 #                           this option.  If your system is very busy, this
 #                           option will create more trouble than it solves.
 #   DPT_TIMEOUT_FACTOR      Used to compute the excessive amount of time to
 #                           wait when timing out with the above option.
 #  DPT_DEBUG_xxxx           These are controllable from sys/dev/dpt/dpt.h
 #  DPT_LOST_IRQ             When enabled, will try, once per second, to catch
 #                           any interrupt that got lost.  Seems to help in some
 #                           DPT-firmware/Motherboard combinations.  Minimal
 #                           cost, great benefit.
 #  DPT_RESET_HBA            Make "reset" actually reset the controller
 #                           instead of fudging it.  Only enable this if you
 #			    are 100% certain you need it.
 
 device		dpt
 
 # DPT options
 #!CAM# options 	DPT_MEASURE_PERFORMANCE
 #!CAM# options 	DPT_HANDLE_TIMEOUTS
 options 	DPT_TIMEOUT_FACTOR=4
 options 	DPT_LOST_IRQ
 options 	DPT_RESET_HBA
 options 	DPT_ALLOW_MEMIO
 
 #
 # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later
 # firmware.  These controllers have a SCSI-like interface, and require
 # the CAM infrastructure.
 #
 device		mly
 
 #
 # Adaptec FSA RAID controllers, including integrated DELL controllers,
 # the Dell PERC 2/QC and the HP NetRAID-4M
 #
 # AAC_COMPAT_LINUX	Include code to support Linux-binary management
 #			utilities (requires Linux compatibility
 #			support).
 #
 device		aac
 
 #
 # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers.  Only
 # one entry is needed; the code will find and configure all supported
 # controllers.
 #
 device		ida		# Compaq Smart RAID
 device		mlx		# Mylex DAC960
 device		amr		# AMI MegaRAID
 
 #
 # 3ware ATA RAID
 #
 device		twe		# 3ware ATA RAID
 
 #
 # The 'ATA' driver supports all ATA and ATAPI devices, including PC Card
 # devices. You only need one "device ata" for it to find all
 # PCI and PC Card ATA/ATAPI devices on modern machines.
 device		ata
 device		atadisk		# ATA disk drives
 device		atapicd		# ATAPI CDROM drives
 device		atapifd		# ATAPI floppy drives
 device		atapist		# ATAPI tape drives
 
 #
 # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add:
 hint.ata.0.at="isa"
 hint.ata.0.port="0x1f0"
 hint.ata.0.irq="14"
 hint.ata.1.at="isa"
 hint.ata.1.port="0x170"
 hint.ata.1.irq="15"
 
 #
 # The following options are valid on the ATA driver:
 #
 # ATA_STATIC_ID:	controller numbering is static, i.e. depends on location;
 #			otherwise the device numbers are dynamically allocated.
 
 options 	ATA_STATIC_ID
 
 #
 # Standard floppy disk controllers and floppy tapes, supports
 # the Y-E DATA External FDD (PC Card)
 #
 device		fdc
 hint.fdc.0.at="isa"
 hint.fdc.0.port="0x3F0"
 hint.fdc.0.irq="6"
 hint.fdc.0.drq="2"
 #
 # FDC_DEBUG enables floppy debugging.  Since the debug output is huge, you
 # gotta turn it actually on by setting the variable fd_debug with DDB,
 # however.
 options 	FDC_DEBUG
 #
 # Activate this line if you happen to have an Insight floppy tape.
 # Probing them proved to be dangerous for people with floppy disks only,
 # so it's "hidden" behind a flag:
 #hint.fdc.0.flags="1"
 
 # Specify floppy devices
 hint.fd.0.at="fdc0"
 hint.fd.0.drive="0"
 hint.fd.1.at="fdc0"
 hint.fd.1.drive="1"
 
 # M-systems DiskOnchip products see src/sys/contrib/dev/fla/README
 device		fla
 hint.fla.0.at="isa"
 
 #
 # Other standard PC hardware:
 #
 # mse: Logitech and ATI InPort bus mouse ports
 # sio: serial ports (see sio(4)), including support for various
 #      PC Card devices, such as Modem and NICs (see etc/defaults/pccard.conf)
 
 device		mse
 hint.mse.0.at="isa"
 hint.mse.0.port="0x23c"
 hint.mse.0.irq="5"
 
 device		sio
 hint.sio.0.at="isa"
 hint.sio.0.port="0x3F8"
 hint.sio.0.flags="0x10"
 hint.sio.0.irq="4"
 
 #
 # `flags' for serial drivers that support consoles (only for sio now):
 #	0x10	enable console support for this unit.  The other console flags
 #		are ignored unless this is set.  Enabling console support does
 #		not make the unit the preferred console - boot with -h or set
 #		the 0x20 flag for that.  Currently, at most one unit can have
 #		console support; the first one (in config file order) with
 #		this flag set is preferred.  Setting this flag for sio0 gives
 #		the old behaviour.
 #	0x20	force this unit to be the console (unless there is another
 #		higher priority console).  This replaces the COMCONSOLE option.
 #	0x40	reserve this unit for low level console operations.  Do not
 #		access the device in any normal way.
 #	0x80	use this port for serial line gdb support in ddb.
 #
 # PnP `flags' (set via userconfig using pnp x flags y)
 #	0x1	disable probing of this device.  Used to prevent your modem
 #		from being attached as a PnP modem.
 #
 
 # Options for serial drivers that support consoles (only for sio now):
 options 	BREAK_TO_DEBUGGER	#a BREAK on a comconsole goes to
 					#DDB, if available.
 options 	CONSPEED=115200		# speed for serial console
 					# (default 9600)
 
 # Solaris implements a new BREAK which is initiated by a character
 # sequence CR ~ ^b which is similar to a familiar pattern used on
 # Sun servers by the Remote Console.
 options 	ALT_BREAK_TO_DEBUGGER
 
 # Options for sio:
 options 	COM_ESP			#code for Hayes ESP
 options 	COM_MULTIPORT		#code for some cards with shared IRQs
 
 # Other flags for sio that aren't documented in the man page.
 #	0x20000	enable hardware RTS/CTS and larger FIFOs.  Only works for
 #		ST16650A-compatible UARTs.
 
 #
 # Network interfaces:
 #
 # MII bus support is required for some PCI 10/100 ethernet NICs,
 # namely those which use MII-compliant transceivers or implement
 # transceiver control interfaces that operate like an MII. Adding
 # "device miibus" to the kernel config pulls in support for
 # the generic miibus API and all of the PHY drivers, including a
 # generic one for PHYs that aren't specifically handled by an
 # individual driver.
 device		miibus
 
 # an:   Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA,
 #       PCI and ISA varieties.
 # ar:   Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver
 #       (requires sppp)
 # awi:  Support for IEEE 802.11 PC Card devices using the AMD Am79C930 and
 #       Harris (Intersil) Chipset with PCnetMobile firmware by AMD.
 # cnw:  Xircom CNW/Netware Airsurfer PC Card adapter
 # cs:   IBM Etherjet and other Crystal Semi CS89x0-based adapters
 # cx:   Cronyx/Sigma multiport sync/async (with Cisco or PPP framing)
 # dc:   Support for PCI fast ethernet adapters based on the DEC/Intel 21143
 #       and various workalikes including:
 #       the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics
 #       AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On
 #       82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II
 #       and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver
 #       replaces the old al, ax, dm, pn and mx drivers.  List of brands:
 #       Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110, 
 #       SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX, 
 #       LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204,
 #       KNE110TX.
 # de:   Digital Equipment DC21040
 # ed:   Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503
 #       HP PC Lan+, various PC Card devices (refer to etc/defaults/pccard.conf)
 # el:   3Com 3C501 (slow!)
 # ep:   3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589
 #       and PC Card devices using these chipsets.
 # ex:   Intel EtherExpress Pro/10 and other i82595-based adapters,
 #       Olicom Ethernet PC Card devices.
 # fe:   Fujitsu MB86960A/MB86965A Ethernet
 # fea:  DEC DEFEA EISA FDDI adapter
 # fpa:  Support for the Digital DEFPA PCI FDDI. `device fddi' is also needed.
 # fxp:  Intel EtherExpress Pro/100B
 #	(hint of prefer_iomap can be done to prefer I/O instead of Mem mapping)
 # ie:   AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210;
 #       Intel EtherExpress
 # le:   Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100,
 #       DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422)
 # lnc:  Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL, AMD Am7990 and
 #       Am79C960)
 # lge:	Support for PCI gigabit ethernet adapters based on the Level 1
 #	LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX,
 #	SMC TigerCard 1000 (SMC9462SX), and some Addtron cards.
 # nge:	Support for PCI gigabit ethernet adapters based on the National
 #	Semiconductor DP83820 and DP83821 chipset. This includes the
 #	SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet
 #	GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the LinkSys
 #	EG1032 and EG1064, the Surecom EP-320G-TX and the Netgear GA622T.
 # oltr: Olicom ISA token-ring adapters OC-3115, OC-3117, OC-3118 and OC-3133
 #       (no hints needed).
 #       Olicom PCI token-ring adapters OC-3136, OC-3137, OC-3139, OC-3140,
 #       OC-3141, OC-3540, OC-3250
 # rdp:  RealTek RTL 8002-based pocket ethernet adapters
 # pcn:	Support for PCI fast ethernet adapters based on the AMD Am79c97x
 #	chipsets, including the PCnet/FAST, PCnet/FAST+, PCnet/PRO and
 #	PCnet/Home. These were previously handled by the lnc driver (and
 #	still will be if you leave this driver out of the kernel).
 # rl:   Support for PCI fast ethernet adapters based on the RealTek 8129/8139
 #       chipset.  Note that the RealTek driver defaults to using programmed
 #       I/O to do register accesses because memory mapped mode seems to cause
 #       severe lockups on SMP hardware.  This driver also supports the
 #       Accton EN1207D `Cheetah' adapter, which uses a chip called
 #       the MPX 5030/5038, which is either a RealTek in disguise or a
 #       RealTek workalike.  Note that the D-Link DFE-530TX+ uses the RealTek
 #       chipset and is supported by this driver, not the 'vr' driver.
 # sf:   Support for Adaptec Duralink PCI fast ethernet adapters based on the
 #       Adaptec AIC-6915 "starfire" controller.
 #       This includes dual and quad port cards, as well as one 100baseFX card.
 #       Most of these are 64-bit PCI devices, except for one single port
 #       card which is 32-bit.
 # sis:  Support for NICs based on the Silicon Integrated Systems SiS 900,
 #       SiS 7016 and NS DP83815 PCI fast ethernet controller chips.
 # sk:   Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs.
 #       This includes the SK-9841 and SK-9842 single port cards (single mode
 #       and multimode fiber) and the SK-9843 and SK-9844 dual port cards
 #       (also single mode and multimode).
 #       The driver will autodetect the number of ports on the card and
 #       attach each one as a separate network interface.
 # sn:   Support for ISA and PC Card Ethernet devices using the
 #       SMC91C90/92/94/95 chips.
 # sr:   RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp)
 # ste:  Sundance Technologies ST201 PCI fast ethernet controller, includes
 #       the D-Link DFE-550TX.
 # ti:   Support for PCI gigabit ethernet NICs based on the Alteon Networks
 #       Tigon 1 and Tigon 2 chipsets.  This includes the Alteon AceNIC, the
 #       3Com 3c985, the Netgear GA620 and various others.  Note that you will
 #       probably want to bump up NMBCLUSTERS a lot to use this driver.
 # tl:   Support for the Texas Instruments TNETE100 series 'ThunderLAN'
 #       cards and integrated ethernet controllers.  This includes several
 #       Compaq Netelligent 10/100 cards and the built-in ethernet controllers
 #       in several Compaq Prosignia, Proliant and Deskpro systems.  It also
 #       supports several Olicom 10Mbps and 10/100 boards.
 # tx:   SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series)
 # txp:	Support for 3Com 3cR990 cards with the "Typhoon" chipset
 # vr:   Support for various fast ethernet adapters based on the VIA
 #       Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips,
 #       including the D-Link DFE530TX (see 'rl' for DFE530TX+), the Hawking 
 #       Technologies PN102TX, and the AOpen/Acer ALN-320.
 # vx:   3Com 3C590 and 3C595
 # wb:   Support for fast ethernet adapters based on the Winbond W89C840F chip.
 #       Note: this is not the same as the Winbond W89C940F, which is a
 #       NE2000 clone.
 # wl:   Lucent Wavelan (ISA card only).
 # wi:   Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both
 #       the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA
 #       bridge with a PCMCIA adapter plugged into it.
 # wx:   Intel Gigabit Ethernet PCI card (`Wiseman')
 # xe:   Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller,
 #       Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card,
 #       Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56
 # xl:   Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast)
 #       Etherlink XL cards and integrated controllers.  This includes the
 #       integrated 3c905B-TX chips in certain Dell Optiplex and Dell
 #       Precision desktop machines and the integrated 3c905-TX chips
 #       in Dell Latitude laptop docking stations.
 #       Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX
 
 # Order for ISA/EISA devices is important here
 
 device		ar	1
 hint.ar.0.at="isa"
 hint.ar.0.port="0x300"
 hint.ar.0.irq="10"
 hint.ar.0.maddr="0xd0000"
 device		cs
 hint.cs.0.at="isa"
 hint.cs.0.port="0x300"
 device		cx	1
 hint.cx.0.at="isa"
 hint.cx.0.port="0x240"
 hint.cx.0.irq="15"
 hint.cx.0.drq="7"
 device		ed
 hint.ed.0.at="isa"
 hint.ed.0.port="0x280"
 hint.ed.0.irq="5"
 hint.ed.0.maddr="0xd8000"
 device		el	1
 hint.el.0.at="isa"
 hint.el.0.port="0x300"
 hint.el.0.irq="9"
 device		ep
 device		ex
 device		fe	1
 options 	FE_8BIT_SUPPORT		# LAC-98 support
 hint.fe.0.at="isa"
 hint.fe.0.port="0x300"
 device		fea
 device		ie	2
 hint.ie.0.at="isa"
 hint.ie.0.port="0x300"
 hint.ie.0.irq="5"
 hint.ie.0.maddr="0xd0000"
 hint.ie.1.at="isa"
 hint.ie.1.port="0x360"
 hint.ie.1.irq="7"
 hint.ie.1.maddr="0xd0000"
 device		le	1
 hint.le.0.at="isa"
 hint.le.0.port="0x300"
 hint.le.0.irq="5"
 hint.le.0.maddr="0xd0000"
 device		lnc	1
 hint.lnc.0.at="isa"
 hint.lnc.0.port="0x280"
 hint.lnc.0.irq="10"
 hint.lnc.0.drq="0"
 device		rdp	1
 hint.rdp.0.at="isa"
 hint.rdp.0.port="0x378"
 hint.rdp.0.irq="7"
 hint.rdp.0.flags="2"
 device		sr	1
 hint.sr.0.at="isa"
 hint.sr.0.port="0x300"
 hint.sr.0.irq="5"
 hint.sr.0.maddr="0xd0000"
 device		sn
 hint.sn.0.at="isa"
 hint.sn.0.port="0x300"
 hint.sn.0.irq="10"
 device		an
 device		awi
 device		cnw
 device		wi
 options 	WLCACHE		# enables the signal-strength cache
 options 	WLDEBUG		# enables verbose debugging output
 device		wl	1
 hint.wl.0.at="isa"
 hint.wl.0.port="0x300"
 device		xe
 
 device		oltr
 options 	OLTR_NO_BULLSEYE_MAC
 options 	OLTR_NO_HAWKEYE_MAC
 options 	OLTR_NO_TMS_MAC
 hint.oltr.0.at="isa"
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 device		dc		# DEC/Intel 21143 and various workalikes
 device		fxp		# Intel EtherExpress PRO/100B (82557, 82558)
 hint.fxp.0.prefer_iomap="0"
 device		rl		# RealTek 8129/8139
 device		pcn		# AMD Am79C97x PCI 10/100 NICs
 device		sf		# Adaptec AIC-6915 (``Starfire'')
 device		sis		# Silicon Integrated Systems SiS 900/SiS 7016
 device		ste		# Sundance ST201 (D-Link DFE-550TX)
 device		tl		# Texas Instruments ThunderLAN
 device		tx		# SMC EtherPower II (83c170 ``EPIC'')
 device		vr		# VIA Rhine, Rhine II
 device		wb		# Winbond W89C840F
 device		xl		# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # PCI Ethernet NICs.
 device		de		# DEC/Intel DC21x4x (``Tulip'')
 device		vx		# 3Com 3c590, 3c595 (``Vortex'')
 
 # PCI Gigabit & FDDI NICs.
 device		lge
 device		nge
 device		sk
 device		ti
 device		wx
 device		fpa	1
 
 #
 # ATM related options (Cranor version)
 # (note: this driver cannot be used with the HARP ATM stack)
 #
 # The `en' device provides support for Efficient Networks (ENI)
 # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0).
 #
 # atm device provides generic atm functions and is required for
 # atm devices.
 # NATM enables the netnatm protocol family that can be used to
 # bypass TCP/IP.
 #
 # the current driver supports only PVC operations (no atm-arp, no multicast).
 # for more details, please read the original documents at
 # http://www.ccrc.wustl.edu/pub/chuck/tech/bsdatm/bsdatm.html
 #
 device		atm
 device		en
 options 	NATM			#native ATM
 
 #
 # Audio drivers: `pcm', `sbc', `gusc', `pca'
 #
 # pcm: PCM audio through various sound cards.
 #
 # This has support for a large number of new audio cards, based on
 # CS423x, OPTi931, Yamaha OPL-SAx, and also for SB16, GusPnP.
 # For more information about this driver and supported cards,
 # see the pcm.4 man page.
 #
 # The flags of the device tells the device a bit more info about the
 # device that normally is obtained through the PnP interface.
 #	bit  2..0   secondary DMA channel;
 #	bit  4      set if the board uses two dma channels;
 #	bit 15..8   board type, overrides autodetection; leave it
 #		    zero if you don't know what to put in (and you don't,
 #		    since this is unsupported at the moment...).
 #
 # This driver will use the new PnP code if it's available.
 #
 # pca: PCM audio through your PC speaker
 #
 # Supported cards include:
 # Creative SoundBlaster ISA PnP/non-PnP
 # Supports ESS and Avance ISA chips as well.
 # Gravis UltraSound ISA PnP/non-PnP
 # Crystal Semiconductor CS461x/428x PCI
 # Neomagic 256AV (ac97)
 # Most of the more common ISA/PnP sb/mss/ess compatible cards.
 
 device		pcm
 
 # For non-pnp sound cards with no bridge drivers only:
 hint.pcm.0.at="isa"
 hint.pcm.0.irq="10"
 hint.pcm.0.drq="1"
 hint.pcm.0.flags="0x0"
 
 # For PnP/PCI sound cards, no hints are required.
 
 #
 # midi: MIDI interfaces and synthesizers
 #
 
 device		midi
 
 # For non-pnp sound cards with no bridge drivers:
 hint.midi.0.at="isa"
 hint.midi.0.irq="5"
 hint.midi.0.flags="0x0"
 
 # For serial ports (this example configures port 2):
 # TODO: implement generic tty-midi interface so that we can use
 #	other uarts.
 hint.midi.0.at="isa"
 hint.midi.0.port="0x2F8"
 hint.midi.0.irq="3"
 
 #
 # seq: MIDI sequencer
 #
 
 device		seq
 
 # The bridge drivers for sound cards.  These can be separately configured
 # for providing services to the likes of new-midi.
 # When used with 'device pcm' they also provide pcm sound services.
 #
 # sbc:  Creative SoundBlaster ISA PnP/non-PnP
 #	Supports ESS and Avance ISA chips as well.
 # gusc: Gravis UltraSound ISA PnP/non-PnP
 # csa:  Crystal Semiconductor CS461x/428x PCI
 
 # For non-PnP cards:
 device		sbc
 hint.sbc.0.at="isa"
 hint.sbc.0.port="0x220"
 hint.sbc.0.irq="5"
 hint.sbc.0.drq="1"
 hint.sbc.0.flags="0x15"
 device		gusc
 hint.gusc.0.at="isa"
 hint.gusc.0.port="0x220"
 hint.gusc.0.irq="5"
 hint.gusc.0.drq="1"
 hint.gusc.0.flags="0x13"
 
 device		pca
 hint.pca.0.at="isa"
 hint.pca.0.port="0x040"
 
 #
 # Miscellaneous hardware:
 #
 # mcd: Mitsumi CD-ROM using proprietary (non-ATAPI) interface
 # scd: Sony CD-ROM using proprietary (non-ATAPI) interface
 # matcd: Matsushita/Panasonic CD-ROM using proprietary (non-ATAPI) interface
 # wt: Wangtek and Archive QIC-02/QIC-36 tape drives
 # ctx: Cortex-I frame grabber
 # apm: Laptop Advanced Power Management (experimental)
 # pmtimer: Timer device driver for power management events (APM or ACPI)
 # spigot: The Creative Labs Video Spigot video-acquisition board
 # meteor: Matrox Meteor video capture board
 # bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board
 # cy: Cyclades serial driver
 # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!)
 # digi: Digiboard driver
 # gp:  National Instruments AT-GPIB and AT-GPIB/TNT board, PCMCIA-GPIB
 # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey
 # gsc: Genius GS-4500 hand scanner.
 # joy: joystick (including IO DATA PCJOY PC Card joystick)
 # The LOUTB option specifies a slower outb() for debugging purposes. 
 # rc: RISCom/8 multiport card
 # rp: Comtrol Rocketport(ISA) - single card
 # tw: TW-523 power line interface for use with X-10 home control products
 # si: Specialix SI/XIO 4-32 port terminal multiplexor
 # spic: Sony Programmable I/O controller (VAIO notebooks)
 # stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based)
 # stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent)
 
 # Notes on APM
 #  The flags takes the following meaning for apm0:
 #    0x0020  Statclock is broken.
 #  If apm is omitted, some systems require sysctl -w kern.timecounter.method=1
 #  for correct timekeeping.
 
 # Notes on the spigot:
 #  The video spigot is at 0xad6.  This port address can not be changed.
 #  The irq values may only be 10, 11, or 15
 #  I/O memory is an 8kb region.  Possible values are:
 #    0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff
 #    The start address must be on an even boundary.
 #  Add the following option if you want to allow non-root users to be able
 #  to access the spigot.  This option is not secure because it allows users
 #  direct access to the I/O page.
 #  	options SPIGOT_UNSECURE
 
 # Notes on the Comtrol Rocketport driver:
 #
 # The exact values used for rp0 depend on how many boards you have
 # in the system.  The manufacturer's sample configs are listed as:
 #
 #               device  rp	# core driver support
 #
 #   Comtrol Rocketport ISA single card
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x280"
 #
 #   If instead you have two ISA cards, one installed at 0x100 and the
 #   second installed at 0x180, then you should add the following to
 #   your kernel probe hints:
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x100"
 #		hint.rp.1.at="isa"
 #		hint.rp.1.port="0x180"
 #
 #   For 4 ISA cards, it might be something like this:
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x180"
 #		hint.rp.1.at="isa"
 #		hint.rp.1.port="0x100"
 #		hint.rp.2.at="isa"
 #		hint.rp.2.port="0x340"
 #		hint.rp.3.at="isa"
 #		hint.rp.3.port="0x240"
 #
 #   And for PCI cards, you need no hints.
 
 # Notes on the Digiboard driver:
 #
 # The following flag values have special meanings in dgb:
 #	0x01 - alternate layout of pins
 #	0x02 - use the windowed PC/Xe in 64K mode
 
 # Notes on the Specialix SI/XIO driver:
 #  The host card is memory, not IO mapped.
 #  The Rev 1 host cards use a 64K chunk, on a 32K boundary.
 #  The Rev 2 host cards use a 32K chunk, on a 32K boundary.
 #  The cards can use an IRQ of 11, 12 or 15.
 
 # Notes on the Sony Programmable I/O controller
 #  This is a temporary driver that should someday be replaced by something
 #  that hooks into the ACPI layer. The device is hooked to the PIIX4's
 #  General Device 10 decoder, which means you have to fiddle with PCI
 #  registers to map it in, even though it is otherwise treated here as
 #  an ISA device. At the moment, the driver polls, although the device
 #  is capable of generating interrupts. It is largely undocumented.
 #  The port location in the hint is where you WANT the device to be
 #  mapped. 0x10a0 seems to be traditional. At the moment the jogdial
 #  is the only thing truly supported, but apparently a fair percentage
 #  of the Vaio extra features are controlled by this device.
 
 # Notes on the Stallion stl and stli drivers:
 #  See src/i386/isa/README.stl for complete instructions.
 #  This is version 0.0.5alpha, unsupported by Stallion.
 #  The stl driver has a secondary IO port hard coded at 0x280.  You need
 #     to change src/i386/isa/stallion.c if you reconfigure this on the boards.
 #  The "flags" and "msize" settings on the stli driver depend on the board:
 #	EasyConnection 8/64 ISA:     flags 23         msize 0x1000
 #	EasyConnection 8/64 EISA:    flags 24         msize 0x10000
 #	EasyConnection 8/64 MCA:     flags 25         msize 0x1000
 #	ONboard ISA:                 flags 4          msize 0x10000
 #	ONboard EISA:                flags 7          msize 0x10000
 #	ONboard MCA:                 flags 3          msize 0x10000
 #	Brumby:                      flags 2          msize 0x4000
 #	Stallion:                    flags 1          msize 0x10000
 
 device		mcd	1
 hint.mcd.0.at="isa"
 hint.mcd.0.port="0x300"
 hint.mcd.0.irq="10"
 # for the Sony CDU31/33A CDROM
 device		scd	1
 hint.scd.0.at="isa"
 hint.scd.0.port="0x230"
 # for the SoundBlaster 16 multicd - up to 4 devices
 device		matcd	1
 hint.matcd.0.at="isa"
 hint.matcd.0.port="0x230"
 device		wt	1
 hint.wt.0.at="isa"
 hint.wt.0.port="0x300"
 hint.wt.0.irq="5"
 hint.wt.0.drq="1"
 device		ctx	1
 hint.ctx.0.at="isa"
 hint.ctx.0.port="0x230"
 hint.ctx.0.maddr="0xd0000"
 device		spigot	1
 hint.spigot.0.at="isa"
 hint.spigot.0.port="0xad6"
 hint.spigot.0.irq="15"
 hint.spigot.0.maddr="0xee000"
 device		apm
 hint.apm.0.flags="0x20"
 device		pmtimer			# Adjust system timer at wakeup time
 hint.pmtimer.0.at="isa"
 device		gp
 hint.gp.0.at="isa"
 hint.gp.0.port="0x2c0"
 device		gsc	1
 hint.gsc.0.at="isa"
 hint.gsc.0.port="0x270"
 hint.gsc.0.drq="3"
 device		joy			# PnP aware, hints for nonpnp only
 hint.joy.0.at="isa"
 hint.joy.0.port="0x201"
 device		cy	1
 options 	CY_PCI_FASTINTR		# Use with cy_pci unless irq is shared
 hint.cy.0.at="isa"
 hint.cy.0.irq="10"
 hint.cy.0.maddr="0xd4000"
 hint.cy.0.msize="0x2000"
 device		dgb	1
 options 	NDGBPORTS=16		# Defaults to 16*NDGB
 hint.dgb.0.at="isa"
 hint.dgb.0.port="0x220"
 hint.dgb.0.maddr="0xfc000"
 device		digi
 hint.digi.0.at="isa"
 hint.digi.0.port="0x104"
 hint.digi.0.maddr="0xd0000"
 # BIOS & FEP/OS components of device digi.  Normally left as modules
 device		digi_CX
 device		digi_CX_PCI
 device		digi_EPCX
 device		digi_EPCX_PCI
 device		digi_Xe
 device		digi_Xem
 device		digi_Xr
 device		rc	1
 hint.rc.0.at="isa"
 hint.rc.0.port="0x220"
 hint.rc.0.irq="12"
 device		rp
 hint.rp.0.at="isa"
 hint.rp.0.port="0x280"
 # the port and irq for tw0 are fictitious
 device		tw	1
 hint.tw.0.at="isa"
 hint.tw.0.port="0x380"
 hint.tw.0.irq="11"
 device		si
 options 	SI_DEBUG
 hint.si.0.at="isa"
 hint.si.0.maddr="0xd0000"
 hint.si.0.irq="12"
 device		asc	1
 hint.asc.0.at="isa"
 hint.asc.0.port="0x3EB"
 hint.asc.0.drq="3"
 hint.asc.0.irq="10"
 device		spic
 hint.spic.0.at="isa"
 hint.spic.0.port="0x10a0"
 device		stl
 hint.stl.0.at="isa"
 hint.stl.0.port="0x2a0"
 hint.stl.0.irq="10"
 device		stli
 hint.stli.0.at="isa"
 hint.stli.0.port="0x2a0"
 hint.stli.0.maddr="0xcc000"
 hint.stli.0.flags="23"
 hint.stli.0.msize="0x1000"
 # You are unlikely to have the hardware for loran <phk@FreeBSD.org>
 device		loran
 hint.loran.0.at="isa"
 hint.loran.0.irq="5"
 # HOT1 Xilinx 6200 card (http://www.vcc.com/)
 device		xrpu
 
 #
 # The `meteor' device is a PCI video capture board. It can also have the
 # following options:
 #   options METEOR_ALLOC_PAGES=xxx	preallocate kernel pages for data entry
 #	figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE
 #   options METEOR_DEALLOC_PAGES	remove all allocated pages on close(2)
 #   options METEOR_DEALLOC_ABOVE=xxx	remove all allocated pages above the
 #	specified amount. If this value is below the allocated amount no action
 #	is taken
 #   options METEOR_SYSTEM_DEFAULT={METEOR_PAL|METEOR_NTSC|METEOR_SECAM}, used
 #	for initialization of fps routine when a signal is not present.
 #
 # The 'bktr' device is a PCI video capture device using the Brooktree
 # bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a
 # TV card, eg Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator,
 # Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo.
 #
 # options 	OVERRIDE_CARD=xxx
 # options 	OVERRIDE_TUNER=xxx
 # options 	OVERRIDE_MSP=1
 # options 	OVERRIDE_DBX=1
 # These options can be used to override the auto detection
 # The current values for xxx are found in src/sys/dev/bktr/bktr_card.h
 # Using sysctl(8) run-time overrides on a per-card basis can be made
 #
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL
 # or
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC
 # Specifies the default video capture mode.
 # This is required for Dual Crystal (28&35Mhz) boards where PAL is used
 # to prevent hangs during initialisation.  eg VideoLogic Captivator PCI.
 #
 # options 	BKTR_USE_PLL
 # PAL or SECAM users who have a 28Mhz crystal (and no 35Mhz crystal)
 # must enable PLL mode with this option. eg some new Bt878 cards.
 #
 # options 	BKTR_GPIO_ACCESS
 # This enables IOCTLs which give user level access to the GPIO port.
 #
 # options 	BKTR_NO_MSP_RESET
 # Prevents the MSP34xx reset. Good if you initialise the MSP in another OS first
 #
 # options 	BKTR_430_FX_MODE
 # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode.
 #
 # options 	BKTR_SIS_VIA_MODE
 # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is
 # needed for some old SiS and VIA chipset motherboards.
 # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset
 # motherboards and motherboards with bad or incomplete PCI 2.1 support.
 # As a rough guess, old = before 1998
 #
 
 device		meteor	1
 
 # Brooktree driver has been ported to the new I2C framework. Thus,
 # you'll need to have the following 3 lines in the kernel config.
 #     device smbus
 #     device iicbus
 #     device iicbb
 # The iic and smb devices are only needed if you want to control other
 # I2C slaves connected to the external connector of some cards.
 #
 device		bktr	1
 
 #
 # PC Card/PCMCIA
 # (OLDCARD)
 #
 # card: pccard slots
 # pcic: isa/pccard bridge
 device		pcic
 hint.pcic.0.at="isa"
 hint.pcic.1.at="isa"
 device		card
 
 #
 # PC Card/PCMCIA and Cardbus
 # (NEWCARD)
 #
 # Note that NEWCARD and OLDCARD are incompatible.  Do not use both at the same
 # time.
 #
 # pccbb: isa/pccard and pci/cardbus bridge
 # pccard: pccard slots
 # cardbus: cardbus slots
 #device		pccbb
 #device		pccard
 #device		cardbus
 
 # You may need to reset all pccards after resuming
 options 	PCIC_RESUME_RESET	# reset after resume
 
 #
 # Laptop/Notebook options:
 #
 # See also:
 #  apm under `Miscellaneous hardware'
 # above.
 
 # For older notebooks that signal a powerfail condition (external
 # power supply dropped, or battery state low) by issuing an NMI:
 
 options 	POWERFAIL_NMI	# make it beep instead of panicking
 
 #
 # SMB bus
 #
 # System Management Bus support is provided by the 'smbus' device.
 # Access to the SMBus device is via the 'smb' device (/dev/smb*),
 # which is a child of the 'smbus' device.
 #
 # Supported devices:
 # smb		standard io through /dev/smb*
 #
 # Supported SMB interfaces:
 # iicsmb	I2C to SMB bridge with any iicbus interface
 # bktr		brooktree848 I2C hardware interface
 # intpm		Intel PIIX4 Power Management Unit
 # alpm		Acer Aladdin-IV/V/Pro2 Power Management Unit
 # ichsmb	Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA)
 #
 device		smbus		# Bus support, required for smb below.
 
 device		intpm
 device		alpm
 device		ichsmb
 
 device		smb
 
 #
 # I2C Bus
 #
 # Philips i2c bus support is provided by the `iicbus' device.
 #
 # Supported devices:
 # ic	i2c network interface
 # iic	i2c standard io
 # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands.
 #
 # Supported interfaces:
 # pcf	Philips PCF8584 ISA-bus controller
 # bktr	brooktree848 I2C software interface
 #
 # Other:
 # iicbb	generic I2C bit-banging code (needed by lpbb, bktr)
 #
 device		iicbus		# Bus support, required for ic/iic/iicsmb below.
 device		iicbb
 
 device		ic
 device		iic
 device		iicsmb		# smb over i2c bridge
 
 device		pcf
 hint.pcf.0.at="isa"
 hint.pcf.0.port="0x320"
 hint.pcf.0.irq="5"
 
 #---------------------------------------------------------------------------
 # ISDN4BSD
 #
 # See /usr/share/examples/isdn/ROADMAP for an introduction to isdn4bsd.
 #
 # i4b passive ISDN cards support contains the following hardware drivers:
 #
 #	isic  - Siemens/Infineon ISDN ISAC/HSCX/IPAC chipset driver
 #	iwic  - Winbond W6692 PCI bus ISDN S/T interface controller
 #	ifpi  - AVM Fritz!Card PCI driver
 #	ihfc  - Cologne Chip HFC ISA/ISA-PnP chipset driver
 #	ifpnp - AVM Fritz!Card PnP driver 
 #	itjc  - Siemens ISAC / TJNet Tiger300/320 chipset
 #
 # i4b active ISDN cards support contains the following hardware drivers:
 #
 #	iavc  - AVM B1 PCI, AVM B1 ISA, AVM T1
 #
 # Note that the ``options'' (if given) and ``device'' lines must BOTH
 # be uncommented to enable support for a given card !
 #
 # In addition to a hardware driver (and probably an option) the mandatory
 # ISDN protocol stack devices and the mandatory support device must be 
 # enabled as well as one or more devices from the optional devices section.
 #
 #---------------------------------------------------------------------------
 #	isic driver (Siemens/Infineon chipsets)
 #
 device	isic
 #
 # ISA bus non-PnP Cards:
 # ----------------------
 #
 # Teles S0/8 or Niccy 1008
 options 	TEL_S0_8
 hint.isic.0.at="isa"
 hint.isic.0.maddr="0xd0000"
 hint.isic.0.irq="5"
 hint.isic.0.flags="1"
 #
 # Teles S0/16 or Creatix ISDN-S0 or Niccy 1016
 options 	TEL_S0_16
 hint.isic.0.at="isa"
 hint.isic.0.port="0xd80"
 hint.isic.0.maddr="0xd0000"
 hint.isic.0.irq="5"
 hint.isic.0.flags="2"
 #
 # Teles S0/16.3
 options 	TEL_S0_16_3
 hint.isic.0.at="isa"
 hint.isic.0.port="0xd80"
 hint.isic.0.irq="5"
 hint.isic.0.flags="3"
 #
 # AVM A1 or AVM Fritz!Card
 options 	AVM_A1
 hint.isic.0.at="isa"
 hint.isic.0.port="0x340"
 hint.isic.0.irq="5"
 hint.isic.0.flags="4"
 #
 # USRobotics Sportster ISDN TA intern
 options 	USR_STI
 hint.isic.0.at="isa"
 hint.isic.0.port="0x268"
 hint.isic.0.irq="5"
 hint.isic.0.flags="7"
 #
 # ITK ix1 Micro ( < V.3, non-PnP version )
 options 	ITKIX1
 hint.isic.0.at="isa"
 hint.isic.0.port="0x398"
 hint.isic.0.irq="10"
 hint.isic.0.flags="18"
 #
 # ELSA PCC-16
 options 	ELSA_PCC16
 hint.isic.0.at="isa"
 hint.isic.0.port="0x360"
 hint.isic.0.irq="10"
 hint.isic.0.flags="20"
 #
 # ISA bus PnP Cards:
 # ------------------
 #
 # Teles S0/16.3 PnP
 options 	TEL_S0_16_3_P
 #
 # Creatix ISDN-S0 P&P
 options 	CRTX_S0_P
 #
 # Dr. Neuhaus Niccy Go@
 options 	DRN_NGO
 #
 # Sedlbauer Win Speed
 options 	SEDLBAUER
 #
 # Dynalink IS64PH
 options 	DYNALINK 
 #
 # ELSA QuickStep 1000pro ISA
 options 	ELSA_QS1ISA
 #
 # Siemens I-Surf 2.0
 options 	SIEMENS_ISURF2
 #
 # Asuscom ISDNlink 128K ISA
 options 	ASUSCOM_IPAC
 #
 # Eicon Diehl DIVA 2.0 and 2.02
 options       EICON_DIVA
 #
 # PCI bus Cards:
 # --------------
 #
 # ELSA MicroLink ISDN/PCI (same as ELSA QuickStep 1000pro PCI)
 options 	ELSA_QS1PCI
 #
 #
 #---------------------------------------------------------------------------
 #	ifpnp driver for AVM Fritz!Card PnP
 #
 # AVM Fritz!Card PnP
 device ifpnp
 #
 #---------------------------------------------------------------------------
 #	ihfc driver for Cologne Chip ISA chipsets (experimental!)
 #
 # Teles 16.3c ISA PnP
 # AcerISDN P10 ISA PnP
 # TELEINT ISDN SPEED No.1
 device ihfc
 #
 #---------------------------------------------------------------------------
 #	ifpi driver for AVM Fritz!Card PCI
 #
 # AVM Fritz!Card PCI
 device  ifpi
 #
 #---------------------------------------------------------------------------
 #	iwic driver for Winbond W6692 chipset
 #
 # ASUSCOM P-IN100-ST-D (and other Winbond W6692 based cards)
 device  iwic
 #
 #---------------------------------------------------------------------------
 #	itjc driver for Siemens ISAC / TJNet Tiger300/320 chipset
 #
 # Traverse Technologies NETjet-S
 # Teles PCI-TJ
 device  itjc
 #
 #---------------------------------------------------------------------------
 #	iavc driver (AVM active cards, needs i4bcapi driver!)
 #
 device	iavc
 #
 # AVM B1 ISA bus (PnP mode not supported!)
 # ----------------------------------------
 hint.iavc.0.at="isa"
 hint.iavc.0.port="0x150"
 hint.iavc.0.irq="5"
 #
 #---------------------------------------------------------------------------
 #	ISDN Protocol Stack - mandatory for all hardware drivers
 #
 # Q.921 / layer 2 - i4b passive cards D channel handling
 device		"i4bq921"
 #
 # Q.931 / layer 3 - i4b passive cards D channel handling
 device		"i4bq931"
 #
 # layer 4 - i4b common passive and active card handling
 device		"i4b"
 #
 #---------------------------------------------------------------------------
 #	ISDN devices - mandatory for all hardware drivers
 #
 # userland driver to do ISDN tracing (for passive cards only)
 device		"i4btrc"	4
 #
 # userland driver to control the whole thing
 device		"i4bctl"
 #
 #---------------------------------------------------------------------------
 #	ISDN devices - optional
 #
 # userland driver for access to raw B channel
 device		"i4brbch"	4
 #
 # userland driver for telephony
 device		"i4btel"	2
 #
 # network driver for IP over raw HDLC ISDN
 device		"i4bipr"	4
 # enable VJ header compression detection for ipr i/f
 options 	IPR_VJ
 # enable logging of the first n IP packets to isdnd (n=32 here)
 options 	IPR_LOG=32
 #
 # network driver for sync PPP over ISDN; requires an equivalent
 # number of sppp devices to be configured
 device		"i4bisppp"	4
 #
 # B-channel interface to the netgraph subsystem
 device		"i4bing"	2
 #
 # CAPI driver needed for active ISDN cards (see iavc driver above)
 device		"i4bcapi"
 #
 #---------------------------------------------------------------------------
 
 # Parallel-Port Bus
 #
 # Parallel port bus support is provided by the `ppbus' device.
 # Multiple devices may be attached to the parallel port, devices
 # are automatically probed and attached when found.
 #
 # Supported devices:
 # vpo	Iomega Zip Drive
 #	Requires SCSI disk support ('scbus' and 'da'), best
 #	performance is achieved with ports in EPP 1.9 mode.
 # lpt	Parallel Printer
 # plip	Parallel network interface
 # ppi	General-purpose I/O ("Geek Port") + IEEE1284 I/O
 # pps	Pulse per second Timing Interface
 # lpbb	Philips official parallel port I2C bit-banging interface
 #
 # Supported interfaces:
 # ppc	ISA-bus parallel port interfaces.
 #
 
 options		PPC_PROBE_CHIPSET # Enable chipset specific detection
 				  # (see flags in ppc(4))
 options 	DEBUG_1284	# IEEE1284 signaling protocol debug
 options 	PERIPH_1284	# Makes your computer act as a IEEE1284
 				# compliant peripheral
 options 	DONTPROBE_1284	# Avoid boot detection of PnP parallel devices
 options 	VP0_DEBUG	# ZIP/ZIP+ debug
 options 	LPT_DEBUG	# Printer driver debug
 options 	PPC_DEBUG	# Parallel chipset level debug
 options 	PLIP_DEBUG	# Parallel network IP interface debug
 options		PCFCLOCK_VERBOSE         # Verbose pcfclock driver
 options		PCFCLOCK_MAX_RETRIES=5   # Maximum read tries (default 10)
 
 device		ppc
 hint.ppc.0.at="isa"
 hint.ppc.0.irq="7"
 device		ppbus
 device		vpo
 device		lpt
 device		plip
 device		ppi
 device		pps
 device		lpbb
 device		pcfclock
 
 # Kernel BOOTP support
 
 options 	BOOTP		# Use BOOTP to obtain IP address/hostname
 options 	BOOTP_NFSROOT	# NFS mount root filesystem using BOOTP info
 options 	BOOTP_NFSV3	# Use NFS v3 to NFS mount root
 options 	BOOTP_COMPAT	# Workaround for broken bootp daemons.
 options 	BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP
 
 #
 # Add tie-ins for a hardware watchdog.  This only enables the hooks;
 # the user must still supply the actual driver.
 #
 options 	HW_WDOG
 
 #
 # Set the number of PV entries per process.  Increasing this can
 # stop panics related to heavy use of shared memory. However, that can
 # (combined with large amounts of physical memory) cause panics at
 # boot time due to the kernel running out of VM space.
 #
 # If you're tweaking this, you might also want to increase the sysctls
 # "vm.v_free_min", "vm.v_free_reserved", and "vm.v_free_target".
 #
 # The value below is one more than the default.
 #
 options 	PMAP_SHPGPERPROC=201
 
 #
 # Disable swapping. This option removes all code which actually performs
 # swapping, so it's not possible to turn it back on at run-time.
 #
 # This is sometimes usable for systems which don't have any swap space
 # (see also sysctls "vm.defer_swapspace_pageouts" and
 # "vm.disable_swapspace_pageouts")
 #
 #options 	NO_SWAPPING
 
 # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers
 # for sendfile(2) that are used to map file VM pages, and normally
 # default to a quantity that is roughly 16*MAXUSERS+512. You would
 # typically want about 4 of these for each simultaneous file send.
 #
 options 	NSFBUFS=1024
 
 #
 # Enable extra debugging code for locks.  This stores the filename and
 # line of whatever acquired the lock in the lock itself, and changes a
 # number of function calls to pass around the relevant data.  This is
 # not at all useful unless you are debugging lock code.  Also note
 # that it is likely to break e.g. fstat(1) unless you recompile your
 # userland with -DDEBUG_LOCKS as well.
 #
 options 	DEBUG_LOCKS
 
 
 #####################################################################
 # ABI Emulation
 
 # Enable iBCS2 runtime support for SCO and ISC binaries
 options 	IBCS2
 
 # Emulate spx device for client side of SVR3 local X interface
 options 	SPX_HACK
 
 # Enable Linux ABI emulation
 options 	COMPAT_LINUX
 
 # Enable the linux-like proc filesystem support (requires COMPAT_LINUX
 # and PSEUDOFS)
 options 	LINPROCFS
 
 # Linux debugging
 options 	DEBUG_LINUX
 
 #
 # SysVR4 ABI emulation
 #
 # The svr4 ABI emulator can be statically compiled into the kernel or loaded as
 # a KLD module.  
 # The STREAMS network emulation code can also be compiled statically or as a 
 # module.  If loaded as a module, it must be loaded before the svr4 module
 # (the /usr/sbin/svr4 script does this for you).  If compiling statically,
 # the `streams' device must be configured into any kernel which also
 # specifies COMPAT_SVR4.  It is possible to have a statically-configured 
 # STREAMS device and a dynamically loadable svr4 emulator;  the /usr/sbin/svr4
 # script understands that it doesn't need to load the `streams' module under
 # those circumstances.
 # Caveat:  At this time, `options KTRACE' is required for the svr4 emulator
 # (whether static or dynamic).  
 # 
 options		COMPAT_SVR4	# build emulator statically
 options		DEBUG_SVR4	# enable verbose debugging
 device		streams		# STREAMS network driver (required for svr4).
 
 
 #####################################################################
 # USB support
 # UHCI controller
 device		uhci
 # OHCI controller
 device		ohci
 # General USB code (mandatory for USB)
 device		usb
 #
 # USB Double Bulk Pipe devices
 device		udbp
 # Generic USB device driver
 device		ugen
 # Human Interface Device (anything with buttons and dials)
 device		uhid
 # USB keyboard
 device		ukbd
 # USB printer
 device		ulpt
 # USB Iomega Zip 100 Drive (Requires scbus and da)
 device		umass
 # USB modem support
 device		umodem
 # USB mouse
 device		ums
 # Diamond Rio 500 Mp3 player
 device		urio
 # USB scanners
 device		uscanner
 #
 # ADMtek USB ethernet. Supports the LinkSys USB100TX,
 # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX
 # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus
 # eval board.
 device		aue
 #
 # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate
 # and Netmate II, and the Belkin F5U111.
 device		cue
 #
 # Kawasaki LSI ethernet. Supports the LinkSys USB10T,
 # Entrega USB-NET-E45, Peracom Ethernet Adapter, the
 # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T,
 # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB
 # and 2104USB, and the Corega USB-T.
 device		kue
 
 # debugging options for the USB subsystem
 #
 options 	UHCI_DEBUG
 options 	OHCI_DEBUG
 options 	USB_DEBUG
 
 options 	UGEN_DEBUG
 options 	UHID_DEBUG
 options 	UHUB_DEBUG
 options 	UKBD_DEBUG
 options 	ULPT_DEBUG
 options 	UMASS_DEBUG
 options 	UMS_DEBUG
 options 	URIO_DEBUG
 
 # options for ukbd:
 options 	UKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	UKBD_DFLT_KEYMAP=it.iso
 
 #
 # Embedded system options:
 #
 # An embedded system might want to run something other than init.
 options 	INIT_PATH="/sbin/init:/stand/sysinstall"
 
 # Debug options
 options 	BUS_DEBUG	# enable newbus debugging
 options 	DEBUG_VFS_LOCKS	# enable vfs lock debugging
 options 	NPX_DEBUG	# enable npx debugging (FPU/math emu)
 
 #####################################################################
 # SYSV IPC KERNEL PARAMETERS
 #
 # Maximum number of entries in a semaphore map.
 options 	SEMMAP=31
 
 # Maximum number of System V semaphores that can be used on the system at
 # one time. 
 options 	SEMMNI=11
 
 # Total number of semaphores system wide
 options 	SEMMNS=61
 
 # Total number of undo structures in system
 options 	SEMMNU=31
 
 # Maximum number of System V semaphores that can be used by a single process
 # at one time. 
 options 	SEMMSL=61
 
 # Maximum number of operations that can be outstanding on a single System V
 # semaphore at one time. 
 options 	SEMOPM=101
 
 # Maximum number of undo operations that can be outstanding on a single
 # System V semaphore at one time. 
 options 	SEMUME=11
 
 # Maximum number of shared memory pages system wide.
 options 	SHMALL=1025
 
 # Maximum size, in bytes, of a single System V shared memory region. 
 options 	SHMMAX="(SHMMAXPGS*PAGE_SIZE+1)"
 options 	SHMMAXPGS=1025
 
 # Minimum size, in bytes, of a single System V shared memory region. 
 options 	SHMMIN=2
 
 # Maximum number of shared memory regions that can be used on the system
 # at one time. 
 options 	SHMMNI=33
 
 # Maximum number of System V shared memory regions that can be attached to
 # a single process at one time. 
 options 	SHMSEG=9
 
 # Set the amount of time (in seconds) the system will wait before
 # rebooting automatically when a kernel panic occurs.  If set to (-1),
 # the system will wait indefinitely until a key is pressed on the
 # console.
 options 	PANIC_REBOOT_WAIT_TIME=16
 
 #####################################################################
 
 # More undocumented options for linting.
 # Note that documenting these is not considered an affront.
 
 options 	CAM_DEBUG_DELAY
 
 # VFS cluster debugging.
 options 	CLUSTERDEBUG
 
 # Eliminate unneeded cache flush instruction(s).
 options 	CPU_UPGRADE_HW_CACHE
 
 options 	DEBUG
 
 # PECOFF module (Win32 Execution Format)
 options		PECOFF_SUPPORT
 options		PECOFF_DEBUG
 
 # Disable the 4 MByte PSE CPU feature.
 #options 	DISABLE_PSE
 
 options 	ENABLE_ALART
 options 	I4B_SMP_WORKAROUND
 options 	I586_PMC_GUPROF=0x70000
 options 	KBDIO_DEBUG=2
 options 	KBD_MAXRETRY=4
 options 	KBD_MAXWAIT=6
 options 	KBD_RESETDELAY=201
 
 # Enable the PF_KEY Key Management API.
 options 	KEY
 
 # Kernel filelock debugging.
 options 	LOCKF_DEBUG
 
 # System V compatible message queues
 # Please note that the values provided here are used to test kernel
 # building.  The defaults in the sources provide almost the same numbers.
 # MSGSSZ must be a power of 2 between 8 and 1024.
 options 	MSGMNB=2049	# Max number of chars in queue
 options 	MSGMNI=41	# Max number of message queue identifiers
 options 	MSGSEG=2049	# Max number of message segments
 options 	MSGSSZ=16	# Size of a message segment
 options 	MSGTQL=41	# Max number of messages in system
 
 options 	NBUF=512	# Number of buffer headers
 
 options 	NMBCLUSTERS=1024	# Number of mbuf clusters
 
 options 	PSM_DEBUG=1
 
 options 	SCSI_NCR_DEBUG
 options 	SCSI_NCR_MAX_SYNC=10000
 options 	SCSI_NCR_MAX_WIDE=1
 options 	SCSI_NCR_MYADDR=7
 
 options 	SC_DEBUG_LEVEL=5	# Syscons debug level
 options 	SC_RENDER_DEBUG	# syscons rendering debugging
 
 options 	SHOW_BUSYBUFS	# List buffers that prevent root unmount
 options 	SIMPLELOCK_DEBUG
 options 	SLIP_IFF_OPTS
 options 	TIMER_FREQ="((14318182+6)/12)"
 options 	VFS_BIO_DEBUG	# VFS buffer I/O debugging
 
 options 	VM_KMEM_SIZE
 options 	VM_KMEM_SIZE_MAX
 options 	VM_KMEM_SIZE_SCALE
Index: head/sys/conf/options.i386
===================================================================
--- head/sys/conf/options.i386	(revision 82308)
+++ head/sys/conf/options.i386	(revision 82309)
@@ -1,213 +1,214 @@
 # $FreeBSD$
 # Options specific to the i386 platform kernels
 
 DISABLE_PSE
 IDE_DELAY
 MATH_EMULATE		opt_math_emulate.h
 GPL_MATH_EMULATE	opt_math_emulate.h
 PMAP_SHPGPERPROC	opt_pmap.h
 PPC_PROBE_CHIPSET	opt_ppc.h
 PPC_DEBUG		opt_ppc.h
 SHOW_BUSYBUFS
 PANIC_REBOOT_WAIT_TIME	opt_panic.h
 MAXMEM
 PERFMON			opt_perfmon.h
 POWERFAIL_NMI		opt_trap.h
 AUTO_EOI_1		opt_auto_eoi.h
 AUTO_EOI_2		opt_auto_eoi.h
 CONSPEED		opt_comconsole.h
 I586_PMC_GUPROF		opt_i586_guprof.h
 WLCACHE			opt_wavelan.h
 WLDEBUG			opt_wavelan.h
 COMPAT_OLDISA
 BROKEN_KEYBOARD_RESET	opt_reset.h
+UPAGES
 
 # Options for emulators.  These should only be used at config time, so
 # they are handled like options for static file systems
 # (see src/sys/conf/options), except for broken debugging options.
 IBCS2			opt_dontuse.h
 COMPAT_LINUX		opt_dontuse.h
 DEBUG_LINUX		opt_linux.h
 COMPAT_SVR4		opt_dontuse.h
 DEBUG_SVR4		opt_svr4.h
 PECOFF_SUPPORT		opt_dontuse.h
 PECOFF_DEBUG		opt_pecoff.h
 # i386 SMP options
 APIC_IO			opt_global.h
 
 CLK_CALIBRATION_LOOP		opt_clock.h
 CLK_USE_I8254_CALIBRATION	opt_clock.h
 CLK_USE_TSC_CALIBRATION		opt_clock.h
 TIMER_FREQ			opt_clock.h
 
 NO_F00F_HACK			opt_cpu.h
 CPU_BLUELIGHTNING_FPU_OP_CACHE	opt_cpu.h
 CPU_BLUELIGHTNING_3X		opt_cpu.h
 CPU_BTB_EN			opt_cpu.h
 CPU_CYRIX_NO_LOCK		opt_cpu.h
 CPU_DIRECT_MAPPED_CACHE		opt_cpu.h
 CPU_DISABLE_5X86_LSSER		opt_cpu.h
 CPU_FASTER_5X86_FPU		opt_cpu.h
 CPU_I486_ON_386			opt_cpu.h
 CPU_IORT			opt_cpu.h
 CPU_L2_LATENCY			opt_cpu.h
 CPU_LOOP_EN			opt_cpu.h
 CPU_PPRO2CELERON		opt_cpu.h
 CPU_RSTK_EN			opt_cpu.h
 CPU_SUSP_HLT			opt_cpu.h
 CPU_UPGRADE_HW_CACHE		opt_cpu.h
 CPU_WT_ALLOC			opt_cpu.h
 CYRIX_CACHE_WORKS		opt_cpu.h
 CYRIX_CACHE_REALLY_WORKS	opt_cpu.h
 NO_MEMORY_HOLE			opt_cpu.h
 CPU_ENABLE_SSE			opt_cpu.h
 
 # The CPU type affects the endian conversion functions all over the kernel.
 I386_CPU		opt_global.h
 I486_CPU		opt_global.h
 I586_CPU		opt_global.h
 I686_CPU		opt_global.h
 
 MAXCONS			opt_syscons.h
 SC_ALT_MOUSE_IMAGE	opt_syscons.h
 SC_DEBUG_LEVEL		opt_syscons.h
 SC_DFLT_FONT		opt_syscons.h
 SC_DISABLE_DDBKEY	opt_syscons.h
 SC_DISABLE_REBOOT	opt_syscons.h
 SC_HISTORY_SIZE		opt_syscons.h
 SC_KERNEL_CONS_ATTR	opt_syscons.h
 SC_KERNEL_CONS_REV_ATTR	opt_syscons.h
 SC_MOUSE_CHAR		opt_syscons.h
 SC_NO_CUTPASTE		opt_syscons.h
 SC_NO_FONT_LOADING	opt_syscons.h
 SC_NO_HISTORY		opt_syscons.h
 SC_NO_SYSMOUSE		opt_syscons.h
 SC_NORM_ATTR		opt_syscons.h
 SC_NORM_REV_ATTR	opt_syscons.h
 SC_PIXEL_MODE		opt_syscons.h
 SC_RENDER_DEBUG		opt_syscons.h
 SC_TWOBUTTON_MOUSE	opt_syscons.h
 
 VGA_ALT_SEQACCESS	opt_vga.h
 VGA_DEBUG		opt_vga.h
 VGA_NO_FONT_LOADING	opt_vga.h
 VGA_NO_MODE_CHANGE	opt_vga.h
 VGA_SLOW_IOACCESS	opt_vga.h
 VGA_WIDTH90		opt_vga.h
 
 VESA			opt_vesa.h
 VESA_DEBUG		opt_vesa.h
 
 PSM_HOOKRESUME		opt_psm.h
 PSM_RESETAFTERSUSPEND	opt_psm.h
 PSM_DEBUG		opt_psm.h
 
 PCIC_RESUME_RESET	opt_pcic.h
 
 ATKBD_DFLT_KEYMAP	opt_atkbd.h
 
 KBD_DISABLE_KEYMAP_LOAD	opt_kbd.h
 KBD_INSTALL_CDEV	opt_kbd.h
 KBD_MAXRETRY		opt_kbd.h
 KBD_MAXWAIT		opt_kbd.h
 KBD_RESETDELAY		opt_kbd.h
 KBDIO_DEBUG		opt_kbd.h
 
 #USERCONFIG		opt_userconfig.h
 #VISUAL_USERCONFIG	opt_userconfig.h
 #INTRO_USERCONFIG	opt_userconfig.h
 #DEV_EISA		opt_userconfig.h
 
 EISA_SLOTS		opt_eisa.h
 
 FE_8BIT_SUPPORT		opt_fe.h
 
 # pcvt(4) has a bunch of options
 FAT_CURSOR		opt_pcvt.h
 XSERVER			opt_pcvt.h
 PCVT_24LINESDEF		opt_pcvt.h
 PCVT_CTRL_ALT_DEL	opt_pcvt.h
 PCVT_META_ESC		opt_pcvt.h
 PCVT_NSCREENS		opt_pcvt.h
 PCVT_PRETTYSCRNS	opt_pcvt.h
 PCVT_SCANSET		opt_pcvt.h
 PCVT_SCREENSAVER	opt_pcvt.h
 PCVT_USEKBDSEC		opt_pcvt.h
 PCVT_VT220KEYB		opt_pcvt.h
 PCVT_GREENSAVER		opt_pcvt.h
 
 # voxware options
 GUS_DMA2		opt_sound.h
 GUS_DMA			opt_sound.h
 GUS_IRQ			opt_sound.h
 
 # Video spigot
 SPIGOT_UNSECURE		opt_spigot.h
 
 # -------------------------------
 # isdn4bsd: passive ISA cards
 # -------------------------------
 TEL_S0_8		opt_i4b.h
 TEL_S0_16		opt_i4b.h
 TEL_S0_16_3		opt_i4b.h
 AVM_A1			opt_i4b.h
 USR_STI			opt_i4b.h
 ITKIX1			opt_i4b.h
 ELSA_PCC16		opt_i4b.h
 # -------------------------------
 # isdn4bsd: passive ISA PnP cards
 # -------------------------------
 CRTX_S0_P		opt_i4b.h
 DRN_NGO                 opt_i4b.h
 TEL_S0_16_3_P		opt_i4b.h
 SEDLBAUER		opt_i4b.h
 DYNALINK		opt_i4b.h
 ASUSCOM_IPAC		opt_i4b.h
 ELSA_QS1ISA		opt_i4b.h
 SIEMENS_ISURF2		opt_i4b.h
 EICON_DIVA		opt_i4b.h
 # -------------------------------
 # isdn4bsd: passive PCI cards
 # -------------------------------
 ELSA_QS1PCI		opt_i4b.h
 AVM_A1_PCI		opt_i4b.h
 # -------------------------------
 # isdn4bsd: passive PCMCIA cards
 # -------------------------------
 #AVM_A1_PCMCIA		opt_i4b.h
 # -------------------------------
 # isdn4bsd: misc options
 # -------------------------------
 # temporary workaround for SMP machines
 I4B_SMP_WORKAROUND      opt_i4b.h
 # enable VJ compression code for ipr i/f
 IPR_VJ			opt_i4b.h
 IPR_LOG			opt_i4b.h
 
 # -------------------------------
 # oltr: build options
 # -------------------------------
 # Exclude microcode options
 OLTR_NO_TMS_MAC		opt_oltr.h
 OLTR_NO_HAWKEYE_MAC	opt_oltr.h
 OLTR_NO_BULLSEYE_MAC	opt_oltr.h
 
 # Total number of ports controlled by the dgb(4) driver.
 # Defaults to NDGB*16.
 NDGBPORTS		opt_dgb.h
 
 # Device options
 DEV_NPX			opt_npx.h
 DEV_APM			opt_apm.h
 DEV_SPLASH		opt_splash.h
 
 # SMB/CIFS requester
 NETSMB			opt_netsmb.h
 NETSMBCRYPTO		opt_netsmb.h
 
 # SMB/CIFS filesystem
 SMBFS
 
 # -------------------------------
 # EOF
 # -------------------------------
Index: head/sys/conf/options.pc98
===================================================================
--- head/sys/conf/options.pc98	(revision 82308)
+++ head/sys/conf/options.pc98	(revision 82309)
@@ -1,220 +1,221 @@
 # $FreeBSD$
 # Options specific to the pc98 platform kernels
 
 DISABLE_PSE
 IDE_DELAY
 MATH_EMULATE		opt_math_emulate.h
 GPL_MATH_EMULATE	opt_math_emulate.h
 PMAP_SHPGPERPROC	opt_pmap.h
 PPC_PROBE_CHIPSET	opt_ppc.h
 PPC_DEBUG		opt_ppc.h
 SHOW_BUSYBUFS
 PANIC_REBOOT_WAIT_TIME	opt_panic.h
 MAXMEM
 PERFMON			opt_perfmon.h
 POWERFAIL_NMI		opt_trap.h
 AUTO_EOI_1		opt_auto_eoi.h
 AUTO_EOI_2		opt_auto_eoi.h
 CONSPEED		opt_comconsole.h
 I586_PMC_GUPROF		opt_i586_guprof.h
 WLCACHE			opt_wavelan.h
 WLDEBUG			opt_wavelan.h
 COMPAT_OLDISA
 BROKEN_KEYBOARD_RESET	opt_reset.h
+UPAGES
 
 # Options for emulators.  These should only be used at config time, so
 # they are handled like options for static file systems
 # (see src/sys/conf/options), except for broken debugging options.
 IBCS2			opt_dontuse.h
 COMPAT_LINUX		opt_dontuse.h
 DEBUG_LINUX		opt_linux.h
 COMPAT_SVR4		opt_dontuse.h
 DEBUG_SVR4		opt_svr4.h
 PECOFF_SUPPORT		opt_dontuse.h
 PECOFF_DEBUG		opt_pecoff.h
 # i386 SMP options
 APIC_IO			opt_global.h
 
 CLK_CALIBRATION_LOOP		opt_clock.h
 CLK_USE_I8254_CALIBRATION	opt_clock.h
 CLK_USE_TSC_CALIBRATION		opt_clock.h
 TIMER_FREQ			opt_clock.h
 
 NO_F00F_HACK			opt_cpu.h
 CPU_BLUELIGHTNING_FPU_OP_CACHE	opt_cpu.h
 CPU_BLUELIGHTNING_3X		opt_cpu.h
 CPU_BTB_EN			opt_cpu.h
 CPU_CYRIX_NO_LOCK		opt_cpu.h
 CPU_DIRECT_MAPPED_CACHE		opt_cpu.h
 CPU_DISABLE_5X86_LSSER		opt_cpu.h
 CPU_FASTER_5X86_FPU		opt_cpu.h
 CPU_I486_ON_386			opt_cpu.h
 CPU_IORT			opt_cpu.h
 CPU_L2_LATENCY			opt_cpu.h
 CPU_LOOP_EN			opt_cpu.h
 CPU_PPRO2CELERON		opt_cpu.h
 CPU_RSTK_EN			opt_cpu.h
 CPU_SUSP_HLT			opt_cpu.h
 CPU_UPGRADE_HW_CACHE		opt_cpu.h
 CPU_WT_ALLOC			opt_cpu.h
 CYRIX_CACHE_WORKS		opt_cpu.h
 CYRIX_CACHE_REALLY_WORKS	opt_cpu.h
 NO_MEMORY_HOLE			opt_cpu.h
 CPU_ENABLE_SSE			opt_cpu.h
 
 # The CPU type affects the endian conversion functions all over the kernel.
 I386_CPU		opt_global.h
 I486_CPU		opt_global.h
 I586_CPU		opt_global.h
 I686_CPU		opt_global.h
 
 MAXCONS			opt_syscons.h
 SC_ALT_MOUSE_IMAGE	opt_syscons.h
 SC_DEBUG_LEVEL		opt_syscons.h
 SC_DFLT_FONT		opt_syscons.h
 SC_DISABLE_DDBKEY	opt_syscons.h
 SC_DISABLE_REBOOT	opt_syscons.h
 SC_HISTORY_SIZE		opt_syscons.h
 SC_KERNEL_CONS_ATTR	opt_syscons.h
 SC_KERNEL_CONS_REV_ATTR	opt_syscons.h
 SC_MOUSE_CHAR		opt_syscons.h
 SC_NO_CUTPASTE		opt_syscons.h
 SC_NO_FONT_LOADING	opt_syscons.h
 SC_NO_HISTORY		opt_syscons.h
 SC_NO_SYSMOUSE		opt_syscons.h
 SC_NORM_ATTR		opt_syscons.h
 SC_NORM_REV_ATTR	opt_syscons.h
 SC_PIXEL_MODE		opt_syscons.h
 SC_RENDER_DEBUG		opt_syscons.h
 SC_TWOBUTTON_MOUSE	opt_syscons.h
 
 GDC			opt_gdc.h
 
 PSM_HOOKRESUME		opt_psm.h
 PSM_RESETAFTERSUSPEND	opt_psm.h
 PSM_DEBUG		opt_psm.h
 
 PCIC_RESUME_RESET	opt_pcic.h
 
 KBD_DISABLE_KEYMAP_LOAD	opt_kbd.h
 KBD_INSTALL_CDEV	opt_kbd.h
 KBD_MAXRETRY		opt_kbd.h
 KBD_MAXWAIT		opt_kbd.h
 KBD_RESETDELAY		opt_kbd.h
 KBDIO_DEBUG		opt_kbd.h
 
 #USERCONFIG		opt_userconfig.h
 #VISUAL_USERCONFIG	opt_userconfig.h
 #INTRO_USERCONFIG	opt_userconfig.h
 #DEV_EISA		opt_userconfig.h
 
 EISA_SLOTS		opt_eisa.h
 
 FE_8BIT_SUPPORT		opt_fe.h
 
 # pcvt(4) has a bunch of options
 FAT_CURSOR		opt_pcvt.h
 XSERVER			opt_pcvt.h
 PCVT_24LINESDEF		opt_pcvt.h
 PCVT_CTRL_ALT_DEL	opt_pcvt.h
 PCVT_META_ESC		opt_pcvt.h
 PCVT_NSCREENS		opt_pcvt.h
 PCVT_PRETTYSCRNS	opt_pcvt.h
 PCVT_SCANSET		opt_pcvt.h
 PCVT_SCREENSAVER	opt_pcvt.h
 PCVT_USEKBDSEC		opt_pcvt.h
 PCVT_VT220KEYB		opt_pcvt.h
 PCVT_GREENSAVER		opt_pcvt.h
 
 # voxware options
 GUS_DMA2		opt_sound.h
 GUS_DMA			opt_sound.h
 GUS_IRQ			opt_sound.h
 
 # Video spigot
 SPIGOT_UNSECURE		opt_spigot.h
 
 # -------------------------------
 # isdn4bsd: passive ISA cards
 # -------------------------------
 TEL_S0_8		opt_i4b.h
 TEL_S0_16		opt_i4b.h
 TEL_S0_16_3		opt_i4b.h
 AVM_A1			opt_i4b.h
 USR_STI			opt_i4b.h
 ITKIX1			opt_i4b.h
 ELSA_PCC16		opt_i4b.h
 # -------------------------------
 # isdn4bsd: passive ISA PnP cards
 # -------------------------------
 CRTX_S0_P		opt_i4b.h
 DRN_NGO                 opt_i4b.h
 TEL_S0_16_3_P		opt_i4b.h
 SEDLBAUER		opt_i4b.h
 DYNALINK		opt_i4b.h
 ASUSCOM_IPAC		opt_i4b.h
 ELSA_QS1ISA		opt_i4b.h
 SIEMENS_ISURF2		opt_i4b.h
 EICON_DIVA		opt_i4b.h
 # -------------------------------
 # isdn4bsd: passive PCI cards
 # -------------------------------
 ELSA_QS1PCI		opt_i4b.h
 AVM_A1_PCI		opt_i4b.h
 # -------------------------------
 # isdn4bsd: passive PCMCIA cards
 # -------------------------------
 #AVM_A1_PCMCIA		opt_i4b.h
 # -------------------------------
 # isdn4bsd: misc options
 # -------------------------------
 # temporary workaround for SMP machines
 I4B_SMP_WORKAROUND      opt_i4b.h
 # enable VJ compression code for ipr i/f
 IPR_VJ			opt_i4b.h
 IPR_LOG			opt_i4b.h
 
 # -------------------------------
 # oltr: build options
 # -------------------------------
 # Exclude microcode options
 OLTR_NO_TMS_MAC		opt_oltr.h
 OLTR_NO_HAWKEYE_MAC	opt_oltr.h
 OLTR_NO_BULLSEYE_MAC	opt_oltr.h
 
 # Total number of ports controlled by the dgb(4) driver.
 # Defaults to NDGB*16.
 NDGBPORTS		opt_dgb.h
 
 # bs driver options
 SCSI_BOUNCE_SIZE	opt_bs.h
 BS_TARG_SAFEMODE	opt_bs.h
 
 # ct driver options
 CT_USE_RELOCATE_OFFSET	opt_ct.h
 CT_BUS_WEIGHT		opt_ct.h
 
 # npx options
 FPU_ERROR_BROKEN	opt_npx.h
 
 # PC98 options
 PC98			opt_global.h
 EPSON_BOUNCEDMA		opt_pc98.h
 EPSON_MEMWIN		opt_pc98.h
 LINE30			opt_syscons.h
 
 # Device options
 DEV_NPX			opt_npx.h
 DEV_APM			opt_apm.h
 DEV_SPLASH		opt_splash.h
 
 # SMB/CIFS requester
 NETSMB			opt_netsmb.h
 NETSMBCRYPTO		opt_netsmb.h
 
 # SMB/CIFS filesystem
 SMBFS
 
 # -------------------------------
 # EOF
 # -------------------------------
Index: head/sys/i386/conf/GENERIC
===================================================================
--- head/sys/i386/conf/GENERIC	(revision 82308)
+++ head/sys/i386/conf/GENERIC	(revision 82309)
@@ -1,235 +1,237 @@
 #
 # GENERIC -- Generic kernel configuration file for FreeBSD/i386
 #
 # For more information on this file, please read the handbook section on
 # Kernel Configuration Files:
 #
 #    http://www.FreeBSD.org/handbook/kernelconfig-config.html
 #
 # The handbook is also available locally in /usr/share/doc/handbook
 # if you've installed the doc distribution, otherwise always see the
 # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
 # latest information.
 #
 # An exhaustive list of options and more detailed explanations of the
 # device lines is also present in the NOTES configuration file. If you are
 # in doubt as to the purpose or necessity of a line, check first in NOTES.
 #
 # $FreeBSD$
 
 machine		i386
 cpu		I486_CPU
 cpu		I586_CPU
 cpu		I686_CPU
 ident		GENERIC
 maxusers	32
 
 #To statically compile in device wiring instead of /boot/device.hints
-#hints		"GENERIC.hints"		#Default places to look for devices.
+hints		"GENERIC.hints"		#Default places to look for devices.
 
 makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 
 options 	MATH_EMULATE		#Support for x87 emulation
 options 	INET			#InterNETworking
 options 	INET6			#IPv6 communications protocols
 options 	FFS			#Berkeley Fast Filesystem
 options 	SOFTUPDATES		#Enable FFS soft updates support
 options 	MD_ROOT			#MD is a potential root device
 options 	NFS			#Network Filesystem
 options 	NFS_ROOT		#NFS usable as root device, NFS required
 options 	MSDOSFS			#MSDOS Filesystem
 options 	CD9660			#ISO 9660 Filesystem
 options 	PROCFS			#Process filesystem
 options 	COMPAT_43		#Compatible with BSD 4.3 [KEEP THIS!]
 options 	SCSI_DELAY=15000	#Delay (in ms) before probing SCSI
 options 	UCONSOLE		#Allow users to grab the console
 #options 	USERCONFIG		#boot -c editor
 #options 	VISUAL_USERCONFIG	#visual boot -c editor
 options 	KTRACE			#ktrace(1) support
 options 	SYSVSHM			#SYSV-style shared memory
 options 	SYSVMSG			#SYSV-style message queues
 options 	SYSVSEM			#SYSV-style semaphores
 options 	P1003_1B		#Posix P1003_1B real-time extensions
 options 	_KPOSIX_PRIORITY_SCHEDULING
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 
 # Debugging for use in -current
 options 	DDB
 options 	INVARIANTS
 options 	INVARIANT_SUPPORT
 options 	WITNESS
+options 	UPAGES=4
+options		CPU_ENABLE_SSE
 
 # To make an SMP kernel, the next two are needed
 #options 	SMP			# Symmetric MultiProcessor Kernel
 #options 	APIC_IO			# Symmetric (APIC) I/O
 
 device		isa
 device		eisa
 device		pci
 
 # Floppy drives
 device		fdc
 
 # ATA and ATAPI devices
 device		ata
 device		atadisk			# ATA disk drives
 device		atapicd			# ATAPI CDROM drives
 device		atapifd			# ATAPI floppy drives
 device		atapist			# ATAPI tape drives
 options 	ATA_STATIC_ID		#Static device numbering
 
 # SCSI Controllers
 device		ahb		# EISA AHA1742 family
 device		ahc		# AHA2940 and onboard AIC7xxx devices
 device		amd		# AMD 53C974 (Tekram DC-390(T))
 device		isp		# Qlogic family
 #device		ncr		# NCR/Symbios Logic
 device		sym		# NCR/Symbios Logic (newer chipsets + those of `ncr')
 
 device		adv		# Advansys SCSI adapters
 device		adw		# Advansys wide SCSI adapters
 device		aha		# Adaptec 154x SCSI adapters
 device		aic		# Adaptec 15[012]x SCSI adapters, AIC-6[23]60.
 device		bt		# Buslogic/Mylex MultiMaster SCSI adapters
 
 device		ncv		# NCR 53C500
 device		nsp		# Workbit Ninja SCSI-3
 device		stg		# TMC 18C30/18C50
 
 # RAID controllers interfaced to the SCSI subsystem
 device		asr		# DPT SmartRAID V, VI and Adaptec SCSI RAID
 device		dpt		# DPT Smartcache III, IV - See NOTES for options!
 device		mly		# Mylex AcceleRAID/eXtremeRAID
 
 # SCSI peripherals
 device		scbus		# SCSI bus (required)
 device		da		# Direct Access (disks)
 device		sa		# Sequential Access (tape etc)
 device		cd		# CD
 device		pass		# Passthrough device (direct SCSI access)
 
 # RAID controllers
 device		aac		# Adaptec FSA RAID
 device		amr		# AMI MegaRAID
 device		ida		# Compaq Smart RAID
 device		mlx		# Mylex DAC960 family
 device		twe		# 3ware ATA RAID
 
 # atkbdc0 controls both the keyboard and the PS/2 mouse
 device		atkbdc	1	# At keyboard controller
 device		atkbd		# at keyboard
 device		psm		# psm mouse
 
 device		vga		# VGA screen
 
 # splash screen/screen saver
 device		splash
 
 # syscons is the default console driver, resembling an SCO console
 device		sc	1
 
 # Enable this for the pcvt (VT220 compatible) console driver
 #device		vt
 #options 	XSERVER			# support for X server on a vt console
 #options 	FAT_CURSOR		# start with block cursor
 
 # Floating point support - do not disable.
 device		npx
 
 # Power management support (see NOTES for more options)
 device		apm
 # Add suspend/resume support for the i8254.
 device		pmtimer
 
 # PCCARD (PCMCIA) support
 device		card		# pccard bus
 device		pcic		# PCMCIA bridge
 
 # Serial (COM) ports
 device		sio		# 8250, 16[45]50 based serial ports
 
 # Parallel port
 device		ppc
 device		ppbus		# Parallel port bus (required)
 device		lpt		# Printer
 device		plip		# TCP/IP over parallel
 device		ppi		# Parallel port interface device
 #device		vpo		# Requires scbus and da
 
 
 # PCI Ethernet NICs.
 device		de		# DEC/Intel DC21x4x (``Tulip'')
 device		vx		# 3Com 3c590, 3c595 (``Vortex'')
 device		txp		# 3Com 3cR990 (``Typhoon'')
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 # NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
 device		miibus		# MII bus support
 device		dc		# DEC/Intel 21143 and various workalikes
 device		fxp		# Intel EtherExpress PRO/100B (82557, 82558)
 device		pcn		# AMD Am79C97x PCI 10/100 NICs
 device		rl		# RealTek 8129/8139
 device		sf		# Adaptec AIC-6915 (``Starfire'')
 device		sis		# Silicon Integrated Systems SiS 900/SiS 7016
 device		ste		# Sundance ST201 (D-Link DFE-550TX)
 device		tl		# Texas Instruments ThunderLAN
 device		tx		# SMC EtherPower II (83c170 ``EPIC'')
 device		vr		# VIA Rhine, Rhine II
 device		wb		# Winbond W89C840F
 device		wx		# Intel Gigabit Ethernet Card (``Wiseman'')
 device		xl		# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # ISA Ethernet NICs.  pccard nics included.
 device		cs		# Crystal Semiconductor CS89x0 NIC
 # 'device ed' requires 'device miibus'
 device		ed		# NE[12]000, SMC Ultra, 3c503, DS8390 cards
 device		ex		# Intel EtherExpress Pro/10 and Pro/10+
 device		ep		# Etherlink III based cards
 device		fe		# Fujitsu MB8696x based cards
 device		sn		# SMC's 9000 series of ethernet chips
 device		xe		# Xircom pccard ethernet
 
 # The probe order of these is presently determined by i386/isa/isa_compat.c.
 #device		ie
 #device		le
 device		lnc
 
 # Wireless NIC cards
 device		an		# Aironet 4500/4800 802.11 wireless NICs. 
 device		awi		# BayStack 660 and others
 device		wi		# WaveLAN/IEEE 802.11 wireless NICs. 
 #device		wl		# Older non 802.11 Wavelan wireless NIC.
 
 # Pseudo devices - the number indicates how many units to allocate.
 device		random		# Entropy device
 device		loop		# Network loopback
 device		ether		# Ethernet support
 device		sl		# Kernel SLIP
 device		ppp	1	# Kernel PPP
 device		tun		# Packet tunnel.
 device		pty		# Pseudo-ttys (telnet etc)
 device		md		# Memory "disks"
 device		gif		# IPv6 and IPv4 tunneling
 device		faith	1	# IPv6-to-IPv4 relaying (translation)
 
 # The `bpf' device enables the Berkeley Packet Filter.
 # Be aware of the administrative consequences of enabling this!
 device		bpf		# Berkeley packet filter
 
 # USB support
 device		uhci		# UHCI PCI->USB interface
 device		ohci		# OHCI PCI->USB interface
 device		usb		# USB Bus (required)
 #device		udbp		# USB Double Bulk Pipe devices
 device		ugen		# Generic
 device		uhid		# "Human Interface Devices"
 device		ukbd		# Keyboard
 device		ulpt		# Printer
 device		umass		# Disks/Mass storage - Requires scbus and da
 device		ums		# Mouse
 device		urio		# Diamond Rio 500 MP3 player
 device		uscanner	# Scanners
 # USB Ethernet, requires mii
 device		aue		# ADMtek USB ethernet
 device		cue		# CATC USB ethernet
 device		kue		# Kawasaki LSI USB ethernet
Index: head/sys/i386/conf/NOTES
===================================================================
--- head/sys/i386/conf/NOTES	(revision 82308)
+++ head/sys/i386/conf/NOTES	(revision 82309)
@@ -1,2909 +1,2910 @@
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
 # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers',
 # 'makeoptions', 'hints' etc go into the kernel configuration that you
 # run config(8) with.
 #
 # Lines that begin with 'hints.' are NOT for config(8), they go into your
 # hints file.  See /boot/device.hints and/or the 'hints' config(8) directive.
 #
 # Please use ``make LINT'' to create an old-style LINT file if you want to
 # do kernel test-builds.
 #
 # $FreeBSD$
 #
 
 #
 # This directive is mandatory; it defines the architecture to be
 # configured for; in this case, the 386 family based IBM-PC and
 # compatibles.
 #
 machine		i386
 
 #
 # This is the ``identification'' of the kernel.  Usually this should
 # be the same as the name of your kernel.
 #
 ident		LINT
 
 #
 # The `maxusers' parameter controls the static sizing of a number of
 # internal system tables by a complicated formula defined in param.c.
 #
 maxusers	10
 
 #
 # We want LINT to cover profiling as well
 profile 	2
 
 #
 # The `makeoptions' parameter allows variables to be passed to the
 # generated Makefile in the build area.
 #
 # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS}
 # after most other flags.  Here we use it to inhibit use of non-optimal
 # gcc builtin functions (e.g., memcmp).
 #
 # DEBUG happens to be magic.
 # The following is equivalent to 'config -g KERNELNAME' and creates
 # 'kernel.debug' compiled with -g debugging as well as a normal
 # 'kernel'.  Use 'make install.debug' to install the debug kernel
 # but that isn't normally necessary as the debug symbols are not loaded
 # by the kernel and are not useful there anyway.
 #
 # KERNEL can be overridden so that you can change the default name of your
 # kernel.
 #
 makeoptions	CONF_CFLAGS=-fno-builtin  #Don't allow use of memcmp, etc.
 #makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 #makeoptions	KERNEL=foo		#Build kernel "foo" and install "/foo"
 
 #
 # Certain applications can grow to be larger than the 512M limit
 # that FreeBSD initially imposes.  Below are some options to
 # allow that limit to grow to 1GB; it can be increased further
 # by changing the parameters.  MAXDSIZ is the maximum that the
 # limit can be set to, and the DFLDSIZ is the default value for
 # the limit.  MAXSSIZ is the maximum that the stack limit can be
 # set to.  You might want to set the default lower than the max, 
 # and explicitly set the maximum with a shell command for processes
 # that regularly exceed the limit like INND.
 #
 options 	MAXDSIZ="(1024UL*1024*1024)"
 options 	MAXSSIZ="(128UL*1024*1024)"
 options 	DFLDSIZ="(1024UL*1024*1024)"
 
 #
 # BLKDEV_IOSIZE sets the default block size used in user block
 # device I/O.  Note that this value will be overridden by the label
 # when specifying a block device from a label with a non-0
 # partition blocksize.  The default is PAGE_SIZE.
 #
 options 	BLKDEV_IOSIZE=8192
 
 # Options for the VM subsystem
 options 	PQ_CACHESIZE=512	# color for 512k/16k cache
+options 	UPAGES=3		# number of 4k stack pages per process
 # Deprecated options supported for backwards compatibility
 #options 	PQ_NOOPT		# No coloring
 #options 	PQ_LARGECACHE		# color for 512k/16k cache
 #options 	PQ_HUGECACHE		# color for 1024k/16k cache
 #options 	PQ_MEDIUMCACHE		# color for 256k/16k cache
 #options 	PQ_NORMALCACHE		# color for 64k/16k cache
 
 # This allows you to actually store this configuration file into
 # the kernel binary itself, where it may be later read by saying:
 #    strings -n 3 /boot/kernel/kernel | sed -n 's/^___//p' > MYKERNEL
 #
 options 	INCLUDE_CONFIG_FILE     # Include this file in kernel
 
 #
 # The root device and filesystem type can be compiled in;
 # this provides a fallback option if the root device cannot
 # be correctly guessed by the bootstrap code, or an override if
 # the RB_DFLTROOT flag (-r) is specified when booting the kernel.
 #
 options 	ROOTDEVNAME=\"ufs:da0s2e\"
 
 
 #####################################################################
 # SMP OPTIONS:
 #
 # SMP enables building of a Symmetric MultiProcessor Kernel.
 # APIC_IO enables the use of the IO APIC for Symmetric I/O.
 #
 # Notes:
 #
 #  An SMP kernel will ONLY run on an Intel MP spec. qualified motherboard.
 #
 #  Be sure to disable 'cpu I386_CPU' && 'cpu I486_CPU' for SMP kernels.
 #
 #  Check the 'Rogue SMP hardware' section to see if additional options
 #   are required by your hardware.
 #
 
 # Mandatory:
 options 	SMP			# Symmetric MultiProcessor Kernel
 options 	APIC_IO			# Symmetric (APIC) I/O
 
 #
 # Rogue SMP hardware:
 #
 
 # Bridged PCI cards:
 #
 # The MP tables of most of the current generation MP motherboards
 #  do NOT properly support bridged PCI cards.  To use one of these
 #  cards you should refer to ???
 
 # SMP Debugging Options:
 #
 # MUTEX_DEBUG enables various extra assertions in the mutex code.
 # WITNESS enables the mutex witness code which detects deadlocks and cycles
 #         during locking operations.
 # WITNESS_DDB causes the witness code to drop into the kernel debugger if
 #	  a lock hierarchy violation occurs or if locks are held when going to
 #	  sleep.
 # WITNESS_SKIPSPIN disables the witness checks on spin mutexes.
 options 	MUTEX_DEBUG
 options 	WITNESS
 options 	WITNESS_DDB
 options 	WITNESS_SKIPSPIN
 
 
 #####################################################################
 # CPU OPTIONS
 
 #
 # You must specify at least one CPU (the one you intend to run on);
 # deleting the specification for CPUs you don't need to use may make
 # parts of the system run faster.
 # I386_CPU is mutually exclusive with the other CPU types.
 #
 #cpu		I386_CPU		
 cpu		I486_CPU
 cpu		I586_CPU		# aka Pentium(tm)
 cpu		I686_CPU		# aka Pentium Pro(tm)
 
 #
 # Options for CPU features.
 #
 # CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM
 # BlueLightning CPU.  It works only with Cyrix FPU, and this option
 # should not be used with Intel FPU.
 #
 # CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning
 # CPU if CPU supports it. The default is double-clock mode on
 # BlueLightning CPU box.
 #
 # CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1).
 #
 # CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct
 # mapped mode.  Default is 2-way set associative mode.
 #
 # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space
 # of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1.
 # Otherwise, the NO_LOCK bit of CCR1 is cleared.  (NOTE 3)
 #
 # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables
 # reorder).  This option should not be used if you use memory mapped
 # I/O device(s).
 #
 # CPU_ENABLE_SSE enables SSE/MMX2 instructions support.
 #
 # CPU_FASTER_5X86_FPU enables faster FPU exception handler.
 #
 # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
 # for i386 machines.
 #
 # CPU_IORT defines I/O clock delay time (NOTE 1).  Default values of
 # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7, respectively
 # (no clock delay).
 #
 # CPU_L2_LATENCY specifies the L2 cache latency value.  This option is used
 # only when CPU_PPRO2CELERON is defined and Mendocino Celeron is detected.
 # The default value is 5.
 #
 # CPU_LOOP_EN prevents flushing the prefetch buffer if the destination
 # of a jump is already present in the prefetch buffer on Cyrix 5x86
 # (NOTE 1).
 #
 # CPU_PPRO2CELERON enables L2 cache of Mendocino Celeron CPUs.  This option
 # is useful when you use Socket 8 to Socket 370 converter, because most Pentium
 # Pro BIOSs do not enable L2 cache of Mendocino Celeron CPUs.
 #
 # CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1).
 #
 # CPU_SUSP_HLT enables suspend on HALT.  If this option is set, CPU
 # enters suspend mode following execution of HALT instruction.
 #
 # CPU_WT_ALLOC enables write allocation on Cyrix 6x86/6x86MX and AMD
 # K5/K6/K6-2 cpus.
 #
 # CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache
 # flush at hold state.
 #
 # CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs
 # without cache flush at hold state, and (2) write-back CPU cache on
 # Cyrix 6x86 whose revision < 2.7 (NOTE 2).
 #
 # NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY
 # Pentiums) from locking up when a LOCK CMPXCHG8B instruction is
 # executed.  This option is only needed if I586_CPU is also defined,
 # and should be included for any non-Pentium CPU that defines it.
 #
 # NO_MEMORY_HOLE is an optimisation for systems with AMD K6 processors
 # which indicates that the 15-16MB range is *definitely* not being
 # occupied by an ISA memory hole.
 #
 # NOTE 1: The options CPU_BTB_EN, CPU_IORT, CPU_LOOP_EN and
 # CPU_RSTK_EN should not be used because of CPU bugs.
 # These options may crash your system.
 #
 # NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled
 # in write-through mode when revision < 2.7.  If revision of Cyrix
 # 6x86 >= 2.7, CPU cache is always enabled in write-back mode.
 #
 # NOTE 3: This option may cause failures for software that requires
 # locked cycles in order to operate correctly.
 #
 options 	CPU_BLUELIGHTNING_FPU_OP_CACHE
 options 	CPU_BLUELIGHTNING_3X
 options 	CPU_BTB_EN
 options 	CPU_DIRECT_MAPPED_CACHE
 options 	CPU_DISABLE_5X86_LSSER
 options 	CPU_ENABLE_SSE
 options 	CPU_FASTER_5X86_FPU
 options 	CPU_I486_ON_386
 options 	CPU_IORT
 options 	CPU_L2_LATENCY=5
 options 	CPU_LOOP_EN
 options 	CPU_PPRO2CELERON
 options 	CPU_RSTK_EN
 options 	CPU_SUSP_HLT
 options 	CPU_WT_ALLOC
 options 	CYRIX_CACHE_WORKS
 options 	CYRIX_CACHE_REALLY_WORKS
 #options 	NO_F00F_HACK
 
 #
 # A math emulator is mandatory if you wish to run on hardware which
 # does not have a floating-point processor.  Pick either the original,
 # bogus (but freely-distributable) math emulator, or a much more
 # fully-featured but GPL-licensed emulator taken from Linux.
 #
 options 	MATH_EMULATE		#Support for x87 emulation
 # Don't enable both of these in a real config.
 options 	GPL_MATH_EMULATE	#Support for x87 emulation via
 					#new math emulator
 
 
 #####################################################################
 # COMPATIBILITY OPTIONS                                             
 
 #
 # Implement system calls compatible with 4.3BSD and older versions of
 # FreeBSD.  You probably do NOT want to remove this as much current code
 # still relies on the 4.3 emulation.
 #
 options 	COMPAT_43
 
 #
 # These three options provide support for System V Interface
 # Definition-style interprocess communication, in the form of shared
 # memory, semaphores, and message queues, respectively.
 #
 options 	SYSVSHM
 options 	SYSVSEM
 options 	SYSVMSG
 
 
 #####################################################################
 # DEBUGGING OPTIONS
 
 #
 # Enable the kernel debugger.
 #
 options 	DDB
 
 #
 # Don't drop into DDB for a panic. Intended for unattended operation
 # where you may want to drop to DDB from the console, but still want
 # the machine to recover from a panic
 #
 options 	DDB_UNATTENDED
 
 #
 # If using GDB remote mode to debug the kernel, there's a non-standard
 # extension to the remote protocol that can be used to use the serial
 # port as both the debugging port and the system console.  It's non-
 # standard and you're on your own if you enable it.  See also the
 # "remotechat" variables in the FreeBSD specific version of gdb.
 #
 options 	GDB_REMOTE_CHAT
 
 #
 # KTRACE enables the system-call tracing facility ktrace(2).
 #
 options 	KTRACE			#kernel tracing
 
 #
 # KTR is a kernel tracing mechanism imported from BSD/OS.  Currently it
 # has no userland interface aside from a few sysctl's.  It is enabled with
 # the KTR option.  The KTR_EXTEND option causes trace events to be generated
 # as a string from snprintf rather than as a string and up to 5 argument
 # pointers.  KTR_ENTRIES defines the number of entries in the circular trace
 # buffer.  KTR_COMPILE defines the mask of events to compile into the kernel
 # as defined by the KTR_* constants in <sys/ktr.h>.  KTR_MASK defines the
 # initial value of the ktr_mask variable which determines at runtime what
 # events to trace.  KTR_CPUMASK determines which CPUs log events, with
 # bit X corresponding to cpu X.  KTR_VERBOSE enables dumping of KTR events
 # to the console by default.  This functionality can be toggled via the
 # debug.ktr_verbose sysctl and defaults to off if KTR_VERBOSE is not defined.
 #
 options 	KTR
 options 	KTR_EXTEND
 options 	KTR_ENTRIES=1024
 options 	KTR_COMPILE="(KTR_INTR|KTR_PROC)"
 options 	KTR_MASK=KTR_INTR
 options 	KTR_CPUMASK=0x3
 options 	KTR_VERBOSE
 
 #
 # The INVARIANTS option is used in a number of source files to enable
 # extra sanity checking of internal structures.  This support is not
 # enabled by default because of the extra time it would take to check
 # for these conditions, which can only occur as a result of
 # programming errors.
 #
 options 	INVARIANTS
 
 #
 # The INVARIANT_SUPPORT option makes us compile in support for
 # verifying some of the internal structures.  It is a prerequisite for
 # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be
 # called.  The intent is that you can set 'INVARIANTS' for single
 # source files (by changing the source file or specifying it on the
 # command line) if you have 'INVARIANT_SUPPORT' enabled.  Also, if you
 # wish to build a kernel module with 'INVARIANTS', then adding
 # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary
 # infrastructure without the added overhead.
 #
 options 	INVARIANT_SUPPORT
 
 #
 # The DIAGNOSTIC option is used to enable extra debugging information
 # from some parts of the kernel.  As this makes everything more noisy,
 # it is disabled by default.
 #
 options 	DIAGNOSTIC
 
 #
 # REGRESSION causes optional kernel interfaces necessary only for regression
 # testing to be enabled.  These interfaces may constitute security risks
 # when enabled, as they permit processes to easily modify aspects of the
 # run-time environment to reproduce unlikely or unusual (possibly normally
 # impossible) scenarios.
 #
 options		REGRESSION
 
 #
 # RESTARTABLE_PANICS allows one to continue from a panic as if it were
 # a call to the debugger via the Debugger() function instead.  It is only
 # useful if a kernel debugger is present.  To restart from a panic, reset
 # the panicstr variable to NULL and continue execution.  This option is
 # for development use only and should NOT be used in production systems
 # to "workaround" a panic.
 #
 options 	RESTARTABLE_PANICS
 
 #
 # PERFMON causes the driver for Pentium/Pentium Pro performance counters
 # to be compiled.  See perfmon(4) for more information.
 #
 options 	PERFMON
 
 
 #
 # This option lets some drivers co-exist that can't co-exist in a running
 # system.  This is used to be able to compile all kernel code in one go for
 # quality assurance purposes (like this file, which the option takes its name
 # from.)
 #
 options 	COMPILING_LINT
 
 
 # XXX - this doesn't belong here.
 # Allow ordinary users to take the console - this is useful for X.
 options 	UCONSOLE
 
 # XXX - this doesn't belong here either
 #options 	USERCONFIG		#boot -c editor
 #options 	INTRO_USERCONFIG	#imply -c and show intro screen
 #options 	VISUAL_USERCONFIG	#visual boot -c editor
 
 #####################################################################
 # NETWORKING OPTIONS
 
 #
 # Protocol families:
 #  Only the INET (Internet) family is officially supported in FreeBSD.
 #  Source code for the NS (Xerox Network Service) is provided for amusement
 #  value.
 #
 options 	INET			#Internet communications protocols
 options 	INET6			#IPv6 communications protocols
 options 	IPSEC			#IP security
 options 	IPSEC_ESP		#IP security (crypto; define w/ IPSEC)
 options 	IPSEC_DEBUG		#debug for IP security
 
 options 	IPX			#IPX/SPX communications protocols
 options 	IPXIP			#IPX in IP encapsulation (not available)
 options 	IPTUNNEL		#IP in IPX encapsulation (not available)
 
 options 	NCP			#NetWare Core protocol
 
 options 	NETATALK		#Appletalk communications protocols
 options 	NETATALKDEBUG		#Appletalk debugging
 
 # These are currently broken but are shipped due to interest.
 #options 	NS			#Xerox NS protocols
 #options 	NSIP			#XNS over IP
 
 # mchain library. It can be either loaded as KLD or compiled into kernel
 options 	LIBMCHAIN
 
 # netgraph(4). Enable the base netgraph code with the NETGRAPH option.
 # Individual node types can be enabled with the corresponding option
 # listed below; however, this is not strictly necessary as netgraph
 # will automatically load the corresponding KLD module if the node type
 # is not already compiled into the kernel. Each type below has a
 # corresponding man page, e.g., ng_async(8).
 options 	NETGRAPH		#netgraph(4) system
 options 	NETGRAPH_ASYNC
 options 	NETGRAPH_BPF
 options 	NETGRAPH_CISCO
 options 	NETGRAPH_ECHO
 options 	NETGRAPH_ETHER
 options 	NETGRAPH_FRAME_RELAY
 options 	NETGRAPH_HOLE
 options 	NETGRAPH_IFACE
 options 	NETGRAPH_KSOCKET
 options 	NETGRAPH_LMI
 # MPPC compression requires proprietary files (not included)
 #options 	NETGRAPH_MPPC_COMPRESSION
 options 	NETGRAPH_MPPC_ENCRYPTION
 options 	NETGRAPH_ONE2MANY
 options 	NETGRAPH_PPP
 options 	NETGRAPH_PPPOE
 options 	NETGRAPH_PPTPGRE
 options 	NETGRAPH_RFC1490
 options 	NETGRAPH_SOCKET
 options 	NETGRAPH_SPLIT
 options 	NETGRAPH_TEE
 options 	NETGRAPH_TTY
 options 	NETGRAPH_UI
 options 	NETGRAPH_VJC
 
 device		mn	# Munich32x/Falc54 Nx64kbit/sec cards.
 device		lmc	# tulip based LanMedia WAN cards
 device		musycc	# LMC/SBE LMC1504 quad T1/E1
 
 #
 # Network interfaces:
 #  The `loop' device is MANDATORY when networking is enabled.
 #  The `ether' device provides generic code to handle
 #  Ethernets; it is MANDATORY when an Ethernet device driver is
 #  configured or token-ring is enabled.
 #  The `fddi' device provides generic code to support FDDI.
 #  The `sppp' device serves a similar role for certain types
 #  of synchronous PPP links (like `cx', `ar').
 #  The `sl' device implements the Serial Line IP (SLIP) service.
 #  The `ppp' device implements the Point-to-Point Protocol.
 #  The `bpf' device enables the Berkeley Packet Filter.  Be
 #  aware of the legal and administrative consequences of enabling this
 #  option.  The number of devices determines the maximum number of
 #  simultaneous BPF client programs runnable.
 #  The `disc' device implements a minimal network interface,
 #  which throws away all packets sent and never receives any.  It is
 #  included for testing purposes.  This shows up as the `ds' interface.
 #  The `tap' device is a pty-like virtual Ethernet interface
 #  The `tun' device implements (user-)ppp and nos-tun
 #  The `gif' device implements IPv6 over IPv4 tunneling,
 #  IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and
 #  IPv6 over IPv6 tunneling.
 #  The XBONEHACK option allows the same pair of addresses to be configured on
 #  multiple gif interfaces.
 #  The `faith' device captures packets sent to it and diverts them
 #  to the IPv4/IPv6 translation daemon.
 #  The `stf' device implements 6to4 encapsulation.
 #  The `ef' device provides support for multiple ethernet frame types
 #  specified via ETHER_* options. See ef(4) for details.
 #
 # The PPP_BSDCOMP option enables support for compress(1) style entire
 # packet compression, the PPP_DEFLATE is for zlib/gzip style compression.
 # PPP_FILTER enables code for filtering the ppp data stream and selecting
 # events for resetting the demand dial activity timer - requires bpf.
 # See pppd(8) for more details.
 #
 device		ether			#Generic Ethernet
 device		vlan	1		#VLAN support
 device		token			#Generic TokenRing
 device		fddi			#Generic FDDI
 device		sppp			#Generic Synchronous PPP
 device		loop	1		#Network loopback device
 device		bpf			#Berkeley packet filter
 device		disc			#Discard device (ds0, ds1, etc)
 device		tap			#Virtual Ethernet driver
 device		tun			#Tunnel driver (ppp(8), nos-tun(8))
 device		sl			#Serial Line IP
 device		ppp	2		#Point-to-point protocol
 options 	PPP_BSDCOMP		#PPP BSD-compress support
 options 	PPP_DEFLATE		#PPP zlib/deflate/gzip support
 options 	PPP_FILTER		#enable bpf filtering (needs bpf)
 
 device		ef			# Multiple ethernet frames support
 options 	ETHER_II		# enable Ethernet_II frame
 options 	ETHER_8023		# enable Ethernet_802.3 (Novell) frame
 options 	ETHER_8022		# enable Ethernet_802.2 frame
 options 	ETHER_SNAP		# enable Ethernet_802.2/SNAP frame
 
 # for IPv6
 device		gif			#IPv6 and IPv4 tunneling
 options 	XBONEHACK
 device		faith	1		#for IPv6 and IPv4 translation
 device		stf			#6to4 IPv6 over IPv4 encapsulation
 
 #
 # Internet family options:
 #
 # MROUTING enables the kernel multicast packet forwarder, which works
 # with mrouted(8).
 #
 # IPFIREWALL enables support for IP firewall construction, in
 # conjunction with the `ipfw' program.  IPFIREWALL_VERBOSE sends
 # logged packets to the system logger.  IPFIREWALL_VERBOSE_LIMIT
 # limits the number of times a matching entry can be logged.
 #
 # WARNING:  IPFIREWALL defaults to a policy of "deny ip from any to any"
 # and if you do not add other rules during startup to allow access,
 # YOU WILL LOCK YOURSELF OUT.  It is suggested that you set firewall_type=open
 # in /etc/rc.conf when first enabling this feature, then refining the
 # firewall rules in /etc/rc.firewall after you've tested that the new kernel
 # feature works properly.
 #
 # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to
 # allow everything.  Use with care, if a cracker can crash your
 # firewall machine, they can get to your protected machines.  However,
 # if you are using it as an as-needed filter for specific problems as
 # they arise, then this may be for you.  Changing the default to 'allow'
 # means that you won't get stuck if the kernel and /sbin/ipfw binary get
 # out of sync.
 #
 # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''
 #
 # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding
 # packets without touching the ttl).  This can be useful to hide firewalls
 # from traceroute and similar tools.
 #
 # TCPDEBUG enables code which keeps traces of the TCP state machine
 # for sockets with the SO_DEBUG option set, which can then be examined
 # using the trpt(8) utility.
 #
 options 	MROUTING		# Multicast routing
 options 	IPFIREWALL		#firewall
 options 	IPFIREWALL_VERBOSE	#print information about
 					# dropped packets
 options 	IPFIREWALL_FORWARD	#enable transparent proxy support
 options 	IPFIREWALL_VERBOSE_LIMIT=100	#limit verbosity
 options 	IPFIREWALL_DEFAULT_TO_ACCEPT	#allow everything by default
 options 	IPV6FIREWALL		#firewall for IPv6
 options 	IPV6FIREWALL_VERBOSE
 options 	IPV6FIREWALL_VERBOSE_LIMIT=100
 options 	IPV6FIREWALL_DEFAULT_TO_ACCEPT
 options 	IPDIVERT		#divert sockets
 options 	IPFILTER		#ipfilter support
 options 	IPFILTER_LOG		#ipfilter logging
 options 	IPFILTER_DEFAULT_BLOCK	#block all packets by default
 options 	IPSTEALTH		#support for stealth forwarding
 options 	TCPDEBUG
 
 # RANDOM_IP_ID causes the ID field in IP packets to be randomized
 # instead of incremented by 1 with each packet generated.  This
 # option closes a minor information leak which allows remote
 # observers to determine the rate of packet generation on the
 # machine by watching the counter.
 options		RANDOM_IP_ID
 
 # Statically Link in accept filters
 options		ACCEPT_FILTER_DATA
 options		ACCEPT_FILTER_HTTP
 
 # TCP_DROP_SYNFIN adds support for ignoring TCP packets with SYN+FIN. This
 # prevents nmap et al. from identifying the TCP/IP stack, but breaks support
 # for RFC1644 extensions and is not recommended for web servers.
 #
 options 	TCP_DROP_SYNFIN		#drop TCP packets with SYN+FIN
 
 # DUMMYNET enables the "dummynet" bandwidth limiter. You need
 # IPFIREWALL as well. See the dummynet(4) manpage for more info.
 # BRIDGE enables bridging between ethernet cards -- see bridge(4).
 # You can use IPFIREWALL and dummynet together with bridging.
 options 	DUMMYNET
 options 	BRIDGE
 
 #
 # ATM (HARP version) options
 #
 # ATM_CORE includes the base ATM functionality code.  This must be included
 #	for ATM support.
 #
 # ATM_IP includes support for running IP over ATM.
 #
 # At least one (and usually only one) of the following signalling managers
 # must be included (note that all signalling managers include PVC support):
 # ATM_SIGPVC includes support for the PVC-only signalling manager `sigpvc'.
 # ATM_SPANS includes support for the `spans' signalling manager, which runs
 #	the FORE Systems's proprietary SPANS signalling protocol.
 # ATM_UNI includes support for the `uni30' and `uni31' signalling managers,
 #	which run the ATM Forum UNI 3.x signalling protocols.
 #
 # The `hea' driver provides support for the Efficient Networks, Inc.
 # ENI-155p ATM PCI Adapter.
 #
 # The `hfa' driver provides support for the FORE Systems, Inc.
 # PCA-200E ATM PCI Adapter.
 #
 options 	ATM_CORE		#core ATM protocol family
 options 	ATM_IP			#IP over ATM support
 options 	ATM_SIGPVC		#SIGPVC signalling manager
 options 	ATM_SPANS		#SPANS signalling manager
 options 	ATM_UNI			#UNI signalling manager
 device		hea			#Efficient ENI-155p ATM PCI
 device		hfa			#FORE PCA-200E ATM PCI
 
 
 #####################################################################
 # FILESYSTEM OPTIONS
 
 #
 # Only the root, /usr, and /tmp filesystems need be statically
 # compiled; everything else will be automatically loaded at mount
 # time.  (Exception: the UFS family--- FFS --- cannot
 # currently be demand-loaded.)  Some people still prefer to statically
 # compile other filesystems as well.
 #
 # NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be
 # buggy, and WILL panic your system if you attempt to do anything with
 # them.  They are included here as an incentive for some enterprising
 # soul to sit down and fix them.
 #
 
 # One of these is mandatory:
 options 	FFS			#Fast filesystem
 options 	NFS			#Network File System
 
 # The rest are optional:
 #options 	NFS_NOSERVER		#Disable the NFS-server code.
 options 	CD9660			#ISO 9660 filesystem
 options 	FDESCFS			#File descriptor filesystem
 options 	HPFS			#OS/2 File system
 options 	MSDOSFS			#MS DOS File System (FAT, FAT32)
 options 	NTFS			#NT File System
 options 	NULLFS			#NULL filesystem
 options 	NWFS			#NetWare filesystem
 options 	PORTALFS		#Portal filesystem
 options 	PROCFS			#Process filesystem
 options 	PSEUDOFS		#Pseudo-filesystem framework
 options 	UMAPFS			#UID map filesystem
 options 	UNIONFS			#Union filesystem
 # options 	NODEVFS			#disable devices filesystem
 # The xFS_ROOT options REQUIRE the associated ``options xFS''
 options 	NFS_ROOT		#NFS usable as root device
 # This code enables IFS, an FFS which exports inodes as the namespace.
 # You can find details in src/sys/ufs/ifs/README .
 options		IFS
 
 # Soft updates is a technique for improving file system speed and
 # making abrupt shutdown less risky.
 #
 options 	SOFTUPDATES
 
 # Extended attributes allow additional data to be associated with files,
 # and are used for ACLs, Capabilities, and MAC labels.
 # See src/sys/ufs/ufs/README.extattr for more information.
 options		UFS_EXTATTR
 options		UFS_EXTATTR_AUTOSTART
 
 # Access Control List support for UFS filesystems.  The current ACL
 # implementation requires extended attribute support, UFS_EXTATTR,
 # for the underlying filesystem.
 # See src/sys/ufs/ufs/README.acls for more information.
 options 	UFS_ACL
 
 # Directory hashing improves the speed of operations on very large
 # directories at the expense of some memory.
 options		UFS_DIRHASH
 
 # Make space in the kernel for a root filesystem on a md device.
 # Define to the number of kilobytes to reserve for the filesystem.
 options 	MD_ROOT_SIZE=10
 
 # Make the md device a potential root device, either with preloaded
 # images of type mfs_root or md_root.
 options 	MD_ROOT
 
 # Allow this many swap-devices.
 #
 # In order to manage swap, the system must reserve bitmap space that
 # scales with the largest mounted swap device multiplied by NSWAPDEV, 
 # regardless of whether other swap devices exist or not.  So it
 # is not a good idea to make this value too large.
 options 	NSWAPDEV=5
 
 # Disk quotas are supported when this option is enabled.
 options 	QUOTA			#enable disk quotas
 
 # If you are running a machine just as a fileserver for PC and MAC
 # users, using SAMBA or Netatalk, you may consider setting this option
 # and keeping all those users' directories on a filesystem that is
 # mounted with the suiddir option. This gives new files the same
 # ownership as the directory (similar to group). It's a security hole
 # if you let these users run programs, so confine it to file-servers
 # (but it'll save you lots of headaches in those cases). Root owned
 # directories are exempt and X bits are cleared. The suid bit must be
 # set on the directory as well; see chmod(1).  PC owners can't see/set
 # ownerships so they keep getting their toes trodden on. This saves
 # you all the support calls as the filesystem it's used on will act as
 # they expect: "It's my dir so it must be my file".
 #
 options 	SUIDDIR
 
 # NFS options:
 options 	NFS_MINATTRTIMO=3	# VREG attrib cache timeout in sec
 options 	NFS_MAXATTRTIMO=60
 options 	NFS_MINDIRATTRTIMO=30	# VDIR attrib cache timeout in sec
 options 	NFS_MAXDIRATTRTIMO=60
 options 	NFS_GATHERDELAY=10	# Default write gather delay (msec)
 options 	NFS_UIDHASHSIZ=29	# Tune the size of nfssvc_sock with this
 options 	NFS_WDELAYHASHSIZ=16	# and with this
 options 	NFS_MUIDHASHSIZ=63	# Tune the size of nfsmount with this
 options 	NFS_DEBUG		# Enable NFS Debugging
 
 # Coda stuff:
 options 	CODA			#CODA filesystem.
 device		vcoda	4		#coda minicache <-> venus comm.
 
 #
 # Add support for the EXT2FS filesystem of Linux fame.  Be a bit
 # careful with this - the ext2fs code has a tendency to lag behind
 # changes and not be exercised very much, so mounting read/write could
 # be dangerous (and even mounting read only could result in panics.)
 #
 options 	EXT2FS
 
 # Use real implementations of the aio_* system calls.  There are numerous
 # stability issues in the current aio code that make it unsuitable for
 # inclusion on shell boxes.
 options 	VFS_AIO
 
 # Enable the UFS I/O optimization code through the VM system.  This allows
 # the use of VM operations instead of copying operations when possible.
 # 
 # Even with this enabled, actual use of the code is still controlled by the
 # sysctl vfs.ioopt.  0 gives no optimization, 1 gives normal (use VM
 # operations if a request happens to fit), 2 gives aggressive optimization
 # (the operations are split to do as much as possible through the VM system.)
 #
 # Enabling this will probably not give an overall speedup except for
 # special workloads.
 options 	ENABLE_VFS_IOOPT
 
 # Cryptographically secure random number generator; /dev/[u]random
 device		random
 
 
 #####################################################################
 # POSIX P1003.1B
 
 # Real time extensions added in the 1993 Posix
 # P1003_1B: Infrastructure
 # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING
 # _KPOSIX_VERSION:             Version kernel is built for
 
 options 	P1003_1B
 options 	_KPOSIX_PRIORITY_SCHEDULING
 options 	_KPOSIX_VERSION=199309L
 
 
 #####################################################################
 # CLOCK OPTIONS
 
 # The granularity of operation is controlled by the kernel option HZ whose
 # default value (100) means a granularity of 10ms.  For an accurate simulation
 # of high data rates it might be necessary to reduce the timer granularity to
 # 1ms or less.  Consider, however, that some interfaces using programmed I/O
 # may require a considerable time to output packets.  So, reducing the
 # granularity too much might actually cause ticks to be missed thus reducing
 # the accuracy of operation.
 
 options 	HZ=100
 
 # Other clock options
 
 options 	CLK_CALIBRATION_LOOP
 options 	CLK_USE_I8254_CALIBRATION
 options 	CLK_USE_TSC_CALIBRATION
 
 
 #####################################################################
 # SCSI DEVICES
 
 # SCSI DEVICE CONFIGURATION
 
 # The SCSI subsystem consists of the `base' SCSI code, a number of
 # high-level SCSI device `type' drivers, and the low-level host-adapter
 # device drivers.  The host adapters are listed in the ISA and PCI
 # device configuration sections below.
 #
 # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so
 # that a given bus, target, and LUN always come on line as the same
 # device unit.  In earlier versions the unit numbers were assigned
 # in the order that the devices were probed on the SCSI bus.  This
 # means that if you removed a disk drive, you may have had to rewrite
 # your /etc/fstab file, and also that you had to be careful when adding
 # a new disk as it may have been probed earlier and moved your device
 # configuration around.
 
 # This old behavior is maintained as the default behavior.  The unit
 # assignment begins with the first non-wired down unit for a device
 # type.  For example, if you wire a disk as "da3" then the first
 # non-wired disk will be assigned da4.
 
 # The syntax for wiring down devices is:
 
 hint.scbus.0.at="ahc0"
 hint.scbus.1.at="ahc1"
 hint.scbus.1.bus="0"
 hint.scbus.3.at="ahc2"
 hint.scbus.3.bus="0"
 hint.scbus.2.at="ahc2"
 hint.scbus.2.bus="1"
 hint.da.0.at="scbus0"
 hint.da.0.target="0"
 hint.da.0.unit="0"
 hint.da.1.at="scbus3"
 hint.da.1.target="1"
 hint.da.2.at="scbus2"
 hint.da.2.target="3"
 hint.sa.1.at="scbus1"
 hint.sa.1.target="6"
 
 # "units" (SCSI logical unit number) that are not specified are
 # treated as if specified as LUN 0.
 
 # All SCSI devices allocate as many units as are required.
 
 # The ch driver drives SCSI Media Changer ("jukebox") devices.
 #
 # The da driver drives SCSI Direct Access ("disk") and Optical Media
 # ("WORM") devices.
 #
 # The sa driver drives SCSI Sequential Access ("tape") devices.
 #
 # The cd driver drives SCSI Read Only Direct Access ("cd") devices.
 #
 # The ses driver drives SCSI Environmental Services ("ses") and
 # SAF-TE ("SCSI Accessed Fault-Tolerant Enclosure") devices.
 #
 # The pt driver drives SCSI Processor devices.
 #
 # 
 # Target Mode support is provided here but also requires that a SIM
 # (SCSI Host Adapter Driver) provide support as well.
 #
 # The targ driver provides target mode support as a Processor type device.
 # It exists to give the minimal context necessary to respond to Inquiry
 # commands. There is a sample user application that shows how the rest
 # of the command support might be done in /usr/share/examples/scsi_target.
 #
 # The targbh driver provides target mode support and exists to respond
 # to incoming commands that do not otherwise have a logical unit assigned
 # to them.
 # 
 # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI
 # configuration as the "pass" driver.
 
 device		scbus		#base SCSI code
 device		ch		#SCSI media changers
 device		da		#SCSI direct access devices (aka disks)
 device		sa		#SCSI tapes
 device		cd		#SCSI CD-ROMs
 device		ses		#SCSI Environmental Services (and SAF-TE)
 device		pt		#SCSI processor 
 device		targ		#SCSI Target Mode Code
 device		targbh		#SCSI Target Mode Blackhole Device
 device		pass		#CAM passthrough driver
 
 # CAM OPTIONS:
 # debugging options:
 # -- NOTE --  If you specify one of the bus/target/lun options, you must
 #             specify them all!
 # CAMDEBUG: When defined enables debugging macros
 # CAM_DEBUG_BUS:  Debug the given bus.  Use -1 to debug all busses.
 # CAM_DEBUG_TARGET:  Debug the given target.  Use -1 to debug all targets.
 # CAM_DEBUG_LUN:  Debug the given lun.  Use -1 to debug all luns.
 # CAM_DEBUG_FLAGS:  OR together CAM_DEBUG_INFO, CAM_DEBUG_TRACE,
 #                   CAM_DEBUG_SUBTRACE, and CAM_DEBUG_CDB
 #
 # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds
 # CAM_NEW_TRAN_CODE: this is the new transport layer code that will be switched
 #			to soon
 # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions
 # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions
 # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter)
 #             queue after a bus reset, and the number of milliseconds to
 #             freeze the device queue after a bus device reset.
 options 	CAMDEBUG
 options 	CAM_DEBUG_BUS=-1
 options 	CAM_DEBUG_TARGET=-1
 options 	CAM_DEBUG_LUN=-1
 options 	CAM_DEBUG_FLAGS="CAM_DEBUG_INFO|CAM_DEBUG_TRACE|CAM_DEBUG_CDB"
 options 	CAM_MAX_HIGHPOWER=4
 options 	SCSI_NO_SENSE_STRINGS
 options 	SCSI_NO_OP_STRINGS
 options 	SCSI_DELAY=8000	# Be pessimistic about Joe SCSI device
 
 # Options for the CAM CDROM driver:
 # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN
 # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only
 #                           enforced if there is I/O waiting for another LUN
 # The compiled in defaults for these variables are 2 and 10 seconds,
 # respectively.
 #
 # These can also be changed on the fly with the following sysctl variables:
 # kern.cam.cd.changer.min_busy_seconds
 # kern.cam.cd.changer.max_busy_seconds
 #
 options 	CHANGER_MIN_BUSY_SECONDS=2
 options 	CHANGER_MAX_BUSY_SECONDS=10
 
 # Options for the CAM sequential access driver:
 # SA_IO_TIMEOUT: Timeout for read/write/wfm  operations, in minutes
 # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes
 # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes
 # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes
 # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT.
 options 	SA_IO_TIMEOUT="(4)"
 options 	SA_SPACE_TIMEOUT="(60)"
 options 	SA_REWIND_TIMEOUT="(2*60)"
 options 	SA_ERASE_TIMEOUT="(4*60)"
 options 	SA_1FM_AT_EOD
 
 # Optional timeout for the CAM processor target (pt) device
 # This is specified in seconds.  The default is 60 seconds.
 options 	SCSI_PT_DEFAULT_TIMEOUT="60"
 
 # Optional enable of doing SES passthrough on other devices (e.g., disks)
 #
 # Normally disabled because a lot of newer SCSI disks report themselves
 # as having SES capabilities, but this can then clot up attempts to build
 # a topology with the SES device that's on the box these drives
 # are in....
 options		SES_ENABLE_PASSTHROUGH
 
 
 #####################################################################
 # MISCELLANEOUS DEVICES AND OPTIONS
 
 # The `pty' device usually turns out to be ``effectively mandatory'',
 # as it is required for `telnetd', `rlogind', `screen', `emacs', and
 # `xterm', among others.
 
 device		pty		#Pseudo ttys
 device		speaker		#Play IBM BASIC-style noises out your speaker
 device		gzip		#Exec gzipped a.out's
 device		md		#Memory/malloc disk
 device		snp		#Snoop device - to look at pty/vty/etc..
 device		ccd	4	#Concatenated disk driver
 
 # Configuring Vinum into the kernel is not necessary, since the kld
 # module gets started automatically when vinum(8) starts.  This
 # device is also untested.  Use at your own risk.
 #
 # The option VINUMDEBUG must match the value set in CFLAGS
 # in src/sbin/vinum/Makefile.  Failure to do so will result in
 # the following message from vinum(8):
 #
 # Can't get vinum config: Invalid argument
 #
 # see vinum(4) for more reasons not to use these options.
 device		vinum		#Vinum concat/mirror/raid driver
 options 	VINUMDEBUG	#enable Vinum debugging hooks
 
 # Kernel side iconv library
 options 	LIBICONV
 
 # Size of the kernel message buffer.  Should be N * pagesize.
 options 	MSGBUF_SIZE=40960
 
 
 #####################################################################
 # HARDWARE BUS CONFIGURATION
 
 # ISA, EISA, MCA and PCI bus:
 
 #
 # Mandatory ISA devices: isa, npx
 #
 device		isa
 
 #
 # Options for `isa':
 #
 # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A
 # interrupt controller.  This saves about 0.7-1.25 usec for each interrupt.
 # This option breaks suspend/resume on some portables.
 #
 # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A
 # interrupt controller.  This saves about 0.7-1.25 usec for each interrupt.
 # Automatic EOI is documented not to work for the slave with the
 # original i8259A, but it works for some clones and some integrated
 # versions.
 #
 # MAXMEM specifies the amount of RAM on the machine; if this is not
 # specified, FreeBSD will first read the amount of memory from the CMOS
 # RAM, so the amount of memory will initially be limited to 64MB or 16MB
 # depending on the BIOS.  If the BIOS reports 64MB, a memory probe will
 # then attempt to detect the installed amount of RAM.  If this probe
 # fails to detect >64MB RAM you will have to use the MAXMEM option.
 # The amount is in kilobytes, so for a machine with 128MB of RAM, it would
 # be 131072 (128 * 1024).
 #
 # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to
 # reset the CPU for reboot.  This is needed on some systems with broken
 # keyboard controllers.
 
 options 	COMPAT_OLDISA	#Use ISA shims and glue for old drivers
 options 	AUTO_EOI_1
 #options 	AUTO_EOI_2
 
 options 	MAXMEM="(128*1024)"
 #options 	BROKEN_KEYBOARD_RESET
 
 # Enable support for the kernel PLL to use an external PPS signal,
 # under supervision of [x]ntpd(8)
 # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp
 
 options 	PPS_SYNC
 
 # If you see the "calcru: negative time of %ld usec for pid %d (%s)\n"
 # message you probably have some broken sw/hw which disables interrupts
 # for too long.  You can make the system more resistant to this by
 # choosing a high value for NTIMECOUNTER.  The default is 5, there
 # is no upper limit but more than a couple of hundred are not productive.
 # A better strategy may be to sysctl -w kern.timecounter.method=1
 
 options 	NTIMECOUNTER=20
 
 # 
 # EISA bus
 #
 # The EISA bus device is `eisa'.  It provides auto-detection and
 # configuration support for all devices on the EISA bus.
 
 device		eisa
 
 # By default, only 10 EISA slots are probed, since the slot numbers
 # above clash with the configuration address space of the PCI subsystem,
 # and the EISA probe is not very smart about this.  This is sufficient
 # for most machines, but in particular the HP NetServer LC series comes
 # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11,
 # thus you need to bump this figure to 12 for them.
 options 	EISA_SLOTS=12
 
 #
 # MCA bus:
 #
 # The MCA bus device is `mca'.  It provides auto-detection and
 # configuration support for all devices on the MCA bus.
 # No hints are required for MCA.
 
 device		mca
 
 #
 # PCI bus & PCI options:
 #
 # The main PCI bus device is `pci'.  It provides auto-detection and
 # configuration support for all devices on the PCI bus, using either
 # configuration mode defined in the PCI specification.
 
 device		pci
 
 #
 # AGP GART support
 device		agp
 
 # PCI options
 #
 #options 	PCI_QUIET	#quiets PCI code on chipset settings
 
 
 #####################################################################
 # HARDWARE DEVICE CONFIGURATION
 
 # EISA support is available for some devices, so they can be auto-probed.
 # MicroChannel (MCA) support is available for some devices.
 # For ISA the required hints are listed.
 # EISA, MCA, PCI and pccard are self identifying buses, so no hints
 # are needed.
 
 #
 # Mandatory devices:
 #
 
 # The keyboard controller; it controls the keyboard and the PS/2 mouse.
 device		atkbdc	1
 hint.atkbdc.0.at="isa"
 hint.atkbdc.0.port="0x060"
 
 # The AT keyboard
 device		atkbd
 hint.atkbd.0.at="atkbdc"
 hint.atkbd.0.irq="1"
 
 # Options for atkbd:
 options 	ATKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	ATKBD_DFLT_KEYMAP="jp.106"
 
 # These options are valid for other keyboard drivers as well.
 options 	KBD_DISABLE_KEYMAP_LOAD	# refuse to load a keymap
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 
 # `flags' for atkbd:
 #       0x01    Force detection of keyboard, else we always assume a keyboard
 #       0x02    Don't reset keyboard, useful for some newer ThinkPads
 #       0x04    Old-style (XT) keyboard support, useful for older ThinkPads
 
 # PS/2 mouse
 device		psm
 hint.psm.0.at="atkbdc"
 hint.psm.0.irq="12"
 
 # Options for psm:
 options 	PSM_HOOKRESUME		#hook the system resume event, useful
 					#for some laptops
 options 	PSM_RESETAFTERSUSPEND	#reset the device at the resume event
 
 # The video card driver.
 device		vga
 hint.vga.0.at="isa"
 
 # Options for vga:
 # Try the following option if the mouse pointer is not drawn correctly
 # or font does not seem to be loaded properly.  May cause flicker on
 # some systems.
 options 	VGA_ALT_SEQACCESS
 
 # If you can dispense with some vga driver features, you may want to
 # use the following options to save some memory.
 #options 	VGA_NO_FONT_LOADING	# don't save/load font
 #options 	VGA_NO_MODE_CHANGE	# don't change video modes
 
 # Older video cards may require this option for proper operation.
 options 	VGA_SLOW_IOACCESS	# do byte-wide i/o's to TS and GDC regs
 
 # The following option probably won't work with the LCD displays.
 options 	VGA_WIDTH90		# support 90 column modes
 
 # To include support for VESA video modes
 options 	VESA
 
 options 	FB_DEBUG		# Frame buffer debugging
 options 	FB_INSTALL_CDEV		# install a CDEV entry in /dev
 
 # Splash screen at start up!  Screen savers require this too.
 device		splash
 
 # Various screen savers.
 device		apm_saver		# Requires APM
 device		blank_saver
 device		daemon_saver
 device		fade_saver
 device		fire_saver
 device		green_saver
 device		logo_saver
 device		rain_saver
 device		star_saver
 device		warp_saver
 
 # The pcvt console driver (vt220 compatible).
 device		vt
 hint.vt.0.at="isa"
 options 	XSERVER			# support for running an X server on vt
 options 	FAT_CURSOR		# start with block cursor
 # This PCVT option is for keyboards such as those used on really old ThinkPads
 options 	PCVT_SCANSET=2
 # Other PCVT options are documented in pcvt(4).
 options 	PCVT_24LINESDEF
 options 	PCVT_CTRL_ALT_DEL
 options 	PCVT_META_ESC
 options 	PCVT_NSCREENS=9
 options 	PCVT_PRETTYSCRNS
 options 	PCVT_SCREENSAVER
 options 	PCVT_USEKBDSEC
 options 	PCVT_VT220KEYB
 options 	PCVT_GREENSAVER
 
 # The syscons console driver (sco color console compatible).
 device		sc	1
 hint.sc.0.at="isa"
 options 	MAXCONS=16		# number of virtual consoles
 options 	SC_ALT_MOUSE_IMAGE	# simplified mouse cursor in text mode
 options 	SC_DFLT_FONT		# compile font in
 makeoptions	SC_DFLT_FONT=cp850
 options 	SC_DISABLE_DDBKEY	# disable `debug' key
 options 	SC_DISABLE_REBOOT	# disable reboot key sequence
 options 	SC_HISTORY_SIZE=200	# number of history buffer lines
 options 	SC_MOUSE_CHAR=0x3	# char code for text mode mouse cursor
 options 	SC_PIXEL_MODE		# add support for the raster text mode
 
 # The following options will let you change the default colors of syscons.
 options 	SC_NORM_ATTR="(FG_GREEN|BG_BLACK)"
 options 	SC_NORM_REV_ATTR="(FG_YELLOW|BG_GREEN)"
 options 	SC_KERNEL_CONS_ATTR="(FG_RED|BG_BLACK)"
 options 	SC_KERNEL_CONS_REV_ATTR="(FG_BLACK|BG_RED)"
 
 # If you have a two button mouse, you may want to add the following option
 # to use the right button of the mouse to paste text.
 options 	SC_TWOBUTTON_MOUSE
 
 # You can selectively disable features in syscons.
 options 	SC_NO_CUTPASTE
 options 	SC_NO_FONT_LOADING
 options 	SC_NO_HISTORY
 options 	SC_NO_SYSMOUSE
 
 # `flags' for sc
 #	0x80	Put the video card in the VESA 800x600 dots, 16 color mode
 #	0x100	Probe for a keyboard device periodically if one is not present
 
 # 3Dfx Voodoo Graphics, Voodoo II /dev/3dfx CDEV support. This will create
 # the /dev/3dfx0 device to work with glide implementations. This should get
 # linked to /dev/3dfx and /dev/voodoo. Note that this is not the same as
 # the tdfx DRI module from XFree86 and is completely unrelated.
 #
 # To enable Linuxulator support, one must also include COMPAT_LINUX in the
 # config as well, or you will not have the dependencies. The other option
 # is to load both as modules.
 
 device 		tdfx			# Enable 3Dfx Voodoo support
 options 	TDFX_LINUX		# Enable Linuxulator support
 
 #
 # The Numeric Processing eXtension driver.  In addition to this, you
 # may configure a math emulator (see above).  If your machine has a
 # hardware FPU and the kernel configuration includes the npx device
 # *and* a math emulator compiled into the kernel, the hardware FPU
 # will be used, unless it is found to be broken or unless "flags" to
 # npx0 includes "0x08", which requests preference for the emulator.
 device		npx
 hint.npx.0.at="nexus"
 hint.npx.0.port="0x0F0"
 hint.npx.0.flags="0x0"
 hint.npx.0.irq="13"
 
 #
 # `flags' for npx0:
 #	0x01	don't use the npx registers to optimize bcopy.
 #	0x02	don't use the npx registers to optimize bzero.
 #	0x04	don't use the npx registers to optimize copyin or copyout.
 #	0x08	use emulator even if hardware FPU is available.
 # The npx registers are normally used to optimize copying and zeroing when
 # all of the following conditions are satisfied:
 #	I586_CPU is an option
 #	the cpu is an i586 (perhaps not a Pentium)
 #	the probe for npx0 succeeds
 #	INT 16 exception handling works.
 # Then copying and zeroing using the npx registers is normally 30-100% faster.
 # The flags can be used to control cases where it doesn't work or is slower.
 # Setting them at boot time using userconfig works right (the optimizations
 # are not used until later in the bootstrap when npx0 is attached).
 # Flag 0x08 automatically disables the i586 optimized routines.
 #
 
 #
 # ACPI support using the Intel ACPI Component Architecture reference
 # implementation.
 #
 # ACPI_DEBUG enables the use of the debug.acpi.level and debug.acpi.layer
 # kernel environment variables to select initial debugging levels for the
 # Intel ACPICA code.  (Note that the Intel code must also have USE_DEBUGGER
 # defined when it is built).
 #
 device		acpica
 options		ACPI_DEBUG
 
 #
 # Optional devices:
 #
 
 #
 # SCSI host adapters:
 #
 # adv: All Narrow SCSI bus AdvanSys controllers.
 # adw: Second Generation AdvanSys controllers including the ADV940UW.
 # aha: Adaptec 154x/1535/1640
 # ahb: Adaptec 174x EISA controllers
 # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/
 #      19160x/29160x, aic7770/aic78xx
 # aic: Adaptec 6260/6360, APA-1460 (PC Card), NEC PC9801-100 (C-BUS)
 # amd: Support for the AMD 53C974 SCSI host adapter chip as found on devices
 #      such as the Tekram DC-390(T).
 # bt:  Most Buslogic controllers: including BT-445, BT-54x, BT-64x, BT-74x,
 #      BT-75x, BT-946, BT-948, BT-956, BT-958, SDC3211B, SDC3211F, SDC3222F
 # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters,
 #      ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2,
 #      ISP 12160 Ultra3 SCSI,
 #      Qlogic ISP 2100 and ISP 2200 Fibre Channel host adapters.
 # ispfw: Firmware module for Qlogic host adapters
 # ncr: NCR 53C810, 53C825 self-contained SCSI host adapters.
 # ncv: NCR 53C500 based SCSI host adapters.
 # nsp: Workbit Ninja SCSI-3 based PC Card SCSI host adapters.
 # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors:
 #      53C810, 53C810A, 53C815, 53C825,  53C825A, 53C860, 53C875, 
 #      53C876, 53C885,  53C895, 53C895A, 53C896,  53C897, 53C1510D, 
 #      53C1010-33, 53C1010-66.
 # stg: TMC 18C30, 18C50 based SCSI host adapters.
 # wds: WD7000
 
 #
 # Note that the order is important in order for Buslogic ISA/EISA cards to be
 # probed correctly.
 #
 device		bt
 hint.bt.0.at="isa"
 hint.bt.0.port="0x330"
 device		adv
 hint.adv.0.at="isa"
 device		adw
 device		aha
 hint.aha.0.at="isa"
 device		aic
 hint.aic.0.at="isa"
 device		ahb
 device		ahc
 device		amd
 device		isp
 hint.isp.0.disable="1"
 hint.isp.0.role="3"
 hint.isp.0.prefer_iomap="1"
 hint.isp.0.prefer_memmap="1"
 hint.isp.0.fwload_disable="1"
 hint.isp.0.ignore_nvram="1"
 hint.isp.0.fullduplex="1"
 hint.isp.0.topology="lport"
 hint.isp.0.topology="nport"
 hint.isp.0.topology="lport-only"
 hint.isp.0.topology="nport-only"
 # we can't get u_int64_t types, nor can we get strings if it's got
 # a leading 0x, hence this silly dodge.
 hint.isp.0.portwnn="w50000000aaaa0000"
 hint.isp.0.nodewnn="w50000000aaaa0001"
 device		ispfw
 device		ncr
 device		ncv
 device		nsp
 device		sym
 device		stg
 hint.stg.0.at="isa"
 hint.stg.0.port="0x140"
 hint.stg.0.irq="11"
 device		wds
 hint.wds.0.at="isa"
 hint.wds.0.port="0x350"
 hint.wds.0.irq="11"
 hint.wds.0.drq="6"
 
 # The aic7xxx driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set. Unfortunately,
 # this doesn't work on some motherboards, which prevents it from being the
 # default.
 options 	AHC_ALLOW_MEMIO
 
 # Enable diagnostic sequencer code.
 options 	AHC_DEBUG_SEQUENCER
 
 # Dump the contents of the ahc controller configuration PROM.
 options 	AHC_DUMP_EEPROM
 
 # Bitmap of units to enable targetmode operations.
 options 	AHC_TMODE_ENABLE
 
 # The adw driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set.
 options 	ADW_ALLOW_MEMIO
 
 # Options used in dev/isp/ (Qlogic SCSI/FC driver).
 #
 #	ISP_TARGET_MODE		-	enable target mode operation
 #
 #options 	ISP_TARGET_MODE=1
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 #options 	SYM_SETUP_LP_PROBE_MAP	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d 
 #options 	SYM_SETUP_SCSI_DIFF	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 #options 	SYM_SETUP_PCI_PARITY	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 #options 	SYM_SETUP_MAX_LUN	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # The 'asr' driver provides support for current DPT/Adaptec SCSI RAID
 # controllers (SmartRAID V and VI and later).
 # These controllers require the CAM infrastructure.
 #
 device		asr
 
 # The 'dpt' driver provides support for old DPT controllers (http://www.dpt.com/).
 # These have hardware RAID-{0,1,5} support, and do multi-initiator I/O.
 # The DPT controllers are commonly re-licensed under other brand-names -
 # some controllers by Olivetti, Dec, HP, AT&T, SNI, AST, Alphatronic, NEC and
 # Compaq are actually DPT controllers.
 #
 # See src/sys/dev/dpt for debugging and other subtle options.
 #   DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various
 #                           instruments are enabled.  The tools in
 #                           /usr/sbin/dpt_* assume these to be enabled.
 #   DPT_HANDLE_TIMEOUTS     Normally device timeouts are handled by the DPT.
 #                           If you want the driver to handle timeouts, enable
 #                           this option.  If your system is very busy, this
 #                           option will create more trouble than it solves.
 #   DPT_TIMEOUT_FACTOR      Used to compute the excessive amount of time to
 #                           wait when timing out with the above option.
 #  DPT_DEBUG_xxxx           These are controllable from sys/dev/dpt/dpt.h
 #  DPT_LOST_IRQ             When enabled, will try, once per second, to catch
 #                           any interrupt that got lost.  Seems to help in some
 #                           DPT-firmware/Motherboard combinations.  Minimal
 #                           cost, great benefit.
 #  DPT_RESET_HBA            Make "reset" actually reset the controller
 #                           instead of fudging it.  Only enable this if you
 #			    are 100% certain you need it.
 
 device		dpt
 
 # DPT options
 #!CAM# options 	DPT_MEASURE_PERFORMANCE
 #!CAM# options 	DPT_HANDLE_TIMEOUTS
 options 	DPT_TIMEOUT_FACTOR=4
 options 	DPT_LOST_IRQ
 options 	DPT_RESET_HBA
 options 	DPT_ALLOW_MEMIO
 
 #
 # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later
 # firmware.  These controllers have a SCSI-like interface, and require
 # the CAM infrastructure.
 #
 device		mly
 
 #
 # Adaptec FSA RAID controllers, including integrated DELL controllers,
 # the Dell PERC 2/QC and the HP NetRAID-4M
 #
 # AAC_COMPAT_LINUX	Include code to support Linux-binary management
 #			utilities (requires Linux compatibility
 #			support).
 #
 device		aac
 
 #
 # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers.  Only
 # one entry is needed; the code will find and configure all supported
 # controllers.
 #
 device		ida		# Compaq Smart RAID
 device		mlx		# Mylex DAC960
 device		amr		# AMI MegaRAID
 
 #
 # 3ware ATA RAID
 #
 device		twe		# 3ware ATA RAID
 
 #
 # The 'ATA' driver supports all ATA and ATAPI devices, including PC Card
 # devices. You only need one "device ata" for it to find all
 # PCI and PC Card ATA/ATAPI devices on modern machines.
 device		ata
 device		atadisk		# ATA disk drives
 device		atapicd		# ATAPI CDROM drives
 device		atapifd		# ATAPI floppy drives
 device		atapist		# ATAPI tape drives
 
 #
 # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add:
 hint.ata.0.at="isa"
 hint.ata.0.port="0x1f0"
 hint.ata.0.irq="14"
 hint.ata.1.at="isa"
 hint.ata.1.port="0x170"
 hint.ata.1.irq="15"
 
 #
 # The following options are valid on the ATA driver:
 #
 # ATA_STATIC_ID:	controller numbering is static ie depends on location
 #			else the device numbers are dynamically allocated.
 
 options 	ATA_STATIC_ID
 
 #
 # Standard floppy disk controllers and floppy tapes, supports
 # the Y-E DATA External FDD (PC Card)
 #
 device		fdc
 hint.fdc.0.at="isa"
 hint.fdc.0.port="0x3F0"
 hint.fdc.0.irq="6"
 hint.fdc.0.drq="2"
 #
 # FDC_DEBUG enables floppy debugging.  Since the debug output is huge, you
 # must also turn it on explicitly by setting the variable fd_debug with
 # DDB.
 options 	FDC_DEBUG
 #
 # Activate this line if you happen to have an Insight floppy tape.
 # Probing them proved to be dangerous for people with floppy disks only,
 # so it's "hidden" behind a flag:
 #hint.fdc.0.flags="1"
 
 # Specify floppy devices
 hint.fd.0.at="fdc0"
 hint.fd.0.drive="0"
 hint.fd.1.at="fdc0"
 hint.fd.1.drive="1"
 
 # M-systems DiskOnchip products see src/sys/contrib/dev/fla/README
 device		fla
 hint.fla.0.at="isa"
 
 #
 # Other standard PC hardware:
 #
 # mse: Logitech and ATI InPort bus mouse ports
 # sio: serial ports (see sio(4)), including support for various
 #      PC Card devices, such as Modem and NICs (see etc/defaults/pccard.conf)
 
 device		mse
 hint.mse.0.at="isa"
 hint.mse.0.port="0x23c"
 hint.mse.0.irq="5"
 
 device		sio
 hint.sio.0.at="isa"
 hint.sio.0.port="0x3F8"
 hint.sio.0.flags="0x10"
 hint.sio.0.irq="4"
 
 #
 # `flags' for serial drivers that support consoles (only for sio now):
 #	0x10	enable console support for this unit.  The other console flags
 #		are ignored unless this is set.  Enabling console support does
 #		not make the unit the preferred console - boot with -h or set
 #		the 0x20 flag for that.  Currently, at most one unit can have
 #		console support; the first one (in config file order) with
 #		this flag set is preferred.  Setting this flag for sio0 gives
 #		the old behaviour.
 #	0x20	force this unit to be the console (unless there is another
 #		higher priority console).  This replaces the COMCONSOLE option.
 #	0x40	reserve this unit for low level console operations.  Do not
 #		access the device in any normal way.
 #	0x80	use this port for serial line gdb support in ddb.
 #
 # PnP `flags' (set via userconfig using pnp x flags y)
 #	0x1	disable probing of this device.  Used to prevent your modem
 #		from being attached as a PnP modem.
 #
 
 # Options for serial drivers that support consoles (only for sio now):
 options 	BREAK_TO_DEBUGGER	#a BREAK on a comconsole goes to
 					#DDB, if available.
 options 	CONSPEED=115200		# speed for serial console
 					# (default 9600)
 
 # Solaris implements a new BREAK which is initiated by a character
 # sequence CR ~ ^b which is similar to a familiar pattern used on
 # Sun servers by the Remote Console.
 options 	ALT_BREAK_TO_DEBUGGER
 
 # Options for sio:
 options 	COM_ESP			#code for Hayes ESP
 options 	COM_MULTIPORT		#code for some cards with shared IRQs
 
 # Other flags for sio that aren't documented in the man page.
 #	0x20000	enable hardware RTS/CTS and larger FIFOs.  Only works for
 #		ST16650A-compatible UARTs.
 
 #
 # Network interfaces:
 #
 # MII bus support is required for some PCI 10/100 ethernet NICs,
 # namely those which use MII-compliant transceivers or implement
 # transceiver control interfaces that operate like an MII. Adding
 # "device miibus" to the kernel config pulls in support for
 # the generic miibus API and all of the PHY drivers, including a
 # generic one for PHYs that aren't specifically handled by an
 # individual driver.
 device		miibus
 
 # an:   Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA,
 #       PCI and ISA varieties.
 # ar:   Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver
 #       (requires sppp)
 # awi:  Support for IEEE 802.11 PC Card devices using the AMD Am79C930 and
 #       Harris (Intersil) Chipset with PCnetMobile firmware by AMD.
 # cnw:  Xircom CNW/Netware Airsurfer PC Card adapter
 # cs:   IBM Etherjet and other Crystal Semi CS89x0-based adapters
 # cx:   Cronyx/Sigma multiport sync/async (with Cisco or PPP framing)
 # dc:   Support for PCI fast ethernet adapters based on the DEC/Intel 21143
 #       and various workalikes including:
 #       the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics
 #       AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On
 #       82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II
 #       and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver
 #       replaces the old al, ax, dm, pn and mx drivers.  List of brands:
 #       Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110, 
 #       SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX, 
 #       LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204,
 #       KNE110TX.
 # de:   Digital Equipment DC21040
 # ed:   Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503
 #       HP PC Lan+, various PC Card devices (refer to etc/defaults/pccard.conf)
 # el:   3Com 3C501 (slow!)
 # ep:   3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589
 #       and PC Card devices using these chipsets.
 # ex:   Intel EtherExpress Pro/10 and other i82595-based adapters,
 #       Olicom Ethernet PC Card devices.
 # fe:   Fujitsu MB86960A/MB86965A Ethernet
 # fea:  DEC DEFEA EISA FDDI adapter
 # fpa:  Support for the Digital DEFPA PCI FDDI. `device fddi' is also needed.
 # fxp:  Intel EtherExpress Pro/100B
 #	(hint of prefer_iomap can be done to prefer I/O instead of Mem mapping)
 # ie:   AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210;
 #       Intel EtherExpress
 # le:   Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100,
 #       DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422)
 # lnc:  Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL, AMD Am7990 and
 #       Am79C960)
 # lge:	Support for PCI gigabit ethernet adapters based on the Level 1
 #	LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX,
 #	SMC TigerCard 1000 (SMC9462SX), and some Addtron cards.
 # nge:	Support for PCI gigabit ethernet adapters based on the National
 #	Semiconductor DP83820 and DP83821 chipset. This includes the
 #	SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet
 #	GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the LinkSys
 #	EG1032 and EG1064, the Surecom EP-320G-TX and the Netgear GA622T.
 # oltr: Olicom ISA token-ring adapters OC-3115, OC-3117, OC-3118 and OC-3133
 #       (no hints needed).
 #       Olicom PCI token-ring adapters OC-3136, OC-3137, OC-3139, OC-3140,
 #       OC-3141, OC-3540, OC-3250
 # rdp:  RealTek RTL 8002-based pocket ethernet adapters
 # pcn:	Support for PCI fast ethernet adapters based on the AMD Am79c97x
 #	chipsets, including the PCnet/FAST, PCnet/FAST+, PCnet/PRO and
 #	PCnet/Home. These were previously handled by the lnc driver (and
 #	still will be if you leave this driver out of the kernel).
 # rl:   Support for PCI fast ethernet adapters based on the RealTek 8129/8139
 #       chipset.  Note that the RealTek driver defaults to using programmed
 #       I/O to do register accesses because memory mapped mode seems to cause
 #       severe lockups on SMP hardware.  This driver also supports the
 #       Accton EN1207D `Cheetah' adapter, which uses a chip called
 #       the MPX 5030/5038, which is either a RealTek in disguise or a
 #       RealTek workalike.  Note that the D-Link DFE-530TX+ uses the RealTek
 #       chipset and is supported by this driver, not the 'vr' driver.
 # sf:   Support for Adaptec Duralink PCI fast ethernet adapters based on the
 #       Adaptec AIC-6915 "starfire" controller.
 #       This includes dual and quad port cards, as well as one 100baseFX card.
 #       Most of these are 64-bit PCI devices, except for one single port
 #       card which is 32-bit.
 # sis:  Support for NICs based on the Silicon Integrated Systems SiS 900,
 #       SiS 7016 and NS DP83815 PCI fast ethernet controller chips.
 # sk:   Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs.
 #       This includes the SK-9841 and SK-9842 single port cards (single mode
 #       and multimode fiber) and the SK-9843 and SK-9844 dual port cards
 #       (also single mode and multimode).
 #       The driver will autodetect the number of ports on the card and
 #       attach each one as a separate network interface.
 # sn:   Support for ISA and PC Card Ethernet devices using the
 #       SMC91C90/92/94/95 chips.
 # sr:   RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp)
 # ste:  Sundance Technologies ST201 PCI fast ethernet controller, includes
 #       the D-Link DFE-550TX.
 # ti:   Support for PCI gigabit ethernet NICs based on the Alteon Networks
 #       Tigon 1 and Tigon 2 chipsets.  This includes the Alteon AceNIC, the
 #       3Com 3c985, the Netgear GA620 and various others.  Note that you will
 #       probably want to bump up NMBCLUSTERS a lot to use this driver.
 # tl:   Support for the Texas Instruments TNETE100 series 'ThunderLAN'
 #       cards and integrated ethernet controllers.  This includes several
 #       Compaq Netelligent 10/100 cards and the built-in ethernet controllers
 #       in several Compaq Prosignia, Proliant and Deskpro systems.  It also
 #       supports several Olicom 10Mbps and 10/100 boards.
 # tx:   SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series)
 # txp:	Support for 3Com 3cR990 cards with the "Typhoon" chipset
 # vr:   Support for various fast ethernet adapters based on the VIA
 #       Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips,
 #       including the D-Link DFE530TX (see 'rl' for DFE530TX+), the Hawking 
 #       Technologies PN102TX, and the AOpen/Acer ALN-320.
 # vx:   3Com 3C590 and 3C595
 # wb:   Support for fast ethernet adapters based on the Winbond W89C840F chip.
 #       Note: this is not the same as the Winbond W89C940F, which is a
 #       NE2000 clone.
 # wl:   Lucent Wavelan (ISA card only).
 # wi:   Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both
 #       the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA
 #       bridge with a PCMCIA adapter plugged into it.
 # wx:   Intel Gigabit Ethernet PCI card (`Wiseman')
 # xe:   Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller,
 #       Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card,
 #       Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56
 # xl:   Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast)
 #       Etherlink XL cards and integrated controllers.  This includes the
 #       integrated 3c905B-TX chips in certain Dell Optiplex and Dell
 #       Precision desktop machines and the integrated 3c905-TX chips
 #       in Dell Latitude laptop docking stations.
 #       Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX
 
 # Order for ISA/EISA devices is important here
 
 device		ar	1
 hint.ar.0.at="isa"
 hint.ar.0.port="0x300"
 hint.ar.0.irq="10"
 hint.ar.0.maddr="0xd0000"
 device		cs
 hint.cs.0.at="isa"
 hint.cs.0.port="0x300"
 device		cx	1
 hint.cx.0.at="isa"
 hint.cx.0.port="0x240"
 hint.cx.0.irq="15"
 hint.cx.0.drq="7"
 device		ed
 hint.ed.0.at="isa"
 hint.ed.0.port="0x280"
 hint.ed.0.irq="5"
 hint.ed.0.maddr="0xd8000"
 device		el	1
 hint.el.0.at="isa"
 hint.el.0.port="0x300"
 hint.el.0.irq="9"
 device		ep
 device		ex
 device		fe	1
 options 	FE_8BIT_SUPPORT		# LAC-98 support
 hint.fe.0.at="isa"
 hint.fe.0.port="0x300"
 device		fea
 device		ie	2
 hint.ie.0.at="isa"
 hint.ie.0.port="0x300"
 hint.ie.0.irq="5"
 hint.ie.0.maddr="0xd0000"
 hint.ie.1.at="isa"
 hint.ie.1.port="0x360"
 hint.ie.1.irq="7"
 hint.ie.1.maddr="0xd0000"
 device		le	1
 hint.le.0.at="isa"
 hint.le.0.port="0x300"
 hint.le.0.irq="5"
 hint.le.0.maddr="0xd0000"
 device		lnc	1
 hint.lnc.0.at="isa"
 hint.lnc.0.port="0x280"
 hint.lnc.0.irq="10"
 hint.lnc.0.drq="0"
 device		rdp	1
 hint.rdp.0.at="isa"
 hint.rdp.0.port="0x378"
 hint.rdp.0.irq="7"
 hint.rdp.0.flags="2"
 device		sr	1
 hint.sr.0.at="isa"
 hint.sr.0.port="0x300"
 hint.sr.0.irq="5"
 hint.sr.0.maddr="0xd0000"
 device		sn
 hint.sn.0.at="isa"
 hint.sn.0.port="0x300"
 hint.sn.0.irq="10"
 device		an
 device		awi
 device		cnw
 device		wi
 options 	WLCACHE		# enables the signal-strength cache
 options 	WLDEBUG		# enables verbose debugging output
 device		wl	1
 hint.wl.0.at="isa"
 hint.wl.0.port="0x300"
 device		xe
 
 device		oltr
 options 	OLTR_NO_BULLSEYE_MAC
 options 	OLTR_NO_HAWKEYE_MAC
 options 	OLTR_NO_TMS_MAC
 hint.oltr.0.at="isa"
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 device		dc		# DEC/Intel 21143 and various workalikes
 device		fxp		# Intel EtherExpress PRO/100B (82557, 82558)
 hint.fxp.0.prefer_iomap="0"
 device		rl		# RealTek 8129/8139
 device		pcn		# AMD Am79C97x PCI 10/100 NICs
 device		sf		# Adaptec AIC-6915 (``Starfire'')
 device		sis		# Silicon Integrated Systems SiS 900/SiS 7016
 device		ste		# Sundance ST201 (D-Link DFE-550TX)
 device		tl		# Texas Instruments ThunderLAN
 device		tx		# SMC EtherPower II (83c170 ``EPIC'')
 device		vr		# VIA Rhine, Rhine II
 device		wb		# Winbond W89C840F
 device		xl		# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # PCI Ethernet NICs.
 device		de		# DEC/Intel DC21x4x (``Tulip'')
 device		vx		# 3Com 3c590, 3c595 (``Vortex'')
 
 # PCI Gigabit & FDDI NICs.
 device		lge
 device		nge
 device		sk
 device		ti
 device		wx
 device		fpa	1
 
 #
 # ATM related options (Cranor version)
 # (note: this driver cannot be used with the HARP ATM stack)
 #
 # The `en' device provides support for Efficient Networks (ENI)
 # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0).
 #
 # atm device provides generic atm functions and is required for
 # atm devices.
 # NATM enables the netnatm protocol family that can be used to
 # bypass TCP/IP.
 #
 # the current driver supports only PVC operations (no atm-arp, no multicast).
 # for more details, please read the original documents at
 # http://www.ccrc.wustl.edu/pub/chuck/tech/bsdatm/bsdatm.html
 #
 device		atm
 device		en
 options 	NATM			#native ATM
 
 #
 # Audio drivers: `pcm', `sbc', `gusc', `pca'
 #
 # pcm: PCM audio through various sound cards.
 #
 # This has support for a large number of new audio cards, based on
 # CS423x, OPTi931, Yamaha OPL-SAx, and also for SB16, GusPnP.
 # For more information about this driver and supported cards,
 # see the pcm.4 man page.
 #
 # The flags of the device tell the device a bit more info about the
 # device that normally is obtained through the PnP interface.
 #	bit  2..0   secondary DMA channel;
 #	bit  4      set if the board uses two dma channels;
 #	bit 15..8   board type, overrides autodetection; leave it
 #		    zero if you don't know what to put in (and you don't,
 #		    since this is unsupported at the moment...).
 #
 # This driver will use the new PnP code if it's available.
 #
 # pca: PCM audio through your PC speaker
 #
 # Supported cards include:
 # Creative SoundBlaster ISA PnP/non-PnP
 # Supports ESS and Avance ISA chips as well.
 # Gravis UltraSound ISA PnP/non-PnP
 # Crystal Semiconductor CS461x/428x PCI
 # Neomagic 256AV (ac97)
 # Most of the more common ISA/PnP sb/mss/ess compatible cards.
 
 device		pcm
 
 # For non-pnp sound cards with no bridge drivers only:
 hint.pcm.0.at="isa"
 hint.pcm.0.irq="10"
 hint.pcm.0.drq="1"
 hint.pcm.0.flags="0x0"
 
 # For PnP/PCI sound cards, no hints are required.
 
 #
 # midi: MIDI interfaces and synthesizers
 #
 
 device		midi
 
 # For non-pnp sound cards with no bridge drivers:
 hint.midi.0.at="isa"
 hint.midi.0.irq="5"
 hint.midi.0.flags="0x0"
 
 # For serial ports (this example configures port 2):
 # TODO: implement generic tty-midi interface so that we can use
 #	other uarts.
 hint.midi.0.at="isa"
 hint.midi.0.port="0x2F8"
 hint.midi.0.irq="3"
 
 #
 # seq: MIDI sequencer
 #
 
 device		seq
 
 # The bridge drivers for sound cards.  These can be separately configured
 # for providing services to the likes of new-midi.
 # When used with 'device pcm' they also provide pcm sound services.
 #
 # sbc:  Creative SoundBlaster ISA PnP/non-PnP
 #	Supports ESS and Avance ISA chips as well.
 # gusc: Gravis UltraSound ISA PnP/non-PnP
 # csa:  Crystal Semiconductor CS461x/428x PCI
 
 # For non-PnP cards:
 device		sbc
 hint.sbc.0.at="isa"
 hint.sbc.0.port="0x220"
 hint.sbc.0.irq="5"
 hint.sbc.0.drq="1"
 hint.sbc.0.flags="0x15"
 device		gusc
 hint.gusc.0.at="isa"
 hint.gusc.0.port="0x220"
 hint.gusc.0.irq="5"
 hint.gusc.0.drq="1"
 hint.gusc.0.flags="0x13"
 
 device		pca
 hint.pca.0.at="isa"
 hint.pca.0.port="0x040"
 
 #
 # Miscellaneous hardware:
 #
 # mcd: Mitsumi CD-ROM using proprietary (non-ATAPI) interface
 # scd: Sony CD-ROM using proprietary (non-ATAPI) interface
 # matcd: Matsushita/Panasonic CD-ROM using proprietary (non-ATAPI) interface
 # wt: Wangtek and Archive QIC-02/QIC-36 tape drives
 # ctx: Cortex-I frame grabber
 # apm: Laptop Advanced Power Management (experimental)
 # pmtimer: Timer device driver for power management events (APM or ACPI)
 # spigot: The Creative Labs Video Spigot video-acquisition board
 # meteor: Matrox Meteor video capture board
 # bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board
 # cy: Cyclades serial driver
 # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!)
 # digi: Digiboard driver
 # gp:  National Instruments AT-GPIB and AT-GPIB/TNT board, PCMCIA-GPIB
 # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey
 # gsc: Genius GS-4500 hand scanner.
 # joy: joystick (including IO DATA PCJOY PC Card joystick)
 # The LOUTB option specifies a slower outb() for debugging purposes. 
 # rc: RISCom/8 multiport card
 # rp: Comtrol Rocketport(ISA) - single card
 # tw: TW-523 power line interface for use with X-10 home control products
 # si: Specialix SI/XIO 4-32 port terminal multiplexor
 # spic: Sony Programmable I/O controller (VAIO notebooks)
 # stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based)
 # stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent)
 
 # Notes on APM
 #  The flags take the following meaning for apm0:
 #    0x0020  Statclock is broken.
 #  If apm is omitted, some systems require sysctl -w kern.timecounter.method=1
 #  for correct timekeeping.
 
 # Notes on the spigot:
 #  The video spigot is at 0xad6.  This port address can not be changed.
 #  The irq values may only be 10, 11, or 15
 #  I/O memory is an 8kb region.  Possible values are:
 #    0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff
 #    The start address must be on an even boundary.
 #  Add the following option if you want to allow non-root users to be able
 #  to access the spigot.  This option is not secure because it allows users
 #  direct access to the I/O page.
 #  	options SPIGOT_UNSECURE
 
 # Notes on the Comtrol Rocketport driver:
 #
 # The exact values used for rp0 depend on how many boards you have
 # in the system.  The manufacturer's sample configs are listed as:
 #
 #               device  rp	# core driver support
 #
 #   Comtrol Rocketport ISA single card
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x280"
 #
 #   If instead you have two ISA cards, one installed at 0x100 and the
 #   second installed at 0x180, then you should add the following to
 #   your kernel probe hints:
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x100"
 #		hint.rp.1.at="isa"
 #		hint.rp.1.port="0x180"
 #
 #   For 4 ISA cards, it might be something like this:
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x180"
 #		hint.rp.1.at="isa"
 #		hint.rp.1.port="0x100"
 #		hint.rp.2.at="isa"
 #		hint.rp.2.port="0x340"
 #		hint.rp.3.at="isa"
 #		hint.rp.3.port="0x240"
 #
 #   And for PCI cards, you need no hints.
 
 # Notes on the Digiboard driver:
 #
 # The following flag values have special meanings in dgb:
 #	0x01 - alternate layout of pins
 #	0x02 - use the windowed PC/Xe in 64K mode
 
 # Notes on the Specialix SI/XIO driver:
 #  The host card is memory, not IO mapped.
 #  The Rev 1 host cards use a 64K chunk, on a 32K boundary.
 #  The Rev 2 host cards use a 32K chunk, on a 32K boundary.
 #  The cards can use an IRQ of 11, 12 or 15.
 
 # Notes on the Sony Programmable I/O controller
 #  This is a temporary driver that should someday be replaced by something
 #  that hooks into the ACPI layer. The device is hooked to the PIIX4's
 #  General Device 10 decoder, which means you have to fiddle with PCI
 #  registers to map it in, even though it is otherwise treated here as
 #  an ISA device. At the moment, the driver polls, although the device
 #  is capable of generating interrupts. It is largely undocumented.
 #  The port location in the hint is where you WANT the device to be
 #  mapped. 0x10a0 seems to be traditional. At the moment the jogdial
 #  is the only thing truly supported, but apparently a fair percentage
 #  of the Vaio extra features are controlled by this device.
 
 # Notes on the Stallion stl and stli drivers:
 #  See src/i386/isa/README.stl for complete instructions.
 #  This is version 0.0.5alpha, unsupported by Stallion.
 #  The stl driver has a secondary IO port hard coded at 0x280.  You need
 #     to change src/i386/isa/stallion.c if you reconfigure this on the boards.
 #  The "flags" and "msize" settings on the stli driver depend on the board:
 #	EasyConnection 8/64 ISA:     flags 23         msize 0x1000
 #	EasyConnection 8/64 EISA:    flags 24         msize 0x10000
 #	EasyConnection 8/64 MCA:     flags 25         msize 0x1000
 #	ONboard ISA:                 flags 4          msize 0x10000
 #	ONboard EISA:                flags 7          msize 0x10000
 #	ONboard MCA:                 flags 3          msize 0x10000
 #	Brumby:                      flags 2          msize 0x4000
 #	Stallion:                    flags 1          msize 0x10000
 
 device		mcd	1
 hint.mcd.0.at="isa"
 hint.mcd.0.port="0x300"
 hint.mcd.0.irq="10"
 # for the Sony CDU31/33A CDROM
 device		scd	1
 hint.scd.0.at="isa"
 hint.scd.0.port="0x230"
 # for the SoundBlaster 16 multicd - up to 4 devices
 device		matcd	1
 hint.matcd.0.at="isa"
 hint.matcd.0.port="0x230"
 device		wt	1
 hint.wt.0.at="isa"
 hint.wt.0.port="0x300"
 hint.wt.0.irq="5"
 hint.wt.0.drq="1"
 device		ctx	1
 hint.ctx.0.at="isa"
 hint.ctx.0.port="0x230"
 hint.ctx.0.maddr="0xd0000"
 device		spigot	1
 hint.spigot.0.at="isa"
 hint.spigot.0.port="0xad6"
 hint.spigot.0.irq="15"
 hint.spigot.0.maddr="0xee000"
 device		apm
 hint.apm.0.flags="0x20"
 device		pmtimer			# Adjust system timer at wakeup time
 hint.pmtimer.0.at="isa"
 device		gp
 hint.gp.0.at="isa"
 hint.gp.0.port="0x2c0"
 device		gsc	1
 hint.gsc.0.at="isa"
 hint.gsc.0.port="0x270"
 hint.gsc.0.drq="3"
 device		joy			# PnP aware, hints for nonpnp only
 hint.joy.0.at="isa"
 hint.joy.0.port="0x201"
 device		cy	1
 options 	CY_PCI_FASTINTR		# Use with cy_pci unless irq is shared
 hint.cy.0.at="isa"
 hint.cy.0.irq="10"
 hint.cy.0.maddr="0xd4000"
 hint.cy.0.msize="0x2000"
 device		dgb	1
 options 	NDGBPORTS=16		# Defaults to 16*NDGB
 hint.dgb.0.at="isa"
 hint.dgb.0.port="0x220"
 hint.dgb.0.maddr="0xfc000"
 device		digi
 hint.digi.0.at="isa"
 hint.digi.0.port="0x104"
 hint.digi.0.maddr="0xd0000"
 # BIOS & FEP/OS components of device digi.  Normally left as modules
 device		digi_CX
 device		digi_CX_PCI
 device		digi_EPCX
 device		digi_EPCX_PCI
 device		digi_Xe
 device		digi_Xem
 device		digi_Xr
 device		rc	1
 hint.rc.0.at="isa"
 hint.rc.0.port="0x220"
 hint.rc.0.irq="12"
 device		rp
 hint.rp.0.at="isa"
 hint.rp.0.port="0x280"
 # the port and irq for tw0 are fictitious
 device		tw	1
 hint.tw.0.at="isa"
 hint.tw.0.port="0x380"
 hint.tw.0.irq="11"
 device		si
 options 	SI_DEBUG
 hint.si.0.at="isa"
 hint.si.0.maddr="0xd0000"
 hint.si.0.irq="12"
 device		asc	1
 hint.asc.0.at="isa"
 hint.asc.0.port="0x3EB"
 hint.asc.0.drq="3"
 hint.asc.0.irq="10"
 device		spic
 hint.spic.0.at="isa"
 hint.spic.0.port="0x10a0"
 device		stl
 hint.stl.0.at="isa"
 hint.stl.0.port="0x2a0"
 hint.stl.0.irq="10"
 device		stli
 hint.stli.0.at="isa"
 hint.stli.0.port="0x2a0"
 hint.stli.0.maddr="0xcc000"
 hint.stli.0.flags="23"
 hint.stli.0.msize="0x1000"
 # You are unlikely to have the hardware for loran <phk@FreeBSD.org>
 device		loran
 hint.loran.0.at="isa"
 hint.loran.0.irq="5"
 # HOT1 Xilinx 6200 card (http://www.vcc.com/)
 device		xrpu
 
 #
 # The `meteor' device is a PCI video capture board. It can also have the
 # following options:
 #   options METEOR_ALLOC_PAGES=xxx	preallocate kernel pages for data entry
 #	figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE
 #   options METEOR_DEALLOC_PAGES	remove all allocated pages on close(2)
 #   options METEOR_DEALLOC_ABOVE=xxx	remove all allocated pages above the
 #	specified amount. If this value is below the allocated amount, no action
 #	is taken.
 #   options METEOR_SYSTEM_DEFAULT={METEOR_PAL|METEOR_NTSC|METEOR_SECAM}, used
 #	for initialization of fps routine when a signal is not present.
 #
 # The 'bktr' device is a PCI video capture device using the Brooktree
 # bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a
 # TV card, eg Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator,
 # Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo.
 #
 # options 	OVERRIDE_CARD=xxx
 # options 	OVERRIDE_TUNER=xxx
 # options 	OVERRIDE_MSP=1
 # options 	OVERRIDE_DBX=1
 # These options can be used to override the auto detection
 # The current values for xxx are found in src/sys/dev/bktr/bktr_card.h
 # Using sysctl(8) run-time overrides on a per-card basis can be made
 #
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL
 # or
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC
 # Specifies the default video capture mode.
 # This is required for Dual Crystal (28&35Mhz) boards where PAL is used
 # to prevent hangs during initialisation.  eg VideoLogic Captivator PCI.
 #
 # options 	BKTR_USE_PLL
 # PAL or SECAM users who have a 28Mhz crystal (and no 35Mhz crystal)
 # must enable PLL mode with this option. eg some new Bt878 cards.
 #
 # options 	BKTR_GPIO_ACCESS
 # This enables IOCTLs which give user level access to the GPIO port.
 #
 # options 	BKTR_NO_MSP_RESET
 # Prevents the MSP34xx reset. Good if you initialise the MSP in another OS first
 #
 # options 	BKTR_430_FX_MODE
 # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode.
 #
 # options 	BKTR_SIS_VIA_MODE
 # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is
 # needed for some old SiS and VIA chipset motherboards.
 # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset
 # motherboards and motherboards with bad or incomplete PCI 2.1 support.
 # As a rough guess, old = before 1998
 #
 
 device		meteor	1
 
 # Brooktree driver has been ported to the new I2C framework. Thus,
 # you'll need to have the following 3 lines in the kernel config.
 #     device smbus
 #     device iicbus
 #     device iicbb
 # The iic and smb devices are only needed if you want to control other
 # I2C slaves connected to the external connector of some cards.
 #
 device		bktr	1
 
 #
 # PC Card/PCMCIA
 # (OLDCARD)
 #
 # card: pccard slots
 # pcic: isa/pccard bridge
 device		pcic
 hint.pcic.0.at="isa"
 hint.pcic.1.at="isa"
 device		card
 
 #
 # PC Card/PCMCIA and Cardbus
 # (NEWCARD)
 #
 # Note that NEWCARD and OLDCARD are incompatible.  Do not use both at the same
 # time.
 #
 # pccbb: isa/pccard and pci/cardbus bridge
 # pccard: pccard slots
 # cardbus: cardbus slots
 #device		pccbb
 #device		pccard
 #device		cardbus
 
 # You may need to reset all pccards after resuming
 options 	PCIC_RESUME_RESET	# reset after resume
 
 #
 # Laptop/Notebook options:
 #
 # See also:
 #  apm under `Miscellaneous hardware'
 # above.
 
 # For older notebooks that signal a powerfail condition (external
 # power supply dropped, or battery state low) by issuing an NMI:
 
 options 	POWERFAIL_NMI	# make it beep instead of panicking
 
 #
 # SMB bus
 #
 # System Management Bus support is provided by the 'smbus' device.
 # Access to the SMBus device is via the 'smb' device (/dev/smb*),
 # which is a child of the 'smbus' device.
 #
 # Supported devices:
 # smb		standard io through /dev/smb*
 #
 # Supported SMB interfaces:
 # iicsmb	I2C to SMB bridge with any iicbus interface
 # bktr		brooktree848 I2C hardware interface
 # intpm		Intel PIIX4 Power Management Unit
 # alpm		Acer Aladdin-IV/V/Pro2 Power Management Unit
 # ichsmb	Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA)
 #
 device		smbus		# Bus support, required for smb below.
 
 device		intpm
 device		alpm
 device		ichsmb
 
 device		smb
 
 #
 # I2C Bus
 #
 # Philips i2c bus support is provided by the `iicbus' device.
 #
 # Supported devices:
 # ic	i2c network interface
 # iic	i2c standard io
 # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands.
 #
 # Supported interfaces:
 # pcf	Philips PCF8584 ISA-bus controller
 # bktr	brooktree848 I2C software interface
 #
 # Other:
 # iicbb	generic I2C bit-banging code (needed by lpbb, bktr)
 #
 device		iicbus		# Bus support, required for ic/iic/iicsmb below.
 device		iicbb
 
 device		ic
 device		iic
 device		iicsmb		# smb over i2c bridge
 
 device		pcf
 hint.pcf.0.at="isa"
 hint.pcf.0.port="0x320"
 hint.pcf.0.irq="5"
 
 #---------------------------------------------------------------------------
 # ISDN4BSD
 #
 # See /usr/share/examples/isdn/ROADMAP for an introduction to isdn4bsd.
 #
 # i4b passive ISDN cards support contains the following hardware drivers:
 #
 #	isic  - Siemens/Infineon ISDN ISAC/HSCX/IPAC chipset driver
 #	iwic  - Winbond W6692 PCI bus ISDN S/T interface controller
 #	ifpi  - AVM Fritz!Card PCI driver
 #	ihfc  - Cologne Chip HFC ISA/ISA-PnP chipset driver
 #	ifpnp - AVM Fritz!Card PnP driver 
 #	itjc  - Siemens ISAC / TJNet Tiger300/320 chipset
 #
 # i4b active ISDN cards support contains the following hardware drivers:
 #
 #	iavc  - AVM B1 PCI, AVM B1 ISA, AVM T1
 #
 # Note that the ``options'' (if given) and ``device'' lines must BOTH
 # be uncommented to enable support for a given card !
 #
 # In addition to a hardware driver (and probably an option) the mandatory
 # ISDN protocol stack devices and the mandatory support device must be 
 # enabled as well as one or more devices from the optional devices section.
 #
 #---------------------------------------------------------------------------
 #	isic driver (Siemens/Infineon chipsets)
 #
 device	isic
 #
 # ISA bus non-PnP Cards:
 # ----------------------
 #
 # Teles S0/8 or Niccy 1008
 options 	TEL_S0_8
 hint.isic.0.at="isa"
 hint.isic.0.maddr="0xd0000"
 hint.isic.0.irq="5"
 hint.isic.0.flags="1"
 #
 # Teles S0/16 or Creatix ISDN-S0 or Niccy 1016
 options 	TEL_S0_16
 hint.isic.0.at="isa"
 hint.isic.0.port="0xd80"
 hint.isic.0.maddr="0xd0000"
 hint.isic.0.irq="5"
 hint.isic.0.flags="2"
 #
 # Teles S0/16.3
 options 	TEL_S0_16_3
 hint.isic.0.at="isa"
 hint.isic.0.port="0xd80"
 hint.isic.0.irq="5"
 hint.isic.0.flags="3"
 #
 # AVM A1 or AVM Fritz!Card
 options 	AVM_A1
 hint.isic.0.at="isa"
 hint.isic.0.port="0x340"
 hint.isic.0.irq="5"
 hint.isic.0.flags="4"
 #
 # USRobotics Sportster ISDN TA intern
 options 	USR_STI
 hint.isic.0.at="isa"
 hint.isic.0.port="0x268"
 hint.isic.0.irq="5"
 hint.isic.0.flags="7"
 #
 # ITK ix1 Micro ( < V.3, non-PnP version )
 options 	ITKIX1
 hint.isic.0.at="isa"
 hint.isic.0.port="0x398"
 hint.isic.0.irq="10"
 hint.isic.0.flags="18"
 #
 # ELSA PCC-16
 options 	ELSA_PCC16
 hint.isic.0.at="isa"
 hint.isic.0.port="0x360"
 hint.isic.0.irq="10"
 hint.isic.0.flags="20"
 #
 # ISA bus PnP Cards:
 # ------------------
 #
 # Teles S0/16.3 PnP
 options 	TEL_S0_16_3_P
 #
 # Creatix ISDN-S0 P&P
 options 	CRTX_S0_P
 #
 # Dr. Neuhaus Niccy Go@
 options 	DRN_NGO
 #
 # Sedlbauer Win Speed
 options 	SEDLBAUER
 #
 # Dynalink IS64PH
 options 	DYNALINK 
 #
 # ELSA QuickStep 1000pro ISA
 options 	ELSA_QS1ISA
 #
 # Siemens I-Surf 2.0
 options 	SIEMENS_ISURF2
 #
 # Asuscom ISDNlink 128K ISA
 options 	ASUSCOM_IPAC
 #
 # Eicon Diehl DIVA 2.0 and 2.02
 options       EICON_DIVA
 #
 # PCI bus Cards:
 # --------------
 #
 # ELSA MicroLink ISDN/PCI (same as ELSA QuickStep 1000pro PCI)
 options 	ELSA_QS1PCI
 #
 #
 #---------------------------------------------------------------------------
 #	ifpnp driver for AVM Fritz!Card PnP
 #
 # AVM Fritz!Card PnP
 device ifpnp
 #
 #---------------------------------------------------------------------------
 #	ihfc driver for Cologne Chip ISA chipsets (experimental!)
 #
 # Teles 16.3c ISA PnP
 # AcerISDN P10 ISA PnP
 # TELEINT ISDN SPEED No.1
 device ihfc
 #
 #---------------------------------------------------------------------------
 #	ifpi driver for AVM Fritz!Card PCI
 #
 # AVM Fritz!Card PCI
 device  ifpi
 #
 #---------------------------------------------------------------------------
 #	iwic driver for Winbond W6692 chipset
 #
 # ASUSCOM P-IN100-ST-D (and other Winbond W6692 based cards)
 device  iwic
 #
 #---------------------------------------------------------------------------
 #	itjc driver for Siemens ISAC / TJNet Tiger300/320 chipset
 #
 # Traverse Technologies NETjet-S
 # Teles PCI-TJ
 device  itjc
 #
 #---------------------------------------------------------------------------
 #	iavc driver (AVM active cards, needs i4bcapi driver!)
 #
 device	iavc
 #
 # AVM B1 ISA bus (PnP mode not supported!)
 # ----------------------------------------
 hint.iavc.0.at="isa"
 hint.iavc.0.port="0x150"
 hint.iavc.0.irq="5"
 #
 #---------------------------------------------------------------------------
 #	ISDN Protocol Stack - mandatory for all hardware drivers
 #
 # Q.921 / layer 2 - i4b passive cards D channel handling
 device		"i4bq921"
 #
 # Q.931 / layer 3 - i4b passive cards D channel handling
 device		"i4bq931"
 #
 # layer 4 - i4b common passive and active card handling
 device		"i4b"
 #
 #---------------------------------------------------------------------------
 #	ISDN devices - mandatory for all hardware drivers
 #
 # userland driver to do ISDN tracing (for passive cards only)
 device		"i4btrc"	4
 #
 # userland driver to control the whole thing
 device		"i4bctl"
 #
 #---------------------------------------------------------------------------
 #	ISDN devices - optional
 #
 # userland driver for access to raw B channel
 device		"i4brbch"	4
 #
 # userland driver for telephony
 device		"i4btel"	2
 #
 # network driver for IP over raw HDLC ISDN
 device		"i4bipr"	4
 # enable VJ header compression detection for ipr i/f
 options 	IPR_VJ
 # enable logging of the first n IP packets to isdnd (n=32 here)
 options 	IPR_LOG=32
 #
 # network driver for sync PPP over ISDN; requires an equivalent
 # number of sppp devices to be configured
 device		"i4bisppp"	4
 #
 # B-channel interface to the netgraph subsystem
 device		"i4bing"	2
 #
 # CAPI driver needed for active ISDN cards (see iavc driver above)
 device		"i4bcapi"
 #
 #---------------------------------------------------------------------------
 
 # Parallel-Port Bus
 #
 # Parallel port bus support is provided by the `ppbus' device.
 # Multiple devices may be attached to the parallel port, devices
 # are automatically probed and attached when found.
 #
 # Supported devices:
 # vpo	Iomega Zip Drive
 #	Requires SCSI disk support ('scbus' and 'da'), best
 #	performance is achieved with ports in EPP 1.9 mode.
 # lpt	Parallel Printer
 # plip	Parallel network interface
 # ppi	General-purpose I/O ("Geek Port") + IEEE1284 I/O
 # pps	Pulse per second Timing Interface
 # lpbb	Philips official parallel port I2C bit-banging interface
 #
 # Supported interfaces:
 # ppc	ISA-bus parallel port interfaces.
 #
 
 options		PPC_PROBE_CHIPSET # Enable chipset specific detection
 				  # (see flags in ppc(4))
 options 	DEBUG_1284	# IEEE1284 signaling protocol debug
 options 	PERIPH_1284	# Makes your computer act as an IEEE1284
 				# compliant peripheral
 options 	DONTPROBE_1284	# Avoid boot detection of PnP parallel devices
 options 	VP0_DEBUG	# ZIP/ZIP+ debug
 options 	LPT_DEBUG	# Printer driver debug
 options 	PPC_DEBUG	# Parallel chipset level debug
 options 	PLIP_DEBUG	# Parallel network IP interface debug
 options		PCFCLOCK_VERBOSE         # Verbose pcfclock driver
 options		PCFCLOCK_MAX_RETRIES=5   # Maximum read tries (default 10)
 
 device		ppc
 hint.ppc.0.at="isa"
 hint.ppc.0.irq="7"
 device		ppbus
 device		vpo
 device		lpt
 device		plip
 device		ppi
 device		pps
 device		lpbb
 device		pcfclock
 
 # Kernel BOOTP support
 
 options 	BOOTP		# Use BOOTP to obtain IP address/hostname
 options 	BOOTP_NFSROOT	# NFS mount root filesystem using BOOTP info
 options 	BOOTP_NFSV3	# Use NFS v3 to NFS mount root
 options 	BOOTP_COMPAT	# Workaround for broken bootp daemons.
 options 	BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP
 
 #
 # Add tie-ins for a hardware watchdog.  This only enables the hooks;
 # the user must still supply the actual driver.
 #
 options 	HW_WDOG
 
 #
 # Set the number of PV entries per process.  Increasing this can
 # stop panics related to heavy use of shared memory. However, that can
 # (combined with large amounts of physical memory) cause panics at
 # boot time due to the kernel running out of VM space.
 #
 # If you're tweaking this, you might also want to increase the sysctls
 # "vm.v_free_min", "vm.v_free_reserved", and "vm.v_free_target".
 #
 # The value below is one more than the default.
 #
 options 	PMAP_SHPGPERPROC=201
 
 #
 # Disable swapping. This option removes all code which actually performs
 # swapping, so it's not possible to turn it back on at run-time.
 #
 # This is sometimes usable for systems which don't have any swap space
 # (see also sysctls "vm.defer_swapspace_pageouts" and
 # "vm.disable_swapspace_pageouts")
 #
 #options 	NO_SWAPPING
 
 # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers
 # for sendfile(2) that are used to map file VM pages, and normally
 # default to a quantity that is roughly 16*MAXUSERS+512. You would
 # typically want about 4 of these for each simultaneous file send.
 #
 options 	NSFBUFS=1024
 
 #
 # Enable extra debugging code for locks.  This stores the filename and
 # line of whatever acquired the lock in the lock itself, and changes a
 # number of function calls to pass around the relevant data.  This is
 # not at all useful unless you are debugging lock code.  Also note
 # that it is likely to break e.g. fstat(1) unless you recompile your
 # userland with -DDEBUG_LOCKS as well.
 #
 options 	DEBUG_LOCKS
 
 
 #####################################################################
 # ABI Emulation
 
 # Enable iBCS2 runtime support for SCO and ISC binaries
 options 	IBCS2
 
 # Emulate spx device for client side of SVR3 local X interface
 options 	SPX_HACK
 
 # Enable Linux ABI emulation
 options 	COMPAT_LINUX
 
 # Enable the linux-like proc filesystem support (requires COMPAT_LINUX
 # and PSEUDOFS)
 options 	LINPROCFS
 
 # Linux debugging
 options 	DEBUG_LINUX
 
 #
 # SysVR4 ABI emulation
 #
 # The svr4 ABI emulator can be statically compiled into the kernel or loaded as
 # a KLD module.  
 # The STREAMS network emulation code can also be compiled statically or as a 
 # module.  If loaded as a module, it must be loaded before the svr4 module
 # (the /usr/sbin/svr4 script does this for you).  If compiling statically,
 # the `streams' device must be configured into any kernel which also
 # specifies COMPAT_SVR4.  It is possible to have a statically-configured 
 # STREAMS device and a dynamically loadable svr4 emulator;  the /usr/sbin/svr4
 # script understands that it doesn't need to load the `streams' module under
 # those circumstances.
 # Caveat:  At this time, `options KTRACE' is required for the svr4 emulator
 # (whether static or dynamic).  
 # 
 options		COMPAT_SVR4	# build emulator statically
 options		DEBUG_SVR4	# enable verbose debugging
 device		streams		# STREAMS network driver (required for svr4).
 
 
 #####################################################################
 # USB support
 # UHCI controller
 device		uhci
 # OHCI controller
 device		ohci
 # General USB code (mandatory for USB)
 device		usb
 #
 # USB Double Bulk Pipe devices
 device		udbp
 # Generic USB device driver
 device		ugen
 # Human Interface Device (anything with buttons and dials)
 device		uhid
 # USB keyboard
 device		ukbd
 # USB printer
 device		ulpt
 # USB Iomega Zip 100 Drive (Requires scbus and da)
 device		umass
 # USB modem support
 device		umodem
 # USB mouse
 device		ums
 # Diamond Rio 500 Mp3 player
 device		urio
 # USB scanners
 device		uscanner
 #
 # ADMtek USB ethernet. Supports the LinkSys USB100TX,
 # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX
 # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus
 # eval board.
 device		aue
 #
 # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate
 # and Netmate II, and the Belkin F5U111.
 device		cue
 #
 # Kawasaki LSI ethernet. Supports the LinkSys USB10T,
 # Entrega USB-NET-E45, Peracom Ethernet Adapter, the
 # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T,
 # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB
 # and 2104USB, and the Corega USB-T.
 device		kue
 
 # debugging options for the USB subsystem
 #
 options 	UHCI_DEBUG
 options 	OHCI_DEBUG
 options 	USB_DEBUG
 
 options 	UGEN_DEBUG
 options 	UHID_DEBUG
 options 	UHUB_DEBUG
 options 	UKBD_DEBUG
 options 	ULPT_DEBUG
 options 	UMASS_DEBUG
 options 	UMS_DEBUG
 options 	URIO_DEBUG
 
 # options for ukbd:
 options 	UKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	UKBD_DFLT_KEYMAP=it.iso
 
 #
 # Embedded system options:
 #
 # An embedded system might want to run something other than init.
 options 	INIT_PATH="/sbin/init:/stand/sysinstall"
 
 # Debug options
 options 	BUS_DEBUG	# enable newbus debugging
 options 	DEBUG_VFS_LOCKS	# enable vfs lock debugging
 options 	NPX_DEBUG	# enable npx debugging (FPU/math emu)
 
 #####################################################################
 # SYSV IPC KERNEL PARAMETERS
 #
 # Maximum number of entries in a semaphore map.
 options 	SEMMAP=31
 
 # Maximum number of System V semaphores that can be used on the system at
 # one time. 
 options 	SEMMNI=11
 
 # Total number of semaphores system wide
 options 	SEMMNS=61
 
 # Total number of undo structures in system
 options 	SEMMNU=31
 
 # Maximum number of System V semaphores that can be used by a single process
 # at one time. 
 options 	SEMMSL=61
 
 # Maximum number of operations that can be outstanding on a single System V
 # semaphore at one time. 
 options 	SEMOPM=101
 
 # Maximum number of undo operations that can be outstanding on a single
 # System V semaphore at one time. 
 options 	SEMUME=11
 
 # Maximum number of shared memory pages system wide.
 options 	SHMALL=1025
 
 # Maximum size, in bytes, of a single System V shared memory region. 
 options 	SHMMAX="(SHMMAXPGS*PAGE_SIZE+1)"
 options 	SHMMAXPGS=1025
 
 # Minimum size, in bytes, of a single System V shared memory region. 
 options 	SHMMIN=2
 
 # Maximum number of shared memory regions that can be used on the system
 # at one time. 
 options 	SHMMNI=33
 
 # Maximum number of System V shared memory regions that can be attached to
 # a single process at one time. 
 options 	SHMSEG=9
 
 # Set the amount of time (in seconds) the system will wait before
 # rebooting automatically when a kernel panic occurs.  If set to (-1),
 # the system will wait indefinitely until a key is pressed on the
 # console.
 options 	PANIC_REBOOT_WAIT_TIME=16
 
 #####################################################################
 
 # More undocumented options for linting.
 # Note that documenting these is not considered an affront.
 
 options 	CAM_DEBUG_DELAY
 
 # VFS cluster debugging.
 options 	CLUSTERDEBUG
 
 # Eliminate unneeded cache flush instruction(s).
 options 	CPU_UPGRADE_HW_CACHE
 
 options 	DEBUG
 
 # PECOFF module (Win32 Execution Format)
 options		PECOFF_SUPPORT
 options		PECOFF_DEBUG
 
 # Disable the 4 MByte PSE CPU feature.
 #options 	DISABLE_PSE
 
 options 	ENABLE_ALART
 options 	I4B_SMP_WORKAROUND
 options 	I586_PMC_GUPROF=0x70000
 options 	KBDIO_DEBUG=2
 options 	KBD_MAXRETRY=4
 options 	KBD_MAXWAIT=6
 options 	KBD_RESETDELAY=201
 
 # Enable the PF_KEY Key Management API.
 options 	KEY
 
 # Kernel filelock debugging.
 options 	LOCKF_DEBUG
 
 # System V compatible message queues
 # Please note that the values provided here are used to test kernel
 # building.  The defaults in the sources provide almost the same numbers.
 # MSGSSZ must be a power of 2 between 8 and 1024.
 options 	MSGMNB=2049	# Max number of chars in queue
 options 	MSGMNI=41	# Max number of message queue identifiers
 options 	MSGSEG=2049	# Max number of message segments
 options 	MSGSSZ=16	# Size of a message segment
 options 	MSGTQL=41	# Max number of messages in system
 
 options 	NBUF=512	# Number of buffer headers
 
 options 	NMBCLUSTERS=1024	# Number of mbuf clusters
 
 options 	PSM_DEBUG=1
 
 options 	SCSI_NCR_DEBUG
 options 	SCSI_NCR_MAX_SYNC=10000
 options 	SCSI_NCR_MAX_WIDE=1
 options 	SCSI_NCR_MYADDR=7
 
 options 	SC_DEBUG_LEVEL=5	# Syscons debug level
 options 	SC_RENDER_DEBUG	# syscons rendering debugging
 
 options 	SHOW_BUSYBUFS	# List buffers that prevent root unmount
 options 	SIMPLELOCK_DEBUG
 options 	SLIP_IFF_OPTS
 options 	TIMER_FREQ="((14318182+6)/12)"
 options 	VFS_BIO_DEBUG	# VFS buffer I/O debugging
 
 options 	VM_KMEM_SIZE
 options 	VM_KMEM_SIZE_MAX
 options 	VM_KMEM_SIZE_SCALE
Index: head/sys/i386/i386/genassym.c
===================================================================
--- head/sys/i386/i386/genassym.c	(revision 82308)
+++ head/sys/i386/i386/genassym.c	(revision 82309)
@@ -1,206 +1,208 @@
 /*-
  * Copyright (c) 1982, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)genassym.c	5.11 (Berkeley) 5/10/91
  * $FreeBSD$
  */
 
+#include "opt_upages.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/assym.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/proc.h>
 #include <sys/errno.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/socket.h>
 #include <sys/resourcevar.h>
 /* XXX */
 #ifdef KTR_PERCPU
 #include <sys/ktr.h>
 #endif
 #include <machine/bootinfo.h>
 #include <machine/tss.h>
 #include <sys/vmmeter.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <sys/user.h>
 #include <net/if.h>
 #include <netinet/in.h>
 #include <nfs/nfsv2.h>
 #include <nfs/rpcv2.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsdiskless.h>
 #ifdef SMP
 #include <machine/apic.h>
 #endif
 #include <machine/cpu.h>
 #include <machine/sigframe.h>
 #include <machine/globaldata.h>
 #include <machine/vm86.h>
 
 /*
  * Table of ASSYM(name, value) entries.  Each entry pairs a symbol name
  * with a compile-time constant: a structure member offset (offsetof),
  * a structure size (sizeof), or the value of an existing macro.
  * NOTE(review): ASSYM comes from <sys/assym.h>; presumably the resulting
  * symbols are consumed by the assembly sources -- confirm there.
  */
 /* Member offsets within struct proc, struct vmspace and struct pmap. */
 ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
 ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
 ASSYM(P_ADDR, offsetof(struct proc, p_addr));
 ASSYM(P_INTR_NESTING_LEVEL, offsetof(struct proc, p_intr_nesting_level));
 ASSYM(P_SFLAG, offsetof(struct proc, p_sflag));
 ASSYM(P_STAT, offsetof(struct proc, p_stat));
 ASSYM(P_WCHAN, offsetof(struct proc, p_wchan));
 
 ASSYM(PS_ASTPENDING, PS_ASTPENDING);
 ASSYM(PS_NEEDRESCHED, PS_NEEDRESCHED);
 
 ASSYM(SSLEEP, SSLEEP);
 ASSYM(SRUN, SRUN);
 ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap));
 ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall));
 ASSYM(V_INTR, offsetof(struct vmmeter, v_intr));
 ASSYM(UPAGES, UPAGES);
 ASSYM(PAGE_SIZE, PAGE_SIZE);
 ASSYM(NPTEPG, NPTEPG);
 ASSYM(NPDEPG, NPDEPG);
 ASSYM(PDESIZE, PDESIZE);
 ASSYM(PTESIZE, PTESIZE);
 ASSYM(PAGE_SHIFT, PAGE_SHIFT);
 ASSYM(PAGE_MASK, PAGE_MASK);
 ASSYM(PDRSHIFT, PDRSHIFT);
 ASSYM(USRSTACK, USRSTACK);
 ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
 ASSYM(KERNBASE, KERNBASE);
 ASSYM(MCLBYTES, MCLBYTES);
 /* Member offsets within struct pcb, plus one struct i386tss offset. */
 ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
 ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi));
 ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi));
 ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp));
 ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp));
 ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx));
 ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip));
 ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0));
 
 ASSYM(PCB_USERLDT, offsetof(struct pcb, pcb_ldt));
 ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs));
 ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
 ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
 ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
 ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
 ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
 ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
 
 ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
 ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
 ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
 ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
 ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
 ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
 
 /* The size of struct pcb is only emitted when SMP is defined. */
 #ifdef SMP
 ASSYM(PCB_SIZE, sizeof(struct pcb));
 #endif
 
 /* Trapframe, signal frame, signal context and ucontext member offsets. */
 ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno));
 ASSYM(TF_ERR, offsetof(struct trapframe, tf_err));
 ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
 ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags));
 ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
 ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc));
 ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc));
 ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps));
 ASSYM(SC_FS, offsetof(struct osigcontext, sc_fs));
 ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs));
 ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno));
 ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags));
 ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs));
 ASSYM(ENOENT, ENOENT);
 ASSYM(EFAULT, EFAULT);
 ASSYM(ENAMETOOLONG, ENAMETOOLONG);
 ASSYM(MAXPATHLEN, MAXPATHLEN);
 /* struct bootinfo size and member offsets; struct nfs_diskless size. */
 ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo));
 ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version));
 ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname));
 ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless));
 ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon));
 ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless));
 ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size));
 ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab));
 ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
 ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
 /* struct globaldata size and member offsets. */
 ASSYM(GD_SIZEOF, sizeof(struct globaldata));
 ASSYM(GD_PRVSPACE, offsetof(struct globaldata, gd_prvspace));
 ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
 ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc));
 ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc));
 ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
 ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss));
 ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
 ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
 ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
 ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
 ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
 
 /* XXX */
 #ifdef KTR_PERCPU
 ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx));
 ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf));
 ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data));
 #endif
 
 ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid));
 
 /* struct LAPIC member offsets, only emitted when SMP is defined. */
 #ifdef SMP
 ASSYM(LA_VER, offsetof(struct LAPIC, version));
 ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
 ASSYM(LA_EOI, offsetof(struct LAPIC, eoi));
 ASSYM(LA_SVR, offsetof(struct LAPIC, svr));
 ASSYM(LA_ICR_LO, offsetof(struct LAPIC, icr_lo));
 ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
 #endif
 
 /* Selector values built with GSEL() at SEL_KPL. */
 ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
 ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
 ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
 
 ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
 ASSYM(GPROC0_SEL, GPROC0_SEL);
 ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
 
 /* struct mtx member offsets. */
 ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
 ASSYM(MTX_RECURSECNT, offsetof(struct mtx, mtx_recurse));
 ASSYM(MTX_SAVECRIT, offsetof(struct mtx, mtx_savecrit));
Index: head/sys/i386/i386/machdep.c
===================================================================
--- head/sys/i386/i386/machdep.c	(revision 82308)
+++ head/sys/i386/i386/machdep.c	(revision 82309)
@@ -1,2530 +1,2534 @@
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  * $FreeBSD$
  */
 
 #include "opt_atalk.h"
 #include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_ipx.h"
 #include "opt_isa.h"
 #include "opt_maxmem.h"
 #include "opt_msgbuf.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
+#include "opt_upages.h"
 /* #include "opt_userconfig.h" */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/reboot.h>
 #include <sys/smp.h>
 #include <sys/callout.h>
 #include <sys/msgbuf.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <sys/lock.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
 #include <sys/user.h>
 #include <sys/exec.h>
 #include <sys/cons.h>
 
 #include <ddb/ddb.h>
 
 #include <net/netisr.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/reg.h>
 #include <machine/clock.h>
 #include <machine/specialreg.h>
 #include <machine/bootinfo.h>
 #include <machine/md_var.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
 #include <machine/globals.h>
 #ifdef PERFMON
 #include <machine/perfmon.h>
+#endif
+#ifdef SMP
+#include <machine/privatespace.h>
 #endif
 
 #include <i386/isa/icu.h>
 #include <i386/isa/intr_machdep.h>
 #include <isa/rtc.h>
 #include <machine/vm86.h>
 #include <sys/ptrace.h>
 #include <machine/sigframe.h>
 
 extern void init386 __P((int first));
 extern void dblfault_handler __P((void));
 
 extern void printcpuinfo(void);	/* XXX header file */
 extern void earlysetcpuclass(void);	/* same header file */
 extern void finishidentcpu(void);
 extern void panicifcpuunsupported(void);
 extern void initializecpu(void);
 
 /*
  * CS_SECURE(cs): true when ISPL(cs) equals SEL_UPL.
  * EFL_SECURE(ef, oef): true when ef and oef are identical in every bit
  * outside PSL_USERCHANGE, i.e. any bit that differs between them lies
  * within PSL_USERCHANGE.
  */
 #define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
 static void cpu_startup __P((void *));
 #ifdef CPU_ENABLE_SSE
 static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
 static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
 #endif /* CPU_ENABLE_SSE */
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
 int	_udatasel, _ucodesel;
 u_int	atdevbase;
 
 #if defined(SWTCH_OPTIM_STATS)
 extern int swtch_optim_stats;
 SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
 	CTLFLAG_RD, &swtch_optim_stats, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
 	CTLFLAG_RD, &tlb_flush_count, 0, "");
 #endif
 
 /*
  * ispc98 is 1 when this file is compiled with PC98 defined and 0
  * otherwise.  It is exported read-only (CTLFLAG_RD) as the integer
  * sysctl "ispc98" under the _machdep node.
  */
 #ifdef PC98
 static int	ispc98 = 1;
 #else
 static int	ispc98 = 0;
 #endif
 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
 
 int physmem = 0;
 int cold = 1;
 
 #ifdef COMPAT_43
 static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code));
 #endif
 
 /*
  * Sysctl handler for the "physmem" entry: passes ctob(physmem) to
  * sysctl_handle_int() and returns that call's result unchanged.
  */
 static int
 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
 {
 	return (sysctl_handle_int(oidp, 0, ctob(physmem), req));
 }
 
 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_physmem, "IU", "");
 
 /*
  * Sysctl handler for the "usermem" entry: passes
  * ctob(physmem - cnt.v_wire_count) to sysctl_handle_int() and returns
  * that call's result unchanged.
  */
 static int
 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
 {
 	return (sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count),
 	    req));
 }
 
 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_usermem, "IU", "");
 
 /*
  * Sysctl handler for the "availpages" entry: passes
  * i386_btop(avail_end - avail_start) to sysctl_handle_int() and returns
  * that call's result unchanged.
  */
 static int
 sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
 {
 	return (sysctl_handle_int(oidp, 0, i386_btop(avail_end - avail_start),
 	    req));
 }
 
 SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_availpages, "I", "");
 
 int Maxmem = 0;
 long dumplo;
 
 vm_offset_t phys_avail[10];
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
 
 struct kva_md_info kmi;
 
 static struct trapframe proc0_tf;
 #ifndef SMP
 static struct globaldata __globaldata;
 #endif
 
 struct mtx sched_lock;
 struct mtx Giant;
 
 /*
  * cpu_startup: startup hook registered earlier in this file with
  * SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL).
  *
  * In order, it calls earlysetcpuclass(), startrtclock(), printcpuinfo()
  * and panicifcpuunsupported() (plus perfmon_init() under PERFMON),
  * prints the real memory size derived from Maxmem, lists the phys_avail[]
  * chunks when bootverbose is set, calls vm_ksubmap_init(&kmi), optionally
  * runs userconfig()/cninit() under USERCONFIG, prints the available
  * memory derived from cnt.v_free_count, calls bufinit() and
  * vm_pager_bufferinit(), registers GLOBALDATA, and on non-SMP kernels
  * calls cpu_setregs().
  *
  * dummy: not referenced by the function body.
  */
 static void
 cpu_startup(dummy)
 	void *dummy;
 {
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	earlysetcpuclass();
 	startrtclock();
 	printcpuinfo();
 	panicifcpuunsupported();
 #ifdef PERFMON
 	perfmon_init();
 #endif
 	printf("real memory  = %u (%uK bytes)\n", ptoa(Maxmem),
 	    ptoa(Maxmem) / 1024);
 	/*
 	 * Display any holes after the first chunk of extended memory.
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		/*
 		 * phys_avail[] holds (start, end) pairs; the walk stops at the
 		 * first pair whose end entry is 0.
 		 */
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
 			unsigned int size1;
 
 			size1 = phys_avail[indx + 1] - phys_avail[indx];
 			printf("0x%08x - 0x%08x, %u bytes (%u pages)\n",
 			    phys_avail[indx], phys_avail[indx + 1] - 1, size1,
 			    size1 / PAGE_SIZE);
 		}
 	}
 
 	vm_ksubmap_init(&kmi);
 
 	/*
 	 * Everything from here to the matching #endif is compiled out.
 	 * NOTE(review): the disabled code uses names (v, firstaddr, size, i,
 	 * minaddr, maxaddr, ...) that this function does not declare;
 	 * presumably it was superseded by vm_ksubmap_init() above -- confirm.
 	 */
 #if 0
 	/*
 	 * Calculate callout wheel size
 	 */
 	for (callwheelsize = 1, callwheelbits = 0;
 	     callwheelsize < ncallout;
 	     callwheelsize <<= 1, ++callwheelbits)
 		;
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Allocate space for system data structures.
 	 * The first available kernel virtual address is in "v".
 	 * As pages of kernel virtual memory are allocated, "v" is incremented.
 	 * As pages of memory are allocated and cleared,
 	 * "firstaddr" is incremented.
 	 * An index into the kernel page table corresponding to the
 	 * virtual memory address maintained in "v" is kept in "mapaddr".
 	 */
 
 	/*
 	 * Make two passes.  The first pass calculates how much memory is
 	 * needed and allocates it.  The second pass assigns virtual
 	 * addresses to the various data structures.
 	 */
 	firstaddr = 0;
 again:
 	v = (caddr_t)firstaddr;
 
 #define	valloc(name, type, num) \
 	    (name) = (type *)v; v = (caddr_t)((name)+(num))
 #define	valloclim(name, type, num, lim) \
 	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
 
 	valloc(callout, struct callout, ncallout);
 	valloc(callwheel, struct callout_tailq, callwheelsize);
 
 	/*
 	 * Discount the physical memory larger than the size of kernel_map
 	 * to avoid eating up all of KVA space.
 	 */
 	if (kernel_map->first_free == NULL) {
 		printf("Warning: no free entries in kernel_map.\n");
 		physmem_est = physmem;
 	} else {
 		physmem_est = min(physmem, btoc(kernel_map->max_offset -
 		    kernel_map->min_offset));
 	}
 
 	/*
 	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
 	 * For the first 64MB of ram nominally allocate sufficient buffers to
 	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
 	 * buffers to cover 1/20 of our ram over 64MB.  When auto-sizing
 	 * the buffer cache we limit the eventual kva reservation to
 	 * maxbcache bytes.
 	 *
 	 * factor represents the 1/4 x ram conversion.
 	 */
 	if (nbuf == 0) {
 		int factor = 4 * BKVASIZE / PAGE_SIZE;
 
 		nbuf = 50;
 		if (physmem_est > 1024)
 			nbuf += min((physmem_est - 1024) / factor,
 			    16384 / factor);
 		if (physmem_est > 16384)
 			nbuf += (physmem_est - 16384) * 2 / (factor * 5);
 
 		if (maxbcache && nbuf > maxbcache / BKVASIZE)
 			nbuf = maxbcache / BKVASIZE;
 	}
 
 	/*
 	 * Do not allow the buffer_map to be more than 1/2 the size of the
 	 * kernel_map.
 	 */
 	if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / 
 	    (BKVASIZE * 2)) {
 		nbuf = (kernel_map->max_offset - kernel_map->min_offset) / 
 		    (BKVASIZE * 2);
 		printf("Warning: nbufs capped at %d\n", nbuf);
 	}
 
 	nswbuf = max(min(nbuf/4, 256), 16);
 
 	valloc(swbuf, struct buf, nswbuf);
 	valloc(buf, struct buf, nbuf);
 	v = bufhashinit(v);
 
 	/*
 	 * End of first pass, size has been calculated so allocate memory
 	 */
 	if (firstaddr == 0) {
 		size = (vm_size_t)(v - firstaddr);
 		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
 		if (firstaddr == 0)
 			panic("startup: no room for tables");
 		goto again;
 	}
 
 	/*
 	 * End of second pass, addresses have been assigned
 	 */
 	if ((vm_size_t)(v - firstaddr) != size)
 		panic("startup: table size inconsistency");
 
 	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
 			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
 	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
 				(nbuf*BKVASIZE));
 	buffer_map->system_map = 1;
 	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
 				(nswbuf*MAXPHYS) + pager_map_size);
 	pager_map->system_map = 1;
 	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 				(16*(ARG_MAX+(PAGE_SIZE*3))));
 
 	/*
 	 * XXX: Mbuf system machine-specific initializations should
 	 *      go here, if anywhere.
 	 */
 
 	/*
 	 * Initialize callouts
 	 */
 	SLIST_INIT(&callfree);
 	for (i = 0; i < ncallout; i++) {
 		callout_init(&callout[i], 0);
 		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
 	}
 
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&callwheel[i]);
 	}
 
 	mtx_init(&callout_lock, "callout", MTX_SPIN | MTX_RECURSE);
 #endif
 
 #if defined(USERCONFIG)
 	userconfig();
 	cninit();		/* the preferred console may have changed */
 #endif
 
 	printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count),
 	    ptoa(cnt.v_free_count) / 1024);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 
 	globaldata_register(GLOBALDATA);
 #ifndef SMP
 	/* For SMP, we delay the cpu_setregs() until after SMP startup. */
 	cpu_setregs();
 #endif
 }
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * at top to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  */
 #ifdef COMPAT_43
 /*
  * Deliver signal `sig' to curproc using the old-style (struct osigframe)
  * frame layout.
  *
  * catcher: handler address recorded in the frame.
  * sig:     signal number; translated through sv_sigtbl when the ABI
  *          supplies one.
  * mask:    signal set stored, converted by SIG2OSIG(), in the saved
  *          context.
  * code:    handed to the handler as its second argument, or as si_code
  *          for handlers listed in ps_siginfo.
  *
  * The frame is assembled in a local copy, copied out to the user stack
  * (or to the alternate signal stack) and the trapframe is then pointed
  * at PS_STRINGS - szosigcode.  If the frame address cannot be used,
  * SIGILL is reset to its default action and posted instead.
  */
 static void
 osendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct osigframe sf;
 	struct osigframe *fp;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	regs = p->p_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the very top of the alternate stack. */
 		fp = (struct osigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct osigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		fp = (struct osigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *fp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)fp) == 0 ||
 	    !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		psignal(p, SIGILL);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/*
 	 * Start from an all-zero frame.  Only some members are assigned
 	 * below, yet the whole structure is copied out, so without this
 	 * the unassigned members and any padding would carry stale kernel
 	 * stack contents to user space.  sendsig() clears its frame the
 	 * same way.
 	 */
 	bzero(&sf, sizeof(sf));
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
 		sf.sf_siginfo.si_signo = sig;
 		sf.sf_siginfo.si_code = code;
 		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_arg2 = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/* Save most if not all of trap frame. */
 	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
 	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
 	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
 	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
 	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
 	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
 	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
 	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
 	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
 	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
 	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
 	/* %gs is read from the CPU rather than from the trapframe. */
 	sf.sf_siginfo.si_sc.sc_gs = rgs();
 	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
 
 	/* Build the signal context to be used by osigreturn(). */
 	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
 	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
 	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
 	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
 	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
 	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
 	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
 		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
 		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
 		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_siginfo.si_sc.sc_ps =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/* See sendsig() for comments. */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(p, SIGILL);
 		/* NOTREACHED */
 	}
 
 	/* Resume the process in the old-style signal trampoline. */
 	regs->tf_esp = (int)fp;
 	regs->tf_eip = PS_STRINGS - szosigcode;
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 }
 #endif
 
 /*
  * Deliver signal `sig' to curproc.
  *
  * catcher: handler address recorded in the frame.
  * sig:     signal number; translated through sv_sigtbl when the ABI
  *          supplies one.
  * mask:    signal set saved in the frame's uc_sigmask.
  * code:    handed to the handler as its second argument, or as si_code
  *          for handlers listed in ps_siginfo.
  *
  * A struct sigframe is assembled in a local copy, copied out to the user
  * stack (or to the alternate signal stack) and the trapframe is pointed
  * at PS_STRINGS - *sv_szsigcode.  Under COMPAT_43, signals that are
  * members of ps_osigset are handed to osendsig() instead.
  */
 void
 sendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct sigframe sf;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct sigframe *sfp;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 #ifdef COMPAT_43
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		/* Drop the lock first; osendsig() takes it itself. */
 		PROC_UNLOCK(p);
 		osendsig(catcher, sig, mask, code);
 		return;
 	}
 #endif
 	regs = p->p_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/*
 	 * Copies the whole trapframe into the mcontext starting at mc_fs.
 	 * NOTE(review): this presumes the mcontext members from mc_fs on
 	 * mirror struct trapframe; both layouts are defined elsewhere.
 	 */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the very top of the alternate stack. */
 		sfp = (struct sigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct sigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		sfp = (struct sigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *sfp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)sfp) == 0 ||
 	    !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 #ifdef DEBUG
 		printf("process %d has trashed its stack\n", p->p_pid);
 #endif
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		psignal(p, SIGILL);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	/* Pointers stored in the frame refer to its user-space copy. */
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill siginfo structure. */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = code;
 		sf.sf_si.si_addr = (void *)regs->tf_err;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		/* Without VME, VIF/VIP are taken from the saved vm86_eflags. */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * We should never have PSL_T set when returning from vm86
 		 * mode.  It may be set here if we deliver a signal before
 		 * getting to vm86 mode, so turn it off.
 		 *
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(p, SIGILL);
 		/* NOTREACHED */
 	}
 
 	/*
 	 * Resume the process in the signal trampoline.  Unlike osendsig(),
 	 * %gs is not reloaded here.
 	 */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  */
 #ifdef COMPAT_43
 /*
  * Old-style sigreturn: restore the register state and signal mask of
  * process p from the struct osigcontext at uap->sigcntxp.
  *
  * Returns EFAULT when the context is not readable, EINVAL when the
  * requested eflags or %cs are rejected or vm86 mode was requested without
  * an initialised vm86 area, and EJUSTRETURN once the trapframe has been
  * rewritten.
  */
 int
 osigreturn(p, uap)
 	struct proc *p;
 	struct osigreturn_args /* {
 		struct osigcontext *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	struct osigcontext *scp;
 	int eflags;
 
 	regs = p->p_frame;
 	scp = uap->sigcntxp;
 	/* All later reads go straight through scp after this check. */
 	if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ))
 		return (EFAULT);
 	eflags = scp->sc_ps;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/*
 		 * Go back to user mode if both flags are set.
 		 * Note that SIGBUS is only posted here; the restore below
 		 * still runs.
 		 */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/* Only the *_USERCHANGE bits are taken from the context. */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		tf->tf_vm86_ds = scp->sc_ds;
 		tf->tf_vm86_es = scp->sc_es;
 		tf->tf_vm86_fs = scp->sc_fs;
 		tf->tf_vm86_gs = scp->sc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		if (!CS_SECURE(scp->sc_cs)) {
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 		regs->tf_ds = scp->sc_ds;
 		regs->tf_es = scp->sc_es;
 		regs->tf_fs = scp->sc_fs;
 	}
 
 	/* Restore remaining registers. */
 	regs->tf_eax = scp->sc_eax;
 	regs->tf_ebx = scp->sc_ebx;
 	regs->tf_ecx = scp->sc_ecx;
 	regs->tf_edx = scp->sc_edx;
 	regs->tf_esi = scp->sc_esi;
 	regs->tf_edi = scp->sc_edi;
 	regs->tf_cs = scp->sc_cs;
 	regs->tf_ss = scp->sc_ss;
 	regs->tf_isp = scp->sc_isp;
 
 	/* The signal stack flag and signal mask change under the proc lock. */
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (scp->sc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	SIGSETOLD(p->p_sigmask, scp->sc_mask);
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	regs->tf_ebp = scp->sc_fp;
 	regs->tf_esp = scp->sc_sp;
 	regs->tf_eip = scp->sc_pc;
 	/* Use the filtered value computed above, not the raw sc_ps. */
 	regs->tf_eflags = eflags;
 	return (EJUSTRETURN);
 }
 #endif
 
 /*
  * Restore the register state and signal mask of process p from the
  * ucontext_t at uap->sigcntxp.
  *
  * Under COMPAT_43 a context whose sc_trapno slot holds 0x01d516 is taken
  * to be an old-style struct osigcontext and is passed to osigreturn().
  *
  * Returns EFAULT when the context is not readable, EINVAL when the
  * requested eflags or %cs are rejected or vm86 mode was requested without
  * an initialised vm86 area, and EJUSTRETURN once the trapframe has been
  * rewritten.
  */
 int
 sigreturn(p, uap)
 	struct proc *p;
 	struct sigreturn_args /* {
 		ucontext_t *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	int cs, eflags;
 
 	ucp = uap->sigcntxp;
 #ifdef COMPAT_43
 	if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ))
 		return (EFAULT);
 	if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516)
 		return (osigreturn(p, (struct osigreturn_args *)uap));
 	/*
 	 * Since ucp is not an osigcontext but a ucontext_t, we have to
 	 * check again if all of it is accessible.  A ucontext_t is
 	 * much larger, so instead of just checking for the pointer
 	 * being valid for the size of an osigcontext, now check for
 	 * it being valid for a whole, new-style ucontext_t.
 	 */
 #endif
 	if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ))
 		return (EFAULT);
 
 	regs = p->p_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/*
 		 * Go back to user mode if both flags are set.
 		 * Note that SIGBUS is only posted here; the restore below
 		 * still runs.
 		 */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/* Only the *_USERCHANGE bits are taken from the context. */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * The bulk copy also overwrites tf_eflags with the raw user
 		 * value, so the filtered eflags is stored again right after.
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 			printf("sigreturn: eflags = 0x%x\n", eflags);
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			printf("sigreturn: cs = 0x%x\n", cs);
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 
 		/* eflags and %cs passed the checks; take the whole frame. */
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 	/* The signal stack flag and signal mask change under the proc lock. */
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	p->p_sigmask = ucp->uc_sigmask;
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	return (EJUSTRETURN);
 }
 
 /*
  * Machine dependent boot() routine
  *
  * I haven't seen anything to put here yet
  * Possibly some stuff might be grafted back here from boot()
  */
 void
 cpu_boot(int howto)
 {
 	/* Intentionally empty: `howto' is accepted but not used. */
 }
 
 /*
  * Shutdown the CPU as much as possible
  */
 void
 cpu_halt(void)
 {
 	/*
 	 * Never returns: hlt is issued again every time execution
 	 * continues past it.
 	 */
 	while (1)
 		__asm__ ("hlt");
 }
 
 /*
  * Hook to idle the CPU when possible.  This currently only works in
  * the !SMP case, as there is no clean way to ensure that a CPU will be
  * woken when there is work available for it.
  */
 /*
  * Switch tested by cpu_idle(): when zero, cpu_idle() never executes hlt.
  * Defaults to 1 and is exported read-write under the machdep sysctl node.
  */
 static int	cpu_idle_hlt = 1;
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 
 /*
  * Note that we have to be careful here to avoid a race between checking
  * procrunnable() and actually halting.  If we don't do this, we may waste
  * the time between calling hlt and the next interrupt even though there
  * is a runnable process.
  */
 /*
  * Idle the CPU until the next interrupt, provided cpu_idle_hlt is set and
  * nothing is runnable.  Does nothing on SMP kernels.
  */
 void
 cpu_idle(void)
 {
 #ifndef SMP
 	if (cpu_idle_hlt) {
 		/*
 		 * Check for runnable work with interrupts off so that an
 		 * interrupt cannot make a process runnable between the
 		 * check and the halt.
 		 */
 		disable_intr();
 		if (procrunnable())
 			enable_intr();
 		else {
 			/*
 			 * Interrupts are held off until the instruction
 			 * following sti has executed, so hlt must be
 			 * exactly that instruction.  Emitting both from one
 			 * asm statement guarantees nothing is placed
 			 * between them; a separate enable_intr() followed
 			 * by a separate hlt statement does not, and would
 			 * leave a window in which the wakeup interrupt is
 			 * taken before the halt.
 			 */
 			__asm __volatile("sti; hlt");
 		}
 	}
 #endif
 }
 
 /*
  * Clear registers on exec
  */
 void
 /*
  * Reset the machine state of process p for a freshly exec'd image.
  *
  * entry:      becomes the user instruction pointer (tf_eip).
  * stack:      becomes the user stack pointer (tf_esp).
  * ps_strings: placed in tf_ebx.
  *
  * Besides the trapframe, this frees any user LDT, resets %gs, clears the
  * debug registers if they were in use, clears FP_SOFTFP and sets
  * CR0_MP | CR0_TS in %cr0.
  */
 setregs(p, entry, stack, ps_strings)
 	struct proc *p;
 	u_long entry;
 	u_long stack;
 	u_long ps_strings;
 {
 	struct trapframe *regs = p->p_frame;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 
 	if (pcb->pcb_ldt)
 		user_ldt_free(pcb);
   
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_eip = entry;
 	regs->tf_esp = stack;
 	/*
 	 * NOTE(review): the frame was zeroed just above, so the
 	 * (regs->tf_eflags & PSL_T) term is always 0 here and the result
 	 * is plain PSL_USER.
 	 */
 	regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_cs = _ucodesel;
 
 	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 	regs->tf_ebx = ps_strings;
 
 	/*
 	 * Reset %gs as well: directly on the CPU when this pcb is the
 	 * current one, otherwise in the saved pcb copy.
 	 */
 	if (pcb == PCPU_GET(curpcb))
 		load_gs(_udatasel);
 	else
 		pcb->pcb_gs = _udatasel;
 
         /*
          * Reset the hardware debug registers if they were in use.
          * They won't have any meaning for the newly exec'd process.
          */
         if (pcb->pcb_flags & PCB_DBREGS) {
                 pcb->pcb_dr0 = 0;
                 pcb->pcb_dr1 = 0;
                 pcb->pcb_dr2 = 0;
                 pcb->pcb_dr3 = 0;
                 pcb->pcb_dr6 = 0;
                 pcb->pcb_dr7 = 0;
                 if (pcb == PCPU_GET(curpcb)) {
 		        /*
 			 * Clear the debug registers on the running
 			 * CPU, otherwise they will end up affecting
 			 * the next process we switch to.
 			 */
 		        reset_dbregs();
                 }
                 pcb->pcb_flags &= ~PCB_DBREGS;
         }
 
 	/*
 	 * Initialize the math emulator (if any) for the current process.
 	 * Actually, just clear the bit that says that the emulator has
 	 * been initialized.  Initialization is delayed until the process
 	 * traps to the emulator (if it is done at all) mainly because
 	 * emulators don't provide an entry point for initialization.
 	 */
 	p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;
 
 	/*
 	 * Arrange to trap the next npx or `fwait' instruction (see npx.c
 	 * for why fwait must be trapped at least if there is an npx or an
 	 * emulator).  This is mainly to handle the case where npx0 is not
 	 * configured, since the npx routines normally set up the trap
 	 * otherwise.  It should be done only at boot time, but doing it
 	 * here allows modifying `npx_exists' for testing the emulator on
 	 * systems with an npx.
 	 */
 	load_cr0(rcr0() | CR0_MP | CR0_TS);
 
 #ifdef DEV_NPX
 	/* Initialize the npx (if any) for the current process. */
 	npxinit(__INITIAL_NPXCW__);
 #endif
 
 	/*
 	 * XXX - Linux emulator
 	 * Make sure edx is 0x0 on entry.  Linux binaries depend
 	 * on it.
 	 */
 	p->p_retval[1] = 0;
 }
 
 /*
  * Turn on the %cr0 control bits the kernel runs with and load the user
  * data selector into %gs.
  */
 void
 cpu_setregs(void)
 {
 	unsigned int bits;
 
 	/* CR0_NE is also done by npxinit(); MP and TS at every execve(). */
 	bits = CR0_NE | CR0_MP | CR0_TS;
 #ifndef I386_CPU
 	bits |= CR0_WP | CR0_AM;
 #endif
 	load_cr0(rcr0() | bits);
 	load_gs(_udatasel);
 }
 
 /*
  * Sysctl handler for machdep.adjkerntz: an ordinary integer handler that
  * additionally calls resettodr() after a successful write.
  */
 static int
 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 {
 	int rv;
 
 	rv = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	if (rv)
 		return (rv);
 
 	/* A new value was supplied and accepted. */
 	if (req->newptr)
 		resettodr();
 	return (rv);
 }
 
 /* Read-write integer; writes go through sysctl_machdep_adjkerntz(). */
 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
 	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
 
 /* Read-write integer backed directly by disable_rtc_set. */
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
 	CTLFLAG_RW, &disable_rtc_set, 0, "");
 
 /* Read-only view of the bootinfo structure. */
 SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
 	CTLFLAG_RD, &bootinfo, bootinfo, "");
 
 /* Read-write integer backed directly by wall_cmos_clock. */
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
 /*
  * Initialize 386 and configure to run kernel
  */
 
 /*
  * Initialize segments & interrupt table
  */
 
 int _default_ldt;
 union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
 /* Backing storage for the IDT; `idt' initially points at its first entry. */
 static struct gate_descriptor idt0[NIDT];
 struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
 union descriptor ldt[NLDT];		/* local descriptor table */
 #ifdef SMP
 /* table descriptors - used to load tables by microp */
 struct region_descriptor r_gdt, r_idt;
 #endif
 
 int private_tss;			/* flag indicating private tss */
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 extern int has_f00f_bug;
 #endif
 
 /* TSS whose address is the base of the GPANIC_SEL entry in gdt_segs[]. */
 static struct i386tss dblfault_tss;
 /* One page; presumably the stack used with dblfault_tss -- TODO confirm. */
 static char dblfault_stack[PAGE_SIZE];
 
 extern  struct user *proc0paddr;
 
 
 /*
  * Software prototypes of the GDT entries, in a more palatable form than
  * the hardware descriptor layout.  The position of each entry matches the
  * selector index named in the comment above it.
  */
 struct soft_segment_descriptor gdt_segs[] = {
 /* GNULL_SEL	0 Null Descriptor */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GCODE_SEL	1 Code Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GDATA_SEL	2 Data Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPRIV_SEL	3 SMP Per-Processor Private Data Descriptor */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPROC0_SEL	4 Proc 0 Tss Descriptor */
 {
 	0x0,			/* segment base address */
 	sizeof(struct i386tss)-1,/* length - one i386tss, in bytes */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GLDT_SEL	5 LDT Descriptor */
 {	(int) ldt,		/* segment base address  */
 	sizeof(ldt)-1,		/* length - the whole ldt[] array */
 	SDT_SYSLDT,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GUSERLDT_SEL	6 User LDT Descriptor per process */
 {	(int) ldt,		/* segment base address  */
 	(512 * sizeof(union descriptor)-1),		/* length */
 	SDT_SYSLDT,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GTGATE_SEL	7 Null Descriptor - Placeholder */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 {	0x400,			/* segment base address */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPANIC_SEL	9 Panic Tss Descriptor */
 {	(int) &dblfault_tss,	/* segment base address  */
 	sizeof(struct i386tss)-1,/* length - one i386tss, in bytes */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Software prototypes of the LDT entries: six slots, of which only the
  * user code and user data descriptors are marked present here.
  */
 static struct soft_segment_descriptor ldt_segs[] = {
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Code Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Data Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Fill in IDT slot `idx' as a present gate of type `typ' and privilege
  * level `dpl' that enters `func' through code selector `selec'.
  */
 void
 setidt(idx, func, typ, dpl, selec)
 	int idx;
 	inthand_t *func;
 	int typ;
 	int dpl;
 	int selec;
 {
 	struct gate_descriptor *gate = &idt[idx];
 	int entry = (int)func;
 
 	/* The handler address is stored split into low and high parts. */
 	gate->gd_looffset = entry;
 	gate->gd_selector = selec;
 	gate->gd_stkcpy = 0;
 	gate->gd_xx = 0;
 	gate->gd_type = typ;
 	gate->gd_dpl = dpl;
 	gate->gd_p = 1;
 	gate->gd_hioffset = entry >> 16;
 }
 
 /*
  * IDTVEC(name) pastes an `X' prefix onto name, so IDTVEC(div) names the
  * symbol Xdiv.  The handlers declared below are defined outside this part
  * of the file.
  */
 #define	IDTVEC(name)	__CONCAT(X,name)
 
 extern inthand_t
 	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 
 /*
  * Convert the hardware segment descriptor *sd into its software form
  * *ssd.
  */
 void
 sdtossd(sd, ssd)
 	struct segment_descriptor *sd;
 	struct soft_segment_descriptor *ssd;
 {
 	/* Attribute fields carry over one for one. */
 	ssd->ssd_gran  = sd->sd_gran;
 	ssd->ssd_def32 = sd->sd_def32;
 	ssd->ssd_p     = sd->sd_p;
 	ssd->ssd_dpl   = sd->sd_dpl;
 	ssd->ssd_type  = sd->sd_type;
 
 	/* Limit and base are each reassembled from a low and a high part. */
 	ssd->ssd_limit = sd->sd_lolimit | (sd->sd_hilimit << 16);
 	ssd->ssd_base  = sd->sd_lobase | (sd->sd_hibase << 24);
 }
 
 /*
  * Number of slots in getmemsize()'s physmap[] array.  Slots are used as
  * base/bound pairs, so this leaves room for 8 memory regions.
  */
 #define PHYSMAP_SIZE	(2 * 8)
 
 /*
  * Populate the (physmap) array with base/bound pairs describing the
  * available physical memory in the system, then test this memory and
  * build the phys_avail array describing the actually-available memory.
  *
  * If we cannot accurately determine the physical memory map, then use
  * value from the 0xE801 call, and failing that, the RTC.
  *
  * Total memory size may be set by the kernel environment variable
  * hw.physmem or the compile-time define MAXMEM.
  */
 static void
 getmemsize(int first)
 {
 	int i, physmap_idx, pa_indx;
 	u_int basemem, extmem;
 	struct vm86frame vmf;
 	struct vm86context vmc;
 	vm_offset_t pa, physmap[PHYSMAP_SIZE];
 	pt_entry_t pte;
 	const char *cp;
 	struct bios_smap *smap;
 
 	bzero(&vmf, sizeof(struct vm86frame));
 	bzero(physmap, sizeof(physmap));
 
 	/*
 	 * Perform "base memory" related probes & setup
 	 */
 	vm86_intcall(0x12, &vmf);
 	basemem = vmf.vmf_ax;
 	if (basemem > 640) {
 		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 			basemem);
 		basemem = 640;
 	}
 
 	/*
 	 * XXX if biosbasemem is now < 640, there is a `hole'
 	 * between the end of base memory and the start of
 	 * ISA memory.  The hole may be empty or it may
 	 * contain BIOS code or data.  Map it read/write so
 	 * that the BIOS can write to it.  (Memory from 0 to
 	 * the physical end of the kernel is mapped read-only
 	 * to begin with and then parts of it are remapped.
 	 * The parts that aren't remapped form holes that
 	 * remain read-only and are unused by the kernel.
 	 * The base memory area is below the physical end of
 	 * the kernel and right now forms a read-only hole.
 	 * The part of it from PAGE_SIZE to
 	 * (trunc_page(biosbasemem * 1024) - 1) will be
 	 * remapped and used by the kernel later.)
 	 *
 	 * This code is similar to the code used in
 	 * pmap_mapdev, but since no memory needs to be
 	 * allocated we simply change the mapping.
 	 */
 	for (pa = trunc_page(basemem * 1024);
 	     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
 		pte = (pt_entry_t)vtopte(pa + KERNBASE);
 		*pte = pa | PG_RW | PG_V;
 	}
 
 	/*
 	 * if basemem != 640, map pages r/w into vm86 page table so 
 	 * that the bios can scribble on it.
 	 */
 	pte = (pt_entry_t)vm86paddr;
 	for (i = basemem / 4; i < 160; i++)
 		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 
 	/*
 	 * map page 1 R/W into the kernel page table so we can use it
 	 * as a buffer.  The kernel will unmap this page later.
 	 */
 	pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT));
 	*pte = (1 << PAGE_SHIFT) | PG_RW | PG_V;
 
 	/*
 	 * get memory map with INT 15:E820
 	 */
 	vmc.npages = 0;
 	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
 	vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 
 	physmap_idx = 0;
 	vmf.vmf_ebx = 0;
 	do {
 		vmf.vmf_eax = 0xE820;
 		vmf.vmf_edx = SMAP_SIG;
 		vmf.vmf_ecx = sizeof(struct bios_smap);
 		i = vm86_datacall(0x15, &vmf, &vmc);
 		if (i || vmf.vmf_eax != SMAP_SIG)
 			break;
 		if (boothowto & RB_VERBOSE)
 			printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n",
 				smap->type,
 				*(u_int32_t *)((char *)&smap->base + 4),
 				(u_int32_t)smap->base,
 				*(u_int32_t *)((char *)&smap->length + 4),
 				(u_int32_t)smap->length);
 
 		if (smap->type != 0x01)
 			goto next_run;
 
 		if (smap->length == 0)
 			goto next_run;
 
 		if (smap->base >= 0xffffffff) {
 			printf("%uK of memory above 4GB ignored\n",
 			    (u_int)(smap->length / 1024));
 			goto next_run;
 		}
 
 		for (i = 0; i <= physmap_idx; i += 2) {
 			if (smap->base < physmap[i + 1]) {
 				if (boothowto & RB_VERBOSE)
 					printf(
 	"Overlapping or non-montonic memory region, ignoring second region\n");
 				goto next_run;
 			}
 		}
 
 		if (smap->base == physmap[physmap_idx + 1]) {
 			physmap[physmap_idx + 1] += smap->length;
 			goto next_run;
 		}
 
 		physmap_idx += 2;
 		if (physmap_idx == PHYSMAP_SIZE) {
 			printf(
 		"Too many segments in the physical address map, giving up\n");
 			break;
 		}
 		physmap[physmap_idx] = smap->base;
 		physmap[physmap_idx + 1] = smap->base + smap->length;
 next_run:
 	} while (vmf.vmf_ebx != 0);
 
 	if (physmap[1] != 0)
 		goto physmap_done;
 
 	/*
 	 * If we failed above, try memory map with INT 15:E801
 	 */
 	vmf.vmf_ax = 0xE801;
 	if (vm86_intcall(0x15, &vmf) == 0) {
 		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 	} else {
 #if 0
 		vmf.vmf_ah = 0x88;
 		vm86_intcall(0x15, &vmf);
 		extmem = vmf.vmf_ax;
 #else
 		/*
 		 * Prefer the RTC value for extended memory.
 		 */
 		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 #endif
 	}
 
 	/*
 	 * Special hack for chipsets that still remap the 384k hole when
 	 * there's 16MB of memory - this really confuses people that
 	 * are trying to use bus mastering ISA controllers with the
 	 * "16MB limit"; they only have 16MB, but the remapping puts
 	 * them beyond the limit.
 	 *
 	 * If extended memory is between 15-16MB (16-17MB phys address range),
 	 *	chop it to 15MB.
 	 */
 	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 		extmem = 15 * 1024;
 
 	physmap[0] = 0;
 	physmap[1] = basemem * 1024;
 	physmap_idx = 2;
 	physmap[physmap_idx] = 0x100000;
 	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 
 physmap_done:
 	/*
 	 * Now, physmap contains a map of physical memory.
 	 */
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	physmap[1] = mp_bootaddress(physmap[1] / 1024);
 
 	/* look for the MP hardware - needed for apic addresses */
 	i386_mp_probe();
 #endif
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".  We may adjust this 
 	 * based on ``hw.physmem'' and the results of the memory test.
 	 */
 	Maxmem = atop(physmap[physmap_idx + 1]);
 
 #ifdef MAXMEM
 	Maxmem = MAXMEM / 4;
 #endif
 
 	/*
 	 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
 	 * for the appropriate modifiers.  This overrides MAXMEM.
 	 */
 	if ((cp = getenv("hw.physmem")) != NULL) {
 		u_int64_t AllowMem, sanity;
 		char *ep;
 
 		sanity = AllowMem = strtouq(cp, &ep, 0);
 		if ((ep != cp) && (*ep != 0)) {
 			switch(*ep) {
 			case 'g':
 			case 'G':
 				AllowMem <<= 10;
 			case 'm':
 			case 'M':
 				AllowMem <<= 10;
 			case 'k':
 			case 'K':
 				AllowMem <<= 10;
 				break;
 			default:
 				AllowMem = sanity = 0;
 			}
 			if (AllowMem < sanity)
 				AllowMem = 0;
 		}
 		if (AllowMem == 0)
 			printf("Ignoring invalid memory size of '%s'\n", cp);
 		else
 			Maxmem = atop(AllowMem);
 	}
 
 	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 	    (boothowto & RB_VERBOSE))
 		printf("Physical memory use set to %uK\n", Maxmem * 4);
 
 	/*
 	 * If Maxmem has been increased beyond what the system has detected,
 	 * extend the last memory segment to the new limit.
 	 */ 
 	if (atop(physmap[physmap_idx + 1]) < Maxmem)
 		physmap[physmap_idx + 1] = ptoa(Maxmem);
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap(first, 0);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
 	pa_indx = 0;
 	phys_avail[pa_indx++] = physmap[0];
 	phys_avail[pa_indx] = physmap[0];
 #if 0
 	pte = (pt_entry_t)vtopte(KERNBASE);
 #else
 	pte = (pt_entry_t)CMAP1;
 #endif
 
 	/*
 	 * physmap is in bytes, so when converting to page boundaries,
 	 * round up the start address and round down the end address.
 	 */
 	for (i = 0; i <= physmap_idx; i += 2) {
 		vm_offset_t end;
 
 		end = ptoa(Maxmem);
 		if (physmap[i + 1] < end)
 			end = trunc_page(physmap[i + 1]);
 		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 			int tmp, page_bad;
 #if 0
 			int *ptr = 0;
 #else
 			int *ptr = (int *)CADDR1;
 #endif
 
 			/*
 			 * block out kernel memory as not available.
 			 */
 			if (pa >= 0x100000 && pa < first)
 				continue;
 	
 			page_bad = FALSE;
 
 			/*
 			 * map page into kernel: valid, read/write,non-cacheable
 			 */
 			*pte = pa | PG_V | PG_RW | PG_N;
 			invltlb();
 
 			tmp = *(int *)ptr;
 			/*
 			 * Test for alternating 1's and 0's
 			 */
 			*(volatile int *)ptr = 0xaaaaaaaa;
 			if (*(volatile int *)ptr != 0xaaaaaaaa) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for alternating 0's and 1's
 			 */
 			*(volatile int *)ptr = 0x55555555;
 			if (*(volatile int *)ptr != 0x55555555) {
 			page_bad = TRUE;
 			}
 			/*
 			 * Test for all 1's
 			 */
 			*(volatile int *)ptr = 0xffffffff;
 			if (*(volatile int *)ptr != 0xffffffff) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 0's
 			 */
 			*(volatile int *)ptr = 0x0;
 			if (*(volatile int *)ptr != 0x0) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Restore original value.
 			 */
 			*(int *)ptr = tmp;
 
 			/*
 			 * Adjust array of valid/good pages.
 			 */
 			if (page_bad == TRUE) {
 				continue;
 			}
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 * If we're also doing a speculative memory
 			 * test and we at or past the end, bump up Maxmem
 			 * so that we keep going. The first bad page
 			 * will terminate the loop.
 			 */
 			if (phys_avail[pa_indx] == pa) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 					printf(
 		"Too many holes in the physical address space, giving up\n");
 					pa_indx--;
 					break;
 				}
 				phys_avail[pa_indx++] = pa;	/* start */
 				phys_avail[pa_indx] = pa + PAGE_SIZE;	/* end */
 			}
 			physmem++;
 		}
 	}
 	*pte = 0;
 	invltlb();
 
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 
 	avail_end = phys_avail[pa_indx];
 }
 
 /*
  * Early machine-dependent initialization.
  *
  * In order, this routine: picks up the loader metadata and environment
  * from bootinfo, fills in and loads the GDT, sets up the per-CPU curproc
  * and the first mutexes, builds and loads the LDT and IDT, initializes
  * the console, finishes CPU identification, loads the initial TSS and
  * fills in the double fault TSS, sizes memory with getmemsize(), maps
  * the message buffer, builds the LDT call gates and finally initializes
  * proc0's pcb.
  *
  * "first" is not interpreted here; it is passed straight through to
  * getmemsize().
  */
 void
 init386(first)
 	int first;
 {
 	struct gate_descriptor *gdp;
 	int gsel_tss, metadata_missing, off, x;
 #ifndef SMP
 	/* table descriptors - used to load tables by microp */
 	struct region_descriptor r_gdt, r_idt;
 #endif
 
 	proc0.p_addr = proc0paddr;
 
 	atdevbase = ISA_HOLE_START + KERNBASE;
 
 	/*
 	 * The bootinfo pointers are offset by KERNBASE before use.  The
 	 * missing-metadata warning is deferred until the console is up.
 	 */
 	metadata_missing = 0;
 	if (bootinfo.bi_modulep) {
 		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 		preload_bootstrap_relocate(KERNBASE);
 	} else {
 		metadata_missing = 1;
 	}
 	if (bootinfo.bi_envp)
 		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
 	/* Init basic tunables, hz etc */
 	init_param();
 
 	/*
 	 * make gdt memory segments, the code segment goes up to end of the
 	 * page with etext in it, the data segment goes to the end of
 	 * the address space
 	 */
 	/*
 	 * XXX text protection is temporarily (?) disabled.  The limit was
 	 * i386_btop(round_page(etext)) - 1.
 	 */
 	gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
 #ifdef SMP
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		atop(sizeof(struct privatespace) - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[0].globaldata.gd_common_tss;
 	SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0].globaldata;
 #else
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		atop(sizeof(struct globaldata) - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &__globaldata;
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &__globaldata.gd_common_tss;
 	__globaldata.gd_prvspace = &__globaldata;
 #endif
 
 	/* Convert the software descriptors into the gdt[] entries. */
 	for (x = 0; x < NGDT; x++) {
 #ifdef BDE_DEBUGGER
 		/* avoid overwriting db entries with APM ones */
 		if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL)
 			continue;
 #endif
 		ssdtosd(&gdt_segs[x], &gdt[x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base =  (int) gdt;
 	lgdt(&r_gdt);
 
 	/* setup curproc so that mutexes work */
 	PCPU_SET(curproc, &proc0);
 	PCPU_SET(spinlocks, NULL);
 
 	LIST_INIT(&proc0.p_contested);
 
 	/*
 	 * Initialize mutexes.
 	 */
 	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
 	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
 	mtx_init(&proc0.p_mtx, "process lock", MTX_DEF);
 	mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE);
 #ifdef SMP
 	mtx_init(&imen_mtx, "imen", MTX_SPIN);
 #endif
 	mtx_lock(&Giant);
 
 	/* make ldt memory segments */
 	/*
 	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
 	 * should be spelled ...MAX_USER...
 	 */
 	ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 	ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
 		ssdtosd(&ldt_segs[x], &ldt[x].sd);
 
 	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/*
 	 * exceptions
 	 *
 	 * Every vector is first pointed at the "rsvd" handler; the
 	 * individual vectors are then overridden below.  Vectors 3, 4 and
 	 * 0x80 are installed with SEL_UPL, all others with SEL_KPL.
 	 */
 	for (x = 0; x < NIDT; x++)
 		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
 		    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(1, &IDTVEC(dbg),  SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
  	setidt(3, &IDTVEC(bpt),  SDT_SYS386IGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL
 	    , GSEL(GCODE_SEL, SEL_KPL));
 	/* Vector 8 is a task gate referring to GPANIC_SEL, not a handler. */
 	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(14, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
  	setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	r_idt.rd_base = (int) idt;
 	lidt(&r_idt);
 
 	/*
 	 * Initialize the console before we print anything out.
 	 */
 	cninit();
 
 	if (metadata_missing)
 		printf("WARNING: loader(8) metadata is missing!\n");
 
 #ifdef DEV_ISA
 	isa_defaultirq();
 #endif
 
 #ifdef DDB
 	kdb_init();
 	if (boothowto & RB_KDB)
 		Debugger("Boot flags requested debugger");
 #endif
 
 	finishidentcpu();	/* Final stage of CPU initialization */
 	/*
 	 * Vectors 6 and 13 are installed again after finishidentcpu().
 	 * NOTE(review): presumably finishidentcpu() can temporarily
 	 * replace them while probing - confirm against its source.
 	 */
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	initializecpu();	/* Initialize CPU registers */
 
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
 	PCPU_SET(common_tss.tss_esp0,
 	    (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16);
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	private_tss = 0;
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	ltr(gsel_tss);
 
 	/*
 	 * Fill in the double fault TSS: all stack pointers refer to the
 	 * end of dblfault_stack and execution starts at dblfault_handler.
 	 */
 	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
 	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_cr3 = (int)IdlePTD;
 	dblfault_tss.tss_eip = (int)dblfault_handler;
 	dblfault_tss.tss_eflags = PSL_KERNEL;
 	dblfault_tss.tss_ds = dblfault_tss.tss_es =
 	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 
 	vm86_initialize();
 	getmemsize(first);
 
 	/* now running on new page tables, configured,and u/iom is accessible */
 
 	/* Map the message buffer. */
 	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
 		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
 
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 
 	/* make a call gate to reenter kernel with */
 	gdp = &ldt[LSYS5CALLS_SEL].gd;
 
 	/* The handler address is split across the two offset fields. */
 	x = (int) &IDTVEC(lcall_syscall);
 	gdp->gd_looffset = x;
 	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 	gdp->gd_stkcpy = 1;
 	gdp->gd_type = SDT_SYS386CGT;
 	gdp->gd_dpl = SEL_UPL;
 	gdp->gd_p = 1;
 	gdp->gd_hioffset = x >> 16;
 
 	/* XXX does this work? */
 	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 	ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
 
 	/* transfer to user mode */
 
 	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
 	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
 
 	/* setup proc 0's pcb */
 	proc0.p_addr->u_pcb.pcb_flags = 0;
 	proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
 	proc0.p_addr->u_pcb.pcb_ext = 0;
 	proc0.p_frame = &proc0_tf;
 }
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 static void f00f_hack(void *unused);
 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 
 /*
  * Install the workaround for the Pentium F00F bug.
  *
  * Does nothing unless has_f00f_bug is set.  Otherwise the IDT is copied
  * into a freshly allocated two-page region so that its first seven
  * entries end exactly at the boundary between the two pages, the copy is
  * loaded with lidt, and the lower page is then made read-only.
  * Registered as a SYSINIT; the argument is unused.
  */
 static void
 f00f_hack(void *unused)
 {
 	struct gate_descriptor *relocated_idt;
 #ifndef SMP
 	struct region_descriptor r_idt;
 #endif
 	vm_offset_t region;
 
 	if (!has_f00f_bug)
 		return;
 
 	GIANT_REQUIRED;
 
 	printf("Intel Pentium detected, installing workaround for F00F bug\n");
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 
 	/* Two pages, so the table can straddle a page boundary. */
 	region = kmem_alloc(kernel_map, PAGE_SIZE * 2);
 	if (region == 0)
 		panic("kmem_alloc returned 0");
 	if (((unsigned int)region & (PAGE_SIZE-1)) != 0)
 		panic("kmem_alloc returned non-page-aligned memory");
 
 	/*
 	 * Seven 8-byte entries go at the very end of the lower page; the
 	 * rest of the table continues into the upper page.
 	 */
 	relocated_idt =
 	    (struct gate_descriptor*)(region + PAGE_SIZE - (7*8));
 	bcopy(idt, relocated_idt, sizeof(idt0));
 
 	/* Switch the CPU to the copy before publishing it through idt. */
 	r_idt.rd_base = (int)relocated_idt;
 	lidt(&r_idt);
 	idt = relocated_idt;
 
 	/* Write-protect the lower page only. */
 	if (vm_map_protect(kernel_map, region, region + PAGE_SIZE,
 	    VM_PROT_READ, FALSE) != KERN_SUCCESS)
 		panic("vm_map_protect failed");
 }
 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 
 int
 ptrace_set_pc(p, addr)
 	struct proc *p;
 	unsigned long addr;
 {
 	p->p_frame->tf_eip = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(p)
 	struct proc *p;
 {
 	p->p_frame->tf_eflags |= PSL_T;
 	return (0);
 }
 
 /*
  * Copy the saved register state of process p into *regs.
  *
  * Everything is read from the process's trap frame except %gs, which
  * is read from the pcb.  Always returns 0.
  */
 int
 fill_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *frame;
 	struct pcb *upcb;
 
 	frame = p->p_frame;
 	upcb = &p->p_addr->u_pcb;
 
 	/* Segment registers. */
 	regs->r_fs = frame->tf_fs;
 	regs->r_es = frame->tf_es;
 	regs->r_ds = frame->tf_ds;
 	regs->r_cs = frame->tf_cs;
 	regs->r_ss = frame->tf_ss;
 	regs->r_gs = upcb->pcb_gs;
 
 	/* General purpose registers. */
 	regs->r_eax = frame->tf_eax;
 	regs->r_ebx = frame->tf_ebx;
 	regs->r_ecx = frame->tf_ecx;
 	regs->r_edx = frame->tf_edx;
 	regs->r_esi = frame->tf_esi;
 	regs->r_edi = frame->tf_edi;
 	regs->r_ebp = frame->tf_ebp;
 	regs->r_esp = frame->tf_esp;
 
 	/* Program counter and flags. */
 	regs->r_eip = frame->tf_eip;
 	regs->r_eflags = frame->tf_eflags;
 
 	return (0);
 }
 
 /*
  * Load the register state in *regs into process p.
  *
  * The new eflags and %cs are validated with EFL_SECURE() and
  * CS_SECURE() first; if either check fails nothing is modified and
  * EINVAL is returned.  Otherwise every register is stored in the trap
  * frame, except %gs which is stored in the pcb, and 0 is returned.
  */
 int
 set_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *frame;
 	struct pcb *upcb;
 
 	frame = p->p_frame;
 	if (!(EFL_SECURE(regs->r_eflags, frame->tf_eflags) &&
 	    CS_SECURE(regs->r_cs)))
 		return (EINVAL);
 
 	/* Segment registers. */
 	frame->tf_fs = regs->r_fs;
 	frame->tf_es = regs->r_es;
 	frame->tf_ds = regs->r_ds;
 	frame->tf_cs = regs->r_cs;
 	frame->tf_ss = regs->r_ss;
 	upcb = &p->p_addr->u_pcb;
 	upcb->pcb_gs = regs->r_gs;
 
 	/* General purpose registers. */
 	frame->tf_eax = regs->r_eax;
 	frame->tf_ebx = regs->r_ebx;
 	frame->tf_ecx = regs->r_ecx;
 	frame->tf_edx = regs->r_edx;
 	frame->tf_esi = regs->r_esi;
 	frame->tf_edi = regs->r_edi;
 	frame->tf_ebp = regs->r_ebp;
 	frame->tf_esp = regs->r_esp;
 
 	/* Program counter and flags. */
 	frame->tf_eip = regs->r_eip;
 	frame->tf_eflags = regs->r_eflags;
 
 	return (0);
 }
 
 #ifdef CPU_ENABLE_SSE
 /*
  * Convert an FPU save area in savexmm layout (sv_xmm) into save87
  * layout (sv_87): the environment fields, the eight FPU accumulators
  * and sv_ex_sw are copied across.
  */
 static void
 fill_fpregs_xmm(sv_xmm, sv_87)
 	struct savexmm *sv_xmm;
 	struct save87 *sv_87;
 {
 	struct envxmm *from;
 	struct env87 *to;
 	int regno;
 
 	from = &sv_xmm->sv_env;
 	to = &sv_87->sv_env;
 
 	/* FPU control/status */
 	to->en_cw = from->en_cw;
 	to->en_sw = from->en_sw;
 	to->en_tw = from->en_tw;
 	to->en_fip = from->en_fip;
 	to->en_fcs = from->en_fcs;
 	to->en_opcode = from->en_opcode;
 	to->en_foo = from->en_foo;
 	to->en_fos = from->en_fos;
 
 	/* FPU registers */
 	for (regno = 0; regno < 8; regno++)
 		sv_87->sv_ac[regno] = sv_xmm->sv_fp[regno].fp_acc;
 
 	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
 }
 
 /*
  * Convert an FPU save area in save87 layout (sv_87) into savexmm
  * layout (sv_xmm): the environment fields, the eight FPU accumulators
  * and sv_ex_sw are copied across.
  */
 static void
 set_fpregs_xmm(sv_87, sv_xmm)
 	struct save87 *sv_87;
 	struct savexmm *sv_xmm;
 {
 	struct env87 *from;
 	struct envxmm *to;
 	int regno;
 
 	from = &sv_87->sv_env;
 	to = &sv_xmm->sv_env;
 
 	/* FPU control/status */
 	to->en_cw = from->en_cw;
 	to->en_sw = from->en_sw;
 	to->en_tw = from->en_tw;
 	to->en_fip = from->en_fip;
 	to->en_fcs = from->en_fcs;
 	to->en_opcode = from->en_opcode;
 	to->en_foo = from->en_foo;
 	to->en_fos = from->en_fos;
 
 	/* FPU registers */
 	for (regno = 0; regno < 8; regno++)
 		sv_xmm->sv_fp[regno].fp_acc = sv_87->sv_ac[regno];
 
 	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
 }
 #endif /* CPU_ENABLE_SSE */
 
 int
 fill_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
 						(struct save87 *)fpregs);
 		return (0);
 	}
 #endif /* CPU_ENABLE_SSE */
 	bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
 	return (0);
 }
 
 int
 set_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		set_fpregs_xmm((struct save87 *)fpregs,
 					   &p->p_addr->u_pcb.pcb_save.sv_xmm);
 		return (0);
 	}
 #endif /* CPU_ENABLE_SSE */
 	bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
 	return (0);
 }
 
 /*
  * Fill *dbregs with debug register contents.
  *
  * With p == NULL the values are read from the debug registers of the
  * CPU itself.  Otherwise they are taken from the copies saved in the
  * pcb of process p; dr4 and dr5 have no saved copy and are reported
  * as 0.  Always returns 0.
  */
 int
 fill_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *upcb;
 
 	if (p != NULL) {
 		upcb = &p->p_addr->u_pcb;
 		dbregs->dr0 = upcb->pcb_dr0;
 		dbregs->dr1 = upcb->pcb_dr1;
 		dbregs->dr2 = upcb->pcb_dr2;
 		dbregs->dr3 = upcb->pcb_dr3;
 		dbregs->dr4 = 0;
 		dbregs->dr5 = 0;
 		dbregs->dr6 = upcb->pcb_dr6;
 		dbregs->dr7 = upcb->pcb_dr7;
 		return (0);
 	}
 
 	/* No process given: read the hardware registers. */
 	dbregs->dr0 = rdr0();
 	dbregs->dr1 = rdr1();
 	dbregs->dr2 = rdr2();
 	dbregs->dr3 = rdr3();
 	dbregs->dr4 = rdr4();
 	dbregs->dr5 = rdr5();
 	dbregs->dr6 = rdr6();
 	dbregs->dr7 = rdr7();
 	return (0);
 }
 
 int
 set_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *pcb;
 	int i;
 	u_int32_t mask1, mask2;
 
 	if (p == NULL) {
 		load_dr0(dbregs->dr0);
 		load_dr1(dbregs->dr1);
 		load_dr2(dbregs->dr2);
 		load_dr3(dbregs->dr3);
 		load_dr4(dbregs->dr4);
 		load_dr5(dbregs->dr5);
 		load_dr6(dbregs->dr6);
 		load_dr7(dbregs->dr7);
 	}
 	else {
 		/*
 		 * Don't let an illegal value for dr7 get set.	Specifically,
 		 * check for undefined settings.  Setting these bit patterns
 		 * result in undefined behaviour and can lead to an unexpected
 		 * TRCTRAP.
 		 */
 		for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
 		     i++, mask1 <<= 2, mask2 <<= 2)
 			if ((dbregs->dr7 & mask1) == mask2)
 				return (EINVAL);
 		
 		pcb = &p->p_addr->u_pcb;
 		
 		/*
 		 * Don't let a process set a breakpoint that is not within the
 		 * process's address space.  If a process could do this, it
 		 * could halt the system by setting a breakpoint in the kernel
 		 * (if ddb was enabled).  Thus, we need to check to make sure
 		 * that no breakpoints are being enabled for addresses outside
 		 * process's address space, unless, perhaps, we were called by
 		 * uid 0.
 		 *
 		 * XXX - what about when the watched area of the user's
 		 * address space is written into from within the kernel
 		 * ... wouldn't that still cause a breakpoint to be generated
 		 * from within kernel mode?
 		 */
 
 		if (suser(p) != 0) {
 			if (dbregs->dr7 & 0x3) {
 				/* dr0 is enabled */
 				if (dbregs->dr0 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<2)) {
 				/* dr1 is enabled */
 				if (dbregs->dr1 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<4)) {
 				/* dr2 is enabled */
 				if (dbregs->dr2 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<6)) {
 				/* dr3 is enabled */
 				if (dbregs->dr3 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 		}
 
 		pcb->pcb_dr0 = dbregs->dr0;
 		pcb->pcb_dr1 = dbregs->dr1;
 		pcb->pcb_dr2 = dbregs->dr2;
 		pcb->pcb_dr3 = dbregs->dr3;
 		pcb->pcb_dr6 = dbregs->dr6;
 		pcb->pcb_dr7 = dbregs->dr7;
 
 		pcb->pcb_flags |= PCB_DBREGS;
 	}
 
 	return (0);
 }
 
 /*
  * Return > 0 if a hardware breakpoint has been hit, and the
  * breakpoint was in user space.  Return 0, otherwise.
  */
 int
 user_dbreg_trap(void)
 {
         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
         int nbp;            /* number of breakpoints that triggered */
         caddr_t addr[4];    /* breakpoint addresses */
         int i;
         
         dr7 = rdr7();
         if ((dr7 & 0x000000ff) == 0) {
                 /*
                  * all GE and LE bits in the dr7 register are zero,
                  * thus the trap couldn't have been caused by the
                  * hardware debug registers
                  */
                 return 0;
         }
 
         nbp = 0;
         dr6 = rdr6();
         bp = dr6 & 0x0000000f;
 
         if (!bp) {
                 /*
                  * None of the breakpoint bits are set meaning this
                  * trap was not caused by any of the debug registers
                  */
                 return 0;
         }
 
         /*
          * at least one of the breakpoints were hit, check to see
          * which ones and if any of them are user space addresses
          */
 
         if (bp & 0x01) {
                 addr[nbp++] = (caddr_t)rdr0();
         }
         if (bp & 0x02) {
                 addr[nbp++] = (caddr_t)rdr1();
         }
         if (bp & 0x04) {
                 addr[nbp++] = (caddr_t)rdr2();
         }
         if (bp & 0x08) {
                 addr[nbp++] = (caddr_t)rdr3();
         }
 
         for (i=0; i<nbp; i++) {
                 if (addr[i] <
                     (caddr_t)VM_MAXUSER_ADDRESS) {
                         /*
                          * addr[i] is in user space
                          */
                         return nbp;
                 }
         }
 
         /*
          * None of the breakpoints are in user space.
          */
         return 0;
 }
 
 
 #ifndef DDB
 /*
  * Stand-in used when DDB is not configured: just print the message the
  * caller passed in.
  */
 void
 Debugger(const char *msg)
 {
 
 	printf("Debugger(\"%s\") called.\n", msg);
 }
 #endif /* no DDB */
 
 #include <sys/disklabel.h>
 
 /*
  * Check a transfer against the bounds of the partition it addresses
  * and compute its absolute block number.
  *
  * Returns 1 when the transfer may proceed (bio_pblkno is set and
  * bio_bcount may have been truncated to fit the partition), 0 when
  * the transfer starts exactly at the end of the partition (bio_resid
  * is set to the full count to signal EOF), and -1 on error, in which
  * case bio_error is set and BIO_ERROR is raised in bio_flags.
  *
  * A write that would cover the label sector is refused with EROFS
  * unless wlabel is non-zero.
  */
 int
 bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
 {
 	struct partition *part;
 	int label_base, part_size, nsect;
 
 	part = lp->d_partitions + dkpart(bp->bio_dev);
 	label_base = lp->d_partitions[0].p_offset;
 	part_size = part->p_size;
 	/* Transfer size in sectors, rounded up. */
 	nsect = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 
 	/* overwriting disk label ? */
 	/* XXX should also protect bootstrap in first 8K */
 	if (bp->bio_blkno + part->p_offset <= LABELSECTOR + label_base &&
 #if LABELSECTOR != 0
 	    bp->bio_blkno + part->p_offset + nsect > LABELSECTOR + label_base &&
 #endif
 	    (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
 		bp->bio_error = EROFS;
 		goto reject;
 	}
 
 #if     defined(DOSBBSECTOR) && defined(notyet)
 	/* overwriting master boot record? */
 	if (bp->bio_blkno + part->p_offset <= DOSBBSECTOR &&
 	    (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
 		bp->bio_error = EROFS;
 		goto reject;
 	}
 #endif
 
 	/* beyond partition? */
 	if (bp->bio_blkno < 0 || bp->bio_blkno + nsect > part_size) {
 		/* if exactly at end of disk, return an EOF */
 		if (bp->bio_blkno == part_size) {
 			bp->bio_resid = bp->bio_bcount;
 			return(0);
 		}
 		/* or truncate if part of it fits */
 		nsect = part_size - bp->bio_blkno;
 		if (nsect <= 0) {
 			bp->bio_error = EINVAL;
 			goto reject;
 		}
 		bp->bio_bcount = nsect << DEV_BSHIFT;
 	}
 
 	/* Translate the partition-relative block number. */
 	bp->bio_pblkno = bp->bio_blkno + part->p_offset;
 	return(1);
 
 reject:
 	bp->bio_flags |= BIO_ERROR;
 	return(-1);
 }
 
 #ifdef DDB
 
 /*
  * Provide inb() and outb() as functions.  They are normally only
  * available as macros calling inlined functions, thus cannot be
  * called inside DDB.
  *
  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
  */
 
 /* Drop the macro versions so the names can be defined as functions. */
 #undef inb
 #undef outb
 
 /* silence compiler warnings */
 u_char inb(u_int);
 void outb(u_int, u_char);
 
 /*
  * Read one byte from I/O port "port" and return it.
  */
 u_char
 inb(u_int port)
 {
 	u_char	data;
 	/*
 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
 	 * %edx, while gcc generates inferior code (movw instead of movl)
 	 * if we tell it to load (u_short) port.
 	 */
 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
 	return (data);
 }
 
 /*
  * Write the byte "data" to I/O port "port".
  */
 void
 outb(u_int port, u_char data)
 {
 	u_char	al;
 	/*
 	 * Use an unnecessary assignment to help gcc's register allocator.
 	 * This make a large difference for gcc-1.40 and a tiny difference
 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
 	 * best results.  gcc-2.6.0 can't handle this.
 	 */
 	al = data;
 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
 }
 
 #endif /* DDB */
Index: head/sys/i386/i386/mp_machdep.c
===================================================================
--- head/sys/i386/i386/mp_machdep.c	(revision 82308)
+++ head/sys/i386/i386/mp_machdep.c	(revision 82309)
@@ -1,2440 +1,2442 @@
 /*
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_cpu.h"
+#include "opt_upages.h"
 
 #ifdef SMP
 #include <machine/smptests.h>
 #else
 #error
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
 #include <sys/dkstat.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 #include <machine/apic.h>
 #include <machine/atomic.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/mpapic.h>
 #include <machine/psl.h>
 #include <machine/segments.h>
 #include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
 #include <machine/tss.h>
 #include <machine/specialreg.h>
 #include <machine/globaldata.h>
+#include <machine/privatespace.h>
 
 #if defined(APIC_IO)
 #include <machine/md_var.h>		/* setidt() */
 #include <i386/isa/icu.h>		/* IPIs */
 #include <i386/isa/intr_machdep.h>	/* IPIs */
 #endif	/* APIC_IO */
 
 #if defined(TEST_DEFAULT_CONFIG)
 #define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
 #else
 #define MPFPS_MPFB1	mpfps->mpfb1
 #endif  /* TEST_DEFAULT_CONFIG */
 
 #define WARMBOOT_TARGET		0
 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
 
 #ifdef PC98
 #define BIOS_BASE		(0xe8000)
 #define BIOS_SIZE		(0x18000)
 #else
 #define BIOS_BASE		(0xf0000)
 #define BIOS_SIZE		(0x10000)
 #endif
 #define BIOS_COUNT		(BIOS_SIZE/4)
 
 #define CMOS_REG		(0x70)
 #define CMOS_DATA		(0x71)
 #define BIOS_RESET		(0x0f)
 #define BIOS_WARM		(0x0a)
 
 #define PROCENTRY_FLAG_EN	0x01
 #define PROCENTRY_FLAG_BP	0x02
 #define IOAPICENTRY_FLAG_EN	0x01
 
 
 /*
  * MP Floating Pointer Structure.
  *
  * search_for_sig() locates it by its leading "_MP_" signature and
  * i386_mp_probe() stores the resulting address in `mpfps'.
  * NOTE(review): these overlays use void * and u_long for table fields,
  * so they presumably rely on the 32-bit i386 data model -- confirm
  * before reusing them elsewhere.
  */
 typedef struct MPFPS {
 	char    signature[4];	/* matched against MP_SIG ("_MP_") */
 	void   *pap;		/* MP configuration table header, 0 if none */
 	u_char  length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  mpfb1;		/* non-zero: default configuration type */
 	u_char  mpfb2;		/* bit 0x80 set: PIC mode, else virtual wire */
 	u_char  mpfb3;
 	u_char  mpfb4;
 	u_char  mpfb5;
 }      *mpfps_t;
 
 /* MP Configuration Table Header */
 typedef struct MPCTH {
 	char    signature[4];
 	u_short base_table_length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  oem_id[8];
 	u_char  product_id[12];
 	void   *oem_table_pointer;
 	u_short oem_table_size;
 	u_short entry_count;	/* number of base table entries to walk */
 	void   *apic_address;	/* recorded as cpu_apic_address */
 	u_short extended_table_length;
 	u_char  extended_table_checksum;
 	u_char  reserved;
 }      *mpcth_t;
 
 
 /* base table entry, type 0: processor */
 typedef struct PROCENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  cpu_flags;	/* PROCENTRY_FLAG_EN, PROCENTRY_FLAG_BP */
 	u_long  cpu_signature;
 	u_long  feature_flags;
 	u_long  reserved1;
 	u_long  reserved2;
 }      *proc_entry_ptr;
 
 /* base table entry, type 1: bus */
 typedef struct BUSENTRY {
 	u_char  type;
 	u_char  bus_id;
 	char    bus_type[6];
 }      *bus_entry_ptr;
 
 /* base table entry, type 2: IO APIC */
 typedef struct IOAPICENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  apic_flags;	/* IOAPICENTRY_FLAG_EN */
 	void   *apic_address;	/* recorded in io_apic_address[] */
 }      *io_apic_entry_ptr;
 
 /* base table entry, type 3 (IO interrupt) and type 4 (local interrupt) */
 typedef struct INTENTRY {
 	u_char  type;
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 }      *int_entry_ptr;
 
 /* descriptions of MP basetable entries */
 typedef struct BASETABLE_ENTRY {
 	u_char  type;
 	u_char  length;		/* entry size in bytes, used to step the walk */
 	char    name[16];
 }       basetable_entry;
 
 /*
  * this code MUST be enabled here and in mpboot.s.
  * it follows the very early stages of AP boot by placing values in CMOS ram.
  * it NORMALLY will never be needed and thus the primitive method for enabling.
  *
 #define CHECK_POINTS
  */
 
 #if defined(CHECK_POINTS) && !defined(PC98)
 /* Read CMOS location A: select it via CMOS_REG, then read CMOS_DATA. */
 #define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
 /* Write D to CMOS location A: select it via CMOS_REG, then write CMOS_DATA. */
 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
 
 /*
  * Store D in each of the CMOS checkpoint locations 0x34..0x39.
  * NOTE(review): the ';' directly after the parameter list is part of the
  * replacement text, so every expansion starts with an empty statement.
  */
 #define CHECK_INIT(D);				\
 	CHECK_WRITE(0x34, (D));			\
 	CHECK_WRITE(0x35, (D));			\
 	CHECK_WRITE(0x36, (D));			\
 	CHECK_WRITE(0x37, (D));			\
 	CHECK_WRITE(0x38, (D));			\
 	CHECK_WRITE(0x39, (D));
 
 /*
  * Print the six checkpoint locations 0x34..0x39, prefixed by string S.
  * NOTE(review): same stray leading ';' as CHECK_INIT.
  */
 #define CHECK_PRINT(S);				\
 	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
 	   (S),					\
 	   CHECK_READ(0x34),			\
 	   CHECK_READ(0x35),			\
 	   CHECK_READ(0x36),			\
 	   CHECK_READ(0x37),			\
 	   CHECK_READ(0x38),			\
 	   CHECK_READ(0x39));
 
 #else				/* CHECK_POINTS */
 
 /* checkpoints disabled: both macros expand to nothing */
 #define CHECK_INIT(D)
 #define CHECK_PRINT(S)
 
 #endif				/* CHECK_POINTS */
 
 /*
  * Values to send to the POST hardware.
  */
 #define MP_BOOTADDRESS_POST	0x10
 #define MP_PROBE_POST		0x11
 #define MPTABLE_PASS1_POST	0x12
 
 #define MP_START_POST		0x13
 #define MP_ENABLE_POST		0x14
 #define MPTABLE_PASS2_POST	0x15
 
 #define START_ALL_APS_POST	0x16
 #define INSTALL_AP_TRAMP_POST	0x17
 #define START_AP_POST		0x18
 
 #define MP_ANNOUNCE_POST	0x19
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
 int	current_postcode;
 
 /** XXX FIXME: what system files declare these??? */
 extern struct region_descriptor r_gdt, r_idt;
 
 int	bsp_apic_ready = 0;	/* flags usability of BSP apic */
 int	mp_naps;		/* # of Application Processors (BSP excluded) */
 int	mp_nbusses;		/* # of busses */
 int	mp_napics;		/* # of IO APICs */
 int	boot_cpu_id;		/* designated BSP */
 vm_offset_t cpu_apic_address;	/* local APIC address, common to all CPUs */
 vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
 extern	int nkpt;
 
 u_int32_t cpu_apic_versions[MAXCPU];
 u_int32_t *io_apic_versions;
 
 #ifdef APIC_INTR_REORDER
 struct {
 	volatile int *location;
 	int bit;
 } apic_isrbit_location[32];
 #endif
 
 struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
 
 /*
  * APIC ID logical/physical mapping structures.
  * We oversize these to simplify boot-time config.
  */
 int     cpu_num_to_apic_id[NAPICID];
 int     io_num_to_apic_id[NAPICID];
 int     apic_id_to_logical[NAPICID];
 
 
 /* AP uses this during bootstrap.  Do not staticize.  */
 char *bootSTK;
 static int bootAP;
 
 /* Hotwire a 0->4MB V==P mapping */
 extern pt_entry_t *KPTphys;
 
 /* SMP page table page */
 extern pt_entry_t *SMPpt;
 
 struct pcb stoppcbs[MAXCPU];
 
 int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
 
 /*
  * Local data and functions.
  */
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready = 0;
 
 static int	mp_capable;
 static u_int	boot_address;
 static u_int	base_memory;
 
 static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
 static mpfps_t	mpfps;
 static int	search_for_sig(u_int32_t target, int count);
 static void	mp_enable(u_int boot_addr);
 
 static void	mptable_pass1(void);
 static int	mptable_pass2(void);
 static void	default_mp_table(int type);
 static void	fix_mp_table(void);
 static void	setup_apic_irq_mapping(void);
 static void	init_locks(void);
 static int	start_all_aps(u_int boot_addr);
 static void	install_ap_tramp(u_int boot_addr);
 static int	start_ap(int logicalCpu, u_int boot_addr);
 void		ap_init(void);
 static int	apic_int_is_bus_type(int intr, int bus_type);
 static void	release_aps(void *dummy);
 
 /*
  * initialize all the SMP locks
  */
 
 /* critical region around IO APIC, apic_imen */
 struct mtx		imen_mtx;
 
 /* lock region used by kernel profiling */
 int	mcount_lock;
 
 #ifdef USE_COMLOCK
 /* locks com (tty) data/hardware accesses: a FASTINTR() */
 struct mtx		com_mtx;
 #endif /* USE_COMLOCK */
 
 /*
  * Initialize the SMP locks owned by this file.
  *
  * Only com_mtx is set up here, and only when USE_COMLOCK is defined;
  * imen_mtx and mcount_lock are not touched by this function.
  */
 static void
 init_locks(void)
 {
 
 #ifdef USE_COMLOCK
 	mtx_init(&com_mtx, "com", MTX_SPIN);
 #endif /* USE_COMLOCK */
 }
 
 /*
  * Pick the base-memory address at which the AP trampoline will live.
  *
  * basemem is the amount of base memory in kilobytes.  The byte count is
  * remembered in base_memory and the chosen, 4k-aligned address in
  * boot_address, which is also the return value.
  */
 u_int
 mp_bootaddress(u_int basemem)
 {
 	u_int	top;		/* base memory size in bytes */
 	u_int	tramp;		/* candidate trampoline address */
 
 	POSTCODE(MP_BOOTADDRESS_POST);
 
 	top = basemem * 1024;
 	tramp = top & ~0xfff;		/* last 4k boundary at or below top */
 
 	/* drop one more page when the partial page cannot hold the code */
 	if (top - tramp < bootMP_size)
 		tramp -= 4096;
 
 	base_memory = top;
 	boot_address = tramp;
 
 	return (tramp);
 }
 
 
 /*
  * Look for an Intel MP spec table (ie, SMP capable hardware).
  */
 void
 i386_mp_probe(void)
 {
 	int     x;
 	u_long  segment;
 	u_int32_t target;
 
 	POSTCODE(MP_PROBE_POST);
 
 	/* see if EBDA exists */
 	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
 		/* search first 1K of EBDA */
 		target = (u_int32_t) (segment << 4);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	} else {
 		/* last 1K of base memory, effective 'top of base' passed in */
 		target = (u_int32_t) (base_memory - 0x400);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	}
 
 	/* search the BIOS */
 	target = (u_int32_t) BIOS_BASE;
 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
 		goto found;
 
 	/* nothing found */
 	mpfps = (mpfps_t)0;
 	mp_capable = 0;
 	return;
 
 found:
 	/* calculate needed resources */
 	mpfps = (mpfps_t)x;
 	mptable_pass1();
 
 	/* flag fact that we are running multiple processors */
 	mp_capable = 1;
 }
 
 /*
  * Report whether MP hardware was found.
  *
  * Returns mp_capable, which i386_mp_probe() sets to 1 when an MP table
  * was located and to 0 otherwise.  As a side effect all_cpus is set to 1.
  */
 int
 cpu_mp_probe(void)
 {
 	/*
 	 * Record BSP in CPU map
 	 * This is done here so that MBUF init code works correctly.
 	 */
 	all_cpus = 1;	/* presumably a bitmask with only the BSP's bit set */
 
 	return (mp_capable);
 }
 
 /*
  * Bring up the SMP hardware: enable the APIC machinery and start the
  * APs via mp_enable(), then run cpu_setregs().  Panics when no MP
  * capable hardware was detected by the probe.
  */
 void
 cpu_mp_start(void)
 {
 	POSTCODE(MP_START_POST);
 
 	/* an MP kernel cannot continue without an MP capable motherboard */
 	if (!mp_capable)
 		panic("MP hardware not found!");
 
 	mp_enable(boot_address);
 
 	cpu_setregs();
 }
 
 
 /*
  * Print one line per CPU (BSP first, then each AP) and, when APIC_IO is
  * configured, one line per IO APIC: APIC id, version and address.
  */
 void
 cpu_mp_announce(void)
 {
 	int     unit;
 
 	POSTCODE(MP_ANNOUNCE_POST);
 
 	/* the boot processor is reported as cpu0 */
 	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
 	printf(", version: 0x%08x", cpu_apic_versions[0]);
 	printf(", at 0x%08x\n", cpu_apic_address);
 
 	/* application processors are logical cpus 1..mp_naps */
 	unit = 1;
 	while (unit <= mp_naps) {
 		printf(" cpu%d (AP):  apic id: %2d", unit, CPU_TO_ID(unit));
 		printf(", version: 0x%08x", cpu_apic_versions[unit]);
 		printf(", at 0x%08x\n", cpu_apic_address);
 		++unit;
 	}
 
 #if defined(APIC_IO)
 	unit = 0;
 	while (unit < mp_napics) {
 		printf(" io%d (APIC): apic id: %2d", unit, IO_TO_ID(unit));
 		printf(", version: 0x%08x", io_apic_versions[unit]);
 		printf(", at 0x%08x\n", io_apic_address[unit]);
 		++unit;
 	}
 #else
 	printf(" Warning: APIC I/O disabled\n");
 #endif	/* APIC_IO */
 }
 
 /*
  * AP cpu's call this to sync up protected mode.
  *
  * Builds this CPU's private slice of the GDT, loads GDT, IDT and LDT,
  * fills in and loads the per-CPU TSS, then calls pmap_set_opt().
  * The logical CPU number is taken from bootAP.
  */
 void
 init_secondary(void)
 {
 	int	gsel_tss;
 	int	x, myid = bootAP;
 
 	/* point the private and TSS segments at this CPU's private space */
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
 	SMP_prvspace[myid].globaldata.gd_prvspace =
 		&SMP_prvspace[myid].globaldata;
 
 	/* convert the templates into this CPU's NGDT descriptors */
 	for (x = 0; x < NGDT; x++) {
 		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base = (int) &gdt[myid * NGDT];
 	lgdt(&r_gdt);			/* does magic intra-segment return */
 
 	lidt(&r_idt);
 
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/* set up the per-CPU TSS and load the task register with it */
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
 	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	ltr(gsel_tss);
 
 	pmap_set_opt();
 }
 
 
 #if defined(APIC_IO)
 /*
  * Final configuration of the BSP's local APIC:
  *  - disable 'pic mode'.
  *  - disable 'virtual wire mode'.
  *  - enable NMI.
  *
  * `picmode' is the value recorded by mptable_pass2() from mpfb2.
  */
 void
 bsp_apic_configure(void)
 {
 	u_char		byte;
 	u_int32_t	temp;
 
 	/* leave 'pic mode' if necessary */
 	if (picmode) {
 		outb(0x22, 0x70);	/* select IMCR */
 		byte = inb(0x23);	/* current contents */
 		byte |= 0x01;		/* mask external INTR */
 		outb(0x23, byte);	/* disconnect 8259s/NMI */
 	}
 
 	/* mask lint0 (the 8259 'virtual wire' connection) */
 	temp = lapic.lvt_lint0;
 	temp |= APIC_LVT_M;		/* set the mask */
 	lapic.lvt_lint0 = temp;
 
         /* setup lint1 to handle NMI */
         temp = lapic.lvt_lint1;
         temp &= ~APIC_LVT_M;		/* clear the mask */
         lapic.lvt_lint1 = temp;
 
 	/* dump the resulting APIC state on verbose boots */
 	if (bootverbose)
 		apic_dump("bsp_apic_configure()");
 }
 #endif  /* APIC_IO */
 
 
 /*******************************************************************
  * local functions and data
  */
 
 /*
  * start the SMP system
  *
  * Runs the second MP table pass under a temporary identity mapping,
  * applies the table fix-ups, programs the IO APICs and installs the
  * IPI vectors (APIC_IO only), initializes the SMP locks and finally
  * starts the APs at boot_addr.
  */
 static void
 mp_enable(u_int boot_addr)
 {
 	int     x;
 #if defined(APIC_IO)
 	int     apic;
 	u_int   ux;
 #endif	/* APIC_IO */
 
 	POSTCODE(MP_ENABLE_POST);
 
 	/* turn on 4MB of V == P addressing so we can get to MP table */
 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
 	invltlb();
 
 	/* examine the MP table for needed info, uses physical addresses */
 	x = mptable_pass2();
 
 	/* remove the temporary mapping again */
 	*(int *)PTD = 0;
 	invltlb();
 
 	/* can't process default configs till the CPU APIC is pmapped */
 	/* x is the default configuration type, or 0 for a real table */
 	if (x)
 		default_mp_table(x);
 
 	/* post scan cleanup */
 	fix_mp_table();
 	setup_apic_irq_mapping();
 
 #if defined(APIC_IO)
 
 	/* fill the LOGICAL io_apic_versions table */
 	for (apic = 0; apic < mp_napics; ++apic) {
 		ux = io_apic_read(apic, IOAPIC_VER);
 		io_apic_versions[apic] = ux;
 		io_apic_set_id(apic, IO_TO_ID(apic));
 	}
 
 	/* program each IO APIC in the system */
 	for (apic = 0; apic < mp_napics; ++apic)
 		if (io_apic_setup(apic) < 0)
 			panic("IO APIC setup failure");
 
 	/* install a 'Spurious INTerrupt' vector */
 	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for TLB invalidation */
 	setidt(XINVLTLB_OFFSET, Xinvltlb,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forwarding hardclock() */
 	setidt(XHARDCLOCK_OFFSET, Xhardclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for forwarding statclock() */
 	setidt(XSTATCLOCK_OFFSET, Xstatclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for all-CPU rendezvous */
 	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forcing an additional software trap */
 	setidt(XCPUAST_OFFSET, Xcpuast,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for CPU stop/restart */
 	setidt(XCPUSTOP_OFFSET, Xcpustop,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 #if defined(TEST_TEST1)
 	/* install a "fake hardware INTerrupt" vector */
 	setidt(XTEST1_OFFSET, Xtest1,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 #endif  /** TEST_TEST1 */
 
 #endif	/* APIC_IO */
 
 	/* initialize all SMP locks */
 	init_locks();
 
 	/* start each Application Processor */
 	/* NOTE(review): the int result of start_all_aps() is ignored here */
 	start_all_aps(boot_addr);
 }
 
 
 /*
  * look for the MP spec signature
  */
 
 /* string defined by the Intel MP Spec as identifying the MP table */
 #define MP_SIG		0x5f504d5f	/* _MP_ */
 #define NEXT(X)		((X) += 4)
 /*
  * Scan `count' 32-bit words starting at physical address `target'
  * (accessed through KERNBASE) for MP_SIG, examining every fourth word.
  * Returns the physical byte address of the match, or -1 if none.
  */
 static int
 search_for_sig(u_int32_t target, int count)
 {
 	u_int32_t *words = (u_int32_t *) (KERNBASE + target);
 	int     idx = 0;
 
 	while (idx < count) {
 		if (words[idx] == MP_SIG) {
 			/* make array index a byte index */
 			return (target + (idx * sizeof(u_int32_t)));
 		}
 		NEXT(idx);
 	}
 
 	return (-1);
 }
 
 
 /*
  * Base table entry descriptions, indexed by the entry's type byte.
  * The table walks in mptable_pass1() and mptable_pass2() use the
  * `length' member to step from one entry to the next.
  */
 static basetable_entry basetable_entry_types[] =
 {
 	{0, 20, "Processor"},
 	{1, 8, "Bus"},
 	{2, 8, "I/O APIC"},
 	{3, 8, "I/O INT"},
 	{4, 8, "Local INT"}
 };
 
 typedef struct BUSDATA {
 	u_char  bus_id;
 	enum busTypes bus_type;
 }       bus_datum;
 
 typedef struct INTDATA {
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 	u_char	int_vector;
 }       io_int, local_int;
 
 typedef struct BUSTYPENAME {
 	u_char  type;
 	char    name[7];
 }       bus_type_name;
 
 /*
  * Bus type codes paired with their name strings.
  * NOTE(review): MCA is listed twice (4th and 7th rows); the consumer of
  * this table is not visible here, so confirm whether that is intended.
  */
 static bus_type_name bus_type_table[] =
 {
 	{CBUS, "CBUS"},
 	{CBUSII, "CBUSII"},
 	{EISA, "EISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{ISA, "ISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{PCI, "PCI"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{XPRESS, "XPRESS"},
 	{UNKNOWN_BUSTYPE, "---"}
 };
 /*
  * from MP spec v1.4, table 5-1
  *
  * One row per default configuration type; mptable_pass1() indexes it
  * with (mpfb1 - 1) and reads column 0 as the bus count.
  * NOTE(review): nothing visible here range-checks mpfb1 against the
  * seven rows -- confirm.
  */
 static int default_data[7][5] =
 {
 /*   nbus, id0, type0, id1, type1 */
 	{1, 0, ISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, MCA, 255, 255},
 	{2, 0, ISA, 1, PCI},
 	{2, 0, EISA, 1, PCI},
 	{2, 0, MCA, 1, PCI}
 };
 
 
 /* the bus data */
 static bus_datum *bus_data;
 
 /* the IO INT data, one entry per possible APIC INTerrupt */
 static io_int  *io_apic_ints;
 
 static int nintrs;
 
 static int processor_entry	__P((proc_entry_ptr entry, int cpu));
 static int bus_entry		__P((bus_entry_ptr entry, int bus));
 static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
 static int int_entry		__P((int_entry_ptr entry, int intr));
 static int lookup_bus_type	__P((char *name));
 
 
 /*
  * 1st pass on motherboard's Intel MP specification table.
  *
  * This pass only counts entries and records APIC addresses so that the
  * second pass can size its tables.  For a default configuration
  * (mpfb1 != 0) fixed values are used instead of walking a table.
  *
  * initializes:
  *	mp_ncpus = 1
  *
  * determines:
  *	cpu_apic_address (common to all CPUs)
  *	io_apic_address[N]
  *	mp_naps (number of APs, i.e. enabled CPUs minus the BSP)
  *	mp_nbusses
  *	mp_napics
  *	nintrs
  *
  * Panics when the configuration table header is missing, when an
  * unknown entry type is met, or when more enabled IO APICs are listed
  * than io_apic_address[] can hold.
  */
 static void
 mptable_pass1(void)
 {
 	int	x;
 	mpcth_t	cth;
 	void	*position;
 	int	count;
 	int	type;
 
 	POSTCODE(MPTABLE_PASS1_POST);
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		io_apic_address[x] = ~0;	/* IO APIC address table */
 	}
 
 	/* init everything to empty */
 	mp_naps = 0;
 	mp_nbusses = 0;
 	mp_napics = 0;
 	nintrs = 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0) {
 		/* use default addresses */
 		cpu_apic_address = DEFAULT_APIC_BASE;
 		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
 
 		/* fill in with defaults */
 		mp_naps = 2;		/* includes BSP */
 		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
 #if defined(APIC_IO)
 		mp_napics = 1;
 		nintrs = 16;
 #endif	/* APIC_IO */
 	}
 	else {
 		if ((cth = mpfps->pap) == 0)
 			panic("MP Configuration Table Header MISSING!");
 
 		cpu_apic_address = (vm_offset_t) cth->apic_address;
 
 		/* walk the table, recording info of interest */
 		position = (u_char *) cth + sizeof(struct MPCTH);
 		count = cth->entry_count;
 
 		while (count--) {
 			switch (type = *(u_char *) position) {
 			case 0: /* processor_entry */
 				if (((proc_entry_ptr)position)->cpu_flags
 					& PROCENTRY_FLAG_EN)
 					++mp_naps;
 				break;
 			case 1: /* bus_entry */
 				++mp_nbusses;
 				break;
 			case 2: /* io_apic_entry */
 				if (((io_apic_entry_ptr)position)->apic_flags
 					& IOAPICENTRY_FLAG_EN) {
 					/* io_apic_address[] has NAPICID slots */
 					if (mp_napics >= NAPICID)
 						panic("too many IO APICs in MP table");
 					io_apic_address[mp_napics++] =
 					    (vm_offset_t)((io_apic_entry_ptr)
 						position)->apic_address;
 				}
 				break;
 			case 3: /* int_entry (IO interrupt) */
 				++nintrs;
 				break;
 			case 4:	/* int_entry (local interrupt), not counted */
 				break;
 			default:
 				panic("mpfps Base Table HOSED!");
 				/* NOTREACHED */
 			}
 
 			/*
 			 * Step to the next entry.  Plain pointer arithmetic
 			 * is used instead of a cast on the left-hand side,
 			 * which is not valid ISO C.
 			 */
 			position = (u_char *) position +
 			    basetable_entry_types[type].length;
 		}
 	}
 
 	/* qualify the numbers */
 	if (mp_naps > MAXCPU) {
 		printf("Warning: only using %d of %d available CPUs!\n",
 			MAXCPU, mp_naps);
 		mp_naps = MAXCPU;
 	}
 
 	/*
 	 * Count the BSP.
 	 * This is also used as a counter while starting the APs.
 	 */
 	mp_ncpus = 1;
 
 	--mp_naps;	/* subtract the BSP */
 }
 
 
 /*
  * 2nd pass on motherboard's Intel MP specification table.
  *
  * Allocates the tables sized by mptable_pass1(), maps the IO APIC
  * pages through SMPpt, resets the ID mapping tables and then hands
  * each base table entry to its per-type handler.
  *
  * sets:
  *	boot_cpu_id
  *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
  *	CPU_TO_ID(N), logical CPU to APIC ID table
  *	IO_TO_ID(N), logical IO to APIC ID table
  *	bus_data[N]
  *	io_apic_ints[N]
  *	picmode
  *
  * Returns the default configuration type (mpfb1) when the firmware
  * requests a default configuration, 0 when a real table was parsed.
  * Panics when the table header is missing, an unknown entry type is
  * met, or no handler set boot_cpu_id.
  */
 static int
 mptable_pass2(void)
 {
 	int     x;
 	mpcth_t cth;
 	void   *position;
 	int     count;
 	int     type;
 	int     apic, bus, cpu, intr;
 	int	i, j;
 	int	pgeflag;
 
 	POSTCODE(MPTABLE_PASS2_POST);
 
 	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
 
 	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	/* one spare slot: fix_mp_table() may append an ExtInt entry */
 	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
 	    M_DEVBUF, M_WAITOK);
 
 	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
 
 	/*
 	 * Give every IO APIC a virtual address.  Slots are taken from
 	 * SMPpt[NPTEPG-2] downwards; APICs sharing a page frame share
 	 * a slot.
 	 */
 	for (i = 0; i < mp_napics; i++) {
 		for (j = 0; j < mp_napics; j++) {
 			/* same page frame as a previous IO apic? */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
 			    (io_apic_address[i] & PG_FRAME)) {
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 			/* use this slot if available */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
 				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
 				    pgeflag | (io_apic_address[i] & PG_FRAME));
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 		}
 	}
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
 		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
 		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
 	}
 
 	/* clear bus data table */
 	for (x = 0; x < mp_nbusses; ++x)
 		bus_data[x].bus_id = 0xff;
 
 	/* clear IO APIC INT table */
 	for (x = 0; x < (nintrs + 1); ++x) {
 		io_apic_ints[x].int_type = 0xff;
 		io_apic_ints[x].int_vector = 0xff;
 	}
 
 	/* setup the cpu/apic mapping arrays */
 	boot_cpu_id = -1;
 
 	/* record whether PIC or virtual-wire mode */
 	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0)
 		return MPFPS_MPFB1;	/* return default configuration type */
 
 	if ((cth = mpfps->pap) == 0)
 		panic("MP Configuration Table Header MISSING!");
 
 	/* walk the table, recording info of interest */
 	position = (u_char *) cth + sizeof(struct MPCTH);
 	count = cth->entry_count;
 	apic = bus = intr = 0;
 	cpu = 1;				/* pre-count the BSP */
 
 	/* each handler returns non-zero when it consumed a table slot */
 	while (count--) {
 		switch (type = *(u_char *) position) {
 		case 0:
 			if (processor_entry(position, cpu))
 				++cpu;
 			break;
 		case 1:
 			if (bus_entry(position, bus))
 				++bus;
 			break;
 		case 2:
 			if (io_apic_entry(position, apic))
 				++apic;
 			break;
 		case 3:
 			if (int_entry(position, intr))
 				++intr;
 			break;
 		case 4:
 			/* int_entry(position); */
 			break;
 		default:
 			panic("mpfps Base Table HOSED!");
 			/* NOTREACHED */
 		}
 
 		/*
 		 * Step to the next entry.  Plain pointer arithmetic is
 		 * used instead of a cast on the left-hand side, which is
 		 * not valid ISO C.
 		 */
 		position = (u_char *) position +
 		    basetable_entry_types[type].length;
 	}
 
 	if (boot_cpu_id == -1)
 		panic("NO BSP found!");
 
 	/* report fact that its NOT a default configuration */
 	return 0;
 }
 
 
 void
 assign_apic_irq(int apic, int intpin, int irq)
 {
 	int x;
 	
 	if (int_to_apicintpin[irq].ioapic != -1)
 		panic("assign_apic_irq: inconsistent table");
 	
 	int_to_apicintpin[irq].ioapic = apic;
 	int_to_apicintpin[irq].int_pin = intpin;
 	int_to_apicintpin[irq].apic_address = ioapic[apic];
 	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 	
 	for (x = 0; x < nintrs; x++) {
 		if ((io_apic_ints[x].int_type == 0 || 
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector == 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
 		    io_apic_ints[x].dst_apic_int == intpin)
 			io_apic_ints[x].int_vector = irq;
 	}
 }
 
 /*
  * Undo assign_apic_irq() for logical interrupt `irq'.
  *
  * Releases the int_to_apicintpin[] slot and marks every INT/ExtInt
  * table entry on the APIC pin the irq was bound to as unassigned
  * (int_vector 0xff) again.  Panics if the irq is not currently
  * assigned.
  */
 void
 revoke_apic_irq(int irq)
 {
 	int x;
 	int oldapic;
 	int oldintpin;
 
 	if (int_to_apicintpin[irq].ioapic == -1)
 		panic("revoke_apic_irq: inconsistent table");
 
 	/* remember the binding before it is wiped */
 	oldapic = int_to_apicintpin[irq].ioapic;
 	oldintpin = int_to_apicintpin[irq].int_pin;
 
 	int_to_apicintpin[irq].ioapic = -1;
 	int_to_apicintpin[irq].int_pin = 0;
 	int_to_apicintpin[irq].apic_address = NULL;
 	int_to_apicintpin[irq].redirindex = 0;
 
 	for (x = 0; x < nintrs; x++) {
 		/*
 		 * Match entries that currently HAVE a vector; testing for
 		 * 0xff here would only ever rewrite 0xff with 0xff and
 		 * leave the stale assignment in place.
 		 */
 		if ((io_apic_ints[x].int_type == 0 ||
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector != 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 		    io_apic_ints[x].dst_apic_int == oldintpin)
 			io_apic_ints[x].int_vector = 0xff;
 	}
 }
 
 
 static void
 allocate_apic_irq(int intr)
 {
 	int apic;
 	int intpin;
 	int irq;
 	
 	if (io_apic_ints[intr].int_vector != 0xff)
 		return;		/* Interrupt handler already assigned */
 	
 	if (io_apic_ints[intr].int_type != 0 &&
 	    (io_apic_ints[intr].int_type != 3 ||
 	     (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 	      io_apic_ints[intr].dst_apic_int == 0)))
 		return;		/* Not INT or ExtInt on != (0, 0) */
 	
 	irq = 0;
 	while (irq < APIC_INTMAPSIZE &&
 	       int_to_apicintpin[irq].ioapic != -1)
 		irq++;
 	
 	if (irq >= APIC_INTMAPSIZE)
 		return;		/* No free interrupt handlers */
 	
 	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 	intpin = io_apic_ints[intr].dst_apic_int;
 	
 	assign_apic_irq(apic, intpin, irq);
 	io_apic_setup_intpin(apic, intpin);
 }
 
 
 static void
 swap_apic_id(int apic, int oldid, int newid)
 {
 	int x;
 	int oapic;
 	
 
 	if (oldid == newid)
 		return;			/* Nothing to do */
 	
 	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 	       apic, oldid, newid);
 	
 	/* Swap physical APIC IDs in interrupt entries */
 	for (x = 0; x < nintrs; x++) {
 		if (io_apic_ints[x].dst_apic_id == oldid)
 			io_apic_ints[x].dst_apic_id = newid;
 		else if (io_apic_ints[x].dst_apic_id == newid)
 			io_apic_ints[x].dst_apic_id = oldid;
 	}
 	
 	/* Swap physical APIC IDs in IO_TO_ID mappings */
 	for (oapic = 0; oapic < mp_napics; oapic++)
 		if (IO_TO_ID(oapic) == newid)
 			break;
 	
 	if (oapic < mp_napics) {
 		printf("Changing APIC ID for IO APIC #%d from "
 		       "%d to %d in MP table\n",
 		       oapic, newid, oldid);
 		IO_TO_ID(oapic) = oldid;
 	}
 	IO_TO_ID(apic) = newid;
 }
 
 
 /*
  * Rebuild the physical-APIC-ID to logical-number table (ID_TO_IO) from
  * the CPU_TO_ID and IO_TO_ID tables.
  *
  * Unset IDs are stored as -1 by mptable_pass2(); such a value passes a
  * plain "< NAPICID" test, so IDs are also required to be non-negative
  * before they are used as an index.
  */
 static void
 fix_id_to_io_mapping(void)
 {
 	int x;
 	int id;
 
 	for (x = 0; x < NAPICID; x++)
 		ID_TO_IO(x) = -1;
 
 	/* logical CPUs 0..mp_naps (BSP plus APs) */
 	for (x = 0; x <= mp_naps; x++) {
 		id = CPU_TO_ID(x);
 		if (id >= 0 && id < NAPICID)
 			ID_TO_IO(id) = x;
 	}
 
 	/* logical IO APICs; these overwrite a CPU entry with the same ID */
 	for (x = 0; x < mp_napics; x++) {
 		id = IO_TO_ID(x);
 		if (id >= 0 && id < NAPICID)
 			ID_TO_IO(id) = x;
 	}
 }
 
 
 /*
  * Return the lowest physical APIC ID that is used neither by a CPU nor
  * by an IO APIC, or NAPICID when every ID is taken.
  */
 static int
 first_free_apic_id(void)
 {
 	int	id;
 	int	i;
 	int	taken;
 
 	for (id = 0; id < NAPICID; id++) {
 		taken = 0;
 
 		/* claimed by a CPU (BSP or AP)? */
 		for (i = 0; i <= mp_naps && !taken; i++)
 			if (CPU_TO_ID(i) == id)
 				taken = 1;
 
 		/* claimed by an IO APIC? */
 		for (i = 0; i < mp_napics && !taken; i++)
 			if (IO_TO_ID(i) == id)
 				taken = 1;
 
 		if (!taken)
 			return (id);
 	}
 
 	/* id == NAPICID here: nothing free */
 	return (id);
 }
 
 
 /*
  * Decide whether physical APIC ID `id' may be used for IO APIC `apic'.
  *
  * Returns 0 when id is NAPICID or larger, when a CPU already uses it,
  * or when an IO APIC with a lower logical number uses it; 1 otherwise.
  */
 static int
 io_apic_id_acceptable(int apic, int id)
 {
 	int	i;
 	int	limit;		/* number of lower-numbered IO APICs */
 
 	if (id >= NAPICID)
 		return (0);	/* Out of range */
 
 	i = 0;
 	while (i <= mp_naps) {
 		if (CPU_TO_ID(i) == id)
 			return (0);	/* Conflict with CPU */
 		i++;
 	}
 
 	/* only IO APICs numbered below `apic' are compared */
 	limit = (apic < mp_napics) ? apic : mp_napics;
 	for (i = 0; i < limit; i++) {
 		if (IO_TO_ID(i) == id)
 			return (0);	/* Conflict with other APIC */
 	}
 
 	return (1);		/* ID is acceptable for IO APIC */
 }
 
 
 /*
  * Post-process the data gathered from the Intel MP specification table:
  *  - renumber a lone, mis-numbered PCI bus so that it becomes bus 0,
  *  - resolve physical APIC ID conflicts for the IO APICs,
  *  - rebuild the ID_TO_IO table,
  *  - add the 8259->APIC ExtInt entry if the table lacks one.
  */
 static void
 fix_mp_table(void)
 {
 	int	x;
 	int	id;
 	int	bus_0 = 0;	/* Stop GCC warning */
 	int	bus_pci = 0;	/* Stop GCC warning */
 	int	num_pci_bus;
 	int	apic;		/* IO APIC unit number */
 	int     freeid;		/* Free physical APIC ID */
 	int	physid;		/* Current physical IO APIC ID */
 
 	/*
 	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 	 * did it wrong.  The MP spec says that when more than 1 PCI bus
 	 * exists the BIOS must begin with bus entries for the PCI bus and use
 	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
 	 * exists the BIOS can choose to ignore this ordering, and indeed many
 	 * MP motherboards do ignore it.  This causes a problem when the PCI
 	 * sub-system makes requests of the MP sub-system based on PCI bus
 	 * numbers.	So here we look for the situation and renumber the
 	 * busses and associated INTs in an effort to "make it right".
 	 */
 
 	/* find bus 0, PCI bus, count the number of PCI busses */
 	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 		if (bus_data[x].bus_id == 0) {
 			bus_0 = x;
 		}
 		if (bus_data[x].bus_type == PCI) {
 			++num_pci_bus;
 			bus_pci = x;
 		}
 	}
 	/*
 	 * bus_0 == slot of bus with ID of 0
 	 * bus_pci == slot of last PCI bus encountered
 	 */
 
 	/* check the 1 PCI bus case for sanity */
 	/* if it is number 0 all is well */
 	if (num_pci_bus == 1 &&
 	    bus_data[bus_pci].bus_id != 0) {
 		
 		/* mis-numbered, swap with whichever bus uses slot 0 */
 
 		/* swap the bus entry types */
 		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 		bus_data[bus_0].bus_type = PCI;
 
 		/* swap each relevant INTerrupt entry */
 		id = bus_data[bus_pci].bus_id;
 		for (x = 0; x < nintrs; ++x) {
 			if (io_apic_ints[x].src_bus_id == id) {
 				io_apic_ints[x].src_bus_id = 0;
 			}
 			else if (io_apic_ints[x].src_bus_id == 0) {
 				io_apic_ints[x].src_bus_id = id;
 			}
 		}
 	}
 
 	/* Assign IO APIC IDs.
 	 * 
 	 * First try the existing ID. If a conflict is detected, try
 	 * the ID in the MP table.  If a conflict is still detected, find
 	 * a free id.
 	 *
 	 * We cannot use the ID_TO_IO table before all conflicts have been
 	 * resolved and the table has been corrected.
 	 */
 	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 		
 		/* First try to use the value set by the BIOS */
 		physid = io_apic_get_id(apic);
 		if (io_apic_id_acceptable(apic, physid)) {
 			if (IO_TO_ID(apic) != physid)
 				swap_apic_id(apic, IO_TO_ID(apic), physid);
 			continue;
 		}
 
 		/* Then check if the value in the MP table is acceptable */
 		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 			continue;
 
 		/* Last resort, find a free APIC ID and use it */
 		freeid = first_free_apic_id();
 		if (freeid >= NAPICID)
 			panic("No free physical APIC IDs found");
 		
 		if (io_apic_id_acceptable(apic, freeid)) {
 			swap_apic_id(apic, IO_TO_ID(apic), freeid);
 			continue;
 		}
 		panic("Free physical APIC ID not usable");
 	}
 	fix_id_to_io_mapping();
 
 	/* detect and fix broken Compaq MP table */
 	if (apic_int_type(0, 0) == -1) {
 		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 		/*
 		 * Index nintrs is in bounds: mptable_pass2() allocates
 		 * nintrs + 1 entries for io_apic_ints.
 		 */
 		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
 		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
 		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
 		nintrs++;
 	}
 }
 
 
 /*
  * Build the IRQ -> (IO APIC, pin) map.
  *
  * Every int_to_apicintpin[] slot is reset first.  ISA/EISA sources are
  * then given the IRQ number they have on their bus, and if IRQ 0 is still
  * free afterwards it goes to the ExtInt entry on pin 0 of IO APIC 0.
  * PCI sources are not assigned here.
  */
 static void
 setup_apic_irq_mapping(void)
 {
 	int	i;
 	int	irq;
 
 	/* start from an empty map */
 	for (i = 0; i < APIC_INTMAPSIZE; i++) {
 		int_to_apicintpin[i].ioapic = -1;
 		int_to_apicintpin[i].int_pin = 0;
 		int_to_apicintpin[i].apic_address = NULL;
 		int_to_apicintpin[i].redirindex = 0;
 	}
 
 	/* pass 1: ISA/EISA interrupts keep their bus IRQ number */
 	for (i = 0; i < nintrs; i++) {
 		irq = io_apic_ints[i].src_bus_irq;
 		if (irq >= APIC_INTMAPSIZE)
 			continue;	/* does not fit in the map */
 		if (io_apic_ints[i].int_vector != 0xff)
 			continue;	/* entry already has an IRQ */
 		if (int_to_apicintpin[irq].ioapic != -1)
 			continue;	/* IRQ slot already taken */
 		if (!apic_int_is_bus_type(i, ISA) &&
 		    !apic_int_is_bus_type(i, EISA))
 			continue;
 		if (io_apic_ints[i].int_type != 0)
 			continue;	/* only standard INT entries */
 
 		assign_apic_irq(ID_TO_IO(io_apic_ints[i].dst_apic_id),
 				io_apic_ints[i].dst_apic_int,
 				irq);
 	}
 
 	/* pass 2: give IRQ 0 to the ExtInt entry if nothing claimed it */
 	for (i = 0; i < nintrs; i++) {
 		if (io_apic_ints[i].dst_apic_int != 0 ||
 		    io_apic_ints[i].dst_apic_id != IO_TO_ID(0) ||
 		    io_apic_ints[i].int_vector != 0xff ||
 		    int_to_apicintpin[0].ioapic != -1 ||
 		    io_apic_ints[i].int_type != 3)
 			continue;
 
 		assign_apic_irq(0, 0, 0);
 		break;
 	}
 	/* PCI interrupt assignment is deferred */
 }
 
 
 /*
  * Record one processor entry of the MP table.
  *
  * Disabled processors are skipped.  The boot processor becomes logical
  * CPU 0; any other processor is recorded as logical CPU 'cpu' provided
  * 'cpu' is below MAXCPU.  Panics if the APIC ID is not below NAPICID.
  *
  * Returns 1 when an AP was recorded, 0 in every other case.
  */
 static int
 processor_entry(proc_entry_ptr entry, int cpu)
 {
 	/* unusable processor */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_EN) == 0)
 		return 0;
 
 	if (entry->apic_id >= NAPICID)
 		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 
 	/* the BSP always maps to logical CPU 0 */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_BP) != 0) {
 		boot_cpu_id = entry->apic_id;
 		CPU_TO_ID(0) = entry->apic_id;
 		ID_TO_CPU(entry->apic_id) = 0;
 		return 0;	/* it has already been counted */
 	}
 
 	/* no room left for another AP */
 	if (cpu >= MAXCPU)
 		return 0;
 
 	CPU_TO_ID(cpu) = entry->apic_id;
 	ID_TO_CPU(entry->apic_id) = cpu;
 	return 1;
 }
 
 
 /*
  * Record one bus entry of the MP table in bus_data[bus].
  *
  * The bus type string is read up to the first blank (at most 6
  * characters) and translated with lookup_bus_type(); an unknown type
  * panics.  Always returns 1.
  */
 static int
 bus_entry(bus_entry_ptr entry, int bus)
 {
 	char	name[8];
 	int	len;
 	int	type;
 
 	/* extract the blank-terminated bus type name */
 	for (len = 0; len < 6 && entry->bus_type[len] != ' '; ++len)
 		name[len] = entry->bus_type[len];
 	name[len] = '\0';
 
 	type = lookup_bus_type(name);
 	if (type == UNKNOWN_BUSTYPE)
 		panic("unknown bus type: '%s'", name);
 
 	bus_data[bus].bus_id = entry->bus_id;
 	bus_data[bus].bus_type = type;
 
 	return 1;
 }
 
 
 /*
  * Record one IO APIC entry of the MP table as logical IO APIC 'apic'.
  * Returns 0 for a disabled IO APIC, 1 otherwise.
  */
 static int
 io_apic_entry(io_apic_entry_ptr entry, int apic)
 {
 	if ((entry->apic_flags & IOAPICENTRY_FLAG_EN) == 0)
 		return 0;		/* disabled, ignore it */
 
 	IO_TO_ID(apic) = entry->apic_id;
 
 	/* the reverse map is only filled in for IDs below NAPICID */
 	if (entry->apic_id < NAPICID)
 		ID_TO_IO(entry->apic_id) = apic;
 
 	return 1;
 }
 
 
 /*
  * Translate a bus type name into its numeric type using bus_type_table[].
  * Returns UNKNOWN_BUSTYPE when the name is not in the table.
  */
 static int
 lookup_bus_type(char *name)
 {
 	int	idx = 0;
 
 	while (idx < MAX_BUSTYPE) {
 		if (strcmp(bus_type_table[idx].name, name) == 0)
 			return bus_type_table[idx].type;
 		++idx;
 	}
 
 	return UNKNOWN_BUSTYPE;
 }
 
 
 /*
  * Copy one interrupt entry of the MP table into io_apic_ints[intr].
  *
  * A destination APIC ID of 255 means the signal goes to all IO APICs; in
  * that case the first IO APIC with enough interrupt pins for the
  * destination pin is substituted.  If none qualifies the ID is stored
  * unchanged.  Always returns 1.
  */
 static int
 int_entry(int_entry_ptr entry, int intr)
 {
 	int	apic;
 	int	dst_id;
 
 	io_apic_ints[intr].int_type = entry->int_type;
 	io_apic_ints[intr].int_flags = entry->int_flags;
 	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
 	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
 
 	dst_id = entry->dst_apic_id;
 	if (entry->dst_apic_id == 255) {
 		/* pick the first IO APIC that has the requested pin */
 		for (apic = 0; apic < mp_napics; apic++) {
 			if (((io_apic_read(apic, IOAPIC_VER) &
 			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
 			    entry->dst_apic_int)
 				break;
 		}
 		if (apic < mp_napics)
 			dst_id = IO_TO_ID(apic);
 	}
 	io_apic_ints[intr].dst_apic_id = dst_id;
 	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
 
 	return 1;
 }
 
 
 /*
  * Return 1 when the source bus of interrupt entry 'intr' is of type
  * 'bus_type', 0 otherwise.
  */
 static int
 apic_int_is_bus_type(int intr, int bus_type)
 {
 	int	slot;
 
 	for (slot = 0; slot < mp_nbusses; ++slot) {
 		if (bus_data[slot].bus_id != io_apic_ints[intr].src_bus_id)
 			continue;
 		if ((int) bus_data[slot].bus_type == bus_type)
 			return 1;
 	}
 
 	return 0;
 }
 
 
 /*
  * Translate a traditional ISA INT mask into an APIC mask.
  *
  * Only the lowest set bit of 'isa_mask' is looked at.  Returns 0 when the
  * mask is empty or isa_apic_irq() finds no APIC connection for that IRQ.
  */
 u_int
 isa_apic_mask(u_int isa_mask)
 {
 	int bit;
 	int pin;
 
 #if defined(SKIP_IRQ15_REDIRECT)
 	if (isa_mask == (1 << 15)) {
 		printf("skipping ISA IRQ15 redirect\n");
 		return isa_mask;
 	}
 #endif  /* SKIP_IRQ15_REDIRECT */
 
 	/* ffs() is one based and yields 0 for an empty mask */
 	bit = ffs(isa_mask);
 	if (bit == 0)
 		return 0;
 
 	pin = isa_apic_irq(bit - 1);
 	return (pin == -1) ? 0 : (1 << pin);
 }
 
 
 /*
  * Determine which APIC pin an ISA/EISA INT is attached to.
  */
 #define INTTYPE(I)	(io_apic_ints[(I)].int_type)
 #define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
 #define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
 #define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 
 #define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
 int
 isa_apic_irq(int isa_irq)
 {
 	int     intr;
 
 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
 		if (INTTYPE(intr) == 0) {		/* standard INT */
 			if (SRCBUSIRQ(intr) == isa_irq) {
 				if (apic_int_is_bus_type(intr, ISA) ||
 			            apic_int_is_bus_type(intr, EISA)) {
 					if (INTIRQ(intr) == 0xff)
 						return -1; /* unassigned */
 					return INTIRQ(intr);	/* found */
 				}
 			}
 		}
 	}
 	return -1;					/* NOT found */
 }
 
 
 /*
  * Determine which APIC pin a PCI INT is attached to.
  */
 #define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
 #define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 #define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
 int
 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
 {
 	int     intr;
 
 	--pciInt;					/* zero based */
 
 	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
 		if ((INTTYPE(intr) == 0)		/* standard INT */
 		    && (SRCBUSID(intr) == pciBus)
 		    && (SRCBUSDEVICE(intr) == pciDevice)
 		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
 			if (apic_int_is_bus_type(intr, PCI)) {
 				if (INTIRQ(intr) == 0xff)
 					allocate_apic_irq(intr);
 				if (INTIRQ(intr) == 0xff)
 					return -1;	/* unassigned */
 				return INTIRQ(intr);	/* exact match */
 			}
 
 	return -1;					/* NOT found */
 }
 
 int
 next_apic_irq(int irq) 
 {
 	int intr, ointr;
 	int bus, bustype;
 
 	bus = 0;
 	bustype = 0;
 	for (intr = 0; intr < nintrs; intr++) {
 		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
 			continue;
 		bus = SRCBUSID(intr);
 		bustype = apic_bus_type(bus);
 		if (bustype != ISA &&
 		    bustype != EISA &&
 		    bustype != PCI)
 			continue;
 		break;
 	}
 	if (intr >= nintrs) {
 		return -1;
 	}
 	for (ointr = intr + 1; ointr < nintrs; ointr++) {
 		if (INTTYPE(ointr) != 0)
 			continue;
 		if (bus != SRCBUSID(ointr))
 			continue;
 		if (bustype == PCI) {
 			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
 				continue;
 			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
 				continue;
 		}
 		if (bustype == ISA || bustype == EISA) {
 			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 				continue;
 		}
 		if (INTPIN(intr) == INTPIN(ointr))
 			continue;
 		break;
 	}
 	if (ointr >= nintrs) {
 		return -1;
 	}
 	return INTIRQ(ointr);
 }
 #undef SRCBUSLINE
 #undef SRCBUSDEVICE
 #undef SRCBUSID
 #undef SRCBUSIRQ
 
 #undef INTPIN
 #undef INTIRQ
 #undef INTAPIC
 #undef INTTYPE
 
 
 /*
  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
  *
  * XXX FIXME:
  *  Exactly what this means is unclear at this point.  It is a solution
  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
  *  option.
  */
 int
 undirect_isa_irq(int rirq)
 {
 	/*
 	 * Nothing is reprogrammed in either build variant; only the
 	 * message and the return value differ (-1 with READY, 0 without).
 	 */
 #if defined(READY)
 	if (bootverbose) {
 		printf("Freeing redirected ISA irq %d.\n", rirq);
 	}
 	/** FIXME: tickle the MB redirector chip */
 	return -1;
 #else
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 	}
 	return 0;
 #endif  /* READY */
 }
 
 
 /*
  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt.
  *
  * Nothing is reprogrammed in either build variant; only the message and
  * the return value differ (-1 with READY, 0 without).
  */
 int
 undirect_pci_irq(int rirq)
 {
 #if defined(READY)
 	if (bootverbose) {
 		printf("Freeing redirected PCI irq %d.\n", rirq);
 	}
 
 	/** FIXME: tickle the MB redirector chip */
 	return -1;
 #else
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 		       rirq);
 	}
 	return 0;
 #endif  /* READY */
 }
 
 
 /*
  * Look up the type of the bus whose ID is 'id'.
  * Returns the type of the first matching bus_data[] entry, or -1 when no
  * bus carries that ID.
  */
 int
 apic_bus_type(int id)
 {
 	int	slot = 0;
 
 	while (slot < mp_nbusses) {
 		if (bus_data[slot].bus_id == id)
 			return bus_data[slot].bus_type;
 		++slot;
 	}
 
 	return -1;
 }
 
 
 /*
  * Source bus ID of the interrupt wired to pin 'pin' of LOGICAL IO APIC
  * 'apic'.  The first matching entry wins; returns -1 if there is none.
  */
 int
 apic_src_bus_id(int apic, int pin)
 {
 	int	i;
 
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return (io_apic_ints[i].src_bus_id);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * Source bus IRQ of the interrupt wired to pin 'pin' of LOGICAL IO APIC
  * 'apic'.  The first matching entry wins; returns -1 if there is none.
  */
 int
 apic_src_bus_irq(int apic, int pin)
 {
 	int	i;
 
 	for (i = 0; i < nintrs; i++) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return (io_apic_ints[i].src_bus_irq);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * INTerrupt type of the entry wired to pin 'pin' of LOGICAL IO APIC
  * 'apic'.  The first matching entry wins; returns -1 if there is none.
  */
 int
 apic_int_type(int apic, int pin)
 {
 	int	i;
 
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return (io_apic_ints[i].int_type);
 	}
 
 	return -1;		/* NOT found */
 }
 
 /*
  * IRQ assigned to pin 'pin' of LOGICAL IO APIC 'apic'.
  *
  * Returns -1 when no entry matches or the first matching entry is still
  * unassigned (0xff).  Panics when the IRQ found does not map back to the
  * same (apic, pin) pair in int_to_apicintpin[].
  */
 int 
 apic_irq(int apic, int pin)
 {
 	int i;
 	int irq;
 
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id) ||
 		    pin != io_apic_ints[i].dst_apic_int)
 			continue;
 
 		irq = io_apic_ints[i].int_vector;
 		if (irq == 0xff)
 			return -1;
 
 		/* cross-check against the reverse map */
 		if (apic != int_to_apicintpin[irq].ioapic)
 			panic("apic_irq: inconsistent table");
 		if (pin != int_to_apicintpin[irq].int_pin)
 			panic("apic_irq inconsistent table (2)");
 		return irq;
 	}
 	return -1;
 }
 
 
 /*
  * Trigger mode (bits 2-3 of int_flags) of the entry wired to pin 'pin'
  * of LOGICAL IO APIC 'apic'.  The first matching entry wins; returns -1
  * if there is none.
  */
 int
 apic_trigger(int apic, int pin)
 {
 	int	i;
 
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return ((io_apic_ints[i].int_flags >> 2) & 0x03);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * 'Active' level (bits 0-1 of int_flags) of the entry wired to pin 'pin'
  * of LOGICAL IO APIC 'apic'.  The first matching entry wins; returns -1
  * if there is none.
  */
 int
 apic_polarity(int apic, int pin)
 {
 	int	i;
 
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return (io_apic_ints[i].int_flags & 0x03);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * set data according to MP defaults
  * FIXME: probably not complete yet...
  *
  * 'type' is the MP default configuration number (1..7); any other value
  * panics.  The function fills in:
  *  - the BSP (logical CPU 0, APIC ID read from the local APIC) and one AP,
  *  - with APIC_IO, the single IO APIC (its ID is forced to 2 when the
  *    hardware value fails the sanity check below),
  *  - bus_data[0] and bus_data[1] from default_data[type - 1],
  *  - with APIC_IO, io_apic_ints[0..15] as a 1-to-1 pin mapping plus the
  *    per-type special cases.
  */
 static void
 default_mp_table(int type)
 {
 	int     ap_cpu_id;
 #if defined(APIC_IO)
 	int     io_apic_id;
 	int     pin;
 #endif	/* APIC_IO */
 
 #if 0
 	printf("  MP default config type: %d\n", type);
 	switch (type) {
 	case 1:
 		printf("   bus: ISA, APIC: 82489DX\n");
 		break;
 	case 2:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 3:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 4:
 		printf("   bus: MCA, APIC: 82489DX\n");
 		break;
 	case 5:
 		printf("   bus: ISA+PCI, APIC: Integrated\n");
 		break;
 	case 6:
 		printf("   bus: EISA+PCI, APIC: Integrated\n");
 		break;
 	case 7:
 		printf("   bus: MCA+PCI, APIC: Integrated\n");
 		break;
 	default:
 		printf("   future type\n");
 		break;
 		/* NOTREACHED */
 	}
 #endif	/* 0 */
 
 	/* the AP gets APIC ID 1, unless the BSP is not ID 0: then it gets 0 */
 	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
 	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
 
 	/* BSP */
 	CPU_TO_ID(0) = boot_cpu_id;
 	ID_TO_CPU(boot_cpu_id) = 0;
 
 	/* one and only AP */
 	CPU_TO_ID(1) = ap_cpu_id;
 	ID_TO_CPU(ap_cpu_id) = 1;
 
 #if defined(APIC_IO)
 	/* one and only IO APIC */
 	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
 
 	/*
 	 * sanity check, refer to MP spec section 3.6.6, last paragraph
 	 * necessary as some hardware isn't properly setting up the IO APIC
 	 *
 	 * Both preprocessor variants open the same block: the strict one
 	 * rewrites any ID other than 2, the default one only IDs 0, 1 and 15.
 	 */
 #if defined(REALLY_ANAL_IOAPICID_VALUE)
 	if (io_apic_id != 2) {
 #else
 	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
 #endif	/* REALLY_ANAL_IOAPICID_VALUE */
 		io_apic_set_id(0, 2);
 		io_apic_id = 2;
 	}
 	IO_TO_ID(0) = io_apic_id;
 	ID_TO_IO(io_apic_id) = 0;
 #endif	/* APIC_IO */
 
 	/* fill out bus entries */
 	switch (type) {
 	case 1:
 	case 2:
 	case 3:
 	case 4:
 	case 5:
 	case 6:
 	case 7:
 		/* columns 1..4 of default_data: id/type of bus 0, id/type of bus 1 */
 		bus_data[0].bus_id = default_data[type - 1][1];
 		bus_data[0].bus_type = default_data[type - 1][2];
 		bus_data[1].bus_id = default_data[type - 1][3];
 		bus_data[1].bus_type = default_data[type - 1][4];
 		break;
 
 	/*
 	 * case 4: case 7:		   MCA NOT supported
 	 * NOTE(review): types 4 and 7 are nevertheless accepted by the case
 	 * list above - confirm which of the two is intended.
 	 */
 	default:		/* illegal/reserved */
 		panic("BAD default MP config: %d", type);
 		/* NOTREACHED */
 	}
 
 #if defined(APIC_IO)
 	/* general cases from MP v1.4, table 5-2 */
 	for (pin = 0; pin < 16; ++pin) {
 		io_apic_ints[pin].int_type = 0;
 		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
 		io_apic_ints[pin].src_bus_id = 0;
 		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
 		io_apic_ints[pin].dst_apic_id = io_apic_id;
 		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
 	}
 
 	/* special cases from MP v1.4, table 5-2 */
 	if (type == 2) {
 		io_apic_ints[2].int_type = 0xff;	/* N/C */
 		io_apic_ints[13].int_type = 0xff;	/* N/C */
 #if !defined(APIC_MIXED_MODE)
 		/** FIXME: ??? */
 		panic("sorry, can't support type 2 default yet");
 #endif	/* APIC_MIXED_MODE */
 	}
 	else
 		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
 
 	/* pin 0: not connected for type 7, otherwise the 8259 ExtInt */
 	if (type == 7)
 		io_apic_ints[0].int_type = 0xff;	/* N/C */
 	else
 		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
 #endif	/* APIC_IO */
 }
 
 
 /*
  * start each AP in our list
  *
  * 'boot_addr' is the address the AP trampoline is installed at (it is
  * handed to install_ap_tramp() and start_ap(), and boot_addr >> 4 is
  * stored as the warm-boot segment).
  *
  * For every AP 1..mp_naps a private data page and an idle stack are
  * allocated and wired into SMPpt[], the BIOS warm-start vector is pointed
  * at the trampoline, and start_ap() is called.  The BIOS warm-start
  * state saved on entry is restored afterwards.
  *
  * Returns mp_ncpus - 1.
  */
 static int
 start_all_aps(u_int boot_addr)
 {
 	int     x, i, pg;
 	u_char  mpbiosreason;
 	u_long  mpbioswarmvec;
 	struct globaldata *gd;
 	char *stack;
 	uintptr_t kptbase;
 
 	POSTCODE(START_ALL_APS_POST);
 
 	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
 
 	/* initialize BSP's local APIC */
 	apic_initialize();
 	bsp_apic_ready = 1;
 
 	/* install the AP 1st level boot code */
 	install_ap_tramp(boot_addr);
 
 
 	/* save the current value of the warm-start vector */
 	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
 #ifndef PC98
 	/* mpbiosreason is only read and written in the non-PC98 build */
 	outb(CMOS_REG, BIOS_RESET);
 	mpbiosreason = inb(CMOS_DATA);
 #endif
 
 	/* set up temporary P==V mapping for AP boot */
 	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
 	kptbase = (uintptr_t)(void *)KPTphys;
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
 		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
 	invltlb();
 
 	/* start each AP */
 	for (x = 1; x <= mp_naps; ++x) {
 
 		/* This is a bit verbose, it will go away soon.  */
 
 		/* first page of AP's private space */
 		pg = x * i386_btop(sizeof(struct privatespace));
 
 		/* allocate a new private data page */
 		/* NOTE(review): the kmem_alloc() results are used unchecked */
 		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
 
 		/* wire it into the private page table page */
 		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
 
 		/* allocate and set up an idle stack data page */
 		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
 		for (i = 0; i < UPAGES; i++)
 			SMPpt[pg + 1 + i] = (pt_entry_t)
 			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 		/* prime data page for it to use */
 		gd->gd_cpuid = x;
 		globaldata_register(gd);
 
 		/* setup a vector to our boot code */
 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
 #ifndef PC98
 		outb(CMOS_REG, BIOS_RESET);
 		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 #endif
 
 		/* stack top and logical CPU number for the AP being booted */
 		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
 		bootAP = x;
 
 		/* attempt to start the Application Processor */
 		CHECK_INIT(99);	/* setup checkpoints */
 		if (!start_ap(x, boot_addr)) {
 			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
 			CHECK_PRINT("trace");	/* show checkpoints */
 			/* better panic as the AP may be running loose */
 			/* any console key other than 'n' panics */
 			printf("panic y/n? [y] ");
 			if (cngetc() != 'n')
 				panic("bye-bye");
 		}
 		CHECK_PRINT("trace");		/* show checkpoints */
 
 		/* record its version info */
 		cpu_apic_versions[x] = cpu_apic_versions[0];
 
 		all_cpus |= (1 << x);		/* record AP in CPU map */
 	}
 
 	/* build our map of 'other' CPUs */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	/* fill in our (BSP) APIC version */
 	cpu_apic_versions[0] = lapic.version;
 
 	/* restore the warmstart vector */
 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
 #ifndef PC98
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, mpbiosreason);
 #endif
 
 	/*
 	 * Set up the idle context for the BSP.  Similar to above except
 	 * that some was done by locore, some by pmap.c and some is implicit
 	 * because the BSP is cpu#0 and the page is initially zero, and also
 	 * because we can refer to variables by name on the BSP..
 	 */
 
 	/* Allocate and setup BSP idle stack */
 	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
 	for (i = 0; i < UPAGES; i++)
 		SMPpt[1 + i] = (pt_entry_t)
 		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 	/* tear down the temporary P==V mapping set up above */
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = 0;
 	pmap_set_opt();
 
 	/* number of APs actually started */
 	return mp_ncpus - 1;
 }
 
 
 /*
  * load the 1st level AP boot code into base memory.
  */
 
 /* targets for relocation */
 extern void bigJump(void);
 extern void bootCodeSeg(void);
 extern void bootDataSeg(void);
 extern void MPentry(void);
 extern u_int MP_GDT;
 extern u_int mp_gdtbase;
 
 /*
  * Copy the bootMP trampoline (bootMP_size bytes) to boot_addr + KERNBASE
  * and patch four locations inside the copy: the lgdt argument, the ljmp
  * target for MPentry(), and the 24-bit boot_addr stored at bootCodeSeg
  * and bootDataSeg.  Patch offsets are computed relative to bootMP.
  */
 static void
 install_ap_tramp(u_int boot_addr)
 {
 	int     x;
 	int     size = *(int *) ((u_long) & bootMP_size);
 	u_char *src = (u_char *) ((u_long) bootMP);
 	u_char *dst = (u_char *) boot_addr + KERNBASE;
 	u_int   boot_base = (u_int) bootMP;
 	u_int8_t *dst8;
 	u_int16_t *dst16;
 	u_int32_t *dst32;
 
 	POSTCODE(INSTALL_AP_TRAMP_POST);
 
 	/* byte-wise copy of the trampoline */
 	for (x = 0; x < size; ++x)
 		*dst++ = *src++;
 
 	/*
 	 * modify addresses in code we just moved to basemem. unfortunately we
 	 * need fairly detailed info about mpboot.s for this to work.  changes
 	 * to mpboot.s might require changes here.
 	 */
 
 	/* boot code is located in KERNEL space */
 	dst = (u_char *) boot_addr + KERNBASE;	/* rewind to start of copy */
 
 	/* modify the lgdt arg */
 	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
 	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
 
 	/* modify the ljmp target for MPentry() */
 	/* the patched operand starts 1 byte past the bigJump label */
 	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
 	*dst32 = ((u_int) MPentry - KERNBASE);
 
 	/* modify the target for boot code segment */
 	/* low 16 bits of boot_addr, then bits 16-23 in the following byte */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 
 	/* modify the target for boot data segment */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 }
 
 
 /*
  * this function starts the AP (application processor) with logical CPU
  * number 'logical_cpu'; its physical APIC ID is looked up through
  * CPU_TO_ID().  It does quite a "song and dance"
  * to accomplish this.  This is necessary because of the nuances
  * of the different hardware we might encounter.  It ain't pretty,
  * but it seems to work.
  *
  * 'boot_addr' supplies the STARTUP IPI vector ((boot_addr >> 12) & 0xff).
  *
  * Returns 1 once mp_ncpus has grown past its value on entry, 0 if that
  * does not happen before the APIC timer armed below runs out.
  */
 static int
 start_ap(int logical_cpu, u_int boot_addr)
 {
 	int     physical_cpu;
 	int     vector;
 	int     cpus;
 	u_long  icr_lo, icr_hi;
 
 	POSTCODE(START_AP_POST);
 
 	/* get the PHYSICAL APIC ID# */
 	physical_cpu = CPU_TO_ID(logical_cpu);
 
 	/* calculate the vector */
 	vector = (boot_addr >> 12) & 0xff;
 
 	/* used as a watchpoint to signal AP startup */
 	cpus = mp_ncpus;
 
 	/*
 	 * first we do an INIT/RESET IPI this INIT IPI might be run, resetting
 	 * and running the target CPU. OR this INIT IPI might be latched (P5
 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 	 * ignored.
 	 */
 
 	/* setup the address for the target AP */
 	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
 	icr_hi |= (physical_cpu << 24);
 	lapic.icr_hi = icr_hi;
 
 	/* do an INIT IPI: assert RESET */
 	/* only the top 12 bits of icr_lo are carried over into each command */
 	icr_lo = lapic.icr_lo & 0xfff00000;
 	lapic.icr_lo = icr_lo | 0x0000c500;
 
 	/* wait for pending status end */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/* do an INIT IPI: deassert RESET */
 	lapic.icr_lo = icr_lo | 0x00008500;
 
 	/* wait for pending status end */
 	u_sleep(10000);		/* wait ~10mS */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/*
 	 * next we do a STARTUP IPI: the previous INIT IPI might still be
 	 * latched, (P5 bug) this 1st STARTUP would then terminate
 	 * immediately, and the previously started INIT IPI would continue. OR
 	 * the previous INIT IPI has already run. and this STARTUP IPI will
 	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 	 * will run.
 	 */
 
 	/* do a STARTUP IPI */
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/*
 	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/* wait for it to start */
 	/* poll mp_ncpus for as long as the APIC timer reads non-zero */
 	set_apic_timer(5000000);/* == 5 seconds */
 	while (read_apic_timer())
 		if (mp_ncpus > cpus)
 			return 1;	/* return SUCCESS */
 
 	return 0;		/* return FAILURE */
 }
 
 /*
  * Flush the TLB on all other CPU's
  *
  * The IPI is only sent once both smp_started and invltlb_ok are set;
  * without APIC_IO this function does nothing.
  *
  * XXX: Needs to handshake and wait for completion before proceeding.
  */
 void
 smp_invltlb(void)
 {
 #if defined(APIC_IO)
 	if (!smp_started || !invltlb_ok)
 		return;
 
 	ipi_all_but_self(IPI_INVLTLB);
 #endif  /* APIC_IO */
 }
 
 /*
  * Execute the invlpg instruction for 'addr' on the current CPU, then
  * call smp_invltlb() to notify the other CPUs.
  */
 void
 invlpg(u_int addr)
 {
 	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 /*
  * Reload %cr3 with its current value on the current CPU, then call
  * smp_invltlb() to notify the other CPUs.
  */
 void
 invltlb(void)
 {
 	u_long  temp;
 
 	/*
 	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
 	 * inlined.
 	 */
 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 
 /*
  * This is called once the rest of the system is up and running and we're
  * ready to let the AP's out of the pen.
  *
  * Each AP spins until aps_ready is set, then performs its per-CPU setup
  * while holding ap_boot_mtx.  The AP that brings smp_cpus up to mp_ncpus
  * sets smp_started; every AP waits for that flag before entering the
  * scheduler through cpu_throw().  Does not return.
  */
 extern void	enable_sse(void);
 
 void
 ap_init(void)
 {
 	u_int	apic_id;
 
 	/* spin until all the AP's are ready */
 	while (!aps_ready)
 		/* spin */ ;
 
 	/*
 	 * Set curproc to our per-cpu idleproc so that mutexes have
 	 * something unique to lock with.
 	 */
 	PCPU_SET(curproc, PCPU_GET(idleproc));
 	PCPU_SET(spinlocks, NULL);
 
 	/* lock against other AP's that are waking up */
 	mtx_lock_spin(&ap_boot_mtx);
 
 	/* BSP may have changed PTD while we're waiting for the lock */
 	cpu_invltlb();
 
 	smp_cpus++;
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 	lidt(&r_idt);
 #endif
 
 	/* Build our map of 'other' CPUs. */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
 
 	/* set up CPU registers and state */
 	cpu_setregs();
 
 	/* set up FPU state on the AP */
 	npxinit(__INITIAL_NPXCW__);
 
 	/* set up SSE registers */
 	enable_sse();
 
 	/* A quick check from sanity claus */
 	/*
 	 * Despite its name, apic_id holds the logical CPU number looked up
 	 * from the local APIC ID.
 	 * NOTE(review): default_mp_table() extracts the same field with
 	 * APIC_ID_MASK instead of the literal 0x0f000000 - confirm they agree.
 	 */
 	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
 	if (PCPU_GET(cpuid) != apic_id) {
 		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
 		printf("SMP: apic_id = %d\n", apic_id);
 		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
 		panic("cpuid mismatch! boom!!");
 	}
 
 	/* Init local apic for irq's */
 	apic_initialize();
 
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
 	/*
 	 * Activate smp_invltlb, although strictly speaking, this isn't
 	 * quite correct yet.  We should have a bitfield for cpus willing
 	 * to accept TLB flush IPI's or something and sync them.
 	 */
 	if (smp_cpus == mp_ncpus) {
 		invltlb_ok = 1;
 		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
 		smp_active = 1;	 /* historic */
 	}
 
 	/* let other AP's wake up now */
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* wait until all the AP's are up */
 	while (smp_started == 0)
 		; /* nothing */
 
 	microuptime(PCPU_PTR(switchtime));
 	PCPU_SET(switchticks, ticks);
 
 	/* ok, now grab sched_lock and enter the scheduler */
 	enable_intr();
 	mtx_lock_spin(&sched_lock);
 	cpu_throw();	/* doesn't return */
 
 	panic("scheduler returned us to ap_init");
 }
 
 /*
  * Receiving side of the statclock IPI: run statclock_process() for the
  * current process under sched_lock, using the PC and user/kernel mode
  * taken from the interrupted trap frame.
  */
 void
 forwarded_statclock(struct trapframe frame)
 {
 	struct trapframe *tf = &frame;
 
 	mtx_lock_spin(&sched_lock);
 	statclock_process(curproc, TRAPF_PC(tf), TRAPF_USERMODE(tf));
 	mtx_unlock_spin(&sched_lock);
 }
 
 void
 forward_statclock(void)
 {
 	int map;
 
 	CTR0(KTR_SMP, "forward_statclock");
 
 	if (!smp_started || !invltlb_ok || cold || panicstr)
 		return;
 
 	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 	if (map != 0)
 		ipi_selected(map, IPI_STATCLOCK);
 }
 
 /*
  * For each hardclock(), we send an IPI to all other CPU's to have them
  * execute this function.  It would be nice to reduce contention on
  * sched_lock if we could simply peek at the CPU to determine the user/kernel
  * state and call hardclock_process() on the CPU receiving the clock interrupt
  * and then just use a simple IPI to handle any ast's if needed.
  */
 void
 forwarded_hardclock(struct trapframe frame)
 {
 	struct trapframe *tf = &frame;
 
 	mtx_lock_spin(&sched_lock);
 	hardclock_process(curproc, TRAPF_USERMODE(tf));
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Sending side of the hardclock IPI: forward the hardclock tick to every
  * other CPU that is not stopped.  Nothing is sent before SMP is fully
  * started, while the system is cold, or after a panic.
  */
 void 
 forward_hardclock(void)
 {
 	u_int targets;
 
 	CTR0(KTR_SMP, "forward_hardclock");
 
 	if (smp_started && invltlb_ok && !cold && !panicstr) {
 		targets = PCPU_GET(other_cpus) & ~stopped_cpus;
 		if (targets != 0)
 			ipi_selected(targets, IPI_HARDCLOCK);
 	}
 }
 
 #ifdef APIC_INTR_REORDER
 /*
  *	Maintain mapping from softintr vector to isr bit in local apic.
  */
 void
 set_lapic_isrloc(int intr, int vector)
 {
 	/*
 	 * NOTE(review): intr == 32 passes this check - confirm that
 	 * apic_isrbit_location[] really has 33 entries.
 	 */
 	if (intr < 0 || intr > 32) {
 		panic("set_apic_isrloc: bad intr argument: %d",intr);
 	}
 	if (vector < ICU_OFFSET || vector > 255) {
 		panic("set_apic_isrloc: bad vector argument: %d",vector);
 	}
 
 	/* ISR word holding this vector's bit, and the bit within that word */
 	apic_isrbit_location[intr].location =
 	    &lapic.isr0 + ((vector>>5)<<2);
 	apic_isrbit_location[intr].bit = (1<<(vector & 31));
 }
 #endif
 
 /*
  * send an IPI to a set of cpus.
  *
  * 'cpus' is the bitmask of target CPUs and 'ipi' the IPI to deliver; both
  * are handed to selected_apic_ipi() with fixed delivery mode.
  */
 void
 ipi_selected(u_int32_t cpus, u_int ipi)
 {
 
 	/*
 	 * __func__ is a predefined identifier, not a string literal, so it
 	 * cannot be concatenated with the format; pass it as a %s argument.
 	 */
 	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs, including myself
  */
 void
 ipi_all(u_int ipi)
 {
 
 	/*
 	 * __func__ is a predefined identifier, not a string literal, so it
 	 * cannot be concatenated with the format; pass it as a %s argument.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs EXCEPT myself
  */
 void
 ipi_all_but_self(u_int ipi)
 {
 
 	/*
 	 * __func__ is a predefined identifier, not a string literal, so it
 	 * cannot be concatenated with the format; pass it as a %s argument.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to myself
  */
 void
 ipi_self(u_int ipi)
 {
 
 	/*
 	 * __func__ is a predefined identifier, not a string literal, so it
 	 * cannot be concatenated with the format; pass it as a %s argument.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * Set aps_ready to 1 with atomic_store_rel_int(); ap_init() spins until
  * this flag becomes non-zero.  The argument is unused.
  */
 void
 release_aps(void *dummy __unused)
 {
 	atomic_store_rel_int(&aps_ready, 1);
 }
 
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
Index: head/sys/i386/i386/mptable.c
===================================================================
--- head/sys/i386/i386/mptable.c	(revision 82308)
+++ head/sys/i386/i386/mptable.c	(revision 82309)
@@ -1,2440 +1,2442 @@
 /*
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_cpu.h"
+#include "opt_upages.h"
 
 #ifdef SMP
 #include <machine/smptests.h>
 #else
 #error
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
 #include <sys/dkstat.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 #include <machine/apic.h>
 #include <machine/atomic.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/mpapic.h>
 #include <machine/psl.h>
 #include <machine/segments.h>
 #include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
 #include <machine/tss.h>
 #include <machine/specialreg.h>
 #include <machine/globaldata.h>
+#include <machine/privatespace.h>
 
 #if defined(APIC_IO)
 #include <machine/md_var.h>		/* setidt() */
 #include <i386/isa/icu.h>		/* IPIs */
 #include <i386/isa/intr_machdep.h>	/* IPIs */
 #endif	/* APIC_IO */
 
 #if defined(TEST_DEFAULT_CONFIG)
 #define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
 #else
 #define MPFPS_MPFB1	mpfps->mpfb1
 #endif  /* TEST_DEFAULT_CONFIG */
 
 #define WARMBOOT_TARGET		0
 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
 
 #ifdef PC98
 #define BIOS_BASE		(0xe8000)
 #define BIOS_SIZE		(0x18000)
 #else
 #define BIOS_BASE		(0xf0000)
 #define BIOS_SIZE		(0x10000)
 #endif
 #define BIOS_COUNT		(BIOS_SIZE/4)
 
 #define CMOS_REG		(0x70)
 #define CMOS_DATA		(0x71)
 #define BIOS_RESET		(0x0f)
 #define BIOS_WARM		(0x0a)
 
 #define PROCENTRY_FLAG_EN	0x01
 #define PROCENTRY_FLAG_BP	0x02
 #define IOAPICENTRY_FLAG_EN	0x01
 
 
 /*
  * MP Floating Pointer Structure.
  * search_for_sig() locates it by its signature; i386_mp_probe() stores
  * the resulting address in 'mpfps'.
  */
 typedef struct MPFPS {
 	char    signature[4];	/* compared against MP_SIG ("_MP_") */
 	void   *pap;		/* configuration table header, read as mpcth_t */
 	u_char  length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  mpfb1;		/* non-zero: default configuration number */
 	u_char  mpfb2;		/* bit 0x80 set: PIC mode, else virtual wire */
 	u_char  mpfb3;
 	u_char  mpfb4;
 	u_char  mpfb5;
 }      *mpfps_t;
 
 /*
  * MP Configuration Table Header.
  * The base table entries start immediately after this header.
  */
 typedef struct MPCTH {
 	char    signature[4];
 	u_short base_table_length;	/* the passes subtract sizeof(struct MPCTH) */
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  oem_id[8];
 	u_char  product_id[12];
 	void   *oem_table_pointer;
 	u_short oem_table_size;
 	u_short entry_count;		/* number of base table entries walked */
 	void   *apic_address;		/* copied into cpu_apic_address */
 	u_short extended_table_length;
 	u_char  extended_table_checksum;
 	u_char  reserved;
 }      *mpcth_t;
 
 
 /* Base table entry of type 0 ("Processor"). */
 typedef struct PROCENTRY {
 	u_char  type;
 	u_char  apic_id;	/* must be below NAPICID */
 	u_char  apic_version;
 	u_char  cpu_flags;	/* tested against PROCENTRY_FLAG_EN */
 	u_long  cpu_signature;
 	u_long  feature_flags;
 	u_long  reserved1;
 	u_long  reserved2;
 }      *proc_entry_ptr;
 
 /* Base table entry of type 1 ("Bus"). */
 typedef struct BUSENTRY {
 	u_char  type;
 	u_char  bus_id;
 	char    bus_type[6];
 }      *bus_entry_ptr;
 
 /* Base table entry of type 2 ("I/O APIC"). */
 typedef struct IOAPICENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  apic_flags;	/* tested against IOAPICENTRY_FLAG_EN */
 	void   *apic_address;	/* recorded in io_apic_address[] */
 }      *io_apic_entry_ptr;
 
 /* Base table entry of type 3 ("I/O INT"); type 4 entries are skipped. */
 typedef struct INTENTRY {
 	u_char  type;
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 }      *int_entry_ptr;
 
 /* descriptions of MP basetable entries */
 typedef struct BASETABLE_ENTRY {
 	u_char  type;		/* entry type code (first byte of an entry) */
 	u_char  length;		/* bytes to step over to reach the next entry */
 	char    name[16];	/* printable name of the entry type */
 }       basetable_entry;
 
 /*
  * this code MUST be enabled here and in mpboot.s.
  * it follows the very early stages of AP boot by placing values in CMOS ram.
  * it NORMALLY will never be needed and thus the primitive method for enabling.
  *
 #define CHECK_POINTS
  */
 
 #if defined(CHECK_POINTS) && !defined(PC98)
 #define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
 
 #define CHECK_INIT(D);				\
 	CHECK_WRITE(0x34, (D));			\
 	CHECK_WRITE(0x35, (D));			\
 	CHECK_WRITE(0x36, (D));			\
 	CHECK_WRITE(0x37, (D));			\
 	CHECK_WRITE(0x38, (D));			\
 	CHECK_WRITE(0x39, (D));
 
 #define CHECK_PRINT(S);				\
 	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
 	   (S),					\
 	   CHECK_READ(0x34),			\
 	   CHECK_READ(0x35),			\
 	   CHECK_READ(0x36),			\
 	   CHECK_READ(0x37),			\
 	   CHECK_READ(0x38),			\
 	   CHECK_READ(0x39));
 
 #else				/* CHECK_POINTS */
 
 #define CHECK_INIT(D)
 #define CHECK_PRINT(S)
 
 #endif				/* CHECK_POINTS */
 
 /*
  * Values to send to the POST hardware.
  */
 #define MP_BOOTADDRESS_POST	0x10
 #define MP_PROBE_POST		0x11
 #define MPTABLE_PASS1_POST	0x12
 
 #define MP_START_POST		0x13
 #define MP_ENABLE_POST		0x14
 #define MPTABLE_PASS2_POST	0x15
 
 #define START_ALL_APS_POST	0x16
 #define INSTALL_AP_TRAMP_POST	0x17
 #define START_AP_POST		0x18
 
 #define MP_ANNOUNCE_POST	0x19
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
 int	current_postcode;
 
 /** XXX FIXME: what system files declare these??? */
 extern struct region_descriptor r_gdt, r_idt;
 
 int	bsp_apic_ready = 0;	/* flags useability of BSP apic */
 int	mp_naps;		/* # of Applications processors */
 int	mp_nbusses;		/* # of busses */
 int	mp_napics;		/* # of IO APICs */
 int	boot_cpu_id;		/* designated BSP */
 vm_offset_t cpu_apic_address;
 vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
 extern	int nkpt;
 
 u_int32_t cpu_apic_versions[MAXCPU];
 u_int32_t *io_apic_versions;
 
 #ifdef APIC_INTR_REORDER
 struct {
 	volatile int *location;
 	int bit;
 } apic_isrbit_location[32];
 #endif
 
 struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
 
 /*
  * APIC ID logical/physical mapping structures.
  * We oversize these to simplify boot-time config.
  */
 int     cpu_num_to_apic_id[NAPICID];
 int     io_num_to_apic_id[NAPICID];
 int     apic_id_to_logical[NAPICID];
 
 
 /* AP uses this during bootstrap.  Do not staticize.  */
 char *bootSTK;
 static int bootAP;
 
 /* Hotwire a 0->4MB V==P mapping */
 extern pt_entry_t *KPTphys;
 
 /* SMP page table page */
 extern pt_entry_t *SMPpt;
 
 struct pcb stoppcbs[MAXCPU];
 
 int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
 
 /*
  * Local data and functions.
  */
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready = 0;
 
 static int	mp_capable;
 static u_int	boot_address;
 static u_int	base_memory;
 
 static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
 static mpfps_t	mpfps;
 static int	search_for_sig(u_int32_t target, int count);
 static void	mp_enable(u_int boot_addr);
 
 static void	mptable_pass1(void);
 static int	mptable_pass2(void);
 static void	default_mp_table(int type);
 static void	fix_mp_table(void);
 static void	setup_apic_irq_mapping(void);
 static void	init_locks(void);
 static int	start_all_aps(u_int boot_addr);
 static void	install_ap_tramp(u_int boot_addr);
 static int	start_ap(int logicalCpu, u_int boot_addr);
 void		ap_init(void);
 static int	apic_int_is_bus_type(int intr, int bus_type);
 static void	release_aps(void *dummy);
 
 /*
  * initialize all the SMP locks
  */
 
 /* critical region around IO APIC, apic_imen */
 struct mtx		imen_mtx;
 
 /* lock region used by kernel profiling */
 int	mcount_lock;
 
 #ifdef USE_COMLOCK
 /* locks com (tty) data/hardware accesses: a FASTINTR() */
 struct mtx		com_mtx;
 #endif /* USE_COMLOCK */
 
 /*
  * Initialize the SMP locks defined above.  Only com_mtx is set up here,
  * and only when USE_COMLOCK is defined; otherwise this does nothing.
  */
 static void
 init_locks(void)
 {
 
 #ifdef USE_COMLOCK
 	mtx_init(&com_mtx, "com", MTX_SPIN);
 #endif /* USE_COMLOCK */
 }
 
 /*
  * Pick the base-memory address that will hold the AP trampoline.
  *
  * basemem is in kilobytes.  Records the byte count in base_memory and
  * the chosen address in boot_address, and returns that address.
  */
 u_int
 mp_bootaddress(u_int basemem)
 {
 	u_int trampoline;
 
 	POSTCODE(MP_BOOTADDRESS_POST);
 
 	/* kilobytes -> bytes */
 	base_memory = basemem * 1024;
 
 	/* start at the 4k boundary at or below the top of base memory */
 	trampoline = base_memory & ~0xfff;
 
 	/* drop one more page if bootMP_size does not fit above it */
 	if ((base_memory - trampoline) < bootMP_size)
 		trampoline -= 4096;
 
 	boot_address = trampoline;
 	return (trampoline);
 }
 
 
 /*
  * Look for an Intel MP spec table (ie, SMP capable hardware).
  */
 void
 i386_mp_probe(void)
 {
 	int     x;
 	u_long  segment;
 	u_int32_t target;
 
 	POSTCODE(MP_PROBE_POST);
 
 	/* see if EBDA exists */
 	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
 		/* search first 1K of EBDA */
 		target = (u_int32_t) (segment << 4);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	} else {
 		/* last 1K of base memory, effective 'top of base' passed in */
 		target = (u_int32_t) (base_memory - 0x400);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	}
 
 	/* search the BIOS */
 	target = (u_int32_t) BIOS_BASE;
 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
 		goto found;
 
 	/* nothing found */
 	mpfps = (mpfps_t)0;
 	mp_capable = 0;
 	return;
 
 found:
 	/* calculate needed resources */
 	mpfps = (mpfps_t)x;
 	mptable_pass1();
 
 	/* flag fact that we are running multiple processors */
 	mp_capable = 1;
 }
 
 /*
  * Report whether an MP table was found.
  *
  * Sets all_cpus to 1 as a side effect and returns mp_capable, which
  * i386_mp_probe() sets to 1 when a table was found and 0 otherwise.
  */
 int
 cpu_mp_probe(void)
 {
 	/*
 	 * Record BSP in CPU map
 	 * This is done here so that MBUF init code works correctly.
 	 */
 	all_cpus = 1;
 
 	return (mp_capable);
 }
 
 /*
  * Bring up the SMP hardware: run mp_enable() at boot_address, then
  * cpu_setregs().  Panics when no MP capable motherboard was detected.
  */
 void
 cpu_mp_start(void)
 {
 	POSTCODE(MP_START_POST);
 
 	/* an MP capable motherboard is required */
 	if (!mp_capable)
 		panic("MP hardware not found!");
 
 	mp_enable(boot_address);
 
 	cpu_setregs();
 }
 
 
 /*
  * Print the APIC id, version and address of the BSP, of each AP, and
  * (with APIC_IO) of each IO APIC.
  */
 void
 cpu_mp_announce(void)
 {
 	int     n;
 
 	POSTCODE(MP_ANNOUNCE_POST);
 
 	/* the BSP is logical CPU 0 */
 	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
 	printf(", version: 0x%08x", cpu_apic_versions[0]);
 	printf(", at 0x%08x\n", cpu_apic_address);
 
 	/* the APs are logical CPUs 1..mp_naps */
 	n = 1;
 	while (n <= mp_naps) {
 		printf(" cpu%d (AP):  apic id: %2d", n, CPU_TO_ID(n));
 		printf(", version: 0x%08x", cpu_apic_versions[n]);
 		printf(", at 0x%08x\n", cpu_apic_address);
 		++n;
 	}
 
 #if defined(APIC_IO)
 	n = 0;
 	while (n < mp_napics) {
 		printf(" io%d (APIC): apic id: %2d", n, IO_TO_ID(n));
 		printf(", version: 0x%08x", io_apic_versions[n]);
 		printf(", at 0x%08x\n", io_apic_address[n]);
 		++n;
 	}
 #else
 	printf(" Warning: APIC I/O disabled\n");
 #endif	/* APIC_IO */
 }
 
 /*
  * AP cpu's call this to sync up protected mode.
  *
  * The CPU being started is identified by bootAP.  Builds that CPU's
  * slice of gdt[] (NGDT descriptors starting at myid * NGDT), loads the
  * GDT, IDT and LDT, fills in the per-CPU TSS fields and loads the task
  * register, then calls pmap_set_opt().
  */
 void
 init_secondary(void)
 {
 	int	gsel_tss;
 	int	x, myid = bootAP;
 
 	/* point the private and TSS segments at this CPU's SMP_prvspace */
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
 	SMP_prvspace[myid].globaldata.gd_prvspace =
 		&SMP_prvspace[myid].globaldata;
 
 	/* convert the soft descriptors into this CPU's GDT slice */
 	for (x = 0; x < NGDT; x++) {
 		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base = (int) &gdt[myid * NGDT];
 	lgdt(&r_gdt);			/* does magic intra-segment return */
 
 	lidt(&r_idt);
 
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/* set up this CPU's TSS descriptor and load the task register */
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
 	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	ltr(gsel_tss);
 
 	pmap_set_opt();
 }
 
 
 #if defined(APIC_IO)
 /*
  * Final configuration of the BSP's local APIC:
  *  - disable 'pic mode'.
  *  - disable 'virtual wire mode'.
  *  - enable NMI.
  *
  * 'pic mode' is only left when picmode is set.  Each LVT register is
  * updated by reading it, changing the APIC_LVT_M bit and writing it
  * back.  With bootverbose the resulting state is dumped.
  */
 void
 bsp_apic_configure(void)
 {
 	u_char		byte;
 	u_int32_t	temp;
 
 	/* leave 'pic mode' if necessary */
 	if (picmode) {
 		outb(0x22, 0x70);	/* select IMCR */
 		byte = inb(0x23);	/* current contents */
 		byte |= 0x01;		/* mask external INTR */
 		outb(0x23, byte);	/* disconnect 8259s/NMI */
 	}
 
 	/* mask lint0 (the 8259 'virtual wire' connection) */
 	temp = lapic.lvt_lint0;
 	temp |= APIC_LVT_M;		/* set the mask */
 	lapic.lvt_lint0 = temp;
 
         /* setup lint1 to handle NMI */
         temp = lapic.lvt_lint1;
         temp &= ~APIC_LVT_M;		/* clear the mask */
         lapic.lvt_lint1 = temp;
 
 	if (bootverbose)
 		apic_dump("bsp_apic_configure()");
 }
 #endif  /* APIC_IO */
 
 
 /*******************************************************************
  * local functions and data
  */
 
 /*
  * start the SMP system
  *
  * Sequence: temporarily map the low 4MB V == P and run mptable_pass2();
  * apply a default configuration if pass 2 reported one; fix up the
  * table and the IRQ mapping; with APIC_IO, read the IO APIC versions,
  * set their IDs, program them and install the IPI vectors; initialize
  * the SMP locks; finally start the APs at boot_addr.
  */
 static void
 mp_enable(u_int boot_addr)
 {
 	int     x;
 #if defined(APIC_IO)
 	int     apic;
 	u_int   ux;
 #endif	/* APIC_IO */
 
 	POSTCODE(MP_ENABLE_POST);
 
 	/* turn on 4MB of V == P addressing so we can get to MP table */
 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
 	invltlb();
 
 	/* examine the MP table for needed info, uses physical addresses */
 	x = mptable_pass2();
 
 	/* remove the temporary V == P mapping again */
 	*(int *)PTD = 0;
 	invltlb();
 
 	/* can't process default configs till the CPU APIC is pmapped */
 	if (x)
 		default_mp_table(x);
 
 	/* post scan cleanup */
 	fix_mp_table();
 	setup_apic_irq_mapping();
 
 #if defined(APIC_IO)
 
 	/* fill the LOGICAL io_apic_versions table */
 	for (apic = 0; apic < mp_napics; ++apic) {
 		ux = io_apic_read(apic, IOAPIC_VER);
 		io_apic_versions[apic] = ux;
 		io_apic_set_id(apic, IO_TO_ID(apic));
 	}
 
 	/* program each IO APIC in the system */
 	for (apic = 0; apic < mp_napics; ++apic)
 		if (io_apic_setup(apic) < 0)
 			panic("IO APIC setup failure");
 
 	/* install a 'Spurious INTerrupt' vector */
 	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for TLB invalidation */
 	setidt(XINVLTLB_OFFSET, Xinvltlb,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forwarding hardclock() */
 	setidt(XHARDCLOCK_OFFSET, Xhardclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for forwarding statclock() */
 	setidt(XSTATCLOCK_OFFSET, Xstatclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for all-CPU rendezvous */
 	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forcing an additional software trap */
 	setidt(XCPUAST_OFFSET, Xcpuast,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for CPU stop/restart */
 	setidt(XCPUSTOP_OFFSET, Xcpustop,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 #if defined(TEST_TEST1)
 	/* install a "fake hardware INTerrupt" vector */
 	setidt(XTEST1_OFFSET, Xtest1,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 #endif  /** TEST_TEST1 */
 
 #endif	/* APIC_IO */
 
 	/* initialize all SMP locks */
 	init_locks();
 
 	/* start each Application Processor */
 	start_all_aps(boot_addr);
 }
 
 
 /*
  * Scan for the MP spec signature.
  *
  * target is an offset that is added to KERNBASE to form the address to
  * read; count is the number of 32-bit words to cover.  Every fourth
  * word is examined.  Returns target plus the byte offset of the
  * matching word, or -1 when no word matches.
  */
 
 /* string defined by the Intel MP Spec as identifying the MP table */
 #define MP_SIG		0x5f504d5f	/* _MP_ */
 #define NEXT(X)		((X) += 4)
 static int
 search_for_sig(u_int32_t target, int count)
 {
 	u_int32_t *words = (u_int32_t *) (KERNBASE + target);
 	int     idx = 0;
 
 	while (idx < count) {
 		/* word index -> byte offset */
 		if (words[idx] == MP_SIG)
 			return (target + (idx * sizeof(u_int32_t)));
 		NEXT(idx);
 	}
 
 	return -1;
 }
 
 
 static basetable_entry basetable_entry_types[] =
 {
 	{0, 20, "Processor"},
 	{1, 8, "Bus"},
 	{2, 8, "I/O APIC"},
 	{3, 8, "I/O INT"},
 	{4, 8, "Local INT"}
 };
 
 typedef struct BUSDATA {
 	u_char  bus_id;
 	enum busTypes bus_type;
 }       bus_datum;
 
 typedef struct INTDATA {
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 	u_char	int_vector;
 }       io_int, local_int;
 
 typedef struct BUSTYPENAME {
 	u_char  type;
 	char    name[7];
 }       bus_type_name;
 
 static bus_type_name bus_type_table[] =
 {
 	{CBUS, "CBUS"},
 	{CBUSII, "CBUSII"},
 	{EISA, "EISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{ISA, "ISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{PCI, "PCI"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{XPRESS, "XPRESS"},
 	{UNKNOWN_BUSTYPE, "---"}
 };
 /* from MP spec v1.4, table 5-1 */
 static int default_data[7][5] =
 {
 /*   nbus, id0, type0, id1, type1 */
 	{1, 0, ISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, MCA, 255, 255},
 	{2, 0, ISA, 1, PCI},
 	{2, 0, EISA, 1, PCI},
 	{2, 0, MCA, 1, PCI}
 };
 
 
 /* the bus data */
 static bus_datum *bus_data;
 
 /* the IO INT data, one entry per possible APIC INTerrupt */
 static io_int  *io_apic_ints;
 
 static int nintrs;
 
 static int processor_entry	__P((proc_entry_ptr entry, int cpu));
 static int bus_entry		__P((bus_entry_ptr entry, int bus));
 static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
 static int int_entry		__P((int_entry_ptr entry, int intr));
 static int lookup_bus_type	__P((char *name));
 
 
 /*
  * 1st pass on motherboard's Intel MP specification table.
  *
  * initializes:
  *	mp_ncpus = 1
  *
  * determines:
  *	cpu_apic_address (common to all CPUs)
  *	io_apic_address[N]
  *	mp_naps
  *	mp_nbusses
  *	mp_napics
  *	nintrs
  *
  * Panics if the configuration table header is missing, if an entry has
  * an unknown type, or if more enabled IO APICs are listed than
  * io_apic_address[] can hold.
  */
 static void
 mptable_pass1(void)
 {
 	int	x;
 	mpcth_t	cth;
 	int	totalSize;
 	void*	position;
 	int	count;
 	int	type;
 
 	POSTCODE(MPTABLE_PASS1_POST);
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		io_apic_address[x] = ~0;	/* IO APIC address table */
 	}
 
 	/* init everything to empty */
 	mp_naps = 0;
 	mp_nbusses = 0;
 	mp_napics = 0;
 	nintrs = 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0) {
 		/* use default addresses */
 		cpu_apic_address = DEFAULT_APIC_BASE;
 		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
 
 		/* fill in with defaults */
 		mp_naps = 2;		/* includes BSP */
 		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
 #if defined(APIC_IO)
 		mp_napics = 1;
 		nintrs = 16;
 #endif	/* APIC_IO */
 	}
 	else {
 		if ((cth = mpfps->pap) == 0)
 			panic("MP Configuration Table Header MISSING!");
 
 		cpu_apic_address = (vm_offset_t) cth->apic_address;
 
 		/* walk the table, recording info of interest */
 		totalSize = cth->base_table_length - sizeof(struct MPCTH);
 		position = (u_char *) cth + sizeof(struct MPCTH);
 		count = cth->entry_count;
 
 		while (count--) {
 			switch (type = *(u_char *) position) {
 			case 0: /* processor_entry */
 				if (((proc_entry_ptr)position)->cpu_flags
 					& PROCENTRY_FLAG_EN)
 					++mp_naps;
 				break;
 			case 1: /* bus_entry */
 				++mp_nbusses;
 				break;
 			case 2: /* io_apic_entry */
 				if (((io_apic_entry_ptr)position)->apic_flags
 					& IOAPICENTRY_FLAG_EN) {
 					/* io_apic_address[] has NAPICID slots */
 					if (mp_napics >= NAPICID)
 						panic("too many IO APICs in MP table");
 					io_apic_address[mp_napics++] =
 					    (vm_offset_t)((io_apic_entry_ptr)
 						position)->apic_address;
 				}
 				break;
 			case 3: /* int_entry */
 				++nintrs;
 				break;
 			case 4:	/* local int_entry, not counted */
 				break;
 			default:
 				panic("mpfps Base Table HOSED!");
 				/* NOTREACHED */
 			}
 
 			totalSize -= basetable_entry_types[type].length;
 			/* step to the next entry without a cast-as-lvalue */
 			position = (u_char *)position +
 			    basetable_entry_types[type].length;
 		}
 	}
 
 	/* qualify the numbers */
 	if (mp_naps > MAXCPU) {
 		printf("Warning: only using %d of %d available CPUs!\n",
 			MAXCPU, mp_naps);
 		mp_naps = MAXCPU;
 	}
 
 	/*
 	 * Count the BSP.
 	 * This is also used as a counter while starting the APs.
 	 */
 	mp_ncpus = 1;
 
 	--mp_naps;	/* subtract the BSP */
 }
 
 
 /*
  * 2nd pass on motherboard's Intel MP specification table.
  *
  * sets:
  *	boot_cpu_id
  *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
  *	CPU_TO_ID(N), logical CPU to APIC ID table
  *	IO_TO_ID(N), logical IO to APIC ID table
  *	bus_data[N]
  *	io_apic_ints[N]
  *
  * Also allocates io_apic_versions, ioapic, io_apic_ints and bus_data
  * from the counts gathered in pass 1, and enters each IO APIC page into
  * an SMPpt slot.
  *
  * Returns the default configuration type when mpfb1 is non-zero,
  * otherwise 0 after walking the table.  Panics if the table header is
  * missing, an entry type is unknown, or no BSP entry was seen.
  */
 static int
 mptable_pass2(void)
 {
 	int     x;
 	mpcth_t cth;
 	int     totalSize;
 	void*   position;
 	int     count;
 	int     type;
 	int     apic, bus, cpu, intr;
 	int	i, j;
 	int	pgeflag;
 
 	POSTCODE(MPTABLE_PASS2_POST);
 
 	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
 
 	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	/* one spare slot: fix_mp_table() may append an ExtInt entry */
 	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
 	    M_DEVBUF, M_WAITOK);
 
 	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
 
 	for (i = 0; i < mp_napics; i++) {
 		for (j = 0; j < mp_napics; j++) {
 			/* same page frame as a previous IO apic? */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
 			    (io_apic_address[i] & PG_FRAME)) {
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 			/* use this slot if available */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
 				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
 				    pgeflag | (io_apic_address[i] & PG_FRAME));
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 		}
 	}
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
 		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
 		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
 	}
 
 	/* clear bus data table */
 	for (x = 0; x < mp_nbusses; ++x)
 		bus_data[x].bus_id = 0xff;
 
 	/* clear IO APIC INT table */
 	for (x = 0; x < (nintrs + 1); ++x) {
 		io_apic_ints[x].int_type = 0xff;
 		io_apic_ints[x].int_vector = 0xff;
 	}
 
 	/* setup the cpu/apic mapping arrays */
 	boot_cpu_id = -1;
 
 	/* record whether PIC or virtual-wire mode */
 	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0)
 		return MPFPS_MPFB1;	/* return default configuration type */
 
 	if ((cth = mpfps->pap) == 0)
 		panic("MP Configuration Table Header MISSING!");
 
 	/* walk the table, recording info of interest */
 	totalSize = cth->base_table_length - sizeof(struct MPCTH);
 	position = (u_char *) cth + sizeof(struct MPCTH);
 	count = cth->entry_count;
 	apic = bus = intr = 0;
 	cpu = 1;				/* pre-count the BSP */
 
 	while (count--) {
 		switch (type = *(u_char *) position) {
 		case 0:
 			if (processor_entry(position, cpu))
 				++cpu;
 			break;
 		case 1:
 			if (bus_entry(position, bus))
 				++bus;
 			break;
 		case 2:
 			if (io_apic_entry(position, apic))
 				++apic;
 			break;
 		case 3:
 			if (int_entry(position, intr))
 				++intr;
 			break;
 		case 4:
 			/* int_entry(position); */
 			break;
 		default:
 			panic("mpfps Base Table HOSED!");
 			/* NOTREACHED */
 		}
 
 		totalSize -= basetable_entry_types[type].length;
 		/* step to the next entry without a cast-as-lvalue */
 		position = (u_char *)position +
 		    basetable_entry_types[type].length;
 	}
 
 	if (boot_cpu_id == -1)
 		panic("NO BSP found!");
 
 	/* report fact that its NOT a default configuration */
 	return 0;
 }
 
 
 void
 assign_apic_irq(int apic, int intpin, int irq)
 {
 	int x;
 	
 	if (int_to_apicintpin[irq].ioapic != -1)
 		panic("assign_apic_irq: inconsistent table");
 	
 	int_to_apicintpin[irq].ioapic = apic;
 	int_to_apicintpin[irq].int_pin = intpin;
 	int_to_apicintpin[irq].apic_address = ioapic[apic];
 	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 	
 	for (x = 0; x < nintrs; x++) {
 		if ((io_apic_ints[x].int_type == 0 || 
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector == 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
 		    io_apic_ints[x].dst_apic_int == intpin)
 			io_apic_ints[x].int_vector = irq;
 	}
 }
 
 /*
  * Undo assign_apic_irq() for 'irq'.
  *
  * Clears int_to_apicintpin[irq] and marks every INT or ExtInt entry of
  * io_apic_ints[] that targets the APIC pin the irq was bound to, and
  * that currently has a vector assigned, as unassigned (0xff) again.
  * Panics if the irq was not assigned.
  */
 void
 revoke_apic_irq(int irq)
 {
 	int x;
 	int oldapic;
 	int oldintpin;
 	
 	if (int_to_apicintpin[irq].ioapic == -1)
 		panic("revoke_apic_irq: inconsistent table");
 	
 	/* remember the binding before wiping it */
 	oldapic = int_to_apicintpin[irq].ioapic;
 	oldintpin = int_to_apicintpin[irq].int_pin;
 
 	int_to_apicintpin[irq].ioapic = -1;
 	int_to_apicintpin[irq].int_pin = 0;
 	int_to_apicintpin[irq].apic_address = NULL;
 	int_to_apicintpin[irq].redirindex = 0;
 	
 	for (x = 0; x < nintrs; x++) {
 		/*
 		 * Match entries that HAVE a vector; testing for 0xff here
 		 * would only rewrite entries that are already unassigned.
 		 */
 		if ((io_apic_ints[x].int_type == 0 || 
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector != 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 		    io_apic_ints[x].dst_apic_int == oldintpin)
 			io_apic_ints[x].int_vector = 0xff;
 	}
 }
 
 
 static void
 allocate_apic_irq(int intr)
 {
 	int apic;
 	int intpin;
 	int irq;
 	
 	if (io_apic_ints[intr].int_vector != 0xff)
 		return;		/* Interrupt handler already assigned */
 	
 	if (io_apic_ints[intr].int_type != 0 &&
 	    (io_apic_ints[intr].int_type != 3 ||
 	     (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 	      io_apic_ints[intr].dst_apic_int == 0)))
 		return;		/* Not INT or ExtInt on != (0, 0) */
 	
 	irq = 0;
 	while (irq < APIC_INTMAPSIZE &&
 	       int_to_apicintpin[irq].ioapic != -1)
 		irq++;
 	
 	if (irq >= APIC_INTMAPSIZE)
 		return;		/* No free interrupt handlers */
 	
 	apic = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 	intpin = io_apic_ints[intr].dst_apic_int;
 	
 	assign_apic_irq(apic, intpin, irq);
 	io_apic_setup_intpin(apic, intpin);
 }
 
 
 static void
 swap_apic_id(int apic, int oldid, int newid)
 {
 	int x;
 	int oapic;
 	
 
 	if (oldid == newid)
 		return;			/* Nothing to do */
 	
 	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 	       apic, oldid, newid);
 	
 	/* Swap physical APIC IDs in interrupt entries */
 	for (x = 0; x < nintrs; x++) {
 		if (io_apic_ints[x].dst_apic_id == oldid)
 			io_apic_ints[x].dst_apic_id = newid;
 		else if (io_apic_ints[x].dst_apic_id == newid)
 			io_apic_ints[x].dst_apic_id = oldid;
 	}
 	
 	/* Swap physical APIC IDs in IO_TO_ID mappings */
 	for (oapic = 0; oapic < mp_napics; oapic++)
 		if (IO_TO_ID(oapic) == newid)
 			break;
 	
 	if (oapic < mp_napics) {
 		printf("Changing APIC ID for IO APIC #%d from "
 		       "%d to %d in MP table\n",
 		       oapic, newid, oldid);
 		IO_TO_ID(oapic) = oldid;
 	}
 	IO_TO_ID(apic) = newid;
 }
 
 
 /*
  * Rebuild the physical APIC ID -> logical number table (ID_TO_IO) from
  * the CPU_TO_ID and IO_TO_ID tables.
  *
  * IDs outside 0..NAPICID-1 are skipped.  That includes -1, the value
  * mptable_pass2() stores for unset entries, which must not be used as
  * an index.
  */
 static void
 fix_id_to_io_mapping(void)
 {
 	int x;
 	int id;
 
 	for (x = 0; x < NAPICID; x++)
 		ID_TO_IO(x) = -1;
 	
 	for (x = 0; x <= mp_naps; x++) {
 		id = CPU_TO_ID(x);
 		if (id >= 0 && id < NAPICID)
 			ID_TO_IO(id) = x;
 	}
 	
 	for (x = 0; x < mp_napics; x++) {
 		id = IO_TO_ID(x);
 		if (id >= 0 && id < NAPICID)
 			ID_TO_IO(id) = x;
 	}
 }
 
 
 /*
  * Return the lowest physical APIC ID that is used neither by a CPU
  * (logical 0..mp_naps) nor by an IO APIC (logical 0..mp_napics-1).
  * Returns NAPICID when every ID is taken.
  */
 static int
 first_free_apic_id(void)
 {
 	int candidate, n, taken;
 
 	for (candidate = 0; candidate < NAPICID; candidate++) {
 		taken = 0;
 
 		/* claimed by a CPU? */
 		for (n = 0; n <= mp_naps && !taken; n++)
 			if (CPU_TO_ID(n) == candidate)
 				taken = 1;
 
 		/* claimed by an IO APIC? */
 		for (n = 0; n < mp_napics && !taken; n++)
 			if (IO_TO_ID(n) == candidate)
 				taken = 1;
 
 		if (!taken)
 			return candidate;
 	}
 	return candidate;
 }
 
 
 /*
  * Decide whether physical ID 'id' may be used for logical IO APIC
  * 'apic'.
  *
  * Returns 0 when the ID is NAPICID or above, is used by a CPU, or is
  * used by an IO APIC with a lower logical number; returns 1 otherwise.
  */
 static int
 io_apic_id_acceptable(int apic, int id)
 {
 	int n;
 
 	if (id >= NAPICID)
 		return 0;	/* Out of range */
 
 	/* logical CPUs 0..mp_naps */
 	for (n = 0; n <= mp_naps; n++)
 		if (CPU_TO_ID(n) == id)
 			return 0;	/* Conflict with CPU */
 
 	/* IO APICs numbered below this one */
 	n = 0;
 	while (n < mp_napics && n < apic) {
 		if (IO_TO_ID(n) == id)
 			return 0;	/* Conflict with other APIC */
 		n++;
 	}
 
 	return 1;		/* ID is acceptable for IO APIC */
 }
 
 
 /*
  * Post-process the data collected from the Intel MP specification
  * table:
  *  - renumber a single, mis-numbered PCI bus to bus 0;
  *  - settle the physical ID of every IO APIC;
  *  - rebuild the ID_TO_IO table;
  *  - add an ExtInt entry for pin 0 of the first IO APIC if the table
  *    has none.
  */
 static void
 fix_mp_table(void)
 {
 	int	x;
 	int	id;
 	int	bus_0 = 0;	/* Stop GCC warning */
 	int	bus_pci = 0;	/* Stop GCC warning */
 	int	num_pci_bus;
 	int	apic;		/* IO APIC unit number */
 	int     freeid;		/* Free physical APIC ID */
 	int	physid;		/* Current physical IO APIC ID */
 
 	/*
 	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 	 * did it wrong.  The MP spec says that when more than 1 PCI bus
 	 * exists the BIOS must begin with bus entries for the PCI bus and use
 	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
 	 * exists the BIOS can choose to ignore this ordering, and indeed many
 	 * MP motherboards do ignore it.  This causes a problem when the PCI
 	 * sub-system makes requests of the MP sub-system based on PCI bus
 	 * numbers.	So here we look for the situation and renumber the
 	 * busses and associated INTs in an effort to "make it right".
 	 */
 
 	/* find bus 0, PCI bus, count the number of PCI busses */
 	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 		if (bus_data[x].bus_id == 0) {
 			bus_0 = x;
 		}
 		if (bus_data[x].bus_type == PCI) {
 			++num_pci_bus;
 			bus_pci = x;
 		}
 	}
 	/*
 	 * bus_0 == slot of bus with ID of 0
 	 * bus_pci == slot of last PCI bus encountered
 	 */
 
 	/* check the 1 PCI bus case for sanity */
 	/* if it is number 0 all is well */
 	if (num_pci_bus == 1 &&
 	    bus_data[bus_pci].bus_id != 0) {
 		
 		/* mis-numbered, swap with whichever bus uses slot 0 */
 
 		/* swap the bus entry types */
 		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 		bus_data[bus_0].bus_type = PCI;
 
 		/* swap each relevant INTerrupt entry */
 		id = bus_data[bus_pci].bus_id;
 		for (x = 0; x < nintrs; ++x) {
 			if (io_apic_ints[x].src_bus_id == id) {
 				io_apic_ints[x].src_bus_id = 0;
 			}
 			else if (io_apic_ints[x].src_bus_id == 0) {
 				io_apic_ints[x].src_bus_id = id;
 			}
 		}
 	}
 
 	/* Assign IO APIC IDs.
 	 * 
 	 * First try the existing ID. If a conflict is detected, try
 	 * the ID in the MP table.  If a conflict is still detected, find
 	 * a free id.
 	 *
 	 * We cannot use the ID_TO_IO table before all conflicts have been
 	 * resolved and the table has been corrected.
 	 */
 	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 		
 		/* First try to use the value set by the BIOS */
 		physid = io_apic_get_id(apic);
 		if (io_apic_id_acceptable(apic, physid)) {
 			if (IO_TO_ID(apic) != physid)
 				swap_apic_id(apic, IO_TO_ID(apic), physid);
 			continue;
 		}
 
 		/* Then check if the value in the MP table is acceptable */
 		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 			continue;
 
 		/* Last resort, find a free APIC ID and use it */
 		freeid = first_free_apic_id();
 		if (freeid >= NAPICID)
 			panic("No free physical APIC IDs found");
 		
 		if (io_apic_id_acceptable(apic, freeid)) {
 			swap_apic_id(apic, IO_TO_ID(apic), freeid);
 			continue;
 		}
 		panic("Free physical APIC ID not usable");
 	}
 	fix_id_to_io_mapping();
 
 	/* detect and fix broken Compaq MP table */
 	if (apic_int_type(0, 0) == -1) {
 		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 		/* uses the spare slot allocated as nintrs + 1 in pass 2 */
 		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
 		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
 		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
 		nintrs++;
 	}
 }
 
 
 /*
  * Assign low level interrupt handlers.
  *
  * Resets the int_to_apicintpin[] table, then walks io_apic_ints[] twice:
  * first giving every still-unassigned standard (type 0) ISA/EISA entry
  * the IRQ equal to its source bus IRQ, then giving IRQ 0 to the ExtInt
  * (type 3) entry on pin 0 of IO APIC 0 if IRQ 0 is still free.
  * PCI entries are not touched here.
  */
 static void
 setup_apic_irq_mapping(void)
 {
 	int	i;
 	int	irq;
 
 	/* Mark every IRQ slot as not bound to any IO APIC pin */
 	for (i = 0; i < APIC_INTMAPSIZE; i++) {
 		int_to_apicintpin[i].ioapic = -1;
 		int_to_apicintpin[i].int_pin = 0;
 		int_to_apicintpin[i].apic_address = NULL;
 		int_to_apicintpin[i].redirindex = 0;
 	}
 
 	/* Pass 1: ISA/EISA interrupts keep their bus IRQ number */
 	for (i = 0; i < nintrs; i++) {
 		irq = io_apic_ints[i].src_bus_irq;
 		if (irq >= APIC_INTMAPSIZE)
 			continue;	/* no slot for this IRQ */
 		if (io_apic_ints[i].int_vector != 0xff)
 			continue;	/* entry already assigned */
 		if (int_to_apicintpin[irq].ioapic != -1)
 			continue;	/* IRQ slot already in use */
 		if (!apic_int_is_bus_type(i, ISA) &&
 		    !apic_int_is_bus_type(i, EISA))
 			continue;
 		if (io_apic_ints[i].int_type != 0)
 			continue;	/* not a standard INT */
 		assign_apic_irq(ID_TO_IO(io_apic_ints[i].dst_apic_id),
 		    io_apic_ints[i].dst_apic_int, irq);
 	}
 
 	/* Pass 2: fall back to the ExtInt entry if IRQ 0 is still free */
 	for (i = 0; i < nintrs; i++) {
 		if (io_apic_ints[i].dst_apic_int != 0)
 			continue;
 		if (io_apic_ints[i].dst_apic_id != IO_TO_ID(0))
 			continue;
 		if (io_apic_ints[i].int_vector != 0xff)
 			continue;
 		if (int_to_apicintpin[0].ioapic != -1)
 			continue;
 		if (io_apic_ints[i].int_type != 3)
 			continue;
 		assign_apic_irq(0, 0, 0);
 		break;		/* only the first match is used */
 	}
 	/* PCI interrupt assignment is deferred */
 }
 
 
 /*
  * Record one MP table processor entry.
  *
  * Disabled entries are ignored.  The entry flagged as boot processor
  * becomes logical CPU 0; any other entry is stored as logical CPU 'cpu'
  * provided 'cpu' is below MAXCPU.  Panics if the APIC ID is not below
  * NAPICID.
  *
  * Returns 1 when a new AP was added to the tables, 0 otherwise.
  */
 static int
 processor_entry(proc_entry_ptr entry, int cpu)
 {
 	/* skip processors the table marks as unusable */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_EN) == 0)
 		return 0;
 
 	if (entry->apic_id >= NAPICID)
 		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 
 	/* the boot processor always takes logical slot 0 */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_BP) != 0) {
 		boot_cpu_id = entry->apic_id;
 		CPU_TO_ID(0) = entry->apic_id;
 		ID_TO_CPU(entry->apic_id) = 0;
 		return 0;	/* its already been counted */
 	}
 
 	/* no room left for another AP */
 	if (cpu >= MAXCPU)
 		return 0;
 
 	/* add another AP to the list */
 	CPU_TO_ID(cpu) = entry->apic_id;
 	ID_TO_CPU(entry->apic_id) = cpu;
 	return 1;
 }
 
 
 /*
  * Record one MP table bus entry in bus_data[bus].
  *
  * The bus type string of the entry (at most 6 characters, ended early by
  * a blank) is translated with lookup_bus_type(); an unknown name panics.
  * Always returns 1.
  */
 static int
 bus_entry(bus_entry_ptr entry, int bus)
 {
 	int	len;
 	int	type;
 	char	name[8];
 
 	/* copy the name up to the first blank, 6 characters at most */
 	for (len = 0; len < 6 && entry->bus_type[len] != ' '; ++len)
 		name[len] = entry->bus_type[len];
 	name[len] = '\0';
 
 	/* encode the name into an index */
 	type = lookup_bus_type(name);
 	if (type == UNKNOWN_BUSTYPE)
 		panic("unknown bus type: '%s'", name);
 
 	bus_data[bus].bus_id = entry->bus_id;
 	bus_data[bus].bus_type = type;
 
 	return 1;
 }
 
 
 /*
  * Record one MP table IO APIC entry as logical IO APIC 'apic'.
  *
  * Returns 0 for an entry that is not enabled, 1 once the entry has been
  * recorded.
  */
 static int
 io_apic_entry(io_apic_entry_ptr entry, int apic)
 {
 	if ((entry->apic_flags & IOAPICENTRY_FLAG_EN) == 0)
 		return 0;	/* not usable */
 
 	/* logical -> physical is always recorded */
 	IO_TO_ID(apic) = entry->apic_id;
 
 	/* physical -> logical only when the ID is below NAPICID */
 	if (entry->apic_id < NAPICID) {
 		ID_TO_IO(entry->apic_id) = apic;
 	}
 
 	return 1;
 }
 
 
 /*
  * Translate a bus type name into its type code.
  *
  * Returns the 'type' of the first bus_type_table[] entry whose name
  * compares equal to 'name', or UNKNOWN_BUSTYPE if none does.
  */
 static int
 lookup_bus_type(char *name)
 {
 	int	i;
 
 	for (i = 0; i < MAX_BUSTYPE; ++i) {
 		if (strcmp(bus_type_table[i].name, name) != 0)
 			continue;
 		return bus_type_table[i].type;
 	}
 
 	return UNKNOWN_BUSTYPE;
 }
 
 
 /*
  * Record one MP table interrupt entry in io_apic_ints[intr].
  *
  * All fields are copied from the entry.  The exception is a destination
  * APIC ID of 255: the first IO APIC whose maximum redirection entry is at
  * least the destination pin is substituted; if there is none, 255 is
  * kept.  Always returns 1.
  */
 static int
 int_entry(int_entry_ptr entry, int intr)
 {
 	int	apic;
 	int	dst_id;
 
 	dst_id = entry->dst_apic_id;
 	if (dst_id == 255) {
 		/* This signal goes to all IO APICS.  Select an IO APIC
 		   with sufficient number of interrupt pins */
 		for (apic = 0; apic < mp_napics; apic++) {
 			if (((io_apic_read(apic, IOAPIC_VER) &
 			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
 			    entry->dst_apic_int)
 				break;
 		}
 		if (apic < mp_napics)
 			dst_id = IO_TO_ID(apic);
 	}
 
 	io_apic_ints[intr].int_type = entry->int_type;
 	io_apic_ints[intr].int_flags = entry->int_flags;
 	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
 	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
 	io_apic_ints[intr].dst_apic_id = dst_id;
 	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
 
 	return 1;
 }
 
 
 /*
  * Tell whether interrupt entry 'intr' originates from a bus of the given
  * type.
  *
  * Returns 1 if some bus_data[] slot has both the entry's source bus ID
  * and the requested bus type, 0 otherwise.
  */
 static int
 apic_int_is_bus_type(int intr, int bus_type)
 {
 	int	i;
 
 	for (i = 0; i < mp_nbusses; ++i) {
 		if (bus_data[i].bus_id != io_apic_ints[intr].src_bus_id)
 			continue;
 		if ((int) bus_data[i].bus_type == bus_type)
 			return 1;
 	}
 
 	return 0;
 }
 
 
 /*
  * Given a traditional ISA INT mask, return an APIC mask.
  *
  * Only the lowest set bit of 'isa_mask' is looked at.  Returns 0 when the
  * mask is empty, when isa_apic_irq() reports no assigned APIC IRQ for that
  * ISA IRQ, or when the APIC IRQ number cannot be represented as a bit of
  * a u_int.  With SKIP_IRQ15_REDIRECT defined, a mask of exactly IRQ15 is
  * returned unchanged.
  */
 u_int
 isa_apic_mask(u_int isa_mask)
 {
 	int isa_irq;
 	int apic_pin;
 
 #if defined(SKIP_IRQ15_REDIRECT)
 	if (isa_mask == (1 << 15)) {
 		printf("skipping ISA IRQ15 redirect\n");
 		return isa_mask;
 	}
 #endif  /* SKIP_IRQ15_REDIRECT */
 
 	isa_irq = ffs(isa_mask);		/* find its bit position */
 	if (isa_irq == 0)			/* doesn't exist */
 		return 0;
 	--isa_irq;				/* make it zero based */
 
 	apic_pin = isa_apic_irq(isa_irq);	/* look for APIC connection */
 	if (apic_pin == -1)
 		return 0;
 
 	/*
 	 * A shift count outside 0..width-1 is undefined behaviour, so an
 	 * IRQ that has no bit in the mask is reported as "no connection".
 	 */
 	if (apic_pin < 0 || apic_pin >= (int)(sizeof(u_int) * 8))
 		return 0;
 
 	/* unsigned shift: 1 << 31 would overflow a signed int */
 	return (1U << apic_pin);		/* convert pin# to a mask */
 }
 
 
 /*
  * Determine which APIC pin an ISA/EISA INT is attached to.
  */
 #define INTTYPE(I)	(io_apic_ints[(I)].int_type)
 #define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
 #define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
 #define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 
 #define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
 /*
  * Returns the IRQ (int_vector) recorded for the first standard (type 0)
  * ISA or EISA entry whose source bus IRQ equals 'isa_irq'.  Returns -1 if
  * that entry is still unassigned (0xff) or if no entry matches.
  */
 int
 isa_apic_irq(int isa_irq)
 {
 	int	intr;
 
 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
 		if (INTTYPE(intr) != 0)			/* not a standard INT */
 			continue;
 		if (SRCBUSIRQ(intr) != isa_irq)
 			continue;
 		if (!apic_int_is_bus_type(intr, ISA) &&
 		    !apic_int_is_bus_type(intr, EISA))
 			continue;
 
 		/* first match decides: unassigned, or found */
 		return (INTIRQ(intr) == 0xff) ? -1 : INTIRQ(intr);
 	}
 	return -1;					/* NOT found */
 }
 
 
 /*
  * Determine which APIC pin a PCI INT is attached to.
  */
 #define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
 #define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 #define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
 /*
  * Returns the IRQ recorded for the first standard (type 0) PCI entry
  * matching the given bus, device and interrupt line ('pciInt' is one
  * based).  An entry that is still unassigned (0xff) is first handed to
  * allocate_apic_irq().  Returns -1 if it stays unassigned or if no entry
  * matches.
  */
 int
 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
 {
 	int	intr;
 	int	line = pciInt - 1;			/* zero based */
 
 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
 		if (INTTYPE(intr) != 0)			/* not a standard INT */
 			continue;
 		if (SRCBUSID(intr) != pciBus ||
 		    SRCBUSDEVICE(intr) != pciDevice ||
 		    SRCBUSLINE(intr) != line)
 			continue;			/* not a candidate IRQ */
 		if (!apic_int_is_bus_type(intr, PCI))
 			continue;
 
 		/* try to allocate an IRQ on demand */
 		if (INTIRQ(intr) == 0xff)
 			allocate_apic_irq(intr);
 		if (INTIRQ(intr) == 0xff)
 			return -1;			/* unassigned */
 		return INTIRQ(intr);			/* exact match */
 	}
 
 	return -1;					/* NOT found */
 }
 
 /*
  * Find another IRQ that serves the same interrupt source as 'irq'.
  *
  * Locates the first standard (type 0) ISA/EISA/PCI entry currently
  * assigned 'irq', then searches the entries after it for one with the
  * same source (same bus; same device and line for PCI, same bus IRQ for
  * ISA/EISA) but a different destination pin.  Returns that entry's IRQ
  * (int_vector), or -1 if either search fails.
  */
 int
 next_apic_irq(int irq) 
 {
 	int intr, ointr;
 	int bus, bustype;
 
 	bus = 0;
 	bustype = 0;
 
 	/* locate the entry that currently owns 'irq' */
 	for (intr = 0; intr < nintrs; intr++) {
 		if (INTIRQ(intr) == irq && INTTYPE(intr) == 0) {
 			bus = SRCBUSID(intr);
 			bustype = apic_bus_type(bus);
 			if (bustype == ISA ||
 			    bustype == EISA ||
 			    bustype == PCI)
 				break;
 		}
 	}
 	if (intr >= nintrs)
 		return -1;
 
 	/* look for a later entry with the same source on another pin */
 	for (ointr = intr + 1; ointr < nintrs; ointr++) {
 		if (INTTYPE(ointr) != 0 || bus != SRCBUSID(ointr))
 			continue;
 		if (bustype == PCI &&
 		    (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr) ||
 		     SRCBUSLINE(intr) != SRCBUSLINE(ointr)))
 			continue;
 		if ((bustype == ISA || bustype == EISA) &&
 		    SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 			continue;
 		if (INTPIN(intr) != INTPIN(ointr))
 			break;
 	}
 
 	return (ointr < nintrs) ? INTIRQ(ointr) : -1;
 }
 #undef SRCBUSLINE
 #undef SRCBUSDEVICE
 #undef SRCBUSID
 #undef SRCBUSIRQ
 
 #undef INTPIN
 #undef INTIRQ
 #undef INTAPIC
 #undef INTTYPE
 
 
 /*
  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
  *
  * XXX FIXME:
  *  Exactly what this means is unclear at this point.  It is a solution
  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
  *  option.
  *
  * As written, neither variant touches any hardware: the function only
  * prints a message when bootverbose is set.  It returns -1 when READY is
  * defined and 0 otherwise.
  */
 int
 undirect_isa_irq(int rirq)
 {
 #if defined(READY)
 	if (bootverbose)
 	    printf("Freeing redirected ISA irq %d.\n", rirq);
 	/** FIXME: tickle the MB redirector chip */
 	return -1;
 #else
 	/* placeholder: just announce the request */
 	if (bootverbose)
 	    printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 	return 0;
 #endif  /* READY */
 }
 
 
 /*
  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt
  *
  * As written, neither variant touches any hardware: the function only
  * prints a message when bootverbose is set.  It returns -1 when READY is
  * defined and 0 otherwise.
  */
 int
 undirect_pci_irq(int rirq)
 {
 #if defined(READY)
 	if (bootverbose)
 		printf("Freeing redirected PCI irq %d.\n", rirq);
 
 	/** FIXME: tickle the MB redirector chip */
 	return -1;
 #else
 	/* placeholder: just announce the request */
 	if (bootverbose)
 		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 		       rirq);
 	return 0;
 #endif  /* READY */
 }
 
 
 /*
  * Map a bus ID to its bus type.
  *
  * Returns the bus_type of the first bus_data[] slot carrying 'id',
  * or -1 if no slot has that ID.
  */
 int
 apic_bus_type(int id)
 {
 	int	slot;
 
 	for (slot = 0; slot < mp_nbusses; ++slot) {
 		if (bus_data[slot].bus_id != id)
 			continue;
 		return bus_data[slot].bus_type;
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * Look up the source bus ID behind a LOGICAL APIC# and pin#.
  *
  * Returns the src_bus_id of the first io_apic_ints[] entry routed to
  * that APIC and pin, or -1 if there is no such entry.
  */
 int
 apic_src_bus_id(int apic, int pin)
 {
 	int	i;
 
 	/* walk every known INTerrupt source */
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return (io_apic_ints[i].src_bus_id);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * Look up the source bus IRQ behind a LOGICAL APIC# and pin#.
  *
  * Returns the src_bus_irq of the first io_apic_ints[] entry routed to
  * that APIC and pin, or -1 if there is no such entry.
  */
 int
 apic_src_bus_irq(int apic, int pin)
 {
 	int	i;
 
 	/* walk every known INTerrupt source */
 	for (i = 0; i < nintrs; i++) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return (io_apic_ints[i].src_bus_irq);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * Look up the INTerrupt type behind a LOGICAL APIC# and pin#.
  *
  * Returns the int_type of the first io_apic_ints[] entry routed to
  * that APIC and pin, or -1 if there is no such entry.
  */
 int
 apic_int_type(int apic, int pin)
 {
 	int	i;
 
 	/* walk every known INTerrupt source */
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return (io_apic_ints[i].int_type);
 	}
 
 	return -1;		/* NOT found */
 }
 
 /*
  * Look up the IRQ assigned to a LOGICAL APIC# and pin#.
  *
  * Uses the first io_apic_ints[] entry routed to that APIC and pin.
  * Returns -1 if there is no such entry or it is unassigned (0xff).
  * Panics if int_to_apicintpin[] disagrees with the entry about which
  * APIC or pin the IRQ belongs to.
  */
 int 
 apic_irq(int apic, int pin)
 {
 	int i;
 	int irq;
 
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id) ||
 		    pin != io_apic_ints[i].dst_apic_int)
 			continue;
 
 		irq = io_apic_ints[i].int_vector;
 		if (irq == 0xff)
 			return -1;	/* unassigned */
 
 		/* cross-check against the reverse mapping */
 		if (int_to_apicintpin[irq].ioapic != apic)
 			panic("apic_irq: inconsistent table");
 		if (int_to_apicintpin[irq].int_pin != pin)
 			panic("apic_irq inconsistent table (2)");
 		return irq;
 	}
 	return -1;
 }
 
 
 /*
  * Look up the trigger mode behind a LOGICAL APIC# and pin#.
  *
  * Returns bits 2-3 of int_flags (as a value 0..3) of the first
  * io_apic_ints[] entry routed to that APIC and pin, or -1 if there is
  * no such entry.
  */
 int
 apic_trigger(int apic, int pin)
 {
 	int	i;
 
 	/* walk every known INTerrupt source */
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return ((io_apic_ints[i].int_flags >> 2) & 0x03);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * Look up the 'active' level behind a LOGICAL APIC# and pin#.
  *
  * Returns bits 0-1 of int_flags of the first io_apic_ints[] entry
  * routed to that APIC and pin, or -1 if there is no such entry.
  */
 int
 apic_polarity(int apic, int pin)
 {
 	int	i;
 
 	/* walk every known INTerrupt source */
 	for (i = 0; i < nintrs; ++i) {
 		if (apic != ID_TO_IO(io_apic_ints[i].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[i].dst_apic_int)
 			continue;
 		return (io_apic_ints[i].int_flags & 0x03);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * set data according to MP defaults
  * FIXME: probably not complete yet...
  *
  * 'type' is the MP default configuration type; values 1..7 are accepted
  * and anything else panics.  The function fills in:
  *  - the APIC ID <-> logical CPU mappings for the BSP and one AP,
  *  - bus_data[0] and bus_data[1] from default_data[type - 1],
  *  - with APIC_IO: the ID mapping of the single IO APIC and the first
  *    16 io_apic_ints[] entries.
  */
 static void
 default_mp_table(int type)
 {
 	int     ap_cpu_id;
 #if defined(APIC_IO)
 	int     io_apic_id;
 	int     pin;
 #endif	/* APIC_IO */
 
 #if 0
 	printf("  MP default config type: %d\n", type);
 	switch (type) {
 	case 1:
 		printf("   bus: ISA, APIC: 82489DX\n");
 		break;
 	case 2:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 3:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 4:
 		printf("   bus: MCA, APIC: 82489DX\n");
 		break;
 	case 5:
 		printf("   bus: ISA+PCI, APIC: Integrated\n");
 		break;
 	case 6:
 		printf("   bus: EISA+PCI, APIC: Integrated\n");
 		break;
 	case 7:
 		printf("   bus: MCA+PCI, APIC: Integrated\n");
 		break;
 	default:
 		printf("   future type\n");
 		break;
 		/* NOTREACHED */
 	}
 #endif	/* 0 */
 
 	/* the BSP's ID is read from its own local APIC */
 	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
 	/* the AP gets whichever of the IDs 0 and 1 the BSP is not using */
 	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
 
 	/* BSP */
 	CPU_TO_ID(0) = boot_cpu_id;
 	ID_TO_CPU(boot_cpu_id) = 0;
 
 	/* one and only AP */
 	CPU_TO_ID(1) = ap_cpu_id;
 	ID_TO_CPU(ap_cpu_id) = 1;
 
 #if defined(APIC_IO)
 	/* one and only IO APIC */
 	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
 
 	/*
 	 * sanity check, refer to MP spec section 3.6.6, last paragraph
 	 * necessary as some hardware isn't properly setting up the IO APIC
 	 *
 	 * Both preprocessor branches below open the same block; it is
 	 * closed by the single brace after the forced ID of 2 is applied.
 	 */
 #if defined(REALLY_ANAL_IOAPICID_VALUE)
 	if (io_apic_id != 2) {
 #else
 	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
 #endif	/* REALLY_ANAL_IOAPICID_VALUE */
 		io_apic_set_id(0, 2);
 		io_apic_id = 2;
 	}
 	IO_TO_ID(0) = io_apic_id;
 	ID_TO_IO(io_apic_id) = 0;
 #endif	/* APIC_IO */
 
 	/* fill out bus entries */
 	switch (type) {
 	case 1:
 	case 2:
 	case 3:
 	case 4:
 	case 5:
 	case 6:
 	case 7:
 		/* columns 1..4 of the default_data row: id/type of two buses */
 		bus_data[0].bus_id = default_data[type - 1][1];
 		bus_data[0].bus_type = default_data[type - 1][2];
 		bus_data[1].bus_id = default_data[type - 1][3];
 		bus_data[1].bus_type = default_data[type - 1][4];
 		break;
 
 	/*
 	 * NOTE(review): an older remark here said "case 4: case 7: MCA NOT
 	 * supported", but types 4 and 7 are accepted by the cases above.
 	 */
 	default:		/* illegal/reserved */
 		panic("BAD default MP config: %d", type);
 		/* NOTREACHED */
 	}
 
 #if defined(APIC_IO)
 	/* general cases from MP v1.4, table 5-2 */
 	for (pin = 0; pin < 16; ++pin) {
 		io_apic_ints[pin].int_type = 0;
 		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
 		io_apic_ints[pin].src_bus_id = 0;
 		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
 		io_apic_ints[pin].dst_apic_id = io_apic_id;
 		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
 	}
 
 	/* special cases from MP v1.4, table 5-2 */
 	if (type == 2) {
 		io_apic_ints[2].int_type = 0xff;	/* N/C */
 		io_apic_ints[13].int_type = 0xff;	/* N/C */
 #if !defined(APIC_MIXED_MODE)
 		/** FIXME: ??? */
 		panic("sorry, can't support type 2 default yet");
 #endif	/* APIC_MIXED_MODE */
 	}
 	else
 		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
 
 	/* pin 0: not connected for type 7, otherwise the 8259 ExtInt */
 	if (type == 7)
 		io_apic_ints[0].int_type = 0xff;	/* N/C */
 	else
 		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
 #endif	/* APIC_IO */
 }
 
 
 /*
  * start each AP in our list
  *
  * 'boot_addr' is the address the AP boot trampoline is installed at (see
  * install_ap_tramp()).  For each of the mp_naps APs this allocates and
  * maps a private data page and an idle stack, points the warm-start
  * vector at the trampoline and calls start_ap().  The BIOS warm-start
  * vector and (except on PC98) the CMOS reset code are saved first and
  * restored afterwards, and the temporary low PTD mappings are removed
  * again before returning.
  *
  * Returns mp_ncpus - 1.
  */
 static int
 start_all_aps(u_int boot_addr)
 {
 	int     x, i, pg;
 	u_char  mpbiosreason;
 	u_long  mpbioswarmvec;
 	struct globaldata *gd;
 	char *stack;
 	uintptr_t kptbase;
 
 	POSTCODE(START_ALL_APS_POST);
 
 	/* ap_init() takes this lock on each AP as it comes up */
 	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
 
 	/* initialize BSP's local APIC */
 	apic_initialize();
 	bsp_apic_ready = 1;
 
 	/* install the AP 1st level boot code */
 	install_ap_tramp(boot_addr);
 
 
 	/* save the current value of the warm-start vector */
 	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
 #ifndef PC98
 	/* also save the CMOS reset code; mpbiosreason is unused on PC98 */
 	outb(CMOS_REG, BIOS_RESET);
 	mpbiosreason = inb(CMOS_DATA);
 #endif
 
 	/* set up temporary P==V mapping for AP boot */
 	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
 	kptbase = (uintptr_t)(void *)KPTphys;
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
 		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
 	invltlb();
 
 	/* start each AP */
 	for (x = 1; x <= mp_naps; ++x) {
 
 		/* This is a bit verbose, it will go away soon.  */
 
 		/* first page of AP's private space */
 		pg = x * i386_btop(sizeof(struct privatespace));
 
 		/* allocate a new private data page */
 		/* NOTE(review): the kmem_alloc() results are used unchecked */
 		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
 
 		/* wire it into the private page table page */
 		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
 
 		/* allocate and set up an idle stack data page */
 		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
 		for (i = 0; i < UPAGES; i++)
 			SMPpt[pg + 1 + i] = (pt_entry_t)
 			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 		/* prime data page for it to use */
 		gd->gd_cpuid = x;
 		globaldata_register(gd);
 
 		/* setup a vector to our boot code */
 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
 #ifndef PC98
 		outb(CMOS_REG, BIOS_RESET);
 		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 #endif
 
 		/* tell the boot code which stack (top of it) and CPU# to use */
 		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
 		bootAP = x;
 
 		/* attempt to start the Application Processor */
 		CHECK_INIT(99);	/* setup checkpoints */
 		if (!start_ap(x, boot_addr)) {
 			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
 			CHECK_PRINT("trace");	/* show checkpoints */
 			/* better panic as the AP may be running loose */
 			printf("panic y/n? [y] ");
 			if (cngetc() != 'n')
 				panic("bye-bye");
 		}
 		CHECK_PRINT("trace");		/* show checkpoints */
 
 		/*
 		 * NOTE(review): when start_ap() failed and the operator
 		 * answered 'n', execution still falls through to the
 		 * bookkeeping below, so the AP is recorded in all_cpus.
 		 */
 
 		/* record its version info */
 		/*
 		 * NOTE(review): slot 0 is only written after this loop in
 		 * this function; confirm it is already valid at this point.
 		 */
 		cpu_apic_versions[x] = cpu_apic_versions[0];
 
 		all_cpus |= (1 << x);		/* record AP in CPU map */
 	}
 
 	/* build our map of 'other' CPUs */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	/* fill in our (BSP) APIC version */
 	cpu_apic_versions[0] = lapic.version;
 
 	/* restore the warmstart vector */
 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
 #ifndef PC98
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, mpbiosreason);
 #endif
 
 	/*
 	 * Set up the idle context for the BSP.  Similar to above except
 	 * that some was done by locore, some by pmap.c and some is implicit
 	 * because the BSP is cpu#0 and the page is initially zero, and also
 	 * because we can refer to variables by name on the BSP..
 	 */
 
 	/* Allocate and setup BSP idle stack */
 	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
 	for (i = 0; i < UPAGES; i++)
 		SMPpt[1 + i] = (pt_entry_t)
 		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 	/* tear down the temporary P==V mapping installed above */
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = 0;
 	pmap_set_opt();
 
 	/* number of APs actually started */
 	/* NOTE(review): assumes mp_ncpus only counts CPUs that came up */
 	return mp_ncpus - 1;
 }
 
 
 /*
  * load the 1st level AP boot code into base memory.
  */
 
 /* targets for relocation */
 extern void bigJump(void);
 extern void bootCodeSeg(void);
 extern void bootDataSeg(void);
 extern void MPentry(void);
 extern u_int MP_GDT;
 extern u_int mp_gdtbase;
 
 /*
  * Copy the AP boot code (bootMP, bootMP_size bytes) to 'boot_addr' and
  * patch the copy: the lgdt argument, the jump target for MPentry(), and
  * the 24-bit fields at bootCodeSeg and bootDataSeg are rewritten to match
  * the address the code now lives at.  The copy is written through
  * boot_addr + KERNBASE.
  */
 static void
 install_ap_tramp(u_int boot_addr)
 {
 	int     x;
 	int     size = *(int *) ((u_long) & bootMP_size);
 	u_char *src = (u_char *) ((u_long) bootMP);
 	u_char *dst = (u_char *) boot_addr + KERNBASE;
 	u_int   boot_base = (u_int) bootMP;
 	u_int8_t *dst8;
 	u_int16_t *dst16;
 	u_int32_t *dst32;
 
 	POSTCODE(INSTALL_AP_TRAMP_POST);
 
 	/* byte-for-byte copy of the boot code to its run address */
 	for (x = 0; x < size; ++x)
 		*dst++ = *src++;
 
 	/*
 	 * modify addresses in code we just moved to basemem. unfortunately we
 	 * need fairly detailed info about mpboot.s for this to work.  changes
 	 * to mpboot.s might require changes here.
 	 *
 	 * Each patch location is found by taking a symbol's offset from the
 	 * start of the boot code (symbol - boot_base) and applying it to the
 	 * copy.
 	 */
 
 	/* boot code is located in KERNEL space */
 	dst = (u_char *) boot_addr + KERNBASE;	/* rewind to start of copy */
 
 	/* modify the lgdt arg */
 	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
 	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
 
 	/* modify the ljmp target for MPentry() */
 	/* the + 1 presumably skips the opcode byte - confirm in mpboot.s */
 	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
 	*dst32 = ((u_int) MPentry - KERNBASE);
 
 	/* modify the target for boot code segment */
 	/* low 16 bits of boot_addr, then bits 16-23 in the following byte */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 
 	/* modify the target for boot data segment */
 	/* same 16 + 8 bit layout as the code segment patch above */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 }
 
 
 /*
  * this function starts the AP (application processor) identified
  * by the logical CPU number 'logical_cpu'; its physical APIC ID is
  * looked up with CPU_TO_ID().  It does quite a "song and dance"
  * to accomplish this.  This is necessary because of the nuances
  * of the different hardware we might encounter.  It ain't pretty,
  * but it seems to work.
  *
  * 'boot_addr' is the address of the boot code; bits 12-19 of it are
  * sent as the STARTUP IPI vector.
  *
  * Returns 1 if mp_ncpus grew before the APIC timer armed below ran out,
  * 0 otherwise.
  */
 static int
 start_ap(int logical_cpu, u_int boot_addr)
 {
 	int     physical_cpu;
 	int     vector;
 	int     cpus;
 	u_long  icr_lo, icr_hi;
 
 	POSTCODE(START_AP_POST);
 
 	/* get the PHYSICAL APIC ID# */
 	physical_cpu = CPU_TO_ID(logical_cpu);
 
 	/* calculate the vector */
 	vector = (boot_addr >> 12) & 0xff;
 
 	/* used as a watchpoint to signal AP startup */
 	cpus = mp_ncpus;
 
 	/*
 	 * first we do an INIT/RESET IPI this INIT IPI might be run, reseting
 	 * and running the target CPU. OR this INIT IPI might be latched (P5
 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 	 * ignored.
 	 */
 
 	/* setup the address for the target AP */
 	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
 	icr_hi |= (physical_cpu << 24);
 	lapic.icr_hi = icr_hi;
 
 	/* do an INIT IPI: assert RESET */
 	/* only the top 12 bits of icr_lo are kept; command bits are ORed in */
 	icr_lo = lapic.icr_lo & 0xfff00000;
 	lapic.icr_lo = icr_lo | 0x0000c500;
 
 	/* wait for pending status end */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/* do an INIT IPI: deassert RESET */
 	lapic.icr_lo = icr_lo | 0x00008500;
 
 	/* wait for pending status end */
 	u_sleep(10000);		/* wait ~10mS */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/*
 	 * next we do a STARTUP IPI: the previous INIT IPI might still be
 	 * latched, (P5 bug) this 1st STARTUP would then terminate
 	 * immediately, and the previously started INIT IPI would continue. OR
 	 * the previous INIT IPI has already run. and this STARTUP IPI will
 	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 	 * will run.
 	 */
 
 	/* do a STARTUP IPI */
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/*
 	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/* wait for it to start */
 	/* poll mp_ncpus until the APIC timer reads back as zero */
 	set_apic_timer(5000000);/* == 5 seconds */
 	while (read_apic_timer())
 		if (mp_ncpus > cpus)
 			return 1;	/* return SUCCESS */
 
 	return 0;		/* return FAILURE */
 }
 
 /*
  * Flush the TLB on all other CPUs
  *
  * Sends IPI_INVLTLB to every CPU but the caller, and only once both
  * smp_started and invltlb_ok are set.  Without APIC_IO this function
  * does nothing.
  *
  * XXX: Needs to handshake and wait for completion before proceeding.
  */
 void
 smp_invltlb(void)
 {
 #if defined(APIC_IO)
 	if (smp_started && invltlb_ok)
 		ipi_all_but_self(IPI_INVLTLB);
 #endif  /* APIC_IO */
 }
 
 /*
  * Invalidate the local TLB entry for 'addr' with the invlpg instruction,
  * then ask the other CPUs to flush via smp_invltlb().
  */
 void
 invlpg(u_int addr)
 {
 	/* "memory" clobber: keep the compiler from moving accesses across */
 	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 /*
  * Flush the local TLB by reloading %cr3 with its current value, then ask
  * the other CPUs to flush via smp_invltlb().
  */
 void
 invltlb(void)
 {
 	u_long  temp;	/* scratch register for the %cr3 round trip */
 
 	/*
 	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
 	 * inlined.
 	 */
 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 
 /*
  * This is called once the rest of the system is up and running and we're
  * ready to let the AP's out of the pen.
  *
  * Runs on each AP.  The AP spins until aps_ready is set, then, one AP at
  * a time under ap_boot_mtx, finishes its per-CPU setup (registers, FPU,
  * SSE, local APIC, memory range attributes).  The AP that brings
  * smp_cpus up to mp_ncpus sets smp_started; every AP waits for that flag
  * and then enters the scheduler through cpu_throw().  Does not return.
  */
 extern void	enable_sse(void);
 
 void
 ap_init(void)
 {
 	u_int	apic_id;
 
 	/* spin until all the AP's are ready */
 	while (!aps_ready)
 		/* spin */ ;
 
 	/*
 	 * Set curproc to our per-cpu idleproc so that mutexes have
 	 * something unique to lock with.
 	 */
 	PCPU_SET(curproc, PCPU_GET(idleproc));
 	PCPU_SET(spinlocks, NULL);
 
 	/* lock against other AP's that are waking up */
 	mtx_lock_spin(&ap_boot_mtx);
 
 	/* BSP may have changed PTD while we're waiting for the lock */
 	cpu_invltlb();
 
 	/* count ourselves; compared against mp_ncpus further down */
 	smp_cpus++;
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 	lidt(&r_idt);
 #endif
 
 	/* Build our map of 'other' CPUs. */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
 
 	/* set up CPU registers and state */
 	cpu_setregs();
 
 	/* set up FPU state on the AP */
 	npxinit(__INITIAL_NPXCW__);
 
 	/* set up SSE registers */
 	enable_sse();
 
 	/* A quick check from sanity claus */
 	/*
 	 * The logical CPU number derived from our local APIC ID must match
 	 * the cpuid stored in our per-CPU data.
 	 * NOTE(review): this uses a literal 0x0f000000 mask where other
 	 * code in this file uses APIC_ID_MASK - confirm they agree.
 	 */
 	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
 	if (PCPU_GET(cpuid) != apic_id) {
 		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
 		printf("SMP: apic_id = %d\n", apic_id);
 		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
 		panic("cpuid mismatch! boom!!");
 	}
 
 	/* Init local apic for irq's */
 	apic_initialize();
 
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
 	/*
 	 * Activate smp_invltlb, although strictly speaking, this isn't
 	 * quite correct yet.  We should have a bitfield for cpus willing
 	 * to accept TLB flush IPI's or something and sync them.
 	 */
 	if (smp_cpus == mp_ncpus) {
 		invltlb_ok = 1;
 		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
 		smp_active = 1;	 /* historic */
 	}
 
 	/* let other AP's wake up now */
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* wait until all the AP's are up */
 	while (smp_started == 0)
 		; /* nothing */
 
 	/* start this CPU's switch-time accounting from now */
 	microuptime(PCPU_PTR(switchtime));
 	PCPU_SET(switchticks, ticks);
 
 	/* ok, now grab sched_lock and enter the scheduler */
 	enable_intr();
 	mtx_lock_spin(&sched_lock);
 	cpu_throw();	/* doesn't return */
 
 	panic("scheduler returned us to ap_init");
 }
 
 /*
  * For statclock, we send an IPI to all CPUs to have them call this
  * function.
  *
  * Runs statclock_process() for curproc under sched_lock, passing the
  * program counter and user-mode flag taken from the trap frame.
  */
 void
 forwarded_statclock(struct trapframe frame)
 {
 
 	mtx_lock_spin(&sched_lock);
 	statclock_process(curproc, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
 	mtx_unlock_spin(&sched_lock);
 }
 
 void
 forward_statclock(void)
 {
 	int map;
 
 	CTR0(KTR_SMP, "forward_statclock");
 
 	if (!smp_started || !invltlb_ok || cold || panicstr)
 		return;
 
 	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 	if (map != 0)
 		ipi_selected(map, IPI_STATCLOCK);
 }
 
 /*
  * For each hardclock(), we send an IPI to all other CPUs to have them
  * execute this function.  It would be nice to reduce contention on
  * sched_lock if we could simply peek at the CPU to determine the user/kernel
  * state and call hardclock_process() on the CPU receiving the clock interrupt
  * and then just use a simple IPI to handle any ast's if needed.
  *
  * Runs hardclock_process() for curproc under sched_lock, passing the
  * user-mode flag taken from the trap frame.
  */
 void
 forwarded_hardclock(struct trapframe frame)
 {
 
 	mtx_lock_spin(&sched_lock);
 	hardclock_process(curproc, TRAPF_USERMODE(&frame));
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Ask the other CPUs to run their hardclock processing.
  *
  * Sends IPI_HARDCLOCK to every CPU in this CPU's other_cpus set that is
  * not in stopped_cpus.  Does nothing until SMP is started and invltlb_ok
  * is set, while the system is cold, or once a panic is in progress.
  */
 void 
 forward_hardclock(void)
 {
 	u_int targets;
 
 	CTR0(KTR_SMP, "forward_hardclock");
 
 	if (!smp_started || !invltlb_ok || cold || panicstr)
 		return;
 
 	/* everyone else, minus the CPUs that are stopped */
 	targets = PCPU_GET(other_cpus) & ~stopped_cpus ;
 	if (targets == 0)
 		return;
 
 	ipi_selected(targets, IPI_HARDCLOCK);
 }
 
 #ifdef APIC_INTR_REORDER
 /*
  *	Maintain mapping from softintr vector to isr bit in local apic.
  *
  *	Records in apic_isrbit_location[intr] the address of the local
  *	APIC ISR word and the bit mask within it that belong to 'vector'.
  *	Panics if 'intr' or 'vector' is out of range.
  */
 void
 set_lapic_isrloc(int intr, int vector)
 {
 	/*
 	 * NOTE(review): intr == 32 is accepted here; confirm that
 	 * apic_isrbit_location[] really has 33 elements, otherwise this
 	 * bound should be "intr >= 32".
 	 */
 	if (intr < 0 || intr > 32)
 		panic("set_lapic_isrloc: bad intr argument: %d", intr);
 	if (vector < ICU_OFFSET || vector > 255)
 		panic("set_lapic_isrloc: bad vector argument: %d", vector);
 
 	/*
 	 * vector >> 5 selects the group of 32 vectors; the << 2 scales it
 	 * in units of lapic.isr0's type (presumably the spacing between
 	 * consecutive ISR registers - confirm against the lapic layout).
 	 */
 	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
 
 	/* unsigned shift: 1 << 31 would overflow a signed int */
 	apic_isrbit_location[intr].bit = (1U << (vector & 31));
 }
 #endif
 
 /*
  * send an IPI to a set of cpus.
  *
  * 'cpus' and 'ipi' are handed to selected_apic_ipi() with fixed delivery
  * mode.  The callers in this file pass 'cpus' as a bit mask of CPUs.
  */
 void
 ipi_selected(u_int32_t cpus, u_int ipi)
 {
 
 	/*
 	 * __func__ is a predefined identifier, not a string literal, so it
 	 * cannot be pasted onto the format string; pass it through %s.
 	 */
 	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
 	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs, including myself
  *
  * Uses the APIC_DEST_ALLISELF destination with fixed delivery mode.
  */
 void
 ipi_all(u_int ipi)
 {
 
 	/*
 	 * __func__ is a predefined identifier, not a string literal, so it
 	 * cannot be pasted onto the format string; pass it through %s.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send an IPI to all CPUs EXCEPT myself
  *
  * Uses the APIC_DEST_ALLESELF destination with fixed delivery mode.
  */
 void
 ipi_all_but_self(u_int ipi)
 {
 
 	/*
 	 * __func__ is a predefined identifier, not a string literal, so it
 	 * cannot be pasted onto the format string; pass it through %s.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send an IPI to myself
  *
  * Uses the APIC_DEST_SELF destination with fixed delivery mode.
  */
 void
 ipi_self(u_int ipi)
 {
 
 	/*
 	 * __func__ is a predefined identifier, not a string literal, so it
 	 * cannot be pasted onto the format string; pass it through %s.
 	 */
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * Set aps_ready (with a release store) so that the APs spinning on it in
  * ap_init() may proceed.  Registered as a SYSINIT hook just below; the
  * argument is unused.
  */
 void
 release_aps(void *dummy __unused)
 {
 	atomic_store_rel_int(&aps_ready, 1);
 }
 
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
Index: head/sys/i386/i386/pmap.c
===================================================================
--- head/sys/i386/i386/pmap.c	(revision 82308)
+++ head/sys/i386/i386/pmap.c	(revision 82309)
@@ -1,3397 +1,3398 @@
 /*
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
  * $FreeBSD$
  */
 
 /*
  *	Manages physical address maps.
  *
  *	In addition to hardware address maps, this
  *	module is called upon to provide software-use-only
  *	maps which may or may not be stored in the same
  *	form as hardware maps.  These pseudo-maps are
  *	used to store intermediate results from copy
  *	operations to and from address spaces.
  *
  *	Since the information managed by this module is
  *	also stored by the logical address mapping module,
  *	this module may throw away valid virtual-to-physical
  *	mappings at almost any time.  However, invalidations
  *	of virtual-to-physical mappings must be done as
  *	requested.
  *
  *	In order to cope with hardware architectures which
  *	make virtual-to-physical map invalidates expensive,
  *	this module may delay invalidate or reduced protection
  *	operations until such time as they are actually
  *	necessary.  This module is given full information as
  *	to which processors are currently using which maps,
  *	and to when physical maps must be made correct.
  */
 
 #include "opt_disable_pse.h"
 #include "opt_pmap.h"
 #include "opt_msgbuf.h"
+#include "opt_upages.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mman.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sx.h>
 #include <sys/user.h>
 #include <sys/vmmeter.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_zone.h>
 
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 #if defined(SMP) || defined(APIC_IO)
 #include <machine/smp.h>
 #include <machine/apic.h>
 #include <machine/segments.h>
 #include <machine/tss.h>
 #include <machine/globaldata.h>
 #endif /* SMP || APIC_IO */
 
 #define PMAP_KEEP_PDIRS
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC 200
 #endif
 
 #if defined(DIAGNOSTIC)
 #define PMAP_DIAGNOSTIC
 #endif
 
 #define MINPV 2048
 
 #if !defined(PMAP_DIAGNOSTIC)
 #define PMAP_INLINE __inline
 #else
 #define PMAP_INLINE
 #endif
 
 /*
  * Get PDEs and PTEs for user/kernel address space
  *
  * pmap_pde(m, v) yields the address of the page directory entry that
  * covers virtual address v in pmap m; pdir_pde(m, v) yields the entry
  * itself from page directory array m.
  */
 #define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
 #define pdir_pde(m, v) ((m)[(vm_offset_t)(v) >> PDRSHIFT])
 
 /*
  * Flag tests on the entry that 'pte' points to.  The argument is
  * parenthesized so that an expression such as "base + i" is cast and
  * dereferenced as a whole.
  */
 #define pmap_pde_v(pte)		((*(int *)(pte) & PG_V) != 0)
 #define pmap_pte_w(pte)		((*(int *)(pte) & PG_W) != 0)
 #define pmap_pte_m(pte)		((*(int *)(pte) & PG_M) != 0)
 #define pmap_pte_u(pte)		((*(int *)(pte) & PG_A) != 0)
 #define pmap_pte_v(pte)		((*(int *)(pte) & PG_V) != 0)
 
 /*
  * pmap_pte_set_w sets or clears PG_W according to v.
  * pmap_pte_set_prot clears the PG_PROT bits and then ors in v; note
  * that it evaluates 'pte' twice.
  */
 #define pmap_pte_set_w(pte, v) ((v)?(*(int *)(pte) |= PG_W):(*(int *)(pte) &= ~PG_W))
 #define pmap_pte_set_prot(pte, v) ((*(int *)(pte) &= ~PG_PROT), (*(int *)(pte) |= (v)))
 
 /*
  * Given a map and a machine independent protection code,
  * convert to an i386 protection code.
  */
 #define pte_prot(m, p)	(protection_codes[p])
 static int protection_codes[8];
 
 static struct pmap kernel_pmap_store;
 pmap_t kernel_pmap;
 LIST_HEAD(pmaplist, pmap);
 struct pmaplist allpmaps;
 
 vm_offset_t avail_start;	/* PA of first available physical page */
 vm_offset_t avail_end;		/* PA of last available physical page */
 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
 static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
 static int pgeflag;		/* PG_G or-in */
 static int pseflag;		/* PG_PS or-in */
 
 static vm_object_t kptobj;
 
 static int nkpt;
 vm_offset_t kernel_vm_end;
 
 /*
  * Data for the pv entry allocation mechanism
  */
 static vm_zone_t pvzone;
 static struct vm_zone pvzone_store;
 static struct vm_object pvzone_obj;
 static int pv_entry_count=0, pv_entry_max=0, pv_entry_high_water=0;
 static int pmap_pagedaemon_waken = 0;
 static struct pv_entry *pvinit;
 
 /*
  * All those kernel PT submaps that BSD is so fond of
  */
 pt_entry_t *CMAP1 = 0;
 static pt_entry_t *CMAP2, *ptmmap;
 caddr_t CADDR1 = 0, ptvmmap = 0;
 static caddr_t CADDR2;
 static pt_entry_t *msgbufmap;
 struct msgbuf *msgbufp=0;
 
 /*
  * Crashdump maps.
  */
 static pt_entry_t *pt_crashdumpmap;
 static caddr_t crashdumpmap;
 
 #ifdef SMP
 extern pt_entry_t *SMPpt;
 #endif
 static pt_entry_t *PMAP1 = 0;
 static unsigned *PADDR1 = 0;
 
 static PMAP_INLINE void	free_pv_entry __P((pv_entry_t pv));
 static unsigned * get_ptbase __P((pmap_t pmap));
 static pv_entry_t get_pv_entry __P((void));
 static void	i386_protection_init __P((void));
 static __inline void	pmap_changebit __P((vm_page_t m, int bit, boolean_t setem));
 
 static void	pmap_remove_all __P((vm_page_t m));
 static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va,
 				      vm_page_t m, vm_page_t mpte));
 static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq,
 					vm_offset_t sva));
 static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va));
 static int pmap_remove_entry __P((struct pmap *pmap, vm_page_t m,
 					vm_offset_t va));
 static boolean_t pmap_testbit __P((vm_page_t m, int bit));
 static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va,
 		vm_page_t mpte, vm_page_t m));
 
 static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va));
 
 static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p));
 static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex));
 static unsigned * pmap_pte_quick __P((pmap_t pmap, vm_offset_t va));
 static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex));
 static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t));
 static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
 
 static unsigned pdir4mb;
 
 /*
  *	Routine:	pmap_pte
  *	Function:
  *		Look up the page table entry for the given
  *		map/virtual_address pair.
  *
  *		Returns the page directory entry itself when it has
  *		PG_PS set, the PTE slot reached through get_ptbase()
  *		when the directory entry is non-zero, and 0 when the
  *		pmap is null or the directory entry is zero.
  */
 
 PMAP_INLINE unsigned *
 pmap_pte(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	unsigned *pde;
 
 	if (!pmap)
 		return (0);
 
 	pde = (unsigned *) pmap_pde(pmap, va);
 	if (*pde & PG_PS)
 		return pde;
 	if (*pde == 0)
 		return (0);
 	return get_ptbase(pmap) + i386_btop(va);
 }
 
 /*
  * Round the kernel virtual free pointer up to the next NBPDR (4MB)
  * boundary when the CPU advertises PSE and PSE is not compiled out;
  * otherwise return it unchanged.  The alignment lets a large (4MB)
  * page cover much of the kernel (.text, .data, .bss).
  */
 static vm_offset_t
 pmap_kmem_choose(vm_offset_t addr)
 {
 #ifndef DISABLE_PSE
 	if (cpu_feature & CPUID_PSE)
 		return ((addr + (NBPDR - 1)) & ~(NBPDR - 1));
 #endif
 	return (addr);
 }
 
 /*
  *	Bootstrap the system enough to run with virtual memory.
  *
  *	On the i386 this is called after mapping has already been enabled
  *	and just syncs the pmap module with what has already been done.
  *	[We can't call it easily with mapping off since the kernel is not
  *	mapped with PA == VA, hence we would have to relocate every address
  *	from the linked base (virtual) address "KERNBASE" to the actual
  *	(physical) address starting relative to 0]
  *
  *	firstaddr becomes avail_start and, offset by KERNBASE, the initial
  *	virtual_avail.  loadaddr is not referenced in this function.
  */
 void
 pmap_bootstrap(firstaddr, loadaddr)
 	vm_offset_t firstaddr;
 	vm_offset_t loadaddr;
 {
 	vm_offset_t va;
 	pt_entry_t *pte;
 	int i;
 
 	avail_start = firstaddr;
 
 	/*
 	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
 	 * large. It should instead be correctly calculated in locore.s and
 	 * not based on 'first' (which is a physical address, not a virtual
 	 * address, for the start of unused physical memory). The kernel
 	 * page tables are NOT double mapped and thus should not be included
 	 * in this calculation.
 	 */
 	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
 	virtual_avail = pmap_kmem_choose(virtual_avail);
 
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
 	/*
 	 * Initialize protection array.
 	 */
 	i386_protection_init();
 
 	/*
 	 * The kernel's pmap is statically allocated so we don't have to use
 	 * pmap_create, which is unlikely to work correctly at this part of
 	 * the boot sequence (XXX and which no longer exists).
 	 */
 	kernel_pmap = &kernel_pmap_store;
 
 	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
 	kernel_pmap->pm_count = 1;
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
 	LIST_INIT(&allpmaps);
 	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
 	nkpt = NKPT;
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
 	 * mapping of pages.
 	 *
 	 * SYSMAP(c, p, v, n) stores the current va (cast to type c) in v
 	 * and the current pte pointer in p, then advances va by n pages
 	 * and pte by n entries.
 	 */
 #define	SYSMAP(c, p, v, n)	\
 	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
 
 	va = virtual_avail;
 	pte = (pt_entry_t *) pmap_pte(kernel_pmap, va);
 
 	/*
 	 * CMAP1/CMAP2 are used for zeroing and copying pages.
 	 */
 	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
 	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
 
 	/*
 	 * Crashdump maps.
 	 */
 	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);
 
 	/*
 	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
 	 * XXX ptmmap is not used.
 	 */
 	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)
 
 	/*
 	 * msgbufp is used to map the system message buffer.
 	 * XXX msgbufmap is not used.
 	 */
 	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
 	       atop(round_page(MSGBUF_SIZE)))
 
 	/*
 	 * ptemap is used for pmap_pte_quick
 	 */
 	SYSMAP(unsigned *, PMAP1, PADDR1, 1);
 
 	/* Everything handed out above is no longer available VA. */
 	virtual_avail = va;
 
 	/* Start with empty CMAP1/CMAP2 entries and clear PTD[0..NKPT-1]. */
 	*(int *) CMAP1 = *(int *) CMAP2 = 0;
 	for (i = 0; i < NKPT; i++)
 		PTD[i] = 0;
 
 	/* pgeflag stays 0 on SMP kernels; otherwise PG_G if the CPU has PGE. */
 	pgeflag = 0;
 #if !defined(SMP)			/* XXX - see also mp_machdep.c */
 	if (cpu_feature & CPUID_PGE) {
 		pgeflag = PG_G;
 	}
 #endif
 	
 /*
  * Initialize the 4MB page size flag
  */
 	pseflag = 0;
 /*
  * The 4MB page version of the initial
  * kernel page mapping.
  */
 	pdir4mb = 0;
 
 #if !defined(DISABLE_PSE)
 	if (cpu_feature & CPUID_PSE) {
 		unsigned ptditmp;
 		/*
 		 * Note that we have enabled PSE mode
 		 */
 		pseflag = PG_PS;
 		/*
 		 * Build a 4MB page directory entry from the PTE that maps
 		 * KERNBASE: keep the NBPDR-aligned part and add the flags.
 		 */
 		ptditmp = *((unsigned *)PTmap + i386_btop(KERNBASE));
 		ptditmp &= ~(NBPDR - 1);
 		ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
 		pdir4mb = ptditmp;
 
 #if !defined(SMP)
 		/*
 		 * Enable the PSE mode.
 		 */
 		load_cr4(rcr4() | CR4_PSE);
 
 		/*
 		 * We can do the mapping here for the single processor
 		 * case.  We simply ignore the old page table page from
 		 * now on.
 		 */
 		/*
 		 * For SMP, we still need 4K pages to bootstrap APs,
 		 * PSE will be enabled as soon as all APs are up.
 		 * (On SMP kernels this section is compiled out and
 		 * pmap_set_opt() installs pdir4mb instead.)
 		 */
 		PTD[KPTDI] = (pd_entry_t) ptditmp;
 		kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
 		invltlb();
 #endif
 	}
 #endif
 
 #ifdef SMP
 	if (cpu_apic_address == 0)
 		panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
 
 	/* local apic is mapped on last page */
 	SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
 	    (cpu_apic_address & PG_FRAME));
 #endif
 
 	invltlb();
 }
 
 #ifdef SMP
 /*
  * Set 4mb pdir for mp startup.
  *
  * When PSE was selected at bootstrap and the CPU advertises it, turn on
  * CR4_PSE on the calling CPU.  On cpuid 0 only, and only if a 4MB
  * directory entry was prepared, install it for KPTDI in both PTD and
  * the kernel pmap and flush the local TLB.
  */
 void
 pmap_set_opt(void)
 {
 	if (!pseflag || (cpu_feature & CPUID_PSE) == 0)
 		return;
 
 	load_cr4(rcr4() | CR4_PSE);
 
 	/* only on BSP */
 	if (!pdir4mb || PCPU_GET(cpuid) != 0)
 		return;
 
 	PTD[KPTDI] = (pd_entry_t)pdir4mb;
 	kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t)pdir4mb;
 	cpu_invltlb();
 }
 #endif
 
 /*
  *	Initialize the pmap module.
  *	Called by vm_init, to initialize any structures that the pmap
  *	system needs to map virtual memory.
  *	pmap_init has been enhanced to support in a fairly consistant
  *	way, discontiguous physical memory.
  */
 void
 pmap_init(phys_start, phys_end)
 	vm_offset_t phys_start, phys_end;
 {
 	int i;
 	int initial_pvs;
 
 	/*
 	 * object for kernel page table pages
 	 */
 	kptobj = vm_object_allocate(OBJT_DEFAULT, NKPDE);
 
 	/*
 	 * Allocate memory for random pmap data structures.  Includes the
 	 * pv_head_table.
 	 */
 
 	for(i = 0; i < vm_page_array_size; i++) {
 		vm_page_t m;
 
 		m = &vm_page_array[i];
 		TAILQ_INIT(&m->md.pv_list);
 		m->md.pv_list_count = 0;
 	}
 
 	/*
 	 * init the pv free list
 	 */
 	initial_pvs = vm_page_array_size;
 	if (initial_pvs < MINPV)
 		initial_pvs = MINPV;
 	pvzone = &pvzone_store;
 	pvinit = (struct pv_entry *) kmem_alloc(kernel_map,
 		initial_pvs * sizeof (struct pv_entry));
 	zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit,
 	    vm_page_array_size);
 
 	/*
 	 * Now it is safe to enable pv_table recording.
 	 */
 	pmap_initialized = TRUE;
 }
 
 /*
  * Size the pv_entry zone.  The limit is
  * (shared pages per process * maxproc) + vm_page_array_size, where the
  * per-process figure defaults to PMAP_SHPGPERPROC and may be overridden
  * by the "vm.pmap.shpgperproc" tunable.  The high water mark is set to
  * nine tenths of that limit so the system can recover from excessive
  * numbers of pv entries.
  */
 void
 pmap_init2()
 {
 	int pages_per_proc;
 
 	pages_per_proc = PMAP_SHPGPERPROC;
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &pages_per_proc);
 
 	pv_entry_max = pages_per_proc * maxproc + vm_page_array_size;
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 
 	zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1);
 }
 
 
 /***************************************************
  * Low level helper routines.....
  ***************************************************/
 
 #if defined(PMAP_DIAGNOSTIC)
 
 /*
  * Diagnostic helper: report a PTE that is marked modified (PG_M)
  * without being writeable (PG_RW), which should never happen.
  * Returns 1 for such an entry and 0 otherwise.
  */
 static int
 pmap_nw_modified(pt_entry_t ptea)
 {
 	int bits = (int) ptea & (PG_M|PG_RW);
 
 	return (bits == PG_M);
 }
 #endif
 
 
 /*
  * Decide whether the modified bit should be examined for 'va'.
  * Addresses inside [kmi.clean_sva, kmi.clean_eva) are exempt (0);
  * everything else is tracked (1).
  */
 static PMAP_INLINE int
 pmap_track_modified(vm_offset_t va)
 {
 	return (va < kmi.clean_sva || va >= kmi.clean_eva);
 }
 
 /*
  * Invalidate the TLB entry for a single page at 'va'.  Kernels built
  * with I386_CPU call invltlb() instead of invlpg() (presumably because
  * the 80386 lacks the single-page instruction -- confirm).
  */
 static PMAP_INLINE void
 invltlb_1pg(vm_offset_t va)
 {
 #ifdef I386_CPU
 	invltlb();
 #else
 	invlpg(va);
 #endif
 }
 
 /*
  * Invalidate the translation for 'va' wherever 'pmap' is marked active
  * in pmap->pm_active.
  */
 static __inline void
 pmap_TLB_invalidate(pmap_t pmap, vm_offset_t va)
 {
 #if defined(SMP)
 	/* This CPU: drop just the one page if the pmap is active here. */
 	if (pmap->pm_active & (1 << PCPU_GET(cpuid)))
 		cpu_invlpg((void *)va);
 	/*
 	 * Some other CPU has the pmap active: call smp_invltlb().
 	 * NOTE(review): it takes no address, so remote CPUs presumably
 	 * get a full flush rather than a single-page one -- confirm.
 	 */
 	if (pmap->pm_active & PCPU_GET(other_cpus))
 		smp_invltlb();
 #else
 	if (pmap->pm_active)
 		invltlb_1pg(va);
 #endif
 }
 
 /*
  * Flush the whole TLB wherever 'pmap' is marked active in
  * pmap->pm_active.  Does nothing when the pmap is active nowhere.
  */
 static __inline void
 pmap_TLB_invalidate_all(pmap_t pmap)
 {
 #if defined(SMP)
 	/* This CPU first, then the others via smp_invltlb(). */
 	if (pmap->pm_active & (1 << PCPU_GET(cpuid)))
 		cpu_invltlb();
 	if (pmap->pm_active & PCPU_GET(other_cpus))
 		smp_invltlb();
 #else
 	if (pmap->pm_active)
 		invltlb();
 #endif
 }
 
 /*
  * Return the base of the PTE array through which 'pmap' can be
  * addressed.  That is PTmap when pmap is the kernel pmap or when its
  * PTDPTDI entry has the same frame as PTDpde; otherwise it is APTmap,
  * after APTDpde has been pointed at pmap's page directory frame.
  */
 static unsigned *
 get_ptbase(pmap)
 	pmap_t pmap;
 {
 	unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 
 	/* are we current address space or kernel? */
 	if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) {
 		return (unsigned *) PTmap;
 	}
 	/* otherwise, we are alternate address space */
 	/* Re-point APTDpde and flush only when it maps a different frame. */
 	if (frame != (((unsigned) APTDpde) & PG_FRAME)) {
 		APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
 #if defined(SMP)
 		/* The page directory is not shared between CPUs */
 		cpu_invltlb();
 #else
 		invltlb();
 #endif
 	}
 	return (unsigned *) APTmap;
 }
 
 /*
  * Super fast pmap_pte routine best used when scanning
  * the pv lists.  This eliminates many coarse-grained
  * invltlb calls.  Note that many of the pv list
  * scans are across different pmaps.  It is very wasteful
  * to do an entire invltlb for checking a single mapping.
  *
  * Returns a pointer to the PTE slot for 'va' in 'pmap', or 0 when the
  * page directory entry covering 'va' is zero.
  */
 
 static unsigned * 
 pmap_pte_quick(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	unsigned pde, newpf;
 	if ((pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) != 0) {
 		unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME;
 		unsigned index = i386_btop(va);
 		/* are we current address space or kernel? */
 		if ((pmap == kernel_pmap) ||
 			(frame == (((unsigned) PTDpde) & PG_FRAME))) {
 			return (unsigned *) PTmap + index;
 		}
 		/*
 		 * Otherwise reach the page table page through the
 		 * PMAP1/PADDR1 window.  The window is re-pointed, and its
 		 * single TLB entry flushed, only when it currently refers
 		 * to a different frame.
 		 */
 		newpf = pde & PG_FRAME;
 		if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
 			* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
 			invltlb_1pg((vm_offset_t) PADDR1);
 		}
 		/* Index within that one page table page. */
 		return PADDR1 + ((unsigned) index & (NPTEPG - 1));
 	}
 	return (0);
 }
 
 /*
  *	Routine:	pmap_extract
  *	Function:
  *		Translate the given map/virtual_address pair into a
  *		physical address.
  *
  *		Returns 0 when the pmap is null or the page directory
  *		entry is zero.  For a PG_PS (4MB) directory entry the
  *		result is the large-page base plus the offset within
  *		it; otherwise it is the PTE frame plus the page offset.
  */
 vm_offset_t
 pmap_extract(pmap, va)
 	register pmap_t pmap;
 	vm_offset_t va;
 {
 	vm_offset_t pde;
 	unsigned *pte;
 
 	if (!pmap)
 		return 0;
 
 	pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT];
 	if (pde == 0)
 		return 0;
 
 	if ((pde & PG_PS) != 0)
 		return ((pde & ~(NBPDR - 1)) | (va & (NBPDR - 1)));
 
 	pte = get_ptbase(pmap) + i386_btop(va);
 	return ((*pte & PG_FRAME) | (va & PAGE_MASK));
 }
 
 /***************************************************
  * Low level mapping routines.....
  ***************************************************/
 
 /*
  * add a wired page to the kva
  *
  * Writes a PTE for 'va' that maps physical address 'pa' with
  * PG_RW | PG_V | pgeflag, then invalidates the TLB entry for 'va' on
  * the calling CPU.  Nothing is done here for other CPUs.
  */
 PMAP_INLINE void
 pmap_kenter(va, pa)
 	vm_offset_t va;
 	register vm_offset_t pa;
 {
 	register unsigned *pte;
 	unsigned npte;
 
 	npte = pa | PG_RW | PG_V | pgeflag;
 	pte = (unsigned *)vtopte(va);
 	*pte = npte;
 	/*
 	 * Invalidate unconditionally, whether or not an old mapping was
 	 * present.
 	 */
 	invltlb_1pg(va);	/* XXX what about SMP? */
 }
 
 /*
  * Remove a page from the kernel pagetables: zero the PTE for 'va' and
  * invalidate its TLB entry on the calling CPU.
  */
 PMAP_INLINE void
 pmap_kremove(va)
 	vm_offset_t va;
 {
 	register unsigned *kpte = (unsigned *)vtopte(va);
 
 	*kpte = 0;
 	invltlb_1pg(va);	/* XXX what about SMP? */
 }
 
 /*
  *	Map the physical range [start, end) into kernel virtual address
  *	space, one page at a time, beginning at '*virt'.
  *
  *	The value passed in '*virt' is a suggested virtual address for
  *	the mapping.  Architectures with a direct-mapped physical to
  *	virtual region may return an address inside that region and leave
  *	'*virt' alone; this implementation maps the pages starting at
  *	'*virt' and advances '*virt' to the first address after the
  *	mapped region.  Returns the virtual address of the first page.
  *	'prot' is not used here.
  */
 vm_offset_t
 pmap_map(virt, start, end, prot)
 	vm_offset_t *virt;
 	vm_offset_t start;
 	vm_offset_t end;
 	int prot;
 {
 	vm_offset_t first = *virt;
 	vm_offset_t cur;
 
 	for (cur = first; start < end; cur += PAGE_SIZE, start += PAGE_SIZE)
 		pmap_kenter(cur, start);
 
 	*virt = cur;
 	return (first);
 }
 
 
 /*
  * Enter 'count' pages from the array 'm' into consecutive kernel
  * virtual pages starting at 'va'.  Intended only for temporary kernel
  * mappings that do not need page modification or reference tracking.
  * Existing mappings are simply overwritten.  The pages *must* be wired.
  */
 void
 pmap_qenter(va, m, count)
 	vm_offset_t va;
 	vm_page_t *m;
 	int count;
 {
 	int idx;
 
 	for (idx = 0; idx < count; idx++, va += PAGE_SIZE)
 		pmap_kenter(va, VM_PAGE_TO_PHYS(m[idx]));
 }
 
 /*
  * this routine jerks page mappings from the
  * kernel -- it is meant only for temporary mappings.
  *
  * Zeroes the PTEs of 'count' consecutive pages starting at 'va' and
  * invalidates each page's TLB entry on the calling CPU.
  */
 void
 pmap_qremove(va, count)
 	vm_offset_t va;
 	int count;
 {
 	vm_offset_t end_va;
 
 	end_va = va + count*PAGE_SIZE;
 
 	while (va < end_va) {
 		unsigned *pte;
 
 		pte = (unsigned *)vtopte(va);
 		*pte = 0;
 #ifdef SMP
 		cpu_invlpg((void *)va);
 #else
 		invltlb_1pg(va);
 #endif
 		va += PAGE_SIZE;
 	}
 #ifdef SMP
 	/* One smp_invltlb() call after the whole range, not one per page. */
 	smp_invltlb();
 #endif
 }
 
 /*
  * Look up the page at 'pindex' in 'object', repeating the lookup for
  * as long as vm_page_sleep_busy() reports that it had to sleep on the
  * page found.  Returns the page, or NULL if the object has none at
  * that index.
  */
 static vm_page_t
 pmap_page_lookup(object, pindex)
 	vm_object_t object;
 	vm_pindex_t pindex;
 {
 	vm_page_t pg;
 
 	do {
 		pg = vm_page_lookup(object, pindex);
 	} while (pg && vm_page_sleep_busy(pg, FALSE, "pplookp"));
 
 	return pg;
 }
 
 /*
  * Create the UPAGES for a new process.
  * This routine directly affects the fork perf for a process.
  *
  * Reuses p->p_upages_obj and p->p_addr when they are already set,
  * otherwise allocates them.  Each of the UPAGES pages is then grabbed,
  * wired and entered directly into the kernel page table.
  */
 void
 pmap_new_proc(p)
 	struct proc *p;
 {
 #ifdef I386_CPU
 	int updateneeded;
 #endif
 	int i;
 	vm_object_t upobj;
 	vm_page_t m;
 	struct user *up;
 	unsigned *ptek, oldpte;
 
 	/*
 	 * allocate object for the upages
 	 */
 	if ((upobj = p->p_upages_obj) == NULL) {
 		upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES);
 		p->p_upages_obj = upobj;
 	}
 
 	/* get a kernel virtual address for the UPAGES for this proc */
 	if ((up = p->p_addr) == NULL) {
 		up = (struct user *) kmem_alloc_nofault(kernel_map,
 				UPAGES * PAGE_SIZE);
 		if (up == NULL)
 			panic("pmap_new_proc: u_map allocation failed");
 		p->p_addr = up;
 	}
 
 	/* First of the UPAGES consecutive PTEs that map 'up'. */
 	ptek = (unsigned *) vtopte((vm_offset_t) up);
 
 #ifdef I386_CPU
 	updateneeded = 0;
 #endif
 	for(i=0;i<UPAGES;i++) {
 		/*
 		 * Get a kernel stack page
 		 */
 		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
 		/*
 		 * Wire the page
 		 */
 		m->wire_count++;
 		cnt.v_wire_count++;
 
 		oldpte = *(ptek + i);
 		/*
 		 * Enter the page into the kernel address space.
 		 */
 		*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
 		/*
 		 * A TLB flush is only needed when the slot held a previous
 		 * mapping.  I386_CPU kernels defer it to one invltlb()
 		 * after the loop.
 		 */
 		if (oldpte) {
 #ifdef I386_CPU
 			updateneeded = 1;
 #else
 			invlpg((vm_offset_t) up + i * PAGE_SIZE);
 #endif
 		}
 
 		vm_page_wakeup(m);
 		vm_page_flag_clear(m, PG_ZERO);
 		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
 		m->valid = VM_PAGE_BITS_ALL;
 	}
 #ifdef I386_CPU
 	if (updateneeded)
 		invltlb();
 #endif
 }
 
 /*
  * Dispose the UPAGES for a process that has exited.
  * This routine directly impacts the exit perf of a process.
  *
  * For each of the UPAGES pages in p->p_upages_obj: clear the kernel
  * PTE that maps it at p->p_addr, then unwire and free the page.
  * Panics if a page is missing from the object.
  */
 void
 pmap_dispose_proc(p)
 	struct proc *p;
 {
 	int i;
 	vm_object_t upobj;
 	vm_page_t m;
 	unsigned *ptek;
 
 	upobj = p->p_upages_obj;
 
 	ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr);
 	for(i=0;i<UPAGES;i++) {
 
 		if ((m = vm_page_lookup(upobj, i)) == NULL)
 			panic("pmap_dispose_proc: upage already missing???");
 
 		vm_page_busy(m);
 
 		*(ptek + i) = 0;
 #ifndef I386_CPU
 		invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE);
 #endif
 		vm_page_unwire(m, 0);
 		vm_page_free(m);
 	}
 #ifdef I386_CPU
 	/* I386_CPU kernels flush once for the whole range instead. */
 	invltlb();
 #endif
 }
 
 /*
  * Make the UPAGES of a process eligible for pageout: each page is
  * marked dirty, unwired, and its kernel mapping at p->p_addr removed.
  * Panics if a page is missing from p->p_upages_obj.
  */
 void
 pmap_swapout_proc(p)
 	struct proc *p;
 {
 	vm_object_t obj = p->p_upages_obj;
 	vm_page_t pg;
 	int idx;
 
 	/*
 	 * let the upages be paged
 	 */
 	for (idx = 0; idx < UPAGES; idx++) {
 		pg = vm_page_lookup(obj, idx);
 		if (pg == NULL)
 			panic("pmap_swapout_proc: upage already missing???");
 		vm_page_dirty(pg);
 		vm_page_unwire(pg, 0);
 		pmap_kremove((vm_offset_t) p->p_addr + PAGE_SIZE * idx);
 	}
 }
 
 /*
  * Bring the UPAGES for a specified process back in.
  *
  * Each page is grabbed from p->p_upages_obj, mapped at its slot under
  * p->p_addr, read in through the pager if it is not fully valid, and
  * then wired.  Panics if the pager cannot supply a page.
  */
 void
 pmap_swapin_proc(p)
 	struct proc *p;
 {
 	int i,rv;
 	vm_object_t upobj;
 	vm_page_t m;
 
 	upobj = p->p_upages_obj;
 	for(i=0;i<UPAGES;i++) {
 
 		m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
 		pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE,
 			VM_PAGE_TO_PHYS(m));
 
 		if (m->valid != VM_PAGE_BITS_ALL) {
 			rv = vm_pager_get_pages(upobj, &m, 1, 0);
 			if (rv != VM_PAGER_OK)
 				panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid);
 			/*
 			 * Look the page up again after the pager call.
 			 * NOTE(review): the result is dereferenced without
 			 * a NULL check -- confirm the lookup cannot fail
 			 * here.
 			 */
 			m = vm_page_lookup(upobj, i);
 			m->valid = VM_PAGE_BITS_ALL;
 		}
 
 		vm_page_wire(m);
 		vm_page_wakeup(m);
 		vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
 	}
 }
 
 /***************************************************
  * Page table page management routines.....
  ***************************************************/
 
 /*
  * This routine unholds page table pages, and if the hold count
  * drops to zero, then it decrements the wire count.
  *
  * Returns 1 when the page table page was unmapped from the pmap (and
  * freed if its wire count reached zero), 0 when it is still held.
  */
 static int 
 _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
 
 	/* Wait until the page is no longer busy. */
 	while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
 		;
 
 	/* Re-check: the hold count is tested again after the wait above. */
 	if (m->hold_count == 0) {
 		vm_offset_t pteva;
 		/*
 		 * unmap the page table page
 		 */
 		pmap->pm_pdir[m->pindex] = 0;
 		--pmap->pm_stats.resident_count;
 		/* Only when this pmap's page directory is the one in PTDpde. */
 		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
 			(((unsigned) PTDpde) & PG_FRAME)) {
 			/*
 			 * Do a invltlb to make the invalidated mapping
 			 * take effect immediately.
 			 */
 			pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex);
 			pmap_TLB_invalidate(pmap, pteva);
 		}
 
 		/* Drop the lookup hint if it pointed at this page. */
 		if (pmap->pm_ptphint == m)
 			pmap->pm_ptphint = NULL;
 
 		/*
 		 * If the page is finally unwired, simply free it.
 		 */
 		--m->wire_count;
 		if (m->wire_count == 0) {
 
 			vm_page_flash(m);
 			vm_page_busy(m);
 			vm_page_free_zero(m);
 			--cnt.v_wire_count;
 		}
 		return 1;
 	}
 	return 0;
 }
 
 /*
  * Drop one hold on page table page 'm'.  When that leaves the hold
  * count at zero, pass the page to _pmap_unwire_pte_hold() and return
  * its result; otherwise return 0.
  */
 static PMAP_INLINE int
 pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
 {
 	vm_page_unhold(m);
 	if (m->hold_count != 0)
 		return 0;
 	return _pmap_unwire_pte_hold(pmap, m);
 }
 
 /*
  * Called after a page table entry has been removed, to drop the hold
  * on the page table page covering 'va' and free it if appropriate.
  * Addresses at or above UPT_MIN_ADDRESS are ignored (returns 0).
  * When 'mpte' is NULL the page table page is found through the pmap's
  * hint or, failing that, looked up in pm_pteobj (which also refreshes
  * the hint).  Returns the result of pmap_unwire_pte_hold().
  */
 static int
 pmap_unuse_pt(pmap, va, mpte)
 	pmap_t pmap;
 	vm_offset_t va;
 	vm_page_t mpte;
 {
 	unsigned pdidx;
 	vm_page_t hint;
 
 	if (va >= UPT_MIN_ADDRESS)
 		return 0;
 
 	if (mpte != NULL)
 		return pmap_unwire_pte_hold(pmap, mpte);
 
 	pdidx = (va >> PDRSHIFT);
 	hint = pmap->pm_ptphint;
 	if (hint && (hint->pindex == pdidx))
 		return pmap_unwire_pte_hold(pmap, hint);
 
 	mpte = pmap_page_lookup( pmap->pm_pteobj, pdidx);
 	pmap->pm_ptphint = mpte;
 	return pmap_unwire_pte_hold(pmap, mpte);
 }
 
 /*
  * Initialize 'pmap' so that its page directory is IdlePTD: a page of
  * kernel virtual address space is allocated for pm_pdir and mapped
  * onto IdlePTD.  The remaining fields are reset and the pmap is put
  * on the allpmaps list.
  */
 void
 pmap_pinit0(pmap)
 	struct pmap *pmap;
 {
 	pmap->pm_pdir =
 		(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 	/* Back the freshly allocated VA with the IdlePTD page. */
 	pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
 	pmap->pm_count = 1;
 	pmap->pm_active = 0;
 	pmap->pm_ptphint = NULL;
 	TAILQ_INIT(&pmap->pm_pvlist);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 }
 
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
  *
  * pm_pdir and pm_pteobj are reused when already set.  A page for the
  * page directory is grabbed at index PTDPTDI of pm_pteobj, mapped at
  * pm_pdir, filled with the kernel's directory entries and given a
  * self-referential entry.
  */
 void
 pmap_pinit(pmap)
 	register struct pmap *pmap;
 {
 	vm_page_t ptdpg;
 
 	/*
 	 * No need to allocate page table space yet but we do need a valid
 	 * page directory table.
 	 */
 	if (pmap->pm_pdir == NULL)
 		pmap->pm_pdir =
 			(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 
 	/*
 	 * allocate object for the ptes
 	 */
 	if (pmap->pm_pteobj == NULL)
 		pmap->pm_pteobj = vm_object_allocate( OBJT_DEFAULT, PTDPTDI + 1);
 
 	/*
 	 * allocate the page directory page
 	 */
 	ptdpg = vm_page_grab( pmap->pm_pteobj, PTDPTDI,
 			VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
 	ptdpg->wire_count = 1;
 	++cnt.v_wire_count;
 
 
 	vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
 	ptdpg->valid = VM_PAGE_BITS_ALL;
 
 	pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
 	/* Zero the directory unless the page is already flagged PG_ZERO. */
 	if ((ptdpg->flags & PG_ZERO) == 0)
 		bzero(pmap->pm_pdir, PAGE_SIZE);
 
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
 	/* Wire in kernel global address entries. */
 	/* XXX copies current process, does not fill in MPPTDI */
 	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE);
 #ifdef SMP
 	pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
 #endif
 
 	/* install self-referential address mapping entry */
 	*(unsigned *) (pmap->pm_pdir + PTDPTDI) =
 		VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW | PG_A | PG_M;
 
 	pmap->pm_count = 1;
 	pmap->pm_active = 0;
 	pmap->pm_ptphint = NULL;
 	TAILQ_INIT(&pmap->pm_pvlist);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
 
 /*
  * Wire in kernel global address entries.  To avoid a race condition
  * between pmap initialization and pmap_growkernel, this procedure
  * should be called after the vmspace is attached to the process
  * but before this pmap is activated.
  *
  * NOTE: the body is currently empty; in this file the kernel entries
  * are copied by pmap_pinit().  The function only remains for callers
  * that have not yet been updated.
  */
 void
 pmap_pinit2(pmap)
 	struct pmap *pmap;
 {
 	/* XXX: Remove this stub when no longer called */
 }
 
 /*
  * Free one page 'p' belonging to pmap's page table object.
  *
  * Returns 0 without freeing anything when vm_page_sleep_busy() reports
  * that it had to sleep on the page, and 1 once the page has been
  * unmapped from the page directory and freed.  Panics if the page is
  * still held.
  */
 static int
 pmap_release_free_page(pmap, p)
 	struct pmap *pmap;
 	vm_page_t p;
 {
 	unsigned *pde = (unsigned *) pmap->pm_pdir;
 	/*
 	 * This code optimizes the case of freeing non-busy
 	 * page-table pages.  Those pages are zero now, and
 	 * might as well be placed directly into the zero queue.
 	 */
 	if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
 		return 0;
 
 	vm_page_busy(p);
 
 	/*
 	 * Remove the page table page from the processes address space.
 	 */
 	pde[p->pindex] = 0;
 	pmap->pm_stats.resident_count--;
 
 	if (p->hold_count)  {
 		panic("pmap_release: freeing held page table page");
 	}
 	/*
 	 * Page directory pages need to have the kernel
 	 * stuff cleared, so they can go into the zero queue also.
 	 */
 	if (p->pindex == PTDPTDI) {
 		bzero(pde + KPTDI, nkpt * PTESIZE);
 #ifdef SMP
 		pde[MPPTDI] = 0;
 #endif
 		pde[APTDPTDI] = 0;
 		/* The directory page itself is unmapped from kernel VA last. */
 		pmap_kremove((vm_offset_t) pmap->pm_pdir);
 	}
 
 	/* Drop the lookup hint if it refers to the page being freed. */
 	if (pmap->pm_ptphint && (pmap->pm_ptphint->pindex == p->pindex))
 		pmap->pm_ptphint = NULL;
 
 	p->wire_count--;
 	cnt.v_wire_count--;
 	vm_page_free_zero(p);
 	return 1;
 }
 
 /*
  * this routine is called if the page table page is not
  * mapped correctly.
  *
  * Grabs (allocating if necessary) the page table page at 'ptepindex'
  * in pmap->pm_pteobj, wires and holds it, installs it in the page
  * directory, makes sure it is zeroed, and returns it.
  */
 static vm_page_t
 _pmap_allocpte(pmap, ptepindex)
 	pmap_t	pmap;
 	unsigned ptepindex;
 {
 	vm_offset_t pteva, ptepa;
 	vm_page_t m;
 
 	/*
 	 * Find or fabricate a new pagetable page
 	 */
 	m = vm_page_grab(pmap->pm_pteobj, ptepindex,
 			VM_ALLOC_ZERO | VM_ALLOC_RETRY);
 
 	KASSERT(m->queue == PQ_NONE,
 		("_pmap_allocpte: %p->queue != PQ_NONE", m));
 
 	/* The global wired count only grows on the 0 -> 1 transition. */
 	if (m->wire_count == 0)
 		cnt.v_wire_count++;
 	m->wire_count++;
 
 	/*
 	 * Increment the hold count for the page table page
 	 * (denoting a new mapping.)
 	 */
 	m->hold_count++;
 
 	/*
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
 	 */
 
 	pmap->pm_stats.resident_count++;
 
 	ptepa = VM_PAGE_TO_PHYS(m);
 	pmap->pm_pdir[ptepindex] =
 		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
 
 	/*
 	 * Set the page table hint
 	 */
 	pmap->pm_ptphint = m;
 
 	/*
 	 * Try to use the new mapping, but if we cannot, then
 	 * do it with the routine that maps the page explicitly.
 	 *
 	 * The page only needs zeroing when it is not flagged PG_ZERO.
 	 * If this pmap's page directory is the one in PTDpde, the page
 	 * is zeroed at its address in the UPT_MIN_ADDRESS region;
 	 * otherwise pmap_zero_page() is given its physical address.
 	 */
 	if ((m->flags & PG_ZERO) == 0) {
 		if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) ==
 			(((unsigned) PTDpde) & PG_FRAME)) {
 			pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex);
 			bzero((caddr_t) pteva, PAGE_SIZE);
 		} else {
 			pmap_zero_page(ptepa);
 		}
 	}
 
 	m->valid = VM_PAGE_BITS_ALL;
 	vm_page_flag_clear(m, PG_ZERO);
 	vm_page_flag_set(m, PG_MAPPED);
 	vm_page_wakeup(m);
 
 	return m;
 }
 
 static vm_page_t
 pmap_allocpte(pmap, va)
 	pmap_t	pmap;
 	vm_offset_t va;
 {
 	unsigned ptepindex;
 	vm_offset_t ptepa;
 	vm_page_t m;
 
 	/*
 	 * Calculate pagetable page index
 	 */
 	ptepindex = va >> PDRSHIFT;
 
 	/*
 	 * Get the page directory entry
 	 */
 	ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
 
 	/*
 	 * This supports switching from a 4MB page to a
 	 * normal 4K page.
 	 */
 	if (ptepa & PG_PS) {
 		pmap->pm_pdir[ptepindex] = 0;
 		ptepa = 0;
 		invltlb();
 	}
 
 	/*
 	 * If the page table page is mapped, we just increment the
 	 * hold count, and activate it.
 	 */
 	if (ptepa) {
 		/*
 		 * In order to get the page table page, try the
 		 * hint first.
 		 */
 		if (pmap->pm_ptphint &&
 			(pmap->pm_ptphint->pindex == ptepindex)) {
 			m = pmap->pm_ptphint;
 		} else {
 			m = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 			pmap->pm_ptphint = m;
 		}
 		m->hold_count++;
 		return m;
 	}
 	/*
 	 * Here if the pte page isn't mapped, or if it has been deallocated.
 	 */
 	return _pmap_allocpte(pmap, ptepindex);
 }
 
 
 /***************************************************
 * Pmap allocation/deallocation routines.
  ***************************************************/
 
 /*
  * Release any resources held by the given physical map.
  * Called when a pmap initialized by pmap_pinit is being released.
  * Should only be called if the map contains no valid mappings.
  *
  * The pmap is unlinked from the list it is on (pm_list) and every page
  * in its page table object is handed to pmap_release_free_page().  The
  * page directory page (pindex PTDPTDI) is set aside and released last.
  */
 void
 pmap_release(pmap)
 	register struct pmap *pmap;
 {
 	vm_page_t p,n,ptdpg;
 	vm_object_t object = pmap->pm_pteobj;
 	int curgeneration;
 
 #if defined(DIAGNOSTIC)
 	if (object->ref_count != 1)
 		panic("pmap_release: pteobj reference count != 1");
 #endif
 	
 	ptdpg = NULL;
 	LIST_REMOVE(pmap, pm_list);
 retry:
 	/*
 	 * Remember the object's generation so that a change to its page
 	 * list while we were not looking can be detected below.
 	 */
 	curgeneration = object->generation;
 	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = n) {
 		/* Fetch the successor first: p is freed on success. */
 		n = TAILQ_NEXT(p, listq);
 		if (p->pindex == PTDPTDI) {
 			ptdpg = p;
 			continue;
 		}
 		/*
 		 * Keep trying this page until it has actually been freed.
 		 * A zero return means the page was not released (NOTE:
 		 * presumably because pmap_release_free_page() had to wait
 		 * for it -- confirm against that routine); if the object
 		 * changed in the meantime, n can no longer be trusted and
 		 * the scan restarts.  Once the page has been freed we MUST
 		 * move on: touching it again would operate on a freed page
 		 * and never terminate.
 		 */
 		while (!pmap_release_free_page(pmap, p)) {
 			if (object->generation != curgeneration)
 				goto retry;
 		}
 	}
 
 	/* Finally release the page directory page itself. */
 	if (ptdpg && !pmap_release_free_page(pmap, ptdpg))
 		goto retry;
 }
 
 /*
  * Sysctl handler for vm.kvm_size: reports the distance from KERNBASE
  * to VM_MAX_KERNEL_ADDRESS as an unsigned long.
  */
 static int
 kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long kvm_bytes;
 
 	kvm_bytes = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 	return (sysctl_handle_long(oidp, &kvm_bytes, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
     0, 0, kvm_size, "IU", "Size of KVM");
 
 /*
  * Sysctl handler for vm.kvm_free: reports the distance from the
  * current kernel_vm_end to VM_MAX_KERNEL_ADDRESS as an unsigned long.
  */
 static int
 kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long kvm_bytes_left;
 
 	kvm_bytes_left = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 	return (sysctl_handle_long(oidp, &kvm_bytes_left, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
     0, 0, kvm_free, "IU", "Amount of KVM free");
 
 /*
  * Grow the number of kernel page table entries, if needed, so that
  * kernel page tables exist up to (at least) addr.
  *
  * New page table pages are allocated from kptobj, zeroed, entered in
  * PTD and copied into the page directory of every pmap on allpmaps.
  * kernel_vm_end and nkpt are updated as the kernel grows.  Panics if
  * a page table page cannot be allocated.  The whole operation runs
  * at splhigh.
  */
 void
 pmap_growkernel(vm_offset_t addr)
 {
 	struct pmap *pmap;
 	int s;
 	vm_offset_t ptppaddr;
 	vm_page_t nkpg;
 	pd_entry_t newpdir;
 
 	s = splhigh();
 	/*
 	 * First call: find the current end of the kernel's page tables by
 	 * walking the page directory upward from KERNBASE, counting the
 	 * page table pages that are already present.
 	 */
 	if (kernel_vm_end == 0) {
 		kernel_vm_end = KERNBASE;
 		nkpt = 0;
 		while (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 			nkpt++;
 		}
 	}
 	/*
 	 * Advance addr to the next boundary of the region covered by one
 	 * page table page (PAGE_SIZE * NPTEPG bytes).  Note that an addr
 	 * already on such a boundary still moves up by one full region.
 	 */
 	addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 	while (kernel_vm_end < addr) {
 		/* Directory slot already populated: just step over it. */
 		if (pdir_pde(PTD, kernel_vm_end)) {
 			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 			continue;
 		}
 
 		/*
 		 * This index is bogus, but out of the way
 		 */
 		nkpg = vm_page_alloc(kptobj, nkpt, VM_ALLOC_SYSTEM);
 		if (!nkpg)
 			panic("pmap_growkernel: no memory to grow kernel");
 
 		nkpt++;
 
 		/* Wire and zero the new page, then build its directory entry. */
 		vm_page_wire(nkpg);
 		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 		pmap_zero_page(ptppaddr);
 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 		pdir_pde(PTD, kernel_vm_end) = newpdir;
 
 		/* Propagate the new kernel entry to every known pmap. */
 		LIST_FOREACH(pmap, &allpmaps, pm_list) {
 			*pmap_pde(pmap, kernel_vm_end) = newpdir;
 		}
 		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
 	}
 	splx(s);
 }
 
 /*
  *	Drop one reference to the given physical map.  A NULL pmap is
  *	ignored.  When the last reference goes away the map's resources
  *	are released via pmap_release() and the kernel then panics,
  *	because final destruction of a pmap is not implemented.
  *	Should only be called if the map contains no valid mappings.
  */
 void
 pmap_destroy(pmap)
 	register pmap_t pmap;
 {
 
 	if (pmap == NULL)
 		return;
 
 	/* Other references remain: nothing more to do. */
 	if (--pmap->pm_count != 0)
 		return;
 
 	pmap_release(pmap);
 	panic("destroying a pmap is not yet implemented");
 }
 
 /*
  *	Take an additional reference on the specified pmap.
  *	A NULL pmap is silently ignored.
  */
 void
 pmap_reference(pmap)
 	pmap_t pmap;
 {
 
 	if (pmap == NULL)
 		return;
 	pmap->pm_count++;
 }
 
 /***************************************************
 * page management routines.
  ***************************************************/
 
 /*
  * Return a pv_entry to the pv zone and account for it in
  * pv_entry_count.
  */
 static PMAP_INLINE void
 free_pv_entry(pv)
 	pv_entry_t pv;
 {
 
 	--pv_entry_count;
 	zfree(pvzone, pv);
 }
 
 /*
  * Allocate a pv_entry from the pv zone and account for it in
  * pv_entry_count.
  *
  * The allocation bypasses the malloc code because it may happen at
  * interrupt time.  When a high water mark is configured and the count
  * rises above it, vm_pages_needed is woken up once (guarded by
  * pmap_pagedaemon_waken) so that pv entries can be reclaimed.
  */
 static pv_entry_t
 get_pv_entry(void)
 {
 
 	pv_entry_count++;
 	if (pv_entry_high_water != 0 &&
 	    pv_entry_count > pv_entry_high_water) {
 		/* Only issue the wakeup once per episode. */
 		if (pmap_pagedaemon_waken == 0) {
 			pmap_pagedaemon_waken = 1;
 			wakeup (&vm_pages_needed);
 		}
 	}
 	return zalloc(pvzone);
 }
 
 /*
  * Drastic pv entry reclamation: remove every mapping of every page in
  * vm_page_array that is not wired, held or busy.  Does nothing unless
  * get_pv_entry() has flagged a shortage (pmap_pagedaemon_waken), and
  * clears that flag when done.  A warning is printed for the first
  * five invocations that actually collect.
  */
 void
 pmap_collect()
 {
 	static int warningdone=0;
 	vm_page_t pg;
 	int idx;
 
 	if (pmap_pagedaemon_waken == 0)
 		return;
 
 	if (warningdone < 5) {
 		printf("pmap_collect: collecting pv entries -- suggest increasing PMAP_SHPGPERPROC\n");
 		warningdone++;
 	}
 
 	for (idx = 0; idx < vm_page_array_size; idx++) {
 		pg = &vm_page_array[idx];
 		/* Only pages that nobody is actively using are stripped. */
 		if (pg->wire_count == 0 && pg->hold_count == 0 &&
 		    pg->busy == 0 && (pg->flags & PG_BUSY) == 0)
 			pmap_remove_all(pg);
 	}
 	pmap_pagedaemon_waken = 0;
 }
 	
 
 /*
  * Remove the pv entry describing the mapping of page m at va in pmap.
  *
  * Every pv entry is linked on two lists: the page's (m->md.pv_list)
  * and the pmap's (pm_pvlist).  The entry is searched for on the page's
  * list when the page has fewer pv entries than the pmap has resident
  * pages, and on the pmap's list otherwise.  When found, the page table
  * page reference is dropped through pmap_unuse_pt(), the entry is
  * unlinked from both lists and freed, and PG_MAPPED/PG_WRITEABLE are
  * cleared once the page has no pv entries left.
  *
  * Returns the result of pmap_unuse_pt(), or 0 if no entry was found.
  * Runs at splvm.
  */
 
 static int
 pmap_remove_entry(pmap, m, va)
 	struct pmap *pmap;
 	vm_page_t m;
 	vm_offset_t va;
 {
 	pv_entry_t pv;
 	int rtval;
 	int s;
 
 	s = splvm();
 
 	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
 		/* Walk the page's list; entries may belong to any pmap. */
 		pv = TAILQ_FIRST(&m->md.pv_list);
 		while (pv != NULL &&
 		    !(pmap == pv->pv_pmap && va == pv->pv_va))
 			pv = TAILQ_NEXT(pv, pv_list);
 	} else {
 		/* Walk the pmap's list; only the address needs to match. */
 		pv = TAILQ_FIRST(&pmap->pm_pvlist);
 		while (pv != NULL && va != pv->pv_va)
 			pv = TAILQ_NEXT(pv, pv_plist);
 	}
 
 	if (pv == NULL) {
 		splx(s);
 		return 0;
 	}
 
 	rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
 
 	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 	m->md.pv_list_count--;
 	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 		vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 
 	TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
 	free_pv_entry(pv);
 
 	splx(s);
 	return rtval;
 }
 
 /*
  * Record a new mapping of page m at (pmap, va): allocate a pv entry,
  * fill it in (including the page table page mpte backing the mapping)
  * and append it to both the pmap's and the page's pv lists.
  * Runs at splvm.
  */
 static void
 pmap_insert_entry(pmap, va, mpte, m)
 	pmap_t pmap;
 	vm_offset_t va;
 	vm_page_t mpte;
 	vm_page_t m;
 {
 	pv_entry_t new_pv;
 	int ipl;
 
 	ipl = splvm();
 
 	new_pv = get_pv_entry();
 	new_pv->pv_va = va;
 	new_pv->pv_pmap = pmap;
 	new_pv->pv_ptem = mpte;
 
 	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, new_pv, pv_plist);
 	TAILQ_INSERT_TAIL(&m->md.pv_list, new_pv, pv_list);
 	m->md.pv_list_count++;
 
 	splx(ipl);
 }
 
 /*
  * pmap_remove_pte: tear down the single mapping whose pte is *ptq.
  *
  * The pte is atomically read and cleared, the pmap's wired/resident
  * statistics are adjusted, and for managed pages the modified and
  * referenced bits are passed on to the vm_page before the pv entry is
  * removed.  Global (PG_G) mappings are invalidated here with invlpg;
  * any other TLB invalidation is left to the caller.
  *
  * Returns the result of pmap_remove_entry() for managed pages and of
  * pmap_unuse_pt() otherwise.
  */
 static int
 pmap_remove_pte(pmap, ptq, va)
 	struct pmap *pmap;
 	unsigned *ptq;
 	vm_offset_t va;
 {
 	unsigned oldpte;
 	vm_page_t m;
 
 	oldpte = atomic_readandclear_int(ptq);
 	if (oldpte & PG_W)
 		pmap->pm_stats.wired_count--;
 	/*
 	 * Machines that don't support invlpg, also don't support
 	 * PG_G.
 	 */
 	if (oldpte & PG_G)
 		invlpg(va);
 	pmap->pm_stats.resident_count--;
 
 	/* Unmanaged mapping: there is no pv entry to clean up. */
 	if ((oldpte & PG_MANAGED) == 0)
 		return pmap_unuse_pt(pmap, va, NULL);
 
 	m = PHYS_TO_VM_PAGE(oldpte);
 	if (oldpte & PG_M) {
 #if defined(PMAP_DIAGNOSTIC)
 		if (pmap_nw_modified((pt_entry_t) oldpte)) {
 			printf(
 	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
 			    va, oldpte);
 		}
 #endif
 		if (pmap_track_modified(va))
 			vm_page_dirty(m);
 	}
 	if (oldpte & PG_A)
 		vm_page_flag_set(m, PG_REFERENCED);
 
 	return pmap_remove_entry(pmap, m, va);
 }
 
 /*
  * Remove the mapping of a single page at va from pmap, if one exists,
  * and invalidate the corresponding TLB entry.
  */
 static void
 pmap_remove_page(pmap, va)
 	struct pmap *pmap;
 	register vm_offset_t va;
 {
 	register unsigned *ptep;
 
 	/* No page directory entry means no pte: nothing to remove. */
 	if (*pmap_pde(pmap, va) == 0)
 		return;
 
 	/* Locate the pte through a local mapping of this pmap's tables. */
 	ptep = get_ptbase(pmap) + i386_btop(va);
 	if (*ptep == 0)
 		return;
 
 	(void) pmap_remove_pte(pmap, ptep, va);
 	pmap_TLB_invalidate(pmap, va);
 }
 
 /*
  *	Remove the given range of addresses from the specified map.
  *
  *	It is assumed that the start and end are properly
  *	rounded to the page size.
  *
  *	A NULL pmap, or one with no resident pages, is a no-op.  A
  *	single-page range that is not covered by a 4MB page is handed
  *	to pmap_remove_page().  Otherwise the range is walked one page
  *	table page at a time, and a single TLB invalidation for the
  *	whole pmap is issued at the end if anything was removed.
  */
 void
 pmap_remove(pmap, sva, eva)
 	struct pmap *pmap;
 	register vm_offset_t sva;
 	register vm_offset_t eva;
 {
 	register unsigned *ptbase;
 	vm_offset_t pdnxt;
 	vm_offset_t ptpaddr;
 	vm_offset_t sindex, eindex;
 	int anyvalid;
 
 	if (pmap == NULL)
 		return;
 
 	if (pmap->pm_stats.resident_count == 0)
 		return;
 
 	/*
 	 * special handling of removing one page.  a very
 	 * common operation and easy to short circuit some
 	 * code.
 	 */
 	if (((sva + PAGE_SIZE) == eva) && 
 		(((unsigned) pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
 		pmap_remove_page(pmap, sva);
 		return;
 	}
 
 	anyvalid = 0;
 
 	/*
 	 * Get a local virtual address for the mappings that are being
 	 * worked with.
 	 */
 	ptbase = get_ptbase(pmap);
 
 	/* From here on the range is expressed in page indices. */
 	sindex = i386_btop(sva);
 	eindex = i386_btop(eva);
 
 	for (; sindex < eindex; sindex = pdnxt) {
 		unsigned pdirindex;
 
 		/*
 		 * Calculate index for next page table.
 		 */
 		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
 		/* Stop early once the pmap has nothing left mapped. */
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
 		pdirindex = sindex / NPDEPG;
 		/*
 		 * A 4MB page is removed as a unit by clearing its page
 		 * directory entry; it accounts for NBPDR / PAGE_SIZE
 		 * resident pages.
 		 */
 		if (((ptpaddr = (unsigned) pmap->pm_pdir[pdirindex]) & PG_PS) != 0) {
 			pmap->pm_pdir[pdirindex] = 0;
 			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
 			anyvalid++;
 			continue;
 		}
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/*
 		 * Limit our scan to either the end of the va represented
 		 * by the current page table page, or to the end of the
 		 * range being removed.
 		 */
 		if (pdnxt > eindex) {
 			pdnxt = eindex;
 		}
 
 		for ( ;sindex != pdnxt; sindex++) {
 			vm_offset_t va;
 			if (ptbase[sindex] == 0) {
 				continue;
 			}
 			va = i386_ptob(sindex);
 			
 			anyvalid++;
 			/*
 			 * A non-zero return ends the scan of this page
 			 * table page (NOTE: presumably it signals that
 			 * the page table page itself was released --
 			 * confirm against pmap_unuse_pt).
 			 */
 			if (pmap_remove_pte(pmap,
 				ptbase + sindex, va))
 				break;
 		}
 	}
 
 	if (anyvalid)
 		pmap_TLB_invalidate_all(pmap);
 }
 
 /*
  *	Routine:	pmap_remove_all
  *	Function:
  *		Removes this physical page from
  *		all physical maps in which it resides.
  *		Reflects back modify bits to the pager.
  *
  *	Notes:
  *		Original versions of this routine were very
  *		inefficient because they iteratively called
  *		pmap_remove (slow...)
  *
  *		The page's pv list is consumed from the head until it
  *		is empty; each entry names one (pmap, va) mapping of
  *		the page.  Runs at splvm.
  */
 
 static void
 pmap_remove_all(m)
 	vm_page_t m;
 {
 	register pv_entry_t pv;
 	register unsigned *pte, tpte;
 	int s;
 
 #if defined(PMAP_DIAGNOSTIC)
 	/*
 	 * XXX this makes pmap_page_protect(NONE) illegal for non-managed
 	 * pages!
 	 */
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
 		panic("pmap_page_protect: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m));
 	}
 #endif
 
 	s = splvm();
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		pv->pv_pmap->pm_stats.resident_count--;
 
 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 
 		/* Fetch and clear the pte in one atomic step. */
 		tpte = atomic_readandclear_int(pte);
 		if (tpte & PG_W)
 			pv->pv_pmap->pm_stats.wired_count--;
 
 		if (tpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if (tpte & PG_M) {
 #if defined(PMAP_DIAGNOSTIC)
 			if (pmap_nw_modified((pt_entry_t) tpte)) {
 				printf(
 	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
 				    pv->pv_va, tpte);
 			}
 #endif
 			if (pmap_track_modified(pv->pv_va))
 				vm_page_dirty(m);
 		}
 		pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 
 		/*
 		 * Unlink the entry from both lists, drop the reference on
 		 * the page table page that held the mapping, and free it.
 		 */
 		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		m->md.pv_list_count--;
 		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
 		free_pv_entry(pv);
 	}
 
 	/* No mappings remain, so the page is neither mapped nor writeable. */
 	vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 
 	splx(s);
 }
 
 /*
  *	Set the physical protection on the
  *	specified range of this map as requested.
  *
  *	Removing read permission removes the mappings altogether (via
  *	pmap_remove).  A request that still includes write permission
  *	is a no-op, so the only real work done here is write-protecting
  *	the range.  While doing so, the referenced and modified bits of
  *	managed pages are passed on to the vm_page and cleared in the
  *	pte.  The pmap's TLB entries are invalidated once at the end if
  *	any entry was changed.
  */
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
 	register unsigned *ptbase;
 	vm_offset_t pdnxt, ptpaddr;
 	vm_pindex_t sindex, eindex;
 	int anychanged;
 
 	if (pmap == NULL)
 		return;
 
 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
 		pmap_remove(pmap, sva, eva);
 		return;
 	}
 
 	if (prot & VM_PROT_WRITE)
 		return;
 
 	anychanged = 0;
 
 	ptbase = get_ptbase(pmap);
 
 	/* From here on the range is expressed in page indices. */
 	sindex = i386_btop(sva);
 	eindex = i386_btop(eva);
 
 	for (; sindex < eindex; sindex = pdnxt) {
 
 		unsigned pdirindex;
 
 		/* Index of the first page of the next page table page. */
 		pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1));
 
 		pdirindex = sindex / NPDEPG;
 		ptpaddr = (unsigned) pmap->pm_pdir[pdirindex];
 		if ((ptpaddr & PG_PS) != 0) {
 			/*
 			 * 4MB page: write-protect it in the page directory
 			 * entry itself.  The mapping stays in place, so the
 			 * resident count must NOT be touched here (only
 			 * pmap_remove gives those pages back).
 			 */
 			*(unsigned *) &pmap->pm_pdir[pdirindex] &=
 			    ~(PG_M|PG_RW);
 			anychanged++;
 			continue;
 		}
 
 		/*
 		 * Weed out invalid mappings. Note: we assume that the page
 		 * directory table is always allocated, and in kernel virtual.
 		 */
 		if (ptpaddr == 0)
 			continue;
 
 		/* Do not run past the end of the requested range. */
 		if (pdnxt > eindex) {
 			pdnxt = eindex;
 		}
 
 		for (; sindex != pdnxt; sindex++) {
 
 			unsigned pbits;
 			vm_page_t m;
 
 			pbits = ptbase[sindex];
 
 			if (pbits & PG_MANAGED) {
 				m = NULL;
 				if (pbits & PG_A) {
 					m = PHYS_TO_VM_PAGE(pbits);
 					vm_page_flag_set(m, PG_REFERENCED);
 					pbits &= ~PG_A;
 				}
 				if (pbits & PG_M) {
 					if (pmap_track_modified(i386_ptob(sindex))) {
 						if (m == NULL)
 							m = PHYS_TO_VM_PAGE(pbits);
 						vm_page_dirty(m);
 						pbits &= ~PG_M;
 					}
 				}
 			}
 
 			pbits &= ~PG_RW;
 
 			/* Only write the pte back if something changed. */
 			if (pbits != ptbase[sindex]) {
 				ptbase[sindex] = pbits;
 				anychanged = 1;
 			}
 		}
 	}
 	if (anychanged)
 		pmap_TLB_invalidate_all(pmap);
 }
 
 /*
  *	Insert the given physical page (p) at
  *	the specified virtual address (v) in the
  *	target physical map with the protection requested.
  *
  *	If specified, the page will be wired down, meaning
  *	that the related pte can not be reclaimed.
  *
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
  *
  *	Three cases are distinguished by the pte currently at va:
  *	the same physical page is already mapped (only protection
  *	and/or wiring change), a different page is mapped (the old
  *	mapping is removed first), or nothing is mapped.  A NULL pmap
  *	is ignored.  Panics if no pte can be found for va or if va is
  *	covered by a 4MB page.
  */
 void
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
 	   boolean_t wired)
 {
 	vm_offset_t pa;
 	register unsigned *pte;
 	vm_offset_t opa;
 	vm_offset_t origpte, newpte;
 	vm_page_t mpte;
 
 	if (pmap == NULL)
 		return;
 
 	/* Work on the page-aligned address. */
 	va &= PG_FRAME;
 #ifdef PMAP_DIAGNOSTIC
 	if (va > VM_MAX_KERNEL_ADDRESS)
 		panic("pmap_enter: toobig");
 	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
 		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
 #endif
 
 	mpte = NULL;
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < UPT_MIN_ADDRESS) {
 		mpte = pmap_allocpte(pmap, va);
 	}
 #if 0 && defined(PMAP_DIAGNOSTIC)
 	else {
 		vm_offset_t *pdeaddr = (vm_offset_t *)pmap_pde(pmap, va);
 		if (((origpte = (vm_offset_t) *pdeaddr) & PG_V) == 0) { 
 			panic("pmap_enter: invalid kernel page table page(0), pdir=%p, pde=%p, va=%p\n",
 				pmap->pm_pdir[PTDPTDI], origpte, va);
 		}
 		if (smp_active) {
 			pdeaddr = (vm_offset_t *) IdlePTDS[PCPU_GET(cpuid)];
 			if (((newpte = pdeaddr[va >> PDRSHIFT]) & PG_V) == 0) {
 				if ((vm_offset_t) my_idlePTD != (vm_offset_t) vtophys(pdeaddr))
 					printf("pde mismatch: %x, %x\n", my_idlePTD, pdeaddr);
 				printf("cpuid: %d, pdeaddr: 0x%x\n", PCPU_GET(cpuid), pdeaddr);
 				panic("pmap_enter: invalid kernel page table page(1), pdir=%p, npde=%p, pde=%p, va=%p\n",
 					pmap->pm_pdir[PTDPTDI], newpte, origpte, va);
 			}
 		}
 	}
 #endif
 
 	pte = pmap_pte(pmap, va);
 
 	/*
 	 * Page Directory table entry not valid, we need a new PT page
 	 */
 	if (pte == NULL) {
 		panic("pmap_enter: invalid page directory, pdir=%p, va=0x%x\n",
 			(void *)pmap->pm_pdir[PTDPTDI], va);
 	}
 
 	/* New physical address, and the old pte with its frame address. */
 	pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
 	origpte = *(vm_offset_t *)pte;
 	opa = origpte & PG_FRAME;
 
 	if (origpte & PG_PS)
 		panic("pmap_enter: attempted pmap_enter on 4MB page");
 
 	/*
 	 * Mapping has not changed, must be protection or wiring change.
 	 */
 	if (origpte && (opa == pa)) {
 		/*
 		 * Wiring change, just update stats. We don't worry about
 		 * wiring PT pages as they remain resident as long as there
 		 * are valid mappings in them. Hence, if a user page is wired,
 		 * the PT page will be also.
 		 */
 		if (wired && ((origpte & PG_W) == 0))
 			pmap->pm_stats.wired_count++;
 		else if (!wired && (origpte & PG_W))
 			pmap->pm_stats.wired_count--;
 
 #if defined(PMAP_DIAGNOSTIC)
 		if (pmap_nw_modified((pt_entry_t) origpte)) {
 			printf(
 	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
 			    va, origpte);
 		}
 #endif
 
 		/*
 		 * Remove extra pte reference
 		 */
 		if (mpte)
 			mpte->hold_count--;
 
 		/*
 		 * Fast path: write access requested on a valid mapping.
 		 * Turn on PG_RW if it is not already set, flush the TLB
 		 * entry and return without rebuilding the pte.
 		 */
 		if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
 			if ((origpte & PG_RW) == 0) {
 				*pte |= PG_RW;
 #ifdef SMP
 				cpu_invlpg((void *)va);
 				if (pmap->pm_active & PCPU_GET(other_cpus))
 					smp_invltlb();
 #else
 				invltlb_1pg(va);
 #endif
 			}
 			return;
 		}
 
 		/*
 		 * We might be turning off write access to the page,
 		 * so we go ahead and sense modify status.
 		 */
 		if (origpte & PG_MANAGED) {
 			if ((origpte & PG_M) && pmap_track_modified(va)) {
 				vm_page_t om;
 				om = PHYS_TO_VM_PAGE(opa);
 				vm_page_dirty(om);
 			}
 			pa |= PG_MANAGED;
 		}
 		goto validate;
 	} 
 	/*
 	 * Mapping has changed, invalidate old range and fall through to
 	 * handle validating new mapping.
 	 */
 	if (opa) {
 		int err;
 		err = pmap_remove_pte(pmap, pte, va);
 		if (err)
 			panic("pmap_enter: pte vanished, va: 0x%x", va);
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory. Note that we
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
 	if (pmap_initialized && 
 	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
 		pmap_insert_entry(pmap, va, mpte, m);
 		pa |= PG_MANAGED;
 	}
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 	if (wired)
 		pmap->pm_stats.wired_count++;
 
 validate:
 	/*
 	 * Now validate mapping with desired protection/wiring.
 	 */
 	newpte = (vm_offset_t) (pa | pte_prot(pmap, prot) | PG_V);
 
 	if (wired)
 		newpte |= PG_W;
 	/* Addresses below the page table area are user-accessible. */
 	if (va < UPT_MIN_ADDRESS)
 		newpte |= PG_U;
 	if (pmap == kernel_pmap)
 		newpte |= pgeflag;
 
 	/*
 	 * if the mapping or permission bits are different, we need
 	 * to update the pte.
 	 */
 	if ((origpte & ~(PG_M|PG_A)) != newpte) {
 		*pte = newpte | PG_A;
 		/*if (origpte)*/ {
 #ifdef SMP
 			cpu_invlpg((void *)va);
 			if (pmap->pm_active & PCPU_GET(other_cpus))
 				smp_invltlb();
 #else
 			invltlb_1pg(va);
 #endif
 		}
 	}
 }
 
 /*
  * this code makes some *MAJOR* assumptions:
  * 1. Current pmap & pmap exists.
  * 2. Not wired.
  * 3. Read access.
  * 4. No page table pages.
  * 5. Tlbflush is deferred to calling procedure.
  * 6. Page IS managed.
  * but is *MUCH* faster than pmap_enter...
  *
  * mpte is the page table page returned by a previous call (or NULL);
  * it is reused without a lookup when it covers va.  Returns the page
  * table page used for this mapping so it can be passed to the next
  * call, NULL for addresses at or above UPT_MIN_ADDRESS, and 0 when a
  * mapping already exists at va (nothing is entered in that case).
  */
 
 static vm_page_t
 pmap_enter_quick(pmap, va, m, mpte)
 	register pmap_t pmap;
 	vm_offset_t va;
 	vm_page_t m;
 	vm_page_t mpte;
 {
 	unsigned *pte;
 	vm_offset_t pa;
 
 	/*
 	 * In the case that a page table page is not
 	 * resident, we are creating it here.
 	 */
 	if (va < UPT_MIN_ADDRESS) {
 		unsigned ptepindex;
 		vm_offset_t ptepa;
 
 		/*
 		 * Calculate pagetable page index
 		 */
 		ptepindex = va >> PDRSHIFT;
 		/* The caller's page table page already covers va: reuse it. */
 		if (mpte && (mpte->pindex == ptepindex)) {
 			mpte->hold_count++;
 		} else {
 retry:
 			/*
 			 * Get the page directory entry
 			 */
 			ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex];
 
 			/*
 			 * If the page table page is mapped, we just increment
 			 * the hold count, and activate it.
 			 */
 			if (ptepa) {
 				if (ptepa & PG_PS)
 					panic("pmap_enter_quick: unexpected mapping into 4MB page");
 				if (pmap->pm_ptphint &&
 					(pmap->pm_ptphint->pindex == ptepindex)) {
 					mpte = pmap->pm_ptphint;
 				} else {
 					mpte = pmap_page_lookup( pmap->pm_pteobj, ptepindex);
 					pmap->pm_ptphint = mpte;
 				}
 				/*
 				 * The lookup came back empty: start over from
 				 * the page directory entry.
 				 */
 				if (mpte == NULL)
 					goto retry;
 				mpte->hold_count++;
 			} else {
 				mpte = _pmap_allocpte(pmap, ptepindex);
 			}
 		}
 	} else {
 		mpte = NULL;
 	}
 
 	/*
 	 * This call to vtopte makes the assumption that we are
 	 * entering the page into the current pmap.  In order to support
 	 * quick entry into any pmap, one would likely use pmap_pte_quick.
 	 * But that isn't as quick as vtopte.
 	 */
 	pte = (unsigned *)vtopte(va);
 	/*
 	 * Something is already mapped here: give back the hold taken
 	 * above and leave the existing mapping alone.
 	 */
 	if (*pte) {
 		if (mpte)
 			pmap_unwire_pte_hold(pmap, mpte);
 		return 0;
 	}
 
 	/*
 	 * Enter on the PV list if part of our managed memory. Note that we
 	 * raise IPL while manipulating pv_table since pmap_enter can be
 	 * called at interrupt time.
 	 */
 	if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
 		pmap_insert_entry(pmap, va, mpte, m);
 
 	/*
 	 * Increment counters
 	 */
 	pmap->pm_stats.resident_count++;
 
 	pa = VM_PAGE_TO_PHYS(m);
 
 	/*
 	 * Now validate mapping with RO protection
 	 */
 	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
 		*pte = pa | PG_V | PG_U;
 	else
 		*pte = pa | PG_V | PG_U | PG_MANAGED;
 
 	return mpte;
 }
 
 /*
  * Make a temporary mapping for a physical address.  This is only intended
  * to be used for panic dumps.
  *
  * The page is entered at slot i of the crashdumpmap window; the value
  * returned is always the base of that window, not the slot address.
  */
 void *
 pmap_kenter_temporary(vm_offset_t pa, int i)
 {
 	vm_offset_t slot_va;
 
 	slot_va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
 	pmap_kenter(slot_va, pa);
 	return ((void *)crashdumpmap);
 }
 
 /*
  * Threshold, in pages, used by pmap_object_init_pt: when "limit" is
  * set, preloading is skipped if both the requested size and the
  * object's resident page count exceed it.
  */
 #define MAX_INIT_PT (96)
 /*
  * pmap_object_init_pt preloads the ptes for a given object
  * into the specified pmap.  This eliminates the blast of soft
  * faults on process startup and immediately after an mmap.
  *
  * addr is the virtual address at which object page pindex is to
  * appear and size is the length of the region in bytes.  Device
  * objects that are suitably aligned are mapped with 4MB pages when
  * the CPU supports them (pseflag); otherwise only vnode objects are
  * preloaded, and only with pages that are already resident, fully
  * valid and not busy.  A NULL pmap or object is ignored.
  */
 void
 pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
 	pmap_t pmap;
 	vm_offset_t addr;
 	vm_object_t object;
 	vm_pindex_t pindex;
 	vm_size_t size;
 	int limit;
 {
 	vm_offset_t tmpidx;
 	int psize;
 	vm_page_t p, mpte;
 	int objpgs;
 
 	if (pmap == NULL || object == NULL)
 		return;
 
 	/*
 	 * This code maps large physical mmap regions into the
 	 * processor address space.  Note that some shortcuts
 	 * are taken, but the code works.
 	 */
 	if (pseflag &&
 		(object->type == OBJT_DEVICE) &&
 		((addr & (NBPDR - 1)) == 0) &&
 		((size & (NBPDR - 1)) == 0) ) {
 		int i;
 		vm_page_t m[1];
 		unsigned int ptepindex;
 		int npdes;
 		vm_offset_t ptepa;
 
 		/* Something is already mapped at the start of the range. */
 		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
 			return;
 
 retry:
 		/* Find the first page, waiting for it if it is busy. */
 		p = vm_page_lookup(object, pindex);
 		if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
 			goto retry;
 
 		/* Not resident: allocate it and have the pager fill it in. */
 		if (p == NULL) {
 			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
 			if (p == NULL)
 				return;
 			m[0] = p;
 
 			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
 				vm_page_free(p);
 				return;
 			}
 
 			p = vm_page_lookup(object, pindex);
 			vm_page_wakeup(p);
 		}
 
 		/* The physical address must be 4MB aligned as well. */
 		ptepa = (vm_offset_t) VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1)) {
 			return;
 		}
 
 		p->valid = VM_PAGE_BITS_ALL;
 
 		/*
 		 * Enter one 4MB page directory entry per NBPDR of the
 		 * region, advancing the physical address accordingly.
 		 */
 		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
 		npdes = size >> PDRSHIFT;
 		for(i=0;i<npdes;i++) {
 			pmap->pm_pdir[ptepindex] =
 				(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_PS);
 			ptepa += NBPDR;
 			ptepindex += 1;
 		}
 		vm_page_flag_set(p, PG_MAPPED);
 		invltlb();
 		return;
 	}
 
 	/* Region size in pages. */
 	psize = i386_btop(size);
 
 	if ((object->type != OBJT_VNODE) ||
 		(limit && (psize > MAX_INIT_PT) &&
 			(object->resident_page_count > MAX_INIT_PT))) {
 		return;
 	}
 
 	/* Clip the region to the end of the object. */
 	if (psize + pindex > object->size) {
 		if (object->size < pindex)
 			return;		  
 		psize = object->size - pindex;
 	}
 
 	mpte = NULL;
 	/*
 	 * if we are processing a major portion of the object, then scan the
 	 * entire thing.
 	 */
 	if (psize > (object->resident_page_count >> 2)) {
 		objpgs = psize;
 
 		for (p = TAILQ_FIRST(&object->memq);
 		    ((objpgs > 0) && (p != NULL));
 		    p = TAILQ_NEXT(p, listq)) {
 
 			/* Skip pages that fall outside [pindex, pindex + psize). */
 			tmpidx = p->pindex;
 			if (tmpidx < pindex) {
 				continue;
 			}
 			tmpidx -= pindex;
 			if (tmpidx >= psize) {
 				continue;
 			}
 			if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 				(p->busy == 0) &&
 			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 				/* Pull the page off the cache queue before mapping it. */
 				if ((p->queue - p->pc) == PQ_CACHE)
 					vm_page_deactivate(p);
 				vm_page_busy(p);
 				mpte = pmap_enter_quick(pmap, 
 					addr + i386_ptob(tmpidx), p, mpte);
 				vm_page_flag_set(p, PG_MAPPED);
 				vm_page_wakeup(p);
 			}
 			objpgs -= 1;
 		}
 	} else {
 		/*
 		 * else lookup the pages one-by-one.
 		 */
 		for (tmpidx = 0; tmpidx < psize; tmpidx += 1) {
 			p = vm_page_lookup(object, tmpidx + pindex);
 			if (p &&
 			    ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 				(p->busy == 0) &&
 			    (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 				/* Pull the page off the cache queue before mapping it. */
 				if ((p->queue - p->pc) == PQ_CACHE)
 					vm_page_deactivate(p);
 				vm_page_busy(p);
 				mpte = pmap_enter_quick(pmap, 
 					addr + i386_ptob(tmpidx), p, mpte);
 				vm_page_flag_set(p, PG_MAPPED);
 				vm_page_wakeup(p);
 			}
 		}
 	}
 	return;
 }
 
 /*
  * pmap_prefault provides a quick way of clustering
  * pagefaults into a process's address space.  It is a "cousin"
  * of pmap_object_init_pt, except it runs at page fault time instead
  * of mmap time.
  */
 /*
  * Number of pages examined behind (PFBAK) and ahead of (PFFOR) the
  * faulting address, and the resulting number of candidate offsets.
  */
 #define PFBAK 4
 #define PFFOR 4
 #define PAGEORDER_SIZE (PFBAK+PFFOR)
 
 /*
  * Byte offsets from the faulting address, nearest pages first,
  * alternating behind/ahead.  The table MUST contain exactly
  * PAGEORDER_SIZE entries: a missing comma silently merges two
  * initializers into one expression and shortens the array, which
  * makes the consumer read past its end.
  */
 static int pmap_prefault_pageorder[] = {
 	-PAGE_SIZE, PAGE_SIZE,
 	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
 	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
 	-4 * PAGE_SIZE, 4 * PAGE_SIZE
 };
 
 /*
  * Opportunistically map pages around the faulting address addra that
  * are already resident, so that later accesses do not fault.
  *
  * Only acts on the current process's own pmap.  Candidate addresses
  * are addra plus each offset in pmap_prefault_pageorder, restricted to
  * the map entry and to at most PFBAK pages before addra.  A candidate
  * is skipped if it has no page directory entry or is already mapped;
  * the whole scan stops at the first candidate whose page is not in
  * memory.
  */
 void
 pmap_prefault(pmap, addra, entry)
 	pmap_t pmap;
 	vm_offset_t addra;
 	vm_map_entry_t entry;
 {
 	int i;
 	vm_offset_t starta;
 	vm_offset_t addr;
 	vm_pindex_t pindex;
 	vm_page_t m, mpte;
 	vm_object_t object;
 
 	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace)))
 		return;
 
 	object = entry->object.vm_object;
 
 	/*
 	 * Lowest address to consider: PFBAK pages below addra, clipped to
 	 * the start of the entry.  A result above addra means the
 	 * subtraction wrapped around, in which case 0 is used.
 	 */
 	starta = addra - PFBAK * PAGE_SIZE;
 	if (starta < entry->start) {
 		starta = entry->start;
 	} else if (starta > addra) {
 		starta = 0;
 	}
 
 	mpte = NULL;
 	for (i = 0; i < PAGEORDER_SIZE; i++) {
 		vm_object_t lobject;
 		unsigned *pte;
 
 		addr = addra + pmap_prefault_pageorder[i];
 		/*
 		 * Anything beyond PFFOR pages ahead can only be the result
 		 * of wrapping; force it to 0 so the range check below
 		 * normally rejects it.
 		 */
 		if (addr > addra + (PFFOR * PAGE_SIZE))
 			addr = 0;
 
 		if (addr < starta || addr >= entry->end)
 			continue;
 
 		/* No page table page for this address: nothing to fill in. */
 		if ((*pmap_pde(pmap, addr)) == NULL) 
 			continue;
 
 		/* Already mapped. */
 		pte = (unsigned *) vtopte(addr);
 		if (*pte)
 			continue;
 
 		/*
 		 * Look for the page in the entry's object and, while it is
 		 * not found, in the chain of backing objects.  The walk
 		 * only continues through OBJT_DEFAULT objects and stops at
 		 * a backing offset that is not page aligned.
 		 */
 		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
 		lobject = object;
 		for (m = vm_page_lookup(lobject, pindex);
 		    (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object));
 		    lobject = lobject->backing_object) {
 			if (lobject->backing_object_offset & PAGE_MASK)
 				break;
 			pindex += (lobject->backing_object_offset >> PAGE_SHIFT);
 			m = vm_page_lookup(lobject->backing_object, pindex);
 		}
 
 		/*
 		 * give-up when a page is not in memory
 		 */
 		if (m == NULL)
 			break;
 
 		/* Only fully valid pages that nobody is busy with are mapped. */
 		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
 			(m->busy == 0) &&
 		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
 
 			/* Pull the page off the cache queue before mapping it. */
 			if ((m->queue - m->pc) == PQ_CACHE) {
 				vm_page_deactivate(m);
 			}
 			vm_page_busy(m);
 			mpte = pmap_enter_quick(pmap, addr, m, mpte);
 			vm_page_flag_set(m, PG_MAPPED);
 			vm_page_wakeup(m);
 		}
 	}
 }
 
 /*
  *	Routine:	pmap_change_wiring
  *	Function:	Set or clear the wired attribute of the mapping
  *			at (pmap, va) and keep the pmap's wired count
  *			in step.  A NULL pmap is ignored.
  *	In/out conditions:
  *			The mapping must already exist in the pmap.
  */
 void
 pmap_change_wiring(pmap, va, wired)
 	register pmap_t pmap;
 	vm_offset_t va;
 	boolean_t wired;
 {
 	register unsigned *ptep;
 
 	if (pmap == NULL)
 		return;
 
 	ptep = pmap_pte(pmap, va);
 
 	/* Adjust the statistics only when the wired state really changes. */
 	if (wired) {
 		if (!pmap_pte_w(ptep))
 			pmap->pm_stats.wired_count++;
 	} else if (pmap_pte_w(ptep)) {
 		pmap->pm_stats.wired_count--;
 	}
 
 	/*
 	 * Wiring is not a hardware characteristic so there is no need to
 	 * invalidate TLB.
 	 */
 	pmap_pte_set_w(ptep, wired);
 }
 
 
 
 /*
  *	Copy the range specified by src_addr/len
  *	from the source map to the range dst_addr/len
  *	in the destination map.
  *
  *	This routine is only advisory and need not do anything.
  *
  *	Nothing is copied unless the source and destination addresses
  *	are identical and the source pmap's page directory is the one
  *	referenced by PTDpde.  The destination pmap is reached through
  *	the alternate page table mapping (APTDpde), which is switched
  *	to the destination if necessary.  Only managed pages are
  *	copied, and only into destination ptes that are still empty.
  */
 
 void
 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
 	pmap_t dst_pmap, src_pmap;
 	vm_offset_t dst_addr;
 	vm_size_t len;
 	vm_offset_t src_addr;
 {
 	vm_offset_t addr;
 	vm_offset_t end_addr = src_addr + len;
 	vm_offset_t pdnxt;
 	unsigned src_frame, dst_frame;
 	vm_page_t m;
 
 	if (dst_addr != src_addr)
 		return;
 
 	/* The source ptes are read through vtopte(), see below. */
 	src_frame = ((unsigned) src_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
 	if (src_frame != (((unsigned) PTDpde) & PG_FRAME)) {
 		return;
 	}
 
 	/* Point the alternate mapping at the destination's page directory. */
 	dst_frame = ((unsigned) dst_pmap->pm_pdir[PTDPTDI]) & PG_FRAME;
 	if (dst_frame != (((unsigned) APTDpde) & PG_FRAME)) {
 		APTDpde = (pd_entry_t) (dst_frame | PG_RW | PG_V);
 #if defined(SMP)
 		/* The page directory is not shared between CPUs */
 		cpu_invltlb();
 #else
 		invltlb();
 #endif
 	}
 
 	/* Walk the range one page-table-page-sized region at a time. */
 	for(addr = src_addr; addr < end_addr; addr = pdnxt) {
 		unsigned *src_pte, *dst_pte;
 		vm_page_t dstmpte, srcmpte;
 		vm_offset_t srcptepaddr;
 		unsigned ptepindex;
 
 		if (addr >= UPT_MIN_ADDRESS)
 			panic("pmap_copy: invalid to pmap_copy page tables\n");
 
 		/*
 		 * Don't let optional prefaulting of pages make us go
 		 * way below the low water mark of free pages or way
 		 * above high water mark of used pv entries.
 		 */
 		if (cnt.v_free_count < cnt.v_free_reserved ||
 		    pv_entry_count > pv_entry_high_water)
 			break;
 		
 		pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1));
 		ptepindex = addr >> PDRSHIFT;
 
 		/* Nothing mapped in the source for this region. */
 		srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex];
 		if (srcptepaddr == 0)
 			continue;
 			
 		/*
 		 * 4MB page: copy the page directory entry as is, provided
 		 * the destination slot is empty.
 		 */
 		if (srcptepaddr & PG_PS) {
 			if (dst_pmap->pm_pdir[ptepindex] == 0) {
 				dst_pmap->pm_pdir[ptepindex] = (pd_entry_t) srcptepaddr;
 				dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE;
 			}
 			continue;
 		}
 
 		/* Skip source page table pages that are missing, unused or busy. */
 		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
 		if ((srcmpte == NULL) ||
 			(srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
 			continue;
 
 		if (pdnxt > end_addr)
 			pdnxt = end_addr;
 
 		/* Source ptes via the normal map, destination via the alternate. */
 		src_pte = (unsigned *) vtopte(addr);
 		dst_pte = (unsigned *) avtopte(addr);
 		while (addr < pdnxt) {
 			unsigned ptetemp;
 			ptetemp = *src_pte;
 			/*
 			 * we only virtual copy managed pages
 			 */
 			if ((ptetemp & PG_MANAGED) != 0) {
 				/*
 				 * We have to check after allocpte for the
 				 * pte still being around...  allocpte can
 				 * block.
 				 */
 				dstmpte = pmap_allocpte(dst_pmap, addr);
 				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
 					/*
 					 * Clear the modified and
 					 * accessed (referenced) bits
 					 * during the copy.
 					 */
 					m = PHYS_TO_VM_PAGE(ptetemp);
 					*dst_pte = ptetemp & ~(PG_M | PG_A);
 					dst_pmap->pm_stats.resident_count++;
 					pmap_insert_entry(dst_pmap, addr,
 						dstmpte, m);
 	 			} else {
 					/* Nothing entered: give back the hold from allocpte. */
 					pmap_unwire_pte_hold(dst_pmap, dstmpte);
 				}
 				/*
 				 * Stop scanning this page table page once the
 				 * destination holds as many mappings in it as
 				 * the source does.
 				 */
 				if (dstmpte->hold_count >= srcmpte->hold_count)
 					break;
 			}
 			addr += PAGE_SIZE;
 			src_pte++;
 			dst_pte++;
 		}
 	}
 }	
 
 /*
  *	Routine:	pmap_kernel
  *	Function:
  *		Accessor for the kernel's physical map (kernel_pmap).
  */
 pmap_t
 pmap_kernel()
 {
 
 	return kernel_pmap;
 }
 
 /*
  *	pmap_zero_page:
  *
  *	Clear one hardware page.  The page is temporarily mapped at
  *	CADDR2 through the CMAP2 pte, zeroed, and the pte is cleared
  *	again.  Panics if CMAP2 is already in use on entry.
  */
 void
 pmap_zero_page(vm_offset_t phys)
 {
 	int *cmap2_pte = (int *)CMAP2;
 
 	if (*cmap2_pte != 0)
 		panic("pmap_zero_page: CMAP2 busy");
 
 	/* Writable mapping, pre-marked accessed and modified. */
 	*cmap2_pte = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 	invltlb_1pg((vm_offset_t)CADDR2);
 
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686) {
 		i686_pagezero(CADDR2);
 	} else
 #endif
 	{
 		bzero(CADDR2, PAGE_SIZE);
 	}
 
 	/* Release the temporary mapping slot. */
 	*cmap2_pte = 0;
 }
 
 /*
  *	pmap_zero_page_area zeros part of the specified hardware page by
  *	temporarily mapping the page at CADDR2 (through the CMAP2 pte) and
  *	clearing "size" bytes starting "off" bytes into the page.
  *
  *	off and size may not cover an area beyond a single hardware page.
  *
  *	Panics if the CMAP2 pte is already in use on entry.
  */
 void
 pmap_zero_page_area(phys, off, size)
 	vm_offset_t phys;
 	int off;
 	int size;
 {
 
 	if (*(int *) CMAP2)
 		panic("pmap_zero_page_area: CMAP2 busy");
 
 	*(int *) CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
 	invltlb_1pg((vm_offset_t)CADDR2);
 
 #if defined(I686_CPU)
 	/* The whole-page fast path only applies when the area is the page. */
 	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
 		i686_pagezero(CADDR2);
 	else
 #endif
 		bzero((char *)CADDR2 + off, size);
 	*(int *) CMAP2 = 0;
 }
 
 /*
  *	pmap_copy_page:
  *
  *	Copy one page of physical memory.  The source is mapped
  *	at CADDR1 (via CMAP1) and the destination, writable, at CADDR2
  *	(via CMAP2); PAGE_SIZE bytes are then copied with bcopy and
  *	both temporary ptes are cleared.  Panics if either pte is
  *	already in use on entry.
  */
 void
 pmap_copy_page(vm_offset_t src, vm_offset_t dst)
 {
 	int *src_pte = (int *)CMAP1;
 	int *dst_pte = (int *)CMAP2;
 
 	if (*src_pte != 0)
 		panic("pmap_copy_page: CMAP1 busy");
 	if (*dst_pte != 0)
 		panic("pmap_copy_page: CMAP2 busy");
 
 	*src_pte = PG_V | (src & PG_FRAME) | PG_A;
 	*dst_pte = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
 
 	/* Drop any stale translations for the two window addresses. */
 #ifdef I386_CPU
 	invltlb();
 #else
 	invlpg((u_int)CADDR1);
 	invlpg((u_int)CADDR2);
 #endif
 
 	bcopy(CADDR1, CADDR2, PAGE_SIZE);
 
 	*src_pte = 0;
 	*dst_pte = 0;
 }
 
 
 /*
  *	Routine:	pmap_pageable
  *	Function:
  *		Mark the range [sva, eva) of the given pmap as
  *		pageable or not pageable.
  *
  *		A non-pageable page may not take a fault, so its
  *		page table entry has to stay valid the whole time.
  *
  *		The request is advisory only: pmap_enter states
  *		whether pages are to be wired down.  This
  *		implementation therefore does nothing.
  */
 void
 pmap_pageable(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
     boolean_t pageable)
 {
 }
 
 /*
  * pmap_page_exists reports whether page m currently has a mapping
  * that belongs to the given pmap.  It returns FALSE before the pmap
  * module is initialized and for fictitious pages.
  */
 boolean_t
 pmap_page_exists(pmap_t pmap, vm_page_t m)
 {
 	pv_entry_t entry;
 	boolean_t found;
 	int s;
 
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 		return FALSE;
 
 	found = FALSE;
 	s = splvm();
 
 	/*
 	 * Walk the page's pv list and stop at the first entry owned
 	 * by the pmap we were asked about.
 	 */
 	entry = TAILQ_FIRST(&m->md.pv_list);
 	while (entry) {
 		if (entry->pv_pmap == pmap) {
 			found = TRUE;
 			break;
 		}
 		entry = TAILQ_NEXT(entry, pv_list);
 	}
 
 	splx(s);
 	return (found);
 }
 
 #define PMAP_REMOVE_PAGES_CURPROC_ONLY
 /*
  * Remove all pages from specified address space
  * this aids process exit speeds.  Also, this code
  * is special cased for current process only, but
  * can have the more generic (and slightly slower)
  * mode enabled.  This is much faster than pmap_remove
  * in the case of running down an entire address space.
  *
  * Only mappings recorded on the pmap's pv list with a virtual
  * address in [sva, eva) are considered, and wired mappings are
  * left in place.  With PMAP_REMOVE_PAGES_CURPROC_ONLY defined the
  * routine prints a warning and does nothing when pmap is not the
  * pmap of curproc.
  */
 void
 pmap_remove_pages(pmap, sva, eva)
 	pmap_t pmap;
 	vm_offset_t sva, eva;
 {
 	unsigned *pte, tpte;
 	pv_entry_t pv, npv;
 	int s;
 	vm_page_t m;
 
 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 	if (!curproc || (pmap != vmspace_pmap(curproc->p_vmspace))) {
 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 		return;
 	}
 #endif
 
 	s = splvm();
 	/*
 	 * npv is always fetched before pv can be unlinked and freed, so
 	 * the walk survives removal of the current entry.
 	 */
 	for(pv = TAILQ_FIRST(&pmap->pm_pvlist);
 		pv;
 		pv = npv) {
 
 		/* Skip mappings outside the requested range. */
 		if (pv->pv_va >= eva || pv->pv_va < sva) {
 			npv = TAILQ_NEXT(pv, pv_plist);
 			continue;
 		}
 
 #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 		pte = (unsigned *)vtopte(pv->pv_va);
 #else
 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 #endif
 		tpte = *pte;
 
 /*
  * We cannot remove wired pages from a process' mapping at this time
  */
 		if (tpte & PG_W) {
 			npv = TAILQ_NEXT(pv, pv_plist);
 			continue;
 		}
 		/* Tear down the mapping; tpte keeps the old pte contents. */
 		*pte = 0;
 
 		m = PHYS_TO_VM_PAGE(tpte);
 
 		KASSERT(m < &vm_page_array[vm_page_array_size],
 			("pmap_remove_pages: bad tpte %x", tpte));
 
 		pv->pv_pmap->pm_stats.resident_count--;
 
 		/*
 		 * Update the vm_page_t clean and reference bits.
 		 */
 		if (tpte & PG_M) {
 			vm_page_dirty(m);
 		}
 
 
 		/* Unlink the pv entry from the pmap's list ... */
 		npv = TAILQ_NEXT(pv, pv_plist);
 		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
 
 		/* ... and from the page's list. */
 		m->md.pv_list_count--;
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		/* Last mapping of the page gone: clear its mapped/writeable flags. */
 		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
 			vm_page_flag_clear(m, PG_MAPPED | PG_WRITEABLE);
 		}
 
 		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
 		free_pv_entry(pv);
 	}
 	splx(s);
 	/* One invalidation for the whole pmap instead of one per page. */
 	pmap_TLB_invalidate_all(pmap);
 }
 
 /*
  * pmap_testbit reports whether "bit" is set in the pte of any
  * mapping of page m.
  * note that the testbit/changebit routines are inline,
  * and a lot of things compile-time evaluate.
  */
 static boolean_t
 pmap_testbit(vm_page_t m, int bit)
 {
 	pv_entry_t pv;
 	unsigned *pte;
 	boolean_t found;
 	int s;
 
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 		return FALSE;
 
 	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
 		return FALSE;
 
 	found = FALSE;
 	s = splvm();
 
 	pv = TAILQ_FIRST(&m->md.pv_list);
 	for (; pv != NULL; pv = TAILQ_NEXT(pv, pv_list)) {
 
 		/*
 		 * When testing the accessed or modified bit, ignore
 		 * mappings at addresses for which pmap_track_modified()
 		 * says these bits are not tracked.
 		 */
 		if ((bit & (PG_A|PG_M)) && !pmap_track_modified(pv->pv_va))
 			continue;
 
 #if defined(PMAP_DIAGNOSTIC)
 		if (!pv->pv_pmap) {
 			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
 			continue;
 		}
 #endif
 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 		if (*pte & bit) {
 			found = TRUE;
 			break;
 		}
 	}
 
 	splx(s);
 	return (found);
 }
 
 /*
  * pmap_changebit sets (setem TRUE) or clears (setem FALSE) "bit" in
  * the pte of every mapping of page m, invalidating the TLB entry of
  * each pte it rewrites.
  */
 static __inline void
 pmap_changebit(vm_page_t m, int bit, boolean_t setem)
 {
 	pv_entry_t pv;
 	unsigned *pte;
 	vm_offset_t pbits;
 	int s;
 
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 		return;
 
 	s = splvm();
 
 	/*
 	 * Visit every current mapping of the page and set or clear the
 	 * bit as requested.
 	 */
 	pv = TAILQ_FIRST(&m->md.pv_list);
 	for (; pv != NULL; pv = TAILQ_NEXT(pv, pv_list)) {
 
 		/*
 		 * don't write protect pager mappings
 		 */
 		if (!setem && (bit == PG_RW) &&
 		    !pmap_track_modified(pv->pv_va))
 			continue;
 
 #if defined(PMAP_DIAGNOSTIC)
 		if (!pv->pv_pmap) {
 			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
 			continue;
 		}
 #endif
 
 		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 
 		if (setem) {
 			*(int *)pte |= bit;
 			pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 			continue;
 		}
 
 		/* Clearing: nothing to do when the bit is already off. */
 		pbits = *(vm_offset_t *)pte;
 		if ((pbits & bit) == 0)
 			continue;
 
 		if (bit == PG_RW) {
 			/*
 			 * Write-protecting also drops PG_M, so record
 			 * the page as dirty first if it was modified.
 			 */
 			if (pbits & PG_M) {
 				vm_page_dirty(m);
 			}
 			*(int *)pte = pbits & ~(PG_M|PG_RW);
 		} else {
 			*(int *)pte = pbits & ~bit;
 		}
 		pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 	}
 	splx(s);
 }
 
 /*
  *      pmap_page_protect:
  *
  *      Lower the permission for all mappings to a given page.
  *      A request that still allows writing changes nothing; one that
  *      allows reading or executing write-protects every mapping; any
  *      other request removes all mappings of the page.
  */
 void
 pmap_page_protect(vm_page_t m, vm_prot_t prot)
 {
 	if ((prot & VM_PROT_WRITE) != 0)
 		return;
 
 	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0) {
 		pmap_remove_all(m);
 		return;
 	}
 
 	pmap_changebit(m, PG_RW, FALSE);
 }
 
 /*
  * pmap_phys_address converts a physical page number into a byte
  * address using i386_ptob().
  */
 vm_offset_t
 pmap_phys_address(int ppn)
 {
 	vm_offset_t pa;
 
 	pa = i386_ptob(ppn);
 	return (pa);
 }
 
 /*
  *	pmap_ts_referenced:
  *
  *	Return a count of mappings of page m whose pte had the accessed
  *	bit (PG_A) set, clearing the bit in each one counted.  The scan
  *	stops as soon as the count exceeds 4, so the result is at most 5
  *	and later mappings may keep their accessed bit.  Returns 0 before
  *	the pmap module is initialized and for fictitious pages.
  */
 int
 pmap_ts_referenced(vm_page_t m)
 {
 	register pv_entry_t pv, pvf, pvn;
 	unsigned *pte;
 	int s;
 	int rtval = 0;
 
 	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
 		return (rtval);
 
 	s = splvm();
 
 	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 
 		/* Remember the starting entry so one full lap can be detected. */
 		pvf = pv;
 
 		do {
 			/* Fetch the successor before pv is moved. */
 			pvn = TAILQ_NEXT(pv, pv_list);
 
 			/*
 			 * Rotate the visited entry to the tail, so an early
 			 * exit leaves the unvisited entries at the head.
 			 */
 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 
 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 
 			/* "continue" jumps to the loop condition below. */
 			if (!pmap_track_modified(pv->pv_va))
 				continue;
 
 			pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
 
 			if (pte && (*pte & PG_A)) {
 				*pte &= ~PG_A;
 
 				pmap_TLB_invalidate(pv->pv_pmap, pv->pv_va);
 
 				rtval++;
 				if (rtval > 4) {
 					break;
 				}
 			}
 		} while ((pv = pvn) != NULL && pv != pvf);
 	}
 	splx(s);
 
 	return (rtval);
 }
 
 /*
  *	pmap_is_modified:
  *
  *	Report whether any mapping of the given page has the modified
  *	bit (PG_M) set, as determined by pmap_testbit().
  */
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
 	boolean_t modified;
 
 	modified = pmap_testbit(m, PG_M);
 	return (modified);
 }
 
 /*
  *	pmap_clear_modify:
  *
  *	Clear the modified bit (PG_M) in every mapping of the
  *	specified physical page.
  */
 void
 pmap_clear_modify(vm_page_t m)
 {
 
 	pmap_changebit(m, PG_M, FALSE);
 	return;
 }
 
 /*
  *	pmap_clear_reference:
  *
  *	Clear the accessed bit (PG_A) in every mapping of the
  *	specified physical page.
  */
 void
 pmap_clear_reference(vm_page_t m)
 {
 
 	pmap_changebit(m, PG_A, FALSE);
 	return;
 }
 
 /*
  * Miscellaneous support routines follow
  */
 
 /*
  * i386_protection_init fills protection_codes[] with the pte
  * protection bits for the protection values 0 through 7: PG_RW for
  * every combination that includes VM_PROT_WRITE, 0 for the rest.
  */
 static void
 i386_protection_init(void)
 {
 	int *slot;
 	int prot;
 
 	slot = protection_codes;
 	prot = 0;
 	while (prot < 8) {
 		switch (prot) {
 		/*
 		 * No access and read access are both encoded as 0, and
 		 * there is no execute bit, so execute is just readable.
 		 */
 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
 			*slot = 0;
 			slot++;
 			break;
 		/* Anything writable gets the read/write bit. */
 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
 			*slot = PG_RW;
 			slot++;
 			break;
 		}
 		prot++;
 	}
 }
 
 /*
  * pmap_mapdev maps "size" bytes of physical address space starting
  * at "pa" into freshly allocated kernel virtual memory and returns
  * the virtual address that corresponds to pa (page offset included).
  * It is meant for device memory, NOT real memory.  Panics when no
  * kernel virtual memory can be allocated.
  */
 void *
 pmap_mapdev(vm_offset_t pa, vm_size_t size)
 {
 	vm_offset_t va, cur_va, offset;
 	unsigned *pte;
 
 	/* Widen the request to whole pages around [pa, pa + size). */
 	offset = pa & PAGE_MASK;
 	size = roundup(offset + size, PAGE_SIZE);
 
 	GIANT_REQUIRED;
 
 	va = kmem_alloc_pageable(kernel_map, size);
 	if (va == 0)
 		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
 
 	/* Enter one writable, valid pte per page. */
 	pa &= PG_FRAME;
 	cur_va = va;
 	while (size > 0) {
 		pte = (unsigned *)vtopte(cur_va);
 		*pte = pa | PG_RW | PG_V | pgeflag;
 		size -= PAGE_SIZE;
 		cur_va += PAGE_SIZE;
 		pa += PAGE_SIZE;
 	}
 	invltlb();
 
 	return ((void *)(va + offset));
 }
 
 /*
  * pmap_unmapdev releases the kernel virtual memory covering
  * [va, va + size), widened to whole pages, back to kernel_map.
  */
 void
 pmap_unmapdev(vm_offset_t va, vm_size_t size)
 {
 	vm_offset_t page_base = va & PG_FRAME;
 	vm_offset_t page_off = va & PAGE_MASK;
 
 	kmem_free(kernel_map, page_base, roundup(page_off + size, PAGE_SIZE));
 }
 
 /*
  * pmap_mincore does the pmap part of mincore: it returns the
  * MINCORE_* flags describing the page mapped at addr in pmap, or 0
  * when there is no pte or the pte is empty.  Unmanaged pages only
  * report MINCORE_INCORE.
  */
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr)
 {
 	unsigned *ptep, pte;
 	vm_offset_t pa;
 	vm_page_t m;
 	int val;
 
 	ptep = pmap_pte(pmap, addr);
 	if (ptep == 0)
 		return 0;
 
 	pte = *ptep;
 	if (pte == 0)
 		return 0;
 
 	val = MINCORE_INCORE;
 	if ((pte & PG_MANAGED) == 0)
 		return val;
 
 	pa = pte & PG_FRAME;
 	m = PHYS_TO_VM_PAGE(pa);
 
 	if (pte & PG_M) {
 		/* Modified through this mapping. */
 		val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
 	} else if (m->dirty || pmap_is_modified(m)) {
 		/* Modified through some other mapping. */
 		val |= MINCORE_MODIFIED_OTHER;
 	}
 
 	if (pte & PG_A) {
 		/* Referenced through this mapping. */
 		val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
 	} else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) {
 		/* Referenced through some other mapping. */
 		val |= MINCORE_REFERENCED_OTHER;
 		vm_page_flag_set(m, PG_REFERENCED);
 	}
 
 	return val;
 }
 
 /*
  * pmap_activate marks the pmap of process p as active on the current
  * CPU, records the physical address of its page directory in the
  * process's pcb and loads that address into %cr3.
  */
 void
 pmap_activate(struct proc *p)
 {
 	pmap_t	pmap = vmspace_pmap(p->p_vmspace);
 
 #if defined(SMP)
 	pmap->pm_active |= 1 << PCPU_GET(cpuid);
 #else
 	pmap->pm_active |= 1;
 #endif
 #if defined(SWTCH_OPTIM_STATS)
 	tlb_flush_count++;
 #endif
 	p->p_addr->u_pcb.pcb_cr3 = vtophys(pmap->pm_pdir);
 	load_cr3(p->p_addr->u_pcb.pcb_cr3);
 }
 
 /*
  * pmap_addr_hint suggests a mapping address: for device objects of
  * at least NBPDR bytes addr is rounded up to an NBPDR boundary, in
  * every other case it is returned unchanged.
  */
 vm_offset_t
 pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
 {
 
 	if (obj != NULL && size >= NBPDR && obj->type == OBJT_DEVICE)
 		addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
 
 	return addr;
 }
 
 
 #if defined(PMAP_DEBUG)
 /*
  * pmap_pid_dump prints every valid user-space pte of the process with
  * the given pid (two entries per output line) and returns the number
  * of ptes printed.  The walk stops at VM_MIN_KERNEL_ADDRESS.  The
  * allproc list is held share-locked for the duration.
  *
  * The return type is spelled out; the definition used to rely on
  * implicit int, which is not valid in C99 and later.
  */
 int
 pmap_pid_dump(int pid)
 {
 	pmap_t pmap;
 	struct proc *p;
 	int npte = 0;
 	int index;
 
 	sx_slock(&allproc_lock);
 	LIST_FOREACH(p, &allproc, p_list) {
 		if (p->p_pid != pid)
 			continue;
 
 		if (p->p_vmspace) {
 			int i,j;
 			/* index counts entries printed on the current line. */
 			index = 0;
 			pmap = vmspace_pmap(p->p_vmspace);
 			for(i=0;i<1024;i++) {
 				pd_entry_t *pde;
 				unsigned *pte;
 				unsigned base = i << PDRSHIFT;
 				
 				pde = &pmap->pm_pdir[i];
 				if (pde && pmap_pde_v(pde)) {
 					for(j=0;j<1024;j++) {
 						unsigned va = base + (j << PAGE_SHIFT);
 						/* Reached kernel space: finish the line and stop. */
 						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
 							if (index) {
 								index = 0;
 								printf("\n");
 							}
 							sx_sunlock(&allproc_lock);
 							return npte;
 						}
 						pte = pmap_pte_quick( pmap, va);
 						if (pte && pmap_pte_v(pte)) {
 							vm_offset_t pa;
 							vm_page_t m;
 							pa = *(int *)pte;
 							m = PHYS_TO_VM_PAGE(pa);
 							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
 								va, pa, m->hold_count, m->wire_count, m->flags);
 							npte++;
 							index++;
 							if (index >= 2) {
 								index = 0;
 								printf("\n");
 							} else {
 								printf(" ");
 							}
 						}
 					}
 				}
 			}
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	return npte;
 }
 #endif
 
 #if defined(DEBUG)
 
 static void	pads __P((pmap_t pm));
 void		pmap_pvdump __P((vm_offset_t pa));
 
 /*
  * pads prints "va:pte" for every valid pte of a non-kernel pmap, up
  * to UPT_MAX_ADDRESS.  The kernel pmap is never dumped.
  */
 static void
 pads(pmap_t pm)
 {
 	unsigned va, pdi, pti;
 	unsigned *ptep;
 
 	if (pm == kernel_pmap)
 		return;
 
 	for (pdi = 0; pdi < 1024; pdi++) {
 		if (!pm->pm_pdir[pdi])
 			continue;
 		for (pti = 0; pti < 1024; pti++) {
 			va = (pdi << PDRSHIFT) + (pti << PAGE_SHIFT);
 			/* pm is known not to be the kernel pmap here. */
 			if (va > UPT_MAX_ADDRESS)
 				continue;
 			ptep = pmap_pte_quick(pm, va);
 			if (pmap_pte_v(ptep))
 				printf("%x:%x ", va, *(int *) ptep);
 		}
 	}
 }
 
 /*
  * pmap_pvdump prints the physical address pa followed, for every pv
  * entry of the page at that address, by the owning pmap, the virtual
  * address and a dump of that pmap's address space (see pads()).
  */
 void
 pmap_pvdump(vm_offset_t pa)
 {
 	pv_entry_t pv;
 	vm_page_t m;
 
 	printf("pa %x", pa);
 	m = PHYS_TO_VM_PAGE(pa);
 
 	pv = TAILQ_FIRST(&m->md.pv_list);
 	while (pv) {
 #ifdef used_to_be
 		printf(" -> pmap %p, va %x, flags %x",
 		    (void *)pv->pv_pmap, pv->pv_va, pv->pv_flags);
 #endif
 		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
 		pads(pv->pv_pmap);
 		pv = TAILQ_NEXT(pv, pv_list);
 	}
 	printf(" ");
 }
 #endif
Index: head/sys/i386/i386/sys_machdep.c
===================================================================
--- head/sys/i386/i386/sys_machdep.c	(revision 82308)
+++ head/sys/i386/i386/sys_machdep.c	(revision 82309)
@@ -1,535 +1,537 @@
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
  * $FreeBSD$
  *
  */
 
+#include "opt_upages.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysproto.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 
 #include <machine/cpu.h>
 #include <machine/pcb_ext.h>	/* pcb.h included by sys/user.h */
 #include <machine/sysarch.h>
 
 #include <vm/vm_kern.h>		/* for kernel_map */
 
 /*
  * LDT sizing.  MAX_LD is the largest number of descriptors a user LDT
  * may hold and LD_PER_PAGE the allocation granularity in descriptors.
  * NEW_MAX_LD(num) gives the smallest multiple of LD_PER_PAGE that is
  * strictly greater than num, i.e. the table length needed when num is
  * the largest descriptor index; SIZE_FROM_LARGEST_LD(num) is that
  * length in bytes (8 bytes per descriptor).  The argument is
  * parenthesized so any expression may be passed.
  */
 #define MAX_LD 8192
 #define LD_PER_PAGE 512
 #define NEW_MAX_LD(num)  (((num) + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
 #define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3)
 
 
 
 static int i386_get_ldt	__P((struct proc *, char *));
 static int i386_set_ldt	__P((struct proc *, char *));
 static int i386_get_ioperm	__P((struct proc *, char *));
 static int i386_set_ioperm	__P((struct proc *, char *));
 #ifdef SMP
 static void set_user_ldt_rv	__P((struct pcb *));
 #endif
 
 #ifndef _SYS_SYSPROTO_H_
 struct sysarch_args {
 	int op;
 	char *parms;
 };
 #endif
 
 /*
  * sysarch dispatches the machine-dependent system call selected by
  * uap->op to its handler, passing along uap->parms, and returns the
  * handler's error code.  Unknown operations yield EOPNOTSUPP.
  */
 int
 sysarch(struct proc *p, struct sysarch_args *uap)
 {
 
 	switch (uap->op) {
 	case I386_GET_LDT:
 		return (i386_get_ldt(p, uap->parms));
 	case I386_SET_LDT:
 		return (i386_set_ldt(p, uap->parms));
 	case I386_GET_IOPERM:
 		return (i386_get_ioperm(p, uap->parms));
 	case I386_SET_IOPERM:
 		return (i386_set_ioperm(p, uap->parms));
 	case I386_VM86:
 		return (vm86_sysarch(p, uap->parms));
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * i386_extend_pcb gives process p a private pcb extension: a block of
  * ctob(IOPAGES + 1) bytes of kernel memory that holds a struct
  * pcb_ext (with its own TSS) at the start and the interrupt map and
  * I/O permission bitmap further up.  The extension is attached to the
  * process's pcb and a reschedule is requested so that the new TSS is
  * picked up.
  *
  * Returns 0 on success or ENOMEM when the memory cannot be allocated.
  * Asserts that p is curproc and that no extension exists yet.
  */
 int
 i386_extend_pcb(struct proc *p)
 {
 	int i, offset;
 	u_long *addr;
 	struct pcb_ext *ext;
 	struct soft_segment_descriptor ssd = {
 		0,			/* segment base address (overwritten) */
 		ctob(IOPAGES + 1) - 1,	/* length */
 		SDT_SYS386TSS,		/* segment type */
 		0,			/* priority level */
 		1,			/* descriptor present */
 		0, 0,
 		0,			/* default 32 size */
 		0			/* granularity */
 	};
 
 	ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1));
 	if (ext == 0)
 		return (ENOMEM);
 	/* Only the structure itself is zeroed; the maps are filled below. */
 	bzero(ext, sizeof(struct pcb_ext)); 
 	/* Ring 0 stack: 16 bytes below the end of the process's UPAGES. */
 	ext->ext_tss.tss_esp0 = (unsigned)p->p_addr + ctob(UPAGES) - 16;
 	ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
 	/*
 	 * The last byte of the i/o map must be followed by an 0xff byte.
 	 * We arbitrarily allocate 16 bytes here, to keep the starting
 	 * address on a doubleword boundary.
 	 */
 	offset = PAGE_SIZE - 16;
 	/* I/O map offset is relative to the TSS and kept in the upper 16 bits. */
 	ext->ext_tss.tss_ioopt = 
 	    (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16;
 	ext->ext_iomap = (caddr_t)ext + offset;
 	/* The 32-byte interrupt map sits directly below the I/O map. */
 	ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32;
 
 	/*
 	 * Set every bit from the start of the interrupt map through the
 	 * I/O map and the 16 bytes that follow it.
 	 */
 	addr = (u_long *)ext->ext_vm86.vm86_intmap;
 	for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++)
 		*addr++ = ~0;
 
 	/* The TSS segment starts at ext_tss, so shrink the limit to match. */
 	ssd.ssd_base = (unsigned)&ext->ext_tss;
 	ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext);
 	ssdtosd(&ssd, &ext->ext_tssd);
 
 	KASSERT(p == curproc, ("giving a TSS to non-curproc"));
 	KASSERT(p->p_addr->u_pcb.pcb_ext == 0, ("already have a TSS!"));
 	mtx_lock_spin(&sched_lock);
 	p->p_addr->u_pcb.pcb_ext = ext;
 	
 	/* switch to the new TSS after syscall completes */
 	p->p_sflag |= PS_NEEDRESCHED;
 	mtx_unlock_spin(&sched_lock);
 
 	return 0;
 }
 
 static int
 i386_set_ioperm(p, args)
 	struct proc *p;
 	char *args;
 {
 	int i, error;
 	struct i386_ioperm_args ua;
 	char *iomap;
 
 	if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
 		return (error);
 
 	if ((error = suser(p)) != 0)
 		return (error);
 	if (securelevel > 0)
 		return (EPERM);
 	/*
 	 * XXX 
 	 * While this is restricted to root, we should probably figure out
 	 * whether any other driver is using this i/o address, as so not to
 	 * cause confusion.  This probably requires a global 'usage registry'.
 	 */
 
 	if (p->p_addr->u_pcb.pcb_ext == 0)
 		if ((error = i386_extend_pcb(p)) != 0)
 			return (error);
 	iomap = (char *)p->p_addr->u_pcb.pcb_ext->ext_iomap;
 
 	if (ua.start + ua.length > IOPAGES * PAGE_SIZE * NBBY)
 		return (EINVAL);
 
 	for (i = ua.start; i < ua.start + ua.length; i++) {
 		if (ua.enable) 
 			iomap[i >> 3] &= ~(1 << (i & 7));
 		else
 			iomap[i >> 3] |= (1 << (i & 7));
 	}
 	return (error);
 }
 
 static int
 i386_get_ioperm(p, args)
 	struct proc *p;
 	char *args;
 {
 	int i, state, error;
 	struct i386_ioperm_args ua;
 	char *iomap;
 
 	if ((error = copyin(args, &ua, sizeof(struct i386_ioperm_args))) != 0)
 		return (error);
 	if (ua.start >= IOPAGES * PAGE_SIZE * NBBY)
 		return (EINVAL);
 
 	if (p->p_addr->u_pcb.pcb_ext == 0) {
 		ua.length = 0;
 		goto done;
 	}
 
 	iomap = (char *)p->p_addr->u_pcb.pcb_ext->ext_iomap;
 
 	i = ua.start;
 	state = (iomap[i >> 3] >> (i & 7)) & 1;
 	ua.enable = !state;
 	ua.length = 1;
 
 	for (i = ua.start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
 		if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
 			break;
 		ua.length++;
 	}
 			
 done:
 	error = copyout(&ua, args, sizeof(struct i386_ioperm_args));
 	return (error);
 }
 
 /*
  * set_user_ldt copies the LDT descriptor of the given pcb into the
  * GUSERLDT_SEL slot of the GDT (this CPU's slot on SMP), loads the
  * LDT register with it and records it as the per-CPU current LDT.
  *
  * This must be called with sched_lock held.  Unfortunately, we can't use a
  * mtx_assert() here because cpu_switch() calls this function after changing
  * curproc but before sched_lock's owner is updated in mi_switch().
  */   
 void
 set_user_ldt(struct pcb *pcb)
 {
 	struct pcb_ldt *user_ldt = pcb->pcb_ldt;
 
 #ifdef SMP
 	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = user_ldt->ldt_sd;
 #else
 	gdt[GUSERLDT_SEL].sd = user_ldt->ldt_sd;
 #endif
 	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
 	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
 }
 
 #ifdef SMP
 /*
  * set_user_ldt_rv reloads the user LDT through set_user_ldt(), under
  * sched_lock, but only when pcb is the pcb currently running on the
  * calling CPU.
  */
 static void
 set_user_ldt_rv(struct pcb *pcb)
 {
 
 	if (pcb == PCPU_GET(curpcb)) {
 		mtx_lock_spin(&sched_lock);
 		set_user_ldt(pcb);
 		mtx_unlock_spin(&sched_lock);
 	}
 }
 #endif
 
 /*
  * user_ldt_alloc allocates a new LDT able to hold descriptor index
  * "len" (the table length is rounded with NEW_MAX_LD) and fills it
  * from the pcb's current LDT, or from the default ldt when the pcb has
  * none.  The new LDT is returned with a reference count of 1; it is
  * NOT installed in the pcb.
  *
  * Must be called with either sched_lock free or held but not recursed.
  * If it does not return NULL, it will return with it owned.
  * Returns NULL, with sched_lock not held, when the descriptor table
  * cannot be allocated.
  */
 struct pcb_ldt *
 user_ldt_alloc(struct pcb *pcb, int len)
 {
 	struct pcb_ldt *pcb_ldt, *new_ldt;
 
 	/* Drop sched_lock around the allocations below. */
 	if (mtx_owned(&sched_lock))
 		mtx_unlock_spin(&sched_lock);
 	mtx_assert(&sched_lock, MA_NOTOWNED);
 	MALLOC(new_ldt, struct pcb_ldt *, sizeof(struct pcb_ldt),
 		M_SUBPROC, M_WAITOK);
 
 	new_ldt->ldt_len = len = NEW_MAX_LD(len);
 	new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
 		len * sizeof(union descriptor));
 	if (new_ldt->ldt_base == NULL) {
 		FREE(new_ldt, M_SUBPROC);
 		return NULL;
 	}
 	new_ldt->ldt_refcnt = 1;
 	new_ldt->ldt_active = 0;
 
 	mtx_lock_spin(&sched_lock);
 	/* Build the segment descriptor that describes the new table. */
 	gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base;
 	gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1;
 	ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd);
 
 	if ((pcb_ldt = pcb->pcb_ldt)) {
 		/* Copy no more than the shorter of the two tables. */
 		if (len > pcb_ldt->ldt_len)
 			len = pcb_ldt->ldt_len;
 		bcopy(pcb_ldt->ldt_base, new_ldt->ldt_base,
 			len * sizeof(union descriptor));
 	} else {
 		bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
 	}
 	return new_ldt;
 }
 
 /*
  * Must be called either with sched_lock free or held but not recursed.
  * If pcb->pcb_ldt is not NULL, it will return with sched_lock released.
  */
 void
 user_ldt_free(struct pcb *pcb)
 {
 	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
 
 	if (pcb_ldt == NULL)
 		return;
 
 	if (!mtx_owned(&sched_lock))
 		mtx_lock_spin(&sched_lock);
 	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
 	if (pcb == PCPU_GET(curpcb)) {
 		lldt(_default_ldt);
 		PCPU_SET(currentldt, _default_ldt);
 	}
 
 	pcb->pcb_ldt = NULL;
 	if (--pcb_ldt->ldt_refcnt == 0) {
 		mtx_unlock_spin(&sched_lock);
 		kmem_free(kernel_map, (vm_offset_t)pcb_ldt->ldt_base,
 			pcb_ldt->ldt_len * sizeof(union descriptor));
 		FREE(pcb_ldt, M_SUBPROC);
 	} else
 		mtx_unlock_spin(&sched_lock);
 }
 
 static int
 i386_get_ldt(p, args)
 	struct proc *p;
 	char *args;
 {
 	int error = 0;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
 	int nldt, num;
 	union descriptor *lp;
 	struct i386_ldt_args ua, *uap = &ua;
 
 	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
 		return(error);
 
 #ifdef	DEBUG
 	printf("i386_get_ldt: start=%d num=%d descs=%p\n",
 	    uap->start, uap->num, (void *)uap->descs);
 #endif
 
 	/* verify range of LDTs exist */
 	if ((uap->start < 0) || (uap->num <= 0))
 		return(EINVAL);
 
 	if (pcb_ldt) {
 		nldt = pcb_ldt->ldt_len;
 		num = min(uap->num, nldt);
 		lp = &((union descriptor *)(pcb_ldt->ldt_base))[uap->start];
 	} else {
 		nldt = sizeof(ldt)/sizeof(ldt[0]);
 		num = min(uap->num, nldt);
 		lp = &ldt[uap->start];
 	}
 	if (uap->start > nldt)
 		return(EINVAL);
 
 	error = copyout(lp, uap->descs, num * sizeof(union descriptor));
 	if (!error)
 		p->p_retval[0] = num;
 
 	return(error);
 }
 
 static int
 i386_set_ldt(p, args)
 	struct proc *p;
 	char *args;
 {
 	int error = 0, i, n;
 	int largest_ld;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 	struct pcb_ldt *pcb_ldt = pcb->pcb_ldt;
 	struct i386_ldt_args ua, *uap = &ua;
 	caddr_t old_ldt_base;
 	int old_ldt_len;
 	critical_t savecrit;
 
 	if ((error = copyin(args, uap, sizeof(struct i386_ldt_args))) < 0)
 		return(error);
 
 #ifdef	DEBUG
 	printf("i386_set_ldt: start=%d num=%d descs=%p\n",
 	    uap->start, uap->num, (void *)uap->descs);
 #endif
 
 	/* verify range of descriptors to modify */
 	if ((uap->start < 0) || (uap->start >= MAX_LD) || (uap->num < 0) ||
 		(uap->num > MAX_LD))
 	{
 		return(EINVAL);
 	}
 	largest_ld = uap->start + uap->num - 1;
 	if (largest_ld >= MAX_LD)
 		return(EINVAL);
 
 	/* allocate user ldt */
 	if (!pcb_ldt || largest_ld >= pcb_ldt->ldt_len) {
 		struct pcb_ldt *new_ldt = user_ldt_alloc(pcb, largest_ld);
 		if (new_ldt == NULL)
 			return ENOMEM;
 		if (pcb_ldt) {
 			old_ldt_base = pcb_ldt->ldt_base;
 			old_ldt_len = pcb_ldt->ldt_len;
 			pcb_ldt->ldt_sd = new_ldt->ldt_sd;
 			pcb_ldt->ldt_base = new_ldt->ldt_base;
 			pcb_ldt->ldt_len = new_ldt->ldt_len;
 			mtx_unlock_spin(&sched_lock);
 			kmem_free(kernel_map, (vm_offset_t)old_ldt_base,
 				old_ldt_len * sizeof(union descriptor));
 			FREE(new_ldt, M_SUBPROC);
 #ifndef SMP
 			mtx_lock_spin(&sched_lock);
 #endif
 		} else {
 			pcb->pcb_ldt = pcb_ldt = new_ldt;
 #ifdef SMP
 			mtx_unlock_spin(&sched_lock);
 #endif
 		}
 #ifdef SMP
 		/* signal other cpus to reload ldt */
 		smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, NULL, pcb);
 #else
 		set_user_ldt(pcb);
 		mtx_unlock_spin(&sched_lock);
 #endif
 	}
 
 	/* Check descriptors for access violations */
 	for (i = 0, n = uap->start; i < uap->num; i++, n++) {
 		union descriptor desc, *dp;
 		dp = &uap->descs[i];
 		error = copyin(dp, &desc, sizeof(union descriptor));
 		if (error)
 			return(error);
 
 		switch (desc.sd.sd_type) {
 		case SDT_SYSNULL:	/* system null */ 
 			desc.sd.sd_p = 0;
 			break;
 		case SDT_SYS286TSS: /* system 286 TSS available */
 		case SDT_SYSLDT:    /* system local descriptor table */
 		case SDT_SYS286BSY: /* system 286 TSS busy */
 		case SDT_SYSTASKGT: /* system task gate */
 		case SDT_SYS286IGT: /* system 286 interrupt gate */
 		case SDT_SYS286TGT: /* system 286 trap gate */
 		case SDT_SYSNULL2:  /* undefined by Intel */ 
 		case SDT_SYS386TSS: /* system 386 TSS available */
 		case SDT_SYSNULL3:  /* undefined by Intel */
 		case SDT_SYS386BSY: /* system 386 TSS busy */
 		case SDT_SYSNULL4:  /* undefined by Intel */ 
 		case SDT_SYS386IGT: /* system 386 interrupt gate */
 		case SDT_SYS386TGT: /* system 386 trap gate */
 		case SDT_SYS286CGT: /* system 286 call gate */ 
 		case SDT_SYS386CGT: /* system 386 call gate */
 			/* I can't think of any reason to allow a user proc
 			 * to create a segment of these types.  They are
 			 * for OS use only.
 			 */
 			return EACCES;
 			/*NOTREACHED*/
 
 		/* memory segment types */
 		case SDT_MEMEC:   /* memory execute only conforming */
 		case SDT_MEMEAC:  /* memory execute only accessed conforming */
 		case SDT_MEMERC:  /* memory execute read conforming */
 		case SDT_MEMERAC: /* memory execute read accessed conforming */
 			 /* Must be "present" if executable and conforming. */
 			if (desc.sd.sd_p == 0)
 				return (EACCES);
 			break;
 		case SDT_MEMRO:   /* memory read only */
 		case SDT_MEMROA:  /* memory read only accessed */
 		case SDT_MEMRW:   /* memory read write */
 		case SDT_MEMRWA:  /* memory read write accessed */
 		case SDT_MEMROD:  /* memory read only expand dwn limit */
 		case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
 		case SDT_MEMRWD:  /* memory read write expand dwn limit */  
 		case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */
 		case SDT_MEME:    /* memory execute only */ 
 		case SDT_MEMEA:   /* memory execute only accessed */
 		case SDT_MEMER:   /* memory execute read */
 		case SDT_MEMERA:  /* memory execute read accessed */
 			break;
 		default:
 			return(EINVAL);
 			/*NOTREACHED*/
 		}
 
 		/* Only user (ring-3) descriptors may be present. */
 		if ((desc.sd.sd_p != 0) && (desc.sd.sd_dpl != SEL_UPL))
 			return (EACCES);
 	}
 
 	/* Fill in range */
 	savecrit = critical_enter();
 	error = copyin(uap->descs, 
 	    &((union descriptor *)(pcb_ldt->ldt_base))[uap->start],
 	    uap->num * sizeof(union descriptor));
 	if (!error)
 		p->p_retval[0] = uap->start;
 	critical_exit(savecrit);
 
 	return(error);
 }
Index: head/sys/i386/i386/vm_machdep.c
===================================================================
--- head/sys/i386/i386/vm_machdep.c	(revision 82308)
+++ head/sys/i386/i386/vm_machdep.c	(revision 82309)
@@ -1,587 +1,588 @@
 /*-
  * Copyright (c) 1982, 1986 The Regents of the University of California.
  * Copyright (c) 1989, 1990 William Jolitz
  * Copyright (c) 1994 John Dyson
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department, and William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
  * $FreeBSD$
  */
 
 #include "opt_npx.h"
 #ifdef PC98
 #include "opt_pc98.h"
 #endif
 #include "opt_reset.h"
 #include "opt_isa.h"
+#include "opt_upages.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/mutex.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/pcb_ext.h>
 #include <machine/vm86.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <sys/lock.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 
 #include <sys/user.h>
 
 #ifdef PC98
 #include <pc98/pc98/pc98.h>
 #else
 #include <i386/isa/isa.h>
 #endif
 
 /* Performs the actual reset; shared by the UP and SMP paths of cpu_reset(). */
 static void	cpu_reset_real __P((void));
 #ifdef SMP
 /* Installed by cpu_reset() as cpustop_restartfunc when a non-BSP cpu resets. */
 static void	cpu_reset_proxy __P((void));
 /* cpuid of the cpu that called cpu_reset(); stopped by cpu_reset_proxy(). */
 static u_int	cpu_reset_proxyid;
 /*
  * Handshake between cpu_reset() and cpu_reset_proxy():
  * 0 = set by cpu_reset() before restarting CPU #0,
  * 1 = set by cpu_reset_proxy() once it is running,
  * 2 = set by cpu_reset() to let cpu_reset_proxy() continue.
  */
 static volatile u_int	cpu_reset_proxy_active;
 #endif
 extern int	_ucodesel, _udatasel;
 
 /*
  * quick version of vm_fault
  *
  * Touches the single byte at v with fubyte(); when write access is
  * requested the fetched value is stored straight back with subyte().
  * The result of the last call made is returned to the caller.
  */
 int
 vm_fault_quick(v, prot)
 	caddr_t v;
 	int prot;
 {
 
 	/* read-only access: fetching the byte is all that is needed */
 	if ((prot & VM_PROT_WRITE) == 0)
 		return (fubyte(v));
 
 	/* write access: rewrite the byte with its current contents */
 	return (subyte(v, fubyte(v)));
 }
 
 /*
  * Finish a fork operation, with process p2 nearly set up.
  * Copy and update the pcb, set up the stack so that the child
  * ready to run and return to user mode.
  */
 void
 cpu_fork(p1, p2, flags)
 	register struct proc *p1, *p2;
 	int flags;
 {
 	struct pcb *pcb2;
 #ifdef DEV_NPX
 	int savecrit;
 #endif
 
 	if ((flags & RFPROC) == 0) {
 		if ((flags & RFMEM) == 0) {
 			/* unshare user LDT */
 			struct pcb *pcb1 = &p1->p_addr->u_pcb;
 			struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;
 			if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
 				pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len);
 				if (pcb_ldt == NULL)
 					panic("could not copy LDT");
 				pcb1->pcb_ldt = pcb_ldt;
 				set_user_ldt(pcb1);
 				user_ldt_free(pcb1);
 			}
 		}
 		return;
 	}
 
 	/* Ensure that p1's pcb is up to date. */
 #ifdef DEV_NPX
 	if (p1 == curproc)
 		p1->p_addr->u_pcb.pcb_gs = rgs();
 	savecrit = critical_enter();
 	if (PCPU_GET(npxproc) == p1)
 		npxsave(&p1->p_addr->u_pcb.pcb_save);
 	critical_exit(savecrit);
 #endif
 
 	/* Copy p1's pcb. */
 	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
 	pcb2 = &p2->p_addr->u_pcb;
 
 	/*
 	 * Create a new fresh stack for the new process.
 	 * Copy the trap frame for the return to user mode as if from a
 	 * syscall.  This copies most of the user mode register values.
 	 */
 	p2->p_frame = (struct trapframe *)
 			   ((int)p2->p_addr + UPAGES * PAGE_SIZE - 16) - 1;
 	bcopy(p1->p_frame, p2->p_frame, sizeof(struct trapframe));
 
 	p2->p_frame->tf_eax = 0;		/* Child returns zero */
 	p2->p_frame->tf_eflags &= ~PSL_C;	/* success */
 	p2->p_frame->tf_edx = 1;
 
 	/*
 	 * Set registers for trampoline to user mode.  Leave space for the
 	 * return address on stack.  These are the kernel mode register values.
 	 */
 	pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
 	pcb2->pcb_edi = 0;
 	pcb2->pcb_esi = (int)fork_return;	/* fork_trampoline argument */
 	pcb2->pcb_ebp = 0;
 	pcb2->pcb_esp = (int)p2->p_frame - sizeof(void *);
 	pcb2->pcb_ebx = (int)p2;		/* fork_trampoline argument */
 	pcb2->pcb_eip = (int)fork_trampoline;
 	/*-
 	 * pcb2->pcb_dr*:	cloned above.
 	 * pcb2->pcb_ldt:	duplicated below, if necessary.
 	 * pcb2->pcb_savefpu:	cloned above.
 	 * pcb2->pcb_flags:	cloned above.
 	 * pcb2->pcb_onfault:	cloned above (always NULL here?).
 	 * pcb2->pcb_gs:	cloned above.
 	 * pcb2->pcb_ext:	cleared below.
 	 */
 
 	/*
 	 * XXX don't copy the i/o pages.  this should probably be fixed.
 	 */
 	pcb2->pcb_ext = 0;
 
         /* Copy the LDT, if necessary. */
 	mtx_lock_spin(&sched_lock);
         if (pcb2->pcb_ldt != 0) {
 		if (flags & RFMEM) {
 			pcb2->pcb_ldt->ldt_refcnt++;
 		} else {
 			pcb2->pcb_ldt = user_ldt_alloc(pcb2,
 				pcb2->pcb_ldt->ldt_len);
 			if (pcb2->pcb_ldt == NULL)
 				panic("could not copy LDT");
 		}
         }
 	mtx_unlock_spin(&sched_lock);
 
 	/*
 	 * Now, cpu_switch() can schedule the new process.
 	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
 	 * containing the return address when exiting cpu_switch.
 	 * This will normally be to fork_trampoline(), which will have
 	 * %ebx loaded with the new proc's pointer.  fork_trampoline()
 	 * will set up a stack to call fork_return(p, frame); to complete
 	 * the return to user-mode.
 	 */
 }
 
 /*
  * Intercept the return address from a freshly forked process that has NOT
  * been scheduled yet.
  *
  * This is needed to make kernel threads stay in kernel mode.
  */
 void
 cpu_set_fork_handler(p, func, arg)
 	struct proc *p;
 	void (*func) __P((void *));
 	void *arg;
 {
 	/*
 	 * Note that the trap frame follows the args, so the function
 	 * is really called like this:  func(arg, frame);
 	 */
 	p->p_addr->u_pcb.pcb_esi = (int) func;	/* function */
 	p->p_addr->u_pcb.pcb_ebx = (int) arg;	/* first arg */
 }
 
 /*
  * Machine-dependent half of process exit.  Releases the per-process
  * pcb resources (npx state, pcb extension pages, LDT, debug registers),
  * marks p a zombie, wakes its parent and switches away via cpu_throw().
  * Control is not expected to come back; reaching the end panics.
  */
 void
 cpu_exit(p)
 	register struct proc *p;
 {
 	struct pcb *pcb = &p->p_addr->u_pcb; 
 
 #ifdef DEV_NPX
 	npxexit(p);
 #endif
 	if (pcb->pcb_ext != 0) {
 		/*
 		 * XXX do we need to move the TSS off the allocated pages
 		 * before freeing them?  (not done here)
 		 */
 		kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext,
 		    ctob(IOPAGES + 1));
 		pcb->pcb_ext = 0;
 	}
 	if (pcb->pcb_ldt)
 		user_ldt_free(pcb);
         if (pcb->pcb_flags & PCB_DBREGS) {
                 /*
                  * disable all hardware breakpoints
                  */
                 reset_dbregs();
                 pcb->pcb_flags &= ~PCB_DBREGS;
         }
 	PROC_LOCK(p);
 	mtx_lock_spin(&sched_lock);
 	/* Giant may be held recursively; drop every level without switching. */
 	while (mtx_owned(&Giant))
 		mtx_unlock_flags(&Giant, MTX_NOSWITCH);
 
 	/*
 	 * We have to wait until after releasing all locks before
 	 * changing p_stat.  If we block on a mutex then we will be
 	 * back at SRUN when we resume and our parent will never
 	 * harvest us.
 	 */
 	p->p_stat = SZOMB;
 
 	wakeup(p->p_pptr);
 	PROC_UNLOCK_NOSWITCH(p);
 
 	/* sched_lock is still held here when control is handed to cpu_throw(). */
 	cnt.v_swtch++;
 	cpu_throw();
 	panic("cpu_exit");
 }
 
 /*
  * Release the remaining machine-dependent resources of process p:
  * first the per-process pmap resources, then its reference on the
  * vmspace.  Giant must be held by the caller (GIANT_REQUIRED).
  */
 void
 cpu_wait(p)
 	struct proc *p;
 {
 	GIANT_REQUIRED;
 
 	/* drop per-process resources */
 	pmap_dispose_proc(p);
 
 	/* and clean-out the vmspace */
 	vmspace_free(p->p_vmspace);
 }
 
 /*
  * Dump the machine specific header information at the start of a core dump.
  */
 int
 cpu_coredump(p, vp, cred)
 	struct proc *p;
 	struct vnode *vp;
 	struct ucred *cred;
 {
 	int error;
 	caddr_t tempuser;
 
 	tempuser = malloc(ctob(UPAGES), M_TEMP, M_WAITOK | M_ZERO);
 	if (!tempuser)
 		return EINVAL;
 	
 	bcopy(p->p_addr, tempuser, sizeof(struct user));
 	bcopy(p->p_frame,
 	      tempuser + ((caddr_t) p->p_frame - (caddr_t) p->p_addr),
 	      sizeof(struct trapframe));
 
 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t) tempuser, 
 			ctob(UPAGES),
 			(off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, 
 			cred, (int *)NULL, p);
 
 	free(tempuser, M_TEMP);
 	
 	return error;
 }
 
 #ifdef notyet
 /*
  * Unimplemented stub, compiled only when "notyet" is defined.  The body
  * is empty and neither pte nor vaddr is used; the comment inside records
  * the intended design.
  */
 static void
 setredzone(pte, vaddr)
 	u_short *pte;
 	caddr_t vaddr;
 {
 /*
  * Eventually do this by setting up an expand-down stack segment
  * for the ss0: selector, allowing stack access down to the top of u.
  * This means, though, that protection violations need to be handled
  * through a double fault exception that must do an integral task
  * switch to a known good context, within which a dump can be
  * taken.  A sensible scheme might be to save the initial context
  * used by sched (that has physical memory mapped 1:1 at bottom)
  * and take the dump while still in mapped mode.
  */
 }
 #endif
 
 /*
  * Translate a kernel virtual address into the physical address that
  * pmap_kextract() reports for it.  A result of zero is treated as
  * fatal and panics.
  */
 u_long
 kvtop(void *addr)
 {
 	vm_offset_t phys;
 
 	phys = pmap_kextract((vm_offset_t)addr);
 	if (phys == 0)
 		panic("kvtop: zero page frame");
 
 	/* narrowed through int before widening to the return type */
 	return ((int)phys);
 }
 
 /*
  * Map an IO request into kernel virtual address space.
  *
  * All requests are (re)mapped into kernel VA space.
  * Notice that we use b_bufsize for the size of the buffer
  * to be mapped.  b_bcount might be modified by the driver.
  *
  * On entry bp->b_saveaddr is the kernel VA to map the pages at and
  * bp->b_data is the address being mapped.  On return the two are
  * swapped: b_data points into the kernel mapping (keeping the offset
  * within the first page) and b_saveaddr holds the original b_data.
  * Panics if bp is not a B_PHYS buffer or a page is not present.
  * Giant must be held.
  */
 void
 vmapbuf(bp)
 	register struct buf *bp;
 {
 	register caddr_t addr, v, kva;
 	vm_offset_t pa;
 
 	GIANT_REQUIRED;
 
 	if ((bp->b_flags & B_PHYS) == 0)
 		panic("vmapbuf");
 
 	/* Walk the source range a page at a time; v tracks the kernel VA. */
 	for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
 	    addr < bp->b_data + bp->b_bufsize;
 	    addr += PAGE_SIZE, v += PAGE_SIZE) {
 		/*
 		 * Do the vm_fault if needed; do the copy-on-write thing
 		 * when reading stuff off device into memory.
 		 */
 		vm_fault_quick(addr,
 			(bp->b_iocmd == BIO_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
 		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
 		if (pa == 0)
 			panic("vmapbuf: page not present");
 		/* Hold the page (undone in vunmapbuf()) and enter the mapping. */
 		vm_page_hold(PHYS_TO_VM_PAGE(pa));
 		pmap_kenter((vm_offset_t) v, pa);
 	}
 
 	/* Swap b_data and b_saveaddr, preserving the in-page offset. */
 	kva = bp->b_saveaddr;
 	bp->b_saveaddr = bp->b_data;
 	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
 }
 
 /*
  * Free the io map PTEs associated with this IO operation.
  * We also invalidate the TLB entries and restore the original b_data.
  *
  * For every page of the kernel mapping set up by vmapbuf() the mapping
  * is removed and the hold on the underlying page is dropped; b_data is
  * then reset from b_saveaddr.  Panics if bp is not a B_PHYS buffer.
  * Giant must be held.
  */
 void
 vunmapbuf(bp)
 	register struct buf *bp;
 {
 	register caddr_t addr;
 	vm_offset_t pa;
 
 	GIANT_REQUIRED;
 
 	if ((bp->b_flags & B_PHYS) == 0)
 		panic("vunmapbuf");
 
 	for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
 	    addr < bp->b_data + bp->b_bufsize;
 	    addr += PAGE_SIZE) {
 		/* Look the page up before the mapping that names it is removed. */
 		pa = trunc_page(pmap_kextract((vm_offset_t) addr));
 		pmap_kremove((vm_offset_t) addr);
 		vm_page_unhold(PHYS_TO_VM_PAGE(pa));
 	}
 
 	bp->b_data = bp->b_saveaddr;
 }
 
 /*
  * Force reset the processor by invalidating the entire address space!
  */
 
 #ifdef SMP
 /*
  * Reset helper installed by cpu_reset() as cpustop_restartfunc when the
  * reset was requested on a cpu other than CPU #0.  It announces itself
  * through cpu_reset_proxy_active, waits for the requesting cpu to
  * acknowledge, stops that cpu and then performs the real reset.
  */
 static void
 cpu_reset_proxy()
 {
 
 	cpu_reset_proxy_active = 1;
 	/* cpu_reset() moves the flag on to 2 once it has seen the 1. */
 	while (cpu_reset_proxy_active == 1)
 		;	 /* Wait for other cpu to see that we've started */
 	stop_cpus((1<<cpu_reset_proxyid));
 	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
 	DELAY(1000000);
 	cpu_reset_real();
 }
 #endif
 
 /*
  * Reset the machine.
  *
  * Without SMP, or while smp_active is 0, this simply calls
  * cpu_reset_real().  Otherwise all other running cpus are stopped
  * first.  If the caller is CPU #0 it resets directly; if not, CPU #0 is
  * restarted with cpu_reset_proxy() as its restart function so that it
  * carries out the reset, and this cpu spins forever.
  */
 void
 cpu_reset()
 {
 #ifdef SMP
 	if (smp_active == 0) {
 		cpu_reset_real();
 		/* NOTREACHED */
 	} else {
 
 		u_int map;
 		int cnt;
 		printf("cpu_reset called on cpu#%d\n", PCPU_GET(cpuid));
 
 		/* Every other cpu that is not already stopped. */
 		map = PCPU_GET(other_cpus) & ~ stopped_cpus;
 
 		if (map != 0) {
 			printf("cpu_reset: Stopping other CPUs\n");
 			stop_cpus(map);		/* Stop all other CPUs */
 		}
 
 		if (PCPU_GET(cpuid) == 0) {
 			DELAY(1000000);
 			cpu_reset_real();
 			/* NOTREACHED */
 		} else {
 			/* We are not BSP (CPU #0) */
 
 			cpu_reset_proxyid = PCPU_GET(cpuid);
 			cpustop_restartfunc = cpu_reset_proxy;
 			cpu_reset_proxy_active = 0;
 			printf("cpu_reset: Restarting BSP\n");
 			started_cpus = (1<<0);		/* Restart CPU #0 */
 
 			/* Bounded busy-wait so a dead BSP cannot hang us silently. */
 			cnt = 0;
 			while (cpu_reset_proxy_active == 0 && cnt < 10000000)
 				cnt++;	/* Wait for BSP to announce restart */
 			if (cpu_reset_proxy_active == 0)
 				printf("cpu_reset: Failed to restart BSP\n");
 			enable_intr();
 			/* Release cpu_reset_proxy() from its wait loop. */
 			cpu_reset_proxy_active = 2;
 
 			while (1);
 			/* NOTREACHED */
 		}
 	}
 #else
 	cpu_reset_real();
 #endif
 }
 
 /*
  * Do the actual processor reset.  On PC98 the CPU reset port is used;
  * otherwise a keyboard controller reset is tried first (unless
  * BROKEN_KEYBOARD_RESET is defined).  If the machine is still running
  * afterwards, the page directory is zeroed and the TLB flushed to force
  * a shutdown.  Spins forever if even that returns.
  */
 static void
 cpu_reset_real()
 {
 
 #ifdef PC98
 	/*
 	 * Attempt to do a CPU reset via CPU reset port.
 	 */
 	disable_intr();
 	if ((inb(0x35) & 0xa0) != 0xa0) {
 		outb(0x37, 0x0f);		/* SHUT0 = 0. */
 		outb(0x37, 0x0b);		/* SHUT1 = 0. */
 	}
 	outb(0xf0, 0x00);		/* Reset. */
 #else
 	/*
 	 * Attempt to do a CPU reset via the keyboard controller,
 	 * do not turn off the GateA20, as any machine that fails
 	 * to do the reset here would then end up in no man's land.
 	 */
 
 #if !defined(BROKEN_KEYBOARD_RESET)
 	outb(IO_KBD + 4, 0xFE);
 	DELAY(500000);	/* wait 0.5 sec to see if that did it */
 	printf("Keyboard reset did not work, attempting CPU shutdown\n");
 	DELAY(1000000);	/* wait 1 sec for printf to complete */
 #endif
 #endif /* PC98 */
 	/* force a shutdown by unmapping entire address space ! */
 	bzero((caddr_t) PTD, PAGE_SIZE);
 
 	/* "good night, sweet prince .... <THUNK!>" */
 	invltlb();
 	/* NOTREACHED */
 	while(1);
 }
 
 /*
  * Ask the VM system to grow the stack of process p so that it covers
  * sp.  Returns 1 if vm_map_growstack() reports KERN_SUCCESS, 0 otherwise.
  */
 int
 grow_stack(p, sp)
 	struct proc *p;
 	u_int sp;
 {
 
 	return (vm_map_growstack(p, sp) == KERN_SUCCESS ? 1 : 0);
 }
 
 /*
  * Software interrupt handler for queued VM system processing.
  * Runs busdma_swi() when busdma_swi_pending is non-zero; the argument
  * is unused.
  */
 void
 swi_vm(void *dummy)
 {
 
 	if (busdma_swi_pending == 0)
 		return;
 	busdma_swi();
 }
 
 /*
  * Report whether addr lies in a physical memory region: returns 1 if
  * it does and 0 if it does not.  The kernel coredump code currently
  * uses this to keep away from the ``ISA memory hole'', since dumping
  * it could cause indefinite hangs or other unpredictable behaviour.
  */
 
 int
 is_physical_memory(addr)
 	vm_offset_t addr;
 {
 	int physical = 1;
 
 #ifdef DEV_ISA
 	/* The ISA ``memory hole''. */
 	if (addr >= 0xa0000 && addr < 0x100000)
 		physical = 0;
 #endif
 
 	/*
 	 * tests for other known memory-mapped devices (PCI?) would be
 	 * added here
 	 */
 
 	return (physical);
 }
Index: head/sys/i386/include/globaldata.h
===================================================================
--- head/sys/i386/include/globaldata.h	(revision 82308)
+++ head/sys/i386/include/globaldata.h	(revision 82309)
@@ -1,99 +1,79 @@
 /*-
  * Copyright (c) Peter Wemm <peter@netplex.com.au>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_GLOBALDATA_H_
 #define _MACHINE_GLOBALDATA_H_
 
 #ifdef _KERNEL
 
 #include <machine/segments.h>
 #include <machine/tss.h>
 
 /* XXX */
 #ifdef KTR_PERCPU
 #include <sys/ktr.h>
 #endif
 
 /*
  * This structure maps out the global data that needs to be kept on a
  * per-cpu basis.  genassym uses this to generate offsets for the assembler
  * code, which also provides external symbols so that C can get at them as
  * though they were really globals.
  *
  * The SMP parts are setup in pmap.c and locore.s for the BSP, and
  * mp_machdep.c sets up the data for the AP's to "see" when they awake.
  * The reason for doing it via a struct is so that an array of pointers
  * to each CPU's data can be set up for things like "check curproc on all
  * other processors"
  *
  * Fields are reached from C through PCPU_GET()/PCPU_SET() with the gd_
  * prefix dropped, e.g. PCPU_GET(cpuid).
  */
 struct globaldata {
 	struct	globaldata *gd_prvspace;	/* self-reference */
 	struct	proc *gd_curproc;		/* current process */
 	struct	proc *gd_idleproc;		/* idle process */
 	struct	proc *gd_npxproc;		/* presumably the npx (FPU) state owner -- confirm */
 	struct	pcb *gd_curpcb;			/* current pcb */
 	struct	timeval gd_switchtime;
 	struct	i386tss gd_common_tss;		/* this cpu's TSS */
 	int	gd_switchticks;
 	struct	segment_descriptor gd_common_tssd;	/* copy of *gd_tss_gdt */
 	struct	segment_descriptor *gd_tss_gdt;	/* this cpu's GPROC0_SEL GDT slot */
 	int	gd_currentldt;			/* LDT selector last loaded with lldt() */
 	u_int	gd_cpuid;			/* this cpu number */
 	u_int	gd_other_cpus;			/* all other cpus */
 	SLIST_ENTRY(globaldata) gd_allcpu;
 	struct	lock_list_entry *gd_spinlocks;
 #ifdef KTR_PERCPU
 	volatile int	gd_ktr_idx;		/* Index into trace table */
 	char	*gd_ktr_buf;
 	char	gd_ktr_buf_data[KTR_SIZE];
 #endif
 };
 
-#ifdef SMP
-/*
- * This is the upper (0xff800000) address space layout that is per-cpu.
- * It is setup in locore.s and pmap.c for the BSP and in mp_machdep.c for
- * each AP.  genassym helps export this to the assembler code.
- */
-struct privatespace {
-	/* page 0 - data page */
-	struct	globaldata globaldata;
-	char	__filler0[PAGE_SIZE - sizeof(struct globaldata)];
-
-	/* page 1 - idle stack (UPAGES pages) */
-	char	idlestack[UPAGES * PAGE_SIZE];
-	/* page 1+UPAGES... */
-};
-
-extern struct privatespace SMP_prvspace[];
-
-#endif
-
 #endif	/* _KERNEL */
 
 #endif	/* ! _MACHINE_GLOBALDATA_H_ */
Index: head/sys/i386/include/mptable.h
===================================================================
--- head/sys/i386/include/mptable.h	(revision 82308)
+++ head/sys/i386/include/mptable.h	(revision 82309)
@@ -1,2440 +1,2442 @@
 /*
  * Copyright (c) 1996, by Steve Passe
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. The name of the developer may NOT be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_cpu.h"
+#include "opt_upages.h"
 
 #ifdef SMP
 #include <machine/smptests.h>
 #else
 #error
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
 #include <sys/dkstat.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/memrange.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 #include <machine/apic.h>
 #include <machine/atomic.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/mpapic.h>
 #include <machine/psl.h>
 #include <machine/segments.h>
 #include <machine/smptests.h>	/** TEST_DEFAULT_CONFIG, TEST_TEST1 */
 #include <machine/tss.h>
 #include <machine/specialreg.h>
 #include <machine/globaldata.h>
+#include <machine/privatespace.h>
 
 #if defined(APIC_IO)
 #include <machine/md_var.h>		/* setidt() */
 #include <i386/isa/icu.h>		/* IPIs */
 #include <i386/isa/intr_machdep.h>	/* IPIs */
 #endif	/* APIC_IO */
 
 /*
  * MP feature byte 1: normally read from the floating pointer structure,
  * but TEST_DEFAULT_CONFIG may override it with a fixed value.
  */
 #if defined(TEST_DEFAULT_CONFIG)
 #define MPFPS_MPFB1	TEST_DEFAULT_CONFIG
 #else
 #define MPFPS_MPFB1	mpfps->mpfb1
 #endif  /* TEST_DEFAULT_CONFIG */
 
 /* Warm boot vector, addressed through the KERNBASE mapping of low memory. */
 #define WARMBOOT_TARGET		0
 #define WARMBOOT_OFF		(KERNBASE + 0x0467)
 #define WARMBOOT_SEG		(KERNBASE + 0x0469)
 
 /* BIOS region searched for the MP signature; BIOS_COUNT is in 4-byte units. */
 #ifdef PC98
 #define BIOS_BASE		(0xe8000)
 #define BIOS_SIZE		(0x18000)
 #else
 #define BIOS_BASE		(0xf0000)
 #define BIOS_SIZE		(0x10000)
 #endif
 #define BIOS_COUNT		(BIOS_SIZE/4)
 
 /* CMOS index/data I/O ports and the register/value pair used for reset. */
 #define CMOS_REG		(0x70)
 #define CMOS_DATA		(0x71)
 #define BIOS_RESET		(0x0f)
 #define BIOS_WARM		(0x0a)
 
 /* Flag bits in PROCENTRY.cpu_flags and IOAPICENTRY.apic_flags. */
 #define PROCENTRY_FLAG_EN	0x01
 #define PROCENTRY_FLAG_BP	0x02
 #define IOAPICENTRY_FLAG_EN	0x01
 
 
 /*
  * MP Floating Pointer Structure
  *
  * Layout follows the Intel MultiProcessor Specification; mpfps_t is a
  * pointer to it.  The structure is located by search_for_sig().
  */
 typedef struct MPFPS {
 	char    signature[4];	/* structure signature */
 	void   *pap;		/* physical address pointer (MP config table) */
 	u_char  length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  mpfb1;		/* MP feature bytes 1-5 */
 	u_char  mpfb2;
 	u_char  mpfb3;
 	u_char  mpfb4;
 	u_char  mpfb5;
 }      *mpfps_t;
 
 /*
  * MP Configuration Table Header
  *
  * Layout follows the Intel MultiProcessor Specification; mpcth_t is a
  * pointer to it.  The base table entries follow the header.
  */
 typedef struct MPCTH {
 	char    signature[4];
 	u_short base_table_length;
 	u_char  spec_rev;
 	u_char  checksum;
 	u_char  oem_id[8];
 	u_char  product_id[12];
 	void   *oem_table_pointer;
 	u_short oem_table_size;
 	u_short entry_count;		/* number of base table entries */
 	void   *apic_address;		/* local APIC address */
 	u_short extended_table_length;
 	u_char  extended_table_checksum;
 	u_char  reserved;
 }      *mpcth_t;
 
 
 /* MP base table entry describing one processor. */
 typedef struct PROCENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  cpu_flags;	/* PROCENTRY_FLAG_EN, PROCENTRY_FLAG_BP */
 	u_long  cpu_signature;
 	u_long  feature_flags;
 	u_long  reserved1;
 	u_long  reserved2;
 }      *proc_entry_ptr;
 
 /* MP base table entry describing one bus. */
 typedef struct BUSENTRY {
 	u_char  type;
 	u_char  bus_id;
 	char    bus_type[6];	/* bus type string, fixed width */
 }      *bus_entry_ptr;
 
 /* MP base table entry describing one I/O APIC. */
 typedef struct IOAPICENTRY {
 	u_char  type;
 	u_char  apic_id;
 	u_char  apic_version;
 	u_char  apic_flags;	/* IOAPICENTRY_FLAG_EN */
 	void   *apic_address;
 }      *io_apic_entry_ptr;
 
 /*
  * MP base table entry describing one interrupt assignment: a source
  * (bus id and bus irq) routed to a destination (APIC id and APIC pin).
  */
 typedef struct INTENTRY {
 	u_char  type;
 	u_char  int_type;
 	u_short int_flags;
 	u_char  src_bus_id;
 	u_char  src_bus_irq;
 	u_char  dst_apic_id;
 	u_char  dst_apic_int;
 }      *int_entry_ptr;
 
 /*
  * descriptions of MP basetable entries: one record per entry type,
  * giving the type code, the entry length and a printable name.
  */
 typedef struct BASETABLE_ENTRY {
 	u_char  type;
 	u_char  length;
 	char    name[16];
 }       basetable_entry;
 
 /*
  * this code MUST be enabled here and in mpboot.s.
  * it follows the very early stages of AP boot by placing values in CMOS ram.
  * it NORMALLY will never be needed and thus the primitive method for enabling.
  *
 #define CHECK_POINTS
  */
 
 /*
  * CHECK_READ/CHECK_WRITE access one CMOS byte through the index and data
  * ports; CHECK_INIT fills CMOS bytes 0x34-0x39 with one value and
  * CHECK_PRINT prints those six bytes.  Without CHECK_POINTS (or on PC98)
  * CHECK_INIT and CHECK_PRINT expand to nothing.
  *
  * NOTE(review): the ';' directly after "CHECK_INIT(D)" and
  * "CHECK_PRINT(S)" below is part of the replacement text, so each
  * expansion starts with an empty statement.
  */
 #if defined(CHECK_POINTS) && !defined(PC98)
 #define CHECK_READ(A)	 (outb(CMOS_REG, (A)), inb(CMOS_DATA))
 #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D)))
 
 #define CHECK_INIT(D);				\
 	CHECK_WRITE(0x34, (D));			\
 	CHECK_WRITE(0x35, (D));			\
 	CHECK_WRITE(0x36, (D));			\
 	CHECK_WRITE(0x37, (D));			\
 	CHECK_WRITE(0x38, (D));			\
 	CHECK_WRITE(0x39, (D));
 
 #define CHECK_PRINT(S);				\
 	printf("%s: %d, %d, %d, %d, %d, %d\n",	\
 	   (S),					\
 	   CHECK_READ(0x34),			\
 	   CHECK_READ(0x35),			\
 	   CHECK_READ(0x36),			\
 	   CHECK_READ(0x37),			\
 	   CHECK_READ(0x38),			\
 	   CHECK_READ(0x39));
 
 #else				/* CHECK_POINTS */
 
 #define CHECK_INIT(D)
 #define CHECK_PRINT(S)
 
 #endif				/* CHECK_POINTS */
 
 /*
  * Values to send to the POST hardware.
  */
 #define MP_BOOTADDRESS_POST	0x10
 #define MP_PROBE_POST		0x11
 #define MPTABLE_PASS1_POST	0x12
 
 #define MP_START_POST		0x13
 #define MP_ENABLE_POST		0x14
 #define MPTABLE_PASS2_POST	0x15
 
 #define START_ALL_APS_POST	0x16
 #define INSTALL_AP_TRAMP_POST	0x17
 #define START_AP_POST		0x18
 
 #define MP_ANNOUNCE_POST	0x19
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
 
 /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
 int	current_postcode;
 
 /** XXX FIXME: what system files declare these??? */
 extern struct region_descriptor r_gdt, r_idt;
 
 int	bsp_apic_ready = 0;	/* flags useability of BSP apic */
 int	mp_naps;		/* # of Applications processors */
 int	mp_nbusses;		/* # of busses */
 int	mp_napics;		/* # of IO APICs */
 int	boot_cpu_id;		/* designated BSP */
 vm_offset_t cpu_apic_address;
 vm_offset_t io_apic_address[NAPICID];	/* NAPICID is more than enough */
 extern	int nkpt;
 
 u_int32_t cpu_apic_versions[MAXCPU];
 u_int32_t *io_apic_versions;
 
 #ifdef APIC_INTR_REORDER
 struct {
 	volatile int *location;
 	int bit;
 } apic_isrbit_location[32];
 #endif
 
 struct apic_intmapinfo	int_to_apicintpin[APIC_INTMAPSIZE];
 
 /*
  * APIC ID logical/physical mapping structures.
  * We oversize these to simplify boot-time config.
  */
 int     cpu_num_to_apic_id[NAPICID];
 int     io_num_to_apic_id[NAPICID];
 int     apic_id_to_logical[NAPICID];
 
 
 /* AP uses this during bootstrap.  Do not staticize.  */
 char *bootSTK;
 static int bootAP;
 
 /* Hotwire a 0->4MB V==P mapping */
 extern pt_entry_t *KPTphys;
 
 /* SMP page table page */
 extern pt_entry_t *SMPpt;
 
 struct pcb stoppcbs[MAXCPU];
 
 int invltlb_ok = 0;	/* throttle smp_invltlb() till safe */
 SYSCTL_INT(_machdep, OID_AUTO, invltlb_ok, CTLFLAG_RW, &invltlb_ok, 0, "");
 
 /*
  * Local data and functions.
  */
 
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready = 0;
 
 static int	mp_capable;
 static u_int	boot_address;
 static u_int	base_memory;
 
 static int	picmode;		/* 0: virtual wire mode, 1: PIC mode */
 static mpfps_t	mpfps;
 static int	search_for_sig(u_int32_t target, int count);
 static void	mp_enable(u_int boot_addr);
 
 static void	mptable_pass1(void);
 static int	mptable_pass2(void);
 static void	default_mp_table(int type);
 static void	fix_mp_table(void);
 static void	setup_apic_irq_mapping(void);
 static void	init_locks(void);
 static int	start_all_aps(u_int boot_addr);
 static void	install_ap_tramp(u_int boot_addr);
 static int	start_ap(int logicalCpu, u_int boot_addr);
 void		ap_init(void);
 static int	apic_int_is_bus_type(int intr, int bus_type);
 static void	release_aps(void *dummy);
 
 /*
  * initialize all the SMP locks
  */
 
 /* critical region around IO APIC, apic_imen */
 struct mtx		imen_mtx;
 
 /* lock region used by kernel profiling */
 int	mcount_lock;
 
 #ifdef USE_COMLOCK
 /* locks com (tty) data/hardware accesses: a FASTINTR() */
 struct mtx		com_mtx;
 #endif /* USE_COMLOCK */
 
 /*
  * Initialize the SMP spin mutexes owned by this file.  Only com_mtx is
  * set up here, and only when USE_COMLOCK is defined; otherwise the
  * function does nothing.
  */
 static void
 init_locks(void)
 {
 
 #ifdef USE_COMLOCK
 	mtx_init(&com_mtx, "com", MTX_SPIN);
 #endif /* USE_COMLOCK */
 }
 
 /*
  * Pick the address in base memory where the AP trampoline code will go.
  *
  * basemem is the size of base memory in kilobytes.  The result is the
  * 4k boundary at or just below the top of base memory, lowered by one
  * more page when fewer than bootMP_size bytes lie above that boundary.
  * The byte count and the chosen address are also left in base_memory
  * and boot_address.
  */
 u_int
 mp_bootaddress(u_int basemem)
 {
 	u_int room;
 
 	POSTCODE(MP_BOOTADDRESS_POST);
 
 	/* kilobytes -> bytes */
 	base_memory = basemem * 1024;
 
 	/* start from the 4k boundary below the top of base memory */
 	boot_address = base_memory & ~0xfff;
 
 	/* partial top page too small for the trampoline: go down a page */
 	room = base_memory - boot_address;
 	if (room < bootMP_size)
 		boot_address -= 4096;
 
 	return (boot_address);
 }
 
 
 /*
  * Look for an Intel MP spec table (ie, SMP capable hardware).
  */
 void
 i386_mp_probe(void)
 {
 	int     x;
 	u_long  segment;
 	u_int32_t target;
 
 	POSTCODE(MP_PROBE_POST);
 
 	/* see if EBDA exists */
 	if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
 		/* search first 1K of EBDA */
 		target = (u_int32_t) (segment << 4);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	} else {
 		/* last 1K of base memory, effective 'top of base' passed in */
 		target = (u_int32_t) (base_memory - 0x400);
 		if ((x = search_for_sig(target, 1024 / 4)) >= 0)
 			goto found;
 	}
 
 	/* search the BIOS */
 	target = (u_int32_t) BIOS_BASE;
 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
 		goto found;
 
 	/* nothing found */
 	mpfps = (mpfps_t)0;
 	mp_capable = 0;
 	return;
 
 found:
 	/* calculate needed resources */
 	mpfps = (mpfps_t)x;
 	mptable_pass1();
 
 	/* flag fact that we are running multiple processors */
 	mp_capable = 1;
 }
 
 /*
  * Report whether MP hardware was found.  Returns mp_capable, which is
  * set by i386_mp_probe().  As a side effect all_cpus is initialized to
  * contain only the BSP.
  */
 int
 cpu_mp_probe(void)
 {
 	/*
 	 * Record BSP in CPU map
 	 * This is done here so that MBUF init code works correctly.
 	 */
 	all_cpus = 1;
 
 	return (mp_capable);
 }
 
 /*
  * Bring up the SMP hardware: enable MP operation using the trampoline
  * address in boot_address (which initializes the APIC and starts the
  * AP's), then call cpu_setregs().  Panics when no MP hardware was found.
  */
 void
 cpu_mp_start(void)
 {
 	POSTCODE(MP_START_POST);
 
 	/* an MP capable motherboard is mandatory */
 	if (!mp_capable)
 		panic("MP hardware not found!");
 	else
 		mp_enable(boot_address);
 
 	cpu_setregs();
 }
 
 
 /*
  * Print one line per processor (BSP first, then each AP) giving its
  * APIC id, APIC version and the local APIC address, followed by one line
  * per I/O APIC when APIC_IO is configured or a warning when it is not.
  */
 void
 cpu_mp_announce(void)
 {
 	int     unit;
 
 	POSTCODE(MP_ANNOUNCE_POST);
 
 	/* the boot processor is reported as cpu0 */
 	printf(" cpu0 (BSP): apic id: %2d", CPU_TO_ID(0));
 	printf(", version: 0x%08x", cpu_apic_versions[0]);
 	printf(", at 0x%08x\n", cpu_apic_address);
 
 	/* application processors are cpu1 .. cpu<mp_naps> */
 	unit = 1;
 	while (unit <= mp_naps) {
 		printf(" cpu%d (AP):  apic id: %2d", unit, CPU_TO_ID(unit));
 		printf(", version: 0x%08x", cpu_apic_versions[unit]);
 		printf(", at 0x%08x\n", cpu_apic_address);
 		++unit;
 	}
 
 #if defined(APIC_IO)
 	unit = 0;
 	while (unit < mp_napics) {
 		printf(" io%d (APIC): apic id: %2d", unit, IO_TO_ID(unit));
 		printf(", version: 0x%08x", io_apic_versions[unit]);
 		printf(", at 0x%08x\n", io_apic_address[unit]);
 		++unit;
 	}
 #else
 	printf(" Warning: APIC I/O disabled\n");
 #endif	/* APIC_IO */
 }
 
 /*
  * AP cpu's call this to sync up protected mode.
  *
  * The cpu being started is identified by bootAP.  Its private GDT slice
  * (NGDT entries starting at gdt[myid * NGDT]) is filled in and loaded,
  * followed by the IDT, the default LDT and a task register pointing at
  * the cpu's own TSS in its SMP_prvspace[] slot.
  */
 void
 init_secondary(void)
 {
 	int	gsel_tss;
 	int	x, myid = bootAP;
 
 	/* Point the per-cpu segment and the TSS segment at this cpu's data. */
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[myid];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[myid].globaldata.gd_common_tss;
 	SMP_prvspace[myid].globaldata.gd_prvspace =
 		&SMP_prvspace[myid].globaldata;
 
 	/* Build this cpu's copy of the GDT from the shared templates. */
 	for (x = 0; x < NGDT; x++) {
 		ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base = (int) &gdt[myid * NGDT];
 	lgdt(&r_gdt);			/* does magic intra-segment return */
 
 	lidt(&r_idt);
 
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/* Set up the TSS descriptor and per-cpu TSS fields, then load TR. */
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
 	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	ltr(gsel_tss);
 
 	pmap_set_opt();
 }
 
 
 #if defined(APIC_IO)
 /*
  * Final configuration of the BSP's local APIC:
  *  - leave 'pic mode' when picmode is set.
  *  - mask LINT0, the 8259 'virtual wire' connection.
  *  - unmask LINT1 so that it can deliver NMI.
  */
 void
 bsp_apic_configure(void)
 {
 	u_char		imcr;
 	u_int32_t	lvt;
 
 	/* leave 'pic mode' if necessary */
 	if (picmode) {
 		outb(0x22, 0x70);		/* select IMCR */
 		imcr = inb(0x23) | 0x01;	/* contents + mask external INTR */
 		outb(0x23, imcr);		/* disconnect 8259s/NMI */
 	}
 
 	/* mask lint0 (the 8259 'virtual wire' connection) */
 	lvt = lapic.lvt_lint0 | APIC_LVT_M;
 	lapic.lvt_lint0 = lvt;
 
 	/* setup lint1 to handle NMI: clear its mask bit */
 	lvt = lapic.lvt_lint1 & ~APIC_LVT_M;
 	lapic.lvt_lint1 = lvt;
 
 	if (bootverbose) {
 		apic_dump("bsp_apic_configure()");
 	}
 }
 #endif  /* APIC_IO */
 
 
 /*******************************************************************
  * local functions and data
  */
 
 /*
  * start the SMP system
  *
  * Runs the second MP table pass, applies the default configuration when
  * the table asks for one, fixes up the collected data, and (with APIC_IO)
  * programs the IO APICs and installs the IPI vectors.  Finishes by
  * initializing the SMP locks and starting the APs.
  *
  * boot_addr is passed through unchanged to start_all_aps().
  */
 static void
 mp_enable(u_int boot_addr)
 {
 	int     x;
 #if defined(APIC_IO)
 	int     apic;
 	u_int   ux;
 #endif	/* APIC_IO */
 
 	POSTCODE(MP_ENABLE_POST);
 
 	/* turn on 4MB of V == P addressing so we can get to MP table */
 	*(int *)PTD = PG_V | PG_RW | ((uintptr_t)(void *)KPTphys & PG_FRAME);
 	invltlb();
 
 	/* examine the MP table for needed info, uses physical addresses */
 	x = mptable_pass2();
 
 	/* remove the temporary V == P mapping again */
 	*(int *)PTD = 0;
 	invltlb();
 
 	/* can't process default configs till the CPU APIC is pmapped */
 	/* non-zero x is the default configuration type from mptable_pass2() */
 	if (x)
 		default_mp_table(x);
 
 	/* post scan cleanup */
 	fix_mp_table();
 	setup_apic_irq_mapping();
 
 #if defined(APIC_IO)
 
 	/* fill the LOGICAL io_apic_versions table */
 	for (apic = 0; apic < mp_napics; ++apic) {
 		ux = io_apic_read(apic, IOAPIC_VER);
 		io_apic_versions[apic] = ux;
 		io_apic_set_id(apic, IO_TO_ID(apic));
 	}
 
 	/* program each IO APIC in the system */
 	for (apic = 0; apic < mp_napics; ++apic)
 		if (io_apic_setup(apic) < 0)
 			panic("IO APIC setup failure");
 
 	/* install a 'Spurious INTerrupt' vector */
 	setidt(XSPURIOUSINT_OFFSET, Xspuriousint,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for TLB invalidation */
 	setidt(XINVLTLB_OFFSET, Xinvltlb,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forwarding hardclock() */
 	setidt(XHARDCLOCK_OFFSET, Xhardclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for forwarding statclock() */
 	setidt(XSTATCLOCK_OFFSET, Xstatclock,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	
 	/* install an inter-CPU IPI for all-CPU rendezvous */
 	setidt(XRENDEZVOUS_OFFSET, Xrendezvous,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for forcing an additional software trap */
 	setidt(XCPUAST_OFFSET, Xcpuast,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	/* install an inter-CPU IPI for CPU stop/restart */
 	setidt(XCPUSTOP_OFFSET, Xcpustop,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 
 #if defined(TEST_TEST1)
 	/* install a "fake hardware INTerrupt" vector */
 	setidt(XTEST1_OFFSET, Xtest1,
 	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 #endif  /** TEST_TEST1 */
 
 #endif	/* APIC_IO */
 
 	/* initialize all SMP locks */
 	init_locks();
 
 	/* start each Application Processor */
 	start_all_aps(boot_addr);
 }
 
 
 /*
  * Look for the MP spec signature.
  *
  * Examines memory starting at KERNBASE + target, testing one 32-bit word
  * out of every four (NEXT() advances the word index by 4) for as long as
  * the word index is below count.
  *
  * Returns target plus the byte offset of the matching word, or -1 when
  * the signature is not found.
  */
 
 /* string defined by the Intel MP Spec as identifying the MP table */
 #define MP_SIG		0x5f504d5f	/* _MP_ */
 #define NEXT(X)		((X) += 4)
 static int
 search_for_sig(u_int32_t target, int count)
 {
 	u_int32_t *base = (u_int32_t *) (KERNBASE + target);
 	int     word = 0;
 
 	while (word < count) {
 		if (base[word] == MP_SIG) {
 			/* make array index a byte index */
 			return (target + (word * sizeof(u_int32_t)));
 		}
 		NEXT(word);
 	}
 
 	return -1;
 }
 
 
 /*
  * Per-type description of MP configuration table base entries, indexed by
  * the entry's leading type byte.  The table walkers use the .length member
  * to step from one entry to the next.
  * NOTE(review): the columns look like { type code, length in bytes, name };
  * confirm against the basetable_entry declaration.
  */
 static basetable_entry basetable_entry_types[] =
 {
 	{0, 20, "Processor"},
 	{1, 8, "Bus"},
 	{2, 8, "I/O APIC"},
 	{3, 8, "I/O INT"},
 	{4, 8, "Local INT"}
 };
 
 /* One recorded bus from the MP table (see bus_entry()). */
 typedef struct BUSDATA {
 	u_char  bus_id;			/* bus ID from the table; 0xff when cleared */
 	enum busTypes bus_type;		/* type code returned by lookup_bus_type() */
 }       bus_datum;
 
 /*
  * One recorded interrupt entry.  All members except int_vector are copied
  * from the MP table by int_entry(); int_vector is filled in later.
  */
 typedef struct INTDATA {
 	u_char  int_type;		/* 0: standard INT, 3: ExtInt, 0xff: cleared */
 	u_short int_flags;		/* bits 0-1 polarity, bits 2-3 trigger mode */
 	u_char  src_bus_id;		/* ID of the source bus */
 	u_char  src_bus_irq;		/* IRQ on the source bus */
 	u_char  dst_apic_id;		/* physical ID of the destination IO APIC */
 	u_char  dst_apic_int;		/* pin on the destination IO APIC */
 	u_char	int_vector;		/* assigned IRQ, 0xff while unassigned */
 }       io_int, local_int;
 
 /* Associates a bus-type code with the name used for it in MP table bus entries. */
 typedef struct BUSTYPENAME {
 	u_char  type;			/* bus-type code */
 	char    name[7];		/* NUL-terminated name, at most 6 characters */
 }       bus_type_name;
 
 /*
  * Bus-type names recognized in MP table bus entries.  lookup_bus_type()
  * compares names over rows 0..MAX_BUSTYPE-1 and returns the type of the
  * first row that matches.  Rows holding UNKNOWN_BUSTYPE / "---" are
  * placeholders.
  *
  * NOTE(review): MCA is listed twice.  Since the lookup returns the first
  * match, the second MCA row looks redundant; confirm whether row position
  * matters to any other user before changing it.
  */
 static bus_type_name bus_type_table[] =
 {
 	{CBUS, "CBUS"},
 	{CBUSII, "CBUSII"},
 	{EISA, "EISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{ISA, "ISA"},
 	{MCA, "MCA"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{PCI, "PCI"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{UNKNOWN_BUSTYPE, "---"},
 	{XPRESS, "XPRESS"},
 	{UNKNOWN_BUSTYPE, "---"}
 };
 /*
  * Bus layout of the default configurations, from MP spec v1.4, table 5-1.
  * Row r describes default configuration type r + 1; users index this
  * table with (type - 1).
  * NOTE(review): 255 in the id1/type1 columns appears to mean "no second
  * bus"; confirm against the spec.
  */
 static int default_data[7][5] =
 {
 /*   nbus, id0, type0, id1, type1 */
 	{1, 0, ISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, EISA, 255, 255},
 	{1, 0, MCA, 255, 255},
 	{2, 0, ISA, 1, PCI},
 	{2, 0, EISA, 1, PCI},
 	{2, 0, MCA, 1, PCI}
 };
 
 
 /* the bus data; mptable_pass2() allocates mp_nbusses entries */
 static bus_datum *bus_data;
 
 /*
  * the IO INT data, one entry per possible APIC INTerrupt;
  * mptable_pass2() allocates nintrs + 1 entries
  */
 static io_int  *io_apic_ints;
 
 /* number of entries in use in io_apic_ints[]; counted by mptable_pass1() */
 static int nintrs;
 
 /* per-entry helpers used while walking the MP configuration table */
 static int processor_entry	__P((proc_entry_ptr entry, int cpu));
 static int bus_entry		__P((bus_entry_ptr entry, int bus));
 static int io_apic_entry	__P((io_apic_entry_ptr entry, int apic));
 static int int_entry		__P((int_entry_ptr entry, int intr));
 static int lookup_bus_type	__P((char *name));
 
 
 /*
  * 1st pass on motherboard's Intel MP specification table.
  *
  * This pass only counts; nothing is allocated here.  The counts size the
  * tables that mptable_pass2() allocates and fills.
  *
  * initializes:
  *	mp_ncpus = 1
  *
  * determines:
  *	cpu_apic_address (common to all CPUs)
  *	io_apic_address[N]
  *	mp_naps
  *	mp_nbusses
  *	mp_napics
  *	nintrs
  *
  * Panics when the configuration table header is missing, when an entry
  * of unknown type is met, or when the table lists more enabled IO APICs
  * than the NAPICID slots of io_apic_address[] cleared below.
  */
 static void
 mptable_pass1(void)
 {
 	int	x;
 	mpcth_t	cth;
 	int	totalSize;
 	void*	position;
 	int	count;
 	int	type;
 
 	POSTCODE(MPTABLE_PASS1_POST);
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		io_apic_address[x] = ~0;	/* IO APIC address table */
 	}
 
 	/* init everything to empty */
 	mp_naps = 0;
 	mp_nbusses = 0;
 	mp_napics = 0;
 	nintrs = 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0) {
 		/* use default addresses */
 		cpu_apic_address = DEFAULT_APIC_BASE;
 		io_apic_address[0] = DEFAULT_IO_APIC_BASE;
 
 		/* fill in with defaults */
 		mp_naps = 2;		/* includes BSP */
 		mp_nbusses = default_data[MPFPS_MPFB1 - 1][0];
 #if defined(APIC_IO)
 		mp_napics = 1;
 		nintrs = 16;
 #endif	/* APIC_IO */
 	}
 	else {
 		if ((cth = mpfps->pap) == 0)
 			panic("MP Configuration Table Header MISSING!");
 
 		cpu_apic_address = (vm_offset_t) cth->apic_address;
 
 		/* walk the table, recording info of interest */
 		/* totalSize is kept up to date but is not checked here */
 		totalSize = cth->base_table_length - sizeof(struct MPCTH);
 		position = (u_char *) cth + sizeof(struct MPCTH);
 		count = cth->entry_count;
 
 		while (count--) {
 			switch (type = *(u_char *) position) {
 			case 0: /* processor_entry */
 				if (((proc_entry_ptr)position)->cpu_flags
 					& PROCENTRY_FLAG_EN)
 					++mp_naps;
 				break;
 			case 1: /* bus_entry */
 				++mp_nbusses;
 				break;
 			case 2: /* io_apic_entry */
 				if (((io_apic_entry_ptr)position)->apic_flags
 					& IOAPICENTRY_FLAG_EN) {
 					/* never store past the cleared slots */
 					if (mp_napics >= NAPICID)
 						panic("too many IO APICs in MP table");
 					io_apic_address[mp_napics++] =
 					    (vm_offset_t)((io_apic_entry_ptr)
 						position)->apic_address;
 				}
 				break;
 			case 3: /* int_entry */
 				++nintrs;
 				break;
 			case 4:	/* int_entry */
 				break;
 			default:
 				panic("mpfps Base Table HOSED!");
 				/* NOTREACHED */
 			}
 
 			/*
 			 * Step to the next entry.  A cast is not an lvalue in
 			 * standard C, so assign the advanced pointer back
 			 * instead of using "(u_char *)position += ...".
 			 */
 			totalSize -= basetable_entry_types[type].length;
 			position = (u_char *)position +
 			    basetable_entry_types[type].length;
 		}
 	}
 
 	/* qualify the numbers */
 	if (mp_naps > MAXCPU) {
 		printf("Warning: only using %d of %d available CPUs!\n",
 			MAXCPU, mp_naps);
 		mp_naps = MAXCPU;
 	}
 
 	/*
 	 * Count the BSP.
 	 * This is also used as a counter while starting the APs.
 	 */
 	mp_ncpus = 1;
 
 	--mp_naps;	/* subtract the BSP */
 }
 
 
 /*
  * 2nd pass on motherboard's Intel MP specification table.
  *
  * Allocates the tables sized by mptable_pass1(), maps the IO APIC
  * registers, and then records the contents of every table entry.
  *
  * sets:
  *	boot_cpu_id
  *	ID_TO_IO(N), phy APIC ID to log CPU/IO table
  *	CPU_TO_ID(N), logical CPU to APIC ID table
  *	IO_TO_ID(N), logical IO to APIC ID table
  *	bus_data[N]
  *	io_apic_ints[N]
  *
  * Returns 0 for a full configuration table, or the default configuration
  * type when the floating pointer structure selects one.  Panics when the
  * table header is missing, an entry has an unknown type, or no BSP entry
  * was seen.
  */
 static int
 mptable_pass2(void)
 {
 	int     x;
 	mpcth_t cth;
 	int     totalSize;
 	void*   position;
 	int     count;
 	int     type;
 	int     apic, bus, cpu, intr;
 	int	i, j;
 	int	pgeflag;
 
 	POSTCODE(MPTABLE_PASS2_POST);
 
 	pgeflag = 0;		/* XXX - Not used under SMP yet.  */
 
 	/* io_apic_ints gets one spare entry; fix_mp_table() may append one */
 	MALLOC(io_apic_versions, u_int32_t *, sizeof(u_int32_t) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(ioapic, volatile ioapic_t **, sizeof(ioapic_t *) * mp_napics,
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(io_apic_ints, io_int *, sizeof(io_int) * (nintrs + 1),
 	    M_DEVBUF, M_WAITOK);
 	MALLOC(bus_data, bus_datum *, sizeof(bus_datum) * mp_nbusses,
 	    M_DEVBUF, M_WAITOK);
 
 	bzero(ioapic, sizeof(ioapic_t *) * mp_napics);
 
 	/*
 	 * Map each IO APIC through the SMPpt slots counting down from
 	 * NPTEPG-2.  IO APICs that live in the same page frame share a slot.
 	 */
 	for (i = 0; i < mp_napics; i++) {
 		for (j = 0; j < mp_napics; j++) {
 			/* same page frame as a previous IO apic? */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) ==
 			    (io_apic_address[i] & PG_FRAME)) {
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 			/* use this slot if available */
 			if (((vm_offset_t)SMPpt[NPTEPG-2-j] & PG_FRAME) == 0) {
 				SMPpt[NPTEPG-2-j] = (pt_entry_t)(PG_V | PG_RW |
 				    pgeflag | (io_apic_address[i] & PG_FRAME));
 				ioapic[i] = (ioapic_t *)((u_int)SMP_prvspace
 					+ (NPTEPG-2-j) * PAGE_SIZE
 					+ (io_apic_address[i] & PAGE_MASK));
 				break;
 			}
 		}
 	}
 
 	/* clear various tables */
 	for (x = 0; x < NAPICID; ++x) {
 		ID_TO_IO(x) = -1;	/* phy APIC ID to log CPU/IO table */
 		CPU_TO_ID(x) = -1;	/* logical CPU to APIC ID table */
 		IO_TO_ID(x) = -1;	/* logical IO to APIC ID table */
 	}
 
 	/* clear bus data table */
 	for (x = 0; x < mp_nbusses; ++x)
 		bus_data[x].bus_id = 0xff;
 
 	/* clear IO APIC INT table */
 	for (x = 0; x < (nintrs + 1); ++x) {
 		io_apic_ints[x].int_type = 0xff;
 		io_apic_ints[x].int_vector = 0xff;
 	}
 
 	/* setup the cpu/apic mapping arrays */
 	boot_cpu_id = -1;
 
 	/* record whether PIC or virtual-wire mode */
 	picmode = (mpfps->mpfb2 & 0x80) ? 1 : 0;
 
 	/* check for use of 'default' configuration */
 	if (MPFPS_MPFB1 != 0)
 		return MPFPS_MPFB1;	/* return default configuration type */
 
 	if ((cth = mpfps->pap) == 0)
 		panic("MP Configuration Table Header MISSING!");
 
 	/* walk the table, recording info of interest */
 	/* totalSize is kept up to date but is not checked here */
 	totalSize = cth->base_table_length - sizeof(struct MPCTH);
 	position = (u_char *) cth + sizeof(struct MPCTH);
 	count = cth->entry_count;
 	apic = bus = intr = 0;
 	cpu = 1;				/* pre-count the BSP */
 
 	/* each handler returns non-zero when it consumed a table slot */
 	while (count--) {
 		switch (type = *(u_char *) position) {
 		case 0:
 			if (processor_entry(position, cpu))
 				++cpu;
 			break;
 		case 1:
 			if (bus_entry(position, bus))
 				++bus;
 			break;
 		case 2:
 			if (io_apic_entry(position, apic))
 				++apic;
 			break;
 		case 3:
 			if (int_entry(position, intr))
 				++intr;
 			break;
 		case 4:
 			/* int_entry(position); */
 			break;
 		default:
 			panic("mpfps Base Table HOSED!");
 			/* NOTREACHED */
 		}
 
 		/*
 		 * Step to the next entry.  A cast is not an lvalue in
 		 * standard C, so assign the advanced pointer back instead
 		 * of using "(u_char *) position += ...".
 		 */
 		totalSize -= basetable_entry_types[type].length;
 		position = (u_char *)position +
 		    basetable_entry_types[type].length;
 	}
 
 	if (boot_cpu_id == -1)
 		panic("NO BSP found!");
 
 	/* report fact that its NOT a default configuration */
 	return 0;
 }
 
 
 void
 assign_apic_irq(int apic, int intpin, int irq)
 {
 	int x;
 	
 	if (int_to_apicintpin[irq].ioapic != -1)
 		panic("assign_apic_irq: inconsistent table");
 	
 	int_to_apicintpin[irq].ioapic = apic;
 	int_to_apicintpin[irq].int_pin = intpin;
 	int_to_apicintpin[irq].apic_address = ioapic[apic];
 	int_to_apicintpin[irq].redirindex = IOAPIC_REDTBL + 2 * intpin;
 	
 	for (x = 0; x < nintrs; x++) {
 		if ((io_apic_ints[x].int_type == 0 || 
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector == 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(apic) &&
 		    io_apic_ints[x].dst_apic_int == intpin)
 			io_apic_ints[x].int_vector = irq;
 	}
 }
 
 /*
  * Undo assign_apic_irq() for IRQ number irq.
  *
  * Resets the int_to_apicintpin[] slot for irq and marks every INT/ExtInt
  * entry of io_apic_ints[] that targets the released IO APIC pin as
  * unassigned (int_vector = 0xff).  Panics if irq is not currently
  * assigned.
  */
 void
 revoke_apic_irq(int irq)
 {
 	int x;
 	int oldapic;
 	int oldintpin;
 	
 	if (int_to_apicintpin[irq].ioapic == -1)
 		panic("revoke_apic_irq: inconsistent table");
 	
 	/* remember the pin being released before the slot is wiped */
 	oldapic = int_to_apicintpin[irq].ioapic;
 	oldintpin = int_to_apicintpin[irq].int_pin;
 
 	int_to_apicintpin[irq].ioapic = -1;
 	int_to_apicintpin[irq].int_pin = 0;
 	int_to_apicintpin[irq].apic_address = NULL;
 	int_to_apicintpin[irq].redirindex = 0;
 	
 	/*
 	 * Clear the vector on the entries that are currently assigned.
 	 * Matching on "== 0xff" here would only rewrite entries that are
 	 * already unassigned and leave the stale IRQ in place.
 	 */
 	for (x = 0; x < nintrs; x++) {
 		if ((io_apic_ints[x].int_type == 0 || 
 		     io_apic_ints[x].int_type == 3) &&
 		    io_apic_ints[x].int_vector != 0xff &&
 		    io_apic_ints[x].dst_apic_id == IO_TO_ID(oldapic) &&
 		    io_apic_ints[x].dst_apic_int == oldintpin)
 			io_apic_ints[x].int_vector = 0xff;
 	}
 }
 
 
 /*
  * Give interrupt entry intr the first free IRQ number, if it has none
  * yet, and program the corresponding IO APIC pin.  Does nothing when the
  * entry is not eligible or no IRQ number is free.
  */
 static void
 allocate_apic_irq(int intr)
 {
 	int unit;
 	int pin;
 	int slot;
 
 	/* Interrupt handler already assigned */
 	if (io_apic_ints[intr].int_vector != 0xff)
 		return;
 
 	/* Not INT or ExtInt on != (0, 0) */
 	if (io_apic_ints[intr].int_type != 0) {
 		if (io_apic_ints[intr].int_type != 3)
 			return;
 		if (io_apic_ints[intr].dst_apic_id == IO_TO_ID(0) &&
 		    io_apic_ints[intr].dst_apic_int == 0)
 			return;
 	}
 
 	/* look for the first unused slot in int_to_apicintpin[] */
 	for (slot = 0; slot < APIC_INTMAPSIZE; slot++) {
 		if (int_to_apicintpin[slot].ioapic == -1)
 			break;
 	}
 
 	/* No free interrupt handlers */
 	if (slot >= APIC_INTMAPSIZE)
 		return;
 
 	unit = ID_TO_IO(io_apic_ints[intr].dst_apic_id);
 	pin = io_apic_ints[intr].dst_apic_int;
 
 	assign_apic_irq(unit, pin, slot);
 	io_apic_setup_intpin(unit, pin);
 }
 
 
 /*
  * Change the physical APIC ID recorded for logical IO APIC apic from
  * oldid to newid.  Interrupt entries naming either ID are exchanged, and
  * any other IO APIC that was recorded with newid is given oldid instead.
  */
 static void
 swap_apic_id(int apic, int oldid, int newid)
 {
 	int idx;
 	int other;
 
 	/* Nothing to do */
 	if (oldid == newid)
 		return;
 
 	printf("Changing APIC ID for IO APIC #%d from %d to %d in MP table\n",
 	       apic, oldid, newid);
 
 	/* Swap physical APIC IDs in interrupt entries */
 	idx = 0;
 	while (idx < nintrs) {
 		if (io_apic_ints[idx].dst_apic_id == oldid) {
 			io_apic_ints[idx].dst_apic_id = newid;
 		} else if (io_apic_ints[idx].dst_apic_id == newid) {
 			io_apic_ints[idx].dst_apic_id = oldid;
 		}
 		idx++;
 	}
 
 	/* Swap physical APIC IDs in IO_TO_ID mappings */
 	other = 0;
 	while (other < mp_napics && IO_TO_ID(other) != newid)
 		other++;
 
 	if (other < mp_napics) {
 		printf("Changing APIC ID for IO APIC #%d from "
 		       "%d to %d in MP table\n",
 		       other, newid, oldid);
 		IO_TO_ID(other) = oldid;
 	}
 	IO_TO_ID(apic) = newid;
 }
 
 
 /*
  * Rebuild the ID_TO_IO() table from scratch using the current contents of
  * CPU_TO_ID() and IO_TO_ID().
  */
 static void
 fix_id_to_io_mapping(void)
 {
 	int idx;
 
 	/* start with every physical ID unmapped */
 	idx = 0;
 	while (idx < NAPICID) {
 		ID_TO_IO(idx) = -1;
 		idx++;
 	}
 
 	/* logical CPUs 0..mp_naps */
 	idx = 0;
 	while (idx <= mp_naps) {
 		if (CPU_TO_ID(idx) < NAPICID) {
 			ID_TO_IO(CPU_TO_ID(idx)) = idx;
 		}
 		idx++;
 	}
 
 	/* logical IO APICs 0..mp_napics-1 */
 	idx = 0;
 	while (idx < mp_napics) {
 		if (IO_TO_ID(idx) < NAPICID) {
 			ID_TO_IO(IO_TO_ID(idx)) = idx;
 		}
 		idx++;
 	}
 }
 
 
 /*
  * Return the lowest physical APIC ID that is used by neither a CPU nor
  * an IO APIC.  Returns NAPICID when every ID is taken.
  */
 static int
 first_free_apic_id(void)
 {
 	int candidate;
 	int idx;
 	int taken;
 
 	for (candidate = 0; candidate < NAPICID; candidate++) {
 		taken = 0;
 
 		/* in use by one of the logical CPUs 0..mp_naps? */
 		for (idx = 0; idx <= mp_naps && !taken; idx++) {
 			if (CPU_TO_ID(idx) == candidate)
 				taken = 1;
 		}
 
 		/* in use by one of the IO APICs? */
 		for (idx = 0; idx < mp_napics && !taken; idx++) {
 			if (IO_TO_ID(idx) == candidate)
 				taken = 1;
 		}
 
 		if (!taken)
 			return candidate;
 	}
 	return candidate;
 }
 
 
 /*
  * Decide whether physical APIC ID id may be used for logical IO APIC
  * apic.  The ID must be below NAPICID, must not belong to any CPU, and
  * must not belong to an IO APIC with a lower logical number.
  *
  * Returns 1 when acceptable, 0 otherwise.
  */
 static int
 io_apic_id_acceptable(int apic, int id)
 {
 	int idx;
 	int limit;
 
 	/* Out of range */
 	if (id >= NAPICID)
 		return 0;
 
 	/* Conflict with CPU */
 	for (idx = 0; idx <= mp_naps; idx++) {
 		if (CPU_TO_ID(idx) == id)
 			return 0;
 	}
 
 	/* Conflict with other APIC: only lower-numbered IO APICs count */
 	limit = (apic < mp_napics) ? apic : mp_napics;
 	for (idx = 0; idx < limit; idx++) {
 		if (IO_TO_ID(idx) == id)
 			return 0;
 	}
 
 	/* ID is acceptable for IO APIC */
 	return 1;
 }
 
 
 /*
  * Post-process the data collected from the Intel MP specification table:
  *  - renumber a lone, mis-numbered PCI bus to bus 0,
  *  - resolve physical APIC ID conflicts for the IO APICs,
  *  - add the 8259->APIC ExtInt entry if the table lacks one.
  */
 static void
 fix_mp_table(void)
 {
 	int	x;
 	int	id;
 	int	bus_0 = 0;	/* Stop GCC warning */
 	int	bus_pci = 0;	/* Stop GCC warning */
 	int	num_pci_bus;
 	int	apic;		/* IO APIC unit number */
 	int     freeid;		/* Free physical APIC ID */
 	int	physid;		/* Current physical IO APIC ID */
 
 	/*
 	 * Fix mis-numbering of the PCI bus and its INT entries if the BIOS
 	 * did it wrong.  The MP spec says that when more than 1 PCI bus
 	 * exists the BIOS must begin with bus entries for the PCI bus and use
 	 * actual PCI bus numbering.  This implies that when only 1 PCI bus
 	 * exists the BIOS can choose to ignore this ordering, and indeed many
 	 * MP motherboards do ignore it.  This causes a problem when the PCI
 	 * sub-system makes requests of the MP sub-system based on PCI bus
 	 * numbers.  So here we look for the situation and renumber the
 	 * busses and associated INTs in an effort to "make it right".
 	 */
 
 	/* find bus 0, PCI bus, count the number of PCI busses */
 	for (num_pci_bus = 0, x = 0; x < mp_nbusses; ++x) {
 		if (bus_data[x].bus_id == 0) {
 			bus_0 = x;
 		}
 		if (bus_data[x].bus_type == PCI) {
 			++num_pci_bus;
 			bus_pci = x;
 		}
 	}
 	/*
 	 * bus_0 == slot of bus with ID of 0
 	 * bus_pci == slot of last PCI bus encountered
 	 */
 
 	/* check the 1 PCI bus case for sanity */
 	/* if it is number 0 all is well */
 	if (num_pci_bus == 1 &&
 	    bus_data[bus_pci].bus_id != 0) {
 		
 		/* mis-numbered, swap with whichever bus uses slot 0 */
 
 		/* swap the bus entry types */
 		bus_data[bus_pci].bus_type = bus_data[bus_0].bus_type;
 		bus_data[bus_0].bus_type = PCI;
 
 		/* swap each relevant INTerrupt entry */
 		id = bus_data[bus_pci].bus_id;
 		for (x = 0; x < nintrs; ++x) {
 			if (io_apic_ints[x].src_bus_id == id) {
 				io_apic_ints[x].src_bus_id = 0;
 			}
 			else if (io_apic_ints[x].src_bus_id == 0) {
 				io_apic_ints[x].src_bus_id = id;
 			}
 		}
 	}
 
 	/*
 	 * Assign IO APIC IDs.
 	 *
 	 * First try the existing ID. If a conflict is detected, try
 	 * the ID in the MP table.  If a conflict is still detected, find
 	 * a free id.
 	 *
 	 * We cannot use the ID_TO_IO table before all conflicts have been
 	 * resolved and the table has been corrected.
 	 */
 	for (apic = 0; apic < mp_napics; ++apic) { /* For all IO APICs */
 		
 		/* First try to use the value set by the BIOS */
 		physid = io_apic_get_id(apic);
 		if (io_apic_id_acceptable(apic, physid)) {
 			if (IO_TO_ID(apic) != physid)
 				swap_apic_id(apic, IO_TO_ID(apic), physid);
 			continue;
 		}
 
 		/* Then check if the value in the MP table is acceptable */
 		if (io_apic_id_acceptable(apic, IO_TO_ID(apic)))
 			continue;
 
 		/* Last resort, find a free APIC ID and use it */
 		freeid = first_free_apic_id();
 		if (freeid >= NAPICID)
 			panic("No free physical APIC IDs found");
 		
 		if (io_apic_id_acceptable(apic, freeid)) {
 			swap_apic_id(apic, IO_TO_ID(apic), freeid);
 			continue;
 		}
 		panic("Free physical APIC ID not usable");
 	}
 	/* all conflicts resolved: the ID_TO_IO table can be rebuilt now */
 	fix_id_to_io_mapping();
 
 	/* detect and fix broken Compaq MP table */
 	if (apic_int_type(0, 0) == -1) {
 		printf("APIC_IO: MP table broken: 8259->APIC entry missing!\n");
 		/* append the missing entry in the spare slot at index nintrs */
 		io_apic_ints[nintrs].int_type = 3;	/* ExtInt */
 		io_apic_ints[nintrs].int_vector = 0xff;	/* Unassigned */
 		/* XXX fixme, set src bus id etc, but it doesn't seem to hurt */
 		io_apic_ints[nintrs].dst_apic_id = IO_TO_ID(0);
 		io_apic_ints[nintrs].dst_apic_int = 0;	/* Pin 0 */
 		nintrs++;
 	}
 }
 
 
 /*
  * Assign low level interrupt handlers.
  *
  * Resets int_to_apicintpin[], then gives every ISA/EISA interrupt the
  * IRQ number equal to its source bus IRQ.  If IRQ 0 is still free after
  * that, the ExtInt entry on (IO APIC 0, pin 0) receives it.
  */
 static void
 setup_apic_irq_mapping(void)
 {
 	int	idx;
 	int	irq;
 
 	/* Clear array */
 	idx = 0;
 	while (idx < APIC_INTMAPSIZE) {
 		int_to_apicintpin[idx].ioapic = -1;
 		int_to_apicintpin[idx].int_pin = 0;
 		int_to_apicintpin[idx].apic_address = NULL;
 		int_to_apicintpin[idx].redirindex = 0;
 		idx++;
 	}
 
 	/* First assign ISA/EISA interrupts */
 	for (idx = 0; idx < nintrs; idx++) {
 		irq = io_apic_ints[idx].src_bus_irq;
 
 		if (irq >= APIC_INTMAPSIZE)
 			continue;
 		if (io_apic_ints[idx].int_vector != 0xff)
 			continue;	/* entry already has an IRQ */
 		if (int_to_apicintpin[irq].ioapic != -1)
 			continue;	/* IRQ number already taken */
 		if (!apic_int_is_bus_type(idx, ISA) &&
 		    !apic_int_is_bus_type(idx, EISA))
 			continue;
 		if (io_apic_ints[idx].int_type != 0)
 			continue;	/* not a standard INT */
 
 		assign_apic_irq(ID_TO_IO(io_apic_ints[idx].dst_apic_id),
 				io_apic_ints[idx].dst_apic_int,
 				irq);
 	}
 
 	/* Assign ExtInt entry if no ISA/EISA interrupt 0 entry */
 	for (idx = 0; idx < nintrs; idx++) {
 		if (io_apic_ints[idx].dst_apic_int != 0)
 			continue;
 		if (io_apic_ints[idx].dst_apic_id != IO_TO_ID(0))
 			continue;
 		if (io_apic_ints[idx].int_vector != 0xff)
 			continue;
 		if (int_to_apicintpin[0].ioapic != -1)
 			continue;
 		if (io_apic_ints[idx].int_type != 3)
 			continue;
 
 		assign_apic_irq(0, 0, 0);
 		break;
 	}
 	/* PCI interrupt assignment is deferred */
 }
 
 
 /*
  * Record one processor entry from the MP table.
  *
  * The BSP is always stored as logical CPU 0; an AP is stored as logical
  * CPU number cpu provided cpu is below MAXCPU.
  *
  * Returns 1 when logical CPU slot cpu was consumed, 0 otherwise
  * (disabled entry, BSP, or no room).  Panics on an APIC ID that is not
  * below NAPICID.
  */
 static int
 processor_entry(proc_entry_ptr entry, int cpu)
 {
 	/* check for usability */
 	if ((entry->cpu_flags & PROCENTRY_FLAG_EN) == 0)
 		return 0;
 
 	if (entry->apic_id >= NAPICID)
 		panic("CPU APIC ID out of range (0..%d)", NAPICID - 1);
 
 	/* check for BSP flag */
 	if (entry->cpu_flags & PROCENTRY_FLAG_BP) {
 		boot_cpu_id = entry->apic_id;
 		CPU_TO_ID(0) = entry->apic_id;
 		ID_TO_CPU(entry->apic_id) = 0;
 		return 0;	/* its already been counted */
 	}
 
 	/* no room left for another AP */
 	if (cpu >= MAXCPU)
 		return 0;
 
 	/* add another AP to list */
 	CPU_TO_ID(cpu) = entry->apic_id;
 	ID_TO_CPU(entry->apic_id) = cpu;
 	return 1;
 }
 
 
 /*
  * Record one bus entry from the MP table in bus_data[bus].
  *
  * The bus type name in the entry is at most 6 characters and is padded
  * with blanks; it is translated with lookup_bus_type().  Panics on a name
  * that is not known.  Always returns 1.
  */
 static int
 bus_entry(bus_entry_ptr entry, int bus)
 {
 	int     len;
 	int     type;
 	char    c, name[8];
 
 	/* copy the name up to the first blank, 6 characters at most */
 	len = 0;
 	while (len < 6) {
 		c = entry->bus_type[len];
 		if (c == ' ')
 			break;
 		name[len] = c;
 		++len;
 	}
 	name[len] = '\0';
 
 	/* encode the name into an index */
 	type = lookup_bus_type(name);
 	if (type == UNKNOWN_BUSTYPE)
 		panic("unknown bus type: '%s'", name);
 
 	bus_data[bus].bus_id = entry->bus_id;
 	bus_data[bus].bus_type = type;
 
 	return 1;
 }
 
 
 /*
  * Record one IO APIC entry from the MP table as logical IO APIC apic.
  *
  * Returns 1 when the entry is enabled and was recorded, 0 when it is
  * disabled.
  */
 static int
 io_apic_entry(io_apic_entry_ptr entry, int apic)
 {
 	if (entry->apic_flags & IOAPICENTRY_FLAG_EN) {
 		IO_TO_ID(apic) = entry->apic_id;
 
 		/* the reverse table only has room for IDs below NAPICID */
 		if (entry->apic_id < NAPICID) {
 			ID_TO_IO(entry->apic_id) = apic;
 		}
 		return 1;
 	}
 
 	return 0;
 }
 
 
 /*
  * Translate a bus type name into its type code by searching the first
  * MAX_BUSTYPE rows of bus_type_table[].  Returns UNKNOWN_BUSTYPE when no
  * row carries that name.
  */
 static int
 lookup_bus_type(char *name)
 {
 	int     row = 0;
 
 	while (row < MAX_BUSTYPE) {
 		if (strcmp(bus_type_table[row].name, name) == 0) {
 			return bus_type_table[row].type;
 		}
 		++row;
 	}
 
 	return UNKNOWN_BUSTYPE;
 }
 
 
 /*
  * Record one IO interrupt entry from the MP table in io_apic_ints[intr].
  *
  * A destination APIC ID of 255 addresses all IO APICs; in that case the
  * first IO APIC with enough redirection entries for the requested pin is
  * recorded instead, if there is one.  Always returns 1.
  */
 static int
 int_entry(int_entry_ptr entry, int intr)
 {
 	int unit;
 
 	io_apic_ints[intr].int_type = entry->int_type;
 	io_apic_ints[intr].int_flags = entry->int_flags;
 	io_apic_ints[intr].src_bus_id = entry->src_bus_id;
 	io_apic_ints[intr].src_bus_irq = entry->src_bus_irq;
 
 	/* take the table's destination unless a better one is found below */
 	io_apic_ints[intr].dst_apic_id = entry->dst_apic_id;
 
 	if (entry->dst_apic_id == 255) {
 		/* This signal goes to all IO APICS.  Select an IO APIC
 		   with sufficient number of interrupt pins */
 		unit = 0;
 		while (unit < mp_napics) {
 			if (((io_apic_read(unit, IOAPIC_VER) &
 			      IOART_VER_MAXREDIR) >> MAXREDIRSHIFT) >=
 			    entry->dst_apic_int)
 				break;
 			unit++;
 		}
 		if (unit < mp_napics) {
 			io_apic_ints[intr].dst_apic_id = IO_TO_ID(unit);
 		}
 	}
 
 	io_apic_ints[intr].dst_apic_int = entry->dst_apic_int;
 
 	return 1;
 }
 
 
 /*
  * Return 1 when the source bus of interrupt entry intr is recorded in
  * bus_data[] with type bus_type, 0 otherwise.
  */
 static int
 apic_int_is_bus_type(int intr, int bus_type)
 {
 	int     slot;
 
 	for (slot = 0; slot < mp_nbusses; ++slot) {
 		/* not the bus this interrupt comes from */
 		if (bus_data[slot].bus_id != io_apic_ints[intr].src_bus_id)
 			continue;
 		if ((int) bus_data[slot].bus_type == bus_type)
 			return 1;
 	}
 
 	return 0;
 }
 
 
 /*
  * Given a traditional ISA INT mask, return an APIC mask.
  *
  * Only the lowest set bit of isa_mask is considered.  Returns 0 when the
  * mask is empty or isa_apic_irq() knows no assignment for that ISA IRQ.
  */
 u_int
 isa_apic_mask(u_int isa_mask)
 {
 	int bit;
 	int pin;
 
 #if defined(SKIP_IRQ15_REDIRECT)
 	if (isa_mask == (1 << 15)) {
 		printf("skipping ISA IRQ15 redirect\n");
 		return isa_mask;
 	}
 #endif  /* SKIP_IRQ15_REDIRECT */
 
 	/* ffs() numbers bits from 1 and yields 0 for an empty mask */
 	bit = ffs(isa_mask);
 	if (bit == 0)
 		return 0;
 
 	/* look for APIC connection of the zero based IRQ */
 	pin = isa_apic_irq(bit - 1);
 
 	/* convert pin# to a mask */
 	return (pin == -1) ? 0 : (1 << pin);
 }
 
 
 /*
  * Determine which APIC pin an ISA/EISA INT is attached to.
  */
 #define INTTYPE(I)	(io_apic_ints[(I)].int_type)
 #define INTPIN(I)	(io_apic_ints[(I)].dst_apic_int)
 #define INTIRQ(I)	(io_apic_ints[(I)].int_vector)
 #define INTAPIC(I)	(ID_TO_IO(io_apic_ints[(I)].dst_apic_id))
 
 #define SRCBUSIRQ(I)	(io_apic_ints[(I)].src_bus_irq)
 int
 isa_apic_irq(int isa_irq)
 {
 	int     intr;
 
 	for (intr = 0; intr < nintrs; ++intr) {		/* check each record */
 		if (INTTYPE(intr) == 0) {		/* standard INT */
 			if (SRCBUSIRQ(intr) == isa_irq) {
 				if (apic_int_is_bus_type(intr, ISA) ||
 			            apic_int_is_bus_type(intr, EISA)) {
 					if (INTIRQ(intr) == 0xff)
 						return -1; /* unassigned */
 					return INTIRQ(intr);	/* found */
 				}
 			}
 		}
 	}
 	return -1;					/* NOT found */
 }
 
 
 /*
  * Determine which APIC pin a PCI INT is attached to.
  */
 #define SRCBUSID(I)	(io_apic_ints[(I)].src_bus_id)
 #define SRCBUSDEVICE(I)	((io_apic_ints[(I)].src_bus_irq >> 2) & 0x1f)
 #define SRCBUSLINE(I)	(io_apic_ints[(I)].src_bus_irq & 0x03)
 int
 pci_apic_irq(int pciBus, int pciDevice, int pciInt)
 {
 	int     intr;
 
 	--pciInt;					/* zero based */
 
 	for (intr = 0; intr < nintrs; ++intr)		/* check each record */
 		if ((INTTYPE(intr) == 0)		/* standard INT */
 		    && (SRCBUSID(intr) == pciBus)
 		    && (SRCBUSDEVICE(intr) == pciDevice)
 		    && (SRCBUSLINE(intr) == pciInt))	/* a candidate IRQ */
 			if (apic_int_is_bus_type(intr, PCI)) {
 				if (INTIRQ(intr) == 0xff)
 					allocate_apic_irq(intr);
 				if (INTIRQ(intr) == 0xff)
 					return -1;	/* unassigned */
 				return INTIRQ(intr);	/* exact match */
 			}
 
 	return -1;					/* NOT found */
 }
 
 int
 next_apic_irq(int irq) 
 {
 	int intr, ointr;
 	int bus, bustype;
 
 	bus = 0;
 	bustype = 0;
 	for (intr = 0; intr < nintrs; intr++) {
 		if (INTIRQ(intr) != irq || INTTYPE(intr) != 0)
 			continue;
 		bus = SRCBUSID(intr);
 		bustype = apic_bus_type(bus);
 		if (bustype != ISA &&
 		    bustype != EISA &&
 		    bustype != PCI)
 			continue;
 		break;
 	}
 	if (intr >= nintrs) {
 		return -1;
 	}
 	for (ointr = intr + 1; ointr < nintrs; ointr++) {
 		if (INTTYPE(ointr) != 0)
 			continue;
 		if (bus != SRCBUSID(ointr))
 			continue;
 		if (bustype == PCI) {
 			if (SRCBUSDEVICE(intr) != SRCBUSDEVICE(ointr))
 				continue;
 			if (SRCBUSLINE(intr) != SRCBUSLINE(ointr))
 				continue;
 		}
 		if (bustype == ISA || bustype == EISA) {
 			if (SRCBUSIRQ(intr) != SRCBUSIRQ(ointr))
 				continue;
 		}
 		if (INTPIN(intr) == INTPIN(ointr))
 			continue;
 		break;
 	}
 	if (ointr >= nintrs) {
 		return -1;
 	}
 	return INTIRQ(ointr);
 }
 #undef SRCBUSLINE
 #undef SRCBUSDEVICE
 #undef SRCBUSID
 #undef SRCBUSIRQ
 
 #undef INTPIN
 #undef INTIRQ
 #undef INTAPIC
 #undef INTTYPE
 
 
 /*
  * Reprogram the MB chipset to NOT redirect an ISA INTerrupt.
  *
  * This is a placeholder: nothing is reprogrammed.  It logs a message
  * under bootverbose and returns -1 when READY is defined, 0 otherwise.
  *
  * XXX FIXME:
  *  Exactly what this means is unclear at this point.  It is a solution
  *  for motherboards that redirect the MBIRQ0 pin.  Generically a motherboard
  *  could route any of the ISA INTs to upper (>15) IRQ values.  But most would
  *  NOT be redirected via MBIRQ0, thus "undirect()ing" them would NOT be an
  *  option.
  */
 int
 undirect_isa_irq(int rirq)
 {
 #if defined(READY)
 	if (bootverbose) {
 		printf("Freeing redirected ISA irq %d.\n", rirq);
 	}
 	/** FIXME: tickle the MB redirector chip */
 	return -1;
 #else
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected ISA irq %d.\n", rirq);
 	}
 	return 0;
 #endif  /* READY */
 }
 
 
 /*
  * Reprogram the MB chipset to NOT redirect a PCI INTerrupt.
  *
  * This is a placeholder: nothing is reprogrammed.  It logs a message
  * under bootverbose and returns -1 when READY is defined, 0 otherwise.
  */
 int
 undirect_pci_irq(int rirq)
 {
 #if defined(READY)
 	if (bootverbose) {
 		printf("Freeing redirected PCI irq %d.\n", rirq);
 	}
 
 	/** FIXME: tickle the MB redirector chip */
 	return -1;
 #else
 	if (bootverbose) {
 		printf("Freeing (NOT implemented) redirected PCI irq %d.\n",
 		       rirq);
 	}
 	return 0;
 #endif  /* READY */
 }
 
 
 /*
  * given a bus ID, return:
  *  the bus type of the first bus_data[] entry with that ID
  *  -1 if NOT found
  */
 int
 apic_bus_type(int id)
 {
 	int     slot = 0;
 
 	while (slot < mp_nbusses) {
 		if (bus_data[slot].bus_id == id) {
 			return bus_data[slot].bus_type;
 		}
 		++slot;
 	}
 
 	return -1;
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the src bus ID of the first interrupt entry routed to that pin
  *  -1 if NOT found
  */
 int
 apic_src_bus_id(int apic, int pin)
 {
 	int     idx;
 
 	/* search each of the possible INTerrupt sources */
 	for (idx = 0; idx < nintrs; ++idx) {
 		if (apic != ID_TO_IO(io_apic_ints[idx].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[idx].dst_apic_int)
 			continue;
 		return (io_apic_ints[idx].src_bus_id);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the src bus IRQ of the first interrupt entry routed to that pin
  *  -1 if NOT found
  */
 int
 apic_src_bus_irq(int apic, int pin)
 {
 	int     idx;
 
 	/* search each of the possible INTerrupt sources */
 	for (idx = 0; idx < nintrs; idx++) {
 		if (apic != ID_TO_IO(io_apic_ints[idx].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[idx].dst_apic_int)
 			continue;
 		return (io_apic_ints[idx].src_bus_irq);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the INTerrupt type of the first interrupt entry routed to that pin
  *  -1 if NOT found
  */
 int
 apic_int_type(int apic, int pin)
 {
 	int     idx;
 
 	/* search each of the possible INTerrupt sources */
 	for (idx = 0; idx < nintrs; ++idx) {
 		if (apic != ID_TO_IO(io_apic_ints[idx].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[idx].dst_apic_int)
 			continue;
 		return (io_apic_ints[idx].int_type);
 	}
 
 	return -1;		/* NOT found */
 }
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the IRQ number assigned to the first interrupt entry routed to it
  *  -1 if NOT found or still unassigned
  *
  * Panics when int_to_apicintpin[] disagrees with the entry found.
  */
 int 
 apic_irq(int apic, int pin)
 {
 	int idx;
 	int irq;
 
 	/* locate the first entry routed to this pin */
 	for (idx = 0; idx < nintrs; ++idx) {
 		if (apic == ID_TO_IO(io_apic_ints[idx].dst_apic_id) &&
 		    pin == io_apic_ints[idx].dst_apic_int)
 			break;
 	}
 	if (idx >= nintrs)
 		return -1;
 
 	irq = io_apic_ints[idx].int_vector;
 	if (irq == 0xff)
 		return -1;
 
 	/* the reverse table has to point back at the same pin */
 	if (apic != int_to_apicintpin[irq].ioapic)
 		panic("apic_irq: inconsistent table");
 	if (pin != int_to_apicintpin[irq].int_pin)
 		panic("apic_irq inconsistent table (2)");
 
 	return irq;
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the trigger mode (bits 2-3 of int_flags) of the first interrupt
  *  entry routed to that pin
  *  -1 if NOT found
  */
 int
 apic_trigger(int apic, int pin)
 {
 	int     idx;
 
 	/* search each of the possible INTerrupt sources */
 	for (idx = 0; idx < nintrs; ++idx) {
 		if (apic != ID_TO_IO(io_apic_ints[idx].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[idx].dst_apic_int)
 			continue;
 		return ((io_apic_ints[idx].int_flags >> 2) & 0x03);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * given a LOGICAL APIC# and pin#, return:
  *  the 'active' level (bits 0-1 of int_flags) of the first interrupt
  *  entry routed to that pin
  *  -1 if NOT found
  */
 int
 apic_polarity(int apic, int pin)
 {
 	int     idx;
 
 	/* search each of the possible INTerrupt sources */
 	for (idx = 0; idx < nintrs; ++idx) {
 		if (apic != ID_TO_IO(io_apic_ints[idx].dst_apic_id))
 			continue;
 		if (pin != io_apic_ints[idx].dst_apic_int)
 			continue;
 		return (io_apic_ints[idx].int_flags & 0x03);
 	}
 
 	return -1;		/* NOT found */
 }
 
 
 /*
  * set data according to MP defaults
  * FIXME: probably not complete yet...
  *
  * 'type' is the MP default configuration type: 1 through 7 are accepted,
  * any other value panics.  Fills in the CPU <-> APIC ID maps for the BSP
  * and exactly one AP, the first two bus_data[] entries and, when APIC_IO
  * is defined, the IO APIC ID maps plus the first 16 io_apic_ints[] entries.
  */
 static void
 default_mp_table(int type)
 {
 	int     ap_cpu_id;
 #if defined(APIC_IO)
 	int     io_apic_id;
 	int     pin;
 #endif	/* APIC_IO */
 
 	/* disabled debugging aid: describes the selected default config */
 #if 0
 	printf("  MP default config type: %d\n", type);
 	switch (type) {
 	case 1:
 		printf("   bus: ISA, APIC: 82489DX\n");
 		break;
 	case 2:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 3:
 		printf("   bus: EISA, APIC: 82489DX\n");
 		break;
 	case 4:
 		printf("   bus: MCA, APIC: 82489DX\n");
 		break;
 	case 5:
 		printf("   bus: ISA+PCI, APIC: Integrated\n");
 		break;
 	case 6:
 		printf("   bus: EISA+PCI, APIC: Integrated\n");
 		break;
 	case 7:
 		printf("   bus: MCA+PCI, APIC: Integrated\n");
 		break;
 	default:
 		printf("   future type\n");
 		break;
 		/* NOTREACHED */
 	}
 #endif	/* 0 */
 
 	/* the BSP's APIC ID comes from its own local APIC ID register */
 	boot_cpu_id = (lapic.id & APIC_ID_MASK) >> 24;
 	/* the AP is given ID 1 when the BSP is ID 0, otherwise ID 0 */
 	ap_cpu_id = (boot_cpu_id == 0) ? 1 : 0;
 
 	/* BSP */
 	CPU_TO_ID(0) = boot_cpu_id;
 	ID_TO_CPU(boot_cpu_id) = 0;
 
 	/* one and only AP */
 	CPU_TO_ID(1) = ap_cpu_id;
 	ID_TO_CPU(ap_cpu_id) = 1;
 
 #if defined(APIC_IO)
 	/* one and only IO APIC */
 	io_apic_id = (io_apic_read(0, IOAPIC_ID) & APIC_ID_MASK) >> 24;
 
 	/*
 	 * sanity check, refer to MP spec section 3.6.6, last paragraph
 	 * necessary as some hardware isn't properly setting up the IO APIC
 	 *
 	 * Either form of the test below forces the IO APIC ID to 2: the
 	 * strict one whenever it is not already 2, the default one only
 	 * when it reads back as 0, 1 or 15.  Both 'if' lines open the same
 	 * brace, which is closed once after the #endif.
 	 */
 #if defined(REALLY_ANAL_IOAPICID_VALUE)
 	if (io_apic_id != 2) {
 #else
 	if ((io_apic_id == 0) || (io_apic_id == 1) || (io_apic_id == 15)) {
 #endif	/* REALLY_ANAL_IOAPICID_VALUE */
 		io_apic_set_id(0, 2);
 		io_apic_id = 2;
 	}
 	IO_TO_ID(0) = io_apic_id;
 	ID_TO_IO(io_apic_id) = 0;
 #endif	/* APIC_IO */
 
 	/* fill out bus entries */
 	switch (type) {
 	case 1:
 	case 2:
 	case 3:
 	case 4:
 	case 5:
 	case 6:
 	case 7:
 		/* row (type - 1) of default_data[], columns 1-4: id/type pairs */
 		bus_data[0].bus_id = default_data[type - 1][1];
 		bus_data[0].bus_type = default_data[type - 1][2];
 		bus_data[1].bus_id = default_data[type - 1][3];
 		bus_data[1].bus_type = default_data[type - 1][4];
 		break;
 
 	/*
 	 * NOTE(review): an older remark here read "case 4: case 7: MCA NOT
 	 * supported", but both types are accepted by the case list above.
 	 */
 	default:		/* illegal/reserved */
 		panic("BAD default MP config: %d", type);
 		/* NOTREACHED */
 	}
 
 #if defined(APIC_IO)
 	/* general cases from MP v1.4, table 5-2 */
 	for (pin = 0; pin < 16; ++pin) {
 		io_apic_ints[pin].int_type = 0;
 		/* 0x05: bits 0-1 == 1 and bits 2-3 == 1 */
 		io_apic_ints[pin].int_flags = 0x05;	/* edge/active-hi */
 		io_apic_ints[pin].src_bus_id = 0;
 		io_apic_ints[pin].src_bus_irq = pin;	/* IRQ2 caught below */
 		io_apic_ints[pin].dst_apic_id = io_apic_id;
 		io_apic_ints[pin].dst_apic_int = pin;	/* 1-to-1 */
 	}
 
 	/* special cases from MP v1.4, table 5-2 */
 	if (type == 2) {
 		io_apic_ints[2].int_type = 0xff;	/* N/C */
 		io_apic_ints[13].int_type = 0xff;	/* N/C */
 #if !defined(APIC_MIXED_MODE)
 		/** FIXME: ??? */
 		panic("sorry, can't support type 2 default yet");
 #endif	/* APIC_MIXED_MODE */
 	}
 	else
 		io_apic_ints[2].src_bus_irq = 0;	/* ISA IRQ0 is on APIC INT 2 */
 
 	/* pin 0: not connected for type 7, vectored 8259 for everything else */
 	if (type == 7)
 		io_apic_ints[0].int_type = 0xff;	/* N/C */
 	else
 		io_apic_ints[0].int_type = 3;	/* vectored 8259 */
 #endif	/* APIC_IO */
 }
 
 
 /*
  * start each AP in our list
  *
  * 'boot_addr' is where the AP trampoline gets installed; it is also used
  * to build the warm-start vector and is handed to start_ap() for each AP.
  * For every AP (1..mp_naps) a private data page and an idle stack are
  * allocated and wired into SMPpt[] before the AP is kicked.
  * Returns mp_ncpus - 1.
  */
 static int
 start_all_aps(u_int boot_addr)
 {
 	int     x, i, pg;
 	u_char  mpbiosreason;	/* only read/written when PC98 is not defined */
 	u_long  mpbioswarmvec;
 	struct globaldata *gd;
 	char *stack;
 	uintptr_t kptbase;
 
 	POSTCODE(START_ALL_APS_POST);
 
 	mtx_init(&ap_boot_mtx, "ap boot", MTX_SPIN);
 
 	/* initialize BSP's local APIC */
 	apic_initialize();
 	bsp_apic_ready = 1;
 
 	/* install the AP 1st level boot code */
 	install_ap_tramp(boot_addr);
 
 
 	/* save the current value of the warm-start vector */
 	mpbioswarmvec = *((u_long *) WARMBOOT_OFF);
 #ifndef PC98
 	/* also save the CMOS BIOS_RESET byte so it can be restored below */
 	outb(CMOS_REG, BIOS_RESET);
 	mpbiosreason = inb(CMOS_DATA);
 #endif
 
 	/* set up temporary P==V mapping for AP boot */
 	/* XXX this is a hack, we should boot the AP on its own stack/PTD */
 	kptbase = (uintptr_t)(void *)KPTphys;
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = (pd_entry_t)(PG_V | PG_RW |
 		    ((kptbase + x * PAGE_SIZE) & PG_FRAME));
 	invltlb();
 
 	/* start each AP */
 	for (x = 1; x <= mp_naps; ++x) {
 
 		/* This is a bit verbose, it will go away soon.  */
 
 		/* first page of AP's private space */
 		pg = x * i386_btop(sizeof(struct privatespace));
 
 		/* allocate a new private data page */
 		gd = (struct globaldata *)kmem_alloc(kernel_map, PAGE_SIZE);
 
 		/* wire it into the private page table page */
 		SMPpt[pg] = (pt_entry_t)(PG_V | PG_RW | vtophys(gd));
 
 		/* allocate and set up an idle stack data page */
 		stack = (char *)kmem_alloc(kernel_map, UPAGES*PAGE_SIZE);
 		/* the stack pages follow the data page: slots pg+1 .. pg+UPAGES */
 		for (i = 0; i < UPAGES; i++)
 			SMPpt[pg + 1 + i] = (pt_entry_t)
 			    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 		/* prime data page for it to use */
 		gd->gd_cpuid = x;
 		globaldata_register(gd);
 
 		/* setup a vector to our boot code */
 		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
 		*((volatile u_short *) WARMBOOT_SEG) = (boot_addr >> 4);
 #ifndef PC98
 		outb(CMOS_REG, BIOS_RESET);
 		outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 #endif
 
 		/* initial stack pointer for the AP: end of its idle stack */
 		bootSTK = &SMP_prvspace[x].idlestack[UPAGES*PAGE_SIZE];
 		bootAP = x;
 
 		/* attempt to start the Application Processor */
 		CHECK_INIT(99);	/* setup checkpoints */
 		if (!start_ap(x, boot_addr)) {
 			printf("AP #%d (PHY# %d) failed!\n", x, CPU_TO_ID(x));
 			CHECK_PRINT("trace");	/* show checkpoints */
 			/* better panic as the AP may be running loose */
 			printf("panic y/n? [y] ");
 			if (cngetc() != 'n')
 				panic("bye-bye");
 		}
 		CHECK_PRINT("trace");		/* show checkpoints */
 
 		/* record its version info */
 		cpu_apic_versions[x] = cpu_apic_versions[0];
 
 		/*
 		 * NOTE(review): this is also reached when start_ap() failed
 		 * and the operator answered 'n', so a CPU that never came up
 		 * is still recorded in all_cpus -- confirm this is intended.
 		 */
 		all_cpus |= (1 << x);		/* record AP in CPU map */
 	}
 
 	/* build our map of 'other' CPUs */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	/* fill in our (BSP) APIC version */
 	cpu_apic_versions[0] = lapic.version;
 
 	/* restore the warmstart vector */
 	*(u_long *) WARMBOOT_OFF = mpbioswarmvec;
 #ifndef PC98
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, mpbiosreason);
 #endif
 
 	/*
 	 * Set up the idle context for the BSP.  Similar to above except
 	 * that some was done by locore, some by pmap.c and some is implicit
 	 * because the BSP is cpu#0 and the page is initially zero, and also
 	 * because we can refer to variables by name on the BSP..
 	 */
 
 	/* Allocate and setup BSP idle stack */
 	stack = (char *)kmem_alloc(kernel_map, UPAGES * PAGE_SIZE);
 	for (i = 0; i < UPAGES; i++)
 		SMPpt[1 + i] = (pt_entry_t)
 		    (PG_V | PG_RW | vtophys(PAGE_SIZE * i + stack));
 
 	/* tear down the temporary P==V mapping installed before the AP loop */
 	for (x = 0; x < NKPT; x++)
 		PTD[x] = 0;
 	pmap_set_opt();
 
 	/* number of APs actually started */
 	return mp_ncpus - 1;
 }
 
 
 /*
  * load the 1st level AP boot code into base memory.
  */
 
 /* targets for relocation */
 extern void bigJump(void);
 extern void bootCodeSeg(void);
 extern void bootDataSeg(void);
 extern void MPentry(void);
 extern u_int MP_GDT;
 extern u_int mp_gdtbase;
 
 /*
  * Copy the bootMP image (bootMP_size bytes) to boot_addr + KERNBASE, then
  * patch four locations inside the copy: the lgdt argument, the ljmp
  * target for MPentry() and the two boot segment targets.
  */
 static void
 install_ap_tramp(u_int boot_addr)
 {
 	int     x;
 	int     size = *(int *) ((u_long) & bootMP_size);
 	u_char *src = (u_char *) ((u_long) bootMP);
 	u_char *dst = (u_char *) boot_addr + KERNBASE;
 	u_int   boot_base = (u_int) bootMP;
 	u_int8_t *dst8;
 	u_int16_t *dst16;
 	u_int32_t *dst32;
 
 	POSTCODE(INSTALL_AP_TRAMP_POST);
 
 	/* byte-wise copy of the boot code to its destination */
 	for (x = 0; x < size; ++x)
 		*dst++ = *src++;
 
 	/*
 	 * modify addresses in code we just moved to basemem. unfortunately we
 	 * need fairly detailed info about mpboot.s for this to work.  changes
 	 * to mpboot.s might require changes here.
 	 *
 	 * Each patch location is found by taking a symbol's offset from the
 	 * start of the original image (symbol - boot_base) and applying it
 	 * to the start of the copy.
 	 */
 
 	/* boot code is located in KERNEL space */
 	dst = (u_char *) boot_addr + KERNBASE;
 
 	/* modify the lgdt arg */
 	dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base));
 	*dst32 = boot_addr + ((u_int) & MP_GDT - boot_base);
 
 	/* modify the ljmp target for MPentry() */
 	/* the + 1 skips one byte at bigJump -- presumably the opcode; confirm */
 	dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
 	*dst32 = ((u_int) MPentry - KERNBASE);
 
 	/* modify the target for boot code segment */
 	/* low 16 bits of boot_addr, then bits 16-23 in the byte that follows */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 
 	/* modify the target for boot data segment */
 	/* same 16 + 8 bit layout as the code segment patch above */
 	dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base));
 	dst8 = (u_int8_t *) (dst16 + 1);
 	*dst16 = (u_int) boot_addr & 0xffff;
 	*dst8 = ((u_int) boot_addr >> 16) & 0xff;
 }
 
 
 /*
  * this function starts the AP (application processor) identified
  * by 'logical_cpu', which is translated to its physical APIC ID with
  * CPU_TO_ID().  It does quite a "song and dance"
  * to accomplish this.  This is necessary because of the nuances
  * of the different hardware we might encounter.  It ain't pretty,
  * but it seems to work.
  *
  * 'boot_addr' supplies the STARTUP IPI vector (bits 12-19 of the address).
  * Returns 1 once mp_ncpus rises above its value on entry, or 0 if the
  * APIC timer armed at the end runs out first.
  */
 static int
 start_ap(int logical_cpu, u_int boot_addr)
 {
 	int     physical_cpu;
 	int     vector;
 	int     cpus;
 	u_long  icr_lo, icr_hi;
 
 	POSTCODE(START_AP_POST);
 
 	/* get the PHYSICAL APIC ID# */
 	physical_cpu = CPU_TO_ID(logical_cpu);
 
 	/* calculate the vector */
 	vector = (boot_addr >> 12) & 0xff;
 
 	/* used as a watchpoint to signal AP startup */
 	cpus = mp_ncpus;
 
 	/*
 	 * first we do an INIT/RESET IPI this INIT IPI might be run, resetting
 	 * and running the target CPU. OR this INIT IPI might be latched (P5
 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 	 * ignored.
 	 */
 
 	/* setup the address for the target AP */
 	icr_hi = lapic.icr_hi & ~APIC_ID_MASK;
 	icr_hi |= (physical_cpu << 24);
 	lapic.icr_hi = icr_hi;
 
 	/* do an INIT IPI: assert RESET */
 	/* only the top 12 bits of the current icr_lo are carried over */
 	icr_lo = lapic.icr_lo & 0xfff00000;
 	lapic.icr_lo = icr_lo | 0x0000c500;
 
 	/* wait for pending status end */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/* do an INIT IPI: deassert RESET */
 	lapic.icr_lo = icr_lo | 0x00008500;
 
 	/* wait for pending status end */
 	u_sleep(10000);		/* wait ~10mS */
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 
 	/*
 	 * next we do a STARTUP IPI: the previous INIT IPI might still be
 	 * latched, (P5 bug) this 1st STARTUP would then terminate
 	 * immediately, and the previously started INIT IPI would continue. OR
 	 * the previous INIT IPI has already run. and this STARTUP IPI will
 	 * run. OR the previous INIT IPI was ignored. and this STARTUP IPI
 	 * will run.
 	 */
 
 	/* do a STARTUP IPI */
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/*
 	 * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF
 	 * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR
 	 * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 
 	lapic.icr_lo = icr_lo | 0x00000600 | vector;
 	while (lapic.icr_lo & APIC_DELSTAT_MASK)
 		 /* spin */ ;
 	u_sleep(200);		/* wait ~200uS */
 
 	/* wait for it to start */
 	/* mp_ncpus is presumably bumped by the AP's own startup path -- confirm */
 	set_apic_timer(5000000);/* == 5 seconds */
 	while (read_apic_timer())
 		if (mp_ncpus > cpus)
 			return 1;	/* return SUCCESS */
 
 	return 0;		/* return FAILURE */
 }
 
 /*
  * Ask every other CPU to flush its TLB by sending IPI_INVLTLB.
  * Does nothing unless APIC_IO is defined and both smp_started and
  * invltlb_ok are set.
  *
  * XXX: Needs to handshake and wait for completion before proceeding.
  */
 void
 smp_invltlb(void)
 {
 #if defined(APIC_IO)
 	/* not ready to deliver TLB shootdown IPIs yet */
 	if (!smp_started || !invltlb_ok)
 		return;
 
 	ipi_all_but_self(IPI_INVLTLB);
 #endif  /* APIC_IO */
 }
 
 /*
  * Invalidate the TLB entry for 'addr' on this CPU with the invlpg
  * instruction, then call smp_invltlb() to notify the other CPUs.
  */
 void
 invlpg(u_int addr)
 {
 	__asm   __volatile("invlpg (%0)"::"r"(addr):"memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 /*
  * Flush this CPU's TLB by reading %cr3 and writing the same value back,
  * then call smp_invltlb() to notify the other CPUs.
  */
 void
 invltlb(void)
 {
 	u_long  temp;	/* scratch register for the %cr3 round trip */
 
 	/*
 	 * This should be implemented as load_cr3(rcr3()) when load_cr3() is
 	 * inlined.
 	 */
 	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
 
 	/* send a message to the other CPUs */
 	smp_invltlb();
 }
 
 
 /*
  * This is called once the rest of the system is up and running and we're
  * ready to let the AP's out of the pen.
  *
  * Each AP spins until aps_ready is set, then performs its per-CPU setup
  * one CPU at a time under ap_boot_mtx.  The last AP through sets
  * smp_started; every AP waits for that flag and then enters the scheduler
  * via cpu_throw().  Does not return.
  */
 extern void	enable_sse(void);
 
 void
 ap_init(void)
 {
 	u_int	apic_id;
 
 	/* spin until all the AP's are ready */
 	while (!aps_ready)
 		/* spin */ ;
 
 	/*
 	 * Set curproc to our per-cpu idleproc so that mutexes have
 	 * something unique to lock with.
 	 */
 	PCPU_SET(curproc, PCPU_GET(idleproc));
 	PCPU_SET(spinlocks, NULL);
 
 	/* lock against other AP's that are waking up */
 	mtx_lock_spin(&ap_boot_mtx);
 
 	/* BSP may have changed PTD while we're waiting for the lock */
 	cpu_invltlb();
 
 	/* count ourselves; compared against mp_ncpus further down */
 	smp_cpus++;
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 	lidt(&r_idt);
 #endif
 
 	/* Build our map of 'other' CPUs. */
 	PCPU_SET(other_cpus, all_cpus & ~(1 << PCPU_GET(cpuid)));
 
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
 
 	/* set up CPU registers and state */
 	cpu_setregs();
 
 	/* set up FPU state on the AP */
 	npxinit(__INITIAL_NPXCW__);
 
 	/* set up SSE registers */
 	enable_sse();
 
 	/* A quick check from sanity claus */
 	/*
 	 * Map our local APIC ID back to a logical cpu number and make sure it
 	 * agrees with the per-cpu cpuid.
 	 * NOTE(review): the mask here is the literal 0x0f000000 while other
 	 * code in this file uses APIC_ID_MASK -- confirm they are equivalent.
 	 */
 	apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
 	if (PCPU_GET(cpuid) != apic_id) {
 		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
 		printf("SMP: apic_id = %d\n", apic_id);
 		printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]);
 		panic("cpuid mismatch! boom!!");
 	}
 
 	/* Init local apic for irq's */
 	apic_initialize();
 
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
 	/*
 	 * Activate smp_invltlb, although strictly speaking, this isn't
 	 * quite correct yet.  We should have a bitfield for cpus willing
 	 * to accept TLB flush IPI's or something and sync them.
 	 */
 	if (smp_cpus == mp_ncpus) {
 		invltlb_ok = 1;
 		smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
 		smp_active = 1;	 /* historic */
 	}
 
 	/* let other AP's wake up now */
 	mtx_unlock_spin(&ap_boot_mtx);
 
 	/* wait until all the AP's are up */
 	while (smp_started == 0)
 		; /* nothing */
 
 	/* seed this CPU's switch timestamp and tick count */
 	microuptime(PCPU_PTR(switchtime));
 	PCPU_SET(switchticks, ticks);
 
 	/* ok, now grab sched_lock and enter the scheduler */
 	enable_intr();
 	mtx_lock_spin(&sched_lock);
 	cpu_throw();	/* doesn't return */
 
 	panic("scheduler returned us to ap_init");
 }
 
 /*
  * Receiving side of the statclock IPI: every CPU that gets the IPI runs
  * statclock_process() for its own curproc, using the interrupted frame's
  * PC and user/kernel mode, with sched_lock held.
  */
 void
 forwarded_statclock(struct trapframe frame)
 {
 	int usermode;
 
 	mtx_lock_spin(&sched_lock);
 	usermode = TRAPF_USERMODE(&frame);
 	statclock_process(curproc, TRAPF_PC(&frame), usermode);
 	mtx_unlock_spin(&sched_lock);
 }
 
 void
 forward_statclock(void)
 {
 	int map;
 
 	CTR0(KTR_SMP, "forward_statclock");
 
 	if (!smp_started || !invltlb_ok || cold || panicstr)
 		return;
 
 	map = PCPU_GET(other_cpus) & ~stopped_cpus ;
 	if (map != 0)
 		ipi_selected(map, IPI_STATCLOCK);
 }
 
 /*
  * Receiving side of the hardclock IPI: on each hardclock() every other
  * CPU is sent an IPI that makes it run this function, which calls
  * hardclock_process() for its own curproc with sched_lock held.
  *
  * It would be nice to reduce contention on sched_lock if we could simply
  * peek at the CPU to determine the user/kernel state and call
  * hardclock_process() on the CPU receiving the clock interrupt and then
  * just use a simple IPI to handle any ast's if needed.
  */
 void
 forwarded_hardclock(struct trapframe frame)
 {
 	int usermode;
 
 	mtx_lock_spin(&sched_lock);
 	usermode = TRAPF_USERMODE(&frame);
 	hardclock_process(curproc, usermode);
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Sending side of the hardclock IPI: deliver IPI_HARDCLOCK to every other
  * CPU that is not stopped, provided SMP is fully started, the system is
  * no longer cold and no panic is in progress.
  */
 void
 forward_hardclock(void)
 {
 	u_int targets;
 
 	CTR0(KTR_SMP, "forward_hardclock");
 
 	if (smp_started && invltlb_ok && !cold && !panicstr) {
 		/* everyone but us, minus the CPUs that are currently stopped */
 		targets = PCPU_GET(other_cpus) & ~stopped_cpus;
 		if (targets != 0)
 			ipi_selected(targets, IPI_HARDCLOCK);
 	}
 }
 
 #ifdef APIC_INTR_REORDER
 /*
  *	Maintain mapping from softintr vector to isr bit in local apic.
  *
  *	For 'intr', records where the in-service bit of 'vector' lives:
  *	'location' is &lapic.isr0 advanced by (vector / 32) * 4 elements and
  *	'bit' is the mask 1 << (vector % 32).  Panics when 'intr' is outside
  *	0..32 or 'vector' is outside ICU_OFFSET..255.
  */
 void
 set_lapic_isrloc(int intr, int vector)
 {
 	/*
 	 * NOTE(review): intr == 32 passes this check; confirm that
 	 * apic_isrbit_location[] really has 33 entries, otherwise the bound
 	 * should be 'intr >= 32'.
 	 */
 	if (intr < 0 || intr > 32)
 		panic("set_lapic_isrloc: bad intr argument: %d",intr);
 	if (vector < ICU_OFFSET || vector > 255)
 		panic("set_lapic_isrloc: bad vector argument: %d",vector);
 	apic_isrbit_location[intr].location = &lapic.isr0 + ((vector>>5)<<2);
 	apic_isrbit_location[intr].bit = (1<<(vector & 31));
 }
 #endif
 
 /*
  * send an IPI to a set of cpus.
  *
  * 'cpus' selects the targets and 'ipi' is the IPI to deliver; both are
  * handed unchanged to selected_apic_ipi() with fixed delivery mode.
  */
 void
 ipi_selected(u_int32_t cpus, u_int ipi)
 {
 
 	/*
 	 * The function name is spelled out in the format string: __func__ is
 	 * a predefined identifier, not a string literal, so it cannot be
 	 * concatenated with one.
 	 */
 	CTR2(KTR_SMP, "ipi_selected: cpus: %x ipi: %x", cpus, ipi);
 	selected_apic_ipi(cpus, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs, including myself
  * (destination APIC_DEST_ALLISELF, fixed delivery mode)
  */
 void
 ipi_all(u_int ipi)
 {
 
 	/*
 	 * The function name is spelled out in the format string: __func__ is
 	 * a predefined identifier, not a string literal, so it cannot be
 	 * concatenated with one.
 	 */
 	CTR1(KTR_SMP, "ipi_all: ipi: %x", ipi);
 	apic_ipi(APIC_DEST_ALLISELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to all CPUs EXCEPT myself
  * (destination APIC_DEST_ALLESELF, fixed delivery mode)
  */
 void
 ipi_all_but_self(u_int ipi)
 {
 
 	/*
 	 * The function name is spelled out in the format string: __func__ is
 	 * a predefined identifier, not a string literal, so it cannot be
 	 * concatenated with one.
 	 */
 	CTR1(KTR_SMP, "ipi_all_but_self: ipi: %x", ipi);
 	apic_ipi(APIC_DEST_ALLESELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * send the IPI 'ipi' to myself
  * (destination APIC_DEST_SELF, fixed delivery mode)
  */
 void
 ipi_self(u_int ipi)
 {
 
 	/*
 	 * The function name is spelled out in the format string: __func__ is
 	 * a predefined identifier, not a string literal, so it cannot be
 	 * concatenated with one.
 	 */
 	CTR1(KTR_SMP, "ipi_self: ipi: %x", ipi);
 	apic_ipi(APIC_DEST_SELF, ipi, APIC_DELMODE_FIXED);
 }
 
 /*
  * SYSINIT hook (registered just below at SI_SUB_SMP): sets aps_ready to 1
  * with a release store.  ap_init() spins on aps_ready before proceeding.
  * The 'dummy' argument is unused.
  */
 void
 release_aps(void *dummy __unused)
 {
 	atomic_store_rel_int(&aps_ready, 1);
 }
 
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
Index: head/sys/i386/include/param.h
===================================================================
--- head/sys/i386/include/param.h	(revision 82308)
+++ head/sys/i386/include/param.h	(revision 82309)
@@ -1,185 +1,188 @@
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)param.h	5.8 (Berkeley) 6/28/91
  * $FreeBSD$
  */
 
 /*
  * Machine dependent constants for Intel 386.
  */
 
 /*
  * Round p (pointer or byte index) up to a correctly-aligned value
  * for all data types (int, long, ...).   The result is unsigned int
  * and must be cast to any desired pointer type.
  */
 #ifndef _ALIGNBYTES
 #define _ALIGNBYTES	(sizeof(int) - 1)
 #endif
 #ifndef _ALIGN
 #define _ALIGN(p)	(((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES)
 #endif
 
 #ifndef _MACHINE
 #define	_MACHINE	i386
 #endif
 #ifndef _MACHINE_ARCH
 #define	_MACHINE_ARCH	i386
 #endif
 
 #ifndef _NO_NAMESPACE_POLLUTION
 
 #ifndef _MACHINE_PARAM_H_
 #define	_MACHINE_PARAM_H_
 
 #ifndef MACHINE
 #define MACHINE		"i386"
 #endif
 #ifndef MACHINE_ARCH
 #define	MACHINE_ARCH	"i386"
 #endif
 #define MID_MACHINE	MID_I386
 
 /*
  * OBJFORMAT_NAMES is a comma-separated list of the object formats
  * that are supported on the architecture.
  */
 #define OBJFORMAT_NAMES		"elf", "aout"
 #define OBJFORMAT_DEFAULT	"elf"
 
 #ifdef SMP
 #define MAXCPU		16
 #else
 #define MAXCPU		1
 #endif /* SMP */
 
 #define ALIGNBYTES	_ALIGNBYTES
 #define ALIGN(p)	_ALIGN(p)
 
 #define PAGE_SHIFT	12		/* LOG2(PAGE_SIZE) */
 #define PAGE_SIZE	(1<<PAGE_SHIFT)	/* bytes/page */
 #define PAGE_MASK	(PAGE_SIZE-1)
 #define NPTEPG		(PAGE_SIZE/(sizeof (pt_entry_t)))
 
 #define NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define PDRSHIFT	22		/* LOG2(NBPDR) */
 #define NBPDR		(1<<PDRSHIFT)	/* bytes/page dir */
 #define PDRMASK		(NBPDR-1)
 
 #define DEV_BSHIFT	9		/* log2(DEV_BSIZE) */
 #define DEV_BSIZE	(1<<DEV_BSHIFT)
 
 #ifndef BLKDEV_IOSIZE
 #define BLKDEV_IOSIZE	PAGE_SIZE	/* default block device I/O size */
 #endif
 #define DFLTPHYS	(64 * 1024)	/* default max raw I/O transfer size */
 #define MAXPHYS		(128 * 1024)	/* max raw I/O transfer size */
 #define MAXDUMPPGS	(DFLTPHYS/PAGE_SIZE)
 
 #define IOPAGES	2		/* pages of i/o permission bitmap */
+
+#ifndef UPAGES
 #define UPAGES	2		/* pages of u-area */
+#endif
 
 /*
  * Ceiling on amount of swblock kva space.
  */
 #ifndef VM_SWZONE_SIZE_MAX
 #define VM_SWZONE_SIZE_MAX	(70 * 1024 * 1024)
 #endif
 
 /*
  * Ceiling on size of buffer cache (really only affects write queueing,
  * the VM page cache is not affected).
  */
 #ifndef VM_BCACHE_SIZE_MAX
 #define VM_BCACHE_SIZE_MAX	(200 * 1024 * 1024)
 #endif
 
 
 /*
  * Constants related to network buffer management.
  * MCLBYTES must be no larger than PAGE_SIZE.
  */
 #ifndef	MSIZE
 #define MSIZE		256		/* size of an mbuf */
 #endif	/* MSIZE */
 
 #ifndef	MCLSHIFT
 #define MCLSHIFT	11		/* convert bytes to mbuf clusters */
 #endif	/* MCLSHIFT */
 #define MCLBYTES	(1 << MCLSHIFT)	/* size of an mbuf cluster */
 
 /*
  * Some macros for units conversion
  */
 
 /* clicks to bytes */
 #define ctob(x)	((x)<<PAGE_SHIFT)
 
 /* bytes to clicks */
 #define btoc(x)	(((unsigned)(x)+PAGE_MASK)>>PAGE_SHIFT)
 
 /*
  * btodb() is messy and perhaps slow because `bytes' may be an off_t.  We
  * want to shift an unsigned type to avoid sign extension and we don't
  * want to widen `bytes' unnecessarily.  Assume that the result fits in
  * a daddr_t.
  */
 #define btodb(bytes)	 		/* calculates (bytes / DEV_BSIZE) */ \
 	(sizeof (bytes) > sizeof(long) \
 	 ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \
 	 : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT))
 
 #define dbtob(db)			/* calculates (db * DEV_BSIZE) */ \
 	((off_t)(db) << DEV_BSHIFT)
 
 /*
  * Mach derived conversion macros
  */
 #define trunc_page(x)		((x) & ~PAGE_MASK)
 #define round_page(x)		(((x) + PAGE_MASK) & ~PAGE_MASK)
 #define trunc_4mpage(x)		((unsigned)(x) & ~PDRMASK)
 #define round_4mpage(x)		((((unsigned)(x)) + PDRMASK) & ~PDRMASK)
 
 #define atop(x)			((unsigned)(x) >> PAGE_SHIFT)
 #define ptoa(x)			((unsigned)(x) << PAGE_SHIFT)
 
 #define i386_btop(x)		((unsigned)(x) >> PAGE_SHIFT)
 #define i386_ptob(x)		((unsigned)(x) << PAGE_SHIFT)
 
 #define	pgtok(x)		((x) * (PAGE_SIZE / 1024))
 
 #endif /* !_MACHINE_PARAM_H_ */
 #endif /* !_NO_NAMESPACE_POLLUTION */
Index: head/sys/i386/include/pcpu.h
===================================================================
--- head/sys/i386/include/pcpu.h	(revision 82308)
+++ head/sys/i386/include/pcpu.h	(revision 82309)
@@ -1,99 +1,79 @@
 /*-
  * Copyright (c) Peter Wemm <peter@netplex.com.au>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _MACHINE_GLOBALDATA_H_
 #define _MACHINE_GLOBALDATA_H_
 
 #ifdef _KERNEL
 
 #include <machine/segments.h>
 #include <machine/tss.h>
 
 /* XXX */
 #ifdef KTR_PERCPU
 #include <sys/ktr.h>
 #endif
 
 /*
  * This structure maps out the global data that needs to be kept on a
  * per-cpu basis.  genassym uses this to generate offsets for the assembler
  * code, which also provides external symbols so that C can get at them as
  * though they were really globals.
  *
  * The SMP parts are setup in pmap.c and locore.s for the BSP, and
  * mp_machdep.c sets up the data for the AP's to "see" when they awake.
  * The reason for doing it via a struct is so that an array of pointers
  * to each CPU's data can be set up for things like "check curproc on all
  * other processors"
  */
 struct globaldata {
 	struct	globaldata *gd_prvspace;	/* self-reference */
 	struct	proc *gd_curproc;		/* current process */
 	struct	proc *gd_idleproc;		/* idle process */
 	/* NOTE(review): presumably the proc tied to npx (FPU) state -- confirm */
 	struct	proc *gd_npxproc;
 	struct	pcb *gd_curpcb;			/* current pcb */
 	/* NOTE(review): looks like the time of the last switch -- confirm */
 	struct	timeval gd_switchtime;
 	struct	i386tss gd_common_tss;
 	/* NOTE(review): looks like the 'ticks' value at the last switch -- confirm */
 	int	gd_switchticks;
 	struct	segment_descriptor gd_common_tssd;
 	struct	segment_descriptor *gd_tss_gdt;
 	int	gd_currentldt;
 	u_int	gd_cpuid;			/* this cpu number */
 	u_int	gd_other_cpus;			/* all other cpus */
 	SLIST_ENTRY(globaldata) gd_allcpu;	/* linkage for a list of globaldata */
 	/* NOTE(review): presumably the spin locks held by this cpu -- confirm */
 	struct	lock_list_entry *gd_spinlocks;
 #ifdef KTR_PERCPU
 	volatile int	gd_ktr_idx;		/* Index into trace table */
 	char	*gd_ktr_buf;
 	char	gd_ktr_buf_data[KTR_SIZE];
 #endif
 };
 
-#ifdef SMP
-/*
- * This is the upper (0xff800000) address space layout that is per-cpu.
- * It is setup in locore.s and pmap.c for the BSP and in mp_machdep.c for
- * each AP.  genassym helps export this to the assembler code.
- */
-struct privatespace {
-	/* page 0 - data page */
-	struct	globaldata globaldata;
-	char	__filler0[PAGE_SIZE - sizeof(struct globaldata)];
-
-	/* page 1 - idle stack (UPAGES pages) */
-	char	idlestack[UPAGES * PAGE_SIZE];
-	/* page 1+UPAGES... */
-};
-
-extern struct privatespace SMP_prvspace[];
-
-#endif
-
 #endif	/* _KERNEL */
 
 #endif	/* ! _MACHINE_GLOBALDATA_H_ */
Index: head/sys/i386/include/privatespace.h
===================================================================
--- head/sys/i386/include/privatespace.h	(nonexistent)
+++ head/sys/i386/include/privatespace.h	(revision 82309)
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) Peter Wemm <peter@netplex.com.au>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_PRIVATESPACE_H_
+#define _MACHINE_PRIVATESPACE_H_
+
+/*
+ * This is the upper (0xff800000) address space layout that is per-cpu.
+ * It is setup in locore.s and pmap.c for the BSP and in mp_machdep.c for
+ * each AP.  This is only applicable to the x86 SMP kernel.
+ */
+struct privatespace {
+	/* page 0 - data page */
+	struct	globaldata globaldata;
+	char	__filler0[PAGE_SIZE - sizeof(struct globaldata)];
+
+	/* page 1 - idle stack (UPAGES pages) */
+	char	idlestack[UPAGES * PAGE_SIZE];
+	/* page 1+UPAGES... */
+};
+
+extern struct privatespace SMP_prvspace[];
+
+#endif	/* ! _MACHINE_PRIVATESPACE_H_ */

Property changes on: head/sys/i386/include/privatespace.h
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: head/sys/kern/imgact_aout.c
===================================================================
--- head/sys/kern/imgact_aout.c	(revision 82308)
+++ head/sys/kern/imgact_aout.c	(revision 82309)
@@ -1,281 +1,283 @@
 /*
  * Copyright (c) 1993, David Greenman
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
+#include "opt_upages.h"
+
 #include <sys/param.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/imgact_aout.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/pioctl.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sysent.h>
 #include <sys/syscall.h>
 #include <sys/vnode.h>
 #include <sys/user.h>
 
 #include <machine/md_var.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 
 static int	exec_aout_imgact __P((struct image_params *imgp));
 
 /*
  * System entry vector identified as "FreeBSD a.out".  exec_aout_imgact()
  * installs it as the process's p_sysent after a successful activation.
  * It references the sysent table, the sendsig() handler, the sigcode
  * trampoline with its size, and aout_coredump() as the core-dump routine.
  */
 struct sysentvec aout_sysvec = {
 	SYS_MAXSYSCALL,
 	sysent,
 	0,
 	0,
 	0,
 	0,
 	0,
 	0,
 	0,
 	sendsig,
 	sigcode,
 	&szsigcode,
 	0,
 	"FreeBSD a.out",
 	aout_coredump,
 	NULL,
 	MINSIGSTKSZ
 };
 
 /*
  * Image activator for a.out executables.
  *
  * Validates the a.out header found in imgp->image_header, replaces the
  * process's address space via exec_new_vmspace(), and maps the text, data
  * and bss regions of the image into the new map.  On success it records the
  * segment sizes/addresses in the vmspace, sets imgp->entry_addr, clears
  * imgp->interpreted, points the process at aout_sysvec and marks the vnode
  * VTEXT.
  *
  * Must be called with Giant held (GIANT_REQUIRED).
  *
  * Returns 0 on success; -1 when the header is not an a.out variant handled
  * here (machine id or magic mismatch, entry point outside the text region,
  * text/data size not page aligned); EFAULT when text + data exceed the file
  * size; ENOMEM when the size limits are exceeded; otherwise the value
  * returned by exec_extract_strings() or vm_map_insert().
  */
 static int
 exec_aout_imgact(imgp)
 	struct image_params *imgp;
 {
 	const struct exec *a_out = (const struct exec *) imgp->image_header;
 	struct vmspace *vmspace;
 	struct vnode *vp;
 	vm_map_t map;
 	vm_object_t object;
 	vm_offset_t text_end, data_end;
 	unsigned long virtual_offset;
 	unsigned long file_offset;
 	unsigned long bss_size;
 	int error;
 
 	GIANT_REQUIRED;
 
 	/*
 	 * Linux and *BSD binaries look very much alike,
 	 * only the machine id is different:
 	 * 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI.
 	 * NetBSD is in network byte order.. ugh.
 	 */
 	if (((a_out->a_magic >> 16) & 0xff) != 0x86 &&
 	    ((a_out->a_magic >> 16) & 0xff) != 0 &&
 	    ((((int)ntohl(a_out->a_magic)) >> 16) & 0xff) != 0x86)
                 return -1;
 
 	/*
 	 * Set file/virtual offset based on a.out variant.
 	 *	We do two cases: host byte order and network byte order
 	 *	(for NetBSD compatibility)
 	 */
 	switch ((int)(a_out->a_magic & 0xffff)) {
 	case ZMAGIC:
 		virtual_offset = 0;
 		if (a_out->a_text) {
 			file_offset = PAGE_SIZE;
 		} else {
 			/* Bill's "screwball mode" */
 			file_offset = 0;
 		}
 		break;
 	case QMAGIC:
 		virtual_offset = PAGE_SIZE;
 		file_offset = 0;
 		/* Pass PS_STRINGS for BSD/OS binaries only. */
 		if (N_GETMID(*a_out) == MID_ZERO)
 			imgp->ps_strings = PS_STRINGS;
 		break;
 	default:
 		/* NetBSD compatibility */
 		switch ((int)(ntohl(a_out->a_magic) & 0xffff)) {
 		case ZMAGIC:
 		case QMAGIC:
 			virtual_offset = PAGE_SIZE;
 			file_offset = 0;
 			break;
 		default:
 			return (-1);
 		}
 	}
 
 	/* bss is mapped in whole pages, so round its size up. */
 	bss_size = roundup(a_out->a_bss, PAGE_SIZE);
 
 	/*
 	 * Check various fields in header for validity/bounds.
 	 */
 	if (/* entry point must lie within the text region */
 	    a_out->a_entry < virtual_offset ||
 	    a_out->a_entry >= virtual_offset + a_out->a_text ||
 
 	    /* text and data size must each be page rounded */
 	    a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK)
 		return (-1);
 
 	/* text + data can't exceed file size */
 	if (a_out->a_data + a_out->a_text > imgp->attr->va_size)
 		return (EFAULT);
 
 	/*
 	 * text/data/bss must not exceed limits
 	 */
 	mtx_assert(&Giant, MA_OWNED);
 	if (/* text can't exceed maximum text size */
 	    a_out->a_text > MAXTSIZ ||
 
 	    /* data + bss can't exceed rlimit */
 	    a_out->a_data + bss_size >
 		imgp->proc->p_rlimit[RLIMIT_DATA].rlim_cur)
 			return (ENOMEM);
 
 	/* copy in arguments and/or environment from old process */
 	error = exec_extract_strings(imgp);
 	if (error)
 		return (error);
 
 	/*
 	 * Destroy old process VM and create a new one (with a new stack)
 	 */
 	exec_new_vmspace(imgp);
 
 	/*
 	 * The vm space can be changed by exec_new_vmspace
 	 */
 	vmspace = imgp->proc->p_vmspace;
 
 	vp = imgp->vp;
 	map = &vmspace->vm_map;
 	vm_map_lock(map);
 	VOP_GETVOBJECT(vp, &object);
 	vm_object_reference(object);
 
 	/* Map the text segment read/execute, backed by the image file. */
 	text_end = virtual_offset + a_out->a_text;
 	error = vm_map_insert(map, object,
 		file_offset,
 		virtual_offset, text_end,
 		VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
 		MAP_COPY_ON_WRITE | MAP_PREFAULT);
 	/*
 	 * NOTE(review): the object reference taken above does not appear to
 	 * be dropped on this error path (nor on the data-mapping one below)
 	 * -- confirm whether vm_map_insert() releases it on failure.
 	 */
 	if (error) {
 		vm_map_unlock(map);
 		return (error);
 	}
 	/* Map the data segment directly after the text, from the same file. */
 	data_end = text_end + a_out->a_data;
 	if (a_out->a_data) {
 		vm_object_reference(object);
 		error = vm_map_insert(map, object,
 			file_offset + a_out->a_text,
 			text_end, data_end,
 			VM_PROT_ALL, VM_PROT_ALL,
 			MAP_COPY_ON_WRITE | MAP_PREFAULT);
 		if (error) {
 			vm_map_unlock(map);
 			return (error);
 		}
 	}
 
 	/* bss follows the data and has no backing object (NULL). */
 	if (bss_size) {
 		error = vm_map_insert(map, NULL, 0,
 			data_end, data_end + bss_size,
 			VM_PROT_ALL, VM_PROT_ALL, 0);
 		if (error) {
 			vm_map_unlock(map);
 			return (error);
 		}
 	}
 	vm_map_unlock(map);
 
 	/* Fill in process VM information */
 	vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT;
 	vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT;
 	vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset;
 	vmspace->vm_daddr = (caddr_t) (uintptr_t)
 			    (virtual_offset + a_out->a_text);
 
 	/* Fill in image_params */
 	imgp->interpreted = 0;
 	imgp->entry_addr = a_out->a_entry;
 
 	imgp->proc->p_sysent = &aout_sysvec;
 
 	/* Indicate that this file should not be modified */
 	imgp->vp->v_flag |= VTEXT;
 
 	return (0);
 }
 
 /*
  * Dump core, into a file named as described in the comments for
  * expand_name(), unless the process was setuid/setgid.
  *
  * The image written to vp consists of whatever cpu_coredump() emits,
  * followed by the data segment at file offset ctob(UPAGES) and then the
  * stack at ctob(UPAGES) + ctob(vm_dsize).
  *
  * p     - process being dumped
  * vp    - vnode of the core file to write
  * limit - size limit; the dump is refused if it would reach it
  *
  * Returns EFAULT when ctob(UPAGES + vm_dsize + vm_ssize) >= limit,
  * otherwise 0 or the first error from cpu_coredump()/vn_rdwr().
  */
 int
 aout_coredump(p, vp, limit)
 	register struct proc *p;
 	register struct vnode *vp;
 	off_t limit;
 {
 	register struct ucred *cred = p->p_ucred;
 	register struct vmspace *vm = p->p_vmspace;
 	int error;
 
 	if (ctob(UPAGES + vm->vm_dsize + vm->vm_ssize) >= limit)
 		return (EFAULT);
 	/* Refresh the kinfo_proc kept in the u-area before it is dumped. */
 	fill_kinfo_proc(p, &p->p_addr->u_kproc);
 	error = cpu_coredump(p, vp, cred);
 	/* Data segment: written right after the first UPAGES pages. */
 	if (error == 0)
 		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
 		    (int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE,
 		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
 	/* Stack: page-aligned region ending at USRSTACK, written after data. */
 	if (error == 0)
 		error = vn_rdwr(UIO_WRITE, vp,
 		    (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
 		    round_page(ctob(vm->vm_ssize)),
 		    (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
 		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
 	return (error);
 }
 
 /*
  * Tell kern_execve.c about it, with a little help from the linker.
  * aout_execsw pairs the exec_aout_imgact() activator with the name "a.out".
  */
 static struct execsw aout_execsw = { exec_aout_imgact, "a.out" };
 EXEC_SET(aout, aout_execsw);
Index: head/sys/pc98/i386/machdep.c
===================================================================
--- head/sys/pc98/i386/machdep.c	(revision 82308)
+++ head/sys/pc98/i386/machdep.c	(revision 82309)
@@ -1,2594 +1,2598 @@
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  * $FreeBSD$
  */
 
 #include "opt_atalk.h"
 #include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_ipx.h"
 #include "opt_isa.h"
 #include "opt_maxmem.h"
 #include "opt_msgbuf.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
+#include "opt_upages.h"
 /* #include "opt_userconfig.h" */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/reboot.h>
 #include <sys/smp.h>
 #include <sys/callout.h>
 #include <sys/msgbuf.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <sys/lock.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
 #include <sys/user.h>
 #include <sys/exec.h>
 #include <sys/cons.h>
 
 #include <ddb/ddb.h>
 
 #include <net/netisr.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/reg.h>
 #include <machine/clock.h>
 #include <machine/specialreg.h>
 #include <machine/bootinfo.h>
 #include <machine/md_var.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
 #include <machine/globals.h>
 #ifdef PERFMON
 #include <machine/perfmon.h>
+#endif
+#ifdef SMP
+#include <machine/privatespace.h>
 #endif
 
 #include <i386/isa/icu.h>
 #include <i386/isa/intr_machdep.h>
 #ifdef PC98
 #include <pc98/pc98/pc98_machdep.h>
 #include <pc98/pc98/pc98.h>
 #else
 #include <isa/rtc.h>
 #endif
 #include <machine/vm86.h>
 #include <sys/ptrace.h>
 #include <machine/sigframe.h>
 
 extern void init386 __P((int first));
 extern void dblfault_handler __P((void));
 
 extern void printcpuinfo(void);	/* XXX header file */
 extern void earlysetcpuclass(void);	/* same header file */
 extern void finishidentcpu(void);
 extern void panicifcpuunsupported(void);
 extern void initializecpu(void);
 
 /*
  * Sanity checks applied to user-supplied register state in osigreturn():
  * CS_SECURE is true when the selector's privilege level is SEL_UPL;
  * EFL_SECURE is true when the new (ef) and old (oef) eflags differ only in
  * bits covered by PSL_USERCHANGE.
  */
 #define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
 static void cpu_startup __P((void *));
 #ifdef CPU_ENABLE_SSE
 static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
 static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
 #endif /* CPU_ENABLE_SSE */
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
 #ifdef PC98
 int	need_pre_dma_flush;	/* If 1, use wbinvd befor DMA transfer. */
 int	need_post_dma_flush;	/* If 1, use invd after DMA transfer. */
 #endif
 
 int	_udatasel, _ucodesel;
 u_int	atdevbase;
 
 #if defined(SWTCH_OPTIM_STATS)
 extern int swtch_optim_stats;
 SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
 	CTLFLAG_RD, &swtch_optim_stats, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
 	CTLFLAG_RD, &tlb_flush_count, 0, "");
 #endif
 
 #ifdef PC98
 static int	ispc98 = 1;
 #else
 static int	ispc98 = 0;
 #endif
 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
 
 int physmem = 0;
 int cold = 1;
 
 #ifdef COMPAT_43
 static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code));
 #endif
 
 /*
  * Read-only sysctl handler for the physmem node under _hw: reports physmem
  * converted with ctob() through sysctl_handle_int() and returns its result.
  */
 static int
 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
 {
 	int error = sysctl_handle_int(oidp, 0, ctob(physmem), req);
 	return (error);
 }
 
 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_physmem, "IU", "");
 
 /*
  * Read-only sysctl handler for the usermem node under _hw: reports
  * ctob(physmem - cnt.v_wire_count), i.e. physmem less the wired page
  * count, through sysctl_handle_int() and returns its result.
  */
 static int
 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
 {
 	int error = sysctl_handle_int(oidp, 0,
 		ctob(physmem - cnt.v_wire_count), req);
 	return (error);
 }
 
 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_usermem, "IU", "");
 
 /*
  * Read-only sysctl handler for the availpages node under _hw: reports
  * i386_btop(avail_end - avail_start) through sysctl_handle_int() and
  * returns its result.
  */
 static int
 sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
 {
 	int error = sysctl_handle_int(oidp, 0,
 		i386_btop(avail_end - avail_start), req);
 	return (error);
 }
 
 SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_availpages, "I", "");
 
 int Maxmem = 0;
 #ifdef PC98
 int Maxmem_under16M = 0;
 #endif
 long dumplo;
 
 vm_offset_t phys_avail[10];
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
 
 struct kva_md_info kmi;
 
 static struct trapframe proc0_tf;
 #ifndef SMP
 static struct globaldata __globaldata;
 #endif
 
 struct mtx sched_lock;
 struct mtx Giant;
 
 /*
  * Machine-dependent startup, registered through SYSINIT at SI_SUB_CPU /
  * SI_ORDER_FIRST.  The argument is unused.
  *
  * In order: identifies the CPU and starts the clock, prints the real
  * memory size (and, when bootverbose, each phys_avail[] chunk), sets up
  * the kernel submaps with vm_ksubmap_init(), optionally runs userconfig,
  * prints the available memory, initializes the buffer cache and pager
  * buffers, registers this CPU's globaldata and, on non-SMP kernels, calls
  * cpu_setregs().  The large "#if 0" region is compiled out.
  */
 static void
 cpu_startup(dummy)
 	void *dummy;
 {
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	earlysetcpuclass();
 	startrtclock();
 	printcpuinfo();
 	panicifcpuunsupported();
 #ifdef PERFMON
 	perfmon_init();
 #endif
 	printf("real memory  = %u (%uK bytes)\n", ptoa(Maxmem),
 	    ptoa(Maxmem) / 1024);
 	/*
 	 * Display any holes after the first chunk of extended memory.
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		/* phys_avail[] holds (start, end) pairs; a 0 end terminates. */
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
 			unsigned int size1;
 
 			size1 = phys_avail[indx + 1] - phys_avail[indx];
 			printf("0x%08x - 0x%08x, %u bytes (%u pages)\n",
 			    phys_avail[indx], phys_avail[indx + 1] - 1, size1,
 			    size1 / PAGE_SIZE);
 		}
 	}
 
 	vm_ksubmap_init(&kmi);
 
 #if 0
 	/*
 	 * Calculate callout wheel size
 	 */
 	for (callwheelsize = 1, callwheelbits = 0;
 	     callwheelsize < ncallout;
 	     callwheelsize <<= 1, ++callwheelbits)
 		;
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Allocate space for system data structures.
 	 * The first available kernel virtual address is in "v".
 	 * As pages of kernel virtual memory are allocated, "v" is incremented.
 	 * As pages of memory are allocated and cleared,
 	 * "firstaddr" is incremented.
 	 * An index into the kernel page table corresponding to the
 	 * virtual memory address maintained in "v" is kept in "mapaddr".
 	 */
 
 	/*
 	 * Make two passes.  The first pass calculates how much memory is
 	 * needed and allocates it.  The second pass assigns virtual
 	 * addresses to the various data structures.
 	 */
 	firstaddr = 0;
 again:
 	v = (caddr_t)firstaddr;
 
 #define	valloc(name, type, num) \
 	    (name) = (type *)v; v = (caddr_t)((name)+(num))
 #define	valloclim(name, type, num, lim) \
 	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
 
 	valloc(callout, struct callout, ncallout);
 	valloc(callwheel, struct callout_tailq, callwheelsize);
 
 	/*
 	 * Discount the physical memory larger than the size of kernel_map
 	 * to avoid eating up all of KVA space.
 	 */
 	if (kernel_map->first_free == NULL) {
 		printf("Warning: no free entries in kernel_map.\n");
 		physmem_est = physmem;
 	} else {
 		physmem_est = min(physmem, btoc(kernel_map->max_offset -
 		    kernel_map->min_offset));
 	}
 
 	/*
 	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
 	 * For the first 64MB of ram nominally allocate sufficient buffers to
 	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
 	 * buffers to cover 1/20 of our ram over 64MB.  When auto-sizing
 	 * the buffer cache we limit the eventual kva reservation to
 	 * maxbcache bytes.
 	 *
 	 * factor represents the 1/4 x ram conversion.
 	 */
 	if (nbuf == 0) {
 		int factor = 4 * BKVASIZE / PAGE_SIZE;
 
 		nbuf = 50;
 		if (physmem_est > 1024)
 			nbuf += min((physmem_est - 1024) / factor,
 			    16384 / factor);
 		if (physmem_est > 16384)
 			nbuf += (physmem_est - 16384) * 2 / (factor * 5);
 
 		if (maxbcache && nbuf > maxbcache / BKVASIZE)
 			nbuf = maxbcache / BKVASIZE;
 	}
 
 	/*
 	 * Do not allow the buffer_map to be more than 1/2 the size of the
 	 * kernel_map.
 	 */
 	if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / 
 	    (BKVASIZE * 2)) {
 		nbuf = (kernel_map->max_offset - kernel_map->min_offset) / 
 		    (BKVASIZE * 2);
 		printf("Warning: nbufs capped at %d\n", nbuf);
 	}
 
 	nswbuf = max(min(nbuf/4, 256), 16);
 
 	valloc(swbuf, struct buf, nswbuf);
 	valloc(buf, struct buf, nbuf);
 	v = bufhashinit(v);
 
 	/*
 	 * End of first pass, size has been calculated so allocate memory
 	 */
 	if (firstaddr == 0) {
 		size = (vm_size_t)(v - firstaddr);
 		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
 		if (firstaddr == 0)
 			panic("startup: no room for tables");
 		goto again;
 	}
 
 	/*
 	 * End of second pass, addresses have been assigned
 	 */
 	if ((vm_size_t)(v - firstaddr) != size)
 		panic("startup: table size inconsistency");
 
 	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
 			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
 	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
 				(nbuf*BKVASIZE));
 	buffer_map->system_map = 1;
 	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
 				(nswbuf*MAXPHYS) + pager_map_size);
 	pager_map->system_map = 1;
 	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 				(16*(ARG_MAX+(PAGE_SIZE*3))));
 
 	/*
 	 * XXX: Mbuf system machine-specific initializations should
 	 *      go here, if anywhere.
 	 */
 
 	/*
 	 * Initialize callouts
 	 */
 	SLIST_INIT(&callfree);
 	for (i = 0; i < ncallout; i++) {
 		callout_init(&callout[i], 0);
 		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
 	}
 
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&callwheel[i]);
 	}
 
 	mtx_init(&callout_lock, "callout", MTX_SPIN | MTX_RECURSE);
 #endif
 
 #if defined(USERCONFIG)
 	userconfig();
 	cninit();		/* the preferred console may have changed */
 #endif
 
 	printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count),
 	    ptoa(cnt.v_free_count) / 1024);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 
 	globaldata_register(GLOBALDATA);
 #ifndef SMP
 	/* For SMP, we delay the cpu_setregs() until after SMP startup. */
 	cpu_setregs();
 #endif
 }
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * at top to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  */
 #ifdef COMPAT_43
 /*
  * Old-style signal delivery (COMPAT_43 only), building a struct osigframe
  * on the user stack.  sendsig() hands off to this routine when the signal
  * is a member of the process's ps_osigset.
  *
  * catcher - handler to invoke
  * sig     - signal number (translated through sv_sigtbl when present)
  * mask    - signal mask to record in the saved context
  * code    - passed to the handler as its code argument
  *
  * If the frame cannot be placed on a writable stack the process is sent a
  * default-action SIGILL instead; if the copyout fails the process is
  * terminated with sigexit().
  */
 static void
 osendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct osigframe sf;
 	struct osigframe *fp;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	regs = p->p_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Frame goes at the very top of the alternate stack. */
 		fp = (struct osigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct osigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/* Otherwise one frame below the current user %esp. */
 		fp = (struct osigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *fp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)fp) == 0 ||
 	    !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		psignal(p, SIGILL);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
 		sf.sf_siginfo.si_signo = sig;
 		sf.sf_siginfo.si_code = code;
 		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_arg2 = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/* Save most if not all of trap frame. */
 	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
 	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
 	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
 	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
 	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
 	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
 	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
 	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
 	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
 	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
 	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
 	sf.sf_siginfo.si_sc.sc_gs = rgs();
 	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
 
 	/* Build the signal context to be used by osigreturn(). */
 	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
 	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
 	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
 	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
 	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
 	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
 	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
 		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
 		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
 		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
 
 		/* Without VME, VIF/VIP come from the emulated vm86_eflags. */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_siginfo.si_sc.sc_ps =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/* See sendsig() for comments. */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(p, SIGILL);
 		/* NOTREACHED */
 	}
 
 	/*
 	 * Redirect the user context: stack at the new frame, pc at
 	 * PS_STRINGS - szosigcode, and user code/data selectors loaded.
 	 */
 	regs->tf_esp = (int)fp;
 	regs->tf_eip = PS_STRINGS - szosigcode;
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 }
 #endif
 
 /*
  * Deliver a signal to the current process by building a struct sigframe
  * (ucontext plus handler arguments) on the user stack and redirecting the
  * saved user registers to the signal trampoline.
  *
  * catcher - handler to invoke
  * sig     - signal number (translated through sv_sigtbl when present)
  * mask    - signal mask saved in the ucontext
  * code    - passed to the handler as its code argument
  *
  * Under COMPAT_43, signals that are members of ps_osigset are handed to
  * osendsig() instead.  If the frame cannot be placed on a writable stack
  * the process is sent a default-action SIGILL; if the copyout fails the
  * process is terminated with sigexit().
  */
 void
 sendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct sigframe sf;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct sigframe *sfp;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 #ifdef COMPAT_43
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		PROC_UNLOCK(p);
 		osendsig(catcher, sig, mask, code);
 		return;
 	}
 #endif
 	regs = p->p_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/*
 	 * Copies the entire trapframe into the mcontext starting at mc_fs;
 	 * presumably the mcontext fields from mc_fs onward mirror struct
 	 * trapframe -- TODO confirm against the structure definitions.
 	 */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Frame goes at the very top of the alternate stack. */
 		sfp = (struct sigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct sigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/* Otherwise one frame below the current user %esp. */
 		sfp = (struct sigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *sfp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)sfp) == 0 ||
 	    !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 #ifdef DEBUG
 		printf("process %d has trashed its stack\n", p->p_pid);
 #endif
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		psignal(p, SIGILL);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill siginfo structure. */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = code;
 		sf.sf_si.si_addr = (void *)regs->tf_err;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		/* Without VME, VIF/VIP come from the emulated vm86_eflags. */
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * We should never have PSL_T set when returning from vm86
 		 * mode.  It may be set here if we deliver a signal before
 		 * getting to vm86 mode, so turn it off.
 		 *
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(p, SIGILL);
 		/* NOTREACHED */
 	}
 
 	/*
 	 * Redirect the user context: stack at the new frame, pc at
 	 * PS_STRINGS minus the sysent's signal-code size, and user
 	 * code/data selectors loaded.
 	 * NOTE(review): osendsig() also reloads %gs with load_gs(_udatasel)
 	 * at this point; this path does not -- confirm that is intentional.
 	 */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  */
 #ifdef COMPAT_43
 /*
  * Old-style sigreturn (COMPAT_43 only): restore the register state, signal
  * mask and alternate-stack flag from the struct osigcontext at
  * uap->sigcntxp.
  *
  * Returns EFAULT if the context is not readable, EINVAL if the requested
  * state is rejected (vm86 area not set up, disallowed eflags change, or a
  * %cs that fails CS_SECURE), and EJUSTRETURN on success.
  *
  * NOTE(review): after the useracc() check the context is read in place
  * through the user pointer rather than copied in first -- confirm this is
  * acceptable here.
  */
 int
 osigreturn(p, uap)
 	struct proc *p;
 	struct osigreturn_args /* {
 		struct osigcontext *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	struct osigcontext *scp;
 	int eflags;
 
 	regs = p->p_frame;
 	scp = uap->sigcntxp;
 	if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ))
 		return (EFAULT);
 	eflags = scp->sc_ps;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Only the user-changeable eflags bits are taken from the
 		 * context; the rest come from the current frame, and PSL_VM
 		 * is forced on.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		tf->tf_vm86_ds = scp->sc_ds;
 		tf->tf_vm86_es = scp->sc_es;
 		tf->tf_vm86_fs = scp->sc_fs;
 		tf->tf_vm86_gs = scp->sc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		if (!CS_SECURE(scp->sc_cs)) {
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 		regs->tf_ds = scp->sc_ds;
 		regs->tf_es = scp->sc_es;
 		regs->tf_fs = scp->sc_fs;
 	}
 
 	/* Restore remaining registers. */
 	regs->tf_eax = scp->sc_eax;
 	regs->tf_ebx = scp->sc_ebx;
 	regs->tf_ecx = scp->sc_ecx;
 	regs->tf_edx = scp->sc_edx;
 	regs->tf_esi = scp->sc_esi;
 	regs->tf_edi = scp->sc_edi;
 	regs->tf_cs = scp->sc_cs;
 	regs->tf_ss = scp->sc_ss;
 	regs->tf_isp = scp->sc_isp;
 
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (scp->sc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	/* Restore the old-format mask, then strip unmaskable signals. */
 	SIGSETOLD(p->p_sigmask, scp->sc_mask);
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	regs->tf_ebp = scp->sc_fp;
 	regs->tf_esp = scp->sc_sp;
 	regs->tf_eip = scp->sc_pc;
 	regs->tf_eflags = eflags;
 	return (EJUSTRETURN);
 }
 #endif
 
 /*
  * sigreturn system call: restore the register state and signal mask
  * saved in the user-supplied ucontext_t (uap->sigcntxp).
  *
  * Returns EFAULT if the context is not readable, EINVAL if the context
  * asks for a state that is not allowed (vm86 mode without a set-up vm86
  * area, changed privileged eflags bits, or a privileged %cs), and
  * EJUSTRETURN once the trapframe has been replaced.
  */
 int
 sigreturn(p, uap)
 	struct proc *p;
 	struct sigreturn_args /* {
 		ucontext_t *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	int cs, eflags;
 
 	ucp = uap->sigcntxp;
 #ifdef COMPAT_43
 	/*
 	 * Only the first sizeof(struct osigcontext) bytes are validated
 	 * here, which is enough to inspect sc_trapno.  A value of 0x01d516
 	 * hands the request to osigreturn() -- presumably a marker stored
 	 * by the old-style signal delivery code; confirm against sendsig.
 	 */
 	if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ))
 		return (EFAULT);
 	if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516)
 		return (osigreturn(p, (struct osigreturn_args *)uap));
 	/*
 	 * Since ucp is not an osigcontext but a ucontext_t, we have to
 	 * check again if all of it is accessible.  A ucontext_t is
 	 * much larger, so instead of just checking for the pointer
 	 * being valid for the size of an osigcontext, now check for
 	 * it being valid for a whole, new-style ucontext_t.
 	 */
 #endif
 	if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ))
 		return (EFAULT);
 
 	regs = p->p_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		/* The saved context wants to resume in vm86 mode. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Merge the user-supplied flags into the current frame's
 		 * flags: only the bits in VME_USERCHANGE (or VM_USERCHANGE
 		 * without VME) are taken from the context, and PSL_VM is
 		 * forced on.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * Copy sizeof(struct trapframe) bytes starting at mc_fs over
 		 * the frame.  NOTE(review): this relies on the mcontext
 		 * fields from mc_fs onward having the same layout as
 		 * struct trapframe -- confirm against the headers.
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		/*
 		 * The segment values just copied go into the vm86 slots of
 		 * the frame; the ordinary slots are then reset to the user
 		 * data selector.
 		 */
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 			printf("sigreturn: eflags = 0x%x\n", eflags);
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			printf("sigreturn: cs = 0x%x\n", cs);
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 
 		/* Checks passed: replace the whole frame from the context. */
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 	/* The signal stack flag and signal mask are changed under the proc lock. */
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	p->p_sigmask = ucp->uc_sigmask;
 	/* Apply SIG_CANTMASK to the mask that was just taken from user space. */
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	return (EJUSTRETURN);
 }
 
 /*
  * Machine-dependent part of boot().
  *
  * Nothing is needed here at present; `howto' is accepted and ignored.
  * Code may migrate back here from boot() in the future.
  */
 void
 cpu_boot(int howto)
 {
 	/* Intentionally empty. */
 	return;
 }
 
 /*
  * Stop the CPU as far as software can: issue `hlt' over and over.
  * This function never returns.
  */
 void
 cpu_halt(void)
 {
 	while (1) {
 		__asm__ ("hlt");
 	}
 }
 
 /*
  * Hook to idle the CPU when possible.  This currently only works in
  * the !SMP case, as there is no clean way to ensure that a CPU will be
  * woken when there is work available for it.
  *
  * cpu_idle_hlt is exported read/write as machdep.cpu_idle_hlt; setting
  * it to 0 makes cpu_idle() return without halting.
  */
 static int	cpu_idle_hlt = 1;
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 
 /*
  * Note that we have to be careful here to avoid a race between checking
  * procrunnable() and actually halting.  If we don't do this, we may waste
  * the time between calling hlt and the next interrupt even though there
  * is a runnable process.
  *
  * Interrupts are therefore disabled around the procrunnable() check, and
  * when nothing is runnable they are re-enabled by an `sti' that is
  * immediately followed by `hlt' inside a single asm statement.  Keeping
  * the two instructions adjacent means the compiler cannot place anything
  * between them, so an interrupt cannot be taken after the check but
  * before the halt.
  */
 void
 cpu_idle(void)
 {
 #ifndef SMP
 	if (cpu_idle_hlt) {
 		disable_intr();
 		if (procrunnable()) {
 			enable_intr();
 		} else {
 			/*
 			 * hlt must be the very next instruction after sti,
 			 * otherwise a wakeup interrupt could be handled in
 			 * between and we would halt with work pending.
 			 */
 			__asm __volatile("sti; hlt");
 		}
 	}
 #endif
 }
 
 /*
  * Clear registers on exec.
  *
  * Resets the trapframe of process `p' so the new image starts at `entry'
  * with stack pointer `stack', user code/data selectors, and %ebx set to
  * `ps_strings'.  Also drops any private LDT, resets %gs and the hardware
  * debug registers, and re-arms the FPU/emulator trap.
  *
  * The single-step flag (PSL_T) of the old frame is carried over into the
  * new frame; every other eflags bit comes from PSL_USER.
  */
 void
 setregs(p, entry, stack, ps_strings)
 	struct proc *p;
 	u_long entry;
 	u_long stack;
 	u_long ps_strings;
 {
 	struct trapframe *regs = p->p_frame;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 	int saved_eflags;
 
 	if (pcb->pcb_ldt)
 		user_ldt_free(pcb);
 
 	/*
 	 * Remember the trace flag before the frame is wiped; reading it
 	 * back after bzero() would always yield 0.
 	 */
 	saved_eflags = regs->tf_eflags & PSL_T;
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_eip = entry;
 	regs->tf_esp = stack;
 	regs->tf_eflags = PSL_USER | saved_eflags;
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_cs = _ucodesel;
 
 	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 	regs->tf_ebx = ps_strings;
 
 	/*
 	 * Reset %gs as well: load it directly if this pcb is the one
 	 * currently running, otherwise store it in the pcb.
 	 */
 	if (pcb == PCPU_GET(curpcb))
 		load_gs(_udatasel);
 	else
 		pcb->pcb_gs = _udatasel;
 
 	/*
 	 * Reset the hardware debug registers if they were in use.
 	 * They won't have any meaning for the newly exec'd process.
 	 */
 	if (pcb->pcb_flags & PCB_DBREGS) {
 		pcb->pcb_dr0 = 0;
 		pcb->pcb_dr1 = 0;
 		pcb->pcb_dr2 = 0;
 		pcb->pcb_dr3 = 0;
 		pcb->pcb_dr6 = 0;
 		pcb->pcb_dr7 = 0;
 		if (pcb == PCPU_GET(curpcb)) {
 			/*
 			 * Clear the debug registers on the running
 			 * CPU, otherwise they will end up affecting
 			 * the next process we switch to.
 			 */
 			reset_dbregs();
 		}
 		pcb->pcb_flags &= ~PCB_DBREGS;
 	}
 
 	/*
 	 * Initialize the math emulator (if any) for the current process.
 	 * Actually, just clear the bit that says that the emulator has
 	 * been initialized.  Initialization is delayed until the process
 	 * traps to the emulator (if it is done at all) mainly because
 	 * emulators don't provide an entry point for initialization.
 	 */
 	p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;
 
 	/*
 	 * Arrange to trap the next npx or `fwait' instruction (see npx.c
 	 * for why fwait must be trapped at least if there is an npx or an
 	 * emulator).  This is mainly to handle the case where npx0 is not
 	 * configured, since the npx routines normally set up the trap
 	 * otherwise.  It should be done only at boot time, but doing it
 	 * here allows modifying `npx_exists' for testing the emulator on
 	 * systems with an npx.
 	 */
 	load_cr0(rcr0() | CR0_MP | CR0_TS);
 
 #ifdef DEV_NPX
 	/* Initialize the npx (if any) for the current process. */
 	npxinit(__INITIAL_NPXCW__);
 #endif
 
 	/*
 	 * XXX - Linux emulator
 	 * Make sure edx is 0x0 on entry. Linux binaries depend
 	 * on it.
 	 */
 	p->p_retval[1] = 0;
 }
 
 /*
  * Set the CR0 control bits the kernel runs with and load %gs with the
  * user data selector.
  */
 void
 cpu_setregs(void)
 {
 	unsigned int wanted;
 
 	wanted = CR0_NE;		/* Done by npxinit() */
 	wanted |= CR0_MP | CR0_TS;	/* Done at every execve() too. */
 #ifndef I386_CPU
 	wanted |= CR0_WP | CR0_AM;
 #endif
 	/* Keep whatever is already set in %cr0 and add the bits above. */
 	load_cr0(rcr0() | wanted);
 	load_gs(_udatasel);
 }
 
 /*
  * Sysctl handler for the adjkerntz integer: let sysctl_handle_int() do
  * the read/write, and call resettodr() after a successful request that
  * supplied a new value.  Returns the status of sysctl_handle_int().
  */
 static int
 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
 	    req);
 	if (error)
 		return (error);
 	if (req->newptr)
 		resettodr();
 	return (error);
 }
 
 /*
  * machdep.adjkerntz: read/write integer backed by `adjkerntz'; accesses
  * go through sysctl_machdep_adjkerntz().
  */
 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
 	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
 
 /* machdep.disable_rtc_set: read/write integer backed by `disable_rtc_set'. */
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
 	CTLFLAG_RW, &disable_rtc_set, 0, "");
 
 /* machdep.bootinfo: read-only export of the `bootinfo' structure. */
 SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
 	CTLFLAG_RD, &bootinfo, bootinfo, "");
 
 /* machdep.wall_cmos_clock: read/write integer backed by `wall_cmos_clock'. */
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
 /*
  * Initialize 386 and configure to run kernel
  */
 
 /*
  * Initialize segments & interrupt table
  */
 
 /* Selector of the default LDT; init386() sets it to GSEL(GLDT_SEL, SEL_KPL). */
 int _default_ldt;
 union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
 static struct gate_descriptor idt0[NIDT];
 struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
 union descriptor ldt[NLDT];		/* local descriptor table */
 #ifdef SMP
 /*
  * Table descriptors - the base/limit pairs handed to the processor when
  * loading the tables.  With SMP they are file-scope; otherwise init386()
  * declares them as locals.
  */
 struct region_descriptor r_gdt, r_idt;
 #endif
 
 int private_tss;			/* flag indicating private tss */
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 extern int has_f00f_bug;
 #endif
 
 /* TSS referenced by the GPANIC_SEL entry of gdt_segs[]. */
 static struct i386tss dblfault_tss;
 /* One page of stack -- presumably for the double fault handler; set up elsewhere. */
 static char dblfault_stack[PAGE_SIZE];
 
 /* Assigned to proc0.p_addr at the start of init386(). */
 extern  struct user *proc0paddr;
 
 
 /*
  * Software prototypes of the GDT entries -- in more palatable form.
  *
  * Each initializer lists, in order: base address, limit, type, descriptor
  * privilege level, present bit, two fields left at 0 here, the 32-bit
  * default-size bit and the granularity bit.  init386() patches some bases
  * and limits and then converts every entry with ssdtosd() into gdt[].
  */
 struct soft_segment_descriptor gdt_segs[] = {
 /* GNULL_SEL	0 Null Descriptor */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GCODE_SEL	1 Code Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GDATA_SEL	2 Data Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPRIV_SEL	3 SMP Per-Processor Private Data Descriptor */
 {	0x0,			/* segment base address (set in init386) */
 	0xfffff,		/* length (replaced in init386) */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPROC0_SEL	4 Proc 0 Tss Descriptor */
 {
 	0x0,			/* segment base address (set in init386) */
 	sizeof(struct i386tss)-1,/* limit - last byte of the TSS */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GLDT_SEL	5 LDT Descriptor */
 {	(int) ldt,		/* segment base address  */
 	sizeof(ldt)-1,		/* limit - last byte of ldt[] */
 	SDT_SYSLDT,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GUSERLDT_SEL	6 User LDT Descriptor per process */
 {	(int) ldt,		/* segment base address  */
 	(512 * sizeof(union descriptor)-1),		/* length */
 	SDT_SYSLDT,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GTGATE_SEL	7 Null Descriptor - Placeholder */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 {	0x400,			/* segment base address */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPANIC_SEL	9 Panic Tss Descriptor */
 {	(int) &dblfault_tss,	/* segment base address  */
 	sizeof(struct i386tss)-1,/* limit - last byte of the TSS */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Software prototypes of the default LDT entries, in the same field order
  * as gdt_segs[].  init386() replaces the limits of the LUCODE_SEL and
  * LUDATA_SEL entries and then converts every entry with ssdtosd() into
  * ldt[].
  */
 static struct soft_segment_descriptor ldt_segs[] = {
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Code Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Data Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Fill in entry `idx' of the interrupt descriptor table.
  *
  * func  - handler entry point; its address is split into the low and
  *         high offset fields of the gate
  * typ   - gate type
  * dpl   - descriptor privilege level of the gate
  * selec - code segment selector the gate refers to
  *
  * The entry is always marked present.
  */
 void
 setidt(idx, func, typ, dpl, selec)
 	int idx;
 	inthand_t *func;
 	int typ;
 	int dpl;
 	int selec;
 {
 	struct gate_descriptor *gate = &idt[idx];
 	int handler = (int)func;
 
 	/* Low part of the handler address and the target selector. */
 	gate->gd_looffset = handler;
 	gate->gd_selector = selec;
 
 	/* Fields that are always zero for the gates we install. */
 	gate->gd_stkcpy = 0;
 	gate->gd_xx = 0;
 
 	/* Gate attributes. */
 	gate->gd_type = typ;
 	gate->gd_dpl = dpl;
 	gate->gd_p = 1;
 
 	/* High part of the handler address. */
 	gate->gd_hioffset = handler >> 16;
 }
 
 /* IDTVEC(name) expands to the symbol Xname, e.g. IDTVEC(div) is Xdiv. */
 #define	IDTVEC(name)	__CONCAT(X,name)
 
 /*
  * Exception and system call entry points installed into the IDT by
  * init386(); they are defined outside this file.
  */
 extern inthand_t
 	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 
 /*
  * Convert a hardware segment descriptor `sd' into the software form
  * `ssd': the split base and limit fields are reassembled into single
  * values and the type, privilege level, present, default-size and
  * granularity bits are copied across.
  */
 void
 sdtossd(sd, ssd)
 	struct segment_descriptor *sd;
 	struct soft_segment_descriptor *ssd;
 {
 	/*
 	 * Shift as unsigned: the narrow bit-fields promote to int, and
 	 * shifting a value >= 0x80 left by 24 would move a bit into the
 	 * sign position, which is undefined for a signed int.
 	 */
 	ssd->ssd_base  = ((unsigned)sd->sd_hibase << 24) | sd->sd_lobase;
 	ssd->ssd_limit = ((unsigned)sd->sd_hilimit << 16) | sd->sd_lolimit;
 	ssd->ssd_type  = sd->sd_type;
 	ssd->ssd_dpl   = sd->sd_dpl;
 	ssd->ssd_p     = sd->sd_p;
 	ssd->ssd_def32 = sd->sd_def32;
 	ssd->ssd_gran  = sd->sd_gran;
 }
 
 /* Number of slots in physmap[]: 8 base/bound pairs. */
 #define PHYSMAP_SIZE	(2 * 8)
 
 /*
  * Populate the (physmap) array with base/bound pairs describing the
  * available physical memory in the system, then test this memory and
  * build the phys_avail array describing the actually-available memory.
  *
  * If we cannot accurately determine the physical memory map, then use
  * value from the 0xE801 call, and failing that, the RTC.
  *
  * Total memory size may be set by the kernel environment variable
  * hw.physmem or the compile-time define MAXMEM.
  *
  * `first' is handed to pmap_bootstrap() and is also the upper bound of
  * the range starting at 1MB that is excluded from phys_avail as kernel
  * memory.
  *
  * On return Maxmem, physmem, phys_avail[] and avail_end have been set,
  * with room for the message buffer trimmed off the last chunk.
  */
 static void
 getmemsize(int first)
 {
 	int i, physmap_idx, pa_indx;
 	u_int basemem, extmem;
 #ifdef PC98
 	int pg_n;
 	u_int under16;
 #else
 	struct vm86frame vmf;
 	struct vm86context vmc;
 #endif
 	vm_offset_t pa, physmap[PHYSMAP_SIZE];
 	pt_entry_t pte;
 	const char *cp;
 #ifndef PC98
 	struct bios_smap *smap;
 #endif
 
 #ifdef PC98
 	/* XXX - some of EPSON machines can't use PG_N */
 	pg_n = PG_N;
 	if (pc98_machine_type & M_EPSON_PC98) {
 		switch (epson_machine_id) {
 #ifdef WB_CACHE
 		default:
 #endif
 		case 0x34:		/* PC-486HX */
 		case 0x35:		/* PC-486HG */
 		case 0x3B:		/* PC-486HA */
 			pg_n = 0;
 			break;
 		}
 	}
 #else
 	bzero(&vmf, sizeof(struct vm86frame));
 #endif
 	bzero(physmap, sizeof(physmap));
 
 	/*
 	 * Perform "base memory" related probes & setup
 	 */
 #ifdef PC98
 	under16 = pc98_getmemsize(&basemem, &extmem);
 #else
 	vm86_intcall(0x12, &vmf);
 	basemem = vmf.vmf_ax;
 #endif
 	if (basemem > 640) {
 		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 			basemem);
 		basemem = 640;
 	}
 
 	/*
 	 * XXX if biosbasemem is now < 640, there is a `hole'
 	 * between the end of base memory and the start of
 	 * ISA memory.  The hole may be empty or it may
 	 * contain BIOS code or data.  Map it read/write so
 	 * that the BIOS can write to it.  (Memory from 0 to
 	 * the physical end of the kernel is mapped read-only
 	 * to begin with and then parts of it are remapped.
 	 * The parts that aren't remapped form holes that
 	 * remain read-only and are unused by the kernel.
 	 * The base memory area is below the physical end of
 	 * the kernel and right now forms a read-only hole.
 	 * The part of it from PAGE_SIZE to
 	 * (trunc_page(biosbasemem * 1024) - 1) will be
 	 * remapped and used by the kernel later.)
 	 *
 	 * This code is similar to the code used in
 	 * pmap_mapdev, but since no memory needs to be
 	 * allocated we simply change the mapping.
 	 */
 	for (pa = trunc_page(basemem * 1024);
 	     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
 		pte = (pt_entry_t)vtopte(pa + KERNBASE);
 		*pte = pa | PG_RW | PG_V;
 	}
 
 	/*
 	 * if basemem != 640, map pages r/w into vm86 page table so 
 	 * that the bios can scribble on it.
 	 */
 	pte = (pt_entry_t)vm86paddr;
 	for (i = basemem / 4; i < 160; i++)
 		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 
 #ifndef PC98
 	/*
 	 * map page 1 R/W into the kernel page table so we can use it
 	 * as a buffer.  The kernel will unmap this page later.
 	 */
 	pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT));
 	*pte = (1 << PAGE_SHIFT) | PG_RW | PG_V;
 
 	/*
 	 * get memory map with INT 15:E820
 	 */
 	vmc.npages = 0;
 	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
 	vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 
 	physmap_idx = 0;
 	vmf.vmf_ebx = 0;
 	do {
 		vmf.vmf_eax = 0xE820;
 		vmf.vmf_edx = SMAP_SIG;
 		vmf.vmf_ecx = sizeof(struct bios_smap);
 		i = vm86_datacall(0x15, &vmf, &vmc);
 		if (i || vmf.vmf_eax != SMAP_SIG)
 			break;
 		if (boothowto & RB_VERBOSE)
 			printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n",
 				smap->type,
 				*(u_int32_t *)((char *)&smap->base + 4),
 				(u_int32_t)smap->base,
 				*(u_int32_t *)((char *)&smap->length + 4),
 				(u_int32_t)smap->length);
 
 		/* Only entries of type 1 are treated as usable memory. */
 		if (smap->type != 0x01)
 			goto next_run;
 
 		if (smap->length == 0)
 			goto next_run;
 
 		if (smap->base >= 0xffffffff) {
 			printf("%uK of memory above 4GB ignored\n",
 			    (u_int)(smap->length / 1024));
 			goto next_run;
 		}
 
 		/* Skip entries that start below the end of a recorded one. */
 		for (i = 0; i <= physmap_idx; i += 2) {
 			if (smap->base < physmap[i + 1]) {
 				if (boothowto & RB_VERBOSE)
 					printf(
 	"Overlapping or non-montonic memory region, ignoring second region\n");
 				goto next_run;
 			}
 		}
 
 		/* Entry continues the current segment: just extend it. */
 		if (smap->base == physmap[physmap_idx + 1]) {
 			physmap[physmap_idx + 1] += smap->length;
 			goto next_run;
 		}
 
 		/*
 		 * A new segment is needed.  Check for room before advancing
 		 * physmap_idx, so that it always indexes a valid base/bound
 		 * pair: the code after this loop reads
 		 * physmap[physmap_idx + 1].
 		 */
 		if (physmap_idx + 2 == PHYSMAP_SIZE) {
 			printf(
 		"Too many segments in the physical address map, giving up\n");
 			break;
 		}
 		physmap_idx += 2;
 		physmap[physmap_idx] = smap->base;
 		physmap[physmap_idx + 1] = smap->base + smap->length;
 next_run: ;
 	} while (vmf.vmf_ebx != 0);
 
 	if (physmap[1] != 0)
 		goto physmap_done;
 
 	/*
 	 * If we failed above, try memory map with INT 15:E801
 	 */
 	vmf.vmf_ax = 0xE801;
 	if (vm86_intcall(0x15, &vmf) == 0) {
 		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 	} else {
 #if 0
 		vmf.vmf_ah = 0x88;
 		vm86_intcall(0x15, &vmf);
 		extmem = vmf.vmf_ax;
 #else
 		/*
 		 * Prefer the RTC value for extended memory.
 		 */
 		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 #endif
 	}
 
 	/*
 	 * Special hack for chipsets that still remap the 384k hole when
 	 * there's 16MB of memory - this really confuses people that
 	 * are trying to use bus mastering ISA controllers with the
 	 * "16MB limit"; they only have 16MB, but the remapping puts
 	 * them beyond the limit.
 	 *
 	 * If extended memory is between 15-16MB (16-17MB phys address range),
 	 *	chop it to 15MB.
 	 */
 	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 		extmem = 15 * 1024;
 #endif
 
 	physmap[0] = 0;
 	physmap[1] = basemem * 1024;
 	physmap_idx = 2;
 	physmap[physmap_idx] = 0x100000;
 	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 
 #ifdef PC98
 	if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) {
 		/* 15M - 16M region is cut off, so need to divide chunk */
 		physmap[physmap_idx + 1] = under16 * 1024;
 		physmap_idx += 2;
 		physmap[physmap_idx] = 0x1000000;
 		physmap[physmap_idx + 1] = physmap[2] + extmem * 1024;
 	}
 #else
 physmap_done:
 #endif
 	/*
 	 * Now, physmap contains a map of physical memory.
 	 */
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	physmap[1] = mp_bootaddress(physmap[1] / 1024);
 
 	/* look for the MP hardware - needed for apic addresses */
 	i386_mp_probe();
 #endif
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".  We may adjust this 
 	 * based on ``hw.physmem'' and the results of the memory test.
 	 */
 	Maxmem = atop(physmap[physmap_idx + 1]);
 
 #ifdef MAXMEM
 	Maxmem = MAXMEM / 4;
 #endif
 
 	/*
 	 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
 	 * for the appropriate modifiers.  This overrides MAXMEM.
 	 */
 	if ((cp = getenv("hw.physmem")) != NULL) {
 		u_int64_t AllowMem, sanity;
 		char *ep;
 
 		sanity = AllowMem = strtouq(cp, &ep, 0);
 		if ((ep != cp) && (*ep != 0)) {
 			switch(*ep) {
 			case 'g':
 			case 'G':
 				AllowMem <<= 10;
 				/* FALLTHROUGH */
 			case 'm':
 			case 'M':
 				AllowMem <<= 10;
 				/* FALLTHROUGH */
 			case 'k':
 			case 'K':
 				AllowMem <<= 10;
 				break;
 			default:
 				AllowMem = sanity = 0;
 				break;
 			}
 			/* A scaled value smaller than the input means overflow. */
 			if (AllowMem < sanity)
 				AllowMem = 0;
 		}
 		if (AllowMem == 0)
 			printf("Ignoring invalid memory size of '%s'\n", cp);
 		else
 			Maxmem = atop(AllowMem);
 	}
 
 	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 	    (boothowto & RB_VERBOSE))
 		printf("Physical memory use set to %uK\n", Maxmem * 4);
 
 	/*
 	 * If Maxmem has been increased beyond what the system has detected,
 	 * extend the last memory segment to the new limit.
 	 */ 
 	if (atop(physmap[physmap_idx + 1]) < Maxmem)
 		physmap[physmap_idx + 1] = ptoa(Maxmem);
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap(first, 0);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
 	pa_indx = 0;
 	phys_avail[pa_indx++] = physmap[0];
 	phys_avail[pa_indx] = physmap[0];
 #if 0
 	pte = (pt_entry_t)vtopte(KERNBASE);
 #else
 	pte = (pt_entry_t)CMAP1;
 #endif
 
 	/*
 	 * physmap is in bytes, so when converting to page boundaries,
 	 * round up the start address and round down the end address.
 	 */
 	for (i = 0; i <= physmap_idx; i += 2) {
 		vm_offset_t end;
 
 		end = ptoa(Maxmem);
 		if (physmap[i + 1] < end)
 			end = trunc_page(physmap[i + 1]);
 		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 			int tmp, page_bad;
 #if 0
 			int *ptr = 0;
 #else
 			int *ptr = (int *)CADDR1;
 #endif
 
 			/*
 			 * block out kernel memory as not available.
 			 */
 			if (pa >= 0x100000 && pa < first)
 				continue;
 
 			page_bad = FALSE;
 
 			/*
 			 * map page into kernel: valid, read/write,non-cacheable
 			 */
 #ifdef PC98
 			*pte = pa | PG_V | PG_RW | pg_n;
 #else
 			*pte = pa | PG_V | PG_RW | PG_N;
 #endif
 			invltlb();
 
 			/* Save the word at the test address; it is put back below. */
 			tmp = *(int *)ptr;
 			/*
 			 * Test for alternating 1's and 0's
 			 */
 			*(volatile int *)ptr = 0xaaaaaaaa;
 			if (*(volatile int *)ptr != 0xaaaaaaaa) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for alternating 0's and 1's
 			 */
 			*(volatile int *)ptr = 0x55555555;
 			if (*(volatile int *)ptr != 0x55555555) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 1's
 			 */
 			*(volatile int *)ptr = 0xffffffff;
 			if (*(volatile int *)ptr != 0xffffffff) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 0's
 			 */
 			*(volatile int *)ptr = 0x0;
 			if (*(volatile int *)ptr != 0x0) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Restore original value.
 			 */
 			*(int *)ptr = tmp;
 
 			/*
 			 * Adjust array of valid/good pages.
 			 */
 			if (page_bad == TRUE) {
 				continue;
 			}
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 * If we're also doing a speculative memory
 			 * test and we at or past the end, bump up Maxmem
 			 * so that we keep going. The first bad page
 			 * will terminate the loop.
 			 */
 			if (phys_avail[pa_indx] == pa) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 					printf(
 		"Too many holes in the physical address space, giving up\n");
 					pa_indx--;
 					break;
 				}
 				phys_avail[pa_indx++] = pa;	/* start */
 				phys_avail[pa_indx] = pa + PAGE_SIZE;	/* end */
 			}
 			physmem++;
 		}
 	}
 	/* Remove the temporary test mapping. */
 	*pte = 0;
 	invltlb();
 
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 
 	avail_end = phys_avail[pa_indx];
 }
 
 void
 init386(first)
 	int first;
 {
 	struct gate_descriptor *gdp;
 	int gsel_tss, metadata_missing, off, x;
 #ifndef SMP
 	/* table descriptors - used to load tables by microp */
 	struct region_descriptor r_gdt, r_idt;
 #endif
 
 	proc0.p_addr = proc0paddr;
 
 	atdevbase = ISA_HOLE_START + KERNBASE;
 
 #ifdef PC98
 	/*
 	 * Initialize DMAC
 	 */
 	pc98_init_dmac();
 #endif
 
 	metadata_missing = 0;
 	if (bootinfo.bi_modulep) {
 		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 		preload_bootstrap_relocate(KERNBASE);
 	} else {
 		metadata_missing = 1;
 	}
 	if (bootinfo.bi_envp)
 		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
 	/* Init basic tunables, hz etc */
 	init_param();
 
 	/*
 	 * make gdt memory segments, the code segment goes up to end of the
 	 * page with etext in it, the data segment goes to the end of
 	 * the address space
 	 */
 	/*
 	 * XXX text protection is temporarily (?) disabled.  The limit was
 	 * i386_btop(round_page(etext)) - 1.
 	 */
 	gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
 #ifdef SMP
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		atop(sizeof(struct privatespace) - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[0].globaldata.gd_common_tss;
 	SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0].globaldata;
 #else
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		atop(sizeof(struct globaldata) - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &__globaldata;
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &__globaldata.gd_common_tss;
 	__globaldata.gd_prvspace = &__globaldata;
 #endif
 
 	for (x = 0; x < NGDT; x++) {
 #ifdef BDE_DEBUGGER
 		/* avoid overwriting db entries with APM ones */
 		if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL)
 			continue;
 #endif
 		ssdtosd(&gdt_segs[x], &gdt[x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base =  (int) gdt;
 	lgdt(&r_gdt);
 
 	/* setup curproc so that mutexes work */
 	PCPU_SET(curproc, &proc0);
 	PCPU_SET(spinlocks, NULL);
 
 	LIST_INIT(&proc0.p_contested);
 
 	/*
 	 * Initialize mutexes.
 	 */
 	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
 	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
 	mtx_init(&proc0.p_mtx, "process lock", MTX_DEF);
 	mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE);
 #ifdef SMP
 	mtx_init(&imen_mtx, "imen", MTX_SPIN);
 #endif
 	mtx_lock(&Giant);
 
 	/* make ldt memory segments */
 	/*
 	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
 	 * should be spelled ...MAX_USER...
 	 */
 	ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 	ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
 		ssdtosd(&ldt_segs[x], &ldt[x].sd);
 
 	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/* exceptions */
 	for (x = 0; x < NIDT; x++)
 		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
 		    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(1, &IDTVEC(dbg),  SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
  	setidt(3, &IDTVEC(bpt),  SDT_SYS386IGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL
 	    , GSEL(GCODE_SEL, SEL_KPL));
 	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(14, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
  	setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	r_idt.rd_base = (int) idt;
 	lidt(&r_idt);
 
 	/*
 	 * Initialize the console before we print anything out.
 	 */
 	cninit();
 
 	if (metadata_missing)
 		printf("WARNING: loader(8) metadata is missing!\n");
 
 #ifdef DEV_ISA
 	isa_defaultirq();
 #endif
 
 #ifdef DDB
 	kdb_init();
 	if (boothowto & RB_KDB)
 		Debugger("Boot flags requested debugger");
 #endif
 
 	finishidentcpu();	/* Final stage of CPU initialization */
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	initializecpu();	/* Initialize CPU registers */
 
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
 	PCPU_SET(common_tss.tss_esp0,
 	    (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16);
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	private_tss = 0;
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	ltr(gsel_tss);
 
 	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
 	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_cr3 = (int)IdlePTD;
 	dblfault_tss.tss_eip = (int)dblfault_handler;
 	dblfault_tss.tss_eflags = PSL_KERNEL;
 	dblfault_tss.tss_ds = dblfault_tss.tss_es =
 	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 
 	vm86_initialize();
 	getmemsize(first);
 
 	/* now running on new page tables, configured,and u/iom is accessible */
 
 	/* Map the message buffer. */
 	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
 		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
 
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 
 	/* make a call gate to reenter kernel with */
 	gdp = &ldt[LSYS5CALLS_SEL].gd;
 
 	x = (int) &IDTVEC(lcall_syscall);
 	gdp->gd_looffset = x;
 	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 	gdp->gd_stkcpy = 1;
 	gdp->gd_type = SDT_SYS386CGT;
 	gdp->gd_dpl = SEL_UPL;
 	gdp->gd_p = 1;
 	gdp->gd_hioffset = x >> 16;
 
 	/* XXX does this work? */
 	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 	ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
 
 	/* transfer to user mode */
 
 	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
 	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
 
 	/* setup proc 0's pcb */
 	proc0.p_addr->u_pcb.pcb_flags = 0;
 	proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
 	proc0.p_addr->u_pcb.pcb_ext = 0;
 	proc0.p_frame = &proc0_tf;
 }
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 static void f00f_hack(void *unused);
 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 
 /*
  * Relocate the IDT for CPUs that have the F00F bug.
  *
  * Does nothing unless has_f00f_bug is set.  Otherwise a two-page region
  * is allocated from kernel_map, the current IDT is copied into it so
  * that the table straddles the page boundary, the CPU is pointed at the
  * copy with lidt(), and the first of the two pages is then made
  * read-only.  Panics if the allocation or the protection change fails.
  *
  * The `unused' argument is never referenced.
  */
 static void
 f00f_hack(void *unused) {
 	struct gate_descriptor *new_idt;
 #ifndef SMP
 	/* NOTE(review): SMP builds presumably use an r_idt defined elsewhere. */
 	struct region_descriptor r_idt;
 #endif
 	vm_offset_t tmp;
 
 	if (!has_f00f_bug)
 		return;
 
 	GIANT_REQUIRED;
 
 	printf("Intel Pentium detected, installing workaround for F00F bug\n");
 
 	/* The relocated table keeps the size of the original idt0. */
 	r_idt.rd_limit = sizeof(idt0) - 1;
 
 	tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
 	if (tmp == 0)
 		panic("kmem_alloc returned 0");
 	if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0)
 		panic("kmem_alloc returned non-page-aligned memory");
 	/* Put the first seven entries in the lower page */
 	/* 7*8: presumably seven gate descriptors of 8 bytes each. */
 	new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8));
 	bcopy(idt, new_idt, sizeof(idt0));
 	r_idt.rd_base = (int)new_idt;
 	lidt(&r_idt);
 	/* From here on the global idt pointer refers to the relocated copy. */
 	idt = new_idt;
 	/* Write-protect only the lower page, [tmp, tmp + PAGE_SIZE). */
 	if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
 			   VM_PROT_READ, FALSE) != KERN_SUCCESS)
 		panic("vm_map_protect failed");
 	return;
 }
 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 
 int
 ptrace_set_pc(p, addr)
 	struct proc *p;
 	unsigned long addr;
 {
 	p->p_frame->tf_eip = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(p)
 	struct proc *p;
 {
 	p->p_frame->tf_eflags |= PSL_T;
 	return (0);
 }
 
 /*
  * Copy the register state recorded for process p into *regs.
  *
  * Every field except r_gs is read from the trap frame p->p_frame; r_gs
  * is read from the pcb embedded in p->p_addr.  Always returns 0.
  */
 int
 fill_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *frame;
 
 	frame = p->p_frame;
 
 	/* Segment registers kept in the trap frame. */
 	regs->r_cs = frame->tf_cs;
 	regs->r_ss = frame->tf_ss;
 	regs->r_ds = frame->tf_ds;
 	regs->r_es = frame->tf_es;
 	regs->r_fs = frame->tf_fs;
 
 	/* General purpose registers. */
 	regs->r_eax = frame->tf_eax;
 	regs->r_ebx = frame->tf_ebx;
 	regs->r_ecx = frame->tf_ecx;
 	regs->r_edx = frame->tf_edx;
 	regs->r_esi = frame->tf_esi;
 	regs->r_edi = frame->tf_edi;
 	regs->r_ebp = frame->tf_ebp;
 	regs->r_esp = frame->tf_esp;
 
 	/* Instruction pointer and flags. */
 	regs->r_eip = frame->tf_eip;
 	regs->r_eflags = frame->tf_eflags;
 
 	/* %gs is not part of the trap frame; it comes from the pcb. */
 	regs->r_gs = p->p_addr->u_pcb.pcb_gs;
 
 	return (0);
 }
 
 /*
  * Replace the register state recorded for process p with *regs.
  *
  * The new eflags value is checked against the current one with
  * EFL_SECURE() and the new %cs with CS_SECURE(); if either check fails
  * nothing is modified and EINVAL is returned.  Otherwise every field
  * except r_gs is written to the trap frame p->p_frame, r_gs is written
  * to the pcb embedded in p->p_addr, and 0 is returned.
  */
 int
 set_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *frame;
 
 	frame = p->p_frame;
 
 	/* Validate before touching anything. */
 	if (!EFL_SECURE(regs->r_eflags, frame->tf_eflags))
 		return (EINVAL);
 	if (!CS_SECURE(regs->r_cs))
 		return (EINVAL);
 
 	/* Segment registers kept in the trap frame. */
 	frame->tf_cs = regs->r_cs;
 	frame->tf_ss = regs->r_ss;
 	frame->tf_ds = regs->r_ds;
 	frame->tf_es = regs->r_es;
 	frame->tf_fs = regs->r_fs;
 
 	/* General purpose registers. */
 	frame->tf_eax = regs->r_eax;
 	frame->tf_ebx = regs->r_ebx;
 	frame->tf_ecx = regs->r_ecx;
 	frame->tf_edx = regs->r_edx;
 	frame->tf_esi = regs->r_esi;
 	frame->tf_edi = regs->r_edi;
 	frame->tf_ebp = regs->r_ebp;
 	frame->tf_esp = regs->r_esp;
 
 	/* Instruction pointer and flags. */
 	frame->tf_eip = regs->r_eip;
 	frame->tf_eflags = regs->r_eflags;
 
 	/* %gs is not part of the trap frame; it lives in the pcb. */
 	p->p_addr->u_pcb.pcb_gs = regs->r_gs;
 
 	return (0);
 }
 
 #ifdef CPU_ENABLE_SSE
 /*
  * Convert floating point state from the savexmm layout (*sv_xmm) to the
  * save87 layout (*sv_87): the environment words, the eight fp_acc
  * register images and sv_ex_sw are copied field by field.
  */
 static void
 fill_fpregs_xmm(sv_xmm, sv_87)
 	struct savexmm *sv_xmm;
 	struct save87 *sv_87;
 {
 	struct envxmm *src_env;
 	struct env87 *dst_env;
 	int reg;
 
 	src_env = &sv_xmm->sv_env;
 	dst_env = &sv_87->sv_env;
 
 	/* Control, status and tag words. */
 	dst_env->en_cw = src_env->en_cw;
 	dst_env->en_sw = src_env->en_sw;
 	dst_env->en_tw = src_env->en_tw;
 
 	/* Instruction pointer, code selector and opcode. */
 	dst_env->en_fip = src_env->en_fip;
 	dst_env->en_fcs = src_env->en_fcs;
 	dst_env->en_opcode = src_env->en_opcode;
 
 	/* Operand offset and selector. */
 	dst_env->en_foo = src_env->en_foo;
 	dst_env->en_fos = src_env->en_fos;
 
 	/* The eight FPU register images. */
 	for (reg = 0; reg < 8; reg++)
 		sv_87->sv_ac[reg] = sv_xmm->sv_fp[reg].fp_acc;
 
 	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
 }
 
 /*
  * Convert floating point state from the save87 layout (*sv_87) to the
  * savexmm layout (*sv_xmm): the environment words, the eight register
  * images (stored into fp_acc) and sv_ex_sw are copied field by field.
  * Fields of *sv_xmm that have no save87 counterpart are left alone.
  */
 static void
 set_fpregs_xmm(sv_87, sv_xmm)
 	struct save87 *sv_87;
 	struct savexmm *sv_xmm;
 {
 	struct env87 *src_env;
 	struct envxmm *dst_env;
 	int reg;
 
 	src_env = &sv_87->sv_env;
 	dst_env = &sv_xmm->sv_env;
 
 	/* Control, status and tag words. */
 	dst_env->en_cw = src_env->en_cw;
 	dst_env->en_sw = src_env->en_sw;
 	dst_env->en_tw = src_env->en_tw;
 
 	/* Instruction pointer, code selector and opcode. */
 	dst_env->en_fip = src_env->en_fip;
 	dst_env->en_fcs = src_env->en_fcs;
 	dst_env->en_opcode = src_env->en_opcode;
 
 	/* Operand offset and selector. */
 	dst_env->en_foo = src_env->en_foo;
 	dst_env->en_fos = src_env->en_fos;
 
 	/* The eight FPU register images. */
 	for (reg = 0; reg < 8; reg++)
 		sv_xmm->sv_fp[reg].fp_acc = sv_87->sv_ac[reg];
 
 	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
 }
 #endif /* CPU_ENABLE_SSE */
 
 int
 fill_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
 						(struct save87 *)fpregs);
 		return (0);
 	}
 #endif /* CPU_ENABLE_SSE */
 	bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
 	return (0);
 }
 
 int
 set_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		set_fpregs_xmm((struct save87 *)fpregs,
 					   &p->p_addr->u_pcb.pcb_save.sv_xmm);
 		return (0);
 	}
 #endif /* CPU_ENABLE_SSE */
 	bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
 	return (0);
 }
 
 /*
  * Fill *dbregs with debug register values.
  *
  * For a non-NULL p the values come from p's pcb; dr4 and dr5 have no
  * pcb slot and are reported as 0.  For p == NULL all eight registers
  * are read from the CPU with rdr0()..rdr7().  Always returns 0.
  */
 int
 fill_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *pcb;
 
 	if (p != NULL) {
 		/* Values saved for the process. */
 		pcb = &p->p_addr->u_pcb;
 		dbregs->dr0 = pcb->pcb_dr0;
 		dbregs->dr1 = pcb->pcb_dr1;
 		dbregs->dr2 = pcb->pcb_dr2;
 		dbregs->dr3 = pcb->pcb_dr3;
 		dbregs->dr4 = 0;
 		dbregs->dr5 = 0;
 		dbregs->dr6 = pcb->pcb_dr6;
 		dbregs->dr7 = pcb->pcb_dr7;
 		return (0);
 	}
 
 	/* No process given: read the registers themselves. */
 	dbregs->dr0 = rdr0();
 	dbregs->dr1 = rdr1();
 	dbregs->dr2 = rdr2();
 	dbregs->dr3 = rdr3();
 	dbregs->dr4 = rdr4();
 	dbregs->dr5 = rdr5();
 	dbregs->dr6 = rdr6();
 	dbregs->dr7 = rdr7();
 	return (0);
 }
 
 int
 set_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *pcb;
 	int i;
 	u_int32_t mask1, mask2;
 
 	if (p == NULL) {
 		load_dr0(dbregs->dr0);
 		load_dr1(dbregs->dr1);
 		load_dr2(dbregs->dr2);
 		load_dr3(dbregs->dr3);
 		load_dr4(dbregs->dr4);
 		load_dr5(dbregs->dr5);
 		load_dr6(dbregs->dr6);
 		load_dr7(dbregs->dr7);
 	}
 	else {
 		/*
 		 * Don't let an illegal value for dr7 get set.	Specifically,
 		 * check for undefined settings.  Setting these bit patterns
 		 * result in undefined behaviour and can lead to an unexpected
 		 * TRCTRAP.
 		 */
 		for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
 		     i++, mask1 <<= 2, mask2 <<= 2)
 			if ((dbregs->dr7 & mask1) == mask2)
 				return (EINVAL);
 		
 		pcb = &p->p_addr->u_pcb;
 		
 		/*
 		 * Don't let a process set a breakpoint that is not within the
 		 * process's address space.  If a process could do this, it
 		 * could halt the system by setting a breakpoint in the kernel
 		 * (if ddb was enabled).  Thus, we need to check to make sure
 		 * that no breakpoints are being enabled for addresses outside
 		 * process's address space, unless, perhaps, we were called by
 		 * uid 0.
 		 *
 		 * XXX - what about when the watched area of the user's
 		 * address space is written into from within the kernel
 		 * ... wouldn't that still cause a breakpoint to be generated
 		 * from within kernel mode?
 		 */
 
 		if (suser(p) != 0) {
 			if (dbregs->dr7 & 0x3) {
 				/* dr0 is enabled */
 				if (dbregs->dr0 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<2)) {
 				/* dr1 is enabled */
 				if (dbregs->dr1 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<4)) {
 				/* dr2 is enabled */
 				if (dbregs->dr2 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<6)) {
 				/* dr3 is enabled */
 				if (dbregs->dr3 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 		}
 
 		pcb->pcb_dr0 = dbregs->dr0;
 		pcb->pcb_dr1 = dbregs->dr1;
 		pcb->pcb_dr2 = dbregs->dr2;
 		pcb->pcb_dr3 = dbregs->dr3;
 		pcb->pcb_dr6 = dbregs->dr6;
 		pcb->pcb_dr7 = dbregs->dr7;
 
 		pcb->pcb_flags |= PCB_DBREGS;
 	}
 
 	return (0);
 }
 
 /*
  * Return > 0 if a hardware breakpoint has been hit, and the
  * breakpoint was in user space.  Return 0, otherwise.
  */
 int
 user_dbreg_trap(void)
 {
         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
         int nbp;            /* number of breakpoints that triggered */
         caddr_t addr[4];    /* breakpoint addresses */
         int i;
         
         dr7 = rdr7();
         if ((dr7 & 0x000000ff) == 0) {
                 /*
                  * all GE and LE bits in the dr7 register are zero,
                  * thus the trap couldn't have been caused by the
                  * hardware debug registers
                  */
                 return 0;
         }
 
         nbp = 0;
         dr6 = rdr6();
         bp = dr6 & 0x0000000f;
 
         if (!bp) {
                 /*
                  * None of the breakpoint bits are set meaning this
                  * trap was not caused by any of the debug registers
                  */
                 return 0;
         }
 
         /*
          * at least one of the breakpoints were hit, check to see
          * which ones and if any of them are user space addresses
          */
 
         if (bp & 0x01) {
                 addr[nbp++] = (caddr_t)rdr0();
         }
         if (bp & 0x02) {
                 addr[nbp++] = (caddr_t)rdr1();
         }
         if (bp & 0x04) {
                 addr[nbp++] = (caddr_t)rdr2();
         }
         if (bp & 0x08) {
                 addr[nbp++] = (caddr_t)rdr3();
         }
 
         for (i=0; i<nbp; i++) {
                 if (addr[i] <
                     (caddr_t)VM_MAXUSER_ADDRESS) {
                         /*
                          * addr[i] is in user space
                          */
                         return nbp;
                 }
         }
 
         /*
          * None of the breakpoints are in user space.
          */
         return 0;
 }
 
 
 #ifndef DDB
 /*
  * Stand-in compiled only when DDB is not configured: it just prints a
  * note that Debugger() was called, quoting msg, and returns.
  */
 void
 Debugger(const char *msg)
 {
 	printf("Debugger(\"%s\") called.\n", msg);
 }
 #endif /* no DDB */
 
 #include <sys/disklabel.h>
 
 /*
  * Determine the size of the transfer, and make sure it is
  * within the boundaries of the partition. Adjust transfer
  * if needed, and signal errors or early completion.
  *
  * bp is the request, lp the disklabel describing the disk, and wlabel
  * is non-zero when writes over the label sector are permitted.
  *
  * Returns:
  *    1  the request is acceptable; bio_pblkno has been set to the
  *       partition-relative block plus the partition offset, and
  *       bio_bcount may have been shortened to end at the partition end.
  *    0  the request starts exactly at the end of the partition;
  *       bio_resid is set to bio_bcount (EOF).
  *   -1  the request is rejected; bio_error is set (EROFS for a write
  *       over the label while wlabel is 0, EINVAL for a negative block
  *       number or one past the end of the partition) and BIO_ERROR is
  *       set in bio_flags.
  */
 int
 bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
 {
         struct partition *p = lp->d_partitions + dkpart(bp->bio_dev);
         int labelsect = lp->d_partitions[0].p_offset;
         int maxsz = p->p_size,
                 sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 
         /* overwriting disk label ? */
         /* XXX should also protect bootstrap in first 8K */
         if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect &&
 #if LABELSECTOR != 0
             bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
 #endif
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 
 #if     defined(DOSBBSECTOR) && defined(notyet)
         /* overwriting master boot record? */
         if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR &&
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 #endif
 
         /*
          * A negative block number can never be inside the partition.
          * It must be rejected here: the truncation below would compute
          * sz = maxsz - bio_blkno > maxsz and thereby enlarge the
          * transfer instead of failing it.
          */
         if (bp->bio_blkno < 0) {
                 bp->bio_error = EINVAL;
                 goto bad;
         }
 
         /* beyond partition? */
         if (bp->bio_blkno + sz > maxsz) {
                 /* if exactly at end of partition, return an EOF */
                 if (bp->bio_blkno == maxsz) {
                         bp->bio_resid = bp->bio_bcount;
                         return(0);
                 }
                 /* or truncate if part of it fits */
                 sz = maxsz - bp->bio_blkno;
                 if (sz <= 0) {
                         bp->bio_error = EINVAL;
                         goto bad;
                 }
                 bp->bio_bcount = sz << DEV_BSHIFT;
         }
 
         /* translate the partition-relative block to a disk block */
         bp->bio_pblkno = bp->bio_blkno + p->p_offset;
         return(1);
 
 bad:
         bp->bio_flags |= BIO_ERROR;
         return(-1);
 }
 
 #ifdef DDB
 
 /*
  * Provide inb() and outb() as functions.  They are normally only
  * available as macros calling inlined functions, thus cannot be
  * called inside DDB.
  *
  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
  */
 
 #undef inb
 #undef outb
 
 /* silence compiler warnings */
 u_char inb(u_int);
 void outb(u_int, u_char);
 
 /*
  * Out-of-line inb(): execute the "inb" instruction with `port' in the
  * d register and return the byte delivered in the a register.
  */
 u_char
 inb(u_int port)
 {
 	u_char	data;
 	/*
 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
 	 * %edx, while gcc generates inferior code (movw instead of movl)
 	 * if we tell it to load (u_short) port.
 	 */
 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
 	return (data);
 }
 
 /*
  * Out-of-line outb(): execute the "outb" instruction with `data' in the
  * a register and `port' in the d register.
  */
 void
 outb(u_int port, u_char data)
 {
 	u_char	al;
 	/*
 	 * Use an unnecessary assignment to help gcc's register allocator.
 	 * This make a large difference for gcc-1.40 and a tiny difference
 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
 	 * best results.  gcc-2.6.0 can't handle this.
 	 */
 	al = data;
 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
 }
 
 #endif /* DDB */
Index: head/sys/pc98/pc98/machdep.c
===================================================================
--- head/sys/pc98/pc98/machdep.c	(revision 82308)
+++ head/sys/pc98/pc98/machdep.c	(revision 82309)
@@ -1,2594 +1,2598 @@
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  * $FreeBSD$
  */
 
 #include "opt_atalk.h"
 #include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_ipx.h"
 #include "opt_isa.h"
 #include "opt_maxmem.h"
 #include "opt_msgbuf.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
+#include "opt_upages.h"
 /* #include "opt_userconfig.h" */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/reboot.h>
 #include <sys/smp.h>
 #include <sys/callout.h>
 #include <sys/msgbuf.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <sys/lock.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
 #include <sys/user.h>
 #include <sys/exec.h>
 #include <sys/cons.h>
 
 #include <ddb/ddb.h>
 
 #include <net/netisr.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/reg.h>
 #include <machine/clock.h>
 #include <machine/specialreg.h>
 #include <machine/bootinfo.h>
 #include <machine/md_var.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
 #include <machine/globals.h>
 #ifdef PERFMON
 #include <machine/perfmon.h>
+#endif
+#ifdef SMP
+#include <machine/privatespace.h>
 #endif
 
 #include <i386/isa/icu.h>
 #include <i386/isa/intr_machdep.h>
 #ifdef PC98
 #include <pc98/pc98/pc98_machdep.h>
 #include <pc98/pc98/pc98.h>
 #else
 #include <isa/rtc.h>
 #endif
 #include <machine/vm86.h>
 #include <sys/ptrace.h>
 #include <machine/sigframe.h>
 
 extern void init386 __P((int first));
 extern void dblfault_handler __P((void));
 
 extern void printcpuinfo(void);	/* XXX header file */
 extern void earlysetcpuclass(void);	/* same header file */
 extern void finishidentcpu(void);
 extern void panicifcpuunsupported(void);
 extern void initializecpu(void);
 
 #define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
 static void cpu_startup __P((void *));
 #ifdef CPU_ENABLE_SSE
 static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
 static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
 #endif /* CPU_ENABLE_SSE */
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
 #ifdef PC98
 int	need_pre_dma_flush;	/* If 1, use wbinvd before DMA transfer. */
 int	need_post_dma_flush;	/* If 1, use invd after DMA transfer. */
 #endif
 
 int	_udatasel, _ucodesel;
 u_int	atdevbase;
 
 #if defined(SWTCH_OPTIM_STATS)
 extern int swtch_optim_stats;
 SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
 	CTLFLAG_RD, &swtch_optim_stats, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
 	CTLFLAG_RD, &tlb_flush_count, 0, "");
 #endif
 
 #ifdef PC98
 static int	ispc98 = 1;
 #else
 static int	ispc98 = 0;
 #endif
 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
 
 int physmem = 0;
 int cold = 1;
 
 #ifdef COMPAT_43
 static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code));
 #endif
 
 /*
  * Sysctl handler: hand ctob(physmem) to sysctl_handle_int() and pass
  * its result back to the caller.
  */
 static int
 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0, ctob(physmem), req));
 }
 
 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_physmem, "IU", "");
 
 /*
  * Sysctl handler: hand ctob() of physmem less cnt.v_wire_count to
  * sysctl_handle_int() and pass its result back to the caller.
  */
 static int
 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0,
 	    ctob(physmem - cnt.v_wire_count), req));
 }
 
 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_usermem, "IU", "");
 
 /*
  * Sysctl handler: hand i386_btop() of the span from avail_start to
  * avail_end to sysctl_handle_int() and pass its result back to the
  * caller.
  */
 static int
 sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0,
 	    i386_btop(avail_end - avail_start), req));
 }
 
 SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_availpages, "I", "");
 
 int Maxmem = 0;
 #ifdef PC98
 int Maxmem_under16M = 0;
 #endif
 long dumplo;
 
 vm_offset_t phys_avail[10];
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
 
 struct kva_md_info kmi;
 
 static struct trapframe proc0_tf;
 #ifndef SMP
 static struct globaldata __globaldata;
 #endif
 
 struct mtx sched_lock;
 struct mtx Giant;
 
 /*
  * Machine-dependent startup hook (registered with SYSINIT at
  * SI_SUB_CPU / SI_ORDER_FIRST).
  *
  * In order: classifies and reports the CPU (panicking if it is not
  * supported), starts the real-time clock, prints the amount of real
  * memory and, when bootverbose is set, the physical memory chunks in
  * phys_avail[], sets up the kernel submaps, optionally runs userconfig,
  * prints the amount of free memory, initializes the buffer and pager
  * buffer subsystems, registers this CPU's globaldata and, on non-SMP
  * kernels, calls cpu_setregs().
  *
  * The `dummy' argument is never referenced.
  */
 static void
 cpu_startup(dummy)
 	void *dummy;
 {
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	earlysetcpuclass();
 	startrtclock();
 	printcpuinfo();
 	panicifcpuunsupported();
 #ifdef PERFMON
 	perfmon_init();
 #endif
 	printf("real memory  = %u (%uK bytes)\n", ptoa(Maxmem),
 	    ptoa(Maxmem) / 1024);
 	/*
 	 * Display any holes after the first chunk of extended memory.
 	 * phys_avail[] holds (start, end) pairs; an end entry of 0
 	 * terminates the list.
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
 			unsigned int size1;
 
 			size1 = phys_avail[indx + 1] - phys_avail[indx];
 			printf("0x%08x - 0x%08x, %u bytes (%u pages)\n",
 			    phys_avail[indx], phys_avail[indx + 1] - 1, size1,
 			    size1 / PAGE_SIZE);
 		}
 	}
 
 	vm_ksubmap_init(&kmi);
 
 #if defined(USERCONFIG)
 	userconfig();
 	cninit();		/* the preferred console may have changed */
 #endif
 
 	printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count),
 	    ptoa(cnt.v_free_count) / 1024);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 
 	globaldata_register(GLOBALDATA);
 #ifndef SMP
 	/* For SMP, we delay the cpu_setregs() until after SMP startup. */
 	cpu_setregs();
 #endif
 }
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * at top to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  */
 #ifdef COMPAT_43
 /*
  * Old-style (struct osigframe) counterpart of sendsig(): build a signal
  * frame for the current process, copy it out to the user stack and
  * redirect the trap frame so that the handler runs on return to user
  * mode.
  *
  *	catcher	handler to be invoked
  *	sig	signal number; translated through sv_sigtbl when the
  *		process' sysentvec supplies a table
  *	mask	signal mask recorded in the saved context
  *	code	signal-specific code handed to the handler
  *
  * The frame is placed at the top of the alternate signal stack when one
  * is enabled, not already in use and requested for this signal;
  * otherwise it goes directly below the current user %esp.  If the frame
  * location is not usable, SIGILL is reset to its default action and
  * posted; if the copyout itself fails the process is killed with
  * sigexit().
  */
 static void
 osendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct osigframe sf;
 	struct osigframe *fp;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int oonstack;
 
 	/*
 	 * Start from an all-zero frame, as sendsig() does.  The whole
 	 * structure is copied out to user space below and not every member
 	 * is assigned on every path (e.g. sf_addr when the handler was
 	 * installed with SA_SIGINFO), so stale kernel stack contents must
 	 * not be left in it.
 	 */
 	bzero(&sf, sizeof(sf));
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	regs = p->p_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct osigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct osigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		fp = (struct osigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *fp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)fp) == 0 ||
 	    !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		psignal(p, SIGILL);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
 		sf.sf_siginfo.si_signo = sig;
 		sf.sf_siginfo.si_code = code;
 		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_arg2 = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/* Save most if not all of trap frame. */
 	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
 	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
 	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
 	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
 	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
 	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
 	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
 	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
 	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
 	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
 	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
 	/* %gs is read from the CPU rather than from the trap frame. */
 	sf.sf_siginfo.si_sc.sc_gs = rgs();
 	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
 
 	/* Build the signal context to be used by osigreturn(). */
 	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
 	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
 	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
 	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
 	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
 	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
 	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
 		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
 		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
 		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_siginfo.si_sc.sc_ps =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/* See sendsig() for comments. */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(p, SIGILL);
 		/* NOTREACHED */
 	}
 
 	/*
 	 * Resume user execution szosigcode bytes below PS_STRINGS with the
 	 * new frame as the stack pointer and the default user selectors
 	 * loaded.  NOTE(review): that address is presumably where the
 	 * old-style signal trampoline lives -- confirm.
 	 */
 	regs->tf_esp = (int)fp;
 	regs->tf_eip = PS_STRINGS - szosigcode;
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 }
 #endif
 
 /*
  * Deliver signal `sig' to the current process: build a struct sigframe,
  * copy it out to the user stack and redirect the trap frame so that the
  * handler runs on return to user mode.
  *
  *	catcher	handler to be invoked
  *	sig	signal number; translated through sv_sigtbl when the
  *		process' sysentvec supplies a table
  *	mask	signal mask saved in the user context
  *	code	signal-specific code handed to the handler
  *
  * With COMPAT_43, signals that are members of ps_osigset are handed to
  * osendsig() instead.  If the frame location is not usable, SIGILL is
  * reset to its default action and posted; if the copyout itself fails
  * the process is killed with sigexit().
  */
 void
 sendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct sigframe sf;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct sigframe *sfp;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 #ifdef COMPAT_43
 	/* The process lock is dropped first; osendsig() takes it itself. */
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		PROC_UNLOCK(p);
 		osendsig(catcher, sig, mask, code);
 		return;
 	}
 #endif
 	regs = p->p_frame;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/*
 	 * The whole trap frame is copied into the mcontext starting at
 	 * mc_fs.  NOTE(review): this relies on the mcontext members from
 	 * mc_fs onward mirroring struct trapframe -- confirm against the
 	 * structure definitions.
 	 */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		sfp = (struct sigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct sigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		sfp = (struct sigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *sfp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)sfp) == 0 ||
 	    !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 #ifdef DEBUG
 		printf("process %d has trashed its stack\n", p->p_pid);
 #endif
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		psignal(p, SIGILL);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill siginfo structure. */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = code;
 		sf.sf_si.si_addr = (void *)regs->tf_err;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * We should never have PSL_T set when returning from vm86
 		 * mode.  It may be set here if we deliver a signal before
 		 * getting to vm86 mode, so turn it off.
 		 *
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		PROC_LOCK(p);
 		sigexit(p, SIGILL);
 		/* NOTREACHED */
 	}
 
 	/*
 	 * Resume user execution *sv_szsigcode bytes below PS_STRINGS with
 	 * the new frame as the stack pointer and the default user selectors
 	 * in the trap frame.  Unlike osendsig(), %gs is not reloaded here.
 	 * NOTE(review): that address is presumably where the signal
 	 * trampoline lives -- confirm.
 	 */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_ss = _udatasel;
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  */
 #ifdef COMPAT_43
 /*
  * Old-style sigreturn: restore the register state, signal mask and
  * on-stack flag of process `p' from the struct osigcontext that
  * uap->sigcntxp points to in user space.
  *
  * Returns EFAULT when the context is not readable, EINVAL when it asks
  * for vm86 mode without the vm86 area being set up or when the saved
  * eflags or %cs fail the EFL_SECURE()/CS_SECURE() checks, and
  * EJUSTRETURN once the trap frame has been rewritten.
  */
 int
 osigreturn(p, uap)
 	struct proc *p;
 	struct osigreturn_args /* {
 		struct osigcontext *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	struct osigcontext *scp;
 	int eflags;
 
 	regs = p->p_frame;
 	scp = uap->sigcntxp;
 	/* The context is read in place below, so check it is readable. */
 	if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ))
 		return (EFAULT);
 	eflags = scp->sc_ps;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/*
 		 * Go back to user mode if both flags are set.  Note that
 		 * the restore below still proceeds after the signal has
 		 * been posted.
 		 */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Only the bits in VME_USERCHANGE/VM_USERCHANGE are taken
 		 * from the saved eflags; everything else comes from the
 		 * current trap frame, and PSL_VM is forced on.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		tf->tf_vm86_ds = scp->sc_ds;
 		tf->tf_vm86_es = scp->sc_es;
 		tf->tf_vm86_fs = scp->sc_fs;
 		tf->tf_vm86_gs = scp->sc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		if (!CS_SECURE(scp->sc_cs)) {
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 		regs->tf_ds = scp->sc_ds;
 		regs->tf_es = scp->sc_es;
 		regs->tf_fs = scp->sc_fs;
 	}
 
 	/* Restore remaining registers. */
 	regs->tf_eax = scp->sc_eax;
 	regs->tf_ebx = scp->sc_ebx;
 	regs->tf_ecx = scp->sc_ecx;
 	regs->tf_edx = scp->sc_edx;
 	regs->tf_esi = scp->sc_esi;
 	regs->tf_edi = scp->sc_edi;
 	regs->tf_cs = scp->sc_cs;
 	regs->tf_ss = scp->sc_ss;
 	regs->tf_isp = scp->sc_isp;
 
 	/* Signal stack state and signal mask are changed under the lock. */
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (scp->sc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	SIGSETOLD(p->p_sigmask, scp->sc_mask);
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	regs->tf_ebp = scp->sc_fp;
 	regs->tf_esp = scp->sc_sp;
 	regs->tf_eip = scp->sc_pc;
 	/* Use the sanitized eflags computed above, not the raw sc_ps. */
 	regs->tf_eflags = eflags;
 	return (EJUSTRETURN);
 }
 #endif
 
 /*
  * Restore the register state, signal mask and on-stack flag of process
  * `p' from the ucontext_t that uap->sigcntxp points to in user space.
  *
  * With COMPAT_43 the context is first examined as a struct osigcontext
  * and handed to osigreturn() when it carries the old-style marker.
  *
  * Returns EFAULT when the context is not readable, EINVAL when it asks
  * for vm86 mode without the vm86 area being set up or when the saved
  * eflags or %cs fail the EFL_SECURE()/CS_SECURE() checks, and
  * EJUSTRETURN once the trap frame has been rewritten.
  */
 int
 sigreturn(p, uap)
 	struct proc *p;
 	struct sigreturn_args /* {
 		ucontext_t *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	int cs, eflags;
 
 	ucp = uap->sigcntxp;
 #ifdef COMPAT_43
 	if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ))
 		return (EFAULT);
 	/*
 	 * NOTE(review): 0x01d516 in sc_trapno is taken to mean "this is an
 	 * old-style context"; the value is presumably stored by the
 	 * old-style signal trampoline -- confirm where it is written.
 	 */
 	if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516)
 		return (osigreturn(p, (struct osigreturn_args *)uap));
 	/*
 	 * Since ucp is not an osigcontext but a ucontext_t, we have to
 	 * check again if all of it is accessible.  A ucontext_t is
 	 * much larger, so instead of just checking for the pointer
 	 * being valid for the size of an osigcontext, now check for
 	 * it being valid for a whole, new-style ucontext_t.
 	 */
 #endif
 	if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ))
 		return (EFAULT);
 
 	regs = p->p_frame;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/*
 		 * Go back to user mode if both flags are set.  Note that
 		 * the restore below still proceeds after the signal has
 		 * been posted.
 		 */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Only the bits in VME_USERCHANGE/VM_USERCHANGE are taken
 		 * from the saved eflags; everything else comes from the
 		 * current trap frame, and PSL_VM is forced on.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |
 			    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * Load the saved frame wholesale, then overwrite eflags
 		 * with the sanitized value and move the saved data
 		 * segments into the vm86 slots.
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 			printf("sigreturn: eflags = 0x%x\n", eflags);
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			printf("sigreturn: cs = 0x%x\n", cs);
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 
 		/* Both checks passed: load the saved frame wholesale. */
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 	/* Signal stack state and signal mask are changed under the lock. */
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	p->p_sigmask = ucp->uc_sigmask;
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	return (EJUSTRETURN);
 }
 
 /*
  * Machine dependent boot() routine
  *
  * I haven't seen anything to put here yet
  * Possibly some stuff might be grafted back here from boot()
  */
 void
 cpu_boot(int howto)
 {
 	/* Nothing machine-dependent to do; the argument is unused. */
 	(void)howto;
 }
 
 /*
  * Shutdown the CPU as much as possible
  */
 void
 cpu_halt(void)
 {
 	/* Never returns: issue "hlt" again each time the CPU resumes. */
 	while (1) {
 		__asm__ ("hlt");
 	}
 }
 
 /*
  * Hook to idle the CPU when possible.  This currently only works in
  * the !SMP case, as there is no clean way to ensure that a CPU will be
  * woken when there is work available for it.
  *
  * machdep.cpu_idle_hlt (read/write, default 1) selects whether the idle
  * hook may halt the CPU at all.
  */
 static int	cpu_idle_hlt = 1;
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 
 /*
  * Note that we have to be careful here to avoid a race between checking
  * procrunnable() and actually halting.  If we don't do this, we may waste
  * the time between calling hlt and the next interrupt even though there
  * is a runnable process.
  *
  * The check is therefore made with interrupts disabled, and interrupts
  * are re-enabled by the same asm statement that halts.
  */
 void
 cpu_idle(void)
 {
 #ifndef SMP
 	if (cpu_idle_hlt) {
 		disable_intr();
 		if (procrunnable())
 			enable_intr();
 		else {
 			/*
 			 * "sti" takes effect only after the instruction
 			 * that follows it, so "hlt" must be that very
 			 * instruction.  Emitting both from one asm
 			 * statement keeps the compiler from placing
 			 * anything in between, which a separate
 			 * enable_intr() call followed by "hlt" could not
 			 * guarantee.
 			 */
 			__asm __volatile("sti; hlt");
 		}
 	}
 #endif
 }
 
 /*
  * Clear registers on exec
  *
  *	p		process whose trap frame and pcb are reset
  *	entry		initial user %eip
  *	stack		initial user %esp
  *	ps_strings	value handed to the new image in %ebx
  *
  * The trap frame is zeroed, so every register not assigned below starts
  * out as 0.  The only bit carried over from the old frame is PSL_T.
  */
 void
 setregs(p, entry, stack, ps_strings)
 	struct proc *p;
 	u_long entry;
 	u_long stack;
 	u_long ps_strings;
 {
 	struct trapframe *regs = p->p_frame;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 	int saved_eflags;
 
 	if (pcb->pcb_ldt)
 		user_ldt_free(pcb);
 
 	/*
 	 * Pick up the trace flag before the frame is wiped; reading it
 	 * back from the zeroed frame would always yield 0.
 	 */
 	saved_eflags = regs->tf_eflags & PSL_T;
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_eip = entry;
 	regs->tf_esp = stack;
 	regs->tf_eflags = PSL_USER | saved_eflags;
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_cs = _ucodesel;
 
 	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 	regs->tf_ebx = ps_strings;
 
 	/*
 	 * reset %gs as well: in the register itself when this pcb is the
 	 * one currently running, otherwise in the saved copy.
 	 */
 	if (pcb == PCPU_GET(curpcb))
 		load_gs(_udatasel);
 	else
 		pcb->pcb_gs = _udatasel;
 
 	/*
 	 * Reset the hardware debug registers if they were in use.
 	 * They won't have any meaning for the newly exec'd process.
 	 */
 	if (pcb->pcb_flags & PCB_DBREGS) {
 		pcb->pcb_dr0 = 0;
 		pcb->pcb_dr1 = 0;
 		pcb->pcb_dr2 = 0;
 		pcb->pcb_dr3 = 0;
 		pcb->pcb_dr6 = 0;
 		pcb->pcb_dr7 = 0;
 		if (pcb == PCPU_GET(curpcb)) {
 			/*
 			 * Clear the debug registers on the running
 			 * CPU, otherwise they will end up affecting
 			 * the next process we switch to.
 			 */
 			reset_dbregs();
 		}
 		pcb->pcb_flags &= ~PCB_DBREGS;
 	}
 
 	/*
 	 * Initialize the math emulator (if any) for the current process.
 	 * Actually, just clear the bit that says that the emulator has
 	 * been initialized.  Initialization is delayed until the process
 	 * traps to the emulator (if it is done at all) mainly because
 	 * emulators don't provide an entry point for initialization.
 	 */
 	p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;
 
 	/*
 	 * Arrange to trap the next npx or `fwait' instruction (see npx.c
 	 * for why fwait must be trapped at least if there is an npx or an
 	 * emulator).  This is mainly to handle the case where npx0 is not
 	 * configured, since the npx routines normally set up the trap
 	 * otherwise.  It should be done only at boot time, but doing it
 	 * here allows modifying `npx_exists' for testing the emulator on
 	 * systems with an npx.
 	 */
 	load_cr0(rcr0() | CR0_MP | CR0_TS);
 
 #ifdef DEV_NPX
 	/* Initialize the npx (if any) for the current process. */
 	npxinit(__INITIAL_NPXCW__);
 #endif
 
 	/*
 	 * XXX - Linux emulator
 	 * Make sure edx is 0x0 on entry. Linux binaries depend
 	 * on it.
 	 */
 	p->p_retval[1] = 0;
 }
 
 /*
  * Set the %cr0 control bits the kernel runs with and load the default
  * user data selector into %gs.
  */
 void
 cpu_setregs(void)
 {
 	unsigned int cr0;
 
 	/*
 	 * CR0_NE is also set by npxinit(); CR0_MP and CR0_TS are set again
 	 * at every execve().
 	 */
 	cr0 = rcr0() | CR0_NE | CR0_MP | CR0_TS;
 #ifndef I386_CPU
 	cr0 |= CR0_WP | CR0_AM;
 #endif
 	load_cr0(cr0);
 	load_gs(_udatasel);
 }
 
 /*
  * Sysctl handler for machdep.adjkerntz: perform the normal integer
  * get/set and, when a new value was supplied and accepted, call
  * resettodr().  Returns the result of sysctl_handle_int().
  */
 static int
 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 {
 	int rv;
 
 	rv = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	if (rv)
 		return (rv);
 	if (req->newptr)
 		resettodr();
 	return (0);
 }
 
 /*
  * machdep.adjkerntz: read/write integer backed by `adjkerntz'.  Access
  * goes through sysctl_machdep_adjkerntz(), which calls resettodr() after
  * a successful write.
  */
 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
 	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
 
 /* machdep.disable_rtc_set: read/write integer backed by `disable_rtc_set'. */
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
 	CTLFLAG_RW, &disable_rtc_set, 0, "");
 
 /* machdep.bootinfo: read-only export of the `bootinfo' structure. */
 SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
 	CTLFLAG_RD, &bootinfo, bootinfo, "");
 
 /* machdep.wall_cmos_clock: read/write integer backed by `wall_cmos_clock'. */
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
 /*
  * Initialize 386 and configure to run kernel
  */
 
 /*
  * Initialize segments & interrupt table
  */
 
 /*
  * Storage for the hardware descriptor tables.  The gdt array is sized
  * for NGDT entries times MAXCPU; idt0 holds NIDT gates and is reached
  * through the `idt' pointer (which is what setidt() indexes); ldt holds
  * NLDT entries and is the base of the GLDT_SEL entry in gdt_segs.
  */
 /* NOTE(review): _default_ldt is not assigned in this part of the file; presumably the default LDT selector -- confirm. */
 int _default_ldt;
 union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
 static struct gate_descriptor idt0[NIDT];
 struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
 union descriptor ldt[NLDT];		/* local descriptor table */
 #ifdef SMP
 /* table descriptors - used to load tables by microp */
 struct region_descriptor r_gdt, r_idt;
 #endif
 
 int private_tss;			/* flag indicating private tss */
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 extern int has_f00f_bug;
 #endif
 
 /*
  * dblfault_tss is the base of the GPANIC_SEL entry in gdt_segs.
  * NOTE(review): dblfault_stack is presumably the stack that TSS runs
  * on -- confirm where the TSS is initialized.
  */
 static struct i386tss dblfault_tss;
 static char dblfault_stack[PAGE_SIZE];
 
 /* Defined elsewhere. */
 extern  struct user *proc0paddr;
 
 
 /*
  * software prototypes -- in more palatable form
  *
  * Templates for the GDT entries, one per selector index.  Each
  * initializer is positional: base address, limit, segment type,
  * descriptor privilege level, present bit, two unused fields, the
  * 32-bit default-size flag and the limit granularity flag.  A limit of
  * 0xfffff combined with granularity 1 (page units) spans the whole
  * address space; the TSS and LDT entries instead carry an exact size
  * with granularity 0 (byte units).
  */
 struct soft_segment_descriptor gdt_segs[] = {
 /* GNULL_SEL	0 Null Descriptor */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GCODE_SEL	1 Code Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GDATA_SEL	2 Data Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPRIV_SEL	3 SMP Per-Processor Private Data Descriptor */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPROC0_SEL	4 Proc 0 Tss Descriptor */
 {
 	0x0,			/* segment base address */
 	sizeof(struct i386tss)-1,/* length - size of one TSS */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GLDT_SEL	5 LDT Descriptor */
 {	(int) ldt,		/* segment base address  */
 	sizeof(ldt)-1,		/* length - size of the ldt array */
 	SDT_SYSLDT,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GUSERLDT_SEL	6 User LDT Descriptor per process */
 {	(int) ldt,		/* segment base address  */
 	(512 * sizeof(union descriptor)-1),		/* length */
 	SDT_SYSLDT,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GTGATE_SEL	7 Null Descriptor - Placeholder */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 {	0x400,			/* segment base address */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPANIC_SEL	9 Panic Tss Descriptor */
 {	(int) &dblfault_tss,	/* segment base address  */
 	sizeof(struct i386tss)-1,/* length - size of one TSS */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Templates for the default LDT entries, using the same positional
  * field order as gdt_segs: base address, limit, segment type,
  * descriptor privilege level, present bit, two unused fields, the
  * 32-bit default-size flag and the limit granularity flag.  Only the
  * user code and user data entries are real segments; the others are
  * null placeholders.
  */
 static struct soft_segment_descriptor ldt_segs[] = {
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Code Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Data Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Fill in entry `idx' of the interrupt descriptor table.
  *
  *	idx	index of the gate within idt
  *	func	handler whose address is split across the low and high
  *		offset fields
  *	typ	gate type
  *	dpl	descriptor privilege level
  *	selec	code segment selector used to reach the handler
  *
  * The gate is always marked present.
  */
 void
 setidt(idx, func, typ, dpl, selec)
 	int idx;
 	inthand_t *func;
 	int typ;
 	int dpl;
 	int selec;
 {
 	struct gate_descriptor *gate = &idt[idx];
 	int handler = (int)func;
 
 	gate->gd_looffset = handler;
 	gate->gd_selector = selec;
 	gate->gd_stkcpy = 0;
 	gate->gd_xx = 0;
 	gate->gd_type = typ;
 	gate->gd_dpl = dpl;
 	gate->gd_p = 1;
 	gate->gd_hioffset = handler >> 16;
 }
 
 /* IDTVEC(name) pastes an `X' prefix onto name, e.g. IDTVEC(div) -> Xdiv. */
 #define	IDTVEC(name)	__CONCAT(X,name)
 
 /*
  * Exception and system call entry points, defined elsewhere.
  * NOTE(review): presumably the assembly-language trap stubs -- confirm.
  */
 extern inthand_t
 	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
 
 /*
  * Convert hardware segment descriptor `sd' into its software form
  * `ssd': the split base and limit fields are joined, the remaining
  * attributes are copied across unchanged.
  */
 void
 sdtossd(sd, ssd)
 	struct segment_descriptor *sd;
 	struct soft_segment_descriptor *ssd;
 {
 	/* Attribute bits map one to one. */
 	ssd->ssd_gran  = sd->sd_gran;
 	ssd->ssd_def32 = sd->sd_def32;
 	ssd->ssd_p     = sd->sd_p;
 	ssd->ssd_dpl   = sd->sd_dpl;
 	ssd->ssd_type  = sd->sd_type;
 
 	/* High part goes above bit 16 (limit) and bit 24 (base). */
 	ssd->ssd_limit = sd->sd_lolimit | (sd->sd_hilimit << 16);
 	ssd->ssd_base  = sd->sd_lobase | (sd->sd_hibase << 24);
 }
 
 /* Capacity of physmap[]: a base and a bound entry for each of 8 segments. */
 #define PHYSMAP_SIZE	(2 * 8)
 
 /*
  * Populate the (physmap) array with base/bound pairs describing the
  * available physical memory in the system, then test this memory and
  * build the phys_avail array describing the actually-available memory.
  *
  * If we cannot accurately determine the physical memory map, then use
  * value from the 0xE801 call, and failing that, the RTC.
  *
  * Total memory size may be set by the kernel environment variable
  * hw.physmem or the compile-time define MAXMEM.
  */
 static void
 getmemsize(int first)
 {
 	int i, physmap_idx, pa_indx;
 	u_int basemem, extmem;
 #ifdef PC98
 	int pg_n;
 	u_int under16;
 #else
 	struct vm86frame vmf;
 	struct vm86context vmc;
 #endif
 	vm_offset_t pa, physmap[PHYSMAP_SIZE];
 	pt_entry_t pte;
 	const char *cp;
 #ifndef PC98
 	struct bios_smap *smap;
 #endif
 
 #ifdef PC98
 	/* XXX - some of EPSON machines can't use PG_N */
 	pg_n = PG_N;
 	if (pc98_machine_type & M_EPSON_PC98) {
 		switch (epson_machine_id) {
 #ifdef WB_CACHE
 		default:
 #endif
 		case 0x34:		/* PC-486HX */
 		case 0x35:		/* PC-486HG */
 		case 0x3B:		/* PC-486HA */
 			pg_n = 0;
 			break;
 		}
 	}
 #else
 	bzero(&vmf, sizeof(struct vm86frame));
 #endif
 	bzero(physmap, sizeof(physmap));
 
 	/*
 	 * Perform "base memory" related probes & setup
 	 */
 #ifdef PC98
         under16 = pc98_getmemsize(&basemem, &extmem);
 #else
 	vm86_intcall(0x12, &vmf);
 	basemem = vmf.vmf_ax;
 #endif
 	if (basemem > 640) {
 		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 			basemem);
 		basemem = 640;
 	}
 
 	/*
 	 * XXX if biosbasemem is now < 640, there is a `hole'
 	 * between the end of base memory and the start of
 	 * ISA memory.  The hole may be empty or it may
 	 * contain BIOS code or data.  Map it read/write so
 	 * that the BIOS can write to it.  (Memory from 0 to
 	 * the physical end of the kernel is mapped read-only
 	 * to begin with and then parts of it are remapped.
 	 * The parts that aren't remapped form holes that
 	 * remain read-only and are unused by the kernel.
 	 * The base memory area is below the physical end of
 	 * the kernel and right now forms a read-only hole.
 	 * The part of it from PAGE_SIZE to
 	 * (trunc_page(biosbasemem * 1024) - 1) will be
 	 * remapped and used by the kernel later.)
 	 *
 	 * This code is similar to the code used in
 	 * pmap_mapdev, but since no memory needs to be
 	 * allocated we simply change the mapping.
 	 */
 	for (pa = trunc_page(basemem * 1024);
 	     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
 		pte = (pt_entry_t)vtopte(pa + KERNBASE);
 		*pte = pa | PG_RW | PG_V;
 	}
 
 	/*
 	 * if basemem != 640, map pages r/w into vm86 page table so 
 	 * that the bios can scribble on it.
 	 */
 	pte = (pt_entry_t)vm86paddr;
 	for (i = basemem / 4; i < 160; i++)
 		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 
 #ifndef PC98
 	/*
 	 * map page 1 R/W into the kernel page table so we can use it
 	 * as a buffer.  The kernel will unmap this page later.
 	 */
 	pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT));
 	*pte = (1 << PAGE_SHIFT) | PG_RW | PG_V;
 
 	/*
 	 * get memory map with INT 15:E820
 	 */
 	vmc.npages = 0;
 	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
 	vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 
 	physmap_idx = 0;
 	vmf.vmf_ebx = 0;
 	do {
 		vmf.vmf_eax = 0xE820;
 		vmf.vmf_edx = SMAP_SIG;
 		vmf.vmf_ecx = sizeof(struct bios_smap);
 		i = vm86_datacall(0x15, &vmf, &vmc);
 		if (i || vmf.vmf_eax != SMAP_SIG)
 			break;
 		if (boothowto & RB_VERBOSE)
 			printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n",
 				smap->type,
 				*(u_int32_t *)((char *)&smap->base + 4),
 				(u_int32_t)smap->base,
 				*(u_int32_t *)((char *)&smap->length + 4),
 				(u_int32_t)smap->length);
 
 		if (smap->type != 0x01)
 			goto next_run;
 
 		if (smap->length == 0)
 			goto next_run;
 
 		if (smap->base >= 0xffffffff) {
 			printf("%uK of memory above 4GB ignored\n",
 			    (u_int)(smap->length / 1024));
 			goto next_run;
 		}
 
 		for (i = 0; i <= physmap_idx; i += 2) {
 			if (smap->base < physmap[i + 1]) {
 				if (boothowto & RB_VERBOSE)
 					printf(
 	"Overlapping or non-montonic memory region, ignoring second region\n");
 				goto next_run;
 			}
 		}
 
 		if (smap->base == physmap[physmap_idx + 1]) {
 			physmap[physmap_idx + 1] += smap->length;
 			goto next_run;
 		}
 
 		physmap_idx += 2;
 		if (physmap_idx == PHYSMAP_SIZE) {
 			printf(
 		"Too many segments in the physical address map, giving up\n");
 			break;
 		}
 		physmap[physmap_idx] = smap->base;
 		physmap[physmap_idx + 1] = smap->base + smap->length;
 next_run:
 	} while (vmf.vmf_ebx != 0);
 
 	if (physmap[1] != 0)
 		goto physmap_done;
 
 	/*
 	 * If we failed above, try memory map with INT 15:E801
 	 */
 	vmf.vmf_ax = 0xE801;
 	if (vm86_intcall(0x15, &vmf) == 0) {
 		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 	} else {
 #if 0
 		vmf.vmf_ah = 0x88;
 		vm86_intcall(0x15, &vmf);
 		extmem = vmf.vmf_ax;
 #else
 		/*
 		 * Prefer the RTC value for extended memory.
 		 */
 		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 #endif
 	}
 
 	/*
 	 * Special hack for chipsets that still remap the 384k hole when
 	 * there's 16MB of memory - this really confuses people that
 	 * are trying to use bus mastering ISA controllers with the
 	 * "16MB limit"; they only have 16MB, but the remapping puts
 	 * them beyond the limit.
 	 *
 	 * If extended memory is between 15-16MB (16-17MB phys address range),
 	 *	chop it to 15MB.
 	 */
 	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 		extmem = 15 * 1024;
 #endif
 
 	physmap[0] = 0;
 	physmap[1] = basemem * 1024;
 	physmap_idx = 2;
 	physmap[physmap_idx] = 0x100000;
 	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 
 #ifdef PC98
         if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) {
 		/* 15M - 16M region is cut off, so need to divide chunk */
                 physmap[physmap_idx + 1] = under16 * 1024;
                 physmap_idx += 2;
                 physmap[physmap_idx] = 0x1000000;
                 physmap[physmap_idx + 1] = physmap[2] + extmem * 1024;
         }
 #else
 physmap_done:
 #endif
 	/*
 	 * Now, physmap contains a map of physical memory.
 	 */
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	physmap[1] = mp_bootaddress(physmap[1] / 1024);
 
 	/* look for the MP hardware - needed for apic addresses */
 	i386_mp_probe();
 #endif
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".  We may adjust this 
 	 * based on ``hw.physmem'' and the results of the memory test.
 	 */
 	Maxmem = atop(physmap[physmap_idx + 1]);
 
 #ifdef MAXMEM
 	Maxmem = MAXMEM / 4;
 #endif
 
 	/*
 	 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
 	 * for the appropriate modifiers.  This overrides MAXMEM.
 	 */
 	if ((cp = getenv("hw.physmem")) != NULL) {
 		u_int64_t AllowMem, sanity;
 		char *ep;
 
 		sanity = AllowMem = strtouq(cp, &ep, 0);
 		if ((ep != cp) && (*ep != 0)) {
 			switch(*ep) {
 			case 'g':
 			case 'G':
 				AllowMem <<= 10;
 			case 'm':
 			case 'M':
 				AllowMem <<= 10;
 			case 'k':
 			case 'K':
 				AllowMem <<= 10;
 				break;
 			default:
 				AllowMem = sanity = 0;
 			}
 			if (AllowMem < sanity)
 				AllowMem = 0;
 		}
 		if (AllowMem == 0)
 			printf("Ignoring invalid memory size of '%s'\n", cp);
 		else
 			Maxmem = atop(AllowMem);
 	}
 
 	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 	    (boothowto & RB_VERBOSE))
 		printf("Physical memory use set to %uK\n", Maxmem * 4);
 
 	/*
 	 * If Maxmem has been increased beyond what the system has detected,
 	 * extend the last memory segment to the new limit.
 	 */ 
 	if (atop(physmap[physmap_idx + 1]) < Maxmem)
 		physmap[physmap_idx + 1] = ptoa(Maxmem);
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap(first, 0);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
 	pa_indx = 0;
 	phys_avail[pa_indx++] = physmap[0];
 	phys_avail[pa_indx] = physmap[0];
 #if 0
 	pte = (pt_entry_t)vtopte(KERNBASE);
 #else
 	pte = (pt_entry_t)CMAP1;
 #endif
 
 	/*
 	 * physmap is in bytes, so when converting to page boundaries,
 	 * round up the start address and round down the end address.
 	 */
 	for (i = 0; i <= physmap_idx; i += 2) {
 		vm_offset_t end;
 
 		end = ptoa(Maxmem);
 		if (physmap[i + 1] < end)
 			end = trunc_page(physmap[i + 1]);
 		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 			int tmp, page_bad;
 #if 0
 			int *ptr = 0;
 #else
 			int *ptr = (int *)CADDR1;
 #endif
 
 			/*
 			 * block out kernel memory as not available.
 			 */
 			if (pa >= 0x100000 && pa < first)
 				continue;
 	
 			page_bad = FALSE;
 
 			/*
 			 * map page into kernel: valid, read/write,non-cacheable
 			 */
 #ifdef PC98
 			*pte = pa | PG_V | PG_RW | pg_n;
 #else
 			*pte = pa | PG_V | PG_RW | PG_N;
 #endif
 			invltlb();
 
 			tmp = *(int *)ptr;
 			/*
 			 * Test for alternating 1's and 0's
 			 */
 			*(volatile int *)ptr = 0xaaaaaaaa;
 			if (*(volatile int *)ptr != 0xaaaaaaaa) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for alternating 0's and 1's
 			 */
 			*(volatile int *)ptr = 0x55555555;
 			if (*(volatile int *)ptr != 0x55555555) {
 			page_bad = TRUE;
 			}
 			/*
 			 * Test for all 1's
 			 */
 			*(volatile int *)ptr = 0xffffffff;
 			if (*(volatile int *)ptr != 0xffffffff) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 0's
 			 */
 			*(volatile int *)ptr = 0x0;
 			if (*(volatile int *)ptr != 0x0) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Restore original value.
 			 */
 			*(int *)ptr = tmp;
 
 			/*
 			 * Adjust array of valid/good pages.
 			 */
 			if (page_bad == TRUE) {
 				continue;
 			}
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 * If we're also doing a speculative memory
 			 * test and we are at or past the end, bump up Maxmem
 			 * so that we keep going. The first bad page
 			 * will terminate the loop.
 			 */
 			if (phys_avail[pa_indx] == pa) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 					printf(
 		"Too many holes in the physical address space, giving up\n");
 					pa_indx--;
 					break;
 				}
 				phys_avail[pa_indx++] = pa;	/* start */
 				phys_avail[pa_indx] = pa + PAGE_SIZE;	/* end */
 			}
 			physmem++;
 		}
 	}
 	*pte = 0;
 	invltlb();
 
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 
 	avail_end = phys_avail[pa_indx];
 }
 
 /*
  * Early machine-dependent initialization.
  *
  * In order, this routine: records proc0's u-area, picks up the loader
  * metadata and kernel environment pointers from bootinfo, fills in and
  * loads the GDT, sets up the per-CPU curproc and the first mutexes
  * (acquiring Giant), builds the LDT and the IDT, initializes the console,
  * finishes CPU identification, installs the initial TSS and the double
  * fault TSS, sizes memory through getmemsize(), maps the message buffer,
  * creates the LDT call gates and finally initializes proc0's pcb.
  *
  * first: handed unchanged to getmemsize().
  *        NOTE(review): presumably the first free physical address past
  *        the loaded kernel - confirm against the caller.
  */
 void
 init386(first)
 	int first;
 {
 	struct gate_descriptor *gdp;
 	int gsel_tss, metadata_missing, off, x;
 #ifndef SMP
 	/*
 	 * table descriptors - used to load tables by microp
 	 * (passed to lgdt() and lidt() below).  Only declared locally in
 	 * the non-SMP build.
 	 * NOTE(review): presumably file-scope objects when SMP is defined -
 	 * confirm.
 	 */
 	struct region_descriptor r_gdt, r_idt;
 #endif
 
 	proc0.p_addr = proc0paddr;
 
 	/* ISA hole start, offset by KERNBASE. */
 	atdevbase = ISA_HOLE_START + KERNBASE;
 
 #ifdef PC98
 	/*
 	 * Initialize DMAC
 	 */
 	pc98_init_dmac();
 #endif
 
 	/*
 	 * Pick up the module metadata and environment pointers left in
 	 * bootinfo, offsetting each by KERNBASE.  A missing metadata
 	 * pointer is only remembered here; the warning is printed after
 	 * cninit() below, once the console has been initialized.
 	 */
 	metadata_missing = 0;
 	if (bootinfo.bi_modulep) {
 		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 		preload_bootstrap_relocate(KERNBASE);
 	} else {
 		metadata_missing = 1;
 	}
 	if (bootinfo.bi_envp)
 		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
 	/* Init basic tunables, hz etc */
 	init_param();
 
 	/*
 	 * make gdt memory segments, the code segment goes up to end of the
 	 * page with etext in it, the data segment goes to the end of
 	 * the address space
 	 */
 	/*
 	 * XXX text protection is temporarily (?) disabled.  The limit was
 	 * i386_btop(round_page(etext)) - 1.
 	 */
 	gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1);
 	gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1);
 	/*
 	 * The private (per-CPU) segment and the proc0 TSS descriptor are
 	 * based on the boot CPU's private space when SMP is configured and
 	 * on the single __globaldata object otherwise.
 	 */
 #ifdef SMP
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		atop(sizeof(struct privatespace) - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[0].globaldata.gd_common_tss;
 	SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0].globaldata;
 #else
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		atop(sizeof(struct globaldata) - 1);
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &__globaldata;
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &__globaldata.gd_common_tss;
 	__globaldata.gd_prvspace = &__globaldata;
 #endif
 
 	/* Convert every soft segment descriptor into its GDT entry. */
 	for (x = 0; x < NGDT; x++) {
 #ifdef BDE_DEBUGGER
 		/* avoid overwriting db entries with APM ones */
 		if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL)
 			continue;
 #endif
 		ssdtosd(&gdt_segs[x], &gdt[x].sd);
 	}
 
 	/* Load the new GDT. */
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base =  (int) gdt;
 	lgdt(&r_gdt);
 
 	/* setup curproc so that mutexes work */
 	PCPU_SET(curproc, &proc0);
 	PCPU_SET(spinlocks, NULL);
 
 	LIST_INIT(&proc0.p_contested);
 
 	/*
 	 * Initialize mutexes.  Giant is acquired here and is still held
 	 * when this function returns.
 	 */
 	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
 	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
 	mtx_init(&proc0.p_mtx, "process lock", MTX_DEF);
 	mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE);
 #ifdef SMP
 	mtx_init(&imen_mtx, "imen", MTX_SPIN);
 #endif
 	mtx_lock(&Giant);
 
 	/* make ldt memory segments */
 	/*
 	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
 	 * should be spelled ...MAX_USER...
 	 */
 	ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 	ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1);
 	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
 		ssdtosd(&ldt_segs[x], &ldt[x].sd);
 
 	/* Load the default LDT and record its selector per-CPU. */
 	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 	lldt(_default_ldt);
 	PCPU_SET(currentldt, _default_ldt);
 
 	/*
 	 * exceptions
 	 *
 	 * Every vector is first pointed at the "rsvd" handler, then the
 	 * individual vectors are overridden.  Vectors 3 (bpt), 4 (ofl) and
 	 * 0x80 (int0x80_syscall) are installed with SEL_UPL; all others
 	 * use SEL_KPL.  Vector 8 is a task gate through GPANIC_SEL rather
 	 * than a handler address.
 	 */
 	for (x = 0; x < NIDT; x++)
 		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
 		    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(1, &IDTVEC(dbg),  SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
  	setidt(3, &IDTVEC(bpt),  SDT_SYS386IGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL
 	    , GSEL(GCODE_SEL, SEL_KPL));
 	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(14, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
  	setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 
 	/* Load the IDT just built. */
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	r_idt.rd_base = (int) idt;
 	lidt(&r_idt);
 
 	/*
 	 * Initialize the console before we print anything out.
 	 */
 	cninit();
 
 	if (metadata_missing)
 		printf("WARNING: loader(8) metadata is missing!\n");
 
 #ifdef DEV_ISA
 	isa_defaultirq();
 #endif
 
 #ifdef DDB
 	kdb_init();
 	if (boothowto & RB_KDB)
 		Debugger("Boot flags requested debugger");
 #endif
 
 	finishidentcpu();	/* Final stage of CPU initialization */
 	/*
 	 * NOTE(review): vectors 6 and 13 are installed a second time here,
 	 * presumably because finishidentcpu() may temporarily replace them
 	 * while probing the CPU - confirm.
 	 */
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
 	    GSEL(GCODE_SEL, SEL_KPL));
 	initializecpu();	/* Initialize CPU registers */
 
 	/*
 	 * make an initial tss so cpu can get interrupt stack on syscall!
 	 * The ring 0 stack pointer is placed 16 bytes below the end of
 	 * proc0's UPAGES-page u-area.
 	 */
 	PCPU_SET(common_tss.tss_esp0,
 	    (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16);
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	private_tss = 0;
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	/* sizeof(struct i386tss) goes into the upper 16 bits of tss_ioopt. */
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	ltr(gsel_tss);
 
 	/*
 	 * Fill in the double fault TSS: all stack pointers refer to the
 	 * end of dblfault_stack, execution starts at dblfault_handler with
 	 * kernel segments, and the page directory is IdlePTD.
 	 */
 	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
 	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_cr3 = (int)IdlePTD;
 	dblfault_tss.tss_eip = (int)dblfault_handler;
 	dblfault_tss.tss_eflags = PSL_KERNEL;
 	dblfault_tss.tss_ds = dblfault_tss.tss_es =
 	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 
 	/*
 	 * Set up vm86 support, then size physical memory.  getmemsize()
 	 * calls pmap_bootstrap() and sets avail_end, which is used for the
 	 * message buffer mapping below.
 	 */
 	vm86_initialize();
 	getmemsize(first);
 
 	/* now running on new page tables, configured,and u/iom is accessible */
 
 	/* Map the message buffer, one page at a time, starting at avail_end. */
 	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
 		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
 
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 
 	/* make a call gate to reenter kernel with */
 	gdp = &ldt[LSYS5CALLS_SEL].gd;
 
 	/* The handler address is split across the low and high offset fields. */
 	x = (int) &IDTVEC(lcall_syscall);
 	gdp->gd_looffset = x;
 	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 	gdp->gd_stkcpy = 1;
 	gdp->gd_type = SDT_SYS386CGT;
 	gdp->gd_dpl = SEL_UPL;
 	gdp->gd_p = 1;
 	gdp->gd_hioffset = x >> 16;
 
 	/* XXX does this work? */
 	/* The other two call gate slots are plain copies of the one above. */
 	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 	ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
 
 	/* transfer to user mode */
 
 	/* Record the user code and data selectors (LDT entries, SEL_UPL). */
 	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
 	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
 
 	/* setup proc 0's pcb */
 	proc0.p_addr->u_pcb.pcb_flags = 0;
 	proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
 	proc0.p_addr->u_pcb.pcb_ext = 0;
 	proc0.p_frame = &proc0_tf;
 }
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 static void f00f_hack(void *unused);
 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 
 /*
  * Workaround for the F00F bug; does nothing unless has_f00f_bug is set.
  *
  * Two pages of kernel memory are allocated and the IDT is copied so that
  * its first seven 8-byte entries end exactly at the boundary between the
  * two pages.  The copy becomes the live IDT and the lower page is then
  * made read-only.  Any failure along the way panics.
  *
  * unused: SYSINIT argument, ignored.
  */
 static void
 f00f_hack(void *unused)
 {
 #ifndef SMP
 	struct region_descriptor r_idt;
 #endif
 	struct gate_descriptor *relocated;
 	vm_offset_t pages;
 
 	if (!has_f00f_bug)
 		return;
 
 	GIANT_REQUIRED;
 
 	printf("Intel Pentium detected, installing workaround for F00F bug\n");
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 
 	pages = kmem_alloc(kernel_map, PAGE_SIZE * 2);
 	if (pages == 0)
 		panic("kmem_alloc returned 0");
 	if (((unsigned int)pages & (PAGE_SIZE - 1)) != 0)
 		panic("kmem_alloc returned non-page-aligned memory");
 
 	/* Entries 0 through 6 land at the very end of the lower page. */
 	relocated = (struct gate_descriptor *)(pages + PAGE_SIZE - (7 * 8));
 	bcopy(idt, relocated, sizeof(idt0));
 
 	/* Switch the CPU over to the copy before publishing it in idt. */
 	r_idt.rd_base = (int)relocated;
 	lidt(&r_idt);
 	idt = relocated;
 
 	/* Write-protect the lower page only. */
 	if (vm_map_protect(kernel_map, pages, pages + PAGE_SIZE,
 	    VM_PROT_READ, FALSE) != KERN_SUCCESS)
 		panic("vm_map_protect failed");
 }
 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 
 int
 ptrace_set_pc(p, addr)
 	struct proc *p;
 	unsigned long addr;
 {
 	p->p_frame->tf_eip = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(p)
 	struct proc *p;
 {
 	p->p_frame->tf_eflags |= PSL_T;
 	return (0);
 }
 
 /*
  * Copy the register state of process p into *regs.  Everything comes
  * from the saved trap frame except %gs, which is taken from the pcb.
  * Always returns 0.
  */
 int
 fill_regs(struct proc *p, struct reg *regs)
 {
 	struct trapframe *frame = p->p_frame;
 	struct pcb *ctx = &p->p_addr->u_pcb;
 
 	/* Segment registers. */
 	regs->r_cs = frame->tf_cs;
 	regs->r_ds = frame->tf_ds;
 	regs->r_es = frame->tf_es;
 	regs->r_fs = frame->tf_fs;
 	regs->r_gs = ctx->pcb_gs;
 	regs->r_ss = frame->tf_ss;
 
 	/* General registers. */
 	regs->r_eax = frame->tf_eax;
 	regs->r_ebx = frame->tf_ebx;
 	regs->r_ecx = frame->tf_ecx;
 	regs->r_edx = frame->tf_edx;
 	regs->r_esi = frame->tf_esi;
 	regs->r_edi = frame->tf_edi;
 	regs->r_ebp = frame->tf_ebp;
 	regs->r_esp = frame->tf_esp;
 
 	/* Instruction pointer and flags. */
 	regs->r_eip = frame->tf_eip;
 	regs->r_eflags = frame->tf_eflags;
 
 	return (0);
 }
 
 /*
  * Install the register state in *regs into process p.
  *
  * The request is refused with EINVAL, and nothing is modified, unless the
  * new eflags pass EFL_SECURE() against the current eflags and the new %cs
  * passes CS_SECURE().  Otherwise every register is written to the saved
  * trap frame, except %gs which is written to the pcb, and 0 is returned.
  */
 int
 set_regs(struct proc *p, struct reg *regs)
 {
 	struct trapframe *frame = p->p_frame;
 	struct pcb *ctx;
 
 	if (!(EFL_SECURE(regs->r_eflags, frame->tf_eflags) &&
 	    CS_SECURE(regs->r_cs)))
 		return (EINVAL);
 
 	/* Segment registers kept in the trap frame. */
 	frame->tf_cs = regs->r_cs;
 	frame->tf_ds = regs->r_ds;
 	frame->tf_es = regs->r_es;
 	frame->tf_fs = regs->r_fs;
 	frame->tf_ss = regs->r_ss;
 
 	/* General registers. */
 	frame->tf_eax = regs->r_eax;
 	frame->tf_ebx = regs->r_ebx;
 	frame->tf_ecx = regs->r_ecx;
 	frame->tf_edx = regs->r_edx;
 	frame->tf_esi = regs->r_esi;
 	frame->tf_edi = regs->r_edi;
 	frame->tf_ebp = regs->r_ebp;
 	frame->tf_esp = regs->r_esp;
 
 	/* Instruction pointer and flags. */
 	frame->tf_eip = regs->r_eip;
 	frame->tf_eflags = regs->r_eflags;
 
 	/* %gs is stored in the pcb, not the trap frame. */
 	ctx = &p->p_addr->u_pcb;
 	ctx->pcb_gs = regs->r_gs;
 
 	return (0);
 }
 
 #ifdef CPU_ENABLE_SSE
 /*
  * Convert an XMM-format FPU save area (*sv_xmm) into the x87-format save
  * area *sv_87: the environment fields are copied one by one, then the
  * eight FPU accumulators, then the saved exception status word.
  */
 static void
 fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87)
 {
 	struct envxmm *from = &sv_xmm->sv_env;
 	struct env87 *to = &sv_87->sv_env;
 	int reg;
 
 	/* FPU control/status */
 	to->en_cw = from->en_cw;
 	to->en_sw = from->en_sw;
 	to->en_tw = from->en_tw;
 	to->en_fip = from->en_fip;
 	to->en_fcs = from->en_fcs;
 	to->en_opcode = from->en_opcode;
 	to->en_foo = from->en_foo;
 	to->en_fos = from->en_fos;
 
 	/* FPU registers */
 	for (reg = 0; reg < 8; reg++)
 		sv_87->sv_ac[reg] = sv_xmm->sv_fp[reg].fp_acc;
 
 	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
 }
 
 /*
  * Convert an x87-format FPU save area (*sv_87) into the XMM-format save
  * area *sv_xmm: the environment fields are copied one by one, then the
  * eight FPU accumulators, then the saved exception status word.  Fields
  * of *sv_xmm not named here are left untouched.
  */
 static void
 set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
 {
 	struct env87 *from = &sv_87->sv_env;
 	struct envxmm *to = &sv_xmm->sv_env;
 	int reg;
 
 	/* FPU control/status */
 	to->en_cw = from->en_cw;
 	to->en_sw = from->en_sw;
 	to->en_tw = from->en_tw;
 	to->en_fip = from->en_fip;
 	to->en_fcs = from->en_fcs;
 	to->en_opcode = from->en_opcode;
 	to->en_foo = from->en_foo;
 	to->en_fos = from->en_fos;
 
 	/* FPU registers */
 	for (reg = 0; reg < 8; reg++)
 		sv_xmm->sv_fp[reg].fp_acc = sv_87->sv_ac[reg];
 
 	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
 }
 #endif /* CPU_ENABLE_SSE */
 
 int
 fill_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
 						(struct save87 *)fpregs);
 		return (0);
 	}
 #endif /* CPU_ENABLE_SSE */
 	bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
 	return (0);
 }
 
 int
 set_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 #ifdef CPU_ENABLE_SSE
 	if (cpu_fxsr) {
 		set_fpregs_xmm((struct save87 *)fpregs,
 					   &p->p_addr->u_pcb.pcb_save.sv_xmm);
 		return (0);
 	}
 #endif /* CPU_ENABLE_SSE */
 	bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
 	return (0);
 }
 
 /*
  * Fill *dbregs with debug register contents.
  *
  * When p is NULL the values are read from the CPU's debug registers
  * (dr0 through dr7).  Otherwise they are taken from the pcb of process p,
  * which keeps dr0-dr3, dr6 and dr7; dr4 and dr5 are reported as 0.
  * Always returns 0.
  */
 int
 fill_dbregs(struct proc *p, struct dbreg *dbregs)
 {
 	struct pcb *ctx;
 
 	if (p == NULL) {
 		/* No process given: report the live hardware registers. */
 		dbregs->dr0 = rdr0();
 		dbregs->dr1 = rdr1();
 		dbregs->dr2 = rdr2();
 		dbregs->dr3 = rdr3();
 		dbregs->dr4 = rdr4();
 		dbregs->dr5 = rdr5();
 		dbregs->dr6 = rdr6();
 		dbregs->dr7 = rdr7();
 		return (0);
 	}
 
 	ctx = &p->p_addr->u_pcb;
 	dbregs->dr0 = ctx->pcb_dr0;
 	dbregs->dr1 = ctx->pcb_dr1;
 	dbregs->dr2 = ctx->pcb_dr2;
 	dbregs->dr3 = ctx->pcb_dr3;
 	dbregs->dr4 = 0;
 	dbregs->dr5 = 0;
 	dbregs->dr6 = ctx->pcb_dr6;
 	dbregs->dr7 = ctx->pcb_dr7;
 	return (0);
 }
 
 int
 set_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *pcb;
 	int i;
 	u_int32_t mask1, mask2;
 
 	if (p == NULL) {
 		load_dr0(dbregs->dr0);
 		load_dr1(dbregs->dr1);
 		load_dr2(dbregs->dr2);
 		load_dr3(dbregs->dr3);
 		load_dr4(dbregs->dr4);
 		load_dr5(dbregs->dr5);
 		load_dr6(dbregs->dr6);
 		load_dr7(dbregs->dr7);
 	}
 	else {
 		/*
 		 * Don't let an illegal value for dr7 get set.	Specifically,
 		 * check for undefined settings.  Setting these bit patterns
 		 * result in undefined behaviour and can lead to an unexpected
 		 * TRCTRAP.
 		 */
 		for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
 		     i++, mask1 <<= 2, mask2 <<= 2)
 			if ((dbregs->dr7 & mask1) == mask2)
 				return (EINVAL);
 		
 		pcb = &p->p_addr->u_pcb;
 		
 		/*
 		 * Don't let a process set a breakpoint that is not within the
 		 * process's address space.  If a process could do this, it
 		 * could halt the system by setting a breakpoint in the kernel
 		 * (if ddb was enabled).  Thus, we need to check to make sure
 		 * that no breakpoints are being enabled for addresses outside
 		 * process's address space, unless, perhaps, we were called by
 		 * uid 0.
 		 *
 		 * XXX - what about when the watched area of the user's
 		 * address space is written into from within the kernel
 		 * ... wouldn't that still cause a breakpoint to be generated
 		 * from within kernel mode?
 		 */
 
 		if (suser(p) != 0) {
 			if (dbregs->dr7 & 0x3) {
 				/* dr0 is enabled */
 				if (dbregs->dr0 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<2)) {
 				/* dr1 is enabled */
 				if (dbregs->dr1 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<4)) {
 				/* dr2 is enabled */
 				if (dbregs->dr2 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 			
 			if (dbregs->dr7 & (0x3<<6)) {
 				/* dr3 is enabled */
 				if (dbregs->dr3 >= VM_MAXUSER_ADDRESS)
 					return (EINVAL);
 			}
 		}
 
 		pcb->pcb_dr0 = dbregs->dr0;
 		pcb->pcb_dr1 = dbregs->dr1;
 		pcb->pcb_dr2 = dbregs->dr2;
 		pcb->pcb_dr3 = dbregs->dr3;
 		pcb->pcb_dr6 = dbregs->dr6;
 		pcb->pcb_dr7 = dbregs->dr7;
 
 		pcb->pcb_flags |= PCB_DBREGS;
 	}
 
 	return (0);
 }
 
 /*
  * Return > 0 if a hardware breakpoint has been hit, and the
  * breakpoint was in user space.  Return 0, otherwise.
  */
 int
 user_dbreg_trap(void)
 {
         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
         int nbp;            /* number of breakpoints that triggered */
         caddr_t addr[4];    /* breakpoint addresses */
         int i;
         
         dr7 = rdr7();
         if ((dr7 & 0x000000ff) == 0) {
                 /*
                  * all GE and LE bits in the dr7 register are zero,
                  * thus the trap couldn't have been caused by the
                  * hardware debug registers
                  */
                 return 0;
         }
 
         nbp = 0;
         dr6 = rdr6();
         bp = dr6 & 0x0000000f;
 
         if (!bp) {
                 /*
                  * None of the breakpoint bits are set meaning this
                  * trap was not caused by any of the debug registers
                  */
                 return 0;
         }
 
         /*
          * at least one of the breakpoints were hit, check to see
          * which ones and if any of them are user space addresses
          */
 
         if (bp & 0x01) {
                 addr[nbp++] = (caddr_t)rdr0();
         }
         if (bp & 0x02) {
                 addr[nbp++] = (caddr_t)rdr1();
         }
         if (bp & 0x04) {
                 addr[nbp++] = (caddr_t)rdr2();
         }
         if (bp & 0x08) {
                 addr[nbp++] = (caddr_t)rdr3();
         }
 
         for (i=0; i<nbp; i++) {
                 if (addr[i] <
                     (caddr_t)VM_MAXUSER_ADDRESS) {
                         /*
                          * addr[i] is in user space
                          */
                         return nbp;
                 }
         }
 
         /*
          * None of the breakpoints are in user space.
          */
         return 0;
 }
 
 
 #ifndef DDB
 /*
  * Stand-in used when the kernel is built without DDB: there is no
  * debugger to enter, so the request is only reported with printf().
  */
 void
 Debugger(const char *msg)
 {
 
 	printf("Debugger(\"%s\") called.\n", msg);
 }
 #endif /* no DDB */
 
 #include <sys/disklabel.h>
 
 /*
  * Check a transfer request against the partition it addresses.
  *
  * bp:     the request; bio_blkno is relative to the partition selected
  *         by dkpart(bp->bio_dev).
  * lp:     the disklabel holding the partition table.
  * wlabel: non-zero when writes over the label sector are permitted.
  *
  * Returns  1 when the transfer may proceed; bio_pblkno has been set and
  *            bio_bcount may have been shortened to fit the partition;
  *          0 when the transfer starts exactly at the end of the
  *            partition (bio_resid is set to bio_bcount);
  *         -1 on error, with bio_error set and BIO_ERROR flagged.
  */
 int
 bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
 {
 	struct partition *part = lp->d_partitions + dkpart(bp->bio_dev);
 	int labelsect = lp->d_partitions[0].p_offset;
 	int partsize = part->p_size;
 	/* Transfer length in sectors, rounded up. */
 	int nsect = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 
 	/* overwriting disk label ? */
 	/* XXX should also protect bootstrap in first 8K */
 	if (bp->bio_blkno + part->p_offset <= LABELSECTOR + labelsect &&
 #if LABELSECTOR != 0
 	    bp->bio_blkno + part->p_offset + nsect > LABELSECTOR + labelsect &&
 #endif
 	    (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
 		bp->bio_error = EROFS;
 		goto bad;
 	}
 
 #if     defined(DOSBBSECTOR) && defined(notyet)
 	/* overwriting master boot record? */
 	if (bp->bio_blkno + part->p_offset <= DOSBBSECTOR &&
 	    (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
 		bp->bio_error = EROFS;
 		goto bad;
 	}
 #endif
 
 	/* beyond partition? */
 	if (bp->bio_blkno < 0 || bp->bio_blkno + nsect > partsize) {
 		/* if exactly at end of disk, return an EOF */
 		if (bp->bio_blkno == partsize) {
 			bp->bio_resid = bp->bio_bcount;
 			return(0);
 		}
 		/* or truncate if part of it fits */
 		nsect = partsize - bp->bio_blkno;
 		if (nsect <= 0) {
 			bp->bio_error = EINVAL;
 			goto bad;
 		}
 		bp->bio_bcount = nsect << DEV_BSHIFT;
 	}
 
 	/* Translate the partition-relative block number. */
 	bp->bio_pblkno = bp->bio_blkno + part->p_offset;
 	return(1);
 
 bad:
 	bp->bio_flags |= BIO_ERROR;
 	return(-1);
 }
 
 #ifdef DDB
 
 /*
  * Provide inb() and outb() as functions.  They are normally only
  * available as macros calling inlined functions, thus cannot be
  * called inside DDB.
  *
  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
  */
 
 /* Drop the macro versions so the names can be defined as functions. */
 #undef inb
 #undef outb
 
 /* silence compiler warnings */
 u_char inb(u_int);
 void outb(u_int, u_char);
 
 /*
  * Read one byte from I/O port `port' and return it.
  */
 u_char
 inb(u_int port)
 {
 	u_char	data;
 	/*
 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
 	 * %edx, while gcc generates inferior code (movw instead of movl)
 	 * if we tell it to load (u_short) port.
 	 */
 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
 	return (data);
 }
 
 /*
  * Write the byte `data' to I/O port `port'.
  */
 void
 outb(u_int port, u_char data)
 {
 	u_char	al;
 	/*
 	 * Use an unnecessary assignment to help gcc's register allocator.
 	 * This makes a large difference for gcc-1.40 and a tiny difference
 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
 	 * best results.  gcc-2.6.0 can't handle this.
 	 */
 	al = data;
 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
 }
 
 #endif /* DDB */