Index: head/sys/alpha/alpha/machdep.c
===================================================================
--- head/sys/alpha/alpha/machdep.c	(revision 71983)
+++ head/sys/alpha/alpha/machdep.c	(revision 71984)
@@ -1,2255 +1,2256 @@
 /*-
  * Copyright (c) 1998 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 /*-
  * Copyright (c) 1998 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center and by Chris G. Demetriou.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the NetBSD
  *	Foundation, Inc. and its contributors.
  * 4. Neither the name of The NetBSD Foundation nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 /*
  * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Author: Chris G. Demetriou
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
 #include "opt_simos.h"
 #include "opt_msgbuf.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/eventhandler.h>
 #include <sys/sysproto.h>
 #include <sys/mutex.h>
 #include <sys/ktr.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/reboot.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/mbuf.h>
 #include <sys/vmmeter.h>
 #include <sys/msgbuf.h>
 #include <sys/exec.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 #include <sys/linker.h>
 #include <net/netisr.h>
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <sys/user.h>
 #include <sys/ptrace.h>
 #include <machine/clock.h>
 #include <machine/md_var.h>
 #include <machine/reg.h>
 #include <machine/fpu.h>
 #include <machine/pal.h>
 #include <machine/smp.h>
 #include <machine/globaldata.h>
 #include <machine/cpuconf.h>
 #include <machine/bootinfo.h>
 #include <machine/rpb.h>
 #include <machine/prom.h>
 #include <machine/chipset.h>
 #include <machine/vmparam.h>
 #include <machine/elf.h>
 #include <ddb/ddb.h>
 #include <alpha/alpha/db_instruction.h>
 #include <sys/vnode.h>
 #include <miscfs/procfs/procfs.h>
 #include <machine/sigframe.h>
 
 /*
  * Cycle-counter calibration derived from hwrpb->rpb_cc_freq in alpha_init():
  * cycles_per_usec is that frequency divided by 1000000 and rounded up,
  * cycles_per_sec is the frequency itself.
  */
 u_int64_t cycles_per_usec;
 u_int32_t cycles_per_sec;
 int cold = 1;
 /* Platform description; family, model and cons_init are read in this file. */
 struct platform platform;
 alpha_chipset_t chipset;
 /* Boot parameters collected by alpha_init() from bootinfo or the PROM. */
 struct bootinfo_kernel bootinfo;
 
 /* List of per-cpu globaldata structures; set up in alpha_init(). */
 struct cpuhead cpuhead;
 
 /* Both mutexes are initialized with mtx_init() in alpha_init(). */
 struct mtx sched_lock;
 struct mtx Giant;
 
 /* proc0's u-area, obtained from pmap_steal_memory() in alpha_init(). */
 struct	user *proc0paddr;
 
 /* Exported read-only through SYSCTL_STRING under the _hw node. */
 char machine[] = "alpha";
 SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
 
 /* Filled from platform.model in alpha_init(); exported read-only. */
 static char cpu_model[128];
 SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");
 
 #ifdef DDB
 /* start and end of kernel symbol table */
 void	*ksym_start, *ksym_end;
 #endif
 
 /* Unaligned-access policy knobs, each writable through a _machdep sysctl. */
 int	alpha_unaligned_print = 1;	/* warn about unaligned accesses */
 int	alpha_unaligned_fix = 1;	/* fix up unaligned accesses */
 int	alpha_unaligned_sigbus = 0;	/* don't SIGBUS on fixed-up accesses */
 
 SYSCTL_INT(_machdep, CPU_UNALIGNED_PRINT, unaligned_print,
 	CTLFLAG_RW, &alpha_unaligned_print, 0, "");
 
 SYSCTL_INT(_machdep, CPU_UNALIGNED_FIX, unaligned_fix,
 	CTLFLAG_RW, &alpha_unaligned_fix, 0, "");
 
 SYSCTL_INT(_machdep, CPU_UNALIGNED_SIGBUS, unaligned_sigbus,
 	CTLFLAG_RW, &alpha_unaligned_sigbus, 0, "");
 
 /* Register cpu_startup() to run first in the SI_SUB_CPU startup stage. */
 static void cpu_startup __P((void *));
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
 /* Kernel message buffer; placed at the end of the last phys_avail chunk. */
 struct msgbuf *msgbufp=0;
 
 /* Maxmem is set equal to physmem (a page count) by alpha_init(). */
 int bootverbose = 0, Maxmem = 0;
 long dumplo;
 
 /* The counters below are page counts summed from the HWRPB memory clusters. */
 int	totalphysmem;		/* total amount of physical memory in system */
 int	physmem;		/* memory in clusters usable by system software */
 int	resvmem;		/* amount of memory reserved for PROM */
 int	unusedmem;		/* amount of memory for OS that we don't use */
 int	unknownmem;		/* amount of memory with an unknown use */
 int	ncpus;			/* number of cpus */
 
 /*
  * Available physical memory as (start, end) byte-address pairs, ended by
  * a zero entry.
  */
 vm_offset_t phys_avail[10];
 
 /*
  * Read handler for the physmem node under _hw: passes physmem, converted
  * with alpha_ptob(), to sysctl_handle_int() and returns its result.
  */
 static int
 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
 {
 	return (sysctl_handle_int(oidp, 0, alpha_ptob(physmem), req));
 }
 
 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_physmem, "I", "");
 
 /*
  * Read handler for the usermem node under _hw: reports physmem less the
  * currently wired page count (cnt.v_wire_count), converted with
  * alpha_ptob(), through sysctl_handle_int().
  */
 static int
 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
 {
 	return (sysctl_handle_int(oidp, 0,
 	    alpha_ptob(physmem - cnt.v_wire_count), req));
 }
 
 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_usermem, "I", "");
 
 /* Export the raw physmem page count read-only under the _hw node. */
 SYSCTL_INT(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, "");
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
 
 void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code);
 
 /* Prints the CPU identification banner; called from cpu_startup(). */
 static void identifycpu __P((void));
 
 /*
  * Bounds of the submaps created in cpu_startup(): buffer_* for buffer_map,
  * clean_* for clean_map and pager_* for pager_map.
  */
 static vm_offset_t buffer_sva, buffer_eva;
 vm_offset_t clean_sva, clean_eva;
 static vm_offset_t pager_sva, pager_eva;
 
 /*
  * shutdown_final event handler (registered in cpu_startup()).  When the
  * shutdown request carries RB_HALT, call alpha_pal_halt(); otherwise do
  * nothing.  The first argument is unused.
  */
 static void
 alpha_srm_shutdown(void *junk, int howto)
 {
 	if ((howto & RB_HALT) == 0)
 		return;
 
 	alpha_pal_halt();
 }
 
 static void
 cpu_startup(dummy)
 	void *dummy;
 {
 	register unsigned i;
 	register caddr_t v;
 	vm_offset_t maxaddr;
 	vm_size_t size = 0;
 	vm_offset_t firstaddr;
 	vm_offset_t minaddr;
 
 	if (boothowto & RB_VERBOSE)
 		bootverbose++;
 
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	printf("%s", version);
 	identifycpu();
 
 	/* startrtclock(); */
 #ifdef PERFMON
 	perfmon_init();
 #endif
 	printf("real memory  = %ld (%ldK bytes)\n", alpha_ptob(Maxmem), alpha_ptob(Maxmem) / 1024);
 
 	/*
 	 * Display any holes after the first chunk of extended memory.
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
 			int size1 = phys_avail[indx + 1] - phys_avail[indx];
 
 			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
 			    phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE);
 		}
 	}
 
 	/*
 	 * Calculate callout wheel size
 	 */
 	for (callwheelsize = 1, callwheelbits = 0;
 	     callwheelsize < ncallout;
 	     callwheelsize <<= 1, ++callwheelbits)
 		;
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Allocate space for system data structures.
 	 * The first available kernel virtual address is in "v".
 	 * As pages of kernel virtual memory are allocated, "v" is incremented.
 	 * As pages of memory are allocated and cleared,
 	 * "firstaddr" is incremented.
 	 * An index into the kernel page table corresponding to the
 	 * virtual memory address maintained in "v" is kept in "mapaddr".
 	 */
 
 	/*
 	 * Make two passes.  The first pass calculates how much memory is
 	 * needed and allocates it.  The second pass assigns virtual
 	 * addresses to the various data structures.
 	 */
 	firstaddr = 0;
 again:
 	v = (caddr_t)firstaddr;
 
 #define	valloc(name, type, num) \
 	    (name) = (type *)v; v = (caddr_t)((name)+(num))
 #define	valloclim(name, type, num, lim) \
 	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
 
 	valloc(callout, struct callout, ncallout);
 	valloc(callwheel, struct callout_tailq, callwheelsize);
 
 	/*
 	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
 	 * For the first 64MB of ram nominally allocate sufficient buffers to
 	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
 	 * buffers to cover 1/20 of our ram over 64MB.
 	 */
 
 	if (nbuf == 0) {
 		int factor = 4 * BKVASIZE / PAGE_SIZE;
 
 		nbuf = 50;
 		if (physmem > 1024)
 			nbuf += min((physmem - 1024) / factor, 16384 / factor);
 		if (physmem > 16384)
 			nbuf += (physmem - 16384) * 2 / (factor * 5);
 	}
 	nswbuf = max(min(nbuf/4, 64), 16);
 
 	valloc(swbuf, struct buf, nswbuf);
 	valloc(buf, struct buf, nbuf);
 	v = bufhashinit(v);
 
 	/*
 	 * End of first pass, size has been calculated so allocate memory
 	 */
 	if (firstaddr == 0) {
 		size = (vm_size_t)(v - firstaddr);
 		firstaddr = (vm_offset_t)kmem_alloc(kernel_map, round_page(size));
 		if (firstaddr == 0)
 			panic("startup: no room for tables");
 		goto again;
 	}
 
 	/*
 	 * End of second pass, addresses have been assigned
 	 */
 	if ((vm_size_t)(v - firstaddr) != size)
 		panic("startup: table size inconsistency");
 
 	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
 			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
 	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
 				(nbuf*BKVASIZE));
+	buffer_map->system_map = 1;
 	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
 				(nswbuf*MAXPHYS) + pager_map_size);
 	pager_map->system_map = 1;
 	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 				(16*(ARG_MAX+(PAGE_SIZE*3))));
 
 	/*
 	 * XXX: Mbuf system machine-specific initializations should
 	 *      go here, if anywhere.
 	 */
 
 	/*
 	 * Initialize callouts
 	 */
 	SLIST_INIT(&callfree);
 	for (i = 0; i < ncallout; i++) {
 		callout_init(&callout[i], 0);
 		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
 	}
 
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&callwheel[i]);
 	}
 
 	mtx_init(&callout_lock, "callout", MTX_SPIN | MTX_RECURSE);
 
 #if defined(USERCONFIG)
 #if defined(USERCONFIG_BOOT)
 	if (1)
 #else
         if (boothowto & RB_CONFIG)
 #endif
 	{
 		userconfig();
 		cninit();	/* the preferred console may have changed */
 	}
 #endif
 
 	printf("avail memory = %ld (%ldK bytes)\n", ptoa(cnt.v_free_count),
 	    ptoa(cnt.v_free_count) / 1024);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 	EVENTHANDLER_REGISTER(shutdown_final, alpha_srm_shutdown, 0,
 			      SHUTDOWN_PRI_LAST);
 
 #ifdef SMP
 	/*
 	 * OK, enough kmem_alloc/malloc state should be up, lets get on with it!
 	 */
 	mp_start();			/* fire up the secondaries */
 	mp_announce();
 #endif  /* SMP */
 }
 
 /*
  * Retrieve the platform name from the DSR.
  */
 const char *
 alpha_dsr_sysname()
 {
 	struct dsrdb *dsr;
 	const char *sysname;
 
 	/*
 	 * DSR does not exist on early HWRPB versions.
 	 */
 	if (hwrpb->rpb_version < HWRPB_DSRDB_MINVERS)
 		return (NULL);
 
 	dsr = (struct dsrdb *)(((caddr_t)hwrpb) + hwrpb->rpb_dsrdb_off);
 	sysname = (const char *)((caddr_t)dsr + (dsr->dsr_sysname_off +
 	    sizeof(u_int64_t)));
 	return (sysname);
 }
 
 /*
  * Search a variation table for the given system variation.  The table is
  * terminated by an entry whose avt_model is NULL.  Returns the model
  * string of the first matching entry, or NULL when none matches.
  */
 const char *
 alpha_variation_name(u_int64_t variation,
 		     const struct alpha_variation_table *avtp)
 {
 	const struct alpha_variation_table *entry;
 
 	for (entry = avtp; entry->avt_model != NULL; entry++) {
 		if (entry->avt_variation == variation)
 			return (entry->avt_model);
 	}
 	return (NULL);
 }
 
 /*
  * Build a fallback platform name for an unrecognized system variation
  * from platform.family and the masked HWRPB variation field.  The result
  * lives in a static buffer that the next call overwrites.
  */
 const char *
 alpha_unknown_sysname()
 {
 	static char namebuf[128];		/* safe size */
 
 	snprintf(namebuf, sizeof(namebuf),
 	    "%s family, unknown model variation 0x%lx",
 	    platform.family, hwrpb->rpb_variation & SV_ST_MASK);
 	return (namebuf);
 }
 
 /*
  * Print the boot-time CPU identification banner: platform family and
  * model, clock frequency, page size, processor count, the primary CPU's
  * type (major/minor), the architecture extensions reported by amask on
  * EV56 and later, and the OSF PALcode revision.
  */
 static void
 identifycpu(void)
 {
 	u_int64_t type, major, minor;
 	u_int64_t amask;
 	struct pcs *pcsp;
 	/* Names indexed by the major processor type. */
 	static const char *const cpuname[] = {
 		"unknown",		/* 0 */
 		"EV3",			/* 1 */
 		"EV4 (21064)",		/* 2 */
 		"Simulation",		/* 3 */
 		"LCA Family",		/* 4 */
 		"EV5 (21164)",		/* 5 */
 		"EV45 (21064A)",	/* 6 */
 		"EV56 (21164A)",	/* 7 */
 		"EV6 (21264)",		/* 8 */
 		"PCA56 (21164PC)"	/* 9 */
 	};
 
 	/*
 	 * print out CPU identification information.
 	 */
 	printf("%s\n%s, %ldMHz\n", platform.family, platform.model,
 	    hwrpb->rpb_cc_freq / 1000000);	/* XXX true for 21164? */
 	printf("%ld byte page size, %d processor%s.\n",
 	    hwrpb->rpb_page_size, ncpus, ncpus == 1 ? "" : "s");
 #if 0
 	/* this isn't defined for any systems that we run on? */
 	printf("serial number 0x%lx 0x%lx\n",
 	    ((long *)hwrpb->rpb_ssn)[0], ((long *)hwrpb->rpb_ssn)[1]);
 
 	/* and these aren't particularly useful! */
 	printf("variation: 0x%lx, revision 0x%lx\n",
 	    hwrpb->rpb_variation, *(long *)hwrpb->rpb_revision);
 #endif
 	pcsp = LOCATE_PCS(hwrpb, hwrpb->rpb_primary_cpu_id);
 	/* cpu type */
 	type = pcsp->pcs_proc_type;
 	major = (type & PCS_PROC_MAJOR) >> PCS_PROC_MAJORSHIFT;
 	minor = (type & PCS_PROC_MINOR) >> PCS_PROC_MINORSHIFT;
 	/*
 	 * Neither branch ends the line: the amask section below always
 	 * supplies the terminating newline.
 	 */
 	if (major < sizeof(cpuname) / sizeof(cpuname[0]))
 		printf("CPU: %s major=%lu minor=%lu",
 			cpuname[major], major, minor);
 	else
 		printf("CPU: major=%lu minor=%lu", major, minor);
 	/* amask */
 	if (major >= PCS_PROC_EV56) {
 		amask = 0xffffffff; /* 32 bit for printf */
 		/* Invert the alpha_amask() result so set bits are listed. */
 		amask = (~alpha_amask(amask)) & amask;
 		printf(" extensions=0x%b\n", (u_int32_t) amask,
 			"\020"
 			"\001BWX"
 			"\002FIX"
 			"\003CIX"
 			"\011MVI"
 			"\012PRECISE"
 		);
 	} else
 		printf("\n");
 	/* PAL code */
 	printf("OSF PAL rev: 0x%lx\n", pcsp->pcs_palrevisions[PALvar_OSF1]);
 }
 
 extern char kernel_text[], _end[];
 
 /*
  * Append the page-frame range [pfn0, pfn1) to phys_avail[] as a pair of
  * byte addresses, where cnt is the number of entries already in use.
  * Returns the new entry count.  When the table has no room left (the
  * final two slots are reserved for the terminator) the range is dropped
  * with a warning and its pages are moved from physmem to unusedmem.
  */
 static int
 alpha_phys_avail_add(int cnt, vm_offset_t pfn0, vm_offset_t pfn1)
 {
 	if ((size_t)cnt >= PHYS_AVAIL_ARRAY_END) {
 		printf("WARNING: too many memory chunks, ignoring "
 		       "pfn 0x%lx - 0x%lx\n", pfn0, pfn1);
 		physmem -= pfn1 - pfn0;
 		unusedmem += pfn1 - pfn0;
 		return (cnt);
 	}
 	phys_avail[cnt] = alpha_ptob(pfn0);
 	phys_avail[cnt + 1] = alpha_ptob(pfn1);
 	return (cnt + 2);
 }
 
 /*
  * Early machine-dependent bootstrap for the boot processor.
  *
  * In order, this routine: collects boot information (from the boot
  * program's bootinfo when bim == BOOTINFO_MAGIC, otherwise from the PROM
  * environment), maps the HWRPB, brings up the bootstrap console, installs
  * the kernel's PALcode entry points, runs the platform init routine and
  * the real console init, builds phys_avail[] from the HWRPB memory
  * clusters, carves out the message buffer, proc0's u-area and the boot
  * cpu's globaldata, bootstraps the pmap, initializes Giant and sched_lock,
  * translates the boot flags into boothowto, counts the processors and
  * derives hz.
  *
  * The pfn argument is not referenced by the body.
  */
 void
 alpha_init(pfn, ptb, bim, bip, biv)
 	u_long pfn;		/* first free PFN number */
 	u_long ptb;		/* PFN of current level 1 page table */
 	u_long bim;		/* bootinfo magic */
 	u_long bip;		/* bootinfo pointer */
 	u_long biv;		/* bootinfo version */
 {
 	int phys_avail_cnt;
 	char *bootinfo_msg, *bootinfo_booted_kernel;
 	vm_offset_t kernstart, kernend;
 	vm_offset_t kernstartpfn, kernendpfn, pfn0, pfn1;
 	struct mddt *mddtp;
 	struct mddt_cluster *memc;
 	int i, mddtweird;
 	int cputype;
 	char *p;
 
 	/* NO OUTPUT ALLOWED UNTIL FURTHER NOTICE */
 
 	/*
 	 * Turn off interrupts (not mchecks) and floating point.
 	 * Make sure the instruction and data streams are consistent.
 	 */
 	(void)alpha_pal_swpipl(ALPHA_PSL_IPL_HIGH);
 	/* alpha_pal_wrfen(0); */
 	ALPHA_TBIA();
 	alpha_pal_imb();
 
 	/*
 	 * Get critical system information (if possible, from the
 	 * information provided by the boot program).
 	 */
 	bootinfo_msg = NULL;
 	bootinfo_booted_kernel = NULL;
 	if (bim == BOOTINFO_MAGIC) {
 		if (biv == 0) {		/* backward compat */
 			/* The version is the first word at bip. */
 			biv = *(u_long *)bip;
 			bip += 8;
 		}
 		switch (biv) {
 		case 1: {
 			struct bootinfo_v1 *v1p = (struct bootinfo_v1 *)bip;
 
 			bootinfo.ssym = v1p->ssym;
 			bootinfo.esym = v1p->esym;
 			bootinfo.kernend = v1p->kernend;
 			bootinfo.modptr = v1p->modptr;
 			bootinfo.envp = v1p->envp;
 			/* hwrpb may not be provided by boot block in v1 */
 			if (v1p->hwrpb != NULL) {
 				bootinfo.hwrpb_phys =
 				    ((struct rpb *)v1p->hwrpb)->rpb_phys;
 				bootinfo.hwrpb_size = v1p->hwrpbsize;
 			} else {
 				bootinfo.hwrpb_phys =
 				    ((struct rpb *)HWRPB_ADDR)->rpb_phys;
 				bootinfo.hwrpb_size =
 				    ((struct rpb *)HWRPB_ADDR)->rpb_size;
 			}
 			bcopy(v1p->boot_flags, bootinfo.boot_flags,
 			    min(sizeof v1p->boot_flags,
 			      sizeof bootinfo.boot_flags));
 			bcopy(v1p->booted_kernel, bootinfo.booted_kernel,
 			    min(sizeof v1p->booted_kernel,
 			      sizeof bootinfo.booted_kernel));
 			bootinfo_booted_kernel = bootinfo.booted_kernel;
 			/* booted dev not provided in bootinfo */
 			init_prom_interface((struct rpb *)
 			    ALPHA_PHYS_TO_K0SEG(bootinfo.hwrpb_phys));
 			prom_getenv(PROM_E_BOOTED_DEV, bootinfo.booted_dev,
 			    sizeof bootinfo.booted_dev);
 			break;
 		}
 		default:
 			bootinfo_msg = "unknown bootinfo version";
 			goto nobootinfo;
 		}
 	} else {
 		bootinfo_msg = "boot program did not pass bootinfo";
 	nobootinfo:
 		/* No usable bootinfo: fall back to the PROM environment. */
 		bootinfo.ssym = (u_long)&_end;
 		bootinfo.esym = (u_long)&_end;
 #ifdef SIMOS
 		{
 			char* p = (char*)bootinfo.ssym + 8;
 			if (p[EI_MAG0] == ELFMAG0
 			    && p[EI_MAG1] == ELFMAG1
 			    && p[EI_MAG2] == ELFMAG2
 			    && p[EI_MAG3] == ELFMAG3) {
 				bootinfo.ssym = (u_long) p;
 				bootinfo.esym = (u_long)p + *(u_long*)(p - 8);
 			}
 		}
 #endif
 		bootinfo.hwrpb_phys = ((struct rpb *)HWRPB_ADDR)->rpb_phys;
 		bootinfo.hwrpb_size = ((struct rpb *)HWRPB_ADDR)->rpb_size;
 		init_prom_interface((struct rpb *)HWRPB_ADDR);
 		prom_getenv(PROM_E_BOOTED_OSFLAGS, bootinfo.boot_flags,
 			    sizeof bootinfo.boot_flags);
 #ifndef SIMOS
 		prom_getenv(PROM_E_BOOTED_FILE, bootinfo.booted_kernel,
 			    sizeof bootinfo.booted_kernel);
 #endif
 		prom_getenv(PROM_E_BOOTED_DEV, bootinfo.booted_dev,
 			    sizeof bootinfo.booted_dev);
 	}
 
 	/*
 	 * Initialize the kernel's mapping of the RPB.  It's needed for
 	 * lots of things.
 	 */
 	hwrpb = (struct rpb *)ALPHA_PHYS_TO_K0SEG(bootinfo.hwrpb_phys);
 
 	/*
 	 * Remember how many cycles there are per microsecond, 
 	 * so that we can use delay().  Round up, for safety.
 	 */
 	cycles_per_usec = (hwrpb->rpb_cc_freq + 999999) / 1000000;
 
 	/*
 	 * Remember how many cycles there are per second, for coping
 	 * with missed clock interrupts.
 	 */
 	cycles_per_sec = hwrpb->rpb_cc_freq;
 
 	/* Get the loader(8) metadata */
 	preload_metadata = (caddr_t)bootinfo.modptr;
 	kern_envp = bootinfo.envp;
 
 	/*
 	 * Initialize the (temporary) bootstrap console interface, so
 	 * we can use printf until the VM system starts being setup.
 	 * The real console is initialized before then.
 	 */
 	init_bootstrap_console();
 
 	/* OUTPUT NOW ALLOWED */
 
 	/* delayed from above */
 	if (bootinfo_msg)
 		printf("WARNING: %s (0x%lx, 0x%lx, 0x%lx)\n",
 		       bootinfo_msg, bim, bip, biv);
 
 	/*
 	 * Point interrupt/exception vectors to our own.
 	 */
 	alpha_pal_wrent(XentInt, ALPHA_KENTRY_INT);
 	alpha_pal_wrent(XentArith, ALPHA_KENTRY_ARITH);
 	alpha_pal_wrent(XentMM, ALPHA_KENTRY_MM);
 	alpha_pal_wrent(XentIF, ALPHA_KENTRY_IF);
 	alpha_pal_wrent(XentUna, ALPHA_KENTRY_UNA);
 	alpha_pal_wrent(XentSys, ALPHA_KENTRY_SYS);
 
 	/*
 	 * Clear pending machine checks and error reports, and enable
 	 * system- and processor-correctable error reporting.
 	 */
 	alpha_pal_wrmces(alpha_pal_rdmces() &
 			 ~(ALPHA_MCES_DSC|ALPHA_MCES_DPC));
 
 	/*
 	 * Find out what hardware we're on, and do basic initialization.
 	 */
 	cputype = hwrpb->rpb_type;
 	if (cputype < 0) {
 		/*
 		 * At least some white-box (NT) systems have SRM which
 		 * reports a systype that's the negative of their
 		 * blue-box (UNIX/OVMS) counterpart.
 		 */
 		cputype = -cputype;
 	}
 	
 	/*
 	 * Types at or above API_ST_BASE index api_cpuinit[] (after
 	 * subtracting the base); all others index cpuinit[].
 	 */
 	if (cputype >= API_ST_BASE) {
 		if (cputype >= napi_cpuinit + API_ST_BASE) {
 			platform_not_supported(cputype);
 			/* NOTREACHED */
 		}
 		cputype -= API_ST_BASE;
 		api_cpuinit[cputype].init(cputype);
 	} else {
 		if (cputype >= ncpuinit) {
 			platform_not_supported(cputype);
 			/* NOTREACHED */
 		}	
 		cpuinit[cputype].init(cputype);
 	}
 	snprintf(cpu_model, sizeof(cpu_model), "%s", platform.model);
 
 	/*
 	 * Initialize the real console, so that the bootstrap console is
 	 * no longer necessary.
 	 */
 	if (platform.cons_init)
 		platform.cons_init();
 
 	/* NO MORE FIRMWARE ACCESS ALLOWED */
 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
 	/*
 	 * XXX (unless _PMAP_MAY_USE_PROM_CONSOLE is defined and
 	 * XXX pmap_uses_prom_console() evaluates to non-zero.)
 	 */
 #endif
 
 	/*
 	 * find out this system's page size
 	 */
 	if (hwrpb->rpb_page_size != PAGE_SIZE)
 		panic("page size %ld != 8192?!", hwrpb->rpb_page_size);
 
 
 	/*
 	 * Find the beginning and end of the kernel (and leave a
 	 * bit of space before the beginning for the bootstrap
 	 * stack).
 	 */
 	kernstart = trunc_page(kernel_text) - 2 * PAGE_SIZE;
 #ifdef DDB
 	ksym_start = (void *)bootinfo.ssym;
 	ksym_end   = (void *)bootinfo.esym;
 	kernend = (vm_offset_t)round_page(ksym_end);
 #else
 	kernend = (vm_offset_t)round_page(_end);
 #endif
 	/* But if the bootstrap tells us otherwise, believe it! */
 	if (bootinfo.kernend)
 		kernend = round_page(bootinfo.kernend);
 	if (preload_metadata == NULL)
 		printf("WARNING: loader(8) metadata is missing!\n");
 
 	kernstartpfn = atop(ALPHA_K0SEG_TO_PHYS(kernstart));
 	kernendpfn = atop(ALPHA_K0SEG_TO_PHYS(kernend));
 #ifdef SIMOS
 	/* 
 	 * SimOS console puts the bootstrap stack after kernel
 	 */
 	kernendpfn += 4;
 #endif
 
 	/*
 	 * Find out how much memory is available, by looking at
 	 * the memory cluster descriptors.  This also tries to do
 	 * its best to detect things that have never been seen
 	 * before...
 	 */
 	mddtp = (struct mddt *)(((caddr_t)hwrpb) + hwrpb->rpb_memdat_off);
 
 	/* MDDT SANITY CHECKING */
 	mddtweird = 0;
 	if (mddtp->mddt_cluster_cnt < 2) {
 		mddtweird = 1;
 		printf("WARNING: weird number of mem clusters: %ld\n",
 		       mddtp->mddt_cluster_cnt);
 	}
 
 #ifdef DEBUG_CLUSTER
 	printf("Memory cluster count: %ld\n", mddtp->mddt_cluster_cnt);
 #endif
 
 	phys_avail_cnt = 0;
 	for (i = 0; i < mddtp->mddt_cluster_cnt; i++) {
 		memc = &mddtp->mddt_clusters[i];
 #ifdef DEBUG_CLUSTER
 		printf("MEMC %d: pfn 0x%lx cnt 0x%lx usage 0x%lx\n", i,
 		       memc->mddt_pfn, memc->mddt_pg_cnt, memc->mddt_usage);
 #endif
 		totalphysmem += memc->mddt_pg_cnt;
 
 		if (memc->mddt_usage & MDDT_mbz) {
 			mddtweird = 1;
 			printf("WARNING: mem cluster %d has weird "
 			       "usage 0x%lx\n", i, memc->mddt_usage);
 			unknownmem += memc->mddt_pg_cnt;
 			continue;
 		}
 		if (memc->mddt_usage & MDDT_NONVOLATILE) {
 			/* XXX should handle these... */
 			printf("WARNING: skipping non-volatile mem "
 			       "cluster %d\n", i);
 			unusedmem += memc->mddt_pg_cnt;
 			continue;
 		}
 		if (memc->mddt_usage & MDDT_PALCODE) {
 			resvmem += memc->mddt_pg_cnt;
 			continue;
 		}
 
 		/*
 		 * We have a memory cluster available for system
 		 * software use.  We must determine if this cluster
 		 * holds the kernel.
 		 */
 		/*
 		 * XXX If the kernel uses the PROM console, we only use the
 		 * XXX memory after the kernel in the first system segment,
 		 * XXX to avoid clobbering prom mapping, data, etc.
 		 */
 		physmem += memc->mddt_pg_cnt;
 		pfn0 = memc->mddt_pfn;
 		pfn1 = memc->mddt_pfn + memc->mddt_pg_cnt;
 		if (pfn0 <= kernendpfn && kernstartpfn <= pfn1) {
 			/*
 			 * Must compute the location of the kernel
 			 * within the segment.
 			 */
 #ifdef DEBUG_CLUSTER
 			printf("Cluster %d contains kernel\n", i);
 #endif
 			if (!pmap_uses_prom_console()) {
 				if (pfn0 < kernstartpfn) {
 					/*
 					 * There is a chunk before the kernel.
 					 */
 #ifdef DEBUG_CLUSTER
 					printf("Loading chunk before kernel: "
 					       "0x%lx / 0x%lx\n", pfn0, kernstartpfn);
 #endif
 					phys_avail_cnt = alpha_phys_avail_add(
 					    phys_avail_cnt, pfn0, kernstartpfn);
 				}
 			}
 			if (kernendpfn < pfn1) {
 				/*
 				 * There is a chunk after the kernel.
 				 */
 #ifdef DEBUG_CLUSTER
 				printf("Loading chunk after kernel: "
 				       "0x%lx / 0x%lx\n", kernendpfn, pfn1);
 #endif
 				phys_avail_cnt = alpha_phys_avail_add(
 				    phys_avail_cnt, kernendpfn, pfn1);
 			}
 		} else {
 			/*
 			 * Just load this cluster as one chunk.
 			 */
 #ifdef DEBUG_CLUSTER
 			printf("Loading cluster %d: 0x%lx / 0x%lx\n", i,
 			       pfn0, pfn1);
 #endif
 			phys_avail_cnt = alpha_phys_avail_add(phys_avail_cnt,
 			    pfn0, pfn1);
 			
 		}
 	}
 	phys_avail[phys_avail_cnt] = 0;
 
 	/*
 	 * Dump out the MDDT if it looks odd...
 	 */
 	if (mddtweird) {
 		printf("\n");
 		printf("complete memory cluster information:\n");
 		for (i = 0; i < mddtp->mddt_cluster_cnt; i++) {
 			printf("mddt %d:\n", i);
 			printf("\tpfn %lx\n",
 			       mddtp->mddt_clusters[i].mddt_pfn);
 			printf("\tcnt %lx\n",
 			       mddtp->mddt_clusters[i].mddt_pg_cnt);
 			printf("\ttest %lx\n",
 			       mddtp->mddt_clusters[i].mddt_pg_test);
 			printf("\tbva %lx\n",
 			       mddtp->mddt_clusters[i].mddt_v_bitaddr);
 			printf("\tbpa %lx\n",
 			       mddtp->mddt_clusters[i].mddt_p_bitaddr);
 			printf("\tbcksum %lx\n",
 			       mddtp->mddt_clusters[i].mddt_bit_cksum);
 			printf("\tusage %lx\n",
 			       mddtp->mddt_clusters[i].mddt_usage);
 		}
 		printf("\n");
 	}
 
 	Maxmem = physmem;
 
 	/*
 	 * Everything below takes memory from phys_avail[]; with no chunk
 	 * recorded the message buffer code would index before the array.
 	 */
 	if (phys_avail_cnt == 0)
 		panic("alpha_init: no usable memory clusters");
 
 	/*
 	 * Initialize error message buffer (at end of core).
 	 */
 	{
 		size_t sz = round_page(MSGBUF_SIZE);
 		int i = phys_avail_cnt - 2;
 
 		/* shrink so that it'll fit in the last segment */
 		if (phys_avail[i+1] - phys_avail[i] < sz)
 			sz = phys_avail[i+1] - phys_avail[i];
 
 		phys_avail[i+1] -= sz;
 		msgbufp = (struct msgbuf*) ALPHA_PHYS_TO_K0SEG(phys_avail[i+1]);
 
 		msgbufinit(msgbufp, sz);
 
 		/*
 		 * Remove the last segment if it now has no pages.  Clear
 		 * both entries: cpu_startup() tests the end address for
 		 * the terminator, so clearing only the start would leave
 		 * a bogus chunk beginning at address 0.
 		 */
 		if (phys_avail[i] == phys_avail[i+1])
 			phys_avail[i] = phys_avail[i+1] = 0;
 
 		/* warn if the message buffer had to be shrunk */
 		if (sz != round_page(MSGBUF_SIZE))
 			printf("WARNING: %ld bytes not available for msgbuf in last cluster (%ld used)\n",
 			    round_page(MSGBUF_SIZE), sz);
 
 	}
 
 	/*
 	 * Init mapping for u page(s) for proc 0
 	 */
 	proc0.p_addr = proc0paddr =
 	    (struct user *)pmap_steal_memory(UPAGES * PAGE_SIZE);
 
 	/*
 	 * Setup the global data for the bootstrap cpu.
 	 */
 	{
 		size_t sz = round_page(UPAGES * PAGE_SIZE);
 		globalp = (struct globaldata *) pmap_steal_memory(sz);
 		globaldata_init(globalp, alpha_pal_whami(), sz);
 		alpha_pal_wrval((u_int64_t) globalp);
 		PCPU_GET(next_asn) = 1;	/* 0 used for proc0 pmap */
 	}
 
 	/*
 	 * Initialize the virtual memory system, and set the
 	 * page table base register in proc 0's PCB.
 	 */
 	pmap_bootstrap(ALPHA_PHYS_TO_K0SEG(alpha_ptob(ptb)),
 	    hwrpb->rpb_max_asn);
 	/* The HWRPB was modified, so its checksum must be recomputed. */
 	hwrpb->rpb_vptb = VPTBASE;
 	hwrpb->rpb_checksum = hwrpb_checksum();
 
 
 	/*
 	 * Initialize the rest of proc 0's PCB, and cache its physical
 	 * address.
 	 */
 	proc0.p_md.md_pcbpaddr =
 	    (struct pcb *)ALPHA_K0SEG_TO_PHYS((vm_offset_t)&proc0paddr->u_pcb);
 
 	/*
 	 * Set the kernel sp, reserving space for an (empty) trapframe,
 	 * and make proc0's trapframe pointer point to it for sanity.
 	 */
 	proc0paddr->u_pcb.pcb_hw.apcb_ksp =
 	    (u_int64_t)proc0paddr + USPACE - sizeof(struct trapframe);
 	proc0.p_md.md_tf =
 	    (struct trapframe *)proc0paddr->u_pcb.pcb_hw.apcb_ksp;
 	PCPU_SET(curproc, &proc0);
 
 	/*
 	 * Get the right value for the boot cpu's idle ptbr.
 	 */
 	globalp->gd_idlepcb.apcb_ptbr = proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr;
 
 	/*
 	 * Record all cpus in a list.
 	 */
 	SLIST_INIT(&cpuhead);
 	SLIST_INSERT_HEAD(&cpuhead, GLOBALP, gd_allcpu);
 
 	/* Setup curproc so that mutexes work */
 	PCPU_SET(curproc, &proc0);
 
 	LIST_INIT(&proc0.p_heldmtx);
 	LIST_INIT(&proc0.p_contested);
 
 	/*
 	 * Initialise mutexes.
 	 */
 	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
 	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
 	mtx_enter(&Giant, MTX_DEF);
 
 	/*
 	 * Look at arguments passed to us and compute boothowto.
 	 */
 
 #ifdef KADB
 	boothowto |= RB_KDB;
 #endif
 /*	boothowto |= RB_KDB | RB_GDB; */
 	for (p = bootinfo.boot_flags; p && *p != '\0'; p++) {
 		/*
 		 * Note that we'd really like to differentiate case here,
 		 * but the Alpha AXP Architecture Reference Manual
 		 * says that we shouldn't.
 		 */
 		switch (*p) {
 		case 'a': /* autoboot */
 		case 'A':
 			boothowto &= ~RB_SINGLE;
 			break;
 
 #ifdef DEBUG
 		case 'c': /* crash dump immediately after autoconfig */
 		case 'C':
 			boothowto |= RB_DUMP;
 			break;
 #endif
 
 #if defined(DDB)
 		case 'd': /* break into the kernel debugger ASAP */
 		case 'D':
 			boothowto |= RB_KDB;
 			break;
 		case 'g': /* use kernel gdb */
 		case 'G':
 			boothowto |= RB_GDB;
 			break;
 #endif
 
 		case 'h': /* always halt, never reboot */
 		case 'H':
 			boothowto |= RB_HALT;
 			break;
 
 #if 0
 		case 'm': /* mini root present in memory */
 		case 'M':
 			boothowto |= RB_MINIROOT;
 			break;
 #endif
 
 		case 'n': /* askname */
 		case 'N':
 			boothowto |= RB_ASKNAME;
 			break;
 
 		case 's': /* single-user (default, supported for sanity) */
 		case 'S':
 			boothowto |= RB_SINGLE;
 			break;
 
 		case 'v':
 		case 'V':
 			boothowto |= RB_VERBOSE;
 			bootverbose = 1;
 			break;
 
 		default:
 			printf("Unrecognized boot flag '%c'.\n", *p);
 			break;
 		}
 	}
 
 	/*
 	 * Catch case of boot_verbose set in environment.
 	 */
 	if ((p = getenv("boot_verbose")) != NULL) {
 		if (strcmp(p, "yes") == 0 || strcmp(p, "YES") == 0) {
 			boothowto |= RB_VERBOSE;
 			bootverbose = 1;
 		}
 	}
 
 	/*
 	 * Pick up kernelname.
 	 */
 	if (bootinfo_booted_kernel) {
 		strncpy(kernelname, bootinfo_booted_kernel,
 		   min(sizeof(kernelname), sizeof bootinfo.booted_kernel) - 1);
 	} else if ((p = getenv("kernelname")) != NULL) {
 		strncpy(kernelname, p, sizeof(kernelname) - 1);
 	}
 
 	/*
 	 * Initialize debuggers, and break into them if appropriate.
 	 */
 #ifdef DDB
 	kdb_init();
 	if (boothowto & RB_KDB) {
 		printf("Boot flags requested debugger\n");
 		breakpoint();
 	}
 #endif
 
 	/*
 	 * Figure out the number of cpus in the box, from RPB fields.
 	 * Really.  We mean it.
 	 */
 	for (i = 0; i < hwrpb->rpb_pcs_cnt; i++) {
 		struct pcs *pcsp;
 
 		pcsp = (struct pcs *)((char *)hwrpb + hwrpb->rpb_pcs_off +
 		    (i * hwrpb->rpb_pcs_size));
 		/* Only slots with the PCS_PP flag set are counted. */
 		if ((pcsp->pcs_flags & PCS_PP) != 0)
 			ncpus++;
 	}
 
 	/*
 	 * Figure out our clock frequency, from RPB fields.  Values
 	 * outside 60..10240 are replaced with 1024.
 	 */
 	hz = hwrpb->rpb_intr_freq >> 12;
 	if (!(60 <= hz && hz <= 10240)) {
 		hz = 1024;
 #ifdef DIAGNOSTIC
 		printf("WARNING: unbelievable rpb_intr_freq: %ld (%d hz)\n",
 			hwrpb->rpb_intr_freq, hz);
 #endif
 	}
 
 	hwrpb_restart_setup();
 
 	alpha_pal_wrfen(0);
 }
 
 void
 bzero(void *buf, size_t len)
 {
 	caddr_t p = buf;
 
 	while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) {
 		*p++ = 0;
 		len--;
 	}
 	while (len >= sizeof(u_long) * 8) {
 		*(u_long*) p = 0;
 		*((u_long*) p + 1) = 0;
 		*((u_long*) p + 2) = 0;
 		*((u_long*) p + 3) = 0;
 		len -= sizeof(u_long) * 8;
 		*((u_long*) p + 4) = 0;
 		*((u_long*) p + 5) = 0;
 		*((u_long*) p + 6) = 0;
 		*((u_long*) p + 7) = 0;
 		p += sizeof(u_long) * 8;
 	}
 	while (len >= sizeof(u_long)) {
 		*(u_long*) p = 0;
 		len -= sizeof(u_long);
 		p += sizeof(u_long);
 	}
 	while (len) {
 		*p++ = 0;
 		len--;
 	}
 }
 
 /*
  * Busy-wait by polling the processor cycle counter until more than n
  * microseconds' worth of cycles (at cycles_per_usec cycles each) have
  * been counted.  Returns immediately for n == 0.  When SIMOS is defined
  * the body is compiled out and the function does nothing.
  */
 void
 DELAY(int n)
 {
 #ifndef	SIMOS
 	unsigned long prev, now, pending;
 	int elapsed;
 
 	if (n == 0)
 		return;
 
 	prev = alpha_rpcc() & 0xffffffffUL;
 	pending = 0;
 
 	for (elapsed = 0; elapsed <= n; prev = now) {
 		/*
 		 * Sample the low 32 bits of the cycle counter again.  The
 		 * assumption is that the counter cannot have wrapped more
 		 * than once since the previous sample, so a smaller value
 		 * means exactly one wrap.
 		 */
 		now = alpha_rpcc() & 0xffffffffUL;
 		pending += (now < prev) ?
 		    (now + 0x100000000UL) - prev : now - prev;
 
 		/*
 		 * Convert accumulated cycles into whole microseconds,
 		 * carrying the remainder over to the next sample.
 		 */
 		while (pending > cycles_per_usec) {
 			elapsed++;
 			pending -= cycles_per_usec;
 		}
 	}
 #endif
 }
 
 /*
  * Send an interrupt to process, using the old-style (osiginfo_t)
  * signal frame.
  *
  * An osiginfo_t is built on the kernel stack and copied out to the user
  * stack (or to the top of the alternate signal stack).  The trapframe is
  * then rewritten so that the process resumes in the signal trampoline
  * located at PS_STRINGS - (esigcode - sigcode) with:
  *	a0  = sig
  *	a1  = pointer to the user copy of the frame if the handler asked
  *	      for siginfo, otherwise code
  *	a2  = pointer to the sigcontext inside the user copy
  *	t12 = catcher (the handler address)
  *	usp = address of the user copy
  *
  * If the target area is not writable the process is sent a SIGILL with
  * the default action instead.
  */
 void
 osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
 {
 	struct proc *p = curproc;
 	osiginfo_t *sip, ksi;
 	struct trapframe *frame;
 	struct sigacts *psp;
 	int oonstack, fsize, rndfsize;
 
 	frame = p->p_md.md_tf;
 	oonstack = sigonstack(alpha_pal_rdusp());
 	fsize = sizeof ksi;
 	/* Round the frame size up to a multiple of 16 bytes. */
 	rndfsize = ((fsize + 15) / 16) * 16;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 
 	/*
 	 * Allocate and validate space for the signal handler
 	 * context. Note that if the stack is in P0 space, the
 	 * call to grow_stack() is a nop, and the useracc() check
 	 * will fail if the process has not already allocated
 	 * the space with a `brk'.
 	 */
 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the top of the alternate stack. */
 		sip = (osiginfo_t *)((caddr_t)p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - rndfsize);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/* Otherwise push the frame below the current user sp. */
 		sip = (osiginfo_t *)(alpha_pal_rdusp() - rndfsize);
 	PROC_UNLOCK(p);
 
 	(void)grow_stack(p, (u_long)sip);
 	if (!useracc((caddr_t)sip, fsize, VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;	
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		PROC_UNLOCK(p);
 		psignal(p, SIGILL);
 		return;
 	}
 
 	/*
 	 * Build the signal context to be used by sigreturn.
 	 */
 	ksi.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, ksi.si_sc.sc_mask);
 	ksi.si_sc.sc_pc = frame->tf_regs[FRAME_PC];
 	ksi.si_sc.sc_ps = frame->tf_regs[FRAME_PS];
 
 	/* copy the registers. */
 	fill_regs(p, (struct reg *)ksi.si_sc.sc_regs);
 	/* osigreturn() rejects contexts that lack this value in R_ZERO. */
 	ksi.si_sc.sc_regs[R_ZERO] = 0xACEDBADE;		/* magic number */
 	ksi.si_sc.sc_regs[R_SP] = alpha_pal_rdusp();
 
 	/* save the floating-point state, if necessary, then copy it. */
 	alpha_fpstate_save(p, 1);		/* XXX maybe write=0 */
 	ksi.si_sc.sc_ownedfp = p->p_md.md_flags & MDP_FPUSED;
 	bcopy(&p->p_addr->u_pcb.pcb_fp, (struct fpreg *)ksi.si_sc.sc_fpregs,
 	    sizeof(struct fpreg));
 	ksi.si_sc.sc_fp_control = p->p_addr->u_pcb.pcb_fp_control;
 	/* Zero the unused fields so no kernel stack contents are copied out. */
 	bzero(ksi.si_sc.sc_reserved, sizeof ksi.si_sc.sc_reserved); /* XXX */
 	ksi.si_sc.sc_xxx1[0] = 0;				/* XXX */
 	ksi.si_sc.sc_xxx1[1] = 0;				/* XXX */
 	ksi.si_sc.sc_traparg_a0 = frame->tf_regs[FRAME_TRAPARG_A0];
 	ksi.si_sc.sc_traparg_a1 = frame->tf_regs[FRAME_TRAPARG_A1];
 	ksi.si_sc.sc_traparg_a2 = frame->tf_regs[FRAME_TRAPARG_A2];
 	ksi.si_sc.sc_xxx2[0] = 0;				/* XXX */
 	ksi.si_sc.sc_xxx2[1] = 0;				/* XXX */
 	ksi.si_sc.sc_xxx2[2] = 0;				/* XXX */
 	/* Fill in POSIX parts */
 	ksi.si_signo = sig;
 	ksi.si_code = code;
 	ksi.si_value.sigval_ptr = NULL;				/* XXX */
 
 	/*
 	 * copy the frame out to userland.  The result is ignored; the
 	 * destination was checked with useracc() above.
 	 */
 	(void) copyout((caddr_t)&ksi, (caddr_t)sip, fsize);
 
 	/*
 	 * Set up the registers to return to sigcode.
 	 */
 	frame->tf_regs[FRAME_PC] = PS_STRINGS - (esigcode - sigcode);
 	frame->tf_regs[FRAME_A0] = sig;
 	frame->tf_regs[FRAME_FLAGS] = 0; /* full restore */
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig))
 		frame->tf_regs[FRAME_A1] = (u_int64_t)sip;
 	else
 		frame->tf_regs[FRAME_A1] = code;
 	PROC_UNLOCK(p);
 	frame->tf_regs[FRAME_A2] = (u_int64_t)&sip->si_sc;
 	frame->tf_regs[FRAME_T12] = (u_int64_t)catcher;	/* t12 is pv */
 	alpha_pal_wrusp((unsigned long)sip);
 }
 
 /*
  * Deliver signal `sig' to the current process using the new-style
  * (struct sigframe) signal frame.
  *
  * Signals registered through the old interface (ps_osigset) are handed
  * to osendsig().  Otherwise a struct sigframe holding the saved user
  * context, and the siginfo when the handler asked for it, is built on
  * the kernel stack and copied out to the user stack or to the top of
  * the alternate signal stack.  The trapframe is then rewritten so that
  * the process resumes in the signal trampoline located at
  * PS_STRINGS - (esigcode - sigcode) with:
  *	a0  = sig
  *	a1  = pointer to the user siginfo if the handler asked for
  *	      siginfo, otherwise code
  *	a2  = pointer to the user ucontext
  *	t12 = catcher (the handler address)
  *	usp = address of the user copy of the frame
  *
  * If the target area is not writable the process is sent a SIGILL with
  * the default action instead.
  */
 void
 sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
 {
 	struct proc *p = curproc;
 	struct trapframe *frame;
 	struct sigacts *psp;
 	struct sigframe sf, *sfp;
 	int oonstack, rndfsize, usesiginfo;
 
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		PROC_UNLOCK(p);
 		osendsig(catcher, sig, mask, code);
 		return;
 	}
 
 	frame = p->p_md.md_tf;
 	oonstack = sigonstack(alpha_pal_rdusp());
 	/* Round the frame size up to a multiple of 16 bytes. */
 	rndfsize = ((sizeof(sf) + 15) / 16) * 16;
 
 	/* save user context */
 	bzero(&sf, sizeof(struct sigframe));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 
 	fill_regs(p, (struct reg *)sf.sf_uc.uc_mcontext.mc_regs);
 	sf.sf_uc.uc_mcontext.mc_regs[R_SP] = alpha_pal_rdusp();
 	sf.sf_uc.uc_mcontext.mc_regs[R_ZERO] = 0xACEDBADE;   /* magic number */
 	sf.sf_uc.uc_mcontext.mc_regs[R_PS] = frame->tf_regs[FRAME_PS];
 	sf.sf_uc.uc_mcontext.mc_regs[R_PC] = frame->tf_regs[FRAME_PC];
 	sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A0] =
 	    frame->tf_regs[FRAME_TRAPARG_A0];
 	sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A1] =
 	    frame->tf_regs[FRAME_TRAPARG_A1];
 	sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A2] =
 	    frame->tf_regs[FRAME_TRAPARG_A2];
 
 	/*
 	 * Allocate and validate space for the signal handler
 	 * context. Note that if the stack is in P0 space, the
 	 * call to grow_stack() is a nop, and the useracc() check
 	 * will fail if the process has not already allocated
 	 * the space with a `brk'.
 	 */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		sfp = (struct sigframe *)((caddr_t)p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - rndfsize);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		sfp = (struct sigframe *)(alpha_pal_rdusp() - rndfsize);
 	PROC_UNLOCK(p);
 
 	(void)grow_stack(p, (u_long)sfp);
 #ifdef DEBUG
 	if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
 		printf("sendsig(%d): sig %d ssp %p usp %p\n", p->p_pid,
 		       sig, &sf, sfp);
 #endif
 	if (!useracc((caddr_t)sfp, sizeof(sf), VM_PROT_WRITE)) {
 #ifdef DEBUG
 		if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
 			printf("sendsig(%d): useracc failed on sig %d\n",
 			       p->p_pid, sig);
 #endif
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		PROC_UNLOCK(p);
 		psignal(p, SIGILL);
 		return;
 	}
 
 	/* save the floating-point state, if necessary, then copy it. */
 	alpha_fpstate_save(p, 1);
 	sf.sf_uc.uc_mcontext.mc_ownedfp = p->p_md.md_flags & MDP_FPUSED;
 	bcopy(&p->p_addr->u_pcb.pcb_fp,
 	      (struct fpreg *)sf.sf_uc.uc_mcontext.mc_fpregs,
 	      sizeof(struct fpreg));
 	sf.sf_uc.uc_mcontext.mc_fp_control = p->p_addr->u_pcb.pcb_fp_control;
 
 #ifdef COMPAT_OSF1
 	/*
 	 * XXX Create an OSF/1-style sigcontext and associated goo.
 	 */
 #endif
 
 	/*
 	 * Fill in the POSIX siginfo parts while the frame is still only
 	 * in kernel memory.  This has to happen before the copyout()
 	 * below; anything stored into sf afterwards never reaches the
 	 * user's copy, and the handler would see a zeroed siginfo.
 	 */
 	PROC_LOCK(p);
 	usesiginfo = (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig) != 0);
 	PROC_UNLOCK(p);
 	if (usesiginfo) {
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = code;
 		sf.sf_si.si_addr = (void*)frame->tf_regs[FRAME_TRAPARG_A0];
 	}
 
 	/*
 	 * copy the frame out to userland.  The result is ignored; the
 	 * destination was checked with useracc() above.
 	 */
 	(void) copyout((caddr_t)&sf, (caddr_t)sfp, sizeof(sf));
 #ifdef DEBUG
 	if (sigdebug & SDB_FOLLOW)
 		printf("sendsig(%d): sig %d sfp %p code %lx\n", p->p_pid, sig,
 		    sfp, code);
 #endif
 
 	/*
 	 * Set up the registers to return to sigcode.
 	 */
 	frame->tf_regs[FRAME_PC] = PS_STRINGS - (esigcode - sigcode);
 	frame->tf_regs[FRAME_A0] = sig;
 	if (usesiginfo)
 		frame->tf_regs[FRAME_A1] = (u_int64_t)&(sfp->sf_si);
 	else
 		frame->tf_regs[FRAME_A1] = code;
 
 	frame->tf_regs[FRAME_A2] = (u_int64_t)&(sfp->sf_uc);
 	frame->tf_regs[FRAME_T12] = (u_int64_t)catcher;	/* t12 is pv */
 	frame->tf_regs[FRAME_FLAGS] = 0; /* full restore */
 	alpha_pal_wrusp((unsigned long)sfp);
 
 #ifdef DEBUG
 	/* The catcher lives in t12 (set above), so report that register. */
 	if (sigdebug & SDB_FOLLOW)
 		printf("sendsig(%d): pc %lx, catcher %lx\n", p->p_pid,
 		    frame->tf_regs[FRAME_PC], frame->tf_regs[FRAME_T12]);
 	if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
 		printf("sendsig(%d): sig %d returns\n",
 		    p->p_pid, sig);
 #endif
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by osendsig (above).
  * Return to previous pc and psl as specified by
  * context left by osendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  *
  * Returns EFAULT if the context cannot be copied in, EINVAL if it does
  * not carry the 0xACEDBADE magic number in sc_regs[R_ZERO], and
  * EJUSTRETURN once the saved state has been installed.
  */
 int
 osigreturn(struct proc *p,
 	struct osigreturn_args /* {
 		struct osigcontext *sigcntxp;
 	} */ *uap)
 {
 	struct osigcontext *scp, ksc;
 
 	scp = uap->sigcntxp;
 
 	/*
 	 * Fetch the entire context structure at once for speed.
 	 */
 	if (copyin((caddr_t)scp, (caddr_t)&ksc, sizeof ksc))
 		return (EFAULT);
 
 	/*
 	 * XXX - Should we do this. What if we get a "handcrafted"
 	 * but valid sigcontext that hasn't the magic number?
 	 */
 	if (ksc.sc_regs[R_ZERO] != 0xACEDBADE)		/* magic number */
 		return (EINVAL);
 
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	/*
 	 * Restore the user-supplied information
 	 */
 	if (ksc.sc_onstack)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	/*
 	 * longjmp is still implemented by calling osigreturn. The new
 	 * sigmask is stored in sc_reserved, sc_mask is only used for
 	 * backward compatibility.
 	 *
 	 * NOTE(review): the code below restores the mask from sc_mask
 	 * only; nothing here reads sc_reserved -- confirm the intent.
 	 */
 	SIGSETOLD(p->p_sigmask, ksc.sc_mask);
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 
 	set_regs(p, (struct reg *)ksc.sc_regs);
 	p->p_md.md_tf->tf_regs[FRAME_PC] = ksc.sc_pc;
 	/* Force the user-mode PS bits so the context cannot raise privilege. */
 	p->p_md.md_tf->tf_regs[FRAME_PS] =
 	    (ksc.sc_ps | ALPHA_PSL_USERSET) & ~ALPHA_PSL_USERCLR;
 	p->p_md.md_tf->tf_regs[FRAME_FLAGS] = 0; /* full restore */
 
 	alpha_pal_wrusp(ksc.sc_regs[R_SP]);
 
 	/* XXX ksc.sc_ownedfp ? */
 	/* Give up the live FP state before overwriting the pcb copy. */
 	alpha_fpstate_drop(p);
 	bcopy((struct fpreg *)ksc.sc_fpregs, &p->p_addr->u_pcb.pcb_fp,
 	    sizeof(struct fpreg));
 	p->p_addr->u_pcb.pcb_fp_control = ksc.sc_fp_control;
 	return (EJUSTRETURN);
 }
 
 int
 sigreturn(struct proc *p,
 	struct sigreturn_args /* {
 		ucontext_t *sigcntxp;
 	} */ *uap)
 {
 	ucontext_t uc, *ucp;
 	struct pcb *pcb;
 	unsigned long val;
 
 	if (((struct osigcontext*)uap->sigcntxp)->sc_regs[R_ZERO] == 0xACEDBADE)
 		return osigreturn(p, (struct osigreturn_args *)uap);
 
 	ucp = uap->sigcntxp;
 	pcb = &p->p_addr->u_pcb;
 
 #ifdef DEBUG
 	if (sigdebug & SDB_FOLLOW)
 	    printf("sigreturn: pid %d, scp %p\n", p->p_pid, ucp);
 #endif
 
 	/*
 	 * Fetch the entire context structure at once for speed.
 	 */
 	if (copyin((caddr_t)ucp, (caddr_t)&uc, sizeof(ucontext_t)))
 		return (EFAULT);
 
 	/*
 	 * Restore the user-supplied information
 	 */
 	set_regs(p, (struct reg *)uc.uc_mcontext.mc_regs);
 	val = (uc.uc_mcontext.mc_regs[R_PS] | ALPHA_PSL_USERSET) &
 	    ~ALPHA_PSL_USERCLR;
 	p->p_md.md_tf->tf_regs[FRAME_PS] = val;
 	p->p_md.md_tf->tf_regs[FRAME_PC] = uc.uc_mcontext.mc_regs[R_PC];
 	p->p_md.md_tf->tf_regs[FRAME_FLAGS] = 0; /* full restore */
 	alpha_pal_wrusp(uc.uc_mcontext.mc_regs[R_SP]);
 
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (uc.uc_mcontext.mc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	p->p_sigmask = uc.uc_sigmask;
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 
 	/* XXX ksc.sc_ownedfp ? */
 	alpha_fpstate_drop(p);
 	bcopy((struct fpreg *)uc.uc_mcontext.mc_fpregs,
 	      &p->p_addr->u_pcb.pcb_fp, sizeof(struct fpreg));
 	p->p_addr->u_pcb.pcb_fp_control = uc.uc_mcontext.mc_fp_control;
 
 #ifdef DEBUG
 	if (sigdebug & SDB_FOLLOW)
 		printf("sigreturn(%d): returns\n", p->p_pid);
 #endif
 	return (EJUSTRETURN);
 }
 
 /*
  * Machine dependent boot() routine
  *
  * I haven't seen anything to put here yet
  * Possibly some stuff might be grafted back here from boot()
  *
  * The howto argument is currently ignored; the function is an
  * intentionally empty placeholder.
  */
 void
 cpu_boot(int howto)
 {
 }
 
 /*
  * Shutdown the CPU as much as possible
  *
  * Halts by calling prom_halt(1); the direct PALcode halt is left
  * commented out below.
  */
 void
 cpu_halt(void)
 {
 	/*alpha_pal_halt(); */
 	prom_halt(1);
 }
 
 /*
  * Reset the user register state of process p for a freshly exec'd
  * image: every trapframe register and the saved FP state are cleared,
  * then the entry point, stack pointer and startup arguments are
  * installed.
  *
  * NOTE(review): the ps_strings argument is not used; a3 is loaded from
  * the PS_STRINGS constant instead -- confirm this is intended.
  */
 void
 setregs(struct proc *p, u_long entry, u_long stack, u_long ps_strings)
 {
 	struct trapframe *tfp = p->p_md.md_tf;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 
 	/* Start from an all-zero integer and floating-point register set. */
 	bzero(tfp->tf_regs, FRAME_SIZE * sizeof(tfp->tf_regs[0]));
 	bzero(&pcb->pcb_fp, sizeof(pcb->pcb_fp));
 	pcb->pcb_fp_control = 0;
 	pcb->pcb_fp.fpr_cr = (FPCR_DYN_NORMAL |
 	    FPCR_INVD | FPCR_DZED | FPCR_OVFD | FPCR_INED | FPCR_UNFD);
 
 	/* User stack pointer, processor status and (word-aligned) pc. */
 	alpha_pal_wrusp(stack);
 	tfp->tf_regs[FRAME_PS] = ALPHA_PSL_USERSET;
 	tfp->tf_regs[FRAME_PC] = entry & ~3;
 
 	/* Arguments handed to the new image's startup code. */
 	tfp->tf_regs[FRAME_A0] = stack;		/* a0 = sp */
 	tfp->tf_regs[FRAME_A1] = 0;		/* a1 = rtld cleanup */
 	tfp->tf_regs[FRAME_A2] = 0;		/* a2 = rtld object */
 	tfp->tf_regs[FRAME_A3] = PS_STRINGS;	/* a3 = ps_strings */
 	tfp->tf_regs[FRAME_T12] = tfp->tf_regs[FRAME_PC]; /* a.k.a. PV */
 	tfp->tf_regs[FRAME_FLAGS] = 0;		/* full restore */
 
 	/* The new image has not touched the FPU yet. */
 	p->p_md.md_flags &= ~MDP_FPUSED;
 	alpha_fpstate_drop(p);
 }
 
 /*
  * Set the saved user program counter of process p to addr.
  * Always returns 0.
  */
 int
 ptrace_set_pc(struct proc *p, unsigned long addr)
 {
 	p->p_md.md_tf->tf_regs[FRAME_PC] = addr;
 	return (0);
 }
 
 /*
  * Read one 32-bit word at address addr in process p's address space
  * into *v, using procfs_domem().  Returns procfs_domem()'s result.
  */
 static int
 ptrace_read_int(struct proc *p, vm_offset_t addr, u_int32_t *v)
 {
 	struct iovec vec;
 	struct uio xfer;
 
 	/* Single kernel-space buffer receiving the word. */
 	vec.iov_base = (caddr_t) v;
 	vec.iov_len = sizeof(u_int32_t);
 
 	/* Describe the transfer: one segment, read, at offset addr. */
 	xfer.uio_iov = &vec;
 	xfer.uio_iovcnt = 1;
 	xfer.uio_offset = (off_t)addr;
 	xfer.uio_resid = sizeof(u_int32_t);
 	xfer.uio_segflg = UIO_SYSSPACE;
 	xfer.uio_rw = UIO_READ;
 	xfer.uio_procp = p;
 
 	return (procfs_domem(curproc, p, NULL, &xfer));
 }
 
 /*
  * Write the 32-bit word v to address addr in process p's address
  * space, using procfs_domem().  Returns procfs_domem()'s result.
  */
 static int
 ptrace_write_int(struct proc *p, vm_offset_t addr, u_int32_t v)
 {
 	struct iovec vec;
 	struct uio xfer;
 
 	/* Single kernel-space buffer supplying the word. */
 	vec.iov_base = (caddr_t) &v;
 	vec.iov_len = sizeof(u_int32_t);
 
 	/* Describe the transfer: one segment, write, at offset addr. */
 	xfer.uio_iov = &vec;
 	xfer.uio_iovcnt = 1;
 	xfer.uio_offset = (off_t)addr;
 	xfer.uio_resid = sizeof(u_int32_t);
 	xfer.uio_segflg = UIO_SYSSPACE;
 	xfer.uio_rw = UIO_WRITE;
 	xfer.uio_procp = p;
 
 	return (procfs_domem(curproc, p, NULL, &xfer));
 }
 
 /*
  * Return the saved value of register number regno from process p's
  * trapframe.  R_ZERO always reads as 0; every other register number is
  * translated to its trapframe slot through a lookup table.
  */
 static u_int64_t
 ptrace_read_register(struct proc *p, int regno)
 {
 	/* Trapframe slot for each register number, in register order. */
 	static int frame_slot[32] = {
 		FRAME_V0,
 		FRAME_T0, FRAME_T1, FRAME_T2, FRAME_T3,
 		FRAME_T4, FRAME_T5, FRAME_T6, FRAME_T7,
 		FRAME_S0, FRAME_S1, FRAME_S2, FRAME_S3,
 		FRAME_S4, FRAME_S5, FRAME_S6,
 		FRAME_A0, FRAME_A1, FRAME_A2,
 		FRAME_A3, FRAME_A4, FRAME_A5,
 		FRAME_T8, FRAME_T9, FRAME_T10, FRAME_T11,
 		FRAME_RA, FRAME_T12, FRAME_AT, FRAME_GP, FRAME_SP,
 		-1,		/* zero */
 	};
 
 	if (regno != R_ZERO)
 		return p->p_md.md_tf->tf_regs[frame_slot[regno]];
 
 	return 0;
 }
 
 
 /*
  * Remove a breakpoint by writing the saved instruction word
  * (bpt->contents) back to bpt->addr in process p.
  */
 static int
 ptrace_clear_bpt(struct proc *p, struct mdbpt *bpt)
 {
 	return (ptrace_write_int(p, bpt->addr, bpt->contents));
 }
 
 /*
  * Plant a breakpoint at bpt->addr in process p: the word currently
  * there is saved in bpt->contents and then overwritten with the
  * instruction word 0x00000080 (presumably the breakpoint encoding --
  * confirm against the architecture manual).  Returns 0 or the error
  * from the failing read/write; nothing is written if the read fails.
  */
 static int
 ptrace_set_bpt(struct proc *p, struct mdbpt *bpt)
 {
 	u_int32_t trapword = 0x00000080;
 	int rv;
 
 	rv = ptrace_read_int(p, bpt->addr, &bpt->contents);
 	if (rv == 0)
 		rv = ptrace_write_int(p, bpt->addr, trapword);
 	return rv;
 }
 
 /*
  * Remove whatever single-step breakpoints are planted in process p and
  * clear the matching MDP_STEP1/MDP_STEP2 flag.  Errors from removing
  * the breakpoints are not reported; the function always returns 0.
  *
  * With two breakpoints the second is removed before the first, i.e. in
  * the reverse of the order ptrace_single_step() plants them.
  */
 int
 ptrace_clear_single_step(struct proc *p)
 {
 	if ((p->p_md.md_flags & MDP_STEP2) != 0) {
 		ptrace_clear_bpt(p, &p->p_md.md_sstep[1]);
 		ptrace_clear_bpt(p, &p->p_md.md_sstep[0]);
 		p->p_md.md_flags &= ~MDP_STEP2;
 		return (0);
 	}
 
 	if ((p->p_md.md_flags & MDP_STEP1) != 0) {
 		ptrace_clear_bpt(p, &p->p_md.md_sstep[0]);
 		p->p_md.md_flags &= ~MDP_STEP1;
 	}
 
 	return (0);
 }
 
 /*
  * Arrange for process p to stop after executing one instruction.
  *
  * The instruction at the saved pc is read and decoded to work out
  * where execution can go next, and a breakpoint is planted at each
  * candidate address (one for jumps and ordinary instructions, two for
  * branches).  MDP_STEP1 or MDP_STEP2 is set in md_flags to record how
  * many breakpoints were planted so ptrace_clear_single_step() can
  * remove them.
  *
  * Returns 0 on success or the error from reading the instruction or
  * planting a breakpoint.  Panics if step breakpoints are still set.
  */
 int
 ptrace_single_step(struct proc *p)
 {
 	int error;
 	vm_offset_t pc = p->p_md.md_tf->tf_regs[FRAME_PC];
 	alpha_instruction ins;
 	vm_offset_t addr[2];	/* places to set breakpoints */
 	int count = 0;		/* count of breakpoints */
 
 	if (p->p_md.md_flags & (MDP_STEP1|MDP_STEP2))
 		panic("ptrace_single_step: step breakpoints not removed");
 
 	error = ptrace_read_int(p, pc, &ins.bits);
 	if (error)
 		return error;
 
 	switch (ins.branch_format.opcode) {
 
 	case op_j:
 		/* Jump: target is register value */
 		addr[0] = ptrace_read_register(p, ins.jump_format.rs) & ~3;
 		count = 1;
 		break;
 
 	case op_br:
 	case op_fbeq:
 	case op_fblt:
 	case op_fble:
 	case op_bsr:
 	case op_fbne:
 	case op_fbge:
 	case op_fbgt:
 	case op_blbc:
 	case op_beq:
 	case op_blt:
 	case op_ble:
 	case op_blbs:
 	case op_bne:
 	case op_bge:
 	case op_bgt:
 		/*
 		 * Branch: target is pc+4+4*displacement.  Both the
 		 * fall-through address and the target get a breakpoint.
 		 */
 		addr[0] = pc + 4;
 		addr[1] = pc + 4 + 4 * ins.branch_format.displacement;
 		count = 2;
 		break;
 
 	default:
 		/* Anything else simply continues at the next instruction. */
 		addr[0] = pc + 4;
 		count = 1;
 	}
 
 	p->p_md.md_sstep[0].addr = addr[0];
 	error = ptrace_set_bpt(p, &p->p_md.md_sstep[0]);
 	if (error)
 		return error;
 	if (count == 2) {
 		p->p_md.md_sstep[1].addr = addr[1];
 		error = ptrace_set_bpt(p, &p->p_md.md_sstep[1]);
 		if (error) {
 			/* Undo the first breakpoint so nothing is left behind. */
 			ptrace_clear_bpt(p, &p->p_md.md_sstep[0]);
 			return error;
 		}
 		p->p_md.md_flags |= MDP_STEP2;
 	} else
 		p->p_md.md_flags |= MDP_STEP1;
 
 	return 0;
 }
 
 /*
  * Check whether the len bytes at offset addr of process p's user area
  * may be read.  Returns 0 when the range lies entirely within
  * struct user or entirely within the saved trapframe, and EPERM
  * otherwise (including when addr + len wraps around).
  */
 int ptrace_read_u_check(p, addr, len)
 	struct proc *p;
 	vm_offset_t addr;
 	size_t len;
 {
 	vm_offset_t end, tf_off;
 
 	end = (vm_offset_t) (addr + len);
 
 	/* Reject ranges whose end wraps past the top of the address space. */
 	if (end < addr)
 		return EPERM;
 
 	/* Anything inside struct user itself is readable. */
 	if (end <= sizeof(struct user))
 		return 0;
 
 	/* Otherwise the range must fall wholly inside the trapframe. */
 	tf_off = (char *) p->p_md.md_tf - (char *) p->p_addr;
 	if ((vm_offset_t) addr >= tf_off &&
 	    end <= (vm_offset_t) (tf_off + sizeof(struct trapframe)))
 		return 0;
 
 	return EPERM;
 }
 
 /*
  * Store data at byte offset off of process p's user area on behalf of
  * a debugger.  Only offsets inside the saved trapframe or inside the
  * pcb's floating-point save area are accepted; anything else yields
  * EFAULT.  Returns 0 on success.
  *
  * Note that the store is done through an int pointer, so only
  * sizeof(int) bytes of data are written.
  */
 int
 ptrace_write_u(struct proc *p, vm_offset_t off, long data)
 {
 	vm_offset_t min;
 #if 0
 	struct trapframe frame_copy;
 	struct trapframe *tp;
 #endif
 
 	/*
 	 * Privileged kernel state is scattered all over the user area.
 	 * Only allow write access to parts of regs and to fpregs.
 	 */
 	/* First window: the trapframe, located by its offset from p_addr. */
 	min = (char *)p->p_md.md_tf - (char *)p->p_addr;
 	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
 #if 0
 		/* Disabled validation code that refers to i386 register names. */
 		tp = p->p_md.md_tf;
 		frame_copy = *tp;
 		*(int *)((char *)&frame_copy + (off - min)) = data;
 		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
 		    !CS_SECURE(frame_copy.tf_cs))
 			return (EINVAL);
 #endif
 		*(int*)((char *)p->p_addr + off) = data;
 		return (0);
 	}
 	/* Second window: the floating-point registers saved in the pcb. */
 	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_fp);
 	if (off >= min && off <= min + sizeof(struct fpreg) - sizeof(int)) {
 		*(int*)((char *)p->p_addr + off) = data;
 		return (0);
 	}
 	return (EFAULT);
 }
 
 /*
  * Return the protection bits permitted for physical address pa.
  *
  * As compiled, every address is reported as readable and writable.
  * The disabled variant would grant access only to addresses that fall
  * inside one of the phys_avail[] ranges and return 0 otherwise.
  */
 int
 alpha_pa_access(vm_offset_t pa)
 {
 #if 0
 	int i;
 
 	/* phys_avail[] holds (start, end) pairs terminated by a 0 start. */
 	for (i = 0; phys_avail[i] != 0; i += 2) {
 		if (pa < phys_avail[i])
 			continue;
 		if (pa < phys_avail[i+1])
 			return VM_PROT_READ|VM_PROT_WRITE;
 	}
 	return 0;
 #else
 	return VM_PROT_READ|VM_PROT_WRITE;
 #endif
 }
 
 /*
  * Copy the user integer registers of process p into *regs.
  *
  * The general registers come from the saved trapframe.  The R_ZERO
  * slot of the result carries the saved pc, and R_SP is taken from the
  * user stack pointer stored in the pcb.  Always returns 0.
  */
 int
 fill_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *tf = p->p_md.md_tf;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 
 #define FILL_REG(r)	regs->r_regs[R_ ## r] = tf->tf_regs[FRAME_ ## r]
 
 	/* Return value and temporaries. */
 	FILL_REG(V0);
 	FILL_REG(T0); FILL_REG(T1); FILL_REG(T2); FILL_REG(T3);
 	FILL_REG(T4); FILL_REG(T5); FILL_REG(T6); FILL_REG(T7);
 	/* Saved registers. */
 	FILL_REG(S0); FILL_REG(S1); FILL_REG(S2); FILL_REG(S3);
 	FILL_REG(S4); FILL_REG(S5); FILL_REG(S6);
 	/* Argument registers. */
 	FILL_REG(A0); FILL_REG(A1); FILL_REG(A2);
 	FILL_REG(A3); FILL_REG(A4); FILL_REG(A5);
 	/* Remaining temporaries and special registers. */
 	FILL_REG(T8); FILL_REG(T9); FILL_REG(T10); FILL_REG(T11);
 	FILL_REG(RA); FILL_REG(T12); FILL_REG(AT); FILL_REG(GP);
 
 #undef FILL_REG
 
 	regs->r_regs[R_ZERO] = tf->tf_regs[FRAME_PC];
 	regs->r_regs[R_SP] = pcb->pcb_hw.apcb_usp;
 
 	return (0);
 }
 
 int
 set_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct pcb *pcb = &p->p_addr->u_pcb;
 	struct trapframe *tp = p->p_md.md_tf;
 
 	tp = p->p_md.md_tf;
 
 #define C(r)	tp->tf_regs[FRAME_ ## r] = regs->r_regs[R_ ## r]
 
 	C(V0);
 	C(T0); C(T1); C(T2); C(T3); C(T4); C(T5); C(T6); C(T7);
 	C(S0); C(S1); C(S2); C(S3); C(S4); C(S5); C(S6);
 	C(A0); C(A1); C(A2); C(A3); C(A4); C(A5);
 	C(T8); C(T9); C(T10); C(T11);
 	C(RA); C(T12); C(AT); C(GP);
 
 #undef C
 
 	tp->tf_regs[FRAME_PC] = regs->r_regs[R_ZERO];
 	pcb->pcb_hw.apcb_usp = regs->r_regs[R_SP];
 
 	return (0);
 }
 
 /*
  * Copy the floating-point registers of process p into *fpregs.
  * The state is first saved into the pcb (read-only access, so p keeps
  * ownership of the FP state).  Always returns 0.
  */
 int
 fill_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 	struct fpreg *saved = &p->p_addr->u_pcb.pcb_fp;
 
 	alpha_fpstate_save(p, 0);
 	bcopy(saved, fpregs, sizeof(*fpregs));
 
 	return (0);
 }
 
 /*
  * Replace the floating-point registers of process p with *fpregs.
  * Ownership of the live FP state is dropped first, then the pcb copy
  * is overwritten.  Always returns 0.
  */
 int
 set_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 	struct fpreg *saved = &p->p_addr->u_pcb.pcb_fp;
 
 	alpha_fpstate_drop(p);
 	bcopy(fpregs, saved, sizeof(*fpregs));
 
 	return (0);
 }
 
 #ifndef DDB
 /*
  * Stand-in used when the kernel is built without DDB: there is no
  * debugger to enter, so just report that one was requested, together
  * with the caller's message.
  */
 void
 Debugger(const char *msg)
 {
 	printf("Debugger(\"%s\") called.\n", msg);
 }
 #endif /* no DDB */
 
 #include <sys/disklabel.h>
 
 /*
  * Determine the size of the transfer, and make sure it is
  * within the boundaries of the partition. Adjust transfer
  * if needed, and signal errors or early completion.
  *
  * NOTE: the whole implementation is currently compiled out (#if 0),
  * so as built this function does nothing and always returns -1.
  * The disabled code would return 1 for a valid (possibly truncated)
  * transfer, 0 for a transfer starting exactly at the end of the
  * partition, and -1 with BIO_ERROR set on failure.
  */
 int
 bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
 {
 #if 0
         struct partition *p = lp->d_partitions + dkpart(bp->bio_dev);
         int labelsect = lp->d_partitions[0].p_offset;
         int maxsz = p->p_size,
                 sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 
         /* overwriting disk label ? */
         /* XXX should also protect bootstrap in first 8K */
         if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect &&
 #if LABELSECTOR != 0
             bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
 #endif
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 
 #if     defined(DOSBBSECTOR) && defined(notyet)
         /* overwriting master boot record? */
         if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR &&
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 #endif
 
         /* beyond partition? */
         if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) {
                 /* if exactly at end of disk, return an EOF */
                 if (bp->bio_blkno == maxsz) {
                         bp->bio_resid = bp->bio_bcount;
                         return(0);
                 }
                 /* or truncate if part of it fits */
                 sz = maxsz - bp->bio_blkno;
                 if (sz <= 0) {
                         bp->bio_error = EINVAL;
                         goto bad;
                 }
                 bp->bio_bcount = sz << DEV_BSHIFT;
         }
 
         /* translate the partition-relative block to an absolute one */
         bp->bio_pblkno = bp->bio_blkno + p->p_offset;
         return(1);
 
 bad:
         bp->bio_flags |= BIO_ERROR;
 #endif
         /* Only reachable result while the code above is disabled. */
         return(-1);
 
 }
 
 /*
  * Sysctl handler for an integer variable: the value is read or written
  * through sysctl_handle_int(), and when a new value was supplied
  * without error the time-of-day clock is rewritten via resettodr().
  * Returns the error from sysctl_handle_int().
  */
 static int
 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 {
 	int rv = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
 	    req);
 
 	if (rv == 0 && req->newptr)
 		resettodr();
 
 	return (rv);
 }
 
 /*
  * machdep.adjkerntz: read/write integer backed by adjkerntz; writes go
  * through sysctl_machdep_adjkerntz().
  */
 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
 	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
 
 /* machdep.disable_rtc_set: read/write integer backed by disable_rtc_set. */
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
 	CTLFLAG_RW, &disable_rtc_set, 0, "");
 
 /* machdep.wall_cmos_clock: read/write integer backed by wall_cmos_clock. */
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
 /*
  * Consistency check on FP ownership: a process whose pcb has the FEN
  * flag set must be this cpu's fpcurproc, otherwise panic.  The check
  * is compiled out on SMP kernels.
  */
 void
 alpha_fpstate_check(struct proc *p)
 {
 	/*
 	 * For SMP, we should check the fpcurproc of each cpu.
 	 */
 #ifndef SMP
 	if ((p->p_addr->u_pcb.pcb_hw.apcb_flags & ALPHA_PCB_FLAGS_FEN) != 0 &&
 	    p != PCPU_GET(fpcurproc))
 		panic("alpha_check_fpcurproc: bogus");
 #endif
 }
 
 /*
  * Set the FEN flag in the hardware pcb flags of process p.
  */
 #define SET_FEN(p) \
 	(p)->p_addr->u_pcb.pcb_hw.apcb_flags |= ALPHA_PCB_FLAGS_FEN
 
 /*
  * Clear the FEN flag in the hardware pcb flags of process p.
  */
 #define CLEAR_FEN(p) \
 	(p)->p_addr->u_pcb.pcb_hw.apcb_flags &= ~ALPHA_PCB_FLAGS_FEN
 
 /*
  * Save the floating point state in the pcb. Use this to get read-only
  * access to the floating point state. If write is true, the current
  * fp process is cleared so that fp state can safely be modified. The
  * process will automatically reload the changed state by generating a 
  * FEN trap.
  *
  * Nothing is done unless p is this cpu's fpcurproc.  The whole
  * operation runs with interrupts disabled; the previous interrupt
  * state is restored on return.
  */
 void
 alpha_fpstate_save(struct proc *p, int write)
 {
 	int s;
 
 	s = save_intr();
 	disable_intr();
 	if (p == PCPU_GET(fpcurproc)) {
 		/*
 		 * If curproc != fpcurproc, then we need to enable FEN 
 		 * so that we can dump the fp state.
 		 */
 		alpha_pal_wrfen(1);
 
 		/*
 		 * Save the state in the pcb.
 		 */
 		savefpstate(&p->p_addr->u_pcb.pcb_fp);
 
 		if (write) {
 			/*
 			 * If fpcurproc == curproc, just ask the
 			 * PALcode to disable FEN, otherwise we must
 			 * clear the FEN bit in fpcurproc's pcb.
 			 */
 			if (PCPU_GET(fpcurproc) == curproc)
 				alpha_pal_wrfen(0);
 			else
 				CLEAR_FEN(PCPU_GET(fpcurproc));
 			/* No process owns the FP state any more. */
 			PCPU_SET(fpcurproc, NULL);
 		} else {
 			/*
 			 * Make sure that we leave FEN enabled if
 			 * curproc == fpcurproc. We must have at most
 			 * one process with FEN enabled. Note that FEN 
 			 * must already be set in fpcurproc's pcb.
 			 */
 			if (curproc != PCPU_GET(fpcurproc))
 				alpha_pal_wrfen(0);
 		}
 	}
 	restore_intr(s);
 }
 
 /*
  * Relinquish ownership of the FP state. This is called instead of
  * alpha_fpstate_save() if the entire FP state is being changed
  * (e.g. on sigreturn).
  *
  * Unlike alpha_fpstate_save(), the live register contents are not
  * written back to the pcb.  Nothing is done unless p is this cpu's
  * fpcurproc.  Runs with interrupts disabled; the previous interrupt
  * state is restored on return.
  */
 void
 alpha_fpstate_drop(struct proc *p)
 {
 	int s;
 
 	s = save_intr();
 	disable_intr();
 	if (p == PCPU_GET(fpcurproc)) {
 		if (p == curproc) {
 			/*
 			 * Disable FEN via the PALcode. This will
 			 * clear the bit in the pcb as well.
 			 */
 			alpha_pal_wrfen(0);
 		} else {
 			/*
 			 * Clear the FEN bit of the pcb.
 			 */
 			CLEAR_FEN(p);
 		}
 		/* No process owns the FP state any more. */
 		PCPU_SET(fpcurproc, NULL);
 	}
 	restore_intr(s);
 }
 
 /*
  * Switch the current owner of the fp state to p, reloading the state
  * from the pcb.
  *
  * Any previous owner's registers are first saved into its pcb and its
  * FEN flag cleared.  p is marked as having used the FPU (MDP_FPUSED).
  * Runs with interrupts disabled; the previous interrupt state is
  * restored on return.
  */
 void
 alpha_fpstate_switch(struct proc *p)
 {
 	int s;
 
 	/*
 	 * Enable FEN so that we can access the fp registers.
 	 */
 	s = save_intr();
 	disable_intr();
 	alpha_pal_wrfen(1);
 	if (PCPU_GET(fpcurproc)) {
 		/*
 		 * Dump the old fp state if it's valid.
 		 */
 		savefpstate(&PCPU_GET(fpcurproc)->p_addr->u_pcb.pcb_fp);
 		CLEAR_FEN(PCPU_GET(fpcurproc));
 	}
 
 	/*
 	 * Remember the new FP owner and reload its state.
 	 */
 	PCPU_SET(fpcurproc, p);
 	restorefpstate(&PCPU_GET(fpcurproc)->p_addr->u_pcb.pcb_fp);
 
 	/*
 	 * If the new owner is curproc, leave FEN enabled, otherwise
 	 * mark its PCB so that it gets FEN when we context switch to
 	 * it later.
 	 */
 	if (p != curproc) {
 		alpha_pal_wrfen(0);
 		SET_FEN(p);
 	}
 
 	p->p_md.md_flags |= MDP_FPUSED;
 	restore_intr(s);
 }
 
 /*
  * Initialise a struct globaldata.
  */
 void
 globaldata_init(struct globaldata *globaldata, int cpuid, size_t sz)
 {
 	bzero(globaldata, sz);
 	globaldata->gd_idlepcbphys = vtophys((vm_offset_t) &globaldata->gd_idlepcb);
 	globaldata->gd_idlepcb.apcb_ksp = (u_int64_t)
 		((caddr_t) globaldata + sz - sizeof(struct trapframe));
 	globaldata->gd_idlepcb.apcb_ptbr = proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr;
 	globaldata->gd_cpuid = cpuid;
 	globaldata->gd_next_asn = 0;
 	globaldata->gd_current_asngen = 1;
 #ifdef SMP
 	globaldata->gd_other_cpus = all_cpus & ~(1 << cpuid);
 	globaldata_register(globaldata);
 #endif
 }
Index: head/sys/ia64/ia64/machdep.c
===================================================================
--- head/sys/ia64/ia64/machdep.c	(revision 71983)
+++ head/sys/ia64/ia64/machdep.c	(revision 71984)
@@ -1,1370 +1,1371 @@
 /*-
  * Copyright (c) 2000 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
 #include "opt_simos.h"
 #include "opt_msgbuf.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/eventhandler.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/reboot.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/mbuf.h>
 #include <sys/vmmeter.h>
 #include <sys/msgbuf.h>
 #include <sys/exec.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 #include <sys/linker.h>
 #include <sys/random.h>
 #include <net/netisr.h>
 #include <vm/vm.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <sys/user.h>
 #include <sys/ptrace.h>
 #include <machine/clock.h>
 #include <machine/md_var.h>
 #include <machine/reg.h>
 #include <machine/fpu.h>
 #include <machine/pal.h>
 #include <machine/efi.h>
 #include <machine/bootinfo.h>
 #include <machine/mutex.h>
 #include <machine/vmparam.h>
 #include <machine/elf.h>
 #include <ddb/ddb.h>
 #include <alpha/alpha/db_instruction.h>
 #include <sys/vnode.h>
 #include <miscfs/procfs/procfs.h>
 #include <machine/sigframe.h>
 
 u_int64_t cycles_per_usec;
 u_int32_t cycles_per_sec;
 int cold = 1;
 struct bootinfo_kernel bootinfo;
 
 struct cpuhead cpuhead;
 
 struct mtx sched_lock;
 struct mtx Giant;
 
 struct	user *proc0paddr;
 
 char machine[] = "ia64";
 SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
 
 static char cpu_model[128];
 SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "");
 
 #ifdef DDB
 /* start and end of kernel symbol table */
 void	*ksym_start, *ksym_end;
 #endif
 
 int	ia64_unaligned_print = 1;	/* warn about unaligned accesses */
 int	ia64_unaligned_fix = 1;	/* fix up unaligned accesses */
 int	ia64_unaligned_sigbus = 0;	/* don't SIGBUS on fixed-up accesses */
 
 SYSCTL_INT(_machdep, CPU_UNALIGNED_PRINT, unaligned_print,
 	CTLFLAG_RW, &ia64_unaligned_print, 0, "");
 
 SYSCTL_INT(_machdep, CPU_UNALIGNED_FIX, unaligned_fix,
 	CTLFLAG_RW, &ia64_unaligned_fix, 0, "");
 
 SYSCTL_INT(_machdep, CPU_UNALIGNED_SIGBUS, unaligned_sigbus,
 	CTLFLAG_RW, &ia64_unaligned_sigbus, 0, "");
 
 static void cpu_startup __P((void *));
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
 static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");
 
 struct msgbuf *msgbufp=0;
 
 int bootverbose = 0, Maxmem = 0;
 long dumplo;
 
 /*
  * Memory accounting, in units of the memory descriptors' NumberOfPages.
  * ia64_init() accumulates totalphysmem, physmem and resvmem while walking
  * the descriptors; unusedmem, unknownmem and ncpus are not written by the
  * code in this part of the file.
  */
 int	totalphysmem;		/* pages summed over every memory descriptor */
 int	physmem;		/* pages in EfiConventionalMemory descriptors */
 int	resvmem;		/* pages in descriptors of any other type */
 int	unusedmem;		/* amount of memory for OS that we don't use */
 int	unknownmem;		/* amount of memory with an unknown use */
 int	ncpus;			/* number of cpus */
 
 vm_offset_t phys_avail[10];
 
 /*
  * hw.physmem handler: hand ia64_ptob(physmem) to sysctl_handle_int()
  * and pass its result straight back.
  */
 static int
 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0, ia64_ptob(physmem), req));
 }
 
 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_physmem, "I", "");
 
 /*
  * hw.usermem handler: physmem less the currently wired page count,
  * converted with ia64_ptob() and reported through sysctl_handle_int().
  */
 static int
 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
 {
 	int unwired_pages;
 
 	unwired_pages = physmem - cnt.v_wire_count;
 	return (sysctl_handle_int(oidp, 0, ia64_ptob(unwired_pages), req));
 }
 
 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_usermem, "I", "");
 
 SYSCTL_INT(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, "");
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
 
 static void identifycpu __P((void));
 
 static vm_offset_t buffer_sva, buffer_eva;
 vm_offset_t clean_sva, clean_eva;
 static vm_offset_t pager_sva, pager_eva;
 
 static void
 cpu_startup(dummy)
 	void *dummy;
 {
 	unsigned int i;
 	caddr_t v;
 	vm_offset_t maxaddr;
 	vm_size_t size = 0;
 	vm_offset_t firstaddr;
 	vm_offset_t minaddr;
 
 	if (boothowto & RB_VERBOSE)
 		bootverbose++;
 
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	printf("%s", version);
 	identifycpu();
 
 	/* startrtclock(); */
 #ifdef PERFMON
 	perfmon_init();
 #endif
 	printf("real memory  = %ld (%ldK bytes)\n", ia64_ptob(Maxmem), ia64_ptob(Maxmem) / 1024);
 
 	/*
 	 * Display any holes after the first chunk of extended memory.
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
 			int size1 = phys_avail[indx + 1] - phys_avail[indx];
 
 			printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx],
 			    phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE);
 		}
 	}
 
 	/*
 	 * Calculate callout wheel size
 	 */
 	for (callwheelsize = 1, callwheelbits = 0;
 	     callwheelsize < ncallout;
 	     callwheelsize <<= 1, ++callwheelbits)
 		;
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Allocate space for system data structures.
 	 * The first available kernel virtual address is in "v".
 	 * As pages of kernel virtual memory are allocated, "v" is incremented.
 	 * As pages of memory are allocated and cleared,
 	 * "firstaddr" is incremented.
 	 * An index into the kernel page table corresponding to the
 	 * virtual memory address maintained in "v" is kept in "mapaddr".
 	 */
 
 	/*
 	 * Make two passes.  The first pass calculates how much memory is
 	 * needed and allocates it.  The second pass assigns virtual
 	 * addresses to the various data structures.
 	 */
 	firstaddr = 0;
 again:
 	v = (caddr_t)firstaddr;
 
 #define	valloc(name, type, num) \
 	    (name) = (type *)v; v = (caddr_t)((name)+(num))
 #define	valloclim(name, type, num, lim) \
 	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
 
 	valloc(callout, struct callout, ncallout);
 	valloc(callwheel, struct callout_tailq, callwheelsize);
 
 	/*
 	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
 	 * For the first 64MB of ram nominally allocate sufficient buffers to
 	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
 	 * buffers to cover 1/20 of our ram over 64MB.
 	 */
 
 	if (nbuf == 0) {
 		int factor = 4 * BKVASIZE / PAGE_SIZE;
 
 		nbuf = 50;
 		if (physmem > 1024)
 			nbuf += min((physmem - 1024) / factor, 16384 / factor);
 		if (physmem > 16384)
 			nbuf += (physmem - 16384) * 2 / (factor * 5);
 	}
 	nswbuf = max(min(nbuf/4, 64), 16);
 
 	valloc(swbuf, struct buf, nswbuf);
 	valloc(buf, struct buf, nbuf);
 	v = bufhashinit(v);
 
 	/*
 	 * End of first pass, size has been calculated so allocate memory
 	 */
 	if (firstaddr == 0) {
 		size = (vm_size_t)(v - firstaddr);
 		firstaddr = (vm_offset_t)kmem_alloc(kernel_map, round_page(size));
 		if (firstaddr == 0)
 			panic("startup: no room for tables");
 		goto again;
 	}
 
 	/*
 	 * End of second pass, addresses have been assigned
 	 */
 	if ((vm_size_t)(v - firstaddr) != size)
 		panic("startup: table size inconsistency");
 
 	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
 			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
 	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
 				(nbuf*BKVASIZE));
+	buffer_map->system_map = 1;
 	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
 				(nswbuf*MAXPHYS) + pager_map_size);
 	pager_map->system_map = 1;
 	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 				(16*(ARG_MAX+(PAGE_SIZE*3))));
 
 	/*
 	 * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
 	 * we use the more space efficient malloc in place of kmem_alloc.
 	 */
 	{
 		vm_offset_t mb_map_size;
 
 		mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES +
 		    (nmbclusters + nmbufs / 4) * sizeof(union mext_refcnt);
 		mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE));
 		mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl,
 		    &maxaddr, mb_map_size);
 		mb_map->system_map = 1;
 	}
 
 	/*
 	 * Initialize callouts
 	 */
 	SLIST_INIT(&callfree);
 	for (i = 0; i < ncallout; i++) {
 		callout_init(&callout[i], 0);
 		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
 	}
 
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&callwheel[i]);
 	}
 
 	mtx_init(&callout_lock, "callout", MTX_SPIN | MTX_RECURSE);
 
 #if defined(USERCONFIG)
 #if defined(USERCONFIG_BOOT)
 	if (1)
 #else
         if (boothowto & RB_CONFIG)
 #endif
 	{
 		userconfig();
 		cninit();	/* the preferred console may have changed */
 	}
 #endif
 
 	printf("avail memory = %ld (%ldK bytes)\n", ptoa(cnt.v_free_count),
 	    ptoa(cnt.v_free_count) / 1024);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 }
 
 /*
  * Placeholder for printing the cpu type and version; called from
  * cpu_startup() right after the version banner.  Currently does nothing.
  */
 static void
 identifycpu(void)
 {
 	/* print cpu type & version */
 }
 
 extern char kernel_text[], _end[];
 
 #define DEBUG_MD
 
 /*
  * Early machine-dependent initialisation.
  *
  * In order: set the default FPSR, attach the bootstrap console, locate the
  * kernel image, build phys_avail[] from the (currently hard-wired) memory
  * descriptors, carve the message buffer off the end of the last segment,
  * steal memory for proc0's u-area and the boot cpu's globaldata, bootstrap
  * the pmap, finish proc0's pcb, initialise the Giant and sched_lock mutexes,
  * compute boothowto from the boot flags and environment, and optionally
  * enter the debugger.
  */
 void
 ia64_init()
 {
 	int phys_avail_cnt;
 	vm_offset_t kernstart, kernend;
 	vm_offset_t kernstartpfn, kernendpfn, pfn0, pfn1;
 	char *p;
 	EFI_MEMORY_DESCRIPTOR ski_md[2]; /* XXX */
 	EFI_MEMORY_DESCRIPTOR *mdp;
 	int mdcount, i;
 
 	/* NO OUTPUT ALLOWED UNTIL FURTHER NOTICE */
 
 	/*
 	 * TODO: Disable interrupts, floating point etc.
 	 * Maybe flush cache and tlb
 	 */
 	__asm __volatile("mov ar.fpsr=%0" :: "r"(IA64_FPSR_DEFAULT));
 
 	/*
 	 * TODO: Get critical system information (if possible, from the
 	 * information provided by the boot program).
 	 */
 
 	/*
 	 * Initialize the (temporary) bootstrap console interface, so
 	 * we can use printf until the VM system starts being setup.
 	 * The real console is initialized before then.
 	 * TODO: I guess we start with a serial console here.
 	 */
 	ssccnattach();
 
 	/* OUTPUT NOW ALLOWED */
 
 	/*
 	 * Find the beginning and end of the kernel.
 	 */
 	kernstart = trunc_page(kernel_text);
 #ifdef DDBxx
 	ksym_start = (void *)bootinfo.ssym;
 	ksym_end   = (void *)bootinfo.esym;
 	kernend = (vm_offset_t)round_page(ksym_end);
 #else
 	kernend = (vm_offset_t)round_page(_end);
 #endif
 	/* But if the bootstrap tells us otherwise, believe it! */
 	if (bootinfo.kernend)
 		kernend = round_page(bootinfo.kernend);
 	preload_metadata = (caddr_t)bootinfo.modptr;
 	kern_envp = bootinfo.envp;
 
 	p = getenv("kernelname");
 	if (p)
 		strncpy(kernelname, p, sizeof(kernelname) - 1);
 
 	kernstartpfn = atop(IA64_RR_MASK(kernstart));
 	kernendpfn = atop(IA64_RR_MASK(kernend));
 
 	/*
 	 * Size the memory regions and load phys_avail[] with the results.
 	 */
 
 	/*
 	 * XXX hack for ski. In reality, the loader will probably ask
 	 * EFI and pass the results to us. Possibly, we will call EFI
 	 * directly.
 	 */
 	ski_md[0].Type = EfiConventionalMemory;
 	ski_md[0].PhysicalStart = 2L*1024*1024;
 	ski_md[0].VirtualStart = 0;
 	ski_md[0].NumberOfPages = (64L*1024*1024)>>12;
 	ski_md[0].Attribute = EFI_MEMORY_WB;
 
 	ski_md[1].Type = EfiConventionalMemory;
 	ski_md[1].PhysicalStart = 4096L*1024*1024;
 	ski_md[1].VirtualStart = 0;
 	ski_md[1].NumberOfPages = (32L*1024*1024)>>12;
 	ski_md[1].Attribute = EFI_MEMORY_WB;
 
 	mdcount = 1;		/* ignore the high memory for now */
 
 	/*
 	 * Find out how much memory is available, by looking at
 	 * the memory descriptors.
 	 */
 #ifdef DEBUG_MD
 	printf("Memory descriptor count: %d\n", mdcount);
 #endif
 
 	phys_avail_cnt = 0;
 	for (i = 0; i < mdcount; i++) {
 		mdp = &ski_md[i];
 #ifdef DEBUG_MD
 		printf("MD %d: type %d pa 0x%lx cnt 0x%lx\n", i,
 		       mdp->Type,
 		       mdp->PhysicalStart,
 		       mdp->NumberOfPages);
 #endif
 		totalphysmem += mdp->NumberOfPages;
 
 		if (mdp->Type != EfiConventionalMemory) {
 			resvmem += mdp->NumberOfPages;
 			continue;
 		}
 
 		/*
 		 * We have a memory descriptor available for system
 		 * software use.  We must determine if this cluster
 		 * holds the kernel.
 		 */
 		physmem += mdp->NumberOfPages;
 		pfn0 = atop(mdp->PhysicalStart);
 		pfn1 = pfn0 + mdp->NumberOfPages;
 		if (pfn0 <= kernendpfn && kernstartpfn <= pfn1) {
 			/*
 			 * Must compute the location of the kernel
 			 * within the segment.
 			 */
 #ifdef DEBUG_MD
 			printf("Descriptor %d contains kernel\n", i);
 #endif
 			if (pfn0 < kernstartpfn) {
 				/*
 				 * There is a chunk before the kernel.
 				 */
 #ifdef DEBUG_MD
 				printf("Loading chunk before kernel: "
 				       "0x%lx / 0x%lx\n", pfn0, kernstartpfn);
 #endif
 				phys_avail[phys_avail_cnt] = ia64_ptob(pfn0);
 				phys_avail[phys_avail_cnt+1] = ia64_ptob(kernstartpfn);
 				phys_avail_cnt += 2;
 			}
 			if (kernendpfn < pfn1) {
 				/*
 				 * There is a chunk after the kernel.
 				 */
 #ifdef DEBUG_MD
 				printf("Loading chunk after kernel: "
 				       "0x%lx / 0x%lx\n", kernendpfn, pfn1);
 #endif
 				phys_avail[phys_avail_cnt] = ia64_ptob(kernendpfn);
 				phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1);
 				phys_avail_cnt += 2;
 			}
 		} else {
 			/*
 			 * Just load this cluster as one chunk.
 			 */
 #ifdef DEBUG_MD
 			printf("Loading descriptor %d: 0x%lx / 0x%lx\n", i,
 			       pfn0, pfn1);
 #endif
 			phys_avail[phys_avail_cnt] = ia64_ptob(pfn0);
 			phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1);
 			phys_avail_cnt += 2;
 
 		}
 	}
 	phys_avail[phys_avail_cnt] = 0;
 
 	Maxmem = physmem;
 
 	/*
 	 * Initialize error message buffer (at end of core).
 	 */
 	{
 		size_t sz = round_page(MSGBUF_SIZE);
 		int i = phys_avail_cnt - 2;
 
 		/* shrink so that it'll fit in the last segment */
 		if (phys_avail[i+1] - phys_avail[i] < sz)
 			sz = phys_avail[i+1] - phys_avail[i];
 
 		phys_avail[i+1] -= sz;
 		msgbufp = (struct msgbuf*) IA64_PHYS_TO_RR7(phys_avail[i+1]);
 
 		msgbufinit(msgbufp, sz);
 
 		/*
 		 * Remove the last segment if it now has no pages.  Both
 		 * halves of the pair are cleared: walkers of phys_avail[]
 		 * stop on a zero END address, so zeroing only the start
 		 * would leave a bogus [0, end) segment behind.
 		 */
 		if (phys_avail[i] == phys_avail[i+1])
 			phys_avail[i] = phys_avail[i+1] = 0;
 
 		/* warn if the message buffer had to be shrunk */
 		if (sz != round_page(MSGBUF_SIZE))
 			printf("WARNING: %ld bytes not available for msgbuf in last cluster (%ld used)\n",
 			    round_page(MSGBUF_SIZE), sz);
 
 	}
 
 	/*
 	 * Init mapping for u page(s) for proc 0
 	 */
 	proc0paddr = proc0.p_addr =
 	    (struct user *)pmap_steal_memory(UPAGES * PAGE_SIZE);
 
 	/*
 	 * Setup the global data for the bootstrap cpu.
 	 */
 	{
 		size_t sz = round_page(UPAGES * PAGE_SIZE);
 		globalp = (struct globaldata *) pmap_steal_memory(sz);
 		globaldata_init(globalp, 0, sz);
 		ia64_set_k4((u_int64_t) globalp);
 		PCPU_GET(next_asn) = 1;	/* 0 used for proc0 pmap */
 	}
 
 	/*
 	 * Initialize the virtual memory system, and set the
 	 * page table base register in proc 0's PCB.
 	 */
 	pmap_bootstrap();
 
 	/*
 	 * Initialize the rest of proc 0's PCB.
 	 *
 	 * Set the kernel sp, reserving space for an (empty) trapframe,
 	 * and make proc0's trapframe pointer point to it for sanity.
 	 * Initialise proc0's backing store to start after u area.
 	 */
 	proc0.p_addr->u_pcb.pcb_sp =
 	    (u_int64_t)proc0.p_addr + USPACE - sizeof(struct trapframe) - 16;
 	proc0.p_addr->u_pcb.pcb_bspstore = (u_int64_t) (proc0.p_addr + 1);
 	proc0.p_md.md_tf =
 	    (struct trapframe *)(proc0.p_addr->u_pcb.pcb_sp + 16);
 
 	/*
 	 * Record all cpus in a list.
 	 */
 	SLIST_INIT(&cpuhead);
 	SLIST_INSERT_HEAD(&cpuhead, GLOBALP, gd_allcpu);
 
 	/* Setup curproc so that mutexes work */
 	PCPU_SET(curproc, &proc0);
 
 	LIST_INIT(&proc0.p_heldmtx);
 	LIST_INIT(&proc0.p_contested);
 
 	/*
 	 * Initialise mutexes.
 	 */
 	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
 	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
 	mtx_enter(&Giant, MTX_DEF);
 
 #if 0
 	/*
 	 * Enable interrupts on first release (in switch_trampoline).
 	 */
 	sched_lock.mtx_saveipl = ALPHA_PSL_IPL_0;
 #endif
 
 	/*
 	 * Look at arguments passed to us and compute boothowto.
 	 */
 	boothowto = 0;
 #ifdef KADB
 	boothowto |= RB_KDB;
 #endif
 /*	boothowto |= RB_KDB | RB_GDB; */
 	for (p = bootinfo.boot_flags; p && *p != '\0'; p++) {
 		/*
 		 * Boot flags are accepted in either case.
 		 * NOTE(review): the original comment here cited an
 		 * "AXP Architecture Reference Manual" and looks
 		 * inherited from the Alpha port -- confirm whether
 		 * case-insensitivity is still wanted on ia64.
 		 */
 		switch (*p) {
 		case 'a': /* autoboot */
 		case 'A':
 			boothowto &= ~RB_SINGLE;
 			break;
 
 #ifdef DEBUG
 		case 'c': /* crash dump immediately after autoconfig */
 		case 'C':
 			boothowto |= RB_DUMP;
 			break;
 #endif
 
 #if defined(DDB)
 		case 'd': /* break into the kernel debugger ASAP */
 		case 'D':
 			boothowto |= RB_KDB;
 			break;
 		case 'g': /* use kernel gdb */
 		case 'G':
 			boothowto |= RB_GDB;
 			break;
 #endif
 
 		case 'h': /* always halt, never reboot */
 		case 'H':
 			boothowto |= RB_HALT;
 			break;
 
 #if 0
 		case 'm': /* mini root present in memory */
 		case 'M':
 			boothowto |= RB_MINIROOT;
 			break;
 #endif
 
 		case 'n': /* askname */
 		case 'N':
 			boothowto |= RB_ASKNAME;
 			break;
 
 		case 's': /* single-user (default, supported for sanity) */
 		case 'S':
 			boothowto |= RB_SINGLE;
 			break;
 
 		case 'v':
 		case 'V':
 			boothowto |= RB_VERBOSE;
 			bootverbose = 1;
 			break;
 
 		default:
 			printf("Unrecognized boot flag '%c'.\n", *p);
 			break;
 		}
 	}
 
 	/*
 	 * Catch case of boot_verbose set in environment.
 	 */
 	if ((p = getenv("boot_verbose")) != NULL) {
 		if (strcmp(p, "yes") == 0 || strcmp(p, "YES") == 0) {
 			boothowto |= RB_VERBOSE;
 			bootverbose = 1;
 		}
 	}
 
 	/*
 	 * Force single-user for a while.
 	 */
 	boothowto |= RB_SINGLE;
 
 	/*
 	 * Initialize debuggers, and break into them if appropriate.
 	 */
 #ifdef DDB
 	kdb_init();
 	if (boothowto & RB_KDB) {
 		printf("Boot flags requested debugger\n");
 		breakpoint();
 	}
 #endif
 }
 
 /*
  * Zero len bytes starting at buf.
  *
  * Works in four phases: single bytes until the pointer is u_long
  * aligned, then blocks of eight u_longs, then single u_longs, then
  * whatever bytes are left over.
  */
 void
 bzero(void *buf, size_t len)
 {
 	const size_t wordsz = sizeof(u_long);
 	const size_t blocksz = sizeof(u_long) * 8;
 	caddr_t cursor = buf;
 	u_long *wp;
 	int k;
 
 	/* Phase 1: byte stores up to the first aligned address. */
 	for (; len != 0 && ((vm_offset_t) cursor & (wordsz - 1)) != 0; len--)
 		*cursor++ = 0;
 
 	/* Phase 2: eight words per iteration. */
 	for (; len >= blocksz; len -= blocksz) {
 		wp = (u_long *) cursor;
 		for (k = 0; k < 8; k++)
 			wp[k] = 0;
 		cursor += blocksz;
 	}
 
 	/* Phase 3: remaining whole words. */
 	for (; len >= wordsz; len -= wordsz) {
 		*(u_long *) cursor = 0;
 		cursor += wordsz;
 	}
 
 	/* Phase 4: trailing bytes. */
 	for (; len != 0; len--)
 		*cursor++ = 0;
 }
 
 /*
  * Delay hook taking a count n.  Not implemented yet: the function
  * returns immediately without waiting, so callers get no delay at all.
  * NOTE(review): n is presumably microseconds, as for DELAY() on other
  * ports -- confirm when implementing.
  */
 void
 DELAY(int n)
 {
     /* TODO */
 }
 
 /*
  * Send an interrupt to process.
  *
  * Builds a struct sigframe (ucontext plus siginfo) describing the
  * interrupted user state, copies it out to the user stack -- or to the
  * alternate signal stack when one is configured for this signal and not
  * already in use -- and rewrites the trapframe so that the process
  * resumes in the signal trampoline (sigcode) with:
  *
  *	r14 = sig, r15 = &sfp->sf_si, r16 = &sfp->sf_uc,
  *	r17 = catcher, r18 = alternate stack base (0 if not switching)
  *
  * If the frame cannot be written, the process is sent a SIGILL with
  * the default action instead.
  *
  * catcher: user handler address
  * sig:     signal number
  * mask:    signal mask to restore on sigreturn
  * code:    signal specific code, stored in si_code
  */
 void
 sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
 {
 	struct proc *p = curproc;
 	struct trapframe *frame;
 	struct sigacts *psp;
 	struct sigframe sf, *sfp;
 	u_int64_t sbs = 0;
 	int oonstack, rndfsize;
 
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	frame = p->p_md.md_tf;
 	oonstack = sigonstack(frame->tf_r[FRAME_SP]);
 	rndfsize = ((sizeof(sf) + 15) / 16) * 16;
 
 	/*
 	 * Make sure that we restore the entire trapframe after a
 	 * signal.
 	 */
 	frame->tf_flags &= ~FRAME_SYSCALL;
 
 	/* save user context */
 	bzero(&sf, sizeof(struct sigframe));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_flags = IA64_MC_FLAG_ONSTACK;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 
 	sf.sf_uc.uc_mcontext.mc_nat     = 0; /* XXX */
 	sf.sf_uc.uc_mcontext.mc_sp	= frame->tf_r[FRAME_SP];
 	/* The slot number (ipsr bits 41-42) rides in the low bits of mc_ip. */
 	sf.sf_uc.uc_mcontext.mc_ip	= (frame->tf_cr_iip
 					   | ((frame->tf_cr_ipsr >> 41) & 3));
 	/*
 	 * Strip only the valid bit (bit 63) from the saved IFS; this is
 	 * the bit sigreturn() sets again when it rebuilds tf_cr_ifs.
 	 */
 	sf.sf_uc.uc_mcontext.mc_cfm     = frame->tf_cr_ifs & ~(1UL<<63);
 	sf.sf_uc.uc_mcontext.mc_um      = frame->tf_cr_ipsr & 0x1fff;
 	sf.sf_uc.uc_mcontext.mc_ar_rsc  = frame->tf_ar_rsc;
 	sf.sf_uc.uc_mcontext.mc_ar_bsp  = frame->tf_ar_bspstore;
 	sf.sf_uc.uc_mcontext.mc_ar_rnat = frame->tf_ar_rnat;
 	sf.sf_uc.uc_mcontext.mc_ar_ccv  = frame->tf_ar_ccv;
 	sf.sf_uc.uc_mcontext.mc_ar_unat = frame->tf_ar_unat;
 	sf.sf_uc.uc_mcontext.mc_ar_fpsr = frame->tf_ar_fpsr;
 	sf.sf_uc.uc_mcontext.mc_ar_pfs  = frame->tf_ar_pfs;
 	sf.sf_uc.uc_mcontext.mc_pr      = frame->tf_pr;
 
 	bcopy(&frame->tf_b[0],
 	      &sf.sf_uc.uc_mcontext.mc_br[0],
 	      8 * sizeof(unsigned long));
 	sf.sf_uc.uc_mcontext.mc_gr[0] = 0;
 	bcopy(&frame->tf_r[0],
 	      &sf.sf_uc.uc_mcontext.mc_gr[1],
 	      31 * sizeof(unsigned long));
 
 	/* XXX mc_fr[] */
 
 	/*
 	 * Fill in the POSIX parts of the siginfo now, while sf is still
 	 * being assembled.  This has to happen before the copyout()
 	 * below, or the values never reach the user's copy of the frame.
 	 */
 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = code;
 		sf.sf_si.si_addr = (void*)frame->tf_cr_ifa;
 	}
 
 	/*
 	 * Allocate and validate space for the signal handler
 	 * context. Note that if the stack is in P0 space, the
 	 * call to grow() is a nop, and the useracc() check
 	 * will fail if the process has not already allocated
 	 * the space with a `brk'.
 	 */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		sbs = (u_int64_t) p->p_sigstk.ss_sp;
 		sfp = (struct sigframe *)((caddr_t)p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - rndfsize);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		sfp = (struct sigframe *)(frame->tf_r[FRAME_SP] - rndfsize);
 	PROC_UNLOCK(p);
 
 	(void)grow_stack(p, (u_long)sfp);
 #ifdef DEBUG
 	if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
 		printf("sendsig(%d): sig %d ssp %p usp %p\n", p->p_pid,
 		       sig, &sf, sfp);
 #endif
 	if (!useracc((caddr_t)sfp, sizeof(sf), VM_PROT_WRITE)) {
 #ifdef DEBUG
 		if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
 			printf("sendsig(%d): useracc failed on sig %d\n",
 			       p->p_pid, sig);
 #endif
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		PROC_UNLOCK(p);
 		psignal(p, SIGILL);
 		return;
 	}
 
 #if 0
 	/* save the floating-point state, if necessary, then copy it. */
 	ia64_fpstate_save(p, 1);
 	sf.sf_uc.uc_mcontext.mc_ownedfp = p->p_md.md_flags & MDP_FPUSED;
 	bcopy(&p->p_addr->u_pcb.pcb_fp,
 	      (struct fpreg *)sf.sf_uc.uc_mcontext.mc_fpregs,
 	      sizeof(struct fpreg));
 	sf.sf_uc.uc_mcontext.mc_fp_control = p->p_addr->u_pcb.pcb_fp_control;
 #endif
 
 	/*
 	 * copy the frame out to userland.
 	 */
 	(void) copyout((caddr_t)&sf, (caddr_t)sfp, sizeof(sf));
 #ifdef DEBUG
 	if (sigdebug & SDB_FOLLOW)
 		printf("sendsig(%d): sig %d sfp %p code %lx\n", p->p_pid, sig,
 		    sfp, code);
 #endif
 
 	/*
 	 * Set up the registers to return to sigcode.
 	 */
 	frame->tf_cr_ipsr &= ~IA64_PSR_RI;
 	frame->tf_cr_iip = PS_STRINGS - (esigcode - sigcode);
 	frame->tf_r[FRAME_R1] = sig;
 	/*
 	 * NOTE(review): the r15 value chosen here is overwritten
 	 * unconditionally a few lines further down; kept as-is so the
 	 * register hand-off to sigcode is unchanged.
 	 */
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig))
 		frame->tf_r[FRAME_R15] = (u_int64_t)&(sfp->sf_si);
 	else
 		frame->tf_r[FRAME_R15] = code;
 	PROC_UNLOCK(p);
 
 	frame->tf_r[FRAME_SP] = (u_int64_t)sfp - 16;
 	frame->tf_r[FRAME_R14] = sig;
 	frame->tf_r[FRAME_R15] = (u_int64_t) &sfp->sf_si;
 	frame->tf_r[FRAME_R16] = (u_int64_t) &sfp->sf_uc;
 	frame->tf_r[FRAME_R17] = (u_int64_t)catcher;
 	frame->tf_r[FRAME_R18] = sbs;
 
 #ifdef DEBUG
 	if (sigdebug & SDB_FOLLOW)
 		printf("sendsig(%d): pc %lx, catcher %lx\n", p->p_pid,
 		    frame->tf_cr_iip, frame->tf_r[FRAME_R17]);
 	if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid)
 		printf("sendsig(%d): sig %d returns\n",
 		    p->p_pid, sig);
 #endif
 }
 
 /*
  * Old-style sigreturn system call entry point.
  *
  * Not implemented on this port: nothing is restored and the call
  * always fails with EOPNOTSUPP.  Both arguments are ignored.
  */
 int
 osigreturn(struct proc *p,
 	struct osigreturn_args /* {
 		struct osigcontext *sigcntxp;
 	} */ *uap)
 {
 	return EOPNOTSUPP;
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  */
 
 int
 sigreturn(struct proc *p,
 	struct sigreturn_args /* {
 		ucontext_t *sigcntxp;
 	} */ *uap)
 {
 	ucontext_t uc, *ucp;
 	struct pcb *pcb;
 	struct trapframe *frame = p->p_md.md_tf;
 	struct __mcontext *mcp;
 
 	ucp = uap->sigcntxp;
 	pcb = &p->p_addr->u_pcb;
 
 #ifdef DEBUG
 	if (sigdebug & SDB_FOLLOW)
 	    printf("sigreturn: pid %d, scp %p\n", p->p_pid, ucp);
 #endif
 
 	/*
 	 * Fetch the entire context structure at once for speed.
 	 * We don't use a normal argument to simplify RSE handling.
 	 */
 	if (copyin((caddr_t)frame->tf_r[FRAME_R4],
 		   (caddr_t)&uc, sizeof(ucontext_t)))
 		return (EFAULT);
 
 	/*
 	 * Restore the user-supplied information
 	 */
 	mcp = &uc.uc_mcontext;
 	bcopy(&mcp->mc_br[0], &frame->tf_b[0], 8*sizeof(u_int64_t));
 	bcopy(&mcp->mc_gr[1], &frame->tf_r[0], 31*sizeof(u_int64_t));
 	/* XXX mc_fr */
 
 	frame->tf_flags &= ~FRAME_SYSCALL;
 	frame->tf_cr_iip = mcp->mc_ip & ~15;
 	frame->tf_cr_ipsr &= ~IA64_PSR_RI;
 	switch (mcp->mc_ip & 15) {
 	case 1:
 		frame->tf_cr_ipsr |= IA64_PSR_RI_1;
 		break;
 	case 2:
 		frame->tf_cr_ipsr |= IA64_PSR_RI_2;
 		break;
 	}
 	frame->tf_cr_ipsr     = ((frame->tf_cr_ipsr & ~0x1fff)
 				 | (mcp->mc_um & 0x1fff));
 	frame->tf_pr          = mcp->mc_pr;
 	frame->tf_ar_rsc      = (mcp->mc_ar_rsc & 3) | 12; /* user, loadrs=0 */
 	frame->tf_ar_pfs      = mcp->mc_ar_pfs;
 	frame->tf_cr_ifs      = mcp->mc_cfm | (1UL<<63);
 	frame->tf_ar_bspstore = mcp->mc_ar_bsp;
 	frame->tf_ar_rnat     = mcp->mc_ar_rnat;
 	frame->tf_ndirty      = 0; /* assumes flushrs in sigcode */
 	frame->tf_ar_unat     = mcp->mc_ar_unat;
 	frame->tf_ar_ccv      = mcp->mc_ar_ccv;
 	frame->tf_ar_fpsr     = mcp->mc_ar_fpsr;
 
 	frame->tf_r[FRAME_SP] = mcp->mc_sp;
 
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (uc.uc_mcontext.mc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	p->p_sigmask = uc.uc_sigmask;
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 
 	/* XXX ksc.sc_ownedfp ? */
 	ia64_fpstate_drop(p);
 #if 0
 	bcopy((struct fpreg *)uc.uc_mcontext.mc_fpregs,
 	      &p->p_addr->u_pcb.pcb_fp, sizeof(struct fpreg));
 	p->p_addr->u_pcb.pcb_fp_control =
 		uc.uc_mcontext.mc_fp_control;
 #endif
 
 #ifdef DEBUG
 	if (sigdebug & SDB_FOLLOW)
 		printf("sigreturn(%d): returns\n", p->p_pid);
 #endif
 	return (EJUSTRETURN);
 }
 
 /*
  * Machine dependent boot() routine
  *
  * I haven't seen anything to put here yet
  * Possibly some stuff might be grafted back here from boot()
  */
 /* Intentionally empty for now; howto is ignored. */
 void
 cpu_boot(int howto)
 {
 }
 
 /*
  * Shutdown the CPU as much as possible
  */
 /* Not implemented yet: returns to the caller without halting anything. */
 void
 cpu_halt(void)
 {
     /* TODO */
 }
 
 /*
  * Clear registers on exec
  *
  * Resets p's trapframe for a freshly exec'd image: the general and
  * floating point register save areas are zeroed, the instruction
  * pointer is set to entry, the stack pointer to stack and r14 to
  * ps_strings.  The register stack is restarted at the process's
  * backing store base with an empty frame, and any fp state the
  * process owned is dropped.
  */
 void
 setregs(struct proc *p, u_long entry, u_long stack, u_long ps_strings)
 {
 	struct trapframe *frame;
 
 	frame = p->p_md.md_tf;
 
 	/*
 	 * Make sure that we restore the entire trapframe after an
 	 * execve.
 	 */
 	frame->tf_flags &= ~FRAME_SYSCALL;
 
 	bzero(frame->tf_r, sizeof(frame->tf_r));
 	bzero(frame->tf_f, sizeof(frame->tf_f));
 	frame->tf_cr_iip = entry;
 	frame->tf_cr_ipsr = (IA64_PSR_IC
 			     | IA64_PSR_I
 			     | IA64_PSR_IT
 			     | IA64_PSR_DT
 			     | IA64_PSR_RT
 			     | IA64_PSR_DFH
 			     | IA64_PSR_BN
 			     | IA64_PSR_CPL_USER);
 	frame->tf_r[FRAME_SP] = stack;
 	frame->tf_r[FRAME_R14] = ps_strings;
 
 	/*
 	 * Setup the new backing store and make sure the new image
 	 * starts executing with an empty register stack frame.
 	 */
 	frame->tf_ar_bspstore = p->p_md.md_bspstore;
 	frame->tf_ndirty = 0;
 	/*
 	 * Unsigned constant: shifting a signed 1L into bit 63 overflows
 	 * the type.  sigreturn() builds the same bit with 1UL.
 	 */
 	frame->tf_cr_ifs = (1UL<<63); /* ifm=0, v=1 */
 	frame->tf_ar_rsc = 0xf;	/* user mode rsc */
 	frame->tf_ar_fpsr = IA64_FPSR_DEFAULT;
 
 	p->p_md.md_flags &= ~MDP_FPUSED;
 	ia64_fpstate_drop(p);
 }
 
 /*
  * ptrace hook for setting the program counter of p to addr.
  * Still a stub: the trapframe is left untouched and 0 is returned.
  */
 int
 ptrace_set_pc(struct proc *p, unsigned long addr)
 {
 
 	/* TODO set pc in trapframe */
 	return (0);
 }
 
 /*
  * ptrace hook for single-stepping p.
  * Still a stub: nothing is armed and 0 is returned.
  */
 int
 ptrace_single_step(struct proc *p)
 {
 
 	/* TODO arrange for user process to single step */
 	return (0);
 }
 
 /*
  * Decide whether the byte range [addr, addr + len) of p's u-area may be
  * read.  Returns 0 when the range lies wholly within struct user or
  * wholly within the trapframe, and EPERM otherwise (including when
  * addr + len wraps around).
  */
 int
 ptrace_read_u_check(struct proc *p, vm_offset_t addr, size_t len)
 {
 	vm_offset_t end, tf_start, tf_end;
 
 	end = (vm_offset_t) (addr + len);
 
 	/* A wrapped range is never acceptable. */
 	if (end < addr)
 		return (EPERM);
 
 	/* Entirely inside struct user. */
 	if (end <= sizeof(struct user))
 		return (0);
 
 	/*
 	 * Otherwise it has to sit entirely inside the trapframe, which
 	 * is located by its byte offset from the start of the u-area.
 	 */
 	tf_start = (char *) p->p_md.md_tf - (char *) p->p_addr;
 	tf_end = (vm_offset_t) (tf_start + sizeof(struct trapframe));
 	if (addr >= tf_start && end <= tf_end)
 		return (0);
 
 	return (EPERM);
 }
 
 /*
  * Store data (as an int) at byte offset off within p's u-area.
  *
  * The store is permitted only when the whole int lies inside the
  * trapframe or inside the pcb.  Returns 0 on success and EFAULT when
  * off falls outside both windows.
  */
 int
 ptrace_write_u(struct proc *p, vm_offset_t off, long data)
 {
 	vm_offset_t min;
 #if 0
 	struct trapframe frame_copy;
 	struct trapframe *tp;
 #endif
 
 	/*
 	 * Privileged kernel state is scattered all over the user area.
 	 * Only allow write access to parts of regs and to fpregs.
 	 */
 	min = (char *)p->p_md.md_tf - (char *)p->p_addr;
 	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
 #if 0
 		tp = p->p_md.md_tf;
 		frame_copy = *tp;
 		*(int *)((char *)&frame_copy + (off - min)) = data;
 		if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
 		    !CS_SECURE(frame_copy.tf_cs))
 			return (EINVAL);
 #endif
 		*(int*)((char *)p->p_addr + off) = data;
 		return (0);
 	}
 	min = offsetof(struct user, u_pcb);
 	/*
 	 * The last acceptable offset is sizeof(int) short of the end of
 	 * the pcb, exactly as for the trapframe above; without that the
 	 * int store could run past the pcb.
 	 */
 	if (off >= min && off <= min + sizeof(struct pcb) - sizeof(int)) {
 		*(int*)((char *)p->p_addr + off) = data;
 		return (0);
 	}
 	return (EFAULT);
 }
 
 /*
  * Report the access allowed to physical address pa.  The address is not
  * examined: read and write are granted for every pa.
  */
 int
 ia64_pa_access(vm_offset_t pa)
 {
 
 	return (VM_PROT_READ | VM_PROT_WRITE);
 }
 
 /*
  * Copy p's register state into *regs.
  * Still a stub: *regs is not written and 0 is returned.
  */
 int
 fill_regs(struct proc *p, struct reg *regs)
 {
 
 	/* TODO copy trapframe to regs */
 	return (0);
 }
 
 /*
  * Load p's register state from *regs.
  * Still a stub: the trapframe is not modified and 0 is returned.
  */
 int
 set_regs(struct proc *p, struct reg *regs)
 {
 
 	/* TODO copy regs to trapframe */
 	return (0);
 }
 
 /*
  * Copy p's floating point state into *fpregs.
  * Only partly implemented: ia64_fpstate_save() is called for p, but the
  * copy into *fpregs is still disabled, so *fpregs is left unwritten.
  * Always returns 0.
  */
 int
 fill_fpregs(struct proc *p, struct fpreg *fpregs)
 {
 
 	/* TODO copy fpu state to fpregs */
 	ia64_fpstate_save(p, 0);
 
 #if 0
 	bcopy(&p->p_addr->u_pcb.pcb_fp, fpregs, sizeof *fpregs);
 #endif
 	return (0);
 }
 
 /*
  * Load p's floating point state from *fpregs.
  * Only partly implemented: ia64_fpstate_drop() is called for p, but the
  * copy from *fpregs into the pcb is still disabled.  Always returns 0.
  */
 int
 set_fpregs(struct proc *p, struct fpreg *fpregs)
 {
 
 	/* TODO copy fpregs fpu state */
 	ia64_fpstate_drop(p);
 
 #if 0
 	bcopy(fpregs, &p->p_addr->u_pcb.pcb_fp, sizeof *fpregs);
 #endif
 	return (0);
 }
 
 #ifndef DDB
 /*
  * Fallback used when the kernel is built without DDB: there is no
  * debugger to enter, so just report the call and its message on the
  * console and return.
  */
 void
 Debugger(const char *msg)
 {
 	printf("Debugger(\"%s\") called.\n", msg);
 }
 #endif /* no DDB */
 
 #include <sys/disklabel.h>
 
 /*
  * Determine the size of the transfer, and make sure it is
  * within the boundaries of the partition. Adjust transfer
  * if needed, and signal errors or early completion.
  *
  * NOTE: the whole implementation below is compiled out with #if 0,
  * so as built this function ignores its arguments and always
  * returns -1.  The disabled code would return 1 when the transfer
  * may proceed, 0 for a request starting exactly at the end of the
  * partition, and -1 (with bio_error and BIO_ERROR set) on error.
  */
 int
 bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
 {
 #if 0
         struct partition *p = lp->d_partitions + dkpart(bp->bio_dev);
         int labelsect = lp->d_partitions[0].p_offset;
         int maxsz = p->p_size,
                 sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 
         /* overwriting disk label ? */
         /* XXX should also protect bootstrap in first 8K */
         if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect &&
 #if LABELSECTOR != 0
             bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
 #endif
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 
 #if     defined(DOSBBSECTOR) && defined(notyet)
         /* overwriting master boot record? */
         if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR &&
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 #endif
 
         /* beyond partition? */
         if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) {
                 /* if exactly at end of disk, return an EOF */
                 if (bp->bio_blkno == maxsz) {
                         bp->bio_resid = bp->bio_bcount;
                         return(0);
                 }
                 /* or truncate if part of it fits */
                 sz = maxsz - bp->bio_blkno;
                 if (sz <= 0) {
                         bp->bio_error = EINVAL;
                         goto bad;
                 }
                 bp->bio_bcount = sz << DEV_BSHIFT;
         }
 
         /* translate the partition-relative block to an absolute one */
         bp->bio_pblkno = bp->bio_blkno + p->p_offset;
         return(1);
 
 bad:
         bp->bio_flags |= BIO_ERROR;
 #endif
         /* Only statement actually compiled: unconditional failure. */
         return(-1);
 
 }
 
 /*
  * Handler for the machdep adjkerntz sysctl: the integer is handled by
  * sysctl_handle_int(); after a successful request that supplied a new
  * value, resettodr() is called.
  */
 static int
 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 {
 	int rv;
 
 	rv = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	if (rv || !req->newptr)
 		return (rv);
 
 	/* A new value was stored successfully. */
 	resettodr();
 	return (rv);
 }
 
 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
 	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
 
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
 	CTLFLAG_RW, &disable_rtc_set, 0, "");
 
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
 /*
  * Consistency check on FP ownership: when IA64_PSR_DFH is clear in
  * p's saved psr, p is expected to be this CPU's fpcurproc.  Panics
  * if that does not hold.
  */
 void
 ia64_fpstate_check(struct proc *p)
 {
 	if ((p->p_md.md_tf->tf_cr_ipsr & IA64_PSR_DFH) == 0 &&
 	    p != PCPU_GET(fpcurproc))
 		panic("ia64_fpstate_check: bogus");
 }
 
 /*
  * Flush the high floating point state of p into its pcb, provided p
  * is the current FP owner on this CPU; otherwise do nothing.
  *
  * With write == 0 the caller gets a snapshot it may read.  With a
  * non-zero write, p additionally gives up FP ownership (IA64_PSR_DFH
  * is set and fpcurproc cleared) so the saved copy can be modified
  * safely; the process reloads the changed state later by taking a
  * disabled fp trap.
  */
 void
 ia64_fpstate_save(struct proc *p, int write)
 {
 
 	/* Nothing is live in the registers unless p owns the FP unit. */
 	if (p != PCPU_GET(fpcurproc))
 		return;
 
 	/* Save the state in the pcb. */
 	savehighfp(p->p_addr->u_pcb.pcb_highfp);
 
 	if (!write)
 		return;
 
 	p->p_md.md_tf->tf_cr_ipsr |= IA64_PSR_DFH;
 	PCPU_SET(fpcurproc, NULL);
 }
 
 /*
  * Give up ownership of the FP state without saving it.  Used in
  * place of ia64_fpstate_save() when the entire FP state is about to
  * be replaced (e.g. on sigreturn).  Does nothing unless p is the
  * current FP owner on this CPU.
  */
 void
 ia64_fpstate_drop(struct proc *p)
 {
 
 	if (p != PCPU_GET(fpcurproc))
 		return;
 
 	p->p_md.md_tf->tf_cr_ipsr |= IA64_PSR_DFH;
 	PCPU_SET(fpcurproc, NULL);
 }
 
 /*
  * Make p the owner of the fp state on this CPU: the previous owner's
  * state (if there is one) is written to its pcb, then p's state is
  * reloaded from p's pcb.
  */
 void
 ia64_fpstate_switch(struct proc *p)
 {
 	struct proc *prev;
 
 	prev = PCPU_GET(fpcurproc);
 	if (prev) {
 		/*
 		 * Dump the old fp state if it's valid, and set
 		 * IA64_PSR_DFH in the old owner's saved psr.
 		 */
 		savehighfp(prev->p_addr->u_pcb.pcb_highfp);
 		prev->p_md.md_tf->tf_cr_ipsr |= IA64_PSR_DFH;
 	}
 
 	/*
 	 * Record the new FP owner, reload its state and clear
 	 * IA64_PSR_DFH in its saved psr.
 	 */
 	PCPU_SET(fpcurproc, p);
 	restorehighfp(p->p_addr->u_pcb.pcb_highfp);
 	p->p_md.md_tf->tf_cr_ipsr &= ~IA64_PSR_DFH;
 
 	/* Note that this process has used the FP unit. */
 	p->p_md.md_flags |= MDP_FPUSED;
 }
Index: head/sys/pc98/i386/machdep.c
===================================================================
--- head/sys/pc98/i386/machdep.c	(revision 71983)
+++ head/sys/pc98/i386/machdep.c	(revision 71984)
@@ -1,2838 +1,2839 @@
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  * $FreeBSD$
  */
 
 #include "opt_atalk.h"
 #include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_ipx.h"
 #include "opt_isa.h"
 #include "opt_maxmem.h"
 #include "opt_msgbuf.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
 #include "opt_user_ldt.h"
 #include "opt_userconfig.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/ipl.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/reboot.h>
 #include <sys/callout.h>
 #include <sys/msgbuf.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <sys/lock.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
 #include <sys/user.h>
 #include <sys/exec.h>
 #include <sys/cons.h>
 
 #include <ddb/ddb.h>
 
 #include <net/netisr.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/reg.h>
 #include <machine/clock.h>
 #include <machine/specialreg.h>
 #include <machine/bootinfo.h>
 #include <machine/md_var.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
 #include <machine/globaldata.h>
 #include <machine/globals.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #ifdef PERFMON
 #include <machine/perfmon.h>
 #endif
 
 #ifdef OLD_BUS_ARCH
 #include <i386/isa/isa_device.h>
 #endif
 #include <i386/isa/icu.h>
 #include <i386/isa/intr_machdep.h>
 #ifdef PC98
 #include <pc98/pc98/pc98_machdep.h>
 #include <pc98/pc98/pc98.h>
 #else
 #include <isa/rtc.h>
 #endif
 #include <machine/vm86.h>
 #include <sys/ptrace.h>
 #include <machine/sigframe.h>
 
 extern void init386 __P((int first));
 extern void dblfault_handler __P((void));
 
 extern void printcpuinfo(void);	/* XXX header file */
 extern void earlysetcpuclass(void);	/* same header file */
 extern void finishidentcpu(void);
 extern void panicifcpuunsupported(void);
 extern void initializecpu(void);
 
 #define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
 static void cpu_startup __P((void *));
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
 #ifdef PC98
 int	need_pre_dma_flush;	/* If 1, use wbinvd before DMA transfer. */
 int	need_post_dma_flush;	/* If 1, use invd after DMA transfer. */
 #endif
 
 int	_udatasel, _ucodesel;
 u_int	atdevbase;
 
 #if defined(SWTCH_OPTIM_STATS)
 extern int swtch_optim_stats;
 SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
 	CTLFLAG_RD, &swtch_optim_stats, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
 	CTLFLAG_RD, &tlb_flush_count, 0, "");
 #endif
 
 #ifdef PC98
 static int	ispc98 = 1;
 #else
 static int	ispc98 = 0;
 #endif
 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
 
 int physmem = 0;
 int cold = 1;
 
 static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code));
 
 /*
  * Read-only sysctl handler: reports ctob(physmem) as an int.
  */
 static int
 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0, ctob(physmem), req));
 }
 
 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_physmem, "I", "");
 
 /*
  * Read-only sysctl handler: reports ctob() of physmem less the
  * current wired page count (cnt.v_wire_count) as an int.
  */
 static int
 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count),
 	    req));
 }
 
 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_usermem, "I", "");
 
 /*
  * Read-only sysctl handler: reports i386_btop(avail_end - avail_start)
  * as an int.
  */
 static int
 sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0, i386_btop(avail_end - avail_start),
 	    req));
 }
 
 SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_availpages, "I", "");
 
 /*
  * Sysctl handler that exports the kernel message buffer.
  *
  * The buffer is handed out in two pieces so that it comes out linear
  * (possibly starting with some initial nulls): first the bytes from
  * msg_bufr up to the end of the buffer, then the bytes from the start
  * of the buffer up to msg_bufr.
  */
 static int
 sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS)
 {
 	int rv;
 
 	/* Tail part: [msg_bufr, msg_size). */
 	rv = sysctl_handle_opaque(oidp, msgbufp->msg_ptr + msgbufp->msg_bufr,
 	    msgbufp->msg_size - msgbufp->msg_bufr, req);
 	if (rv)
 		return (rv);
 
 	/* Head part: [0, msg_bufr), if there is any. */
 	if (msgbufp->msg_bufr > 0) {
 		rv = sysctl_handle_opaque(oidp, msgbufp->msg_ptr,
 		    msgbufp->msg_bufr, req);
 	}
 	return (rv);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD,
 	0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer");
 
 static int msgbuf_clear;
 
 /*
  * Sysctl handler for machdep.msgbuf_clear.  After a successful request
  * that supplied a new value, the message buffer is zeroed, its
  * msg_bufr/msg_bufx indices are reset to 0 and msgbuf_clear itself
  * is set back to 0.
  */
 static int
 sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS)
 {
 	int rv;
 
 	rv = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	if (rv || !req->newptr)
 		return (rv);
 
 	/* Clear the buffer and reset write pointer */
 	bzero(msgbufp->msg_ptr, msgbufp->msg_size);
 	msgbufp->msg_bufx = 0;
 	msgbufp->msg_bufr = 0;
 	msgbuf_clear = 0;
 	return (rv);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW,
 	&msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I",
 	"Clear kernel message buffer");
 
 int bootverbose = 0, Maxmem = 0;
 #ifdef PC98
 int Maxmem_under16M = 0;
 #endif
 long dumplo;
 
 vm_offset_t phys_avail[10];
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
 
 static vm_offset_t buffer_sva, buffer_eva;
 vm_offset_t clean_sva, clean_eva;
 static vm_offset_t pager_sva, pager_eva;
 static struct trapframe proc0_tf;
 #ifndef SMP
 static struct globaldata __globaldata;
 #endif
 
 struct cpuhead cpuhead;
 
 struct mtx sched_lock;
 struct mtx Giant;
 
 /*
  * Machine-dependent startup routine (registered with SYSINIT above).
  *
  * In order: prints the version banner and memory sizes, computes the
  * callout wheel size, lays out the callout and buffer tables in one
  * kmem_alloc()ed region using a two-pass valloc scheme, creates the
  * clean/buffer/pager/exec submaps, initializes the callout structures
  * and the buffer system, records this CPU in cpuhead and, on SMP
  * kernels, starts the other processors.  The `dummy' argument is unused.
  */
 static void
 cpu_startup(dummy)
 	void *dummy;
 {
 	register unsigned i;
 	register caddr_t v;
 	vm_offset_t maxaddr;
 	vm_size_t size = 0;
 	int firstaddr;
 	vm_offset_t minaddr;
 	int physmem_est;
 
 	if (boothowto & RB_VERBOSE)
 		bootverbose++;
 
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	printf("%s", version);
 	earlysetcpuclass();
 	startrtclock();
 	printcpuinfo();
 	panicifcpuunsupported();
 #ifdef PERFMON
 	perfmon_init();
 #endif
 	printf("real memory  = %u (%uK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
 	/*
 	 * Display any holes after the first chunk of extended memory.
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		/* phys_avail[] holds (start, end) pairs, ended by a 0 end. */
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
 			unsigned int size1 = phys_avail[indx + 1] - phys_avail[indx];
 
 			printf("0x%08x - 0x%08x, %u bytes (%u pages)\n",
 			    phys_avail[indx], phys_avail[indx + 1] - 1, size1,
 			    size1 / PAGE_SIZE);
 		}
 	}
 
 	/*
 	 * Calculate callout wheel size: the smallest power of two that
 	 * is >= ncallout, with callwheelbits its log2.
 	 */
 	for (callwheelsize = 1, callwheelbits = 0;
 	     callwheelsize < ncallout;
 	     callwheelsize <<= 1, ++callwheelbits)
 		;
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Allocate space for system data structures.
 	 * The first available kernel virtual address is in "v".
 	 * As pages of kernel virtual memory are allocated, "v" is incremented.
 	 * As pages of memory are allocated and cleared,
 	 * "firstaddr" is incremented.
 	 * An index into the kernel page table corresponding to the
 	 * virtual memory address maintained in "v" is kept in "mapaddr".
 	 */
 
 	/*
 	 * Make two passes.  The first pass calculates how much memory is
 	 * needed and allocates it.  The second pass assigns virtual
 	 * addresses to the various data structures.
 	 */
 	firstaddr = 0;
 again:
 	v = (caddr_t)firstaddr;
 
 	/*
 	 * valloc() assigns the current value of "v" to `name' and then
 	 * advances "v" past `num' objects of `type'; valloclim() also
 	 * records the end address in `lim'.
 	 */
 #define	valloc(name, type, num) \
 	    (name) = (type *)v; v = (caddr_t)((name)+(num))
 #define	valloclim(name, type, num, lim) \
 	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
 
 	valloc(callout, struct callout, ncallout);
 	valloc(callwheel, struct callout_tailq, callwheelsize);
 
 	/*
 	 * Discount the physical memory larger than the size of kernel_map
 	 * to avoid eating up all of KVA space.
 	 */
 	if (kernel_map->first_free == NULL) {
 		printf("Warning: no free entries in kernel_map.\n");
 		physmem_est = physmem;
 	} else
 		physmem_est = min(physmem, kernel_map->max_offset - kernel_map->min_offset);
 
 	/*
 	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
 	 * For the first 64MB of ram nominally allocate sufficient buffers to
 	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
 	 * buffers to cover 1/20 of our ram over 64MB.
 	 *
 	 * factor represents the 1/4 x ram conversion.
 	 */
 	if (nbuf == 0) {
 		int factor = 4 * BKVASIZE / PAGE_SIZE;
 
 		nbuf = 50;
 		if (physmem_est > 1024)
 			nbuf += min((physmem_est - 1024) / factor, 16384 / factor);
 		if (physmem_est > 16384)
 			nbuf += (physmem_est - 16384) * 2 / (factor * 5);
 	}
 
 	/*
 	 * Do not allow the buffer_map to be more than 1/2 the size of the
 	 * kernel_map.
 	 */
 	if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / 
 	    (BKVASIZE * 2)) {
 		nbuf = (kernel_map->max_offset - kernel_map->min_offset) / 
 		    (BKVASIZE * 2);
 		printf("Warning: nbufs capped at %d\n", nbuf);
 	}
 
 	/* nswbuf is nbuf/4 clamped to the range [16, 256]. */
 	nswbuf = max(min(nbuf/4, 256), 16);
 
 	valloc(swbuf, struct buf, nswbuf);
 	valloc(buf, struct buf, nbuf);
 	v = bufhashinit(v);
 
 	/*
 	 * End of first pass, size has been calculated so allocate memory
 	 */
 	if (firstaddr == 0) {
 		size = (vm_size_t)(v - firstaddr);
 		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
 		if (firstaddr == 0)
 			panic("startup: no room for tables");
 		goto again;
 	}
 
 	/*
 	 * End of second pass, addresses have been assigned
 	 */
 	if ((vm_size_t)(v - firstaddr) != size)
 		panic("startup: table size inconsistency");
 
 	/*
 	 * clean_map is carved out of kernel_map; buffer_map and pager_map
 	 * are in turn carved out of clean_map.
 	 */
 	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
 			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
 	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
 				(nbuf*BKVASIZE));
+	buffer_map->system_map = 1;
 	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
 				(nswbuf*MAXPHYS) + pager_map_size);
 	pager_map->system_map = 1;
 	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 				(16*(ARG_MAX+(PAGE_SIZE*3))));
 
 	/*
 	 * XXX: Mbuf system machine-specific initializations should
 	 *      go here, if anywhere. 
 	 */
 
 	/*
 	 * Initialize callouts
 	 */
 	SLIST_INIT(&callfree);
 	for (i = 0; i < ncallout; i++) {
 		callout_init(&callout[i], 0);
 		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
 	}
 
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&callwheel[i]);
 	}
 
 	mtx_init(&callout_lock, "callout", MTX_SPIN | MTX_RECURSE);
 
 #if defined(USERCONFIG)
 	userconfig();
 	cninit();		/* the preferred console may have changed */
 #endif
 
 	printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count),
 	    ptoa(cnt.v_free_count) / 1024);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 
 	/* Start the list of CPUs with this processor's globaldata. */
 	SLIST_INIT(&cpuhead);
 	SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu);
 
 #ifdef SMP
 	/*
 	 * OK, enough kmem_alloc/malloc state should be up, lets get on with it!
 	 */
 	mp_start();			/* fire up the APs and APICs */
 	mp_announce();
 #endif  /* SMP */
 	cpu_setregs();
 }
 
 /*
  * Send an interrupt to process (old-style signal frame).
  *
  * Stack is set up to allow sigcode stored
  * at top to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  *
  * catcher: handler to run; sig: signal number; mask: signal mask to
  * record in the frame for osigreturn(); code: passed to the handler
  * (as si_code for SA_SIGINFO handlers, as the second argument
  * otherwise).
  *
  * A struct osigframe is built in a local copy and copied out to the
  * user stack, or to the alternate signal stack when that is enabled
  * for this signal and not already in use.  If the frame location is
  * not writable the process is sent SIGILL with the default action.
  */
 static void
 osendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct osigframe sf;
 	struct osigframe *fp;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int oonstack;
 
 	/*
 	 * Zero the frame first, as sendsig() does: not every member is
 	 * assigned below, yet the whole structure is copied out to user
 	 * space, so stale kernel stack contents must not remain in it.
 	 */
 	bzero(&sf, sizeof(sf));
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	regs = p->p_md.md_regs;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		/* Place the frame at the top of the alternate stack. */
 		fp = (struct osigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct osigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		/* Place the frame just below the current user %esp. */
 		fp = (struct osigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *fp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)fp) == 0 ||
 	    !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		PROC_UNLOCK(p);
 		psignal(p, SIGILL);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
 		sf.sf_siginfo.si_signo = sig;
 		sf.sf_siginfo.si_code = code;
 		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_arg2 = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/* Save most if not all of trap frame. */
 	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
 	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
 	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
 	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
 	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
 	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
 	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
 	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
 	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
 	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
 	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
 	sf.sf_siginfo.si_sc.sc_gs = rgs();
 	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
 
 	/* Build the signal context to be used by osigreturn(). */
 	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
 	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
 	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
 	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
 	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
 	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
 	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
 		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
 		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
 		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_siginfo.si_sc.sc_ps =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/* See sendsig() for comments. */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		sigexit(p, SIGILL);
 	}
 
 	/*
 	 * Resume user mode on the new frame, at PS_STRINGS - szosigcode,
 	 * with the default user code and data selectors loaded.
 	 */
 	regs->tf_esp = (int)fp;
 	regs->tf_eip = PS_STRINGS - szosigcode;
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 }
 
 /*
  * Deliver signal `sig' to the current process.
  *
  * Signals present in ps_osigset are handed to osendsig().  Otherwise a
  * struct sigframe holding the saved user context is built and copied
  * out to the user stack, or to the alternate signal stack when that is
  * enabled for this signal and not already in use, and the trapframe is
  * rewritten so the process resumes at PS_STRINGS - sv_szsigcode on the
  * new frame.
  *
  * catcher: handler to run; sig: signal number; mask: signal mask saved
  * in the frame's ucontext; code: passed to the handler (as si_code for
  * SA_SIGINFO handlers, as the second argument otherwise).
  */
 void
 sendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct sigframe sf;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct sigframe *sfp;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	/* Signals in ps_osigset get the old-style frame instead. */
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		PROC_UNLOCK(p);
 		osendsig(catcher, sig, mask, code);
 		return;
 	}
 	regs = p->p_md.md_regs;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Save user context. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/* The trapframe is copied wholesale into the mcontext from mc_fs on. */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		sfp = (struct sigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct sigframe));
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 #endif
 	} else
 		sfp = (struct sigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *sfp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)sfp) == 0 ||
 	    !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 #ifdef DEBUG
 		printf("process %d has trashed its stack\n", p->p_pid);
 #endif
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		PROC_UNLOCK(p);
 		psignal(p, SIGILL);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill siginfo structure. */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = code;
 		sf.sf_si.si_addr = (void *)regs->tf_err;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * We should never have PSL_T set when returning from vm86
 		 * mode.  It may be set here if we deliver a signal before
 		 * getting to vm86 mode, so turn it off.
 		 *
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		sigexit(p, SIGILL);
 	}
 
 	/*
 	 * Resume user mode on the new frame with the default user code
 	 * and data selectors loaded.
 	 */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  *
  * This is the variant that consumes a struct osigcontext.
  *
  * Returns EJUSTRETURN on success, EFAULT if the context is not
  * readable, and EINVAL if vm86 mode is requested without an
  * initialized vm86 area or if the eflags/%cs checks fail.
  */
 int
 osigreturn(p, uap)
 	struct proc *p;
 	struct osigreturn_args /* {
 		struct osigcontext *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	struct osigcontext *scp;
 	int eflags;
 
 	regs = p->p_md.md_regs;
 	scp = uap->sigcntxp;
 	/* The user context is dereferenced directly after this check. */
 	if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ))
 		return (EFAULT);
 	eflags = scp->sc_ps;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Take only the user-changeable bits from the supplied
 		 * eflags; all other bits come from the current frame.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |					    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * The context's segment values go into the vm86 slots of
 		 * the frame; the regular slots get the user data selector.
 		 */
 		tf->tf_vm86_ds = scp->sc_ds;
 		tf->tf_vm86_es = scp->sc_es;
 		tf->tf_vm86_fs = scp->sc_fs;
 		tf->tf_vm86_gs = scp->sc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		if (!CS_SECURE(scp->sc_cs)) {
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 		regs->tf_ds = scp->sc_ds;
 		regs->tf_es = scp->sc_es;
 		regs->tf_fs = scp->sc_fs;
 	}
 
 	/* Restore remaining registers. */
 	regs->tf_eax = scp->sc_eax;
 	regs->tf_ebx = scp->sc_ebx;
 	regs->tf_ecx = scp->sc_ecx;
 	regs->tf_edx = scp->sc_edx;
 	regs->tf_esi = scp->sc_esi;
 	regs->tf_edi = scp->sc_edi;
 	regs->tf_cs = scp->sc_cs;
 	regs->tf_ss = scp->sc_ss;
 	regs->tf_isp = scp->sc_isp;
 
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (scp->sc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	/* Restore the old-format signal mask saved in the context. */
 	SIGSETOLD(p->p_sigmask, scp->sc_mask);
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	regs->tf_ebp = scp->sc_fp;
 	regs->tf_esp = scp->sc_sp;
 	regs->tf_eip = scp->sc_pc;
 	regs->tf_eflags = eflags;
 	return (EJUSTRETURN);
 }
 
 /*
  * sigreturn: restore the machine context saved in a user-supplied
  * ucontext_t after a signal handler has run.
  *
  * The context pointer is first checked for readability over the size of
  * an osigcontext.  If its sc_trapno field (viewed as an osigcontext)
  * holds 0x01d516 the request is passed on to osigreturn(); otherwise
  * readability is re-checked over the full ucontext_t.  For a vm86
  * context (PSL_VM set in the saved eflags) the vm86 area must already be
  * set up; for a normal context the saved eflags and %cs are validated
  * with EFL_SECURE() and CS_SECURE() before the register image is copied
  * into the trap frame.  Finally the SS_ONSTACK state (COMPAT_43 or
  * COMPAT_SUNOS kernels only) and the signal mask are restored under the
  * proc lock.
  *
  * Returns EJUSTRETURN on success, EFAULT if the context is not readable,
  * and EINVAL if the context fails validation.
  */
 int
 sigreturn(p, uap)
 	struct proc *p;
 	struct sigreturn_args /* {
 		ucontext_t *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	int cs, eflags;
 
 	ucp = uap->sigcntxp;
 	if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ))
 		return (EFAULT);
 	/* Old-style context: hand the whole request to osigreturn(). */
 	if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516)
 		return (osigreturn(p, (struct osigreturn_args *)uap));
 
 	/*
 	 * Since ucp is not an osigcontext but a ucontext_t, we have to
 	 * check again if all of it is accessible.  A ucontext_t is
 	 * much larger, so instead of just checking for the pointer
 	 * being valid for the size of an osigcontext, now check for
 	 * it being valid for a whole, new-style ucontext_t.
 	 */
 	if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ))
 		return (EFAULT);
 
 	regs = p->p_md.md_regs;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Only the bits in VME_USERCHANGE (or VM_USERCHANGE without
 		 * VME) are taken from the saved eflags; the rest come from
 		 * the current frame, and PSL_VM is forced on.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |					    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		/*
 		 * The copied %ds/%es/%fs (and the saved %gs) become the vm86
 		 * segment values; the frame's own %ds/%es/%fs are then set to
 		 * the user data selector.
 		 */
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 			printf("sigreturn: eflags = 0x%x\n", eflags);
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			printf("sigreturn: cs = 0x%x\n", cs);
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 	PROC_LOCK(p);
 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	/* Bit 0 of mc_onstack selects whether SS_ONSTACK is set or cleared. */
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
 #endif
 
 	p->p_sigmask = ucp->uc_sigmask;
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	return (EJUSTRETURN);
 }
 
 /*
  * cpu_boot: machine-dependent hook for boot().
  *
  * Currently a placeholder: `howto' is accepted and ignored, and nothing
  * is done.  Code may be moved here from boot() later.
  */
 void
 cpu_boot(int howto)
 {
 	/* Intentionally empty. */
 	(void)howto;
 }
 
 /*
  * Shutdown the CPU as much as possible: execute `hlt' in an endless
  * loop.  This function does not return.
  */
 void
 cpu_halt(void)
 {
 	while (1)
 		__asm__ ("hlt");
 }
 
 /*
  * Hook to idle the CPU when possible.  This currently only works in
  * the !SMP case, as there is no clean way to ensure that a CPU will be
  * woken when there is work available for it.
  *
  * cpu_idle_hlt is the run-time switch (initially 1) that cpu_idle()
  * tests before halting.  It is registered read/write under the _machdep
  * sysctl node with the description "Idle loop HLT enable".
  */
 static int	cpu_idle_hlt = 1;
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 
 /*
  * cpu_idle: halt the CPU until the next interrupt when there is nothing
  * to run.  Does nothing on SMP kernels or when cpu_idle_hlt is zero.
  *
  * Note that we have to be careful here to avoid a race between checking
  * procrunnable() and actually halting.  If we don't do this, we may waste
  * the time between calling hlt and the next interrupt even though there
  * is a runnable process.
  *
  * The check is therefore made with interrupts disabled, and interrupts
  * are re-enabled by an "sti" that is immediately followed by "hlt" in
  * the same asm statement.  "sti" delays interrupt delivery until after
  * the next instruction, so an interrupt that makes a process runnable
  * cannot slip in between the check and the halt; it wakes the "hlt"
  * instead.  Keeping both instructions in one asm statement guarantees
  * that the compiler cannot place anything between them.
  */
 void
 cpu_idle(void)
 {
 #ifndef SMP
 	if (cpu_idle_hlt) {
 		disable_intr();
 		if (procrunnable())
 			enable_intr();
 		else
 			__asm __volatile("sti; hlt");
 	}
 #endif
 }
 
 /*
  * Clear registers on exec
  *
  * Resets the machine state of `p' for a freshly exec'd image: the trap
  * frame is zeroed and pointed at `entry' with stack pointer `stack', the
  * user code/data selectors are installed, %ebx receives `ps_strings',
  * %gs and any in-use debug registers are reset, the soft-FP flag is
  * cleared, and the npx is initialized when DEV_NPX is configured.
  */
 void
 setregs(p, entry, stack, ps_strings)
 	struct proc *p;
 	u_long entry;
 	u_long stack;
 	u_long ps_strings;
 {
 	struct pcb *pcb = &p->p_addr->u_pcb;
 	struct trapframe *tf = p->p_md.md_regs;
 
 #ifdef USER_LDT
 	/* was i386_user_cleanup() in NetBSD */
 	user_ldt_free(pcb);
 #endif
 
 	/* Start from an all-zero frame, then fill in what user mode needs. */
 	bzero((char *)tf, sizeof(struct trapframe));
 	tf->tf_eip = entry;
 	tf->tf_esp = stack;
 	/*
 	 * NOTE(review): the frame was zeroed just above, so the PSL_T term
 	 * contributes nothing here -- confirm whether PSL_T was meant to be
 	 * carried across exec.
 	 */
 	tf->tf_eflags = PSL_USER | (tf->tf_eflags & PSL_T);
 	tf->tf_cs = _ucodesel;
 	tf->tf_ss = _udatasel;
 	tf->tf_ds = _udatasel;
 	tf->tf_es = _udatasel;
 	tf->tf_fs = _udatasel;
 
 	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 	tf->tf_ebx = ps_strings;
 
 	/*
 	 * Reset %gs too: directly in the register when this pcb is the
 	 * current one, otherwise in the saved copy.
 	 */
 	if (pcb != PCPU_GET(curpcb))
 		pcb->pcb_gs = _udatasel;
 	else
 		load_gs(_udatasel);
 
 	/*
 	 * Reset the hardware debug registers if they were in use.
 	 * They won't have any meaning for the newly exec'd process.
 	 */
 	if (pcb->pcb_flags & PCB_DBREGS) {
 		pcb->pcb_dr0 = 0;
 		pcb->pcb_dr1 = 0;
 		pcb->pcb_dr2 = 0;
 		pcb->pcb_dr3 = 0;
 		pcb->pcb_dr6 = 0;
 		pcb->pcb_dr7 = 0;
 		/*
 		 * Clear the debug registers on the running CPU as well,
 		 * otherwise they will end up affecting the next process
 		 * we switch to.
 		 */
 		if (pcb == PCPU_GET(curpcb))
 			reset_dbregs();
 		pcb->pcb_flags &= ~PCB_DBREGS;
 	}
 
 	/*
 	 * Initialize the math emulator (if any) for the current process.
 	 * Actually, just clear the bit that says that the emulator has
 	 * been initialized.  Initialization is delayed until the process
 	 * traps to the emulator (if it is done at all) mainly because
 	 * emulators don't provide an entry point for initialization.
 	 */
 	pcb->pcb_flags &= ~FP_SOFTFP;
 
 	/*
 	 * Arrange to trap the next npx or `fwait' instruction (see npx.c
 	 * for why fwait must be trapped at least if there is an npx or an
 	 * emulator).  This is mainly to handle the case where npx0 is not
 	 * configured, since the npx routines normally set up the trap
 	 * otherwise.  It should be done only at boot time, but doing it
 	 * here allows modifying `npx_exists' for testing the emulator on
 	 * systems with an npx.
 	 */
 	load_cr0(rcr0() | CR0_MP | CR0_TS);
 
 #ifdef DEV_NPX
 	/* Initialize the npx (if any) for the current process. */
 	npxinit(__INITIAL_NPXCW__);
 #endif
 
 	/*
 	 * XXX - Linux emulator
 	 * Make sure sure edx is 0x0 on entry. Linux binaries depend
 	 * on it.
 	 */
 	p->p_retval[1] = 0;
 }
 
 /*
  * cpu_setregs: set the control bits this kernel wants in %cr0 and load
  * the user data selector into %gs.
  */
 void
 cpu_setregs(void)
 {
 	unsigned int cr0 = rcr0();
 
 	/*
 	 * CR0_NE is also set by npxinit(); CR0_MP | CR0_TS is set again
 	 * at every execve().
 	 */
 	cr0 |= CR0_NE | CR0_MP | CR0_TS;
 #ifndef I386_CPU
 	cr0 |= CR0_WP | CR0_AM;
 #endif
 	load_cr0(cr0);
 	load_gs(_udatasel);
 }
 
 /*
  * Sysctl handler for an int oid that calls resettodr() after a
  * successful write.  Returns the result of sysctl_handle_int().
  */
 static int
 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 {
 	int rv;
 
 	rv = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	/* Nothing more to do on failure or when no new value was supplied. */
 	if (rv || !req->newptr)
 		return (rv);
 	resettodr();
 	return (rv);
 }
 
 /*
  * Read/write int oid backed by `adjkerntz'; accesses are routed through
  * sysctl_machdep_adjkerntz().
  */
 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
 	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
 
 /* Read/write int oid backed by `disable_rtc_set'. */
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
 	CTLFLAG_RW, &disable_rtc_set, 0, "");
 
 /* Read-only struct oid exposing `bootinfo'. */
 SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
 	CTLFLAG_RD, &bootinfo, bootinfo, "");
 
 /* Read/write int oid backed by `wall_cmos_clock'. */
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
 /*
  * Initialize 386 and configure to run kernel
  */
 
 /*
  * Initialize segments & interrupt table
  */
 
 int _default_ldt;
 union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
 static struct gate_descriptor idt0[NIDT];
 struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
 union descriptor ldt[NLDT];		/* local descriptor table */
 #ifdef SMP
 /* table descriptors - used to load tables by microp */
 struct region_descriptor r_gdt, r_idt;
 #endif
 
 int private_tss;			/* flag indicating private tss */
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 extern int has_f00f_bug;
 #endif
 
 /*
  * TSS referenced by the GPANIC_SEL descriptor in gdt_segs[].
  * NOTE(review): dblfault_stack is presumably the stack used with it --
  * confirm where it is wired up.
  */
 static struct i386tss dblfault_tss;
 static char dblfault_stack[PAGE_SIZE];
 
 extern  struct user *proc0paddr;
 
 
 /*
  * software prototypes -- in more palatable form
  *
  * One soft_segment_descriptor per GDT slot, in slot order.  Each
  * initializer lists, in order: base address, limit ("length"), type,
  * privilege level, present bit, two fields that are always 0 here,
  * default operand size (32 vs 16 bit) and limit granularity.
  */
 struct soft_segment_descriptor gdt_segs[] = {
 /* GNULL_SEL	0 Null Descriptor */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GCODE_SEL	1 Code Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GDATA_SEL	2 Data Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPRIV_SEL	3 SMP Per-Processor Private Data Descriptor */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPROC0_SEL	4 Proc 0 Tss Descriptor */
 {
 	0x0,			/* segment base address */
 	sizeof(struct i386tss)-1,/* length - one struct i386tss */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GLDT_SEL	5 LDT Descriptor */
 {	(int) ldt,		/* segment base address  */
 	sizeof(ldt)-1,		/* length - the whole ldt[] array */
 	SDT_SYSLDT,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GUSERLDT_SEL	6 User LDT Descriptor per process */
 {	(int) ldt,		/* segment base address  */
 	(512 * sizeof(union descriptor)-1),		/* length */
 	SDT_SYSLDT,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GTGATE_SEL	7 Null Descriptor - Placeholder */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 {	0x400,			/* segment base address */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPANIC_SEL	9 Panic Tss Descriptor */
 {	(int) &dblfault_tss,	/* segment base address  */
 	sizeof(struct i386tss)-1,/* length - one struct i386tss */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Software prototypes for the LDT entries, in slot order.  Each
  * initializer lists: base address, limit ("length"), type, privilege
  * level, present bit, two fields that are always 0 here, default
  * operand size and limit granularity.
  */
 static struct soft_segment_descriptor ldt_segs[] = {
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Code Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor priority level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Data Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	SEL_UPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * setidt: fill in interrupt descriptor table entry `idx'.
  *
  * The handler address `func' is split into the low and high offset
  * fields, `selec' becomes the gate's selector, `typ' and `dpl' its type
  * and privilege level, and the entry is marked present.
  */
 void
 setidt(idx, func, typ, dpl, selec)
 	int idx;
 	inthand_t *func;
 	int typ;
 	int dpl;
 	int selec;
 {
 	struct gate_descriptor *gate = &idt[idx];
 	int handler = (int)func;
 
 	gate->gd_looffset = handler;
 	gate->gd_selector = selec;
 	gate->gd_stkcpy = 0;
 	gate->gd_xx = 0;
 	gate->gd_type = typ;
 	gate->gd_dpl = dpl;
 	gate->gd_p = 1;
 	gate->gd_hioffset = handler >> 16;
 }
 
 /*
  * IDTVEC(name) expands to __CONCAT(X,name), i.e. the handler symbol for
  * vector `name' is presumably spelled Xname (assuming __CONCAT pastes
  * its arguments -- confirm against its definition).
  */
 #define	IDTVEC(name)	__CONCAT(X,name)
 
 /* Entry points of type inthand_t that are defined outside this file. */
 extern inthand_t
 	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(syscall), IDTVEC(int0x80_syscall);
 
 /*
  * sdtossd: convert the segment descriptor `sd' into its software form
  * `ssd'.  The split base and limit fields are reassembled; type,
  * privilege level, present, default-size and granularity are copied
  * across unchanged.  No other field of `ssd' is written.
  */
 void
 sdtossd(sd, ssd)
 	struct segment_descriptor *sd;
 	struct soft_segment_descriptor *ssd;
 {
 	/* Base: high part goes in bits 24 and up, low part below it. */
 	ssd->ssd_base = sd->sd_lobase | (sd->sd_hibase << 24);
 	/* Limit: high part goes in bits 16 and up, low part below it. */
 	ssd->ssd_limit = sd->sd_lolimit | (sd->sd_hilimit << 16);
 
 	ssd->ssd_type = sd->sd_type;
 	ssd->ssd_dpl = sd->sd_dpl;
 	ssd->ssd_p = sd->sd_p;
 	ssd->ssd_def32 = sd->sd_def32;
 	ssd->ssd_gran = sd->sd_gran;
 }
 
 /*
  * Number of elements in the physmap[] array used by getmemsize().  The
  * array holds base/bound pairs, so this allows for 8 segments.
  */
 #define PHYSMAP_SIZE	(2 * 8)
 
 /*
  * Populate the (physmap) array with base/bound pairs describing the
  * available physical memory in the system, then test this memory and
  * build the phys_avail array describing the actually-available memory.
  *
  * If we cannot accurately determine the physical memory map, then use
  * value from the 0xE801 call, and failing that, the RTC.
  *
  * Total memory size may be set by the kernel environment variable
  * hw.physmem or the compile-time define MAXMEM.
  */
 #ifdef PC98
 /*
  * getmemsize (PC98 variant): size physical memory and fill in
  * phys_avail[].
  *
  * Base, extended and under-16MB sizes come from pc98_getmemsize().
  * Maxmem is derived from the extended size, or from MAXMEM or the npx0
  * "msize" hint when present.  After pmap_bootstrap(first, 0), every page
  * from avail_start up to Maxmem (skipping the range between
  * Maxmem_under16M and page 4096) is mapped through CMAP1/CADDR1 and
  * pattern-tested; pages that pass are merged into phys_avail[] and
  * counted in physmem.  Finally the last chunk is trimmed to reserve
  * round_page(MSGBUF_SIZE) bytes for the message buffer, and Maxmem and
  * avail_end are set from the result.
  */
 static void
 getmemsize(int first)
 {
 	u_int	biosbasemem, biosextmem;
 	u_int	pagesinbase, pagesinext;
 	int	pa_indx;
 	int	pg_n;
 	int	speculative_mprobe;
 #ifdef DEV_NPX
 	int	msize;
 #endif
 	unsigned	under16;
 	vm_offset_t	target_page;
 
 	pc98_getmemsize(&biosbasemem, &biosextmem, &under16);
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	pagesinbase = mp_bootaddress(biosbasemem) / PAGE_SIZE;
 #else
 	pagesinbase = biosbasemem * 1024 / PAGE_SIZE;
 #endif
 	pagesinext = biosextmem * 1024 / PAGE_SIZE;
 
  	Maxmem_under16M = under16 * 1024 / PAGE_SIZE;
 
 #ifndef MAXMEM
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".
 	 */
 	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
 	/*
 	 * Indicate that we wish to do a speculative search for memory beyond
 	 * the end of the reported size if the indicated amount is 64MB (0x4000
 	 * pages) - which is the largest amount that the BIOS/bootblocks can
 	 * currently report. If a specific amount of memory is indicated via
 	 * the MAXMEM option or the npx0 "msize", then don't do the speculative
 	 * memory probe.
 	 */
 	if (Maxmem >= 0x4000)
 		speculative_mprobe = TRUE;
 	else
 		speculative_mprobe = FALSE;
 #else
 	Maxmem = MAXMEM/4;
 	speculative_mprobe = FALSE;
 #endif
 
 #ifdef DEV_NPX
 	if (resource_int_value("npx", 0, "msize", &msize) == 0) {
 		if (msize != 0) {
 			Maxmem = msize / 4;
 			speculative_mprobe = FALSE;
 		}
 	}
 #endif
 
 #ifdef SMP
 	/* look for the MP hardware - needed for apic addresses */
 	mp_probe();
 #endif
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap (first, 0);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 
 	/*
 	 * We currently don't bother testing base memory.
 	 * XXX  ...but we probably should.
 	 */
 	pa_indx = 0;
 	if (pagesinbase > 1) {
 		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
 		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
 		physmem = pagesinbase - 1;
 	} else {
 		/* point at first chunk end */
 		pa_indx++;
 	}
 
 	/* XXX - some of EPSON machines can't use PG_N */
 	pg_n = PG_N;
 	if (pc98_machine_type & M_EPSON_PC98) {
 		switch (epson_machine_id) {
 #ifdef WB_CACHE
 		default:
 #endif
 		case 0x34:		/* PC-486HX */
 		case 0x35:		/* PC-486HG */
 		case 0x3B:		/* PC-486HA */
 			pg_n = 0;
 			break;
 		}
 	}
 
 	/*
 	 * The speculative probe is switched off unconditionally here,
 	 * overriding whatever was decided above; only the disabled
 	 * (notdef) code below would turn it back on.
 	 */
 	speculative_mprobe = FALSE;
 #ifdef notdef	/* XXX - see below */
 	/*
 	 * Certain 'CPU accelerator' supports over 16MB memory on the machines
 	 * whose BIOS doesn't store true size.  
 	 * To support this, we don't trust BIOS values if Maxmem <= 16MB (0x1000
 	 * pages) - which is the largest amount that the OLD PC-98 can report.
 	 *
 	 * OK: PC-9801NS/R(9.6M)
 	 * OK: PC-9801DA(5.6M)+EUD-H(32M)+Cyrix 5x86
 	 * OK: PC-9821Ap(14.6M)+EUA-T(8M)+Cyrix 5x86-100
 	 * NG: PC-9821Ap(14.6M)+EUA-T(8M)+AMD DX4-100 -> freeze
 	 */
 	if (Maxmem <= 0x1000) {
 		int tmp, page_bad;
 
 		page_bad = FALSE;
 
 		/*
 		 * For Max14.6MB machines, the 0x10f0 page is same as 0x00f0,
 		 * which is BIOS ROM, by overlapping.
 		 * So, we check that page's ability of writing.
 		 */
 		target_page = ptoa(0x10f0);
 
 		/*
 		 * map page into kernel: valid, read/write, non-cacheable
 		 */
 		*(int *)CMAP1 = PG_V | PG_RW | pg_n | target_page;
 		invltlb();
 
 		tmp = *(int *)CADDR1;
 		/*
 		 * Test for alternating 1's and 0's
 		 */
 		*(volatile int *)CADDR1 = 0xaaaaaaaa;
 		if (*(volatile int *)CADDR1 != 0xaaaaaaaa)
 			page_bad = TRUE;
 		/*
 		 * Test for alternating 0's and 1's
 		 */
 		*(volatile int *)CADDR1 = 0x55555555;
 		if (*(volatile int *)CADDR1 != 0x55555555)
 			page_bad = TRUE;
 		/*
 		 * Test for all 1's
 		 */
 		*(volatile int *)CADDR1 = 0xffffffff;
 		if (*(volatile int *)CADDR1 != 0xffffffff)
 			page_bad = TRUE;
 		/*
 		 * Test for all 0's
 		 */
 		*(volatile int *)CADDR1 = 0x0;
 		if (*(volatile int *)CADDR1 != 0x0) {
 			/*
 			 * test of page failed
 			 */
 			page_bad = TRUE;
 		}
 		/*
 		 * Restore original value.
 		 */
 		*(int *)CADDR1 = tmp;
 
 		/*
 		 * Adjust Maxmem if valid/good page.
 		 */
 		if (page_bad == FALSE) {
 			/* '+ 2' is needed to make speculative_mprobe sure */
 			Maxmem = 0x1000 + 2;
 			speculative_mprobe = TRUE;
 		}
 	}
 #endif
 
 	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
 		int tmp, page_bad;
 
 		page_bad = FALSE;
 
 		/* skip system area */
 		if (target_page >= ptoa(Maxmem_under16M) &&
 				target_page < ptoa(4096))
 			continue;
 
 		/*
 		 * map page into kernel: valid, read/write, non-cacheable
 		 */
 		*(int *)CMAP1 = PG_V | PG_RW | pg_n | target_page;
 		invltlb();
 
 		tmp = *(int *)CADDR1;
 		/*
 		 * Test for alternating 1's and 0's
 		 */
 		*(volatile int *)CADDR1 = 0xaaaaaaaa;
 		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
 			page_bad = TRUE;
 		}
 		/*
 		 * Test for alternating 0's and 1's
 		 */
 		*(volatile int *)CADDR1 = 0x55555555;
 		if (*(volatile int *)CADDR1 != 0x55555555) {
 			page_bad = TRUE;
 		}
 		/*
 		 * Test for all 1's
 		 */
 		*(volatile int *)CADDR1 = 0xffffffff;
 		if (*(volatile int *)CADDR1 != 0xffffffff) {
 			page_bad = TRUE;
 		}
 		/*
 		 * Test for all 0's
 		 */
 		*(volatile int *)CADDR1 = 0x0;
 		if (*(volatile int *)CADDR1 != 0x0) {
 			/*
 			 * test of page failed
 			 */
 			page_bad = TRUE;
 		}
 		/*
 		 * Restore original value.
 		 */
 		*(int *)CADDR1 = tmp;
 
 		/*
 		 * Adjust array of valid/good pages.
 		 */
 		if (page_bad == FALSE) {
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 * If we're also doing a speculative memory
 			 * test and we at or past the end, bump up Maxmem
 			 * so that we keep going. The first bad page
 			 * will terminate the loop.
 			 */
 			if (phys_avail[pa_indx] == target_page) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 				if (speculative_mprobe == TRUE &&
 				    phys_avail[pa_indx] >= (16*1024*1024))
 					Maxmem++;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 					printf("Too many holes in the physical address space, giving up\n");
 					pa_indx--;
 					break;
 				}
 				phys_avail[pa_indx++] = target_page;	/* start */
 				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
 			}
 			physmem++;
 		}
 	}
 
 	/* Drop the temporary test mapping. */
 	*(int *)CMAP1 = 0;
 	invltlb();
 
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 
 	avail_end = phys_avail[pa_indx];
 }
 #else
 static void
 getmemsize(int first)
 {
 	int i, physmap_idx, pa_indx;
 	u_int basemem, extmem;
 	struct vm86frame vmf;
 	struct vm86context vmc;
 	vm_offset_t pa, physmap[PHYSMAP_SIZE];
 	pt_entry_t pte;
 	const char *cp;
 	struct bios_smap *smap;
 
 	bzero(&vmf, sizeof(struct vm86frame));
 	bzero(physmap, sizeof(physmap));
 
 	/*
 	 * Perform "base memory" related probes & setup
 	 */
 	vm86_intcall(0x12, &vmf);
 	basemem = vmf.vmf_ax;
 	if (basemem > 640) {
 		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 			basemem);
 		basemem = 640;
 	}
 
 	/*
 	 * XXX if biosbasemem is now < 640, there is a `hole'
 	 * between the end of base memory and the start of
 	 * ISA memory.  The hole may be empty or it may
 	 * contain BIOS code or data.  Map it read/write so
 	 * that the BIOS can write to it.  (Memory from 0 to
 	 * the physical end of the kernel is mapped read-only
 	 * to begin with and then parts of it are remapped.
 	 * The parts that aren't remapped form holes that
 	 * remain read-only and are unused by the kernel.
 	 * The base memory area is below the physical end of
 	 * the kernel and right now forms a read-only hole.
 	 * The part of it from PAGE_SIZE to
 	 * (trunc_page(biosbasemem * 1024) - 1) will be
 	 * remapped and used by the kernel later.)
 	 *
 	 * This code is similar to the code used in
 	 * pmap_mapdev, but since no memory needs to be
 	 * allocated we simply change the mapping.
 	 */
 	for (pa = trunc_page(basemem * 1024);
 	     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
 		pte = (pt_entry_t)vtopte(pa + KERNBASE);
 		*pte = pa | PG_RW | PG_V;
 	}
 
 	/*
 	 * if basemem != 640, map pages r/w into vm86 page table so 
 	 * that the bios can scribble on it.
 	 */
 	pte = (pt_entry_t)vm86paddr;
 	for (i = basemem / 4; i < 160; i++)
 		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 
 	/*
 	 * map page 1 R/W into the kernel page table so we can use it
 	 * as a buffer.  The kernel will unmap this page later.
 	 */
 	pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT));
 	*pte = (1 << PAGE_SHIFT) | PG_RW | PG_V;
 
 	/*
 	 * get memory map with INT 15:E820
 	 */
 	vmc.npages = 0;
 	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
 	vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 
 	physmap_idx = 0;
 	vmf.vmf_ebx = 0;
 	do {
 		vmf.vmf_eax = 0xE820;
 		vmf.vmf_edx = SMAP_SIG;
 		vmf.vmf_ecx = sizeof(struct bios_smap);
 		i = vm86_datacall(0x15, &vmf, &vmc);
 		if (i || vmf.vmf_eax != SMAP_SIG)
 			break;
 		if (boothowto & RB_VERBOSE)
 			printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n",
 				smap->type,
 				*(u_int32_t *)((char *)&smap->base + 4),
 				(u_int32_t)smap->base,
 				*(u_int32_t *)((char *)&smap->length + 4),
 				(u_int32_t)smap->length);
 
 		if (smap->type != 0x01)
 			goto next_run;
 
 		if (smap->length == 0)
 			goto next_run;
 
 		if (smap->base >= 0xffffffff) {
 			printf("%uK of memory above 4GB ignored\n",
 			    (u_int)(smap->length / 1024));
 			goto next_run;
 		}
 
 		for (i = 0; i <= physmap_idx; i += 2) {
 			if (smap->base < physmap[i + 1]) {
 				if (boothowto & RB_VERBOSE)
 					printf(
 	"Overlapping or non-montonic memory region, ignoring second region\n");
 				goto next_run;
 			}
 		}
 
 		if (smap->base == physmap[physmap_idx + 1]) {
 			physmap[physmap_idx + 1] += smap->length;
 			goto next_run;
 		}
 
 		physmap_idx += 2;
 		if (physmap_idx == PHYSMAP_SIZE) {
 			printf(
 		"Too many segments in the physical address map, giving up\n");
 			break;
 		}
 		physmap[physmap_idx] = smap->base;
 		physmap[physmap_idx + 1] = smap->base + smap->length;
 next_run:
 	} while (vmf.vmf_ebx != 0);
 
 	if (physmap[1] != 0)
 		goto physmap_done;
 
 	/*
 	 * If we failed above, try memory map with INT 15:E801
 	 */
 	vmf.vmf_ax = 0xE801;
 	if (vm86_intcall(0x15, &vmf) == 0) {
 		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 	} else {
 #if 0
 		vmf.vmf_ah = 0x88;
 		vm86_intcall(0x15, &vmf);
 		extmem = vmf.vmf_ax;
 #else
 		/*
 		 * Prefer the RTC value for extended memory.
 		 */
 		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 #endif
 	}
 
 	/*
 	 * Special hack for chipsets that still remap the 384k hole when
 	 * there's 16MB of memory - this really confuses people that
 	 * are trying to use bus mastering ISA controllers with the
 	 * "16MB limit"; they only have 16MB, but the remapping puts
 	 * them beyond the limit.
 	 *
 	 * If extended memory is between 15-16MB (16-17MB phys address range),
 	 *	chop it to 15MB.
 	 */
 	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 		extmem = 15 * 1024;
 
 	physmap[0] = 0;
 	physmap[1] = basemem * 1024;
 	physmap_idx = 2;
 	physmap[physmap_idx] = 0x100000;
 	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 
 physmap_done:
 	/*
 	 * Now, physmap contains a map of physical memory.
 	 */
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	physmap[1] = mp_bootaddress(physmap[1] / 1024);
 
 	/* look for the MP hardware - needed for apic addresses */
 	mp_probe();
 #endif
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".  We may adjust this 
 	 * based on ``hw.physmem'' and the results of the memory test.
 	 */
 	Maxmem = atop(physmap[physmap_idx + 1]);
 
 #ifdef MAXMEM
 	Maxmem = MAXMEM / 4;
 #endif
 
 	/*
 	 * hw.maxmem is a size in bytes; we also allow k, m, and g suffixes
 	 * for the appropriate modifiers.  This overrides MAXMEM.
 	 */
 	if ((cp = getenv("hw.physmem")) != NULL) {
 		u_int64_t AllowMem, sanity;
 		char *ep;
 
 		sanity = AllowMem = strtouq(cp, &ep, 0);
 		if ((ep != cp) && (*ep != 0)) {
 			switch(*ep) {
 			case 'g':
 			case 'G':
 				AllowMem <<= 10;
 			case 'm':
 			case 'M':
 				AllowMem <<= 10;
 			case 'k':
 			case 'K':
 				AllowMem <<= 10;
 				break;
 			default:
 				AllowMem = sanity = 0;
 			}
 			if (AllowMem < sanity)
 				AllowMem = 0;
 		}
 		if (AllowMem == 0)
 			printf("Ignoring invalid memory size of '%s'\n", cp);
 		else
 			Maxmem = atop(AllowMem);
 	}
 
 	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 	    (boothowto & RB_VERBOSE))
 		printf("Physical memory use set to %uK\n", Maxmem * 4);
 
 	/*
 	 * If Maxmem has been increased beyond what the system has detected,
 	 * extend the last memory segment to the new limit.
 	 */ 
 	if (atop(physmap[physmap_idx + 1]) < Maxmem)
 		physmap[physmap_idx + 1] = ptoa(Maxmem);
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap(first, 0);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
 	pa_indx = 0;
 	phys_avail[pa_indx++] = physmap[0];
 	phys_avail[pa_indx] = physmap[0];
 #if 0
 	pte = (pt_entry_t)vtopte(KERNBASE);
 #else
 	pte = (pt_entry_t)CMAP1;
 #endif
 
 	/*
 	 * physmap is in bytes, so when converting to page boundaries,
 	 * round up the start address and round down the end address.
 	 */
 	for (i = 0; i <= physmap_idx; i += 2) {
 		vm_offset_t end;
 
 		end = ptoa(Maxmem);
 		if (physmap[i + 1] < end)
 			end = trunc_page(physmap[i + 1]);
 		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 			int tmp, page_bad;
 #if 0
 			int *ptr = 0;
 #else
 			int *ptr = (int *)CADDR1;
 #endif
 
 			/*
 			 * block out kernel memory as not available.
 			 */
 			if (pa >= 0x100000 && pa < first)
 				continue;
 	
 			page_bad = FALSE;
 
 			/*
 			 * map page into kernel: valid, read/write,non-cacheable
 			 */
 			*pte = pa | PG_V | PG_RW | PG_N;
 			invltlb();
 
 			tmp = *(int *)ptr;
 			/*
 			 * Test for alternating 1's and 0's
 			 */
 			*(volatile int *)ptr = 0xaaaaaaaa;
 			if (*(volatile int *)ptr != 0xaaaaaaaa) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for alternating 0's and 1's
 			 */
 			*(volatile int *)ptr = 0x55555555;
 			if (*(volatile int *)ptr != 0x55555555) {
 			page_bad = TRUE;
 			}
 			/*
 			 * Test for all 1's
 			 */
 			*(volatile int *)ptr = 0xffffffff;
 			if (*(volatile int *)ptr != 0xffffffff) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 0's
 			 */
 			*(volatile int *)ptr = 0x0;
 			if (*(volatile int *)ptr != 0x0) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Restore original value.
 			 */
 			*(int *)ptr = tmp;
 
 			/*
 			 * Adjust array of valid/good pages.
 			 */
 			if (page_bad == TRUE) {
 				continue;
 			}
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 * If we're also doing a speculative memory
 			 * test and we are at or past the end, bump up Maxmem
 			 * so that we keep going. The first bad page
 			 * will terminate the loop.
 			 */
 			if (phys_avail[pa_indx] == pa) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 					printf("Too many holes in the physical address space, giving up\n");
 					pa_indx--;
 					break;
 				}
 				phys_avail[pa_indx++] = pa;	/* start */
 				phys_avail[pa_indx] = pa + PAGE_SIZE;	/* end */
 			}
 			physmem++;
 		}
 	}
 	*pte = 0;
 	invltlb();
 
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 
 	avail_end = phys_avail[pa_indx];
 }
 #endif
 
 /*
  * Machine-dependent early bootstrap for the i386 kernel.
  *
  * In order, this routine: records the loader metadata and environment
  * pointers, builds and loads the GDT, initializes the earliest mutexes
  * (sched_lock, Giant and, on SMP, imen_mtx), builds the LDT and IDT,
  * brings up the console (and optionally the debugger), finishes CPU
  * identification, sets up the initial TSS and the double fault TSS,
  * sizes memory via getmemsize(), maps and initializes the message
  * buffer, installs the LDT system call gates and finally fills in
  * proc0's pcb.
  *
  * "first" is handed unchanged to getmemsize().
  */
 void
 init386(first)
 	int first;
 {
 	int x;
 	struct gate_descriptor *gdp;
 	int gsel_tss;
 #ifndef SMP
 	/* table descriptors - used to load tables by microp */
 	struct region_descriptor r_gdt, r_idt;
 #endif
 	int off;
 
 	proc0.p_addr = proc0paddr;
 
 	atdevbase = ISA_HOLE_START + KERNBASE;
 
 #ifdef PC98
 	/*
 	 * Initialize DMAC
 	 */
 	pc98_init_dmac();
 #endif
 
 	/*
 	 * The bootinfo pointers are rebased by KERNBASE before use.
 	 */
 	if (bootinfo.bi_modulep) {
 		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 		preload_bootstrap_relocate(KERNBASE);
 	} else {
 		printf("WARNING: loader(8) metadata is missing!\n");
 	}
 	if (bootinfo.bi_envp)
 		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
 	/*
 	 * make gdt memory segments, the code segment goes up to end of the
 	 * page with etext in it, the data segment goes to the end of
 	 * the address space
 	 */
 	/*
 	 * XXX text protection is temporarily (?) disabled.  The limit was
 	 * i386_btop(round_page(etext)) - 1.
 	 */
 	/*
 	 * NOTE(review): i386_btop(0) - 1 presumably wraps around to the
 	 * largest page number, i.e. a limit spanning the whole address
 	 * space -- confirm against the ssd_limit type.
 	 */
 	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
 	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
 #ifdef SMP
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		i386_btop(sizeof(struct privatespace)) - 1;
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[0].globaldata.gd_common_tss;
 	SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0].globaldata;
 #else
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		i386_btop(sizeof(struct globaldata)) - 1;
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &__globaldata;
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &__globaldata.gd_common_tss;
 	__globaldata.gd_prvspace = &__globaldata;
 #endif
 
 	/* convert the software segment descriptors into hardware form */
 	for (x = 0; x < NGDT; x++) {
 #ifdef BDE_DEBUGGER
 		/* avoid overwriting db entries with APM ones */
 		if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL)
 			continue;
 #endif
 		ssdtosd(&gdt_segs[x], &gdt[x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base =  (int) gdt;
 	lgdt(&r_gdt);
 
 	/* setup curproc so that mutexes work */
 	PCPU_SET(curproc, &proc0);
 
 	LIST_INIT(&proc0.p_heldmtx);
 	LIST_INIT(&proc0.p_contested);
 
 	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
 #ifdef SMP
 	/*
 	 * Interrupts can happen very early, so initialize imen_mtx here, rather
 	 * than in init_locks().
 	 */
 	mtx_init(&imen_mtx, "imen", MTX_SPIN);
 #endif
 
 	/*
 	 * Giant is used early for at least debugger traps and unexpected traps.
 	 */
 	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
 	mtx_enter(&Giant, MTX_DEF);
 
 	/* make ldt memory segments */
 	/*
 	 * The data segment limit must not cover the user area because we
 	 * don't want the user area to be writable in copyout() etc. (page
 	 * level protection is lost in kernel mode on 386's).  Also, we
 	 * don't want the user area to be writable directly (page level
 	 * protection of the user area is not available on 486's with
 	 * CR0_WP set, because there is no user-read/kernel-write mode).
 	 *
 	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
 	 * should be spelled ...MAX_USER...
 	 */
 #define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
 	/*
 	 * The code segment limit has to cover the user area until we move
 	 * the signal trampoline out of the user area.  This is safe because
 	 * the code segment cannot be written to directly.
 	 */
 #define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE)
 	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
 	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
 	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
 		ssdtosd(&ldt_segs[x], &ldt[x].sd);
 
 	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 	lldt(_default_ldt);
 #ifdef USER_LDT
 	PCPU_SET(currentldt, _default_ldt);
 #endif
 
 	/* exceptions */
 	/*
 	 * Point every vector at the "rsvd" handler first, then override the
 	 * vectors that have a dedicated handler.  Vectors 3, 4 and 0x80 are
 	 * installed with SEL_UPL, every other one with SEL_KPL.
 	 */
 	for (x = 0; x < NIDT; x++)
 		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	/* vector 8 is a task gate through GPANIC_SEL, not a trap gate */
 	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	/* vector 14 is the only one installed as an interrupt gate (IGT) */
 	setidt(14, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  	setidt(0x80, &IDTVEC(int0x80_syscall),
 			SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	r_idt.rd_base = (int) idt;
 	lidt(&r_idt);
 
 	/*
 	 * We need this mutex before the console probe.
 	 */
 	mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE);
 
 	/*
 	 * Initialize the console before we print anything out.
 	 */
 	cninit();
 
 #ifdef DEV_ISA
 	isa_defaultirq();
 #endif
 
 #ifdef DDB
 	kdb_init();
 	if (boothowto & RB_KDB)
 		Debugger("Boot flags requested debugger");
 #endif
 
 	finishidentcpu();	/* Final stage of CPU initialization */
 	/*
 	 * Vectors 6 and 13 are installed a second time here.
 	 * NOTE(review): presumably finishidentcpu() may temporarily replace
 	 * these handlers while probing the CPU -- confirm.
 	 */
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	initializecpu();	/* Initialize CPU registers */
 
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
 	PCPU_SET(common_tss.tss_esp0,
 	    (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16);
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	private_tss = 0;
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	ltr(gsel_tss);
 
 	/*
 	 * Fill in the double fault TSS: every stack pointer starts at the
 	 * address just past the end of dblfault_stack, and execution begins
 	 * at dblfault_handler with the kernel selectors and IdlePTD loaded.
 	 */
 	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
 	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_cr3 = (int)IdlePTD;
 	dblfault_tss.tss_eip = (int) dblfault_handler;
 	dblfault_tss.tss_eflags = PSL_KERNEL;
 	dblfault_tss.tss_ds = dblfault_tss.tss_es =
 	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 
 	vm86_initialize();
 	getmemsize(first);
 
 	/* now running on new page tables, configured,and u/iom is accessible */
 
 	/* Map the message buffer. */
 	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
 		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
 
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 
 	/* make a call gate to reenter kernel with */
 	gdp = &ldt[LSYS5CALLS_SEL].gd;
 
 	x = (int) &IDTVEC(syscall);
 	/* the post-increment is harmless: x is not read again below */
 	gdp->gd_looffset = x++;
 	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 	gdp->gd_stkcpy = 1;
 	gdp->gd_type = SDT_SYS386CGT;
 	gdp->gd_dpl = SEL_UPL;
 	gdp->gd_p = 1;
 	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
 
 	/* XXX does this work? */
 	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 	ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
 
 	/* transfer to user mode */
 
 	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
 	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
 
 	/* setup proc 0's pcb */
 	proc0.p_addr->u_pcb.pcb_flags = 0;
 	proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
 	proc0.p_addr->u_pcb.pcb_schednest = 0;
 	proc0.p_addr->u_pcb.pcb_ext = 0;
 	proc0.p_md.md_regs = &proc0_tf;
 }
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 static void f00f_hack(void *unused);
 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
 
 /*
  * SYSINIT hook installing the workaround for the Pentium F00F bug.
  *
  * Does nothing unless has_f00f_bug is set.  Otherwise it allocates two
  * pages of kernel memory, copies the IDT into them so that the first
  * seven 8-byte entries land at the very end of the first page, reloads
  * the IDT register with the new location, and finally makes the first
  * of the two pages read-only.  Panics if the allocation fails, is not
  * page aligned, or the protection change fails.
  */
 static void
 f00f_hack(void *unused) {
 	struct gate_descriptor *new_idt;
 #ifndef SMP
 	struct region_descriptor r_idt;
 #endif
 	vm_offset_t tmp;
 
 	if (!has_f00f_bug)
 		return;
 
 	printf("Intel Pentium detected, installing workaround for F00F bug\n");
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 
 	tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2);
 	if (tmp == 0)
 		panic("kmem_alloc returned 0");
 	if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0)
 		panic("kmem_alloc returned non-page-aligned memory");
 	/* Put the first seven entries in the lower page */
 	new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8));
 	bcopy(idt, new_idt, sizeof(idt0));
 	r_idt.rd_base = (int)new_idt;
 	/* switch the CPU to the copy before publishing it through "idt" */
 	lidt(&r_idt);
 	idt = new_idt;
 	/* only the first page (holding the first seven entries) is protected */
 	if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
 			   VM_PROT_READ, FALSE) != KERN_SUCCESS)
 		panic("vm_map_protect failed");
 	return;
 }
 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 
 int
 ptrace_set_pc(p, addr)
 	struct proc *p;
 	unsigned long addr;
 {
 	p->p_md.md_regs->tf_eip = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(p)
 	struct proc *p;
 {
 	p->p_md.md_regs->tf_eflags |= PSL_T;
 	return (0);
 }
 
 /*
  * Decide whether the range [addr, addr + len) of the user area may be
  * read.  The range is accepted (0) when it ends within struct user, or
  * when it lies entirely inside the trap frame located at
  * (p_md.md_regs - p_addr).  Anything else, including a range whose end
  * wraps around, yields EPERM.
  */
 int ptrace_read_u_check(p, addr, len)
 	struct proc *p;
 	vm_offset_t addr;
 	size_t len;
 {
 	vm_offset_t last, frame_off;
 
 	last = (vm_offset_t) (addr + len);
 
 	/* the end of the range wrapped around */
 	if (last < addr)
 		return EPERM;
 
 	/* completely inside struct user */
 	if (last <= sizeof(struct user))
 		return 0;
 
 	/* otherwise it has to fit inside the saved trap frame */
 	frame_off = (char *) p->p_md.md_regs - (char *) p->p_addr;
 	if ((vm_offset_t) addr >= frame_off &&
 	    last <= (vm_offset_t) (frame_off + sizeof(struct trapframe)))
 		return 0;
 
 	return EPERM;
 }
 
 int ptrace_write_u(p, off, data)
 	struct proc *p;
 	vm_offset_t off;
 	long data;
 {
 	struct trapframe frame_copy;
 	vm_offset_t min;
 	struct trapframe *tp;
 
 	/*
 	 * Privileged kernel state is scattered all over the user area.
 	 * Only allow write access to parts of regs and to fpregs.
 	 */
 	min = (char *)p->p_md.md_regs - (char *)p->p_addr;
 	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
 		tp = p->p_md.md_regs;
 		frame_copy = *tp;
 		*(int *)((char *)&frame_copy + (off - min)) = data;
 		if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
 		    !CS_SECURE(frame_copy.tf_cs))
 			return (EINVAL);
 		*(int*)((char *)p->p_addr + off) = data;
 		return (0);
 	}
 	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
 	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
 		*(int*)((char *)p->p_addr + off) = data;
 		return (0);
 	}
 	return (EFAULT);
 }
 
 /*
  * Export the register state of p into *regs.  Everything comes from
  * the trap frame (p_md.md_regs) except r_gs, which is read from the
  * pcb.  Always returns 0.
  */
 int
 fill_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *frame;
 
 	frame = p->p_md.md_regs;
 
 	/* general registers */
 	regs->r_eax = frame->tf_eax;
 	regs->r_ecx = frame->tf_ecx;
 	regs->r_edx = frame->tf_edx;
 	regs->r_ebx = frame->tf_ebx;
 	regs->r_ebp = frame->tf_ebp;
 	regs->r_esi = frame->tf_esi;
 	regs->r_edi = frame->tf_edi;
 
 	/* instruction pointer, flags and stack */
 	regs->r_eip = frame->tf_eip;
 	regs->r_eflags = frame->tf_eflags;
 	regs->r_esp = frame->tf_esp;
 
 	/* segment registers held in the trap frame */
 	regs->r_cs = frame->tf_cs;
 	regs->r_ss = frame->tf_ss;
 	regs->r_ds = frame->tf_ds;
 	regs->r_es = frame->tf_es;
 	regs->r_fs = frame->tf_fs;
 
 	/* gs is kept in the pcb, not in the trap frame */
 	regs->r_gs = p->p_addr->u_pcb.pcb_gs;
 	return (0);
 }
 
 /*
  * Load the register state of p from *regs.  The request is refused
  * with EINVAL, and nothing is modified, when the new eflags or cs
  * values fail the EFL_SECURE/CS_SECURE checks.  On success every
  * register is stored in the trap frame except gs, which goes into the
  * pcb, and 0 is returned.
  */
 int
 set_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *frame;
 
 	frame = p->p_md.md_regs;
 	if (!(EFL_SECURE(regs->r_eflags, frame->tf_eflags) &&
 	    CS_SECURE(regs->r_cs)))
 		return (EINVAL);
 
 	/* general registers */
 	frame->tf_eax = regs->r_eax;
 	frame->tf_ecx = regs->r_ecx;
 	frame->tf_edx = regs->r_edx;
 	frame->tf_ebx = regs->r_ebx;
 	frame->tf_ebp = regs->r_ebp;
 	frame->tf_esi = regs->r_esi;
 	frame->tf_edi = regs->r_edi;
 
 	/* instruction pointer, flags and stack */
 	frame->tf_eip = regs->r_eip;
 	frame->tf_eflags = regs->r_eflags;
 	frame->tf_esp = regs->r_esp;
 
 	/* segment registers held in the trap frame */
 	frame->tf_cs = regs->r_cs;
 	frame->tf_ss = regs->r_ss;
 	frame->tf_ds = regs->r_ds;
 	frame->tf_es = regs->r_es;
 	frame->tf_fs = regs->r_fs;
 
 	/* gs is kept in the pcb, not in the trap frame */
 	p->p_addr->u_pcb.pcb_gs = regs->r_gs;
 	return (0);
 }
 
 int
 fill_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 	bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
 	return (0);
 }
 
 int
 set_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 	bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
 	return (0);
 }
 
 /*
  * Export the debug register values saved in p's pcb into *dbregs.
  * dr4 and dr5 have no saved counterpart and are reported as 0.
  * Always returns 0.
  */
 int
 fill_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *saved;
 
 	saved = &p->p_addr->u_pcb;
 
 	/* the four address registers */
 	dbregs->dr0 = saved->pcb_dr0;
 	dbregs->dr1 = saved->pcb_dr1;
 	dbregs->dr2 = saved->pcb_dr2;
 	dbregs->dr3 = saved->pcb_dr3;
 
 	/* not stored in the pcb */
 	dbregs->dr4 = 0;
 	dbregs->dr5 = 0;
 
 	/* status and control */
 	dbregs->dr6 = saved->pcb_dr6;
 	dbregs->dr7 = saved->pcb_dr7;
 
 	return (0);
 }
 
 int
 set_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *pcb;
 	int i;
 	u_int32_t mask1, mask2;
 
 	/*
 	 * Don't let an illegal value for dr7 get set.  Specifically,
 	 * check for undefined settings.  Setting these bit patterns
 	 * result in undefined behaviour and can lead to an unexpected
 	 * TRCTRAP.
 	 */
 	for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
 	     i++, mask1 <<= 2, mask2 <<= 2)
 		if ((dbregs->dr7 & mask1) == mask2)
 			return (EINVAL);
 
 	if (dbregs->dr7 & 0x0000fc00)
 		return (EINVAL);
 
 
 
 	pcb = &p->p_addr->u_pcb;
 
 	/*
 	 * Don't let a process set a breakpoint that is not within the
 	 * process's address space.  If a process could do this, it
 	 * could halt the system by setting a breakpoint in the kernel
 	 * (if ddb was enabled).  Thus, we need to check to make sure
 	 * that no breakpoints are being enabled for addresses outside
 	 * process's address space, unless, perhaps, we were called by
 	 * uid 0.
 	 *
 	 * XXX - what about when the watched area of the user's
 	 * address space is written into from within the kernel
 	 * ... wouldn't that still cause a breakpoint to be generated
 	 * from within kernel mode?
 	 */
 
 	if (suser(p) != 0) {
 		if (dbregs->dr7 & 0x3) {
 			/* dr0 is enabled */
 			if (dbregs->dr0 >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		if (dbregs->dr7 & (0x3<<2)) {
 			/* dr1 is enabled */
 			if (dbregs->dr1 >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		if (dbregs->dr7 & (0x3<<4)) {
 			/* dr2 is enabled */
 			if (dbregs->dr2 >= VM_MAXUSER_ADDRESS)
        				return (EINVAL);
 		}
 
 		if (dbregs->dr7 & (0x3<<6)) {
 			/* dr3 is enabled */
 			if (dbregs->dr3 >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 	}
 
 	pcb->pcb_dr0 = dbregs->dr0;
 	pcb->pcb_dr1 = dbregs->dr1;
 	pcb->pcb_dr2 = dbregs->dr2;
 	pcb->pcb_dr3 = dbregs->dr3;
 	pcb->pcb_dr6 = dbregs->dr6;
 	pcb->pcb_dr7 = dbregs->dr7;
 
 	pcb->pcb_flags |= PCB_DBREGS;
 
 	return (0);
 }
 
 /*
  * Return > 0 if a hardware breakpoint has been hit, and the
  * breakpoint was in user space.  Return 0, otherwise.
  */
 int
 user_dbreg_trap(void)
 {
         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
         int nbp;            /* number of breakpoints that triggered */
         caddr_t addr[4];    /* breakpoint addresses */
         int i;
         
         dr7 = rdr7();
         if ((dr7 & 0x000000ff) == 0) {
                 /*
                  * all GE and LE bits in the dr7 register are zero,
                  * thus the trap couldn't have been caused by the
                  * hardware debug registers
                  */
                 return 0;
         }
 
         nbp = 0;
         dr6 = rdr6();
         bp = dr6 & 0x0000000f;
 
         if (!bp) {
                 /*
                  * None of the breakpoint bits are set meaning this
                  * trap was not caused by any of the debug registers
                  */
                 return 0;
         }
 
         /*
          * at least one of the breakpoints were hit, check to see
          * which ones and if any of them are user space addresses
          */
 
         if (bp & 0x01) {
                 addr[nbp++] = (caddr_t)rdr0();
         }
         if (bp & 0x02) {
                 addr[nbp++] = (caddr_t)rdr1();
         }
         if (bp & 0x04) {
                 addr[nbp++] = (caddr_t)rdr2();
         }
         if (bp & 0x08) {
                 addr[nbp++] = (caddr_t)rdr3();
         }
 
         for (i=0; i<nbp; i++) {
                 if (addr[i] <
                     (caddr_t)VM_MAXUSER_ADDRESS) {
                         /*
                          * addr[i] is in user space
                          */
                         return nbp;
                 }
         }
 
         /*
          * None of the breakpoints are in user space.
          */
         return 0;
 }
 
 
 #ifndef DDB
 /*
  * Stand-in used when the kernel is built without DDB: there is no
  * debugger to enter, so the request and its message are only printed.
  */
 void
 Debugger(const char *msg)
 {
 	printf("Debugger(\"%s\") called.\n", msg);
 }
 #endif /* no DDB */
 
 #include <sys/disklabel.h>
 
 /*
  * Determine the size of the transfer, and make sure it is
  * within the boundaries of the partition. Adjust transfer
  * if needed, and signal errors or early completion.
  *
  * bp     - the I/O request; bio_blkno is relative to the partition
  *          selected by dkpart(bp->bio_dev).
  * lp     - the disklabel describing the partitions.
  * wlabel - non-zero when writes over the label sector are allowed.
  *
  * Returns:
  *    1  the (possibly truncated) request may proceed; bio_pblkno has
  *       been set to the block number including the partition offset.
  *    0  the request starts exactly at the end of the partition; this
  *       is an EOF and bio_resid is set to bio_bcount.
  *   -1  the request is rejected; bio_error is set (EROFS for a write
  *       over the label, EINVAL for a request outside the partition)
  *       and BIO_ERROR is raised in bio_flags.
  */
 int
 bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
 {
         struct partition *p = lp->d_partitions + dkpart(bp->bio_dev);
         int labelsect = lp->d_partitions[0].p_offset;
         int maxsz = p->p_size,
                 sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 
         /* overwriting disk label ? */
         /* XXX should also protect bootstrap in first 8K */
         if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect &&
 #if LABELSECTOR != 0
             bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
 #endif
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 
 #if     defined(DOSBBSECTOR) && defined(notyet)
         /* overwriting master boot record? */
         if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR &&
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 #endif
 
         /*
          * before the partition?  A negative block number can never be
          * satisfied.  It must be rejected here: the truncation below
          * would compute maxsz - bio_blkno, which is larger than the
          * partition, and would grow the transfer instead of failing it.
          */
         if (bp->bio_blkno < 0) {
                 bp->bio_error = EINVAL;
                 goto bad;
         }
 
         /* beyond partition? */
         if (bp->bio_blkno + sz > maxsz) {
                 /* if exactly at end of disk, return an EOF */
                 if (bp->bio_blkno == maxsz) {
                         bp->bio_resid = bp->bio_bcount;
                         return(0);
                 }
                 /* or truncate if part of it fits */
                 sz = maxsz - bp->bio_blkno;
                 if (sz <= 0) {
                         bp->bio_error = EINVAL;
                         goto bad;
                 }
                 bp->bio_bcount = sz << DEV_BSHIFT;
         }
 
         bp->bio_pblkno = bp->bio_blkno + p->p_offset;
         return(1);
 
 bad:
         bp->bio_flags |= BIO_ERROR;
         return(-1);
 }
 
 #ifdef DDB
 
 /*
  * Provide inb() and outb() as functions.  They are normally only
  * available as macros calling inlined functions, thus cannot be
  * called inside DDB.
  *
  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
  */
 
 #undef inb
 #undef outb
 
 /* silence compiler warnings */
 u_char inb(u_int);
 void outb(u_int, u_char);
 
 /*
  * Out-of-line inb(): execute the "inb" instruction with the given
  * port number in %dx and return the byte it produces.
  */
 u_char
 inb(u_int port)
 {
 	u_char	data;
 	/*
 	 * We use %%dx and not %1 here because i/o is done at %dx and not at
 	 * %edx, while gcc generates inferior code (movw instead of movl)
 	 * if we tell it to load (u_short) port.
 	 */
 	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
 	return (data);
 }
 
 /*
  * Out-of-line outb(): execute the "outb" instruction with the given
  * port number in %dx and the data byte in the "a" register.
  */
 void
 outb(u_int port, u_char data)
 {
 	u_char	al;
 	/*
 	 * Use an unnecessary assignment to help gcc's register allocator.
 	 * This make a large difference for gcc-1.40 and a tiny difference
 	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
 	 * best results.  gcc-2.6.0 can't handle this.
 	 */
 	al = data;
 	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
 }
 
 #endif /* DDB */
Index: head/sys/pc98/pc98/machdep.c
===================================================================
--- head/sys/pc98/pc98/machdep.c	(revision 71983)
+++ head/sys/pc98/pc98/machdep.c	(revision 71984)
@@ -1,2838 +1,2839 @@
 /*-
  * Copyright (c) 1992 Terrence R. Lambert.
  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
  * $FreeBSD$
  */
 
 #include "opt_atalk.h"
 #include "opt_compat.h"
 #include "opt_cpu.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_ipx.h"
 #include "opt_isa.h"
 #include "opt_maxmem.h"
 #include "opt_msgbuf.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
 #include "opt_user_ldt.h"
 #include "opt_userconfig.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/ipl.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/linker.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/reboot.h>
 #include <sys/callout.h>
 #include <sys/msgbuf.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/bus.h>
 #include <sys/eventhandler.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <sys/lock.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
 #include <sys/user.h>
 #include <sys/exec.h>
 #include <sys/cons.h>
 
 #include <ddb/ddb.h>
 
 #include <net/netisr.h>
 
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/reg.h>
 #include <machine/clock.h>
 #include <machine/specialreg.h>
 #include <machine/bootinfo.h>
 #include <machine/md_var.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb_ext.h>		/* pcb.h included via sys/user.h */
 #include <machine/globaldata.h>
 #include <machine/globals.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #ifdef PERFMON
 #include <machine/perfmon.h>
 #endif
 
 #ifdef OLD_BUS_ARCH
 #include <i386/isa/isa_device.h>
 #endif
 #include <i386/isa/icu.h>
 #include <i386/isa/intr_machdep.h>
 #ifdef PC98
 #include <pc98/pc98/pc98_machdep.h>
 #include <pc98/pc98/pc98.h>
 #else
 #include <isa/rtc.h>
 #endif
 #include <machine/vm86.h>
 #include <sys/ptrace.h>
 #include <machine/sigframe.h>
 
 extern void init386 __P((int first));
 extern void dblfault_handler __P((void));
 
 extern void printcpuinfo(void);	/* XXX header file */
 extern void earlysetcpuclass(void);	/* same header file */
 extern void finishidentcpu(void);
 extern void panicifcpuunsupported(void);
 extern void initializecpu(void);
 
 #define	CS_SECURE(cs)		(ISPL(cs) == SEL_UPL)
 #define	EFL_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
 
 static void cpu_startup __P((void *));
 SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
 
 #ifdef PC98
 int	need_pre_dma_flush;	/* If 1, use wbinvd before DMA transfer. */
 int	need_post_dma_flush;	/* If 1, use invd after DMA transfer. */
 #endif
 
 int	_udatasel, _ucodesel;
 u_int	atdevbase;
 
 #if defined(SWTCH_OPTIM_STATS)
 extern int swtch_optim_stats;
 SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats,
 	CTLFLAG_RD, &swtch_optim_stats, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count,
 	CTLFLAG_RD, &tlb_flush_count, 0, "");
 #endif
 
 #ifdef PC98
 static int	ispc98 = 1;
 #else
 static int	ispc98 = 0;
 #endif
 SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, "");
 
 int physmem = 0;
 int cold = 1;
 
 static void osendsig __P((sig_t catcher, int sig, sigset_t *mask, u_long code));
 
 /*
  * Handler for the hw.physmem sysctl: reports ctob(physmem) through
  * sysctl_handle_int() and returns its result.
  */
 static int
 sysctl_hw_physmem(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0, ctob(physmem), req));
 }
 
 SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_physmem, "I", "");
 
 /*
  * Handler for the hw.usermem sysctl: reports the byte size of physmem
  * less the wired page count (cnt.v_wire_count) through
  * sysctl_handle_int() and returns its result.
  */
 static int
 sysctl_hw_usermem(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0,
 	    ctob(physmem - cnt.v_wire_count), req));
 }
 
 SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_usermem, "I", "");
 
 /*
  * Handler for the hw.availpages sysctl: reports the number of pages
  * between avail_start and avail_end through sysctl_handle_int() and
  * returns its result.
  */
 static int
 sysctl_hw_availpages(SYSCTL_HANDLER_ARGS)
 {
 
 	return (sysctl_handle_int(oidp, 0,
 	    i386_btop(avail_end - avail_start), req));
 }
 
 SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD,
 	0, 0, sysctl_hw_availpages, "I", "");
 
 static int
 sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	/* Unwind the buffer, so that it's linear (possibly starting with
 	 * some initial nulls).
 	 */
 	error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr,
 		msgbufp->msg_size-msgbufp->msg_bufr,req);
 	if(error) return(error);
 	if(msgbufp->msg_bufr>0) {
 		error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr,
 			msgbufp->msg_bufr,req);
 	}
 	return(error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD,
 	0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer");
 
 static int msgbuf_clear;
 
 /*
  * Handler for the machdep.msgbuf_clear sysctl.  The integer itself is
  * processed by sysctl_handle_int(); when that succeeds and the request
  * carries a new value, the message buffer is zeroed, both of its
  * indices are reset and msgbuf_clear is put back to 0.  Returns the
  * result of sysctl_handle_int().
  */
 static int
 sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2,
 	    req);
 
 	/* nothing to clear on failure or on a plain read */
 	if (error || !req->newptr)
 		return (error);
 
 	/* Clear the buffer and reset write pointer */
 	bzero(msgbufp->msg_ptr, msgbufp->msg_size);
 	msgbufp->msg_bufx = 0;
 	msgbufp->msg_bufr = 0;
 	msgbuf_clear = 0;
 
 	return (error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW,
 	&msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I",
 	"Clear kernel message buffer");
 
 int bootverbose = 0, Maxmem = 0;
 #ifdef PC98
 int Maxmem_under16M = 0;
 #endif
 long dumplo;
 
 /*
  * Chunks of usable physical memory, stored as consecutive
  * (start, end) address pairs; the list is terminated by zero entries.
  */
 vm_offset_t phys_avail[10];
 
 /* must be 2 less so 0 0 can signal end of chunks */
 #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
 
 static vm_offset_t buffer_sva, buffer_eva;
 vm_offset_t clean_sva, clean_eva;
 static vm_offset_t pager_sva, pager_eva;
 static struct trapframe proc0_tf;
 #ifndef SMP
 static struct globaldata __globaldata;
 #endif
 
 struct cpuhead cpuhead;
 
 struct mtx sched_lock;
 struct mtx Giant;
 
 /*
  * Machine-dependent startup.
  *
  * Prints the version banner and a memory summary, sizes and allocates
  * the callout, callwheel, swbuf and buf tables, carves the clean,
  * buffer, pager and exec submaps, initializes the callout free list and
  * wheel, calls bufinit()/vm_pager_bufferinit(), registers this CPU on
  * cpuhead, starts the other processors on SMP kernels and finally calls
  * cpu_setregs().
  *
  * The "dummy" argument is not used.
  */
 static void
 cpu_startup(dummy)
 	void *dummy;
 {
 	register unsigned i;
 	register caddr_t v;
 	vm_offset_t maxaddr;
 	vm_size_t size = 0;
 	int firstaddr;
 	vm_offset_t minaddr;
 	int physmem_est;
 
 	if (boothowto & RB_VERBOSE)
 		bootverbose++;
 
 	/*
 	 * Good {morning,afternoon,evening,night}.
 	 */
 	printf("%s", version);
 	earlysetcpuclass();
 	startrtclock();
 	printcpuinfo();
 	panicifcpuunsupported();
#ifdef PERFMON
 	perfmon_init();
#endif
 	printf("real memory  = %u (%uK bytes)\n", ptoa(Maxmem), ptoa(Maxmem) / 1024);
 	/*
 	 * In verbose mode, list every physical memory chunk recorded in
 	 * phys_avail[] (pairs of start/end, terminated by a zero end).
 	 */
 	if (bootverbose) {
 		int indx;
 
 		printf("Physical memory chunk(s):\n");
 		for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
 			unsigned int size1 = phys_avail[indx + 1] - phys_avail[indx];
 
 			printf("0x%08x - 0x%08x, %u bytes (%u pages)\n",
 			    phys_avail[indx], phys_avail[indx + 1] - 1, size1,
 			    size1 / PAGE_SIZE);
 		}
 	}
 
 	/*
 	 * Calculate callout wheel size: the smallest power of two that is
 	 * not less than ncallout, plus the matching bit count and mask.
 	 */
 	for (callwheelsize = 1, callwheelbits = 0;
 	     callwheelsize < ncallout;
 	     callwheelsize <<= 1, ++callwheelbits)
 		;
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Allocate space for system data structures.
 	 * "v" is the running allocation cursor: each valloc() hands out
 	 * the current value of "v" and advances it past the new table.
 	 * "firstaddr" is the base the cursor starts from: 0 during the
 	 * sizing pass, the kmem_alloc()ed region during the second pass.
 	 */
 
 	/*
 	 * Make two passes.  The first pass calculates how much memory is
 	 * needed and allocates it.  The second pass assigns virtual
 	 * addresses to the various data structures.
 	 */
 	firstaddr = 0;
again:
 	v = (caddr_t)firstaddr;
 
#define	valloc(name, type, num) \
 	    (name) = (type *)v; v = (caddr_t)((name)+(num))
#define	valloclim(name, type, num, lim) \
 	    (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num)))
 
 	valloc(callout, struct callout, ncallout);
 	valloc(callwheel, struct callout_tailq, callwheelsize);
 
 	/*
 	 * Discount the physical memory larger than the size of kernel_map
 	 * to avoid eating up all of KVA space.
 	 */
 	if (kernel_map->first_free == NULL) {
 		printf("Warning: no free entries in kernel_map.\n");
 		physmem_est = physmem;
 	} else
 		physmem_est = min(physmem, kernel_map->max_offset - kernel_map->min_offset);
 
 	/*
 	 * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
 	 * For the first 64MB of ram nominally allocate sufficient buffers to
 	 * cover 1/4 of our ram.  Beyond the first 64MB allocate additional
 	 * buffers to cover 1/20 of our ram over 64MB.
 	 *
 	 * factor represents the 1/4 x ram conversion.
 	 *
 	 * Only done when nbuf has not already been set to a non-zero value.
 	 */
 	if (nbuf == 0) {
 		int factor = 4 * BKVASIZE / PAGE_SIZE;
 
 		nbuf = 50;
 		if (physmem_est > 1024)
 			nbuf += min((physmem_est - 1024) / factor, 16384 / factor);
 		if (physmem_est > 16384)
 			nbuf += (physmem_est - 16384) * 2 / (factor * 5);
 	}
 
 	/*
 	 * Do not allow the buffer_map to be more than 1/2 the size of the
 	 * kernel_map.
 	 */
 	if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / 
 	    (BKVASIZE * 2)) {
 		nbuf = (kernel_map->max_offset - kernel_map->min_offset) / 
 		    (BKVASIZE * 2);
 		printf("Warning: nbufs capped at %d\n", nbuf);
 	}
 
 	/* A quarter of nbuf, clamped to the range [16, 256]. */
 	nswbuf = max(min(nbuf/4, 256), 16);
 
 	valloc(swbuf, struct buf, nswbuf);
 	valloc(buf, struct buf, nbuf);
 	v = bufhashinit(v);
 
 	/*
 	 * End of first pass, size has been calculated so allocate memory
 	 */
 	if (firstaddr == 0) {
 		size = (vm_size_t)(v - firstaddr);
 		firstaddr = (int)kmem_alloc(kernel_map, round_page(size));
 		if (firstaddr == 0)
 			panic("startup: no room for tables");
 		goto again;
 	}
 
 	/*
 	 * End of second pass, addresses have been assigned.  Both passes
 	 * must have consumed exactly the same amount of space.
 	 */
 	if ((vm_size_t)(v - firstaddr) != size)
 		panic("startup: table size inconsistency");
 
 	/*
 	 * clean_map is a submap of kernel_map; buffer_map and pager_map
 	 * are in turn carved out of clean_map.  exec_map comes straight
 	 * from kernel_map.
 	 */
 	clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva,
 			(nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size);
 	buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva,
 				(nbuf*BKVASIZE));
+	buffer_map->system_map = 1;
 	pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva,
 				(nswbuf*MAXPHYS) + pager_map_size);
 	pager_map->system_map = 1;
 	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
 				(16*(ARG_MAX+(PAGE_SIZE*3))));
 
 	/*
 	 * XXX: Mbuf system machine-specific initializations should
 	 *      go here, if anywhere.
 	 */
 
 	/*
 	 * Initialize callouts: put every preallocated callout on the free
 	 * list and empty every bucket of the callout wheel.
 	 */
 	SLIST_INIT(&callfree);
 	for (i = 0; i < ncallout; i++) {
 		callout_init(&callout[i], 0);
 		callout[i].c_flags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle);
 	}
 
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&callwheel[i]);
 	}
 
 	mtx_init(&callout_lock, "callout", MTX_SPIN | MTX_RECURSE);
 
#if defined(USERCONFIG)
 	userconfig();
 	cninit();		/* the preferred console may have changed */
#endif
 
 	printf("avail memory = %u (%uK bytes)\n", ptoa(cnt.v_free_count),
 	    ptoa(cnt.v_free_count) / 1024);
 
 	/*
 	 * Set up buffers, so they can be used to read disk labels.
 	 */
 	bufinit();
 	vm_pager_bufferinit();
 
 	/* Start the list of CPUs with the one we are running on. */
 	SLIST_INIT(&cpuhead);
 	SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu);
 
#ifdef SMP
 	/*
 	 * OK, enough kmem_alloc/malloc state should be up, lets get on with it!
 	 */
 	mp_start();			/* fire up the APs and APICs */
 	mp_announce();
#endif  /* SMP */
 	cpu_setregs();
 }
 
 /*
  * Send an interrupt to process.
  *
  * Stack is set up to allow sigcode stored
  * at top to call routine, followed by kcall
  * to sigreturn routine below.  After sigreturn
  * resets the signal mask, the stack, and the
  * frame pointer, it returns to the user
  * specified pc, psl.
  *
  * osendsig() is the old-style variant: it builds a struct osigframe
  * (with an embedded osigcontext) for the current process and is called
  * from sendsig() for signals that are members of ps_osigset.
  *
  *	catcher	handler to be entered
  *	sig	signal number (translated via sv_sigtbl if one is set)
  *	mask	signal mask to be saved in the context
  *	code	signal code handed to the handler
  */
 static void
 osendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct osigframe sf;
 	struct osigframe *fp;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	regs = p->p_md.md_regs;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/*
 	 * Allocate and validate space for the signal handler context:
 	 * the top of the alternate stack if one is enabled, not already
 	 * in use and requested for this signal, otherwise just below the
 	 * current user stack pointer.
 	 */
 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		fp = (struct osigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct osigframe));
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
#endif
 	} else
 		fp = (struct osigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *fp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)fp) == 0 ||
 	    !useracc((caddr_t)fp, sizeof(*fp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		PROC_UNLOCK(p);
 		psignal(p, SIGILL);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/*
 	 * The whole frame is copied out to user space below.  Clear it
 	 * first, as sendsig() does, so that fields which are not set on
 	 * a given path (and any padding) do not leak kernel stack
 	 * contents to the process.
 	 */
 	bzero(&sf, sizeof(sf));
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_arg2 = (register_t)&fp->sf_siginfo;
 		sf.sf_siginfo.si_signo = sig;
 		sf.sf_siginfo.si_code = code;
 		sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_arg2 = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/* Save most if not all of trap frame. */
 	sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax;
 	sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx;
 	sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx;
 	sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx;
 	sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi;
 	sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi;
 	sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs;
 	sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds;
 	sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss;
 	sf.sf_siginfo.si_sc.sc_es = regs->tf_es;
 	sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs;
 	sf.sf_siginfo.si_sc.sc_gs = rgs();
 	sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp;
 
 	/* Build the signal context to be used by osigreturn(). */
 	sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0;
 	SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask);
 	sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp;
 	sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp;
 	sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip;
 	sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags;
 	sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno;
 	sf.sf_siginfo.si_sc.sc_err = regs->tf_err;
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		/* XXX confusing names: `tf' isn't a trapframe; `regs' is. */
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs;
 		sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs;
 		sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es;
 		sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_siginfo.si_sc.sc_ps =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/* See sendsig() for comments. */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, fp, sizeof(*fp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		sigexit(p, SIGILL);
 	}
 
 	/*
 	 * Redirect the process: stack pointer at the new frame, program
 	 * counter at PS_STRINGS - szosigcode, and the user code/data
 	 * selectors in every segment register.
 	 */
 	regs->tf_esp = (int)fp;
 	regs->tf_eip = PS_STRINGS - szosigcode;
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 }
 
 /*
  * Deliver signal "sig" to the current process by building a struct
  * sigframe on the user stack and redirecting the process to the signal
  * trampoline.
  *
  * Signals that are members of ps_osigset are handed to osendsig()
  * instead.  Otherwise the user context (signal mask, stack description
  * and a copy of the trap frame) is saved into the frame, the frame is
  * placed on the alternate stack or below the current stack pointer,
  * and after a successful copyout() the trap frame is rewritten so the
  * process resumes at PS_STRINGS minus the sysentvec's sigcode size.
  *
  *	catcher	handler to be entered
  *	sig	signal number (translated via sv_sigtbl if one is set)
  *	mask	signal mask to be saved in the context
  *	code	signal code handed to the handler
  */
 void
 sendsig(catcher, sig, mask, code)
 	sig_t catcher;
 	int sig;
 	sigset_t *mask;
 	u_long code;
 {
 	struct sigframe sf;
 	struct proc *p;
 	struct sigacts *psp;
 	struct trapframe *regs;
 	struct sigframe *sfp;
 	int oonstack;
 
 	p = curproc;
 	PROC_LOCK(p);
 	psp = p->p_sigacts;
 	/* Old-style signal: drop the lock and let osendsig() do the work. */
 	if (SIGISMEMBER(psp->ps_osigset, sig)) {
 		PROC_UNLOCK(p);
 		osendsig(catcher, sig, mask, code);
 		return;
 	}
 	regs = p->p_md.md_regs;
 	oonstack = sigonstack(regs->tf_esp);
 
 	/* Save user context.  The frame is zeroed first. */
 	bzero(&sf, sizeof(sf));
 	sf.sf_uc.uc_sigmask = *mask;
 	sf.sf_uc.uc_stack = p->p_sigstk;
 	sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
 	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
 	sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
 	sf.sf_uc.uc_mcontext.mc_gs = rgs();
 	/* The trap frame is copied as a unit, starting at mc_fs. */
 	bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
 
 	/* Allocate and validate space for the signal handler context. */
 	if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack &&
 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
 		sfp = (struct sigframe *)(p->p_sigstk.ss_sp +
 		    p->p_sigstk.ss_size - sizeof(struct sigframe));
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
#endif
 	} else
 		sfp = (struct sigframe *)regs->tf_esp - 1;
 	PROC_UNLOCK(p);
 
 	/*
 	 * grow_stack() will return 0 if *sfp does not fit inside the stack
 	 * and the stack can not be grown.
 	 * useracc() will return FALSE if access is denied.
 	 */
 	if (grow_stack(p, (int)sfp) == 0 ||
 	    !useracc((caddr_t)sfp, sizeof(*sfp), VM_PROT_WRITE)) {
 		/*
 		 * Process has trashed its stack; give it an illegal
 		 * instruction to halt it in its tracks.
 		 */
#ifdef DEBUG
 		printf("process %d has trashed its stack\n", p->p_pid);
#endif
 		PROC_LOCK(p);
 		SIGACTION(p, SIGILL) = SIG_DFL;
 		SIGDELSET(p->p_sigignore, SIGILL);
 		SIGDELSET(p->p_sigcatch, SIGILL);
 		SIGDELSET(p->p_sigmask, SIGILL);
 		PROC_UNLOCK(p);
 		psignal(p, SIGILL);
 		return;
 	}
 
 	/* Translate the signal if appropriate. */
 	if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize)
 		sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
 
 	/* Build the argument list for the signal handler. */
 	sf.sf_signum = sig;
 	sf.sf_ucontext = (register_t)&sfp->sf_uc;
 	PROC_LOCK(p);
 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
 		/* Signal handler installed with SA_SIGINFO. */
 		sf.sf_siginfo = (register_t)&sfp->sf_si;
 		sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher;
 
 		/* Fill siginfo structure. */
 		sf.sf_si.si_signo = sig;
 		sf.sf_si.si_code = code;
 		sf.sf_si.si_addr = (void *)regs->tf_err;
 	} else {
 		/* Old FreeBSD-style arguments. */
 		sf.sf_siginfo = code;
 		sf.sf_addr = regs->tf_err;
 		sf.sf_ahu.sf_handler = catcher;
 	}
 	PROC_UNLOCK(p);
 
 	/*
 	 * If we're a vm86 process, we want to save the segment registers.
 	 * We also change eflags to be our emulated eflags, not the actual
 	 * eflags.
 	 */
 	if (regs->tf_eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 
 		sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs;
 		sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs;
 		sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es;
 		sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds;
 
 		if (vm86->vm86_has_vme == 0)
 			sf.sf_uc.uc_mcontext.mc_eflags =
 			    (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) |
 			    (vm86->vm86_eflags & (PSL_VIF | PSL_VIP));
 
 		/*
 		 * We should never have PSL_T set when returning from vm86
 		 * mode.  It may be set here if we deliver a signal before
 		 * getting to vm86 mode, so turn it off.
 		 *
 		 * Clear PSL_NT to inhibit T_TSSFLT faults on return from
 		 * syscalls made by the signal handler.  This just avoids
 		 * wasting time for our lazy fixup of such faults.  PSL_NT
 		 * does nothing in vm86 mode, but vm86 programs can set it
 		 * almost legitimately in probes for old cpu types.
 		 */
 		tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_T | PSL_VIF | PSL_VIP);
 	}
 
 	/* Copy the sigframe out to the user's stack. */
 	if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
 		/*
 		 * Something is wrong with the stack pointer.
 		 * ...Kill the process.
 		 */
 		sigexit(p, SIGILL);
 	}
 
 	/*
 	 * Point the process at the new frame and at the signal trampoline,
 	 * with the user code/data selectors in every segment register.
 	 */
 	regs->tf_esp = (int)sfp;
 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
 	regs->tf_cs = _ucodesel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	load_gs(_udatasel);
 	regs->tf_ss = _udatasel;
 }
 
 /*
  * System call to cleanup state after a signal
  * has been taken.  Reset signal mask and
  * stack state from context left by sendsig (above).
  * Return to previous pc and psl as specified by
  * context left by sendsig. Check carefully to
  * make sure that the user has not modified the
  * state to gain improper privileges.
  *
  * osigreturn() handles the old-style struct osigcontext.
  *
  * Returns EFAULT if the context is not readable, EINVAL if the
  * requested eflags or %cs are rejected (or vm86 mode is requested
  * without an initialized vm86 area), and EJUSTRETURN on success.
  */
 int
 osigreturn(p, uap)
 	struct proc *p;
 	struct osigreturn_args /* {
 		struct osigcontext *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	struct osigcontext *scp;
 	int eflags;
 
 	regs = p->p_md.md_regs;
 	scp = uap->sigcntxp;
 	/*
 	 * NOTE(review): after this check the user-supplied context is
 	 * read through scp directly rather than from a kernel copy --
 	 * confirm that this is acceptable here.
 	 */
 	if (!useracc((caddr_t)scp, sizeof(*scp), VM_PROT_READ))
 		return (EFAULT);
 	eflags = scp->sc_ps;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		/*
 		 * Only the bits in VME_USERCHANGE (or VM_USERCHANGE when
 		 * VME is not in use) are taken from the user's value; the
 		 * rest come from the current trap frame, and PSL_VM is
 		 * forced on.
 		 */
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |					    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		tf->tf_vm86_ds = scp->sc_ds;
 		tf->tf_vm86_es = scp->sc_es;
 		tf->tf_vm86_fs = scp->sc_fs;
 		tf->tf_vm86_gs = scp->sc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		if (!CS_SECURE(scp->sc_cs)) {
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 		regs->tf_ds = scp->sc_ds;
 		regs->tf_es = scp->sc_es;
 		regs->tf_fs = scp->sc_fs;
 	}
 
 	/* Restore remaining registers. */
 	regs->tf_eax = scp->sc_eax;
 	regs->tf_ebx = scp->sc_ebx;
 	regs->tf_ecx = scp->sc_ecx;
 	regs->tf_edx = scp->sc_edx;
 	regs->tf_esi = scp->sc_esi;
 	regs->tf_edi = scp->sc_edi;
 	regs->tf_cs = scp->sc_cs;
 	regs->tf_ss = scp->sc_ss;
 	regs->tf_isp = scp->sc_isp;
 
 	/* The alternate-stack flag and signal mask are updated under the proc lock. */
 	PROC_LOCK(p);
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (scp->sc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
 
 	SIGSETOLD(p->p_sigmask, scp->sc_mask);
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	regs->tf_ebp = scp->sc_fp;
 	regs->tf_esp = scp->sc_sp;
 	regs->tf_eip = scp->sc_pc;
 	regs->tf_eflags = eflags;
 	return (EJUSTRETURN);
 }
 
 /*
  * System call to restore the state saved by sendsig() in a ucontext_t.
  *
  * If the context carries the value 0x01d516 in the position of an
  * osigcontext's sc_trapno, the call is passed on to osigreturn().
  * Otherwise the eflags and %cs requested by the user are validated,
  * the trap frame is reloaded from the context and the signal mask (and
  * optionally the alternate-stack flag) is restored.
  *
  * Returns EFAULT if the context is not readable, EINVAL if the
  * requested state is rejected, and EJUSTRETURN on success.
  */
 int
 sigreturn(p, uap)
 	struct proc *p;
 	struct sigreturn_args /* {
 		ucontext_t *sigcntxp;
 	} */ *uap;
 {
 	struct trapframe *regs;
 	ucontext_t *ucp;
 	int cs, eflags;
 
 	ucp = uap->sigcntxp;
 	/* Enough must be readable to inspect it as an osigcontext first. */
 	if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ))
 		return (EFAULT);
 	/* NOTE(review): 0x01d516 is presumably a marker for old-style contexts -- confirm. */
 	if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516)
 		return (osigreturn(p, (struct osigreturn_args *)uap));
 
 	/*
 	 * Since ucp is not an osigcontext but a ucontext_t, we have to
 	 * check again if all of it is accessible.  A ucontext_t is
 	 * much larger, so instead of just checking for the pointer
 	 * being valid for the size of an osigcontext, now check for
 	 * it being valid for a whole, new-style ucontext_t.
 	 */
 	if (!useracc((caddr_t)ucp, sizeof(*ucp), VM_PROT_READ))
 		return (EFAULT);
 
 	regs = p->p_md.md_regs;
 	eflags = ucp->uc_mcontext.mc_eflags;
 	if (eflags & PSL_VM) {
 		struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs;
 		struct vm86_kernel *vm86;
 
 		/*
 		 * if pcb_ext == 0 or vm86_inited == 0, the user hasn't
 		 * set up the vm86 area, and we can't enter vm86 mode.
 		 */
 		if (p->p_addr->u_pcb.pcb_ext == 0)
 			return (EINVAL);
 		vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86;
 		if (vm86->vm86_inited == 0)
 			return (EINVAL);
 
 		/* Go back to user mode if both flags are set. */
 		if ((eflags & PSL_VIP) && (eflags & PSL_VIF))
 			trapsignal(p, SIGBUS, 0);
 
 		if (vm86->vm86_has_vme) {
 			eflags = (tf->tf_eflags & ~VME_USERCHANGE) |
 			    (eflags & VME_USERCHANGE) | PSL_VM;
 		} else {
 			vm86->vm86_eflags = eflags;	/* save VIF, VIP */
 			eflags = (tf->tf_eflags & ~VM_USERCHANGE) |					    (eflags & VM_USERCHANGE) | PSL_VM;
 		}
 		/*
 		 * Reload the whole trap frame from the context, then put
 		 * back the filtered eflags, move the user's data segment
 		 * values into the vm86 slots and load the user data
 		 * selector into the ordinary ones.
 		 */
 		bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe));
 		tf->tf_eflags = eflags;
 		tf->tf_vm86_ds = tf->tf_ds;
 		tf->tf_vm86_es = tf->tf_es;
 		tf->tf_vm86_fs = tf->tf_fs;
 		tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs;
 		tf->tf_ds = _udatasel;
 		tf->tf_es = _udatasel;
 		tf->tf_fs = _udatasel;
 	} else {
 		/*
 		 * Don't allow users to change privileged or reserved flags.
 		 */
 		/*
 		 * XXX do allow users to change the privileged flag PSL_RF.
 		 * The cpu sets PSL_RF in tf_eflags for faults.  Debuggers
 		 * should sometimes set it there too.  tf_eflags is kept in
 		 * the signal context during signal handling and there is no
 		 * other place to remember it, so the PSL_RF bit may be
 		 * corrupted by the signal handler without us knowing.
 		 * Corruption of the PSL_RF bit at worst causes one more or
 		 * one less debugger trap, so allowing it is fairly harmless.
 		 */
 		if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
 			printf("sigreturn: eflags = 0x%x\n", eflags);
 	    		return (EINVAL);
 		}
 
 		/*
 		 * Don't allow users to load a valid privileged %cs.  Let the
 		 * hardware check for invalid selectors, excess privilege in
 		 * other selectors, invalid %eip's and invalid %esp's.
 		 */
 		cs = ucp->uc_mcontext.mc_cs;
 		if (!CS_SECURE(cs)) {
 			printf("sigreturn: cs = 0x%x\n", cs);
 			trapsignal(p, SIGBUS, T_PROTFLT);
 			return (EINVAL);
 		}
 
 		bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
 	}
 
 	/* The alternate-stack flag and signal mask are updated under the proc lock. */
 	PROC_LOCK(p);
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
 	if (ucp->uc_mcontext.mc_onstack & 1)
 		p->p_sigstk.ss_flags |= SS_ONSTACK;
 	else
 		p->p_sigstk.ss_flags &= ~SS_ONSTACK;
#endif
 
 	p->p_sigmask = ucp->uc_sigmask;
 	SIG_CANTMASK(p->p_sigmask);
 	PROC_UNLOCK(p);
 	return (EJUSTRETURN);
 }
 
 /*
  * Machine dependent boot() routine
  *
  * I haven't seen anything to put here yet
  * Possibly some stuff might be grafted back here from boot()
  *
  * Currently a no-op; the "howto" argument is ignored.
  */
 void
 cpu_boot(int howto)
 {
 }
 
 /*
  * Shut the CPU down as far as we can: keep executing "hlt" forever.
  * This function never returns to its caller.
  */
 void
 cpu_halt(void)
 {
 	while (1) {
 		__asm__ ("hlt");
 	}
 }
 
 /*
  * Hook to idle the CPU when possible.  This currently only works in
  * the !SMP case, as there is no clean way to ensure that a CPU will be
  * woken when there is work available for it.
  *
  * cpu_idle_hlt is the run-time switch consulted by cpu_idle(); it
  * defaults to 1 and is exported read/write as machdep.cpu_idle_hlt.
  */
 static int	cpu_idle_hlt = 1;
 SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
     &cpu_idle_hlt, 0, "Idle loop HLT enable");
 
 /*
  * Idle the CPU until the next interrupt, if halting is enabled
  * (cpu_idle_hlt) and nothing is runnable.  Does nothing on SMP kernels.
  *
  * Note that we have to be careful here to avoid a race between checking
  * procrunnable() and actually halting.  If we don't do this, we may waste
  * the time between calling hlt and the next interrupt even though there
  * is a runnable process.
  *
  * Interrupts are therefore disabled around the procrunnable() test, and
  * re-enabled by an "sti" that is immediately followed by the "hlt".
  */
 void
 cpu_idle(void)
 {
#ifndef SMP
 	if (cpu_idle_hlt) {
 		disable_intr();
 		if (procrunnable())
 			enable_intr();
 		else {
 			/*
 			 * "sti" only takes effect after the instruction
 			 * that follows it, so hlt must be the very next
 			 * instruction.  Issuing both from one asm statement
 			 * guarantees that; a separate enable_intr() call
 			 * followed by a separate hlt leaves room for an
 			 * interrupt to make a process runnable in between,
 			 * after which we would halt with work pending.
 			 */
 			__asm __volatile("sti; hlt");
 		}
 	}
#endif
 }
 
 /*
  * Clear registers on exec
  *
  * Resets the trap frame and the relevant pcb state of process "p" for
  * a freshly exec'd image:
  *	entry		initial user program counter (tf_eip)
  *	stack		initial user stack pointer (tf_esp)
  *	ps_strings	value handed to the new image in %ebx
  */
 void
 setregs(p, entry, stack, ps_strings)
 	struct proc *p;
 	u_long entry;
 	u_long stack;
 	u_long ps_strings;
 {
 	struct trapframe *regs = p->p_md.md_regs;
 	struct pcb *pcb = &p->p_addr->u_pcb;
 
#ifdef USER_LDT
 	/* was i386_user_cleanup() in NetBSD */
 	user_ldt_free(pcb);
#endif
   
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_eip = entry;
 	regs->tf_esp = stack;
 	/*
 	 * NOTE(review): the frame was zeroed by the bzero() above, so
 	 * (regs->tf_eflags & PSL_T) is always 0 at this point and the
 	 * trace flag is not actually carried over -- confirm whether
 	 * preserving PSL_T across exec is intended.
 	 */
 	regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T);
 	regs->tf_ss = _udatasel;
 	regs->tf_ds = _udatasel;
 	regs->tf_es = _udatasel;
 	regs->tf_fs = _udatasel;
 	regs->tf_cs = _ucodesel;
 
 	/* PS_STRINGS value for BSD/OS binaries.  It is 0 for non-BSD/OS. */
 	regs->tf_ebx = ps_strings;
 
 	/*
 	 * reset %gs as well: directly in the register when "p" owns the
 	 * current pcb, otherwise in the saved copy in its pcb.
 	 */
 	if (pcb == PCPU_GET(curpcb))
 		load_gs(_udatasel);
 	else
 		pcb->pcb_gs = _udatasel;
 
         /*
          * Reset the hardware debug registers if they were in use.
          * They won't have any meaning for the newly exec'd process.
          */
         if (pcb->pcb_flags & PCB_DBREGS) {
                 pcb->pcb_dr0 = 0;
                 pcb->pcb_dr1 = 0;
                 pcb->pcb_dr2 = 0;
                 pcb->pcb_dr3 = 0;
                 pcb->pcb_dr6 = 0;
                 pcb->pcb_dr7 = 0;
                 if (pcb == PCPU_GET(curpcb)) {
 		        /*
 			 * Clear the debug registers on the running
 			 * CPU, otherwise they will end up affecting
 			 * the next process we switch to.
 			 */
 		        reset_dbregs();
                 }
                 pcb->pcb_flags &= ~PCB_DBREGS;
         }
 
 	/*
 	 * Initialize the math emulator (if any) for the current process.
 	 * Actually, just clear the bit that says that the emulator has
 	 * been initialized.  Initialization is delayed until the process
 	 * traps to the emulator (if it is done at all) mainly because
 	 * emulators don't provide an entry point for initialization.
 	 */
 	p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP;
 
 	/*
 	 * Arrange to trap the next npx or `fwait' instruction (see npx.c
 	 * for why fwait must be trapped at least if there is an npx or an
 	 * emulator).  This is mainly to handle the case where npx0 is not
 	 * configured, since the npx routines normally set up the trap
 	 * otherwise.  It should be done only at boot time, but doing it
 	 * here allows modifying `npx_exists' for testing the emulator on
 	 * systems with an npx.
 	 */
 	load_cr0(rcr0() | CR0_MP | CR0_TS);
 
#ifdef DEV_NPX
 	/* Initialize the npx (if any) for the current process. */
 	npxinit(__INITIAL_NPXCW__);
#endif
 
       /*
        * XXX - Linux emulator
        * Make sure edx is 0x0 on entry. Linux binaries depend
        * on it.
        */
       p->p_retval[1] = 0;
 }
 
 /*
  * Set the control bits we always want in %cr0 on top of whatever is
  * already there, then load the user data selector into %gs.
  */
 void
 cpu_setregs(void)
 {
 	unsigned int wanted;
 
 	/*
 	 * CR0_NE: Done by npxinit().
 	 * CR0_MP | CR0_TS: Done at every execve() too.
 	 */
 	wanted = CR0_NE | CR0_MP | CR0_TS;
#ifndef I386_CPU
 	wanted |= CR0_WP | CR0_AM;
#endif
 	load_cr0(rcr0() | wanted);
 	load_gs(_udatasel);
 }
 
 /*
  * Sysctl handler for machdep.adjkerntz: let sysctl_handle_int() do the
  * transfer and, if that succeeded and a new value was supplied, call
  * resettodr().  Returns the result of sysctl_handle_int().
  */
 static int
 sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS)
 {
 	int rc;
 
 	rc = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
 	if (rc)
 		return (rc);
 	if (req->newptr)
 		resettodr();
 	return (rc);
 }
 
 /*
  * machdep sysctl nodes.  adjkerntz goes through the handler above;
  * disable_rtc_set and wall_cmos_clock are plain read/write integers;
  * bootinfo is exported read-only as a structure.
  */
 SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW,
 	&adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "");
 
 SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set,
 	CTLFLAG_RW, &disable_rtc_set, 0, "");
 
 SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, 
 	CTLFLAG_RD, &bootinfo, bootinfo, "");
 
 SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
 	CTLFLAG_RW, &wall_cmos_clock, 0, "");
 
 /*
  * Initialize 386 and configure to run kernel
  */
 
 /*
  * Initialize segments & interrupt table
  */
 
 int _default_ldt;
 /* NGDT entries are reserved for each of MAXCPU processors. */
 union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
 static struct gate_descriptor idt0[NIDT];
 /* setidt() writes through this pointer; it initially refers to idt0[]. */
 struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
 union descriptor ldt[NLDT];		/* local descriptor table */
#ifdef SMP
 /* table descriptors - used by the processor to load the tables */
 struct region_descriptor r_gdt, r_idt;
#endif
 
 int private_tss;			/* flag indicating private tss */
 
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
 extern int has_f00f_bug;
#endif
 
 /* TSS and stack referenced by the GPANIC_SEL entry of gdt_segs[]. */
 static struct i386tss dblfault_tss;
 static char dblfault_stack[PAGE_SIZE];
 
 extern  struct user *proc0paddr;
 
 
 /*
  * software prototypes -- in more palatable form
  *
  * One soft_segment_descriptor per GDT slot, in slot order.  Each
  * initializer lists: base address, limit, type, descriptor privilege
  * level, present bit, two unused fields, default operand size and
  * limit granularity.
  */
 struct soft_segment_descriptor gdt_segs[] = {
 /* GNULL_SEL	0 Null Descriptor */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GCODE_SEL	1 Code Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GDATA_SEL	2 Data Descriptor for kernel */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPRIV_SEL	3 SMP Per-Processor Private Data Descriptor */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPROC0_SEL	4 Proc 0 Tss Descriptor */
 {
 	0x0,			/* segment base address */
 	sizeof(struct i386tss)-1,/* length - size of one TSS */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GLDT_SEL	5 LDT Descriptor */
 {	(int) ldt,		/* segment base address  */
 	sizeof(ldt)-1,		/* length - size of ldt[] */
 	SDT_SYSLDT,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GUSERLDT_SEL	6 User LDT Descriptor per process */
 {	(int) ldt,		/* segment base address  */
 	(512 * sizeof(union descriptor)-1),		/* length */
 	SDT_SYSLDT,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GTGATE_SEL	7 Null Descriptor - Placeholder */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */
 {	0x400,			/* segment base address */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GPANIC_SEL	9 Panic Tss Descriptor */
 {	(int) &dblfault_tss,	/* segment base address  */
 	sizeof(struct i386tss)-1,/* length - size of one TSS */
 	SDT_SYS386TSS,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* unused - default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */
 {	0,			/* segment base address (overwritten)  */
 	0xfffff,		/* length */
 	SDT_MEMERA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */
 {	0,			/* segment base address (overwritten) */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
 	0,			/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Software prototypes for the LDT, in slot order.  The layout of each
  * initializer is the same as in gdt_segs[]: base address, limit, type,
  * descriptor privilege level, present bit, two unused fields, default
  * operand size and limit granularity.
  */
 static struct soft_segment_descriptor ldt_segs[] = {
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Code Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 	/* Null Descriptor - overwritten by call gate */
 {	0x0,			/* segment base address  */
 	0x0,			/* length */
 	0,			/* segment type */
 	0,			/* segment descriptor privilege level */
 	0,			/* segment descriptor present */
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
 	/* Data Descriptor for user */
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
 	SEL_UPL,		/* segment descriptor privilege level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
 };
 
 /*
  * Fill in entry `idx' of the interrupt descriptor table.
  *
  *	idx	index of the IDT slot to write
  *	func	handler entry point (callers may pass 0, e.g. for a task gate)
  *	typ	gate type stored in gd_type
  *	dpl	privilege level stored in gd_dpl
  *	selec	selector stored in gd_selector
  *
  * The gate is always marked present; the stack-copy count and the
  * gd_xx field are cleared.
  */
 void
 setidt(idx, func, typ, dpl, selec)
 	int idx;
 	inthand_t *func;
 	int typ;
 	int dpl;
 	int selec;
 {
 	struct gate_descriptor *gate = &idt[idx];
 	int handler = (int)func;

 	/* The handler address is stored split across two fields. */
 	gate->gd_looffset = handler;
 	gate->gd_hioffset = handler >> 16;

 	gate->gd_selector = selec;
 	gate->gd_type = typ;
 	gate->gd_dpl = dpl;

 	gate->gd_stkcpy = 0;
 	gate->gd_xx = 0;
 	gate->gd_p = 1;
 }
 
 /*
  * Name of an interrupt/trap entry point: IDTVEC(foo) expands to the
  * identifier Xfoo.
  */
 #define	IDTVEC(name)	__CONCAT(X,name)

 /*
  * Entry points referenced by init386() when it fills in the IDT and the
  * LDT call gate.  They are only declared here; their definitions are
  * not part of this file section.
  */
 extern inthand_t
 	IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
 	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
 	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
 	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
 	IDTVEC(syscall), IDTVEC(int0x80_syscall);
 
 /*
  * Unpack a hardware segment descriptor into its "soft" representation.
  *
  *	sd	descriptor to read
  *	ssd	soft descriptor to fill in
  *
  * The base and the limit are each stored in two pieces in the hardware
  * form and are glued back together here; the remaining fields are
  * copied one to one.
  */
 void
 sdtossd(sd, ssd)
 	struct segment_descriptor *sd;
 	struct soft_segment_descriptor *ssd;
 {
 	/* Split fields: low part ORed with the shifted high part. */
 	ssd->ssd_limit = sd->sd_lolimit | (sd->sd_hilimit << 16);
 	ssd->ssd_base = sd->sd_lobase | (sd->sd_hibase << 24);

 	/* Plain copies. */
 	ssd->ssd_gran = sd->sd_gran;
 	ssd->ssd_def32 = sd->sd_def32;
 	ssd->ssd_p = sd->sd_p;
 	ssd->ssd_dpl = sd->sd_dpl;
 	ssd->ssd_type = sd->sd_type;
 }
 
 #define PHYSMAP_SIZE	(2 * 8)
 
 /*
  * Populate the (physmap) array with base/bound pairs describing the
  * available physical memory in the system, then test this memory and
  * build the phys_avail array describing the actually-available memory.
  *
  * If we cannot accurately determine the physical memory map, then use
  * value from the 0xE801 call, and failing that, the RTC.
  *
  * Total memory size may be set by the kernel environment variable
  * hw.physmem or the compile-time define MAXMEM.
  */
 #ifdef PC98
 /*
  * PC-98 variant: size physical memory and fill in phys_avail[].
  *
  * Base and extended memory sizes come from pc98_getmemsize(); Maxmem
  * may be overridden by the MAXMEM option or the npx0 "msize" hint.
  * After pmap_bootstrap() every page from avail_start up to Maxmem is
  * mapped through CMAP1/CADDR1 and tested with four bit patterns; good
  * pages are coalesced into start/end pairs in phys_avail[] and counted
  * in physmem.  Finally the message buffer is trimmed off the end of the
  * last chunk and avail_end is set to the start of that trimmed area.
  *
  *	first	passed through unchanged to pmap_bootstrap()
  */
 static void
 getmemsize(int first)
 {
 	u_int	biosbasemem, biosextmem;
 	u_int	pagesinbase, pagesinext;
 	int	pa_indx;
 	int	pg_n;
 	int	speculative_mprobe;
 #ifdef DEV_NPX
 	int	msize;
 #endif
 	unsigned	under16;
 	vm_offset_t	target_page;
 
 	pc98_getmemsize(&biosbasemem, &biosextmem, &under16);
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	pagesinbase = mp_bootaddress(biosbasemem) / PAGE_SIZE;
 #else
 	pagesinbase = biosbasemem * 1024 / PAGE_SIZE;
 #endif
 	pagesinext = biosextmem * 1024 / PAGE_SIZE;
 
  	Maxmem_under16M = under16 * 1024 / PAGE_SIZE;
 
 #ifndef MAXMEM
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".
 	 */
 	Maxmem = pagesinext + 0x100000/PAGE_SIZE;
 	/*
 	 * Indicate that we wish to do a speculative search for memory beyond
 	 * the end of the reported size if the indicated amount is 64MB (0x4000
 	 * pages) - which is the largest amount that the BIOS/bootblocks can
 	 * currently report. If a specific amount of memory is indicated via
 	 * the MAXMEM option or the npx0 "msize", then don't do the speculative
 	 * memory probe.
 	 */
 	if (Maxmem >= 0x4000)
 		speculative_mprobe = TRUE;
 	else
 		speculative_mprobe = FALSE;
 #else
 	/* MAXMEM is divided by 4 to get pages (presumably KB with 4K pages). */
 	Maxmem = MAXMEM/4;
 	speculative_mprobe = FALSE;
 #endif
 
 #ifdef DEV_NPX
 	/* A non-zero npx0 "msize" hint overrides Maxmem (same /4 scaling). */
 	if (resource_int_value("npx", 0, "msize", &msize) == 0) {
 		if (msize != 0) {
 			Maxmem = msize / 4;
 			speculative_mprobe = FALSE;
 		}
 	}
 #endif
 
 #ifdef SMP
 	/* look for the MP hardware - needed for apic addresses */
 	mp_probe();
 #endif
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap (first, 0);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 
 	/*
 	 * We currently don't bother testing base memory.
 	 * XXX  ...but we probably should.
 	 */
 	pa_indx = 0;
 	if (pagesinbase > 1) {
 		phys_avail[pa_indx++] = PAGE_SIZE;	/* skip first page of memory */
 		phys_avail[pa_indx] = ptoa(pagesinbase);/* memory up to the ISA hole */
 		physmem = pagesinbase - 1;
 	} else {
 		/* point at first chunk end */
 		pa_indx++;
 	}
 
 	/* XXX - some of EPSON machines can't use PG_N */
 	pg_n = PG_N;
 	if (pc98_machine_type & M_EPSON_PC98) {
 		switch (epson_machine_id) {
 #ifdef WB_CACHE
 		default:
 #endif
 		case 0x34:		/* PC-486HX */
 		case 0x35:		/* PC-486HG */
 		case 0x3B:		/* PC-486HA */
 			pg_n = 0;
 			break;
 		}
 	}
 
 	/*
 	 * Unconditionally turn the speculative probe off, overriding the
 	 * value computed above.  The only code that could turn it back on
 	 * is the block below, which is compiled out (#ifdef notdef).
 	 */
 	speculative_mprobe = FALSE;
 #ifdef notdef	/* XXX - see below */
 	/*
 	 * Certain 'CPU accelerator' supports over 16MB memory on the machines
 	 * whose BIOS doesn't store true size.  
 	 * To support this, we don't trust BIOS values if Maxmem <= 16MB (0x1000
 	 * pages) - which is the largest amount that the OLD PC-98 can report.
 	 *
 	 * OK: PC-9801NS/R(9.6M)
 	 * OK: PC-9801DA(5.6M)+EUD-H(32M)+Cyrix 5x86
 	 * OK: PC-9821Ap(14.6M)+EUA-T(8M)+Cyrix 5x86-100
 	 * NG: PC-9821Ap(14.6M)+EUA-T(8M)+AMD DX4-100 -> freeze
 	 */
 	if (Maxmem <= 0x1000) {
 		int tmp, page_bad;
 
 		page_bad = FALSE;
 
 		/*
 		 * For Max14.6MB machines, the 0x10f0 page is same as 0x00f0,
 		 * which is BIOS ROM, by overlapping.
 		 * So, we check that page's ability of writing.
 		 */
 		target_page = ptoa(0x10f0);
 
 		/*
 		 * map page into kernel: valid, read/write, non-cacheable
 		 */
 		*(int *)CMAP1 = PG_V | PG_RW | pg_n | target_page;
 		invltlb();
 
 		tmp = *(int *)CADDR1;
 		/*
 		 * Test for alternating 1's and 0's
 		 */
 		*(volatile int *)CADDR1 = 0xaaaaaaaa;
 		if (*(volatile int *)CADDR1 != 0xaaaaaaaa)
 			page_bad = TRUE;
 		/*
 		 * Test for alternating 0's and 1's
 		 */
 		*(volatile int *)CADDR1 = 0x55555555;
 		if (*(volatile int *)CADDR1 != 0x55555555)
 			page_bad = TRUE;
 		/*
 		 * Test for all 1's
 		 */
 		*(volatile int *)CADDR1 = 0xffffffff;
 		if (*(volatile int *)CADDR1 != 0xffffffff)
 			page_bad = TRUE;
 		/*
 		 * Test for all 0's
 		 */
 		*(volatile int *)CADDR1 = 0x0;
 		if (*(volatile int *)CADDR1 != 0x0) {
 			/*
 			 * test of page failed
 			 */
 			page_bad = TRUE;
 		}
 		/*
 		 * Restore original value.
 		 */
 		*(int *)CADDR1 = tmp;
 
 		/*
 		 * Adjust Maxmem if valid/good page.
 		 */
 		if (page_bad == FALSE) {
 			/* '+ 2' is needed to make speculative_mprobe sure */
 			Maxmem = 0x1000 + 2;
 			speculative_mprobe = TRUE;
 		}
 	}
 #endif
 
 	/*
 	 * Walk every page from avail_start to Maxmem.  Only the first word
 	 * of each page is exercised by the pattern test below.
 	 */
 	for (target_page = avail_start; target_page < ptoa(Maxmem); target_page += PAGE_SIZE) {
 		int tmp, page_bad;
 
 		page_bad = FALSE;
 
 		/* skip system area */
 		if (target_page >= ptoa(Maxmem_under16M) &&
 				target_page < ptoa(4096))
 			continue;
 
 		/*
 		 * map page into kernel: valid, read/write, non-cacheable
 		 */
 		*(int *)CMAP1 = PG_V | PG_RW | pg_n | target_page;
 		invltlb();
 
 		tmp = *(int *)CADDR1;
 		/*
 		 * Test for alternating 1's and 0's
 		 */
 		*(volatile int *)CADDR1 = 0xaaaaaaaa;
 		if (*(volatile int *)CADDR1 != 0xaaaaaaaa) {
 			page_bad = TRUE;
 		}
 		/*
 		 * Test for alternating 0's and 1's
 		 */
 		*(volatile int *)CADDR1 = 0x55555555;
 		if (*(volatile int *)CADDR1 != 0x55555555) {
 			page_bad = TRUE;
 		}
 		/*
 		 * Test for all 1's
 		 */
 		*(volatile int *)CADDR1 = 0xffffffff;
 		if (*(volatile int *)CADDR1 != 0xffffffff) {
 			page_bad = TRUE;
 		}
 		/*
 		 * Test for all 0's
 		 */
 		*(volatile int *)CADDR1 = 0x0;
 		if (*(volatile int *)CADDR1 != 0x0) {
 			/*
 			 * test of page failed
 			 */
 			page_bad = TRUE;
 		}
 		/*
 		 * Restore original value.
 		 */
 		*(int *)CADDR1 = tmp;
 
 		/*
 		 * Adjust array of valid/good pages.
 		 */
 		if (page_bad == FALSE) {
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 * If we're also doing a speculative memory
 			 * test and we are at or past the end, bump up Maxmem
 			 * so that we keep going. The first bad page
 			 * will terminate the loop.
 			 */
 			if (phys_avail[pa_indx] == target_page) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 				if (speculative_mprobe == TRUE &&
 				    phys_avail[pa_indx] >= (16*1024*1024))
 					Maxmem++;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 					printf("Too many holes in the physical address space, giving up\n");
 					/* step back onto the end of the last valid chunk */
 					pa_indx--;
 					break;
 				}
 				phys_avail[pa_indx++] = target_page;	/* start */
 				phys_avail[pa_indx] = target_page + PAGE_SIZE;	/* end */
 			}
 			physmem++;
 		}
 	}
 
 	/* Tear down the temporary test mapping. */
 	*(int *)CMAP1 = 0;
 	invltlb();
 
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 *
 	 * Chunks that are too small are dropped (and their pages removed
 	 * from physmem) until a large enough one is last.
 	 * NOTE(review): nothing here stops pa_indx from going below 1;
 	 * presumably some chunk is always large enough - confirm.
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 
 	avail_end = phys_avail[pa_indx];
 }
 #else
 /*
  * Generic (non-PC98) variant: size physical memory and fill in
  * phys_avail[].
  *
  * The physical memory map is built in the local physmap[] array as
  * base/bound pairs, first from INT 15:E820 and, if that yields nothing,
  * from INT 15:E801 or the RTC extended-memory value.  Maxmem is derived
  * from the last segment and may be overridden by the MAXMEM option or
  * the hw.physmem kernel environment variable.  After pmap_bootstrap()
  * each page of each segment is mapped through CMAP1/CADDR1 and tested
  * with four bit patterns; good pages are coalesced into start/end pairs
  * in phys_avail[] and counted in physmem.  Finally the message buffer
  * is trimmed off the end of the last chunk and avail_end is set to the
  * start of that trimmed area.
  *
  *	first	first free physical address after the kernel; pages in
  *		[0x100000, first) are skipped, and the value is passed
  *		to pmap_bootstrap()
  */
 static void
 getmemsize(int first)
 {
 	int i, physmap_idx, pa_indx;
 	u_int basemem, extmem;
 	struct vm86frame vmf;
 	struct vm86context vmc;
 	vm_offset_t pa, physmap[PHYSMAP_SIZE];
 	pt_entry_t pte;
 	const char *cp;
 	struct bios_smap *smap;
 
 	bzero(&vmf, sizeof(struct vm86frame));
 	bzero(physmap, sizeof(physmap));
 
 	/*
 	 * Perform "base memory" related probes & setup
 	 */
 	vm86_intcall(0x12, &vmf);
 	basemem = vmf.vmf_ax;
 	if (basemem > 640) {
 		printf("Preposterous BIOS basemem of %uK, truncating to 640K\n",
 			basemem);
 		basemem = 640;
 	}
 
 	/*
 	 * XXX if biosbasemem is now < 640, there is a `hole'
 	 * between the end of base memory and the start of
 	 * ISA memory.  The hole may be empty or it may
 	 * contain BIOS code or data.  Map it read/write so
 	 * that the BIOS can write to it.  (Memory from 0 to
 	 * the physical end of the kernel is mapped read-only
 	 * to begin with and then parts of it are remapped.
 	 * The parts that aren't remapped form holes that
 	 * remain read-only and are unused by the kernel.
 	 * The base memory area is below the physical end of
 	 * the kernel and right now forms a read-only hole.
 	 * The part of it from PAGE_SIZE to
 	 * (trunc_page(biosbasemem * 1024) - 1) will be
 	 * remapped and used by the kernel later.)
 	 *
 	 * This code is similar to the code used in
 	 * pmap_mapdev, but since no memory needs to be
 	 * allocated we simply change the mapping.
 	 */
 	for (pa = trunc_page(basemem * 1024);
 	     pa < ISA_HOLE_START; pa += PAGE_SIZE) {
 		pte = (pt_entry_t)vtopte(pa + KERNBASE);
 		*pte = pa | PG_RW | PG_V;
 	}
 
 	/*
 	 * if basemem != 640, map pages r/w into vm86 page table so 
 	 * that the bios can scribble on it.
 	 */
 	pte = (pt_entry_t)vm86paddr;
 	for (i = basemem / 4; i < 160; i++)
 		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
 
 	/*
 	 * map page 1 R/W into the kernel page table so we can use it
 	 * as a buffer.  The kernel will unmap this page later.
 	 */
 	pte = (pt_entry_t)vtopte(KERNBASE + (1 << PAGE_SHIFT));
 	*pte = (1 << PAGE_SHIFT) | PG_RW | PG_V;
 
 	/*
 	 * get memory map with INT 15:E820
 	 */
 	vmc.npages = 0;
 	smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
 	vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
 
 	physmap_idx = 0;
 	vmf.vmf_ebx = 0;
 	do {
 		vmf.vmf_eax = 0xE820;
 		vmf.vmf_edx = SMAP_SIG;
 		vmf.vmf_ecx = sizeof(struct bios_smap);
 		i = vm86_datacall(0x15, &vmf, &vmc);
 		if (i || vmf.vmf_eax != SMAP_SIG)
 			break;
 		if (boothowto & RB_VERBOSE)
 			printf("SMAP type=%02x base=%08x %08x len=%08x %08x\n",
 				smap->type,
 				*(u_int32_t *)((char *)&smap->base + 4),
 				(u_int32_t)smap->base,
 				*(u_int32_t *)((char *)&smap->length + 4),
 				(u_int32_t)smap->length);
 
 		/* Only type 1 entries are treated as usable memory. */
 		if (smap->type != 0x01)
 			goto next_run;
 
 		if (smap->length == 0)
 			goto next_run;
 
 		if (smap->base >= 0xffffffff) {
 			printf("%uK of memory above 4GB ignored\n",
 			    (u_int)(smap->length / 1024));
 			goto next_run;
 		}
 
 		/* Reject entries that start below the end of a known segment. */
 		for (i = 0; i <= physmap_idx; i += 2) {
 			if (smap->base < physmap[i + 1]) {
 				if (boothowto & RB_VERBOSE)
 					printf(
 	"Overlapping or non-montonic memory region, ignoring second region\n");
 				goto next_run;
 			}
 		}
 
 		/* Contiguous with the current segment: just extend it. */
 		if (smap->base == physmap[physmap_idx + 1]) {
 			physmap[physmap_idx + 1] += smap->length;
 			goto next_run;
 		}
 
 		physmap_idx += 2;
 		if (physmap_idx == PHYSMAP_SIZE) {
 			printf(
 		"Too many segments in the physical address map, giving up\n");
 			/*
 			 * Step back onto the last valid pair: the code
 			 * after the loop indexes physmap[physmap_idx + 1],
 			 * which would otherwise be past the end of the
 			 * array.
 			 */
 			physmap_idx -= 2;
 			break;
 		}
 		physmap[physmap_idx] = smap->base;
 		physmap[physmap_idx + 1] = smap->base + smap->length;
 next_run:
 		;	/* a label must be followed by a statement */
 	} while (vmf.vmf_ebx != 0);
 
 	if (physmap[1] != 0)
 		goto physmap_done;
 
 	/*
 	 * If we failed above, try memory map with INT 15:E801
 	 */
 	vmf.vmf_ax = 0xE801;
 	if (vm86_intcall(0x15, &vmf) == 0) {
 		extmem = vmf.vmf_cx + vmf.vmf_dx * 64;
 	} else {
 #if 0
 		vmf.vmf_ah = 0x88;
 		vm86_intcall(0x15, &vmf);
 		extmem = vmf.vmf_ax;
 #else
 		/*
 		 * Prefer the RTC value for extended memory.
 		 */
 		extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8);
 #endif
 	}
 
 	/*
 	 * Special hack for chipsets that still remap the 384k hole when
 	 * there's 16MB of memory - this really confuses people that
 	 * are trying to use bus mastering ISA controllers with the
 	 * "16MB limit"; they only have 16MB, but the remapping puts
 	 * them beyond the limit.
 	 *
 	 * If extended memory is between 15-16MB (16-17MB phys address range),
 	 *	chop it to 15MB.
 	 */
 	if ((extmem > 15 * 1024) && (extmem < 16 * 1024))
 		extmem = 15 * 1024;
 
 	physmap[0] = 0;
 	physmap[1] = basemem * 1024;
 	physmap_idx = 2;
 	physmap[physmap_idx] = 0x100000;
 	physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024;
 
 physmap_done:
 	/*
 	 * Now, physmap contains a map of physical memory.
 	 */
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
 	physmap[1] = mp_bootaddress(physmap[1] / 1024);
 
 	/* look for the MP hardware - needed for apic addresses */
 	mp_probe();
 #endif
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
 	 * highest page of the physical address space.  It should be
 	 * called something like "Maxphyspage".  We may adjust this 
 	 * based on ``hw.physmem'' and the results of the memory test.
 	 */
 	Maxmem = atop(physmap[physmap_idx + 1]);
 
 #ifdef MAXMEM
 	Maxmem = MAXMEM / 4;
 #endif
 
 	/*
 	 * hw.physmem is a size in bytes; we also allow k, m, and g suffixes
 	 * for the appropriate modifiers.  This overrides MAXMEM.
 	 */
 	if ((cp = getenv("hw.physmem")) != NULL) {
 		u_int64_t AllowMem, sanity;
 		char *ep;
 
 		sanity = AllowMem = strtouq(cp, &ep, 0);
 		if ((ep != cp) && (*ep != 0)) {
 			/* Each suffix scales by 1024 once more than the next. */
 			switch(*ep) {
 			case 'g':
 			case 'G':
 				AllowMem <<= 10;
 				/* FALLTHROUGH */
 			case 'm':
 			case 'M':
 				AllowMem <<= 10;
 				/* FALLTHROUGH */
 			case 'k':
 			case 'K':
 				AllowMem <<= 10;
 				break;
 			default:
 				AllowMem = sanity = 0;
 			}
 			/* A scaled value below the parsed number means overflow. */
 			if (AllowMem < sanity)
 				AllowMem = 0;
 		}
 		if (AllowMem == 0)
 			printf("Ignoring invalid memory size of '%s'\n", cp);
 		else
 			Maxmem = atop(AllowMem);
 	}
 
 	if (atop(physmap[physmap_idx + 1]) != Maxmem &&
 	    (boothowto & RB_VERBOSE))
 		printf("Physical memory use set to %uK\n", Maxmem * 4);
 
 	/*
 	 * If Maxmem has been increased beyond what the system has detected,
 	 * extend the last memory segment to the new limit.
 	 */ 
 	if (atop(physmap[physmap_idx + 1]) < Maxmem)
 		physmap[physmap_idx + 1] = ptoa(Maxmem);
 
 	/* call pmap initialization to make new kernel address space */
 	pmap_bootstrap(first, 0);
 
 	/*
 	 * Size up each available chunk of physical memory.
 	 */
 	physmap[0] = PAGE_SIZE;		/* mask off page 0 */
 	pa_indx = 0;
 	phys_avail[pa_indx++] = physmap[0];
 	phys_avail[pa_indx] = physmap[0];
 #if 0
 	pte = (pt_entry_t)vtopte(KERNBASE);
 #else
 	pte = (pt_entry_t)CMAP1;
 #endif
 
 	/*
 	 * physmap is in bytes, so when converting to page boundaries,
 	 * round up the start address and round down the end address.
 	 */
 	for (i = 0; i <= physmap_idx; i += 2) {
 		vm_offset_t end;
 
 		end = ptoa(Maxmem);
 		if (physmap[i + 1] < end)
 			end = trunc_page(physmap[i + 1]);
 		for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
 			int tmp, page_bad;
 #if 0
 			int *ptr = 0;
 #else
 			int *ptr = (int *)CADDR1;
 #endif
 
 			/*
 			 * block out kernel memory as not available.
 			 */
 			if (pa >= 0x100000 && pa < first)
 				continue;
 	
 			page_bad = FALSE;
 
 			/*
 			 * map page into kernel: valid, read/write,non-cacheable
 			 */
 			*pte = pa | PG_V | PG_RW | PG_N;
 			invltlb();
 
 			/* Only the first word of the page is exercised. */
 			tmp = *(int *)ptr;
 			/*
 			 * Test for alternating 1's and 0's
 			 */
 			*(volatile int *)ptr = 0xaaaaaaaa;
 			if (*(volatile int *)ptr != 0xaaaaaaaa) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for alternating 0's and 1's
 			 */
 			*(volatile int *)ptr = 0x55555555;
 			if (*(volatile int *)ptr != 0x55555555) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 1's
 			 */
 			*(volatile int *)ptr = 0xffffffff;
 			if (*(volatile int *)ptr != 0xffffffff) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Test for all 0's
 			 */
 			*(volatile int *)ptr = 0x0;
 			if (*(volatile int *)ptr != 0x0) {
 				page_bad = TRUE;
 			}
 			/*
 			 * Restore original value.
 			 */
 			*(int *)ptr = tmp;
 
 			/*
 			 * Adjust array of valid/good pages.
 			 */
 			if (page_bad == TRUE) {
 				continue;
 			}
 			/*
 			 * If this good page is a continuation of the
 			 * previous set of good pages, then just increase
 			 * the end pointer. Otherwise start a new chunk.
 			 * Note that "end" points one higher than end,
 			 * making the range >= start and < end.
 			 */
 			if (phys_avail[pa_indx] == pa) {
 				phys_avail[pa_indx] += PAGE_SIZE;
 			} else {
 				pa_indx++;
 				if (pa_indx == PHYS_AVAIL_ARRAY_END) {
 					printf("Too many holes in the physical address space, giving up\n");
 					/* step back onto the end of the last valid chunk */
 					pa_indx--;
 					break;
 				}
 				phys_avail[pa_indx++] = pa;	/* start */
 				phys_avail[pa_indx] = pa + PAGE_SIZE;	/* end */
 			}
 			physmem++;
 		}
 	}
 	/* Tear down the temporary test mapping. */
 	*pte = 0;
 	invltlb();
 
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
 	 * buffer to avoid complicating other code (message buffer address
 	 * calculation, etc.).
 	 *
 	 * Chunks that are too small are dropped (and their pages removed
 	 * from physmem) until a large enough one is last.
 	 */
 	while (phys_avail[pa_indx - 1] + PAGE_SIZE +
 	    round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) {
 		physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]);
 		phys_avail[pa_indx--] = 0;
 		phys_avail[pa_indx--] = 0;
 	}
 
 	Maxmem = atop(phys_avail[pa_indx]);
 
 	/* Trim off space for the message buffer. */
 	phys_avail[pa_indx] -= round_page(MSGBUF_SIZE);
 
 	avail_end = phys_avail[pa_indx];
 }
 #endif
 
 /*
  * Early machine-dependent initialization.
  *
  * In order, this routine: hooks up proc0's user area and the loader
  * metadata/environment; builds and loads the GDT; initializes the
  * earliest mutexes and takes Giant; builds and loads the LDT and the
  * IDT; brings up the console (and the debugger when DDB is configured);
  * finishes CPU identification and setup; installs the initial TSS and
  * the double fault TSS; sizes memory with getmemsize(); maps and
  * initializes the message buffer; creates the system call gate(s) in
  * the LDT; and fills in proc0's pcb.
  *
  *	first	passed through unchanged to getmemsize()
  */
 void
 init386(first)
 	int first;
 {
 	int x;
 	struct gate_descriptor *gdp;
 	int gsel_tss;
 #ifndef SMP
 	/* table descriptors - used to load tables by microp */
 	/* (SMP kernels presumably get r_gdt/r_idt from elsewhere - confirm) */
 	struct region_descriptor r_gdt, r_idt;
 #endif
 	int off;
 
 	proc0.p_addr = proc0paddr;
 
 	atdevbase = ISA_HOLE_START + KERNBASE;
 
 #ifdef PC98
 	/*
 	 * Initialize DMAC
 	 */
 	pc98_init_dmac();
 #endif
 
 	/* Addresses handed over in bootinfo are offset by KERNBASE before use. */
 	if (bootinfo.bi_modulep) {
 		preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
 		preload_bootstrap_relocate(KERNBASE);
 	} else {
 		printf("WARNING: loader(8) metadata is missing!\n");
 	}
 	if (bootinfo.bi_envp)
 		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
 	/*
 	 * make gdt memory segments, the code segment goes up to end of the
 	 * page with etext in it, the data segment goes to the end of
 	 * the address space
 	 */
 	/*
 	 * XXX text protection is temporarily (?) disabled.  The limit was
 	 * i386_btop(round_page(etext)) - 1.
 	 */
 	gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1;
 	gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1;
 #ifdef SMP
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		i386_btop(sizeof(struct privatespace)) - 1;
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0];
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &SMP_prvspace[0].globaldata.gd_common_tss;
 	SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0].globaldata;
 #else
 	gdt_segs[GPRIV_SEL].ssd_limit =
 		i386_btop(sizeof(struct globaldata)) - 1;
 	gdt_segs[GPRIV_SEL].ssd_base = (int) &__globaldata;
 	gdt_segs[GPROC0_SEL].ssd_base =
 		(int) &__globaldata.gd_common_tss;
 	__globaldata.gd_prvspace = &__globaldata;
 #endif
 
 	/* Convert the soft GDT template into hardware descriptors. */
 	for (x = 0; x < NGDT; x++) {
 #ifdef BDE_DEBUGGER
 		/* avoid overwriting db entries with APM ones */
 		if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL)
 			continue;
 #endif
 		ssdtosd(&gdt_segs[x], &gdt[x].sd);
 	}
 
 	r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
 	r_gdt.rd_base =  (int) gdt;
 	lgdt(&r_gdt);
 
 	/* setup curproc so that mutexes work */
 	PCPU_SET(curproc, &proc0);
 
 	LIST_INIT(&proc0.p_heldmtx);
 	LIST_INIT(&proc0.p_contested);
 
 	mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE);
 #ifdef SMP
 	/*
 	 * Interrupts can happen very early, so initialize imen_mtx here, rather
 	 * than in init_locks().
 	 */
 	mtx_init(&imen_mtx, "imen", MTX_SPIN);
 #endif
 
 	/*
 	 * Giant is used early for at least debugger traps and unexpected traps.
 	 */
 	mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE);
 	mtx_enter(&Giant, MTX_DEF);
 
 	/* make ldt memory segments */
 	/*
 	 * The data segment limit must not cover the user area because we
 	 * don't want the user area to be writable in copyout() etc. (page
 	 * level protection is lost in kernel mode on 386's).  Also, we
 	 * don't want the user area to be writable directly (page level
 	 * protection of the user area is not available on 486's with
 	 * CR0_WP set, because there is no user-read/kernel-write mode).
 	 *
 	 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  And it
 	 * should be spelled ...MAX_USER...
 	 */
 #define VM_END_USER_RW_ADDRESS	VM_MAXUSER_ADDRESS
 	/*
 	 * The code segment limit has to cover the user area until we move
 	 * the signal trampoline out of the user area.  This is safe because
 	 * the code segment cannot be written to directly.
 	 */
 #define VM_END_USER_R_ADDRESS	(VM_END_USER_RW_ADDRESS + UPAGES * PAGE_SIZE)
 	ldt_segs[LUCODE_SEL].ssd_limit = i386_btop(VM_END_USER_R_ADDRESS) - 1;
 	ldt_segs[LUDATA_SEL].ssd_limit = i386_btop(VM_END_USER_RW_ADDRESS) - 1;
 	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
 		ssdtosd(&ldt_segs[x], &ldt[x].sd);
 
 	_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
 	lldt(_default_ldt);
 #ifdef USER_LDT
 	PCPU_SET(currentldt, _default_ldt);
 #endif
 
 	/* exceptions */
 	/* Point every vector at the "reserved" handler, then fill in the known ones. */
 	for (x = 0; x < NIDT; x++)
 		setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(0, &IDTVEC(div),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(1, &IDTVEC(dbg),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(2, &IDTVEC(nmi),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	/* Vectors 3 and 4 (and 0x80 below) use SEL_UPL instead of SEL_KPL. */
  	setidt(3, &IDTVEC(bpt),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(4, &IDTVEC(ofl),  SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(5, &IDTVEC(bnd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(7, &IDTVEC(dna),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	/* Vector 8 is a task gate through GPANIC_SEL, so no handler address. */
 	setidt(8, 0,  SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
 	setidt(9, &IDTVEC(fpusegm),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(10, &IDTVEC(tss),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(11, &IDTVEC(missing),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(12, &IDTVEC(stk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	/* Vector 14 is the only one installed with SDT_SYS386IGT. */
 	setidt(14, &IDTVEC(page),  SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(15, &IDTVEC(rsvd),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(16, &IDTVEC(fpu),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(18, &IDTVEC(mchk),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
  	setidt(0x80, &IDTVEC(int0x80_syscall),
 			SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 
 	r_idt.rd_limit = sizeof(idt0) - 1;
 	r_idt.rd_base = (int) idt;
 	lidt(&r_idt);
 
 	/*
 	 * We need this mutex before the console probe.
 	 */
 	mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE);
 
 	/*
 	 * Initialize the console before we print anything out.
 	 */
 	cninit();
 
 #ifdef DEV_ISA
 	isa_defaultirq();
 #endif
 
 #ifdef DDB
 	kdb_init();
 	if (boothowto & RB_KDB)
 		Debugger("Boot flags requested debugger");
 #endif
 
 	finishidentcpu();	/* Final stage of CPU initialization */
 	/*
 	 * Re-install vectors 6 and 13 with the same values as above.
 	 * NOTE(review): presumably finishidentcpu() may replace them
 	 * temporarily while probing the CPU - confirm.
 	 */
 	setidt(6, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	setidt(13, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 	initializecpu();	/* Initialize CPU registers */
 
 	/* make an initial tss so cpu can get interrupt stack on syscall! */
 	PCPU_SET(common_tss.tss_esp0,
 	    (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16);
 	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
 	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
 	private_tss = 0;
 	PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
 	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
 	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
 	ltr(gsel_tss);
 
 	/*
 	 * Double fault TSS: all stack pointers start at the end of
 	 * dblfault_stack and execution starts at dblfault_handler.
 	 */
 	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
 	    dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)];
 	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
 	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_cr3 = (int)IdlePTD;
 	dblfault_tss.tss_eip = (int) dblfault_handler;
 	dblfault_tss.tss_eflags = PSL_KERNEL;
 	dblfault_tss.tss_ds = dblfault_tss.tss_es =
 	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
 	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
 	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
 	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
 
 	vm86_initialize();
 	getmemsize(first);
 
 	/* now running on new page tables, configured,and u/iom is accessible */
 
 	/* Map the message buffer. */
 	/* (getmemsize() left avail_end at the start of the trimmed-off area) */
 	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
 		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
 
 	msgbufinit(msgbufp, MSGBUF_SIZE);
 
 	/* make a call gate to reenter kernel with */
 	gdp = &ldt[LSYS5CALLS_SEL].gd;
 
 	x = (int) &IDTVEC(syscall);
 	/*
 	 * NOTE(review): the post-increment has no visible effect; x is
 	 * not read again in this function.
 	 */
 	gdp->gd_looffset = x++;
 	gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
 	gdp->gd_stkcpy = 1;
 	gdp->gd_type = SDT_SYS386CGT;
 	gdp->gd_dpl = SEL_UPL;
 	gdp->gd_p = 1;
 	gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16;
 
 	/* XXX does this work? */
 	/* The same gate is duplicated into two more LDT slots. */
 	ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
 	ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL];
 
 	/* transfer to user mode */
 
 	_ucodesel = LSEL(LUCODE_SEL, SEL_UPL);
 	_udatasel = LSEL(LUDATA_SEL, SEL_UPL);
 
 	/* setup proc 0's pcb */
 	proc0.p_addr->u_pcb.pcb_flags = 0;
 	proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
 	proc0.p_addr->u_pcb.pcb_schednest = 0;
 	proc0.p_addr->u_pcb.pcb_ext = 0;
 	proc0.p_md.md_regs = &proc0_tf;
 }
 
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 static void f00f_hack(void *unused);
 SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);

 /*
  * Workaround for the Pentium F00F bug, run from SYSINIT.
  *
  * Does nothing unless has_f00f_bug is set.  Otherwise the IDT is copied
  * into a freshly allocated two-page region, positioned so that its
  * first seven entries end exactly at the boundary between the two
  * pages; the copy is loaded, `idt' is switched over to it, and the
  * first of the two pages is made read-only.
  *
  *	unused	SYSINIT argument, ignored
  */
 static void
 f00f_hack(void *unused)
 {
 #ifndef SMP
 	struct region_descriptor r_idt;
 #endif
 	struct gate_descriptor *relocated;
 	vm_offset_t region;

 	if (!has_f00f_bug)
 		return;

 	printf("Intel Pentium detected, installing workaround for F00F bug\n");

 	r_idt.rd_limit = sizeof(idt0) - 1;

 	region = kmem_alloc(kernel_map, PAGE_SIZE * 2);
 	if (region == 0)
 		panic("kmem_alloc returned 0");
 	if (((unsigned int)region & (PAGE_SIZE - 1)) != 0)
 		panic("kmem_alloc returned non-page-aligned memory");

 	/* Put the first seven entries in the lower page */
 	relocated = (struct gate_descriptor *)(region + PAGE_SIZE - (7 * 8));
 	bcopy(idt, relocated, sizeof(idt0));

 	/* Switch the CPU to the copy before repointing `idt' at it. */
 	r_idt.rd_base = (int)relocated;
 	lidt(&r_idt);
 	idt = relocated;

 	/* Write-protect the lower page only. */
 	if (vm_map_protect(kernel_map, region, region + PAGE_SIZE,
 	    VM_PROT_READ, FALSE) != KERN_SUCCESS)
 		panic("vm_map_protect failed");
 }
 #endif /* defined(I586_CPU) && !NO_F00F_HACK */
 
 int
 ptrace_set_pc(p, addr)
 	struct proc *p;
 	unsigned long addr;
 {
 	p->p_md.md_regs->tf_eip = addr;
 	return (0);
 }
 
 int
 ptrace_single_step(p)
 	struct proc *p;
 {
 	p->p_md.md_regs->tf_eflags |= PSL_T;
 	return (0);
 }
 
 /*
  * Decide whether the byte range [addr, addr + len) of process `p's user
  * area may be read.
  *
  * Returns 0 when the range lies entirely within struct user, or
  * entirely within the saved trap frame (located by its offset from
  * p->p_addr).  Returns EPERM otherwise, including when addr + len
  * wraps around.
  */
 int ptrace_read_u_check(p, addr, len)
 	struct proc *p;
 	vm_offset_t addr;
 	size_t len;
 {
 	vm_offset_t frame_off, limit;

 	limit = (vm_offset_t) (addr + len);

 	/* Wrap-around of the end address. */
 	if (limit < addr)
 		return EPERM;

 	/* Entirely inside struct user. */
 	if (limit <= sizeof(struct user))
 		return 0;

 	/* Otherwise it must sit entirely inside the trap frame. */
 	frame_off = (char *) p->p_md.md_regs - (char *) p->p_addr;
 	if ((vm_offset_t) addr >= frame_off &&
 	    limit <= (vm_offset_t) (frame_off + sizeof(struct trapframe)))
 		return 0;

 	return EPERM;
 }
 
 int ptrace_write_u(p, off, data)
 	struct proc *p;
 	vm_offset_t off;
 	long data;
 {
 	struct trapframe frame_copy;
 	vm_offset_t min;
 	struct trapframe *tp;
 
 	/*
 	 * Privileged kernel state is scattered all over the user area.
 	 * Only allow write access to parts of regs and to fpregs.
 	 */
 	min = (char *)p->p_md.md_regs - (char *)p->p_addr;
 	if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) {
 		tp = p->p_md.md_regs;
 		frame_copy = *tp;
 		*(int *)((char *)&frame_copy + (off - min)) = data;
 		if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) ||
 		    !CS_SECURE(frame_copy.tf_cs))
 			return (EINVAL);
 		*(int*)((char *)p->p_addr + off) = data;
 		return (0);
 	}
 	min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
 	if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
 		*(int*)((char *)p->p_addr + off) = data;
 		return (0);
 	}
 	return (EFAULT);
 }
 
 /*
  * Copy the general register state of process `p' into `regs'.
  *
  * Everything comes from the saved trap frame except %gs, which is read
  * from the pcb.  Always returns 0.
  */
 int
 fill_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *tf = p->p_md.md_regs;

 	/* segment registers */
 	regs->r_cs = tf->tf_cs;
 	regs->r_ss = tf->tf_ss;
 	regs->r_ds = tf->tf_ds;
 	regs->r_es = tf->tf_es;
 	regs->r_fs = tf->tf_fs;
 	regs->r_gs = p->p_addr->u_pcb.pcb_gs;

 	/* general purpose registers */
 	regs->r_eax = tf->tf_eax;
 	regs->r_ebx = tf->tf_ebx;
 	regs->r_ecx = tf->tf_ecx;
 	regs->r_edx = tf->tf_edx;
 	regs->r_esi = tf->tf_esi;
 	regs->r_edi = tf->tf_edi;
 	regs->r_ebp = tf->tf_ebp;
 	regs->r_esp = tf->tf_esp;

 	/* control state */
 	regs->r_eip = tf->tf_eip;
 	regs->r_eflags = tf->tf_eflags;

 	return (0);
 }
 
 /*
  * Load the general register state of process `p' from `regs'.
  *
  * The new eflags and cs are checked with EFL_SECURE/CS_SECURE first;
  * if either check fails nothing is modified and EINVAL is returned.
  * Otherwise every register is stored into the saved trap frame, except
  * %gs which goes to the pcb, and 0 is returned.
  */
 int
 set_regs(p, regs)
 	struct proc *p;
 	struct reg *regs;
 {
 	struct trapframe *tf = p->p_md.md_regs;

 	if (!(EFL_SECURE(regs->r_eflags, tf->tf_eflags) &&
 	    CS_SECURE(regs->r_cs)))
 		return (EINVAL);

 	/* segment registers */
 	tf->tf_cs = regs->r_cs;
 	tf->tf_ss = regs->r_ss;
 	tf->tf_ds = regs->r_ds;
 	tf->tf_es = regs->r_es;
 	tf->tf_fs = regs->r_fs;
 	p->p_addr->u_pcb.pcb_gs = regs->r_gs;

 	/* general purpose registers */
 	tf->tf_eax = regs->r_eax;
 	tf->tf_ebx = regs->r_ebx;
 	tf->tf_ecx = regs->r_ecx;
 	tf->tf_edx = regs->r_edx;
 	tf->tf_esi = regs->r_esi;
 	tf->tf_edi = regs->r_edi;
 	tf->tf_ebp = regs->r_ebp;
 	tf->tf_esp = regs->r_esp;

 	/* control state */
 	tf->tf_eip = regs->r_eip;
 	tf->tf_eflags = regs->r_eflags;

 	return (0);
 }
 
 int
 fill_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 	bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
 	return (0);
 }
 
 int
 set_fpregs(p, fpregs)
 	struct proc *p;
 	struct fpreg *fpregs;
 {
 	bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
 	return (0);
 }
 
 /*
  * Copy the debug register values saved in the pcb of process p into
  * *dbregs.  dr4 and dr5 are not stored in the pcb and are reported
  * as 0.  Always returns 0.
  */
 int
 fill_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *saved;
 
 	saved = &p->p_addr->u_pcb;
 
 	/* Breakpoint address registers. */
 	dbregs->dr0 = saved->pcb_dr0;
 	dbregs->dr1 = saved->pcb_dr1;
 	dbregs->dr2 = saved->pcb_dr2;
 	dbregs->dr3 = saved->pcb_dr3;
 
 	/* No saved copies exist for these two. */
 	dbregs->dr4 = 0;
 	dbregs->dr5 = 0;
 
 	/* dr6 and dr7. */
 	dbregs->dr6 = saved->pcb_dr6;
 	dbregs->dr7 = saved->pcb_dr7;
 
 	return (0);
 }
 
 int
 set_dbregs(p, dbregs)
 	struct proc *p;
 	struct dbreg *dbregs;
 {
 	struct pcb *pcb;
 	int i;
 	u_int32_t mask1, mask2;
 
 	/*
 	 * Don't let an illegal value for dr7 get set.  Specifically,
 	 * check for undefined settings.  Setting these bit patterns
 	 * result in undefined behaviour and can lead to an unexpected
 	 * TRCTRAP.
 	 */
 	for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; 
 	     i++, mask1 <<= 2, mask2 <<= 2)
 		if ((dbregs->dr7 & mask1) == mask2)
 			return (EINVAL);
 
 	if (dbregs->dr7 & 0x0000fc00)
 		return (EINVAL);
 
 
 
 	pcb = &p->p_addr->u_pcb;
 
 	/*
 	 * Don't let a process set a breakpoint that is not within the
 	 * process's address space.  If a process could do this, it
 	 * could halt the system by setting a breakpoint in the kernel
 	 * (if ddb was enabled).  Thus, we need to check to make sure
 	 * that no breakpoints are being enabled for addresses outside
 	 * process's address space, unless, perhaps, we were called by
 	 * uid 0.
 	 *
 	 * XXX - what about when the watched area of the user's
 	 * address space is written into from within the kernel
 	 * ... wouldn't that still cause a breakpoint to be generated
 	 * from within kernel mode?
 	 */
 
 	if (suser(p) != 0) {
 		if (dbregs->dr7 & 0x3) {
 			/* dr0 is enabled */
 			if (dbregs->dr0 >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		if (dbregs->dr7 & (0x3<<2)) {
 			/* dr1 is enabled */
 			if (dbregs->dr1 >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 
 		if (dbregs->dr7 & (0x3<<4)) {
 			/* dr2 is enabled */
 			if (dbregs->dr2 >= VM_MAXUSER_ADDRESS)
        				return (EINVAL);
 		}
 
 		if (dbregs->dr7 & (0x3<<6)) {
 			/* dr3 is enabled */
 			if (dbregs->dr3 >= VM_MAXUSER_ADDRESS)
 				return (EINVAL);
 		}
 	}
 
 	pcb->pcb_dr0 = dbregs->dr0;
 	pcb->pcb_dr1 = dbregs->dr1;
 	pcb->pcb_dr2 = dbregs->dr2;
 	pcb->pcb_dr3 = dbregs->dr3;
 	pcb->pcb_dr6 = dbregs->dr6;
 	pcb->pcb_dr7 = dbregs->dr7;
 
 	pcb->pcb_flags |= PCB_DBREGS;
 
 	return (0);
 }
 
 /*
  * Return > 0 if a hardware breakpoint has been hit, and the
  * breakpoint was in user space.  Return 0, otherwise.
  */
 int
 user_dbreg_trap(void)
 {
         u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */
         u_int32_t bp;       /* breakpoint bits extracted from dr6 */
         int nbp;            /* number of breakpoints that triggered */
         caddr_t addr[4];    /* breakpoint addresses */
         int i;
         
         dr7 = rdr7();
         if ((dr7 & 0x000000ff) == 0) {
                 /*
                  * all GE and LE bits in the dr7 register are zero,
                  * thus the trap couldn't have been caused by the
                  * hardware debug registers
                  */
                 return 0;
         }
 
         nbp = 0;
         dr6 = rdr6();
         bp = dr6 & 0x0000000f;
 
         if (!bp) {
                 /*
                  * None of the breakpoint bits are set meaning this
                  * trap was not caused by any of the debug registers
                  */
                 return 0;
         }
 
         /*
          * at least one of the breakpoints were hit, check to see
          * which ones and if any of them are user space addresses
          */
 
         if (bp & 0x01) {
                 addr[nbp++] = (caddr_t)rdr0();
         }
         if (bp & 0x02) {
                 addr[nbp++] = (caddr_t)rdr1();
         }
         if (bp & 0x04) {
                 addr[nbp++] = (caddr_t)rdr2();
         }
         if (bp & 0x08) {
                 addr[nbp++] = (caddr_t)rdr3();
         }
 
         for (i=0; i<nbp; i++) {
                 if (addr[i] <
                     (caddr_t)VM_MAXUSER_ADDRESS) {
                         /*
                          * addr[i] is in user space
                          */
                         return nbp;
                 }
         }
 
         /*
          * None of the breakpoints are in user space.
          */
         return 0;
 }
 
 
 #ifndef DDB
 /*
  * Stand-in used when the kernel is built without DDB: it only prints a
  * message recording that it was called, quoting msg.
  */
 void
 Debugger(const char *msg)
 {
 
 	printf("Debugger(\"%s\") called.\n", msg);
 }
 #endif /* !DDB */
 
 #include <sys/disklabel.h>
 
 /*
  * Determine the size of the transfer, and make sure it is
  * within the boundaries of the partition. Adjust transfer
  * if needed, and signal errors or early completion.
  */
 int
 bounds_check_with_label(struct bio *bp, struct disklabel *lp, int wlabel)
 {
         struct partition *p = lp->d_partitions + dkpart(bp->bio_dev);
         int labelsect = lp->d_partitions[0].p_offset;
         int maxsz = p->p_size,
                 sz = (bp->bio_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT;
 
         /* overwriting disk label ? */
         /* XXX should also protect bootstrap in first 8K */
         if (bp->bio_blkno + p->p_offset <= LABELSECTOR + labelsect &&
 #if LABELSECTOR != 0
             bp->bio_blkno + p->p_offset + sz > LABELSECTOR + labelsect &&
 #endif
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 
 #if     defined(DOSBBSECTOR) && defined(notyet)
         /* overwriting master boot record? */
         if (bp->bio_blkno + p->p_offset <= DOSBBSECTOR &&
             (bp->bio_cmd == BIO_WRITE) && wlabel == 0) {
                 bp->bio_error = EROFS;
                 goto bad;
         }
 #endif
 
         /* beyond partition? */
         if (bp->bio_blkno < 0 || bp->bio_blkno + sz > maxsz) {
                 /* if exactly at end of disk, return an EOF */
                 if (bp->bio_blkno == maxsz) {
                         bp->bio_resid = bp->bio_bcount;
                         return(0);
                 }
                 /* or truncate if part of it fits */
                 sz = maxsz - bp->bio_blkno;
                 if (sz <= 0) {
                         bp->bio_error = EINVAL;
                         goto bad;
                 }
                 bp->bio_bcount = sz << DEV_BSHIFT;
         }
 
         bp->bio_pblkno = bp->bio_blkno + p->p_offset;
         return(1);
 
 bad:
         bp->bio_flags |= BIO_ERROR;
         return(-1);
 }
 
 #ifdef DDB
 
 /*
  * Provide inb() and outb() as functions.  They are normally only
  * available as macros calling inlined functions, thus cannot be
  * called inside DDB.
  *
  * The actual code is stolen from <machine/cpufunc.h>, and de-inlined.
  */
 
 /*
  * Drop any macro definitions of these names so that real functions
  * called inb and outb can be defined in this file.
  */
 #undef inb
 #undef outb
 
 /*
  * Prototypes for the out-of-line functions; declared here to
  * silence compiler warnings.
  */
 u_char inb(u_int);
 void outb(u_int, u_char);
 
 /*
  * Out-of-line inb: read one byte from I/O port `port' and return it.
  */
 u_char
 inb(u_int port)
 {
 	u_char value;
 
 	/*
 	 * The port operand is spelled %%dx rather than %1 because i/o is
 	 * done at %dx and not at %edx, while gcc generates inferior code
 	 * (movw instead of movl) if we tell it to load (u_short) port.
 	 */
 	__asm __volatile("inb %%dx,%0"
 	    : "=a" (value)
 	    : "d" (port));
 
 	return (value);
 }
 
 /*
  * Out-of-line outb: write the byte `data' to I/O port `port'.
  */
 void
 outb(u_int port, u_char data)
 {
 	/*
 	 * The copy into a local is unnecessary but helps gcc's register
 	 * allocator.  This makes a large difference for gcc-1.40 and a
 	 * tiny difference for gcc-2.6.0.  For gcc-1.40, al had to be
 	 * ``asm("ax")'' for best results.  gcc-2.6.0 can't handle this.
 	 */
 	u_char al = data;
 
 	__asm __volatile("outb %0,%%dx"
 	    :
 	    : "a" (al), "d" (port));
 }
 
 #endif /* DDB */