Index: stable/4/sys/alpha/alpha/machdep.c =================================================================== --- stable/4/sys/alpha/alpha/machdep.c (revision 118739) +++ stable/4/sys/alpha/alpha/machdep.c (revision 118740) @@ -1,2230 +1,2231 @@ /*- * Copyright (c) 1998 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center and by Chris G. Demetriou. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. * All rights reserved. * * Author: Chris G. Demetriou * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include "opt_compat.h" #include "opt_ddb.h" #include "opt_simos.h" #include "opt_msgbuf.h" #include "opt_maxmem.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct proc* curproc; struct proc* fpcurproc; struct pcb* curpcb; u_int64_t cycles_per_usec; u_int32_t cycles_per_sec; int whichqs, whichrtqs, whichidqs; int cold = 1; struct platform platform; alpha_chipset_t chipset; struct bootinfo_kernel bootinfo; struct timeval switchtime; int switchticks; struct user *proc0paddr; char machine[] = "alpha"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); static char cpu_model[128]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, ""); #ifdef DDB /* start and end of kernel symbol table */ void *ksym_start, *ksym_end; #endif int alpha_unaligned_print = 1; /* warn about unaligned accesses */ int alpha_unaligned_fix = 1; /* fix up unaligned accesses */ int alpha_unaligned_sigbus = 0; /* don't SIGBUS on fixed-up accesses */ SYSCTL_INT(_machdep, CPU_UNALIGNED_PRINT, unaligned_print, CTLFLAG_RW, &alpha_unaligned_print, 0, ""); SYSCTL_INT(_machdep, CPU_UNALIGNED_FIX, unaligned_fix, CTLFLAG_RW, &alpha_unaligned_fix, 0, ""); SYSCTL_INT(_machdep, CPU_UNALIGNED_SIGBUS, unaligned_sigbus, CTLFLAG_RW, &alpha_unaligned_sigbus, 0, ""); static void cpu_startup __P((void *)); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); struct msgbuf *msgbufp=0; int bootverbose = 0, Maxmem = 0; long dumplo; int totalphysmem; /* total amount of physical memory in system */ int physmem; /* physical memory used by NetBSD + some rsvd */ int resvmem; /* amount of memory reserved for PROM */ int unusedmem; /* amount of memory for OS that we don't use */ int unknownmem; /* amount of memory with an unknown use */ int ncpus; /* number of cpus */ vm_offset_t phys_avail[10]; static int sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, alpha_ptob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "I", ""); static int sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, 
alpha_ptob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "I", ""); SYSCTL_INT(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, ""); /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static void identifycpu __P((void)); static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; /* * Hooked into the shutdown chain; if the system is to be halted, * unconditionally drop back to the SRM console. */ static void alpha_srm_shutdown(void *junk, int howto) { if (howto & RB_HALT) alpha_pal_halt(); } static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; vm_offset_t firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf("%s", version); identifycpu(); /* startrtclock(); */ #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ld (%ldK bytes)\n", alpha_ptob(Maxmem), alpha_ptob(Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08lx - 0x%08lx, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } /* * Calculate callout wheel size */ for (callwheelsize = 1, callwheelbits = 0; callwheelsize < ncallout; callwheelsize <<= 1, ++callwheelbits) ; callwheelmask = callwheelsize - 1; /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); valloc(callwheel, struct callout_tailq, callwheelsize); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/20 of our ram over 64MB. When auto-sizing * the buffer cache we limit the eventual kva reservation to * maxbcache bytes. 
*/ if (nbuf == 0) { int factor = 4 * BKVASIZE / 1024; int kbytes = physmem * (PAGE_SIZE / 1024); nbuf = 50; if (kbytes > 4096) nbuf += min((kbytes - 4096) / factor, 65536 / factor); if (kbytes > 65536) nbuf += (kbytes - 65536) * 2 / (factor * 5); if (maxbcache && nbuf > maxbcache / BKVASIZE) nbuf = maxbcache / BKVASIZE; } nswbuf = max(min(nbuf/4, 64), 16); valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); v = bufhashinit(v); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (vm_offset_t)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE)); buffer_map->system_map = 1; pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size); pager_map->system_map = 1; exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size); mb_map->system_map = 1; + mbutltop = mbutl; } /* * Initialize callouts */ SLIST_INIT(&callfree); for (i = 0; i < ncallout; i++) { callout_init(&callout[i]); callout[i].c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle); } for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&callwheel[i]); } #if defined(USERCONFIG) #if defined(USERCONFIG_BOOT) if (1) #else if (boothowto & RB_CONFIG) #endif { userconfig(); cninit(); /* the preferred console may have changed */ } #endif printf("avail memory = %ld (%ldK bytes)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, alpha_srm_shutdown, 0, SHUTDOWN_PRI_LAST); } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } int unregister_netisr(num) int num; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("unregister_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = NULL; return (0); } /* * Retrieve the platform name from the DSR. */ const char * alpha_dsr_sysname() { struct dsrdb *dsr; const char *sysname; /* * DSR does not exist on early HWRPB versions. */ if (hwrpb->rpb_version < HWRPB_DSRDB_MINVERS) return (NULL); dsr = (struct dsrdb *)(((caddr_t)hwrpb) + hwrpb->rpb_dsrdb_off); sysname = (const char *)((caddr_t)dsr + (dsr->dsr_sysname_off + sizeof(u_int64_t))); return (sysname); } /* * Lookup the system specified system variation in the provided table, * returning the model string on match. 
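The buffer-cache sizing above follows the heuristic described in the preceding comment: cover roughly 1/4 of the first 64MB of RAM, 1/20 of RAM beyond that, capped by maxbcache. A minimal userland sketch of that calculation; the PAGE_SIZE and BKVASIZE values here are assumed for illustration (the real ones come from the kernel headers).

#include <stdio.h>

#define PAGE_SIZE 8192          /* Alpha page size, checked against the HWRPB above */
#define BKVASIZE  16384         /* assumed nominal buffer KVA size */

/* Sketch of the nbuf auto-sizing heuristic used in cpu_startup(). */
static int
autosize_nbuf(long physpages, long maxbcache)
{
    int factor = 4 * BKVASIZE / 1024;           /* the "1/4 of RAM" conversion */
    int kbytes = physpages * (PAGE_SIZE / 1024);
    int nbuf = 50;                              /* small fixed floor */
    int extra;

    if (kbytes > 4096) {                        /* 1/4 of RAM up to 64MB */
        extra = (kbytes - 4096) / factor;
        nbuf += extra < 65536 / factor ? extra : 65536 / factor;
    }
    if (kbytes > 65536)                         /* 1/20 of RAM beyond 64MB */
        nbuf += (kbytes - 65536) * 2 / (factor * 5);
    if (maxbcache && nbuf > maxbcache / BKVASIZE)
        nbuf = maxbcache / BKVASIZE;            /* cap the total KVA reservation */
    return (nbuf);
}

int
main(void)
{
    /* e.g. 128MB of 8K pages, no maxbcache limit */
    printf("nbuf = %d\n", autosize_nbuf(128L * 1024 * 1024 / PAGE_SIZE, 0));
    return (0);
}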
*/ const char * alpha_variation_name(u_int64_t variation, const struct alpha_variation_table *avtp) { int i; for (i = 0; avtp[i].avt_model != NULL; i++) if (avtp[i].avt_variation == variation) return (avtp[i].avt_model); return (NULL); } /* * Generate a default platform name based for unknown system variations. */ const char * alpha_unknown_sysname() { static char s[128]; /* safe size */ snprintf(s, sizeof(s), "%s family, unknown model variation 0x%lx", platform.family, hwrpb->rpb_variation & SV_ST_MASK); return ((const char *)s); } static void identifycpu(void) { u_int64_t type, major, minor; u_int64_t amask; struct pcs *pcsp; char *cpuname[] = { "unknown", /* 0 */ "EV3", /* 1 */ "EV4 (21064)", /* 2 */ "Simulation", /* 3 */ "LCA Family", /* 4 */ "EV5 (21164)", /* 5 */ "EV45 (21064A)", /* 6 */ "EV56 (21164A)", /* 7 */ "EV6 (21264)", /* 8 */ "PCA56 (21164PC)" /* 9 */ }; /* * print out CPU identification information. */ printf("%s\n%s, %ldMHz\n", platform.family, platform.model, hwrpb->rpb_cc_freq / 1000000); /* XXX true for 21164? */ printf("%ld byte page size, %d processor%s.\n", hwrpb->rpb_page_size, ncpus, ncpus == 1 ? "" : "s"); #if 0 /* this isn't defined for any systems that we run on? */ printf("serial number 0x%lx 0x%lx\n", ((long *)hwrpb->rpb_ssn)[0], ((long *)hwrpb->rpb_ssn)[1]); /* and these aren't particularly useful! */ printf("variation: 0x%lx, revision 0x%lx\n", hwrpb->rpb_variation, *(long *)hwrpb->rpb_revision); #endif pcsp = LOCATE_PCS(hwrpb, hwrpb->rpb_primary_cpu_id); /* cpu type */ type = pcsp->pcs_proc_type; major = (type & PCS_PROC_MAJOR) >> PCS_PROC_MAJORSHIFT; minor = (type & PCS_PROC_MINOR) >> PCS_PROC_MINORSHIFT; if (major < sizeof(cpuname)/sizeof(char *)) printf("CPU: %s major=%lu minor=%lu", cpuname[major], major, minor); else printf("CPU: major=%lu minor=%lu\n", major, minor); /* amask */ if (major >= PCS_PROC_EV56) { amask = 0xffffffff; /* 32 bit for printf */ amask = (~alpha_amask(amask)) & amask; printf(" extensions=0x%b\n", (u_int32_t) amask, "\020" "\001BWX" "\002FIX" "\003CIX" "\011MVI" "\012PRECISE" ); } else printf("\n"); /* PAL code */ printf("OSF PAL rev: 0x%lx\n", pcsp->pcs_palrevisions[PALvar_OSF1]); } extern char kernel_text[], _end[]; void alpha_init(pfn, ptb, bim, bip, biv) u_long pfn; /* first free PFN number */ u_long ptb; /* PFN of current level 1 page table */ u_long bim; /* bootinfo magic */ u_long bip; /* bootinfo pointer */ u_long biv; /* bootinfo version */ { int phys_avail_cnt; char *bootinfo_msg; vm_offset_t kernstart, kernend; vm_offset_t kernstartpfn, kernendpfn, pfn0, pfn1; struct mddt *mddtp; struct mddt_cluster *memc; int i, mddtweird; int cputype; char *p; /* NO OUTPUT ALLOWED UNTIL FURTHER NOTICE */ /* * Turn off interrupts (not mchecks) and floating point. * Make sure the instruction and data streams are consistent. */ (void)alpha_pal_swpipl(ALPHA_PSL_IPL_HIGH); /* alpha_pal_wrfen(0); */ ALPHA_TBIA(); alpha_pal_imb(); /* * Get critical system information (if possible, from the * information provided by the boot program). 
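identifycpu() above prints the architecture-extension bits returned by the AMASK instruction using the kernel's "%b" bit-name format. Below is a small sketch of the same decoding done by hand; the bit positions are my reading of the name string in the printf above (BWX, FIX, CIX, MVI, PRECISE mapping to bits 0, 1, 2, 8 and 9), not something stated elsewhere in this file.

#include <stdio.h>
#include <stdint.h>

/* Bit positions assumed from the "%b" name string in identifycpu():
 * BWX=bit 0, FIX=bit 1, CIX=bit 2, MVI=bit 8, PRECISE=bit 9. */
static const struct {
    int bit;
    const char *name;
} amask_bits[] = {
    { 0, "BWX" }, { 1, "FIX" }, { 2, "CIX" }, { 8, "MVI" }, { 9, "PRECISE" },
};

/* Print the extension names for an (already inverted) amask value. */
static void
print_extensions(uint32_t amask)
{
    size_t i;
    int first = 1;

    printf("extensions=0x%x <", amask);
    for (i = 0; i < sizeof(amask_bits) / sizeof(amask_bits[0]); i++) {
        if (amask & (1u << amask_bits[i].bit)) {
            printf("%s%s", first ? "" : ",", amask_bits[i].name);
            first = 0;
        }
    }
    printf(">\n");
}

int
main(void)
{
    print_extensions(0x303);    /* BWX, FIX, MVI and PRECISE set */
    return (0);
}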
*/ bootinfo_msg = NULL; if (bim == BOOTINFO_MAGIC) { if (biv == 0) { /* backward compat */ biv = *(u_long *)bip; bip += 8; } switch (biv) { case 1: { struct bootinfo_v1 *v1p = (struct bootinfo_v1 *)bip; bootinfo.ssym = v1p->ssym; bootinfo.esym = v1p->esym; bootinfo.kernend = v1p->kernend; bootinfo.modptr = v1p->modptr; bootinfo.envp = v1p->envp; /* hwrpb may not be provided by boot block in v1 */ if (v1p->hwrpb != NULL) { bootinfo.hwrpb_phys = ((struct rpb *)v1p->hwrpb)->rpb_phys; bootinfo.hwrpb_size = v1p->hwrpbsize; } else { bootinfo.hwrpb_phys = ((struct rpb *)HWRPB_ADDR)->rpb_phys; bootinfo.hwrpb_size = ((struct rpb *)HWRPB_ADDR)->rpb_size; } bcopy(v1p->boot_flags, bootinfo.boot_flags, min(sizeof v1p->boot_flags, sizeof bootinfo.boot_flags)); bcopy(v1p->booted_kernel, bootinfo.booted_kernel, min(sizeof v1p->booted_kernel, sizeof bootinfo.booted_kernel)); /* booted dev not provided in bootinfo */ init_prom_interface((struct rpb *) ALPHA_PHYS_TO_K0SEG(bootinfo.hwrpb_phys)); prom_getenv(PROM_E_BOOTED_DEV, bootinfo.booted_dev, sizeof bootinfo.booted_dev); break; } default: bootinfo_msg = "unknown bootinfo version"; goto nobootinfo; } } else { bootinfo_msg = "boot program did not pass bootinfo"; nobootinfo: bootinfo.ssym = (u_long)&_end; bootinfo.esym = (u_long)&_end; #ifdef SIMOS { char* p = (char*)bootinfo.ssym + 8; if (p[EI_MAG0] == ELFMAG0 && p[EI_MAG1] == ELFMAG1 && p[EI_MAG2] == ELFMAG2 && p[EI_MAG3] == ELFMAG3) { bootinfo.ssym = (u_long) p; bootinfo.esym = (u_long)p + *(u_long*)(p - 8); } } #endif bootinfo.hwrpb_phys = ((struct rpb *)HWRPB_ADDR)->rpb_phys; bootinfo.hwrpb_size = ((struct rpb *)HWRPB_ADDR)->rpb_size; init_prom_interface((struct rpb *)HWRPB_ADDR); prom_getenv(PROM_E_BOOTED_OSFLAGS, bootinfo.boot_flags, sizeof bootinfo.boot_flags); #ifndef SIMOS prom_getenv(PROM_E_BOOTED_FILE, bootinfo.booted_kernel, sizeof bootinfo.booted_kernel); #endif prom_getenv(PROM_E_BOOTED_DEV, bootinfo.booted_dev, sizeof bootinfo.booted_dev); } /* * Initialize the kernel's mapping of the RPB. It's needed for * lots of things. */ hwrpb = (struct rpb *)ALPHA_PHYS_TO_K0SEG(bootinfo.hwrpb_phys); /* * Remember how many cycles there are per microsecond, * so that we can use delay(). Round up, for safety. */ cycles_per_usec = (hwrpb->rpb_cc_freq + 999999) / 1000000; /* * Remember how many cycles per closk for coping with missed * clock interrupts. */ cycles_per_sec = hwrpb->rpb_cc_freq; /* Get the loader(8) metadata */ preload_metadata = (caddr_t)bootinfo.modptr; kern_envp = bootinfo.envp; /* Do basic tuning, hz etc */ init_param1(); /* * Initalize the (temporary) bootstrap console interface, so * we can use printf until the VM system starts being setup. * The real console is initialized before then. */ init_bootstrap_console(); /* OUTPUT NOW ALLOWED */ /* delayed from above */ if (bootinfo_msg) printf("WARNING: %s (0x%lx, 0x%lx, 0x%lx)\n", bootinfo_msg, bim, bip, biv); /* * Point interrupt/exception vectors to our own. */ alpha_pal_wrent(XentInt, ALPHA_KENTRY_INT); alpha_pal_wrent(XentArith, ALPHA_KENTRY_ARITH); alpha_pal_wrent(XentMM, ALPHA_KENTRY_MM); alpha_pal_wrent(XentIF, ALPHA_KENTRY_IF); alpha_pal_wrent(XentUna, ALPHA_KENTRY_UNA); alpha_pal_wrent(XentSys, ALPHA_KENTRY_SYS); /* * Clear pending machine checks and error reports, and enable * system- and processor-correctable error reporting. */ alpha_pal_wrmces(alpha_pal_rdmces() & ~(ALPHA_MCES_DSC|ALPHA_MCES_DPC)); /* * Find out what hardware we're on, and do basic initialization. 
*/ cputype = hwrpb->rpb_type; if (cputype < 0) { /* * At least some white-box (NT) systems have SRM which * reports a systype that's the negative of their * blue-box (UNIX/OVMS) counterpart. */ cputype = -cputype; } if (cputype >= API_ST_BASE) { if (cputype >= napi_cpuinit + API_ST_BASE) { platform_not_supported(cputype); /* NOTREACHED */ } cputype -= API_ST_BASE; api_cpuinit[cputype].init(cputype); } else { if (cputype >= ncpuinit) { platform_not_supported(cputype); /* NOTREACHED */ } cpuinit[cputype].init(cputype); } snprintf(cpu_model, sizeof(cpu_model), "%s", platform.model); /* * Initalize the real console, so the the bootstrap console is * no longer necessary. */ if (platform.cons_init) platform.cons_init(); /* NO MORE FIRMWARE ACCESS ALLOWED */ #ifdef _PMAP_MAY_USE_PROM_CONSOLE /* * XXX (unless _PMAP_MAY_USE_PROM_CONSOLE is defined and * XXX pmap_uses_prom_console() evaluates to non-zero.) */ #endif /* * find out this system's page size */ if (hwrpb->rpb_page_size != PAGE_SIZE) panic("page size %ld != 8192?!", hwrpb->rpb_page_size); /* * Find the beginning and end of the kernel (and leave a * bit of space before the beginning for the bootstrap * stack). */ kernstart = trunc_page(kernel_text) - 2 * PAGE_SIZE; #ifdef DDB ksym_start = (void *)bootinfo.ssym; ksym_end = (void *)bootinfo.esym; kernend = (vm_offset_t)round_page(ksym_end); #else kernend = (vm_offset_t)round_page(_end); #endif /* But if the bootstrap tells us otherwise, believe it! */ if (bootinfo.kernend) kernend = round_page(bootinfo.kernend); p = getenv("kernelname"); if (p) strncpy(kernelname, p, sizeof(kernelname) - 1); kernstartpfn = atop(ALPHA_K0SEG_TO_PHYS(kernstart)); kernendpfn = atop(ALPHA_K0SEG_TO_PHYS(kernend)); #ifdef SIMOS /* * SimOS console puts the bootstrap stack after kernel */ kernendpfn += 4; #endif /* * Find out how much memory is available, by looking at * the memory cluster descriptors. This also tries to do * its best to detect things things that have never been seen * before... */ mddtp = (struct mddt *)(((caddr_t)hwrpb) + hwrpb->rpb_memdat_off); /* MDDT SANITY CHECKING */ mddtweird = 0; if (mddtp->mddt_cluster_cnt < 2) { mddtweird = 1; printf("WARNING: weird number of mem clusters: %ld\n", mddtp->mddt_cluster_cnt); } #ifdef DEBUG_CLUSTER printf("Memory cluster count: %d\n", mddtp->mddt_cluster_cnt); #endif phys_avail_cnt = 0; for (i = 0; i < mddtp->mddt_cluster_cnt; i++) { memc = &mddtp->mddt_clusters[i]; #ifdef DEBUG_CLUSTER printf("MEMC %d: pfn 0x%lx cnt 0x%lx usage 0x%lx\n", i, memc->mddt_pfn, memc->mddt_pg_cnt, memc->mddt_usage); #endif totalphysmem += memc->mddt_pg_cnt; if (memc->mddt_usage & MDDT_mbz) { mddtweird = 1; printf("WARNING: mem cluster %d has weird " "usage 0x%lx\n", i, memc->mddt_usage); unknownmem += memc->mddt_pg_cnt; continue; } if (memc->mddt_usage & MDDT_NONVOLATILE) { /* XXX should handle these... */ printf("WARNING: skipping non-volatile mem " "cluster %d\n", i); unusedmem += memc->mddt_pg_cnt; continue; } if (memc->mddt_usage & MDDT_PALCODE) { resvmem += memc->mddt_pg_cnt; continue; } /* * We have a memory cluster available for system * software use. We must determine if this cluster * holds the kernel. */ /* * XXX If the kernel uses the PROM console, we only use the * XXX memory after the kernel in the first system segment, * XXX to avoid clobbering prom mapping, data, etc. 
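The cluster walk that continues below adds each usable memory cluster to phys_avail[], and has to split the cluster that contains the kernel image into the free pieces before and after it. A userland sketch of just that split, under the assumption that page frame ranges are half-open [pfn0, pfn1); the real code stores byte addresses via alpha_ptob() rather than raw PFNs.

#include <stdio.h>

/* Append the usable parts of cluster [pfn0, pfn1) to avail[],
 * skipping the kernel's pages [kstart, kend).  Returns the new
 * number of chunks.  Sketch only. */
static int
add_cluster(unsigned long avail[][2], int n,
    unsigned long pfn0, unsigned long pfn1,
    unsigned long kstart, unsigned long kend)
{
    if (pfn1 <= kstart || kend <= pfn0) {
        /* Cluster does not touch the kernel: take it whole. */
        avail[n][0] = pfn0; avail[n][1] = pfn1;
        return (n + 1);
    }
    if (pfn0 < kstart) {        /* free chunk before the kernel */
        avail[n][0] = pfn0; avail[n][1] = kstart;
        n++;
    }
    if (kend < pfn1) {          /* free chunk after the kernel */
        avail[n][0] = kend; avail[n][1] = pfn1;
        n++;
    }
    return (n);
}

int
main(void)
{
    unsigned long avail[4][2];
    int i, n = 0;

    n = add_cluster(avail, n, 0, 16384, 256, 1024);     /* kernel inside */
    n = add_cluster(avail, n, 16384, 32768, 256, 1024); /* kernel elsewhere */
    for (i = 0; i < n; i++)
        printf("chunk %d: pfn 0x%lx - 0x%lx\n", i, avail[i][0], avail[i][1]);
    return (0);
}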
*/ physmem += memc->mddt_pg_cnt; pfn0 = memc->mddt_pfn; pfn1 = memc->mddt_pfn + memc->mddt_pg_cnt; if (pfn0 <= kernendpfn && kernstartpfn <= pfn1) { /* * Must compute the location of the kernel * within the segment. */ #ifdef DEBUG_CLUSTER printf("Cluster %d contains kernel\n", i); #endif if (!pmap_uses_prom_console()) { if (pfn0 < kernstartpfn) { /* * There is a chunk before the kernel. */ #ifdef DEBUG_CLUSTER printf("Loading chunk before kernel: " "0x%lx / 0x%lx\n", pfn0, kernstartpfn); #endif phys_avail[phys_avail_cnt] = alpha_ptob(pfn0); phys_avail[phys_avail_cnt+1] = alpha_ptob(kernstartpfn); phys_avail_cnt += 2; } } if (kernendpfn < pfn1) { /* * There is a chunk after the kernel. */ #ifdef DEBUG_CLUSTER printf("Loading chunk after kernel: " "0x%lx / 0x%lx\n", kernendpfn, pfn1); #endif phys_avail[phys_avail_cnt] = alpha_ptob(kernendpfn); phys_avail[phys_avail_cnt+1] = alpha_ptob(pfn1); phys_avail_cnt += 2; } } else { /* * Just load this cluster as one chunk. */ #ifdef DEBUG_CLUSTER printf("Loading cluster %d: 0x%lx / 0x%lx\n", i, pfn0, pfn1); #endif phys_avail[phys_avail_cnt] = alpha_ptob(pfn0); phys_avail[phys_avail_cnt+1] = alpha_ptob(pfn1); phys_avail_cnt += 2; } } phys_avail[phys_avail_cnt] = 0; /* * Dump out the MDDT if it looks odd... */ if (mddtweird) { printf("\n"); printf("complete memory cluster information:\n"); for (i = 0; i < mddtp->mddt_cluster_cnt; i++) { printf("mddt %d:\n", i); printf("\tpfn %lx\n", mddtp->mddt_clusters[i].mddt_pfn); printf("\tcnt %lx\n", mddtp->mddt_clusters[i].mddt_pg_cnt); printf("\ttest %lx\n", mddtp->mddt_clusters[i].mddt_pg_test); printf("\tbva %lx\n", mddtp->mddt_clusters[i].mddt_v_bitaddr); printf("\tbpa %lx\n", mddtp->mddt_clusters[i].mddt_p_bitaddr); printf("\tbcksum %lx\n", mddtp->mddt_clusters[i].mddt_bit_cksum); printf("\tusage %lx\n", mddtp->mddt_clusters[i].mddt_usage); } printf("\n"); } Maxmem = physmem; #ifdef MAXMEM /* * MAXMEM define is in kilobytes. */ Maxmem = alpha_btop(MAXMEM * 1024); #endif /* * hw.physmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((p = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(p, &ep, 0); if ((ep != p) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", p); else Maxmem = alpha_btop(AllowMem); } while (physmem > Maxmem) { int i = phys_avail_cnt - 2; size_t sz = alpha_btop(phys_avail[i+1] - phys_avail[i]); size_t nsz; if (physmem - sz > Maxmem) { phys_avail[i] = 0; phys_avail_cnt -= 2; } else { nsz = sz - (physmem - Maxmem); phys_avail[i+1] = phys_avail[i] + alpha_ptob(nsz); physmem -= (sz - nsz); } } init_param2(physmem); /* * Initialize error message buffer (at end of core). */ { size_t sz = round_page(MSGBUF_SIZE); int i = phys_avail_cnt - 2; /* shrink so that it'll fit in the last segment */ if (phys_avail[i+1] - phys_avail[i] < sz) sz = phys_avail[i+1] - phys_avail[i]; phys_avail[i+1] -= sz; msgbufp = (struct msgbuf*) ALPHA_PHYS_TO_K0SEG(phys_avail[i+1]); msgbufinit(msgbufp, sz); /* Remove the last segment if it now has no pages. 
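The hw.physmem handling above accepts k/m/g suffixes by deliberately falling through the switch, shifting the parsed value left by 10 bits per step, and then rejects the string if the shifts overflowed (the result became smaller than the number parsed). A standalone sketch of that parse, using strtoull in place of the kernel's strtouq:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Parse "128m", "1g", "65536k", or a plain byte count.  Returns 0 for
 * an invalid or overflowing size (sketch of the hw.physmem handling
 * in alpha_init()). */
static uint64_t
parse_memsize(const char *p)
{
    uint64_t val, sanity;
    char *ep;

    sanity = val = strtoull(p, &ep, 0);
    if (ep != p && *ep != '\0') {
        switch (*ep) {
        case 'g': case 'G':
            val <<= 10;         /* fall through */
        case 'm': case 'M':
            val <<= 10;         /* fall through */
        case 'k': case 'K':
            val <<= 10;
            break;
        default:
            return (0);         /* unknown suffix */
        }
        if (val < sanity)       /* the shifts wrapped around */
            return (0);
    }
    return (val);
}

int
main(void)
{
    printf("%llu\n", (unsigned long long)parse_memsize("128m"));
    printf("%llu\n", (unsigned long long)parse_memsize("1g"));
    return (0);
}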
*/ if (phys_avail[i] == phys_avail[i+1]) phys_avail[i] = 0; /* warn if the message buffer had to be shrunk */ if (sz != round_page(MSGBUF_SIZE)) printf("WARNING: %ld bytes not available for msgbuf in last cluster (%ld used)\n", round_page(MSGBUF_SIZE), sz); } /* * Init mapping for u page(s) for proc 0 */ proc0.p_addr = proc0paddr = (struct user *)pmap_steal_memory(UPAGES * PAGE_SIZE); /* * Initialize the virtual memory system, and set the * page table base register in proc 0's PCB. */ pmap_bootstrap(ALPHA_PHYS_TO_K0SEG(alpha_ptob(ptb)), hwrpb->rpb_max_asn); /* * Initialize the rest of proc 0's PCB, and cache its physical * address. */ proc0.p_md.md_pcbpaddr = (struct pcb *)ALPHA_K0SEG_TO_PHYS((vm_offset_t)&proc0paddr->u_pcb); /* * Set the kernel sp, reserving space for an (empty) trapframe, * and make proc0's trapframe pointer point to it for sanity. */ proc0paddr->u_pcb.pcb_hw.apcb_ksp = (u_int64_t)proc0paddr + USPACE - sizeof(struct trapframe); proc0.p_md.md_tf = (struct trapframe *)proc0paddr->u_pcb.pcb_hw.apcb_ksp; /* * Initialise entropy pool. */ rand_initialize(); /* * Look at arguments passed to us and compute boothowto. */ #ifdef KADB boothowto |= RB_KDB; #endif /* boothowto |= RB_KDB | RB_GDB; */ for (p = bootinfo.boot_flags; p && *p != '\0'; p++) { /* * Note that we'd really like to differentiate case here, * but the Alpha AXP Architecture Reference Manual * says that we shouldn't. */ switch (*p) { case 'a': /* autoboot */ case 'A': boothowto &= ~RB_SINGLE; break; #ifdef DEBUG case 'c': /* crash dump immediately after autoconfig */ case 'C': boothowto |= RB_DUMP; break; #endif #if defined(DDB) case 'd': /* break into the kernel debugger ASAP */ case 'D': boothowto |= RB_KDB; break; case 'g': /* use kernel gdb */ case 'G': boothowto |= RB_GDB; break; #endif case 'h': /* always halt, never reboot */ case 'H': boothowto |= RB_HALT; break; #if 0 case 'm': /* mini root present in memory */ case 'M': boothowto |= RB_MINIROOT; break; #endif case 'n': /* askname */ case 'N': boothowto |= RB_ASKNAME; break; case 's': /* single-user (default, supported for sanity) */ case 'S': boothowto |= RB_SINGLE; break; case 'v': case 'V': boothowto |= RB_VERBOSE; bootverbose = 1; break; default: printf("Unrecognized boot flag '%c'.\n", *p); break; } } /* * Catch case of boot_verbose set in environment. */ if ((p = getenv("boot_verbose")) != NULL) { if (strcmp(p, "yes") == 0 || strcmp(p, "YES") == 0) { boothowto |= RB_VERBOSE; bootverbose = 1; } } /* * Initialize debuggers, and break into them if appropriate. */ #ifdef DDB kdb_init(); if (boothowto & RB_KDB) { printf("Boot flags requested debugger\n"); breakpoint(); } #endif /* * Figure out the number of cpus in the box, from RPB fields. * Really. We mean it. */ for (i = 0; i < hwrpb->rpb_pcs_cnt; i++) { struct pcs *pcsp; pcsp = (struct pcs *)((char *)hwrpb + hwrpb->rpb_pcs_off + (i * hwrpb->rpb_pcs_size)); if ((pcsp->pcs_flags & PCS_PP) != 0) ncpus++; } /* * Figure out our clock frequency, from RPB fields. 
*/ hz = hwrpb->rpb_intr_freq >> 12; if (!(60 <= hz && hz <= 10240)) { hz = 1024; #ifdef DIAGNOSTIC printf("WARNING: unbelievable rpb_intr_freq: %ld (%d hz)\n", hwrpb->rpb_intr_freq, hz); #endif } alpha_pal_wrfen(0); } void bzero(void *buf, size_t len) { caddr_t p = buf; while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { *p++ = 0; len--; } while (len >= sizeof(u_long) * 8) { *(u_long*) p = 0; *((u_long*) p + 1) = 0; *((u_long*) p + 2) = 0; *((u_long*) p + 3) = 0; len -= sizeof(u_long) * 8; *((u_long*) p + 4) = 0; *((u_long*) p + 5) = 0; *((u_long*) p + 6) = 0; *((u_long*) p + 7) = 0; p += sizeof(u_long) * 8; } while (len >= sizeof(u_long)) { *(u_long*) p = 0; len -= sizeof(u_long); p += sizeof(u_long); } while (len) { *p++ = 0; len--; } } void DELAY(int n) { #ifndef SIMOS unsigned long pcc0, pcc1, curcycle, cycles; int usec; if (n == 0) return; pcc0 = alpha_rpcc() & 0xffffffffUL; cycles = 0; usec = 0; while (usec <= n) { /* * Get the next CPU cycle count. The assumption here * is that we can't have wrapped twice past 32 bits worth * of CPU cycles since we last checked. */ pcc1 = alpha_rpcc() & 0xffffffffUL; if (pcc1 < pcc0) { curcycle = (pcc1 + 0x100000000UL) - pcc0; } else { curcycle = pcc1 - pcc0; } /* * We now have the number of processor cycles since we * last checked. Add the current cycle count to the * running total. If it's over cycles_per_usec, increment * the usec counter. */ cycles += curcycle; while (cycles > cycles_per_usec) { usec++; cycles -= cycles_per_usec; } pcc0 = pcc1; } #endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) { struct proc *p = curproc; osiginfo_t *sip, ksi; struct trapframe *frame; struct sigacts *psp = p->p_sigacts; int oonstack, fsize, rndfsize; frame = p->p_md.md_tf; oonstack = p->p_sigstk.ss_flags & SS_ONSTACK; fsize = sizeof ksi; rndfsize = ((fsize + 15) / 16) * 16; /* * Allocate and validate space for the signal handler * context. Note that if the stack is in P0 space, the * call to grow() is a nop, and the useracc() check * will fail if the process has not already allocated * the space with a `brk'. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sip = (osiginfo_t *)((caddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size - rndfsize); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sip = (osiginfo_t *)(alpha_pal_rdusp() - rndfsize); (void)grow_stack(p, (u_long)sip); if (!useracc((caddr_t)sip, fsize, VM_PROT_WRITE)) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* * Build the signal context to be used by sigreturn. */ ksi.si_sc.sc_onstack = oonstack; SIG2OSIG(*mask, ksi.si_sc.sc_mask); ksi.si_sc.sc_pc = frame->tf_regs[FRAME_PC]; ksi.si_sc.sc_ps = frame->tf_regs[FRAME_PS]; /* copy the registers. */ fill_regs(p, (struct reg *)ksi.si_sc.sc_regs); ksi.si_sc.sc_regs[R_ZERO] = 0xACEDBADE; /* magic number */ ksi.si_sc.sc_regs[R_SP] = alpha_pal_rdusp(); /* save the floating-point state, if necessary, then copy it. 
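DELAY() above busy-waits on the low 32 bits of the processor cycle counter, which wraps; the difference between samples is computed so that a single wrap is handled correctly, matching the assumption spelled out in its comment. A userland sketch of just the wrap-safe delta:

#include <stdio.h>
#include <stdint.h>

/* Wrap-safe difference between two samples of a 32-bit cycle counter,
 * assuming at most one wrap between the samples (as DELAY() assumes). */
static uint64_t
cc_delta(uint64_t prev, uint64_t now)
{
    prev &= 0xffffffffULL;
    now  &= 0xffffffffULL;
    if (now < prev)
        return ((now + 0x100000000ULL) - prev);
    return (now - prev);
}

int
main(void)
{
    /* Counter wrapped from near the top back to a small value. */
    printf("%llu\n", (unsigned long long)cc_delta(0xfffffff0UL, 0x10));
    printf("%llu\n", (unsigned long long)cc_delta(100, 500));
    return (0);
}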
*/ alpha_fpstate_save(p, 1); /* XXX maybe write=0 */ ksi.si_sc.sc_ownedfp = p->p_md.md_flags & MDP_FPUSED; bcopy(&p->p_addr->u_pcb.pcb_fp, (struct fpreg *)ksi.si_sc.sc_fpregs, sizeof(struct fpreg)); ksi.si_sc.sc_fp_control = p->p_addr->u_pcb.pcb_fp_control; bzero(ksi.si_sc.sc_reserved, sizeof ksi.si_sc.sc_reserved); /* XXX */ ksi.si_sc.sc_xxx1[0] = 0; /* XXX */ ksi.si_sc.sc_xxx1[1] = 0; /* XXX */ ksi.si_sc.sc_traparg_a0 = frame->tf_regs[FRAME_TRAPARG_A0]; ksi.si_sc.sc_traparg_a1 = frame->tf_regs[FRAME_TRAPARG_A1]; ksi.si_sc.sc_traparg_a2 = frame->tf_regs[FRAME_TRAPARG_A2]; ksi.si_sc.sc_xxx2[0] = 0; /* XXX */ ksi.si_sc.sc_xxx2[1] = 0; /* XXX */ ksi.si_sc.sc_xxx2[2] = 0; /* XXX */ /* Fill in POSIX parts */ ksi.si_signo = sig; ksi.si_code = code; ksi.si_value.sigval_ptr = NULL; /* XXX */ /* * copy the frame out to userland. */ (void) copyout((caddr_t)&ksi, (caddr_t)sip, fsize); /* * Set up the registers to return to sigcode. */ frame->tf_regs[FRAME_PC] = PS_STRINGS - (esigcode - sigcode); frame->tf_regs[FRAME_A0] = sig; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) frame->tf_regs[FRAME_A1] = (u_int64_t)sip; else frame->tf_regs[FRAME_A1] = code; frame->tf_regs[FRAME_A2] = (u_int64_t)&sip->si_sc; frame->tf_regs[FRAME_T12] = (u_int64_t)catcher; /* t12 is pv */ alpha_pal_wrusp((unsigned long)sip); } void sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) { struct proc *p = curproc; struct trapframe *frame; struct sigacts *psp = p->p_sigacts; struct sigframe sf, *sfp; int oonstack, rndfsize; if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } frame = p->p_md.md_tf; oonstack = (p->p_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0; rndfsize = ((sizeof(sf) + 15) / 16) * 16; /* save user context */ bzero(&sf, sizeof(struct sigframe)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; fill_regs(p, (struct reg *)sf.sf_uc.uc_mcontext.mc_regs); sf.sf_uc.uc_mcontext.mc_regs[R_SP] = alpha_pal_rdusp(); sf.sf_uc.uc_mcontext.mc_regs[R_ZERO] = 0xACEDBADE; /* magic number */ sf.sf_uc.uc_mcontext.mc_regs[R_PS] = frame->tf_regs[FRAME_PS]; sf.sf_uc.uc_mcontext.mc_regs[R_PC] = frame->tf_regs[FRAME_PC]; sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A0] = frame->tf_regs[FRAME_TRAPARG_A0]; sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A1] = frame->tf_regs[FRAME_TRAPARG_A1]; sf.sf_uc.uc_mcontext.mc_regs[R_TRAPARG_A2] = frame->tf_regs[FRAME_TRAPARG_A2]; /* * Allocate and validate space for the signal handler * context. Note that if the stack is in P0 space, the * call to grow() is a nop, and the useracc() check * will fail if the process has not already allocated * the space with a `brk'. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)((caddr_t)p->p_sigstk.ss_sp + p->p_sigstk.ss_size - rndfsize); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sfp = (struct sigframe *)(alpha_pal_rdusp() - rndfsize); (void)grow_stack(p, (u_long)sfp); #ifdef DEBUG if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid) printf("sendsig(%d): sig %d ssp %p usp %p\n", p->p_pid, sig, &sf, sfp); #endif if (!useracc((caddr_t)sfp, sizeof(sf), VM_PROT_WRITE)) { #ifdef DEBUG if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid) printf("sendsig(%d): useracc failed on sig %d\n", p->p_pid, sig); #endif /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. 
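Both sendsig() variants above round the frame size up to a 16-byte multiple (the Alpha stack alignment) and then place the frame either at the top of the alternate signal stack or just below the current user stack pointer. A minimal sketch of that placement arithmetic, with plain parameters standing in for the sigstack fields and the USP read from the PALcode:

#include <stdio.h>
#include <stdint.h>

/* Round a signal frame size up to the 16-byte stack alignment. */
static size_t
round_frame(size_t fsize)
{
    return (((fsize + 15) / 16) * 16);
}

/* Pick where the frame goes: top of the alternate stack if one is
 * armed and not already in use, otherwise just below the user SP. */
static uintptr_t
place_frame(size_t fsize, uintptr_t usp,
    int use_altstack, uintptr_t ss_sp, size_t ss_size)
{
    size_t rndfsize = round_frame(fsize);

    if (use_altstack)
        return (ss_sp + ss_size - rndfsize);
    return (usp - rndfsize);
}

int
main(void)
{
    printf("rounded %zu -> %zu\n", (size_t)808, round_frame(808));
    printf("frame at 0x%lx\n",
        (unsigned long)place_frame(808, (uintptr_t)0x7fffc000UL, 0, 0, 0));
    return (0);
}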
*/ SIGACTION(p, SIGILL) = SIG_DFL; SIGDELSET(p->p_sigignore, SIGILL); SIGDELSET(p->p_sigcatch, SIGILL); SIGDELSET(p->p_sigmask, SIGILL); psignal(p, SIGILL); return; } /* save the floating-point state, if necessary, then copy it. */ alpha_fpstate_save(p, 1); sf.sf_uc.uc_mcontext.mc_ownedfp = p->p_md.md_flags & MDP_FPUSED; bcopy(&p->p_addr->u_pcb.pcb_fp, (struct fpreg *)sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(struct fpreg)); sf.sf_uc.uc_mcontext.mc_fp_control = p->p_addr->u_pcb.pcb_fp_control; #ifdef COMPAT_OSF1 /* * XXX Create an OSF/1-style sigcontext and associated goo. */ #endif /* * copy the frame out to userland. */ (void) copyout((caddr_t)&sf, (caddr_t)sfp, sizeof(sf)); #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sendsig(%d): sig %d sfp %p code %lx\n", p->p_pid, sig, sfp, code); #endif /* * Set up the registers to return to sigcode. */ frame->tf_regs[FRAME_PC] = PS_STRINGS - (esigcode - sigcode); frame->tf_regs[FRAME_A0] = sig; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { frame->tf_regs[FRAME_A1] = (u_int64_t)&(sfp->sf_si); /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void*)frame->tf_regs[FRAME_TRAPARG_A0]; } else frame->tf_regs[FRAME_A1] = code; frame->tf_regs[FRAME_A2] = (u_int64_t)&(sfp->sf_uc); frame->tf_regs[FRAME_T12] = (u_int64_t)catcher; /* t12 is pv */ alpha_pal_wrusp((unsigned long)sfp); #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sendsig(%d): pc %lx, catcher %lx\n", p->p_pid, frame->tf_regs[FRAME_PC], frame->tf_regs[FRAME_A3]); if ((sigdebug & SDB_KSTACK) && p->p_pid == sigpid) printf("sendsig(%d): sig %d returns\n", p->p_pid, sig); #endif } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ int osigreturn(struct proc *p, struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap) { struct osigcontext *scp, ksc; scp = uap->sigcntxp; /* * Fetch the entire context structure at once for speed. */ if (copyin((caddr_t)scp, (caddr_t)&ksc, sizeof ksc)) return (EFAULT); /* * XXX - Should we do this. What if we get a "handcrafted" * but valid sigcontext that hasn't the magic number? */ if (ksc.sc_regs[R_ZERO] != 0xACEDBADE) /* magic number */ return (EINVAL); /* * Restore the user-supplied information */ if (ksc.sc_onstack) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; /* * longjmp is still implemented by calling osigreturn. The new * sigmask is stored in sc_reserved, sc_mask is only used for * backward compatibility. */ SIGSETOLD(p->p_sigmask, ksc.sc_mask); SIG_CANTMASK(p->p_sigmask); set_regs(p, (struct reg *)ksc.sc_regs); p->p_md.md_tf->tf_regs[FRAME_PC] = ksc.sc_pc; p->p_md.md_tf->tf_regs[FRAME_PS] = (ksc.sc_ps | ALPHA_PSL_USERSET) & ~ALPHA_PSL_USERCLR; alpha_pal_wrusp(ksc.sc_regs[R_SP]); /* XXX ksc.sc_ownedfp ? 
*/ alpha_fpstate_drop(p); bcopy((struct fpreg *)ksc.sc_fpregs, &p->p_addr->u_pcb.pcb_fp, sizeof(struct fpreg)); p->p_addr->u_pcb.pcb_fp_control = ksc.sc_fp_control; return (EJUSTRETURN); } int sigreturn(struct proc *p, struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap) { ucontext_t uc, *ucp; struct pcb *pcb; unsigned long val; ucp = uap->sigcntxp; pcb = &p->p_addr->u_pcb; #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sigreturn: pid %d, scp %p\n", p->p_pid, ucp); #endif /* * Fetch the entire context structure at once for speed. * Note that struct osigcontext is smaller than a ucontext_t, * so even if copyin() faults, we may have actually gotten a complete * struct osigcontext. */ if (copyin((caddr_t)ucp, (caddr_t)&uc, sizeof(ucontext_t))) { if (((struct osigcontext*)&uc)->sc_regs[R_ZERO] == 0xACEDBADE) return osigreturn(p, (struct osigreturn_args *)uap); else return (EFAULT); } if (((struct osigcontext*)&uc)->sc_regs[R_ZERO] == 0xACEDBADE) return osigreturn(p, (struct osigreturn_args *)uap); /* * Restore the user-supplied information */ set_regs(p, (struct reg *)uc.uc_mcontext.mc_regs); val = (uc.uc_mcontext.mc_regs[R_PS] | ALPHA_PSL_USERSET) & ~ALPHA_PSL_USERCLR; p->p_md.md_tf->tf_regs[FRAME_PS] = val; p->p_md.md_tf->tf_regs[FRAME_PC] = uc.uc_mcontext.mc_regs[R_PC]; alpha_pal_wrusp(uc.uc_mcontext.mc_regs[R_SP]); if (uc.uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = uc.uc_sigmask; SIG_CANTMASK(p->p_sigmask); /* XXX ksc.sc_ownedfp ? */ alpha_fpstate_drop(p); bcopy((struct fpreg *)uc.uc_mcontext.mc_fpregs, &p->p_addr->u_pcb.pcb_fp, sizeof(struct fpreg)); p->p_addr->u_pcb.pcb_fp_control = uc.uc_mcontext.mc_fp_control; #ifdef DEBUG if (sigdebug & SDB_FOLLOW) printf("sigreturn(%d): returns\n", p->p_pid); #endif return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { /*alpha_pal_halt(); */ prom_halt(1); } /* * Clear registers on exec */ void setregs(struct proc *p, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tfp = p->p_md.md_tf; bzero(tfp->tf_regs, FRAME_SIZE * sizeof tfp->tf_regs[0]); bzero(&p->p_addr->u_pcb.pcb_fp, sizeof p->p_addr->u_pcb.pcb_fp); p->p_addr->u_pcb.pcb_fp_control = 0; p->p_addr->u_pcb.pcb_fp.fpr_cr = (FPCR_DYN_NORMAL | FPCR_INVD | FPCR_DZED | FPCR_OVFD | FPCR_INED | FPCR_UNFD); alpha_pal_wrusp(stack); tfp->tf_regs[FRAME_PS] = ALPHA_PSL_USERSET; tfp->tf_regs[FRAME_PC] = entry & ~3; tfp->tf_regs[FRAME_A0] = stack; /* a0 = sp */ tfp->tf_regs[FRAME_A1] = 0; /* a1 = rtld cleanup */ tfp->tf_regs[FRAME_A2] = 0; /* a2 = rtld object */ tfp->tf_regs[FRAME_A3] = PS_STRINGS; /* a3 = ps_strings */ tfp->tf_regs[FRAME_T12] = tfp->tf_regs[FRAME_PC]; /* a.k.a. 
PV */ p->p_md.md_flags &= ~MDP_FPUSED; alpha_fpstate_drop(p); } int ptrace_set_pc(struct proc *p, unsigned long addr) { struct trapframe *tp = p->p_md.md_tf; tp->tf_regs[FRAME_PC] = addr; return 0; } static int ptrace_read_int(struct proc *p, vm_offset_t addr, u_int32_t *v) { struct iovec iov; struct uio uio; iov.iov_base = (caddr_t) v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_procp = p; return procfs_domem(curproc, p, NULL, &uio); } static int ptrace_write_int(struct proc *p, vm_offset_t addr, u_int32_t v) { struct iovec iov; struct uio uio; iov.iov_base = (caddr_t) &v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_procp = p; return procfs_domem(curproc, p, NULL, &uio); } static u_int64_t ptrace_read_register(struct proc *p, int regno) { static int reg_to_frame[32] = { FRAME_V0, FRAME_T0, FRAME_T1, FRAME_T2, FRAME_T3, FRAME_T4, FRAME_T5, FRAME_T6, FRAME_T7, FRAME_S0, FRAME_S1, FRAME_S2, FRAME_S3, FRAME_S4, FRAME_S5, FRAME_S6, FRAME_A0, FRAME_A1, FRAME_A2, FRAME_A3, FRAME_A4, FRAME_A5, FRAME_T8, FRAME_T9, FRAME_T10, FRAME_T11, FRAME_RA, FRAME_T12, FRAME_AT, FRAME_GP, FRAME_SP, -1, /* zero */ }; if (regno == R_ZERO) return 0; return p->p_md.md_tf->tf_regs[reg_to_frame[regno]]; } static int ptrace_clear_bpt(struct proc *p, struct mdbpt *bpt) { return ptrace_write_int(p, bpt->addr, bpt->contents); } static int ptrace_set_bpt(struct proc *p, struct mdbpt *bpt) { int error; u_int32_t bpins = 0x00000080; error = ptrace_read_int(p, bpt->addr, &bpt->contents); if (error) return error; return ptrace_write_int(p, bpt->addr, bpins); } int ptrace_clear_single_step(struct proc *p) { if (p->p_md.md_flags & MDP_STEP2) { ptrace_clear_bpt(p, &p->p_md.md_sstep[1]); ptrace_clear_bpt(p, &p->p_md.md_sstep[0]); p->p_md.md_flags &= ~MDP_STEP2; } else if (p->p_md.md_flags & MDP_STEP1) { ptrace_clear_bpt(p, &p->p_md.md_sstep[0]); p->p_md.md_flags &= ~MDP_STEP1; } return 0; } int ptrace_single_step(struct proc *p) { int error; vm_offset_t pc = p->p_md.md_tf->tf_regs[FRAME_PC]; alpha_instruction ins; vm_offset_t addr[2]; /* places to set breakpoints */ int count = 0; /* count of breakpoints */ if (p->p_md.md_flags & (MDP_STEP1|MDP_STEP2)) panic("ptrace_single_step: step breakpoints not removed"); error = ptrace_read_int(p, pc, &ins.bits); if (error) return error; switch (ins.branch_format.opcode) { case op_j: /* Jump: target is register value */ addr[0] = ptrace_read_register(p, ins.jump_format.rs) & ~3; count = 1; break; case op_br: case op_fbeq: case op_fblt: case op_fble: case op_bsr: case op_fbne: case op_fbge: case op_fbgt: case op_blbc: case op_beq: case op_blt: case op_ble: case op_blbs: case op_bne: case op_bge: case op_bgt: /* Branch: target is pc+4+4*displacement */ addr[0] = pc + 4; addr[1] = pc + 4 + 4 * ins.branch_format.displacement; count = 2; break; default: addr[0] = pc + 4; count = 1; } p->p_md.md_sstep[0].addr = addr[0]; error = ptrace_set_bpt(p, &p->p_md.md_sstep[0]); if (error) return error; if (count == 2) { p->p_md.md_sstep[1].addr = addr[1]; error = ptrace_set_bpt(p, &p->p_md.md_sstep[1]); if (error) { ptrace_clear_bpt(p, &p->p_md.md_sstep[0]); return error; } p->p_md.md_flags |= MDP_STEP2; } else p->p_md.md_flags |= MDP_STEP1; return 0; } int ptrace_read_u_check(p, addr, len) struct 
proc *p; vm_offset_t addr; size_t len; { vm_offset_t gap; if ((vm_offset_t) (addr + len) < addr) return EPERM; if ((vm_offset_t) (addr + len) <= sizeof(struct user)) return 0; gap = (char *) p->p_md.md_tf - (char *) p->p_addr; if ((vm_offset_t) addr < gap) return EPERM; if ((vm_offset_t) (addr + len) <= (vm_offset_t) (gap + sizeof(struct trapframe))) return 0; return EPERM; } int ptrace_write_u(struct proc *p, vm_offset_t off, long data) { vm_offset_t min; #if 0 struct trapframe frame_copy; struct trapframe *tp; #endif /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_tf - (char *)p->p_addr; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { #if 0 tp = p->p_md.md_tf; frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFLAGS_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); #endif *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_fp); if (off >= min && off <= min + sizeof(struct fpreg) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int alpha_pa_access(vm_offset_t pa) { #if 0 int i; for (i = 0; phys_avail[i] != 0; i += 2) { if (pa < phys_avail[i]) continue; if (pa < phys_avail[i+1]) return VM_PROT_READ|VM_PROT_WRITE; } return 0; #else return VM_PROT_READ|VM_PROT_WRITE; #endif } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb = &p->p_addr->u_pcb; struct trapframe *tp = p->p_md.md_tf; tp = p->p_md.md_tf; #define C(r) regs->r_regs[R_ ## r] = tp->tf_regs[FRAME_ ## r] C(V0); C(T0); C(T1); C(T2); C(T3); C(T4); C(T5); C(T6); C(T7); C(S0); C(S1); C(S2); C(S3); C(S4); C(S5); C(S6); C(A0); C(A1); C(A2); C(A3); C(A4); C(A5); C(T8); C(T9); C(T10); C(T11); C(RA); C(T12); C(AT); C(GP); #undef C regs->r_regs[R_ZERO] = tp->tf_regs[FRAME_PC]; regs->r_regs[R_SP] = pcb->pcb_hw.apcb_usp; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb = &p->p_addr->u_pcb; struct trapframe *tp = p->p_md.md_tf; tp = p->p_md.md_tf; #define C(r) tp->tf_regs[FRAME_ ## r] = regs->r_regs[R_ ## r] C(V0); C(T0); C(T1); C(T2); C(T3); C(T4); C(T5); C(T6); C(T7); C(S0); C(S1); C(S2); C(S3); C(S4); C(S5); C(S6); C(A0); C(A1); C(A2); C(A3); C(A4); C(A5); C(T8); C(T9); C(T10); C(T11); C(RA); C(T12); C(AT); C(GP); #undef C tp->tf_regs[FRAME_PC] = regs->r_regs[R_ZERO]; pcb->pcb_hw.apcb_usp = regs->r_regs[R_SP]; return (0); } int fill_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { alpha_fpstate_save(p, 0); bcopy(&p->p_addr->u_pcb.pcb_fp, fpregs, sizeof *fpregs); return (0); } int set_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { alpha_fpstate_drop(p); bcopy(fpregs, &p->p_addr->u_pcb.pcb_fp, sizeof *fpregs); return (0); } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #include /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) { #if 0 struct partition *p = lp->d_partitions + dkpart(bp->b_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? 
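ptrace_single_step() above implements single-stepping in software: it decodes the instruction at the current PC and plants breakpoints at every address the CPU could reach next; one for a jump target or plain fall-through, two for a conditional branch. A sketch of just that next-PC computation, with a simplified instruction description in place of the alpha_instruction union used above:

#include <stdio.h>
#include <stdint.h>

/*
 * Candidate next PCs for a (simplified) instruction.  kind 0 is an
 * ordinary instruction, 1 a jump through a register, 2 a conditional
 * branch with a signed displacement counted in instruction words.
 * Returns how many entries of addr[] were filled in.
 */
static int
next_pcs(uint64_t pc, int kind, uint64_t reg_target, int32_t disp,
    uint64_t addr[2])
{
    switch (kind) {
    case 1:                             /* jump: target is a register value */
        addr[0] = reg_target & ~(uint64_t)3;
        return (1);
    case 2:                             /* branch: fall-through plus target */
        addr[0] = pc + 4;
        addr[1] = pc + 4 + 4 * (int64_t)disp;
        return (2);
    default:                            /* everything else just falls through */
        addr[0] = pc + 4;
        return (1);
    }
}

int
main(void)
{
    uint64_t addr[2];
    int i, n;

    n = next_pcs(0x120001000ULL, 2, 0, -8, addr);   /* backward branch */
    for (i = 0; i < n; i++)
        printf("breakpoint at 0x%llx\n", (unsigned long long)addr[i]);
    return (0);
}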
*/ /* XXX should also protect bootstrap in first 8K */ if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->b_blkno == maxsz) { bp->b_resid = bp->b_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->b_blkno; if (sz <= 0) { bp->b_error = EINVAL; goto bad; } bp->b_bcount = sz << DEV_BSHIFT; } bp->b_pblkno = bp->b_blkno + p->p_offset; return(1); bad: bp->b_flags |= B_ERROR; #endif return(-1); } static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); void alpha_fpstate_check(struct proc *p) { if (p->p_addr->u_pcb.pcb_hw.apcb_flags & ALPHA_PCB_FLAGS_FEN) if (p != fpcurproc) panic("alpha_check_fpcurproc: bogus"); } #define SET_FEN(p) \ (p)->p_addr->u_pcb.pcb_hw.apcb_flags |= ALPHA_PCB_FLAGS_FEN #define CLEAR_FEN(p) \ (p)->p_addr->u_pcb.pcb_hw.apcb_flags &= ~ALPHA_PCB_FLAGS_FEN /* * Save the floating point state in the pcb. Use this to get read-only * access to the floating point state. If write is true, the current * fp process is cleared so that fp state can safely be modified. The * process will automatically reload the changed state by generating a * FEN trap. */ void alpha_fpstate_save(struct proc *p, int write) { if (p == fpcurproc) { /* * If curproc != fpcurproc, then we need to enable FEN * so that we can dump the fp state. */ alpha_pal_wrfen(1); /* * Save the state in the pcb. */ savefpstate(&p->p_addr->u_pcb.pcb_fp); if (write) { /* * If fpcurproc == curproc, just ask the * PALcode to disable FEN, otherwise we must * clear the FEN bit in fpcurproc's pcb. */ if (fpcurproc == curproc) alpha_pal_wrfen(0); else CLEAR_FEN(fpcurproc); fpcurproc = NULL; } else { /* * Make sure that we leave FEN enabled if * curproc == fpcurproc. We must have at most * one process with FEN enabled. Note that FEN * must already be set in fpcurproc's pcb. */ if (curproc != fpcurproc) alpha_pal_wrfen(0); } } } /* * Relinquish ownership of the FP state. This is called instead of * alpha_save_fpstate() if the entire FP state is being changed * (e.g. on sigreturn). */ void alpha_fpstate_drop(struct proc *p) { if (p == fpcurproc) { if (p == curproc) { /* * Disable FEN via the PALcode. This will * clear the bit in the pcb as well. */ alpha_pal_wrfen(0); } else { /* * Clear the FEN bit of the pcb. */ CLEAR_FEN(p); } fpcurproc = NULL; } } /* * Switch the current owner of the fp state to p, reloading the state * from the pcb. */ void alpha_fpstate_switch(struct proc *p) { /* * Enable FEN so that we can access the fp registers. */ alpha_pal_wrfen(1); if (fpcurproc) { /* * Dump the old fp state if its valid. 
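alpha_fpstate_save(), alpha_fpstate_drop() and alpha_fpstate_switch() above implement lazy floating-point context handling: at most one process owns the FP unit (fpcurproc), FEN is enabled only for the owner, and register state is spilled to the pcb on demand. The sketch below is only a schematic of that ownership protocol, with hypothetical save_fp()/restore_fp() stubs; it is not the PALcode or pcb interface used above.

#include <stdio.h>

struct fpstate { double regs[32]; };
struct proc { const char *name; struct fpstate fp; int fp_enabled; };

static struct proc *fpcurproc;          /* current owner of the FP unit */

/* Hypothetical hardware hooks standing in for savefpstate()/restorefpstate(). */
static void save_fp(struct fpstate *fp)    { (void)fp; }
static void restore_fp(struct fpstate *fp) { (void)fp; }

/* Give the FP unit to p, spilling the old owner's registers first. */
static void
fpstate_switch(struct proc *p)
{
    if (fpcurproc != NULL) {
        save_fp(&fpcurproc->fp);        /* dump the old owner's state */
        fpcurproc->fp_enabled = 0;
    }
    fpcurproc = p;
    p->fp_enabled = 1;
    restore_fp(&p->fp);                 /* reload the new owner's state */
}

/* Drop ownership entirely, e.g. before overwriting the saved state. */
static void
fpstate_drop(struct proc *p)
{
    if (p == fpcurproc) {
        p->fp_enabled = 0;
        fpcurproc = NULL;
    }
}

int
main(void)
{
    struct proc a = { "a", {{0}}, 0 }, b = { "b", {{0}}, 0 };

    fpstate_switch(&a);
    fpstate_switch(&b);                 /* a's state is spilled here */
    fpstate_drop(&b);
    printf("owner: %s\n", fpcurproc ? fpcurproc->name : "none");
    return (0);
}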
*/ savefpstate(&fpcurproc->p_addr->u_pcb.pcb_fp); CLEAR_FEN(fpcurproc); } /* * Remember the new FP owner and reload its state. */ fpcurproc = p; restorefpstate(&fpcurproc->p_addr->u_pcb.pcb_fp); /* * If the new owner is curproc, leave FEN enabled, otherwise * mark its PCB so that it gets FEN when we context switch to * it later. */ if (p != curproc) { alpha_pal_wrfen(0); SET_FEN(p); } p->p_md.md_flags |= MDP_FPUSED; } Index: stable/4/sys/i386/i386/machdep.c =================================================================== --- stable/4/sys/i386/i386/machdep.c (revision 118739) +++ stable/4/sys/i386/i386/machdep.c (revision 118740) @@ -1,2612 +1,2613 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "apm.h" #include "ether.h" #include "npx.h" #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_directio.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" #include "opt_swap.h" #include "opt_user_ldt.h" #include "opt_userconfig.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #ifdef SMP #include #include #endif #ifdef PERFMON #include #endif #include #ifdef OLD_BUS_ARCH #include #endif #include #include #include #include #include #include extern void init386 __P((int first)); extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); static void cpu_startup __P((void *)); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm __P((struct save87 *, struct savexmm *)); static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *)); #endif /* CPU_ENABLE_SSE */ #ifdef DIRECTIO extern void ffs_rawread_setup(void); #endif /* DIRECTIO */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif #ifdef PC98 static int ispc98 = 1; #else static int ispc98 = 0; #endif SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); int physmem = 0; int cold = 1; static int sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem), req); return (error); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "IU", ""); static int sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, ctob(physmem - cnt.v_wire_count), req); return (error); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "IU", ""); static int sysctl_hw_availpages(SYSCTL_HANDLER_ARGS) { int error = sysctl_handle_int(oidp, 0, i386_btop(avail_end - avail_start), req); return (error); } SYSCTL_PROC(_hw, OID_AUTO, availpages, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_hw_availpages, "I", ""); static int sysctl_machdep_msgbuf(SYSCTL_HANDLER_ARGS) { int error; /* Unwind the buffer, so that it's linear (possibly starting with * some initial nulls). 
*/ error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr+msgbufp->msg_bufr, msgbufp->msg_size-msgbufp->msg_bufr,req); if(error) return(error); if(msgbufp->msg_bufr>0) { error=sysctl_handle_opaque(oidp,msgbufp->msg_ptr, msgbufp->msg_bufr,req); } return(error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf, CTLTYPE_STRING|CTLFLAG_RD, 0, 0, sysctl_machdep_msgbuf, "A","Contents of kernel message buffer"); static int msgbuf_clear; static int sysctl_machdep_msgbuf_clear(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) { /* Clear the buffer and reset write pointer */ bzero(msgbufp->msg_ptr,msgbufp->msg_size); msgbufp->msg_bufr=msgbufp->msg_bufx=0; msgbuf_clear=0; } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, msgbuf_clear, CTLTYPE_INT|CTLFLAG_RW, &msgbuf_clear, 0, sysctl_machdep_msgbuf_clear, "I", "Clear kernel message buffer"); int bootverbose = 0, Maxmem = 0; long dumplo; vm_paddr_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) static vm_offset_t buffer_sva, buffer_eva; vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; static struct trapframe proc0_tf; static void cpu_startup(dummy) void *dummy; { register unsigned i; register caddr_t v; vm_offset_t maxaddr; vm_size_t size = 0; int firstaddr; vm_offset_t minaddr; if (boothowto & RB_VERBOSE) bootverbose++; /* * Good {morning,afternoon,evening,night}. */ printf("%s", version); startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %llu (%lluK bytes)\n", ptoa((u_int64_t)Maxmem), ptoa((u_int64_t)Maxmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size1; size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%09llx - 0x%09llx, %llu bytes (%llu pages)\n", (u_int64_t)phys_avail[indx], (u_int64_t)phys_avail[indx + 1] - 1, (u_int64_t)size1, (u_int64_t)size1 / PAGE_SIZE); } } /* * Calculate callout wheel size */ for (callwheelsize = 1, callwheelbits = 0; callwheelsize < ncallout; callwheelsize <<= 1, ++callwheelbits) ; callwheelmask = callwheelsize - 1; /* * Allocate space for system data structures. * The first available kernel virtual address is in "v". * As pages of kernel virtual memory are allocated, "v" is incremented. * As pages of memory are allocated and cleared, * "firstaddr" is incremented. * An index into the kernel page table corresponding to the * virtual memory address maintained in "v" is kept in "mapaddr". */ /* * Make two passes. The first pass calculates how much memory is * needed and allocates it. The second pass assigns virtual * addresses to the various data structures. */ firstaddr = 0; again: v = (caddr_t)firstaddr; #define valloc(name, type, num) \ (name) = (type *)v; v = (caddr_t)((name)+(num)) #define valloclim(name, type, num, lim) \ (name) = (type *)v; v = (caddr_t)((lim) = ((name)+(num))) valloc(callout, struct callout, ncallout); valloc(callwheel, struct callout_tailq, callwheelsize); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to * cover 1/4 of our ram. Beyond the first 64MB allocate additional * buffers to cover 1/20 of our ram over 64MB. 
When auto-sizing * the buffer cache we limit the eventual kva reservation to * maxbcache bytes. * * factor represents the 1/4 x ram conversion. */ if (nbuf == 0) { int factor = 4 * BKVASIZE / 1024; int kbytes = physmem * (PAGE_SIZE / 1024); nbuf = 50; if (kbytes > 4096) nbuf += min((kbytes - 4096) / factor, 65536 / factor); if (kbytes > 65536) nbuf += (kbytes - 65536) * 2 / (factor * 5); if (maxbcache && nbuf > maxbcache / BKVASIZE) nbuf = maxbcache / BKVASIZE; } /* * Do not allow the buffer_map to be more then 1/2 the size of the * kernel_map. */ if (nbuf > (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2)) { nbuf = (kernel_map->max_offset - kernel_map->min_offset) / (BKVASIZE * 2); printf("Warning: nbufs capped at %d\n", nbuf); } nswbuf = max(min(nbuf/4, 256), 16); #ifdef NSWBUF_MIN if (nswbuf < NSWBUF_MIN) nswbuf = NSWBUF_MIN; #endif #ifdef DIRECTIO ffs_rawread_setup(); #endif valloc(swbuf, struct buf, nswbuf); valloc(buf, struct buf, nbuf); v = bufhashinit(v); /* * End of first pass, size has been calculated so allocate memory */ if (firstaddr == 0) { size = (vm_size_t)(v - firstaddr); firstaddr = (int)kmem_alloc(kernel_map, round_page(size)); if (firstaddr == 0) panic("startup: no room for tables"); goto again; } /* * End of second pass, addresses have been assigned */ if ((vm_size_t)(v - firstaddr) != size) panic("startup: table size inconsistency"); clean_map = kmem_suballoc(kernel_map, &clean_sva, &clean_eva, (nbuf*BKVASIZE) + (nswbuf*MAXPHYS) + pager_map_size); buffer_map = kmem_suballoc(clean_map, &buffer_sva, &buffer_eva, (nbuf*BKVASIZE)); buffer_map->system_map = 1; pager_map = kmem_suballoc(clean_map, &pager_sva, &pager_eva, (nswbuf*MAXPHYS) + pager_map_size); pager_map->system_map = 1; exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, (16*(ARG_MAX+(PAGE_SIZE*3)))); /* * Finally, allocate mbuf pool. Since mclrefcnt is an off-size * we use the more space efficient malloc in place of kmem_alloc. */ { vm_offset_t mb_map_size; mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES; mb_map_size = roundup2(mb_map_size, max(MCLBYTES, PAGE_SIZE)); mclrefcnt = malloc(mb_map_size / MCLBYTES, M_MBUF, M_NOWAIT); bzero(mclrefcnt, mb_map_size / MCLBYTES); mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr, mb_map_size); mb_map->system_map = 1; + mbutltop = mbutl; } /* * Initialize callouts */ SLIST_INIT(&callfree); for (i = 0; i < ncallout; i++) { callout_init(&callout[i]); callout[i].c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&callfree, &callout[i], c_links.sle); } for (i = 0; i < callwheelsize; i++) { TAILQ_INIT(&callwheel[i]); } #if defined(USERCONFIG) userconfig(); cninit(); /* the preferred console may have changed */ #endif printf("avail memory = %llu (%lluK bytes)\n", ptoa((u_int64_t)cnt.v_free_count), ptoa((u_int64_t)cnt.v_free_count) / 1024); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! 
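The nbuf auto-sizing earlier in cpu_startup() starts from 50 buffers, adds buffers covering roughly 1/4 of the first 64MB of RAM and 1/20 of RAM beyond that, and finally clamps against maxbcache. A stand-alone sketch of the same arithmetic; BKVASIZE is assumed to be 16384 here, the real value comes from the kernel's param header:

    #include <stdio.h>

    #define BKVASIZE  16384          /* assumed; real value is in the kernel headers */
    #define PAGE_SIZE 4096

    /* Reproduce the nbuf heuristic above for a machine with 'pages' of RAM. */
    static int
    autosize_nbuf(int pages, int maxbcache)
    {
        int factor = 4 * BKVASIZE / 1024;     /* the "1/4 x ram" conversion */
        int kbytes = pages * (PAGE_SIZE / 1024);
        int nbuf = 50;

        if (kbytes > 4096)
            nbuf += (kbytes - 4096) / factor < 65536 / factor ?
                (kbytes - 4096) / factor : 65536 / factor;
        if (kbytes > 65536)
            nbuf += (kbytes - 65536) * 2 / (factor * 5);
        if (maxbcache && nbuf > maxbcache / BKVASIZE)
            nbuf = maxbcache / BKVASIZE;
        return (nbuf);
    }

    int
    main(void)
    {
        /* e.g. 128MB of RAM, no maxbcache limit -> 50 + 1024 + 409 buffers */
        printf("nbuf = %d\n", autosize_nbuf(128 * 1024 * 1024 / PAGE_SIZE, 0));
        return (0);
    }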
*/ mp_start(); /* fire up the APs and APICs */ mp_announce(); #endif /* SMP */ cpu_setregs(); } int register_netisr(num, handler) int num; netisr_t *handler; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("register_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = handler; return (0); } int unregister_netisr(num) int num; { if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) { printf("unregister_netisr: bad isr number: %d\n", num); return (EINVAL); } netisrs[num] = NULL; return (0); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) { register struct proc *p = curproc; register struct trapframe *regs; register struct osigframe *fp; struct osigframe sf; struct sigacts *psp = p->p_sigacts; int oonstack; regs = p->p_md.md_regs; oonstack = (p->p_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0; /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate */ if (p->p_sysent->sv_sigtbl) { if (sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; } /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* save scratch registers */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by sigreturn. */ sf.sf_siginfo.si_sc.sc_onstack = oonstack; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. 
*/ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* see sendsig for comment */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(struct osigframe)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct proc *p = curproc; struct trapframe *regs; struct sigacts *psp = p->p_sigacts; struct sigframe sf, *sfp; int oonstack; if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } regs = p->p_md.md_regs; oonstack = (p->p_sigstk.ss_flags & SS_ONSTACK) ? 1 : 0; /* save user context */ bzero(&sf, sizeof(struct sigframe)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = oonstack; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(struct trapframe)); /* Allocate and validate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe)); p->p_sigstk.ss_flags |= SS_ONSTACK; } else sfp = (struct sigframe *)regs->tf_esp - 1; /* Translate the signal is appropriate */ if (p->p_sysent->sv_sigtbl) { if (sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; } /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* fill siginfo structure */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void*)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. 
PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ sigexit(p, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) int osigreturn(p, uap) struct proc *p; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { register struct osigcontext *scp; register struct trapframe *regs = p->p_md.md_regs; int eflags; scp = uap->sigcntxp; if (!useracc((caddr_t)scp, sizeof (struct osigcontext), VM_PROT_READ)) return(EFAULT); eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* go back to user mode if both flags are set */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return(EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
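EFL_SECURE() above admits a user-supplied eflags value only when every bit that differs from the current trapframe value lies inside PSL_USERCHANGE, and CS_SECURE() insists the code selector carries user privilege. A small sketch of the eflags check with the mask passed in explicitly; the real PSL_USERCHANGE constant comes from the machine psl header:

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Accept 'new_efl' only when all bits that differ from 'cur_efl' fall
     * within 'userchange_mask' (the PSL_USERCHANGE set).  Equivalent to the
     * EFL_SECURE() macro above, with the mask made an argument.
     */
    static bool
    eflags_secure(uint32_t new_efl, uint32_t cur_efl, uint32_t userchange_mask)
    {
        return (((new_efl ^ cur_efl) & ~userchange_mask) == 0);
    }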
*/ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return(EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* restore scratch registers */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; if (scp->sc_onstack & 01) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; return(EJUSTRETURN); } int sigreturn(p, uap) struct proc *p; struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap; { struct trapframe *regs; ucontext_t *ucp; int cs, eflags; ucp = uap->sigcntxp; if (!useracc((caddr_t)ucp, sizeof(struct osigcontext), VM_PROT_READ)) return (EFAULT); if (((struct osigcontext *)ucp)->sc_trapno == 0x01d516) return (osigreturn(p, (struct osigreturn_args *)uap)); /* * Since ucp is not an osigcontext but a ucontext_t, we have to * check again if all of it is accessible. A ucontext_t is * much larger, so instead of just checking for the pointer * being valid for the size of an osigcontext, now check for * it being valid for a whole, new-style ucontext_t. */ if (!useracc((caddr_t)ucp, sizeof(ucontext_t), VM_PROT_READ)) return (EFAULT); regs = p->p_md.md_regs; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (p->p_addr->u_pcb.pcb_ext == 0) return (EINVAL); vm86 = &p->p_addr->u_pcb.pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* go back to user mode if both flags are set */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return(EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return(EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(struct trapframe)); } if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); return(EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. This is disabled by default for * the SMP case as there is a small window of opportunity whereby a ready * process is delayed to the next clock tick. It should be safe to enable * for SMP if power is a concern. * * On -stable, cpu_idle() is called with interrupts disabled and must * return with them enabled. */ #ifdef SMP static int cpu_idle_hlt = 0; #else static int cpu_idle_hlt = 1; #endif SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); void cpu_idle(void) { if (cpu_idle_hlt) { /* * We must guarentee that hlt is exactly the instruction * following the sti. */ __asm __volatile("sti; hlt"); } else { __asm __volatile("sti"); } } /* * Clear registers on exec */ void setregs(p, entry, stack, ps_strings) struct proc *p; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = p->p_md.md_regs; struct pcb *pcb = &p->p_addr->u_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); #ifdef USER_LDT /* was i386_user_cleanup() in NetBSD */ user_ldt_free(pcb); #endif bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ p->p_addr->u_pcb.pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); #if NNPX > 0 /* Initialize the npx (if any) for the current process. 
*/ npxinit(__INITIAL_NPXCW__); #endif /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ p->p_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); cr0 |= CR0_NE; /* Done by npxinit() */ cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ #ifdef I386_CPU if (cpu_class != CPUCLASS_386) #endif cr0 |= CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); extern u_long bootdev; /* not a dev_t - encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Boot device (not in dev_t format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif #ifndef SMP extern struct segment_descriptor common_tssd, *tss_gdt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor 
present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - 
all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. 
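Both physmap[] and phys_avail[] described above are flat arrays of base/bound pairs, with a zero bound terminating the list (phys_avail keeps two slots free so a terminating 0/0 pair always fits). A minimal sketch of walking such an array to total the usable pages; the names and sample chunks are hypothetical:

    #include <stdio.h>

    #define PAGE_SIZE 4096

    /* Sum the pages described by base/bound pairs, stopping at a zero bound. */
    static unsigned long
    count_pages(const unsigned long *chunks)
    {
        unsigned long pages = 0;
        int i;

        for (i = 0; chunks[i + 1] != 0; i += 2)
            pages += (chunks[i + 1] - chunks[i]) / PAGE_SIZE;
        return (pages);
    }

    int
    main(void)
    {
        /* hypothetical layout: base memory below 640K, 127MB above 1MB */
        unsigned long chunks[] = {
            0x1000,   0x9f000,       /* base memory, page 0 masked off */
            0x100000, 0x8000000,     /* extended memory */
            0, 0                     /* terminator */
        };

        printf("%lu pages available\n", count_pages(chunks));
        return (0);
    }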
*/ static void getmemsize(int first) { int i, physmap_idx, pa_indx; int hasbrokenint12; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; const char *cp; struct { u_int64_t base; u_int64_t length; u_int32_t type; } *smap; hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Some newer BIOSes has broken INT 12H implementation which cause * kernel panic immediately. In this case, we need to scan SMAP * with INT 15:E820 first, then determine base memory size. */ if (hasbrokenint12) { goto int15e820; } /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { pte = vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; int15e820: /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. 
*/ pte = vtopte(KERNBASE + (1 << PAGE_SHIFT)); *pte = (1 << PAGE_SHIFT) | PG_RW | PG_V; /* * get memory map with INT 15:E820 */ #define SMAPSIZ sizeof(*smap) #define SMAP_SIG 0x534D4150 /* 'SMAP' */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = SMAPSIZ; i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; #ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } #endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: } while (vmf.vmf_ebx != 0); /* * Perform "base memory" related probes & setup based on SMAP */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) { basemem = 640; } if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) { pte = vtopte(pa + KERNBASE); *pte = pa | PG_RW | PG_V; } pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. 
*/ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.maxmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %uK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; pte = vtopte(KERNBASE + PAGE_SIZE); /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; volatile int *ptr = (int *)(KERNBASE + PAGE_SIZE); /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *ptr; /* * Test for alternating 1's and 0's */ *ptr = 0xaaaaaaaa; if (*ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *ptr = 0x55555555; if (*ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *ptr = 0xffffffff; if (*ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *ptr = 0x0; if (*ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf("Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). 
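The page probe above writes alternating and solid bit patterns (0xaaaaaaaa, 0x55555555, all ones, all zeroes) through a scratch non-cacheable mapping and rejects any page that fails to read them back. The same idea against an ordinary word, as a stand-alone sketch:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Patterns used by the probe above: alternating bits, all 1s, all 0s. */
    static const uint32_t test_patterns[] = {
        0xaaaaaaaa, 0x55555555, 0xffffffff, 0x00000000
    };

    /*
     * Write each pattern to *word and verify it reads back; restore the
     * original contents afterwards.  Returns false if any pattern fails,
     * the same criterion the loop above uses to mark a page bad.
     */
    static bool
    word_tests_good(volatile uint32_t *word)
    {
        uint32_t saved = *word;
        bool good = true;
        size_t i;

        for (i = 0; i < sizeof(test_patterns) / sizeof(test_patterns[0]); i++) {
            *word = test_patterns[i];
            if (*word != test_patterns[i])
                good = false;
        }
        *word = saved;
        return (good);
    }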
*/ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, off, x; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif /* * Prevent lowering of the ipl if we call tsleep() early. */ safepri = cpl; proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); #ifdef SMP gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct privatespace) - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) &SMP_prvspace[0]; gdt_segs[GPROC0_SEL].ssd_base = (int) &SMP_prvspace[0].globaldata.gd_common_tss; SMP_prvspace[0].globaldata.gd_prvspace = &SMP_prvspace[0]; #else gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPROC0_SEL].ssd_base = (int) &common_tss; #endif for (x = 0; x < NGDT; x++) { #ifdef BDE_DEBUGGER /* avoid overwriting db entries with APM ones */ if (x >= GAPMCODE32_SEL && x <= GAPMDATA_SEL) continue; #endif ssdtosd(&gdt_segs[x], &gdt[x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); /* make ldt memory segments */ /* * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... 
*/ ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); #ifdef USER_LDT currentldt = _default_ldt; #endif /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #include "isa.h" #if NISA >0 isa_defaultirq(); #endif rand_initialize(); #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
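setidt() above and the syscall call gate set up just below both store a handler's address split across two 16-bit gate fields, gd_looffset and gd_hioffset. A small sketch of splitting and reassembling a 32-bit entry point that way; the struct here is a hypothetical stand-in, not the real gate_descriptor layout:

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the two offset fields of a gate descriptor. */
    struct gate_offsets {
        uint16_t lo;    /* gd_looffset: bits 0..15 of the handler */
        uint16_t hi;    /* gd_hioffset: bits 16..31 of the handler */
    };

    static struct gate_offsets
    split_handler(uint32_t func)
    {
        struct gate_offsets g = { (uint16_t)func, (uint16_t)(func >> 16) };
        return (g);
    }

    static uint32_t
    join_handler(struct gate_offsets g)
    {
        return (((uint32_t)g.hi << 16) | g.lo);
    }

    int
    main(void)
    {
        uint32_t addr = 0xc02461f0;     /* hypothetical kernel entry point */

        assert(join_handler(split_handler(addr)) == addr);
        return (0);
    }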
*/ common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; tss_gdt = &gdt[GPROC0_SEL].sd; common_tssd = *tss_gdt; common_tss.tss_ioopt = (sizeof common_tss) << 16; ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int) &dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT - KERNBASE; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); pmap_bootstrap(first, 0); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(syscall); gdp->gd_looffset = x++; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = ((int) &IDTVEC(syscall)) >>16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; #ifdef PAE proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePDPT - KERNBASE; #else proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; #endif #ifdef SMP proc0.p_addr->u_pcb.pcb_mpnest = 1; #endif proc0.p_addr->u_pcb.pcb_ext = 0; proc0.p_md.md_regs = &proc0_tf; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(p, addr) struct proc *p; unsigned long addr; { p->p_md.md_regs->tf_eip = addr; return (0); } int ptrace_single_step(p) struct proc *p; { p->p_md.md_regs->tf_eflags |= PSL_T; return (0); } int ptrace_read_u_check(p, addr, len) struct proc *p; vm_offset_t addr; size_t len; { vm_offset_t gap; if ((vm_offset_t) (addr + len) < addr) return EPERM; if ((vm_offset_t) 
(addr + len) <= sizeof(struct user)) return 0; gap = (char *) p->p_md.md_regs - (char *) p->p_addr; if ((vm_offset_t) addr < gap) return EPERM; if ((vm_offset_t) (addr + len) <= (vm_offset_t) (gap + sizeof(struct trapframe))) return 0; return EPERM; } int ptrace_write_u(p, off, data) struct proc *p; vm_offset_t off; long data; { struct trapframe frame_copy; vm_offset_t min; struct trapframe *tp; /* * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ min = (char *)p->p_md.md_regs - (char *)p->p_addr; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = p->p_md.md_regs; frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save); if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) { *(int*)((char *)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = &p->p_addr->u_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(p, regs) struct proc *p; struct reg *regs; { struct pcb *pcb; struct trapframe *tp; tp = p->p_md.md_regs; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = &p->p_addr->u_pcb; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; 
++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &p->p_addr->u_pcb.pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs); return (0); } int fill_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; if (p == NULL) { dbregs->dr0 = rdr0(); dbregs->dr1 = rdr1(); dbregs->dr2 = rdr2(); dbregs->dr3 = rdr3(); dbregs->dr4 = rdr4(); dbregs->dr5 = rdr5(); dbregs->dr6 = rdr6(); dbregs->dr7 = rdr7(); } else { pcb = &p->p_addr->u_pcb; dbregs->dr0 = pcb->pcb_dr0; dbregs->dr1 = pcb->pcb_dr1; dbregs->dr2 = pcb->pcb_dr2; dbregs->dr3 = pcb->pcb_dr3; dbregs->dr4 = 0; dbregs->dr5 = 0; dbregs->dr6 = pcb->pcb_dr6; dbregs->dr7 = pcb->pcb_dr7; } return (0); } int set_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; int i; u_int32_t mask1, mask2; if (p == NULL) { load_dr0(dbregs->dr0); load_dr1(dbregs->dr1); load_dr2(dbregs->dr2); load_dr3(dbregs->dr3); load_dr4(dbregs->dr4); load_dr5(dbregs->dr5); load_dr6(dbregs->dr6); load_dr7(dbregs->dr7); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; i++, mask1 <<= 2, mask2 <<= 2) if ((dbregs->dr7 & mask1) == mask2) return (EINVAL); pcb = &p->p_addr->u_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (suser(p) != 0) { if (dbregs->dr7 & 0x3) { /* dr0 is enabled */ if (dbregs->dr0 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr1 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr2 >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr7 & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr3 >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr0; pcb->pcb_dr1 = dbregs->dr1; pcb->pcb_dr2 = dbregs->dr2; pcb->pcb_dr3 = dbregs->dr3; pcb->pcb_dr6 = dbregs->dr6; pcb->pcb_dr7 = dbregs->dr7; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
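set_dbregs() above rejects a dr7 value if any of the eight two-bit control fields in dr7[31:16] holds the undefined pattern 10, and it treats the two low bits per breakpoint in dr7[7:0] as that breakpoint's enable bits when validating the breakpoint addresses. A small sketch of those two decodes, names hypothetical:

    #include <stdbool.h>
    #include <stdint.h>

    /* Is breakpoint 'n' (0..3) enabled?  Each gets two enable bits in dr7[7:0]. */
    static bool
    dr7_bp_enabled(uint32_t dr7, int n)
    {
        return (((dr7 >> (n * 2)) & 0x3) != 0);
    }

    /*
     * Reject a dr7 value if any of the eight two-bit control fields in
     * dr7[31:16] holds the pattern 10 -- the same undefined-setting test
     * the loop above performs before accepting user-supplied registers.
     */
    static bool
    dr7_value_ok(uint32_t dr7)
    {
        int i;

        for (i = 0; i < 8; i++)
            if (((dr7 >> (16 + i * 2)) & 0x3) == 0x2)
                return (false);
        return (true);
    }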
*/ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i=0; i /* * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. */ int bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel) { struct partition *p = lp->d_partitions + dkpart(bp->b_dev); int labelsect = lp->d_partitions[0].p_offset; int maxsz = p->p_size, sz = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* overwriting disk label ? */ /* XXX should also protect bootstrap in first 8K */ if (bp->b_blkno + p->p_offset <= LABELSECTOR + labelsect && #if LABELSECTOR != 0 bp->b_blkno + p->p_offset + sz > LABELSECTOR + labelsect && #endif (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #if defined(DOSBBSECTOR) && defined(notyet) /* overwriting master boot record? */ if (bp->b_blkno + p->p_offset <= DOSBBSECTOR && (bp->b_flags & B_READ) == 0 && wlabel == 0) { bp->b_error = EROFS; goto bad; } #endif /* beyond partition? */ if (bp->b_blkno < 0 || bp->b_blkno + sz > maxsz) { /* if exactly at end of disk, return an EOF */ if (bp->b_blkno == maxsz) { bp->b_resid = bp->b_bcount; return(0); } /* or truncate if part of it fits */ sz = maxsz - bp->b_blkno; if (sz <= 0) { bp->b_error = EINVAL; goto bad; } bp->b_bcount = sz << DEV_BSHIFT; } bp->b_pblkno = bp->b_blkno + p->p_offset; return(1); bad: bp->b_flags |= B_ERROR; return(-1); } #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. 
*/ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: stable/4/sys/kern/uipc_mbuf.c =================================================================== --- stable/4/sys/kern/uipc_mbuf.c (revision 118739) +++ stable/4/sys/kern/uipc_mbuf.c (revision 118740) @@ -1,1631 +1,1635 @@ /* * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #include "opt_param.h" #include "opt_mbuf_stress_test.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef INVARIANTS #include #endif static void mbinit __P((void *)); SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL) struct mbuf *mbutl; +struct mbuf *mbutltop; char *mclrefcnt; struct mbstat mbstat; u_long mbtypes[MT_NTYPES]; struct mbuf *mmbfree; union mcluster *mclfree; int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; #ifdef MBUF_STRESS_TEST int m_defragpackets; int m_defragbytes; int m_defraguseless; int m_defragfailure; int m_defragrandomfailures; #endif int m_clreflimithits; int nmbclusters; int nmbufs; u_int m_mballoc_wid = 0; u_int m_clalloc_wid = 0; SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, &max_linkhdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, &max_protohdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, &max_datalen, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW, &mbuf_wait, 0, ""); SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RW, &mbstat, mbstat, ""); SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes, sizeof(mbtypes), "LU", ""); SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, "Maximum number of mbuf clusters available"); SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0, "Maximum number of mbufs available"); #ifdef MBUF_STRESS_TEST SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragpackets, CTLFLAG_RD, &m_defragpackets, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragbytes, CTLFLAG_RD, &m_defragbytes, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defraguseless, CTLFLAG_RD, &m_defraguseless, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragfailure, CTLFLAG_RD, &m_defragfailure, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, m_defragrandomfailures, CTLFLAG_RW, &m_defragrandomfailures, 0, ""); #endif SYSCTL_INT(_kern_ipc, OID_AUTO, m_clreflimithits, CTLFLAG_RD, &m_clreflimithits, 0, ""); static void m_reclaim __P((void)); static struct mbuf *m_clreflimit(struct mbuf *m0, int how); #ifndef NMBCLUSTERS #define NMBCLUSTERS (512 + maxusers * 16) #endif #ifndef NMBUFS #define NMBUFS (nmbclusters * 4) #endif /* * Perform sanity checks of tunables declared above. */ static void tunable_mbinit(void *dummy) { /* * This has to be done before VM init. */ nmbclusters = NMBCLUSTERS; TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); nmbufs = NMBUFS; TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); /* Sanity checks */ if (nmbufs < nmbclusters * 2) nmbufs = nmbclusters * 2; return; } SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_ANY, tunable_mbinit, NULL); /* "number of clusters of pages" */ #define NCL_INIT 1 #define NMB_INIT 16 /* ARGSUSED*/ static void mbinit(dummy) void *dummy; { int s; mmbfree = NULL; mclfree = NULL; mbstat.m_msize = MSIZE; mbstat.m_mclbytes = MCLBYTES; mbstat.m_minclsize = MINCLSIZE; mbstat.m_mlen = MLEN; mbstat.m_mhlen = MHLEN; s = splimp(); if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0) goto bad; #if MCLBYTES <= PAGE_SIZE if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0) goto bad; #else /* It's OK to call contigmalloc in this context. */ if (m_clalloc(16, M_WAIT) == 0) goto bad; #endif splx(s); return; bad: panic("mbinit"); } /* * Allocate at least nmb mbufs and place on mbuf free list. 
* Must be called at splimp. */ /* ARGSUSED */ int m_mballoc(nmb, how) register int nmb; int how; { register caddr_t p; register int i; int nbytes; /* * If we've hit the mbuf limit, stop allocating from mb_map, * (or trying to) in order to avoid dipping into the section of * mb_map which we've "reserved" for clusters. */ if ((nmb + mbstat.m_mbufs) > nmbufs) return (0); /* * Once we run out of map space, it will be impossible to get * any more (nothing is ever freed back to the map) * -- however you are not dead as m_reclaim might * still be able to free a substantial amount of space. * * XXX Furthermore, we can also work with "recycled" mbufs (when * we're calling with M_WAIT the sleep procedure will be woken * up when an mbuf is freed. See m_mballoc_wait()). */ if (mb_map_full) return (0); nbytes = round_page(nmb * MSIZE); p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT); if (p == 0 && how == M_WAIT) { mbstat.m_wait++; p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK); } /* * Either the map is now full, or `how' is M_NOWAIT and there * are no pages left. */ if (p == NULL) return (0); + mbutltop += nbytes; nmb = nbytes / MSIZE; for (i = 0; i < nmb; i++) { ((struct mbuf *)p)->m_next = mmbfree; mmbfree = (struct mbuf *)p; p += MSIZE; } mbstat.m_mbufs += nmb; mbtypes[MT_FREE] += nmb; return (1); } /* * Once the mb_map has been exhausted and if the call to the allocation macros * (or, in some cases, functions) is with M_WAIT, then it is necessary to rely * solely on reclaimed mbufs. Here we wait for an mbuf to be freed for a * designated (mbuf_wait) time. */ struct mbuf * m_mballoc_wait(int caller, int type) { struct mbuf *p; int s; s = splimp(); m_mballoc_wid++; if ((tsleep(&m_mballoc_wid, PVM, "mballc", mbuf_wait)) == EWOULDBLOCK) m_mballoc_wid--; splx(s); /* * Now that we (think) that we've got something, we will redo an * MGET, but avoid getting into another instance of m_mballoc_wait() * XXX: We retry to fetch _even_ if the sleep timed out. This is left * this way, purposely, in the [unlikely] case that an mbuf was * freed but the sleep was not awakened in time. */ p = NULL; switch (caller) { case MGET_C: MGET(p, M_DONTWAIT, type); break; case MGETHDR_C: MGETHDR(p, M_DONTWAIT, type); break; default: panic("m_mballoc_wait: invalid caller (%d)", caller); } s = splimp(); if (p != NULL) { /* We waited and got something... */ mbstat.m_wait++; /* Wake up another if we have more free. */ if (mmbfree != NULL) MMBWAKEUP(); } splx(s); return (p); } #if MCLBYTES > PAGE_SIZE static int i_want_my_mcl; static void kproc_mclalloc(void) { int status; while (1) { tsleep(&i_want_my_mcl, PVM, "mclalloc", 0); for (; i_want_my_mcl; i_want_my_mcl--) { if (m_clalloc(1, M_WAIT) == 0) printf("m_clalloc failed even in process context!\n"); } } } static struct proc *mclallocproc; static struct kproc_desc mclalloc_kp = { "mclalloc", kproc_mclalloc, &mclallocproc }; SYSINIT(mclallocproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start, &mclalloc_kp); #endif /* * Allocate some number of mbuf clusters * and place on cluster free list. * Must be called at splimp. */ /* ARGSUSED */ int m_clalloc(ncl, how) register int ncl; int how; { register caddr_t p; register int i; int npg; /* * If we've hit the mcluster number limit, stop allocating from * mb_map, (or trying to) in order to avoid dipping into the section * of mb_map which we've "reserved" for mbufs. 
*/ if ((ncl + mbstat.m_clusters) > nmbclusters) goto m_clalloc_fail; /* * Once we run out of map space, it will be impossible * to get any more (nothing is ever freed back to the * map). From this point on, we solely rely on freed * mclusters. */ if (mb_map_full) goto m_clalloc_fail; #if MCLBYTES > PAGE_SIZE if (how != M_WAIT) { i_want_my_mcl += ncl; wakeup(&i_want_my_mcl); mbstat.m_wait++; p = 0; } else { p = contigmalloc1(MCLBYTES * ncl, M_DEVBUF, M_WAITOK, 0ul, ~0ul, PAGE_SIZE, 0, mb_map); } #else npg = ncl; p = (caddr_t)kmem_malloc(mb_map, ctob(npg), how != M_WAIT ? M_NOWAIT : M_WAITOK); ncl = ncl * PAGE_SIZE / MCLBYTES; #endif /* * Either the map is now full, or `how' is M_NOWAIT and there * are no pages left. */ if (p == NULL) { static int last_report ; /* when we did that (in ticks) */ m_clalloc_fail: mbstat.m_drops++; if (ticks < last_report || (ticks - last_report) >= hz) { last_report = ticks; printf("All mbuf clusters exhausted, please see tuning(7).\n"); } return (0); } + + mbutltop += ctob(npg); for (i = 0; i < ncl; i++) { ((union mcluster *)p)->mcl_next = mclfree; mclfree = (union mcluster *)p; p += MCLBYTES; mbstat.m_clfree++; } mbstat.m_clusters += ncl; return (1); } /* * Once the mb_map submap has been exhausted and the allocation is called with * M_WAIT, we rely on the mclfree union pointers. If nothing is free, we will * sleep for a designated amount of time (mbuf_wait) or until we're woken up * due to sudden mcluster availability. */ caddr_t m_clalloc_wait(void) { caddr_t p; int s; #ifdef __i386__ /* If in interrupt context, and INVARIANTS, maintain sanity and die. */ KASSERT(intr_nesting_level == 0, ("CLALLOC: CANNOT WAIT IN INTERRUPT")); #endif /* Sleep until something's available or until we expire. */ m_clalloc_wid++; if ((tsleep(&m_clalloc_wid, PVM, "mclalc", mbuf_wait)) == EWOULDBLOCK) m_clalloc_wid--; /* * Now that we (think) that we've got something, we will redo and * MGET, but avoid getting into another instance of m_clalloc_wait() */ p = NULL; MCLALLOC(p, M_DONTWAIT); s = splimp(); if (p != NULL) { /* We waited and got something... */ mbstat.m_wait++; /* Wake up another if we have more free. */ if (mclfree != NULL) MCLWAKEUP(); } splx(s); return (p); } /* * When MGET fails, ask protocols to free space when short of memory, * then re-attempt to allocate an mbuf. */ struct mbuf * m_retry(i, t) int i, t; { register struct mbuf *m; /* * Must only do the reclaim if not in an interrupt context. */ if (i == M_WAIT) { #ifdef __i386__ KASSERT(intr_nesting_level == 0, ("MBALLOC: CANNOT WAIT IN INTERRUPT")); #endif m_reclaim(); } /* * Both m_mballoc_wait and m_retry must be nulled because * when the MGET macro is run from here, we deffinately do _not_ * want to enter an instance of m_mballoc_wait() or m_retry() (again!) */ #define m_mballoc_wait(caller,type) (struct mbuf *)0 #define m_retry(i, t) (struct mbuf *)0 MGET(m, i, t); #undef m_retry #undef m_mballoc_wait if (m != NULL) mbstat.m_wait++; else { static int last_report ; /* when we did that (in ticks) */ mbstat.m_drops++; if (ticks < last_report || (ticks - last_report) >= hz) { last_report = ticks; printf("All mbufs exhausted, please see tuning(7).\n"); } } return (m); } /* * As above; retry an MGETHDR. */ struct mbuf * m_retryhdr(i, t) int i, t; { register struct mbuf *m; /* * Must only do the reclaim if not in an interrupt context. 
*/ if (i == M_WAIT) { #ifdef __i386__ KASSERT(intr_nesting_level == 0, ("MBALLOC: CANNOT WAIT IN INTERRUPT")); #endif m_reclaim(); } #define m_mballoc_wait(caller,type) (struct mbuf *)0 #define m_retryhdr(i, t) (struct mbuf *)0 MGETHDR(m, i, t); #undef m_retryhdr #undef m_mballoc_wait if (m != NULL) mbstat.m_wait++; else { static int last_report ; /* when we did that (in ticks) */ mbstat.m_drops++; if (ticks < last_report || (ticks - last_report) >= hz) { last_report = ticks; printf("All mbufs exhausted, please see tuning(7).\n"); } } return (m); } static void m_reclaim() { register struct domain *dp; register struct protosw *pr; int s = splimp(); for (dp = domains; dp; dp = dp->dom_next) for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) if (pr->pr_drain) (*pr->pr_drain)(); splx(s); mbstat.m_drain++; } /* * Space allocation routines. * These are also available as macros * for critical paths. */ struct mbuf * m_get(how, type) int how, type; { register struct mbuf *m; MGET(m, how, type); return (m); } struct mbuf * m_gethdr(how, type) int how, type; { register struct mbuf *m; MGETHDR(m, how, type); return (m); } struct mbuf * m_getclr(how, type) int how, type; { register struct mbuf *m; MGET(m, how, type); if (m == 0) return (0); bzero(mtod(m, caddr_t), MLEN); return (m); } /* * m_getcl() returns an mbuf with an attached cluster. * Because many network drivers use this kind of buffers a lot, it is * convenient to keep a small pool of free buffers of this kind. * Even a small size such as 10 gives about 10% improvement in the * forwarding rate in a bridge or router. * The size of this free list is controlled by the sysctl variable * mcl_pool_max. The list is populated on m_freem(), and used in * m_getcl() if elements are available. */ static struct mbuf *mcl_pool; static int mcl_pool_now; static int mcl_pool_max = 0; SYSCTL_INT(_kern_ipc, OID_AUTO, mcl_pool_max, CTLFLAG_RW, &mcl_pool_max, 0, "Maximum number of mbufs+cluster in free list"); SYSCTL_INT(_kern_ipc, OID_AUTO, mcl_pool_now, CTLFLAG_RD, &mcl_pool_now, 0, "Current number of mbufs+cluster in free list"); struct mbuf * m_getcl(int how, short type, int flags) { int s = splimp(); struct mbuf *mp; if (flags & M_PKTHDR) { if (type == MT_DATA && mcl_pool) { mp = mcl_pool; mcl_pool = mp->m_nextpkt; mcl_pool_now--; splx(s); mp->m_nextpkt = NULL; mp->m_data = mp->m_ext.ext_buf; mp->m_flags = M_PKTHDR|M_EXT; mp->m_pkthdr.rcvif = NULL; mp->m_pkthdr.csum_flags = 0; return mp; } else MGETHDR(mp, how, type); } else MGET(mp, how, type); if (mp) { MCLGET(mp, how); if ( (mp->m_flags & M_EXT) == 0) { m_free(mp); mp = NULL; } } splx(s); return mp; } /* * struct mbuf * * m_getm(m, len, how, type) * * This will allocate len-worth of mbufs and/or mbuf clusters (whatever fits * best) and return a pointer to the top of the allocated chain. If m is * non-null, then we assume that it is a single mbuf or an mbuf chain to * which we want len bytes worth of mbufs and/or clusters attached, and so * if we succeed in allocating it, we will just return a pointer to m. * * If we happen to fail at any point during the allocation, we will free * up everything we have already allocated and return NULL. 
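What m_mballoc() and m_clalloc() above do with the memory returned by kmem_malloc()/contigmalloc1() is to carve it into fixed-size pieces and thread them onto a singly linked free list; the new mbutltop lines additionally record the highest address handed out, so the mcl_valid() check added in mbuf.h can later tell a genuine buffer address from a junk pointer. A minimal user-space sketch of that pattern (buf_alloc, arena_base and arena_top are stand-in names; the real allocator carves the contiguous mb_map submap, approximated here with a single malloc()):

#include <stdio.h>
#include <stdlib.h>

#define BUFSIZE	256			/* stands in for MSIZE */

struct buf {				/* stands in for struct mbuf */
	struct buf *b_next;
};

static struct buf *freelist;		/* stands in for mmbfree */
static char *arena_base, *arena_top;	/* stand in for mbutl / mbutltop */

/* Carve nbytes of fresh memory into buffers and push them on the free list. */
static int
buf_alloc(size_t nbytes)
{
	char *p;
	size_t i;

	if ((p = malloc(nbytes)) == NULL)
		return (0);
	if (arena_base == NULL)
		arena_base = p;
	arena_top = p + nbytes;		/* the "mbutltop += nbytes" step */
	for (i = 0; i + BUFSIZE <= nbytes; i += BUFSIZE) {
		((struct buf *)(p + i))->b_next = freelist;
		freelist = (struct buf *)(p + i);
	}
	return (1);
}

int
main(void)
{
	struct buf *b;
	int n = 0;

	buf_alloc(4096);
	for (b = freelist; b != NULL; b = b->b_next)
		n++;
	printf("%d buffers carved, arena %p..%p\n", n,
	    (void *)arena_base, (void *)arena_top);
	return (0);
}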
* */ struct mbuf * m_getm(struct mbuf *m, int len, int how, int type) { struct mbuf *top, *tail, *mp, *mtail = NULL; KASSERT(len >= 0, ("len is < 0 in m_getm")); MGET(mp, how, type); if (mp == NULL) return (NULL); else if (len > MINCLSIZE) { MCLGET(mp, how); if ((mp->m_flags & M_EXT) == 0) { m_free(mp); return (NULL); } } mp->m_len = 0; len -= M_TRAILINGSPACE(mp); if (m != NULL) for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next); else m = mp; top = tail = mp; while (len > 0) { MGET(mp, how, type); if (mp == NULL) goto failed; tail->m_next = mp; tail = mp; if (len > MINCLSIZE) { MCLGET(mp, how); if ((mp->m_flags & M_EXT) == 0) goto failed; } mp->m_len = 0; len -= M_TRAILINGSPACE(mp); } if (mtail != NULL) mtail->m_next = top; return (m); failed: m_freem(top); return (NULL); } /* * MFREE(struct mbuf *m, struct mbuf *n) * Free a single mbuf and associated external storage. * Place the successor, if any, in n. * * we do need to check non-first mbuf for m_aux, since some of existing * code does not call M_PREPEND properly. * (example: call to bpf_mtap from drivers) */ #define MFREE(m, n) MBUFLOCK( \ struct mbuf *_mm = (m); \ \ KASSERT(_mm->m_type != MT_FREE, ("freeing free mbuf")); \ mbtypes[_mm->m_type]--; \ if ((_mm->m_flags & M_PKTHDR) != 0) \ m_tag_delete_chain(_mm, NULL); \ if (_mm->m_flags & M_EXT) \ MEXTFREE1(m); \ (n) = _mm->m_next; \ _mm->m_type = MT_FREE; \ mbtypes[MT_FREE]++; \ _mm->m_next = mmbfree; \ mmbfree = _mm; \ MMBWAKEUP(); \ ) struct mbuf * m_free(m) struct mbuf *m; { register struct mbuf *n; MFREE(m, n); return (n); } void m_freem(m) struct mbuf *m; { int s = splimp(); /* * Try to keep a small pool of mbuf+cluster for quick use in * device drivers. A good candidate is a M_PKTHDR buffer with * only one cluster attached. Other mbufs, or those exceeding * the pool size, are just m_free'd in the usual way. * The following code makes sure that m_next, m_type, * m_pkthdr.aux and m_ext.* are properly initialized. * Other fields in the mbuf are initialized in m_getcl() * upon allocation. */ if (mcl_pool_now < mcl_pool_max && m && m->m_next == NULL && (m->m_flags & (M_PKTHDR|M_EXT)) == (M_PKTHDR|M_EXT) && m->m_type == MT_DATA && M_EXT_WRITABLE(m) ) { m_tag_delete_chain(m, NULL); m->m_nextpkt = mcl_pool; mcl_pool = m; mcl_pool_now++; } else { while (m) m = m_free(m); } splx(s); } /* * Mbuffer utility routines. */ /* * Lesser-used path for M_PREPEND: * allocate new mbuf to prepend to chain, * copy junk along. */ struct mbuf * m_prepend(m, len, how) register struct mbuf *m; int len, how; { struct mbuf *mn; if (m->m_flags & M_PKTHDR) MGETHDR(mn, how, m->m_type); else MGET(mn, how, m->m_type); if (mn == (struct mbuf *)NULL) { m_freem(m); return ((struct mbuf *)NULL); } if (m->m_flags & M_PKTHDR) M_MOVE_PKTHDR(mn, m); mn->m_next = m; m = mn; if (len < MHLEN) MH_ALIGN(m, len); m->m_len = len; return (m); } /* * Make a copy of an mbuf chain starting "off0" bytes from the beginning, * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. 
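The mcl_pool kept by m_getcl() and refilled by m_freem() above is a bounded LIFO cache of ready-to-use mbuf+cluster pairs, threaded through m_nextpkt and capped by mcl_pool_max (which defaults to 0, so the cache is off unless the sysctl is raised). The same push-on-free / pop-on-allocate pattern in free-standing form (obj_get, obj_free and the cap of 8 are illustrative stand-ins):

#include <stdio.h>
#include <stdlib.h>

struct obj {
	struct obj *next;
};

static struct obj *pool;		/* stands in for mcl_pool */
static int pool_now, pool_max = 8;	/* stand in for mcl_pool_now / _max */

static struct obj *
obj_get(void)
{
	struct obj *o;

	if (pool != NULL) {		/* fast path: reuse a cached object */
		o = pool;
		pool = o->next;
		pool_now--;
		return (o);
	}
	return (malloc(sizeof(*o)));	/* slow path: real allocation */
}

static void
obj_free(struct obj *o)
{
	if (pool_now < pool_max) {	/* cache it for the next obj_get() */
		o->next = pool;
		pool = o;
		pool_now++;
	} else
		free(o);
}

int
main(void)
{
	struct obj *a = obj_get();

	obj_free(a);
	printf("cached: %d\n", pool_now);	/* prints 1 */
	return (0);
}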
*/ #define MCFail (mbstat.m_mcfail) struct mbuf * m_copym(m, off0, len, wait) register struct mbuf *m; int off0, wait; register int len; { register struct mbuf *n, **np; register int off = off0; struct mbuf *top; int copyhdr = 0; KASSERT(off >= 0, ("m_copym, negative off %d", off)); KASSERT(len >= 0, ("m_copym, negative len %d", len)); if (off == 0 && m->m_flags & M_PKTHDR) copyhdr = 1; while (off > 0) { KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } np = ⊤ top = 0; while (len > 0) { if (m == 0) { KASSERT(len == M_COPYALL, ("m_copym, length > size of mbuf chain")); break; } if (copyhdr) MGETHDR(n, wait, m->m_type); else MGET(n, wait, m->m_type); *np = n; if (n == 0) goto nospace; if (copyhdr) { if (!m_dup_pkthdr(n, m, wait)) goto nospace; if (len == M_COPYALL) n->m_pkthdr.len -= off0; else n->m_pkthdr.len = len; copyhdr = 0; } n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { n->m_data = m->m_data + off; if (m->m_ext.ext_ref == NULL) { atomic_add_char( &mclrefcnt[mtocl(m->m_ext.ext_buf)], 1); } else { int s = splimp(); (*m->m_ext.ext_ref)(m->m_ext.ext_buf, m->m_ext.ext_size); splx(s); } n->m_ext = m->m_ext; n->m_flags |= M_EXT; } else bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), (unsigned)n->m_len); if (len != M_COPYALL) len -= n->m_len; off = 0; m = m->m_next; np = &n->m_next; } top = m_clreflimit(top, wait); if (top == 0) MCFail++; return (top); nospace: m_freem(top); MCFail++; return (0); } /* * Copy an entire packet, including header (which must be present). * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. * Preserve alignment of the first mbuf so if the creator has left * some room at the beginning (e.g. for inserting protocol headers) * the copies also have the room available. */ struct mbuf * m_copypacket(m, how) struct mbuf *m; int how; { struct mbuf *top, *n, *o; MGET(n, how, m->m_type); top = n; if (!n) goto nospace; if (!m_dup_pkthdr(n, m, how)) goto nospace; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; if (m->m_ext.ext_ref == NULL) atomic_add_char(&mclrefcnt[mtocl(m->m_ext.ext_buf)], 1); else { int s = splimp(); (*m->m_ext.ext_ref)(m->m_ext.ext_buf, m->m_ext.ext_size); splx(s); } n->m_ext = m->m_ext; n->m_flags |= M_EXT; } else { n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; while (m) { MGET(o, how, m->m_type); if (!o) goto nospace; n->m_next = o; n = n->m_next; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; if (m->m_ext.ext_ref == NULL) { atomic_add_char( &mclrefcnt[mtocl(m->m_ext.ext_buf)], 1); } else { int s = splimp(); (*m->m_ext.ext_ref)(m->m_ext.ext_buf, m->m_ext.ext_size); splx(s); } n->m_ext = m->m_ext; n->m_flags |= M_EXT; } else { bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; } top = m_clreflimit(top, how); return top; nospace: m_freem(top); MCFail++; return 0; } /* * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. 
*/ void m_copydata(m, off, len, cp) register struct mbuf *m; register int off; register int len; caddr_t cp; { register unsigned count; KASSERT(off >= 0, ("m_copydata, negative off %d", off)); KASSERT(len >= 0, ("m_copydata, negative len %d", len)); while (off > 0) { KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); count = min(m->m_len - off, len); bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; m = m->m_next; } } /* * Copy a packet header mbuf chain into a completely new chain, including * copying any mbuf clusters. Use this instead of m_copypacket() when * you need a writable copy of an mbuf chain. */ struct mbuf * m_dup(m, how) struct mbuf *m; int how; { struct mbuf **p, *top = NULL; int remain, moff, nsize; /* Sanity check */ if (m == NULL) return (0); KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__)); /* While there's more data, get a new mbuf, tack it on, and fill it */ remain = m->m_pkthdr.len; moff = 0; p = ⊤ while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ struct mbuf *n; /* Get the next new mbuf */ MGET(n, how, m->m_type); if (n == NULL) goto nospace; if (top == NULL) { /* first one, must be PKTHDR */ if (!m_dup_pkthdr(n, m, how)) goto nospace; nsize = MHLEN; } else /* not the first one */ nsize = MLEN; if (remain >= MINCLSIZE) { MCLGET(n, how); if ((n->m_flags & M_EXT) == 0) { (void)m_free(n); goto nospace; } nsize = MCLBYTES; } n->m_len = 0; /* Link it into the new chain */ *p = n; p = &n->m_next; /* Copy data from original mbuf(s) into new mbuf */ while (n->m_len < nsize && m != NULL) { int chunk = min(nsize - n->m_len, m->m_len - moff); bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); moff += chunk; n->m_len += chunk; remain -= chunk; if (moff == m->m_len) { m = m->m_next; moff = 0; } } /* Check correct total mbuf length */ KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), ("%s: bogus m_pkthdr.len", __FUNCTION__)); } return (top); nospace: m_freem(top); MCFail++; return (0); } /* * Concatenate mbuf chain n to m. * Both chains must be of the same type (e.g. MT_DATA). * Any m_pkthdr is not updated. */ void m_cat(m, n) register struct mbuf *m, *n; { while (m->m_next) m = m->m_next; while (n) { if (m->m_flags & M_EXT || m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { /* just join the two chains */ m->m_next = n; return; } /* splat the data from one into the other */ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } } void m_adj(mp, req_len) struct mbuf *mp; int req_len; { register int len = req_len; register struct mbuf *m; register int count; if ((m = mp) == NULL) return; if (len >= 0) { /* * Trim from head. */ while (m != NULL && len > 0) { if (m->m_len <= len) { len -= m->m_len; m->m_len = 0; m = m->m_next; } else { m->m_len -= len; m->m_data += len; len = 0; } } m = mp; if (mp->m_flags & M_PKTHDR) m->m_pkthdr.len -= (req_len - len); } else { /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. 
*/ len = -len; count = 0; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len >= len) { m->m_len -= len; if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= len; return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ m = mp; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len = count; for (; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; break; } count -= m->m_len; } while (m->m_next) (m = m->m_next) ->m_len = 0; } } /* * Rearange an mbuf chain so that len bytes are contiguous * and in the data area of an mbuf (so that mtod and dtom * will work for a structure of size len). Returns the resulting * mbuf chain on success, frees it and returns null on failure. * If there is room, it will add up to max_protohdr-len extra bytes to the * contiguous region in an attempt to avoid being called next time. */ #define MPFail (mbstat.m_mpfail) struct mbuf * m_pullup(n, len) register struct mbuf *n; int len; { register struct mbuf *m; register int count; int space; /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. */ if ((n->m_flags & M_EXT) == 0 && n->m_data + len < &n->m_dat[MLEN] && n->m_next) { if (n->m_len >= len) return (n); m = n; n = n->m_next; len -= m->m_len; } else { if (len > MHLEN) goto bad; MGET(m, M_DONTWAIT, n->m_type); if (m == 0) goto bad; m->m_len = 0; if (n->m_flags & M_PKTHDR) M_MOVE_PKTHDR(m, n); } space = &m->m_dat[MLEN] - (m->m_data + m->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (unsigned)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); if (len > 0) { (void) m_free(m); goto bad; } m->m_next = n; return (m); bad: m_freem(n); MPFail++; return (0); } /* * Partition an mbuf chain in two pieces, returning the tail -- * all but the first len0 bytes. In case of failure, it returns NULL and * attempts to restore the chain to its original state. * * Note that the resulting mbufs might be read-only, because the new * mbuf can end up sharing an mbuf cluster with the original mbuf if * the "breaking point" happens to lie within a cluster mbuf. Use the * M_WRITABLE() macro to check for this case. 
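The head trim in m_adj() above walks the chain, emptying whole mbufs until fewer than req_len bytes remain to be dropped, and then advances m_data inside the mbuf where the cut lands. A stand-alone sketch of that loop over a simplified segment type (seg and seg_trim_head are illustrative names):

#include <stdio.h>

struct seg {
	struct seg *next;
	char *data;		/* stands in for m_data */
	int len;		/* stands in for m_len */
};

/* Drop the first "len" bytes from a chain of segments, as m_adj() does. */
static void
seg_trim_head(struct seg *s, int len)
{
	while (s != NULL && len > 0) {
		if (s->len <= len) {
			len -= s->len;
			s->len = 0;
			s = s->next;
		} else {
			s->len -= len;
			s->data += len;
			len = 0;
		}
	}
}

int
main(void)
{
	char a[] = "hello", b[] = "world";
	struct seg s2 = { NULL, b, 5 };
	struct seg s1 = { &s2, a, 5 };

	seg_trim_head(&s1, 7);
	printf("%.*s%.*s\n", s1.len, s1.data, s2.len, s2.data);  /* prints "rld" */
	return (0);
}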
*/ struct mbuf * m_split(m0, len0, wait) register struct mbuf *m0; int len0, wait; { register struct mbuf *m, *n; unsigned len = len0, remain; for (m = m0; m && len > m->m_len; m = m->m_next) len -= m->m_len; if (m == 0) return (0); remain = m->m_len - len; if (m0->m_flags & M_PKTHDR) { MGETHDR(n, wait, m0->m_type); if (n == 0) return (0); n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; if (m->m_flags & M_EXT) goto extpacket; if (remain > MHLEN) { /* m can't be the lead packet */ MH_ALIGN(n, 0); n->m_next = m_split(m, len, wait); if (n->m_next == 0) { (void) m_free(n); return (0); } else { n->m_len = 0; return (n); } } else MH_ALIGN(n, remain); } else if (remain == 0) { n = m->m_next; m->m_next = 0; return (n); } else { MGET(n, wait, m->m_type); if (n == 0) return (0); M_ALIGN(n, remain); } extpacket: if (m->m_flags & M_EXT) { n->m_flags |= M_EXT; n->m_ext = m->m_ext; if (m->m_ext.ext_ref == NULL) atomic_add_char(&mclrefcnt[mtocl(m->m_ext.ext_buf)], 1); else { int s = splimp(); (*m->m_ext.ext_ref)(m->m_ext.ext_buf, m->m_ext.ext_size); splx(s); } n->m_data = m->m_data + len; } else { bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); } n->m_len = remain; m->m_len = len; n->m_next = m->m_next; m->m_next = 0; n = m_clreflimit(n, wait); return (n); } /* * Routine to copy from device local memory into mbufs. */ struct mbuf * m_devget(buf, totlen, off0, ifp, copy) char *buf; int totlen, off0; struct ifnet *ifp; void (*copy) __P((char *from, caddr_t to, u_int len)); { register struct mbuf *m; struct mbuf *top = 0, **mp = ⊤ register int off = off0, len; register char *cp; char *epkt; cp = buf; epkt = cp + totlen; if (off) { cp += off + 2 * sizeof(u_short); totlen -= 2 * sizeof(u_short); } MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == 0) return (0); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = totlen; m->m_len = MHLEN; while (totlen > 0) { if (top) { MGET(m, M_DONTWAIT, MT_DATA); if (m == 0) { m_freem(top); return (0); } m->m_len = MLEN; } len = min(totlen, epkt - cp); if (len >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if (m->m_flags & M_EXT) m->m_len = len = min(len, MCLBYTES); else len = m->m_len; } else { /* * Place initial small packet/header at end of mbuf. */ if (len < m->m_len) { if (top == 0 && len + max_linkhdr <= m->m_len) m->m_data += max_linkhdr; m->m_len = len; } else len = m->m_len; } if (copy) copy(cp, mtod(m, caddr_t), (unsigned)len); else bcopy(cp, mtod(m, caddr_t), (unsigned)len); cp += len; *mp = m; mp = &m->m_next; totlen -= len; if (cp == epkt) cp = buf; } return (top); } /* * Copy data from a buffer back into the indicated mbuf chain, * starting "off" bytes from the beginning, extending the mbuf * chain if necessary. 
*/ void m_copyback(m0, off, len, cp) struct mbuf *m0; register int off; register int len; caddr_t cp; { register int mlen; register struct mbuf *m = m0, *n; int totlen = 0; if (m0 == 0) return; while (off > (mlen = m->m_len)) { off -= mlen; totlen += mlen; if (m->m_next == 0) { n = m_getclr(M_DONTWAIT, m->m_type); if (n == 0) goto out; n->m_len = min(MLEN, len + off); m->m_next = n; } m = m->m_next; } while (len > 0) { mlen = min (m->m_len - off, len); bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen); cp += mlen; len -= mlen; mlen += off; off = 0; totlen += mlen; if (len == 0) break; if (m->m_next == 0) { n = m_get(M_DONTWAIT, m->m_type); if (n == 0) break; n->m_len = min(MLEN, len); m->m_next = n; } m = m->m_next; } out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) m->m_pkthdr.len = totlen; } void m_print(const struct mbuf *m) { int len; const struct mbuf *m2; len = m->m_pkthdr.len; m2 = m; while (len) { printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-"); len -= m2->m_len; m2 = m2->m_next; } return; } /* * "Move" mbuf pkthdr from "from" to "to". * "from" must have M_PKTHDR set, and "to" must be empty. */ void m_move_pkthdr(struct mbuf *to, struct mbuf *from) { KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster")); to->m_flags = from->m_flags & M_COPYFLAGS; to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; /* especially tags */ SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ from->m_flags &= ~M_PKTHDR; } /* * Duplicate "from"'s mbuf pkthdr in "to". * "from" must have M_PKTHDR set, and "to" must be empty. * In particular, this does a deep copy of the packet tags. */ int m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) { to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); if ((to->m_flags & M_EXT) == 0) to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; SLIST_INIT(&to->m_pkthdr.tags); return (m_tag_copy_chain(to, from, how)); } u_int m_fixhdr(struct mbuf *m0) { u_int len; len = m_length(m0, NULL); m0->m_pkthdr.len = len; return (len); } u_int m_length(struct mbuf *m0, struct mbuf **last) { struct mbuf *m; u_int len; len = 0; for (m = m0; m != NULL; m = m->m_next) { len += m->m_len; if (m->m_next == NULL) break; } if (last != NULL) *last = m; return (len); } /* * Defragment a mbuf chain, returning the shortest possible * chain of mbufs and clusters. If allocation fails and * this cannot be completed, NULL will be returned, but * the passed in chain will be unchanged. Upon success, * the original chain will be freed, and the new chain * will be returned. * * If a non-packet header is passed in, the original * mbuf (chain?) will be returned unharmed. 
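m_length() and m_fixhdr() above are a plain walk over m_next, summing m_len and optionally remembering the last mbuf so callers can append without rescanning. The same walk over a simplified segment type (seg_length is an illustrative name):

#include <stdio.h>

struct seg {
	struct seg *next;
	int len;
};

/* Sum of len over the chain; optionally report the last segment, as m_length() does. */
static int
seg_length(struct seg *s0, struct seg **last)
{
	struct seg *s;
	int len = 0;

	for (s = s0; s != NULL; s = s->next) {
		len += s->len;
		if (s->next == NULL)
			break;
	}
	if (last != NULL)
		*last = s;
	return (len);
}

int
main(void)
{
	struct seg c = { NULL, 3 }, b = { &c, 10 }, a = { &b, 5 };
	struct seg *tail;

	printf("%d\n", seg_length(&a, &tail));	/* prints 18 */
	printf("%d\n", tail->len);		/* prints 3 */
	return (0);
}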
*/ struct mbuf * m_defrag(struct mbuf *m0, int how) { struct mbuf *m_new = NULL, *m_final = NULL; int progress = 0, length; if (!(m0->m_flags & M_PKTHDR)) return (m0); m_fixhdr(m0); /* Needed sanity check */ #ifdef MBUF_STRESS_TEST if (m_defragrandomfailures) { int temp = arc4random() & 0xff; if (temp == 0xba) goto nospace; } #endif if (m0->m_pkthdr.len > MHLEN) m_final = m_getcl(how, MT_DATA, M_PKTHDR); else m_final = m_gethdr(how, MT_DATA); if (m_final == NULL) goto nospace; if (m_dup_pkthdr(m_final, m0, how) == NULL) goto nospace; m_new = m_final; while (progress < m0->m_pkthdr.len) { length = m0->m_pkthdr.len - progress; if (length > MCLBYTES) length = MCLBYTES; if (m_new == NULL) { if (length > MLEN) m_new = m_getcl(how, MT_DATA, 0); else m_new = m_get(how, MT_DATA); if (m_new == NULL) goto nospace; } m_copydata(m0, progress, length, mtod(m_new, caddr_t)); progress += length; m_new->m_len = length; if (m_new != m_final) m_cat(m_final, m_new); m_new = NULL; } #ifdef MBUF_STRESS_TEST if (m0->m_next == NULL) m_defraguseless++; #endif m_freem(m0); m0 = m_final; #ifdef MBUF_STRESS_TEST m_defragpackets++; m_defragbytes += m0->m_pkthdr.len; #endif return (m0); nospace: #ifdef MBUF_STRESS_TEST m_defragfailure++; #endif if (m_new) m_free(m_new); if (m_final) m_freem(m_final); return (NULL); } #define MAX_CLREFCOUNT 32 /* * Ensure that the number of mbuf cluster references stays less than our * desired amount by making a new copy of the entire chain. * * If a reference count has already gone negative, panic. */ static struct mbuf * m_clreflimit(struct mbuf *m0, int how) { struct mbuf *m; int maxrefs = 0; for (m = m0; m != NULL; m = m->m_next) { if ((m->m_flags & M_EXT) && (m->m_ext.ext_ref == NULL)) { maxrefs = max(maxrefs, mclrefcnt[mtocl(m->m_ext.ext_buf)]); KASSERT(mclrefcnt[mtocl(m->m_ext.ext_buf)] > 0, ("m_clreflimit: bad reference count: %d", mclrefcnt[mtocl(m->m_ext.ext_buf)])); } } if (maxrefs < MAX_CLREFCOUNT) return (m0); m_clreflimithits++; m = m_defrag(m0, how); /* Avoid returning NULL at all costs, m_split won't like it. */ if (m == NULL) return (m0); else return (m); } Index: stable/4/sys/sys/mbuf.h =================================================================== --- stable/4/sys/sys/mbuf.h (revision 118739) +++ stable/4/sys/sys/mbuf.h (revision 118740) @@ -1,712 +1,723 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ #include /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. An mbuf may add a single "mbuf cluster" of size * MCLBYTES (also in machine/param.h), which has no additional overhead * and is used instead of the internal data area; this is done when * at least MINCLSIZE of data must be stored. */ #define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ #define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ #define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ #define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ /* * Macros for type conversion: * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. * dtom(x) -- Convert data pointer within mbuf to mbuf pointer (XXX). * mtocl(x) - convert pointer within cluster to cluster index # * cltom(x) - convert cluster # to ptr to beginning of cluster */ #define mtod(m, t) ((t)((m)->m_data)) #define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) #define mtocl(x) (((uintptr_t)(x) - (uintptr_t)mbutl) >> MCLSHIFT) #define cltom(x) ((caddr_t)((uintptr_t)mbutl + \ ((uintptr_t)(x) << MCLSHIFT))) +#define mcl_valid(x) ((uintptr_t)(x) >= (uintptr_t)mbutl && \ + (uintptr_t)(x) < (uintptr_t)mbutltop) /* * Header present at the beginning of every mbuf. */ struct m_hdr { struct mbuf *mh_next; /* next buffer in chain */ struct mbuf *mh_nextpkt; /* next chain in queue/record */ caddr_t mh_data; /* location of data */ int mh_len; /* amount of data in this mbuf */ short mh_type; /* type of data in this mbuf */ short mh_flags; /* flags; see below */ }; /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ }; /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. */ struct pkthdr { struct ifnet *rcvif; /* rcv interface */ int len; /* total packet length */ /* variables for ip and tcp reassembly */ void *header; /* pointer to packet header */ /* variables for hardware checksum */ int csum_flags; /* flags regarding checksum */ int csum_data; /* data field used by csum routines */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ }; /* * Description of external storage mapped into mbuf; valid only if M_EXT is set. 
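The new mcl_valid() macro above is the point of this change: mtocl() and cltom() convert between a pointer and a cluster index purely by subtracting mbutl and shifting by MCLSHIFT, so a corrupted ext_buf pointer silently indexes mclrefcnt[] out of bounds; mcl_valid() bounds the pointer against the [mbutl, mbutltop) range that m_mballoc()/m_clalloc() now maintain before the index is trusted. The arithmetic in miniature (ptr_to_index, ptr_valid and CLSHIFT are stand-ins chosen for the example):

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define CLSHIFT		11			/* stands in for MCLSHIFT */
#define CLBYTES		(1 << CLSHIFT)		/* stands in for MCLBYTES */

static char *base, *top;			/* stand in for mbutl / mbutltop */

/* Pointer-within-cluster to cluster index, the mtocl() arithmetic. */
static size_t
ptr_to_index(const void *p)
{
	return (((uintptr_t)p - (uintptr_t)base) >> CLSHIFT);
}

/* The mcl_valid() idea: only pointers inside [base, top) yield a trustworthy index. */
static int
ptr_valid(const void *p)
{
	return ((uintptr_t)p >= (uintptr_t)base && (uintptr_t)p < (uintptr_t)top);
}

int
main(void)
{
	base = malloc(6 * CLBYTES);
	top = base + 4 * CLBYTES;	/* pretend only 4 clusters were carved */

	printf("index %zu valid %d\n", ptr_to_index(base + 3 * CLBYTES),
	    ptr_valid(base + 3 * CLBYTES));			/* index 3 valid 1 */
	printf("valid %d\n", ptr_valid(base + 5 * CLBYTES));	/* valid 0: junk pointer */
	return (0);
}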
*/ struct m_ext { caddr_t ext_buf; /* start of buffer */ void (*ext_free) /* free routine if not the usual */ (caddr_t, u_int); u_int ext_size; /* size of buffer, for ext_free */ void (*ext_ref) /* add a reference to the ext object */ (caddr_t, u_int); }; /* * The core of the mbuf object along with some shortcut defines for * practical purposes. */ struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; #define m_next m_hdr.mh_next #define m_len m_hdr.mh_len #define m_data m_hdr.mh_data #define m_type m_hdr.mh_type #define m_flags m_hdr.mh_flags #define m_nextpkt m_hdr.mh_nextpkt #define m_act m_nextpkt #define m_pkthdr M_dat.MH.MH_pkthdr #define m_ext M_dat.MH.MH_dat.MH_ext #define m_pktdat M_dat.MH.MH_dat.MH_databuf #define m_dat M_dat.M_databuf /* * mbuf flags. */ #define M_EXT 0x0001 /* has associated external storage */ #define M_PKTHDR 0x0002 /* start of record */ #define M_EOR 0x0004 /* end of record */ #define M_PROTO1 0x0008 /* protocol-specific */ #define M_PROTO2 0x0010 /* protocol-specific */ #define M_PROTO3 0x0020 /* protocol-specific */ #define M_PROTO4 0x0040 /* protocol-specific */ #define M_PROTO5 0x0080 /* protocol-specific */ /* * mbuf pkthdr flags (also stored in m_flags). */ #define M_BCAST 0x0100 /* send/received as link-level broadcast */ #define M_MCAST 0x0200 /* send/received as link-level multicast */ #define M_FRAG 0x0400 /* packet is a fragment of a larger packet */ #define M_FIRSTFRAG 0x0800 /* packet is first fragment */ #define M_LASTFRAG 0x1000 /* packet is last fragment */ /* * Flags copied when copying m_pkthdr. */ #define M_COPYFLAGS (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \ M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG | \ M_FIRSTFRAG|M_LASTFRAG) /* * Flags indicating hw checksum support and sw checksum requirements. */ #define CSUM_IP 0x0001 /* will csum IP */ #define CSUM_TCP 0x0002 /* will csum TCP */ #define CSUM_UDP 0x0004 /* will csum UDP */ #define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */ #define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ #define CSUM_IP_CHECKED 0x0100 /* did csum IP */ #define CSUM_IP_VALID 0x0200 /* ... the csum is valid */ #define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ #define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ #define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) #define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */ /* * mbuf types. */ #define MT_FREE 0 /* should be on free list */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER 2 /* packet header */ #if 0 #define MT_SOCKET 3 /* socket structure */ #define MT_PCB 4 /* protocol control block */ #define MT_RTABLE 5 /* routing tables */ #define MT_HTABLE 6 /* IMP host tables */ #define MT_ATABLE 7 /* address resolution tables */ #endif #define MT_SONAME 8 /* socket name */ #if 0 #define MT_SOOPTS 10 /* socket options */ #endif #define MT_FTABLE 11 /* fragment reassembly header */ #if 0 #define MT_RIGHTS 12 /* access rights */ #define MT_IFADDR 13 /* interface address */ #endif #define MT_TAG 13 /* volatile metadata associated to pkts */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ /* * General mbuf allocator statistics structure. 
*/ struct mbstat { u_long m_mbufs; /* mbufs obtained from page pool */ u_long m_clusters; /* clusters obtained from page pool */ u_long m_spare; /* spare field */ u_long m_clfree; /* free clusters */ u_long m_drops; /* times failed to find space */ u_long m_wait; /* times waited for space */ u_long m_drain; /* times drained protocols for space */ u_long m_mcfail; /* times m_copym failed */ u_long m_mpfail; /* times m_pullup failed */ u_long m_msize; /* length of an mbuf */ u_long m_mclbytes; /* length of an mbuf cluster */ u_long m_minclsize; /* min length of data to allocate a cluster */ u_long m_mlen; /* length of data in an mbuf */ u_long m_mhlen; /* length of data in a header mbuf */ }; /* * Flags specifying how an allocation should be made. */ #define M_DONTWAIT 1 #define M_WAIT 0 /* Freelists: * * Normal mbuf clusters are normally treated as character arrays * after allocation, but use the first word of the buffer as a free list * pointer while on the free list. */ union mcluster { union mcluster *mcl_next; char mcl_buf[MCLBYTES]; }; /* * These are identifying numbers passed to the m_mballoc_wait function, * allowing us to determine whether the call came from an MGETHDR or * an MGET. */ #define MGETHDR_C 1 #define MGET_C 2 /* * Wake up the next instance (if any) of m_mballoc_wait() which is * waiting for an mbuf to be freed. This should be called at splimp(). * * XXX: If there is another free mbuf, this routine will be called [again] * from the m_mballoc_wait routine in order to wake another sleep instance. */ #define MMBWAKEUP() do { \ if (m_mballoc_wid) { \ m_mballoc_wid--; \ wakeup_one(&m_mballoc_wid); \ } \ } while (0) /* * Same as above, but for mbuf cluster(s). */ #define MCLWAKEUP() do { \ if (m_clalloc_wid) { \ m_clalloc_wid--; \ wakeup_one(&m_clalloc_wid); \ } \ } while (0) /* * mbuf utility macros: * * MBUFLOCK(code) * prevents a section of code from from being interrupted by network * drivers. */ #define MBUFLOCK(code) do { \ int _ms = splimp(); \ \ { code } \ splx(_ms); \ } while (0) /* * mbuf allocation/deallocation macros: * * MGET(struct mbuf *m, int how, int type) * allocates an mbuf and initializes it to contain internal data. * * MGETHDR(struct mbuf *m, int how, int type) * allocates an mbuf and initializes it to contain a packet header * and internal data. 
*/ #define MGET(m, how, type) do { \ struct mbuf *_mm; \ int _mhow = (how); \ int _mtype = (type); \ int _ms = splimp(); \ \ if (mmbfree == NULL) \ (void)m_mballoc(1, _mhow); \ _mm = mmbfree; \ if (_mm != NULL) { \ mmbfree = _mm->m_next; \ mbtypes[MT_FREE]--; \ _mm->m_type = _mtype; \ mbtypes[_mtype]++; \ _mm->m_next = NULL; \ _mm->m_nextpkt = NULL; \ _mm->m_data = _mm->m_dat; \ _mm->m_flags = 0; \ (m) = _mm; \ splx(_ms); \ } else { \ splx(_ms); \ _mm = m_retry(_mhow, _mtype); \ if (_mm == NULL && _mhow == M_WAIT) \ (m) = m_mballoc_wait(MGET_C, _mtype); \ else \ (m) = _mm; \ } \ } while (0) #define MGETHDR(m, how, type) do { \ struct mbuf *_mm; \ int _mhow = (how); \ int _mtype = (type); \ int _ms = splimp(); \ \ if (mmbfree == NULL) \ (void)m_mballoc(1, _mhow); \ _mm = mmbfree; \ if (_mm != NULL) { \ mmbfree = _mm->m_next; \ mbtypes[MT_FREE]--; \ _mm->m_type = _mtype; \ mbtypes[_mtype]++; \ _mm->m_next = NULL; \ _mm->m_nextpkt = NULL; \ _mm->m_data = _mm->m_pktdat; \ _mm->m_flags = M_PKTHDR; \ _mm->m_pkthdr.rcvif = NULL; \ SLIST_INIT(&_mm->m_pkthdr.tags); \ _mm->m_pkthdr.csum_flags = 0; \ (m) = _mm; \ splx(_ms); \ } else { \ splx(_ms); \ _mm = m_retryhdr(_mhow, _mtype); \ if (_mm == NULL && _mhow == M_WAIT) \ (m) = m_mballoc_wait(MGETHDR_C, _mtype); \ else \ (m) = _mm; \ } \ } while (0) /* * Mbuf cluster macros. * MCLALLOC(caddr_t p, int how) allocates an mbuf cluster. * MCLGET adds such clusters to a normal mbuf; * the flag M_EXT is set upon success. * MCLFREE releases a reference to a cluster allocated by MCLALLOC, * freeing the cluster if the reference count has reached 0. */ #define MCLALLOC(p, how) do { \ caddr_t _mp; \ int _mhow = (how); \ int _ms = splimp(); \ \ if (mclfree == NULL) \ (void)m_clalloc(1, _mhow); \ _mp = (caddr_t)mclfree; \ if (_mp != NULL) { \ + KASSERT(mcl_valid(_mp), \ + ("MCLALLOC junk pointer: %x < %x < %x.", \ + (uintptr_t)mbutl, (uintptr_t)_mp, \ + (uintptr_t)mbutltop)); \ KASSERT(mclrefcnt[mtocl(_mp)] == 0, \ ("free cluster with refcount %d.", \ mclrefcnt[mtocl(_mp)])); \ mclrefcnt[mtocl(_mp)]++; \ mbstat.m_clfree--; \ mclfree = ((union mcluster *)_mp)->mcl_next; \ (p) = _mp; \ splx(_ms); \ } else { \ splx(_ms); \ if (_mhow == M_WAIT) \ (p) = m_clalloc_wait(); \ else \ (p) = NULL; \ } \ } while (0) #define MCLGET(m, how) do { \ struct mbuf *_mm = (m); \ \ MCLALLOC(_mm->m_ext.ext_buf, (how)); \ if (_mm->m_ext.ext_buf != NULL) { \ _mm->m_data = _mm->m_ext.ext_buf; \ _mm->m_flags |= M_EXT; \ _mm->m_ext.ext_free = NULL; \ _mm->m_ext.ext_ref = NULL; \ _mm->m_ext.ext_size = MCLBYTES; \ } \ } while (0) #define MCLFREE1(p) do { \ union mcluster *_mp = (union mcluster *)(p); \ \ + KASSERT(mcl_valid(_mp), \ + ("MCLFREE1 junk pointer: %x < %x < %x.", \ + (uintptr_t)mbutl, (uintptr_t)_mp, \ + (uintptr_t)mbutltop)); \ KASSERT(mclrefcnt[mtocl(_mp)] > 0, \ ("freeing free cluster, refcount: %d.", \ mclrefcnt[mtocl(_mp)])); \ if (--mclrefcnt[mtocl(_mp)] == 0) { \ _mp->mcl_next = mclfree; \ mclfree = _mp; \ mbstat.m_clfree++; \ MCLWAKEUP(); \ } \ } while (0) #define MCLFREE(p) MBUFLOCK( \ MCLFREE1(p); \ ) #define MEXTFREE1(m) do { \ struct mbuf *_mm = (m); \ \ if (_mm->m_ext.ext_free != NULL) \ (*_mm->m_ext.ext_free)(_mm->m_ext.ext_buf, \ _mm->m_ext.ext_size); \ else \ MCLFREE1(_mm->m_ext.ext_buf); \ } while (0) #define MEXTFREE(m) MBUFLOCK( \ MEXTFREE1(m); \ ) /* * NB: M_COPY_PKTHDR is deprecated; use either M_MOVE_PKTHDR * or m_dup_pkthdr. */ /* * Move mbuf pkthdr from "from" to "to". * from should have M_PKTHDR set, and to must be empty. 
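The KASSERTs added to MCLALLOC() and MCLFREE1() above enforce two invariants on every cluster transition: the pointer must lie inside the cluster arena (mcl_valid()), and its reference count must be zero when it leaves the free list and non-zero when it comes back. Compressed into free-standing form, with assert() standing in for KASSERT and a plain array standing in for the mbutl..mbutltop range (cl_alloc/cl_free/cl_index are illustrative names; the real MCLALLOC pops from a free list rather than taking an index):

#include <assert.h>
#include <stdio.h>

#define NCL	4

static char arena[NCL][2048];		/* stands in for the mbutl..mbutltop range */
static char refcnt[NCL];		/* stands in for mclrefcnt[] */

static int
cl_index(const char *p)
{
	/* The mcl_valid() check: refuse pointers outside the arena. */
	assert(p >= (char *)arena && p < (char *)arena + sizeof(arena));
	return ((int)((p - (char *)arena) / sizeof(arena[0])));
}

static char *
cl_alloc(int i)
{
	char *p = arena[i];

	assert(refcnt[cl_index(p)] == 0);	/* "free cluster with refcount" */
	refcnt[cl_index(p)]++;
	return (p);
}

static void
cl_free(char *p)
{
	assert(refcnt[cl_index(p)] > 0);	/* "freeing free cluster" */
	refcnt[cl_index(p)]--;
}

int
main(void)
{
	char *p = cl_alloc(2);

	cl_free(p);
	printf("refcount now %d\n", refcnt[2]);	/* prints 0 */
	return (0);
}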
* from no longer has a pkthdr after this operation. */ #define M_MOVE_PKTHDR(_to, _from) m_move_pkthdr((_to), (_from)) /* * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place * an object of the specified size at the end of the mbuf, longword aligned. */ #define M_ALIGN(m, len) do { \ (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \ } while (0) /* * As above, for mbufs allocated with m_gethdr/MGETHDR * or initialized by M_COPY_PKTHDR. */ #define MH_ALIGN(m, len) do { \ (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \ } while (0) /* * Check if we can write to an mbuf. */ #define M_EXT_WRITABLE(m) \ ((m)->m_ext.ext_free == NULL && mclrefcnt[mtocl((m)->m_ext.ext_buf)] == 1) #define M_WRITABLE(m) (!((m)->m_flags & M_EXT) || \ M_EXT_WRITABLE(m) ) /* * Compute the amount of space available * before the current start of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. */ #define M_LEADINGSPACE(m) \ ((m)->m_flags & M_EXT ? \ (M_EXT_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \ (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \ (m)->m_data - (m)->m_dat) /* * Compute the amount of space available * after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. */ #define M_TRAILINGSPACE(m) \ ((m)->m_flags & M_EXT ? \ (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \ - ((m)->m_data + (m)->m_len) : 0) : \ &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len)) /* * Arrange to prepend space of size plen to mbuf m. * If a new mbuf must be allocated, how specifies whether to wait. * If how is M_DONTWAIT and allocation fails, the original mbuf chain * is freed and m is set to NULL. */ #define M_PREPEND(m, plen, how) do { \ struct mbuf **_mmp = &(m); \ struct mbuf *_mm = *_mmp; \ int _mplen = (plen); \ int __mhow = (how); \ \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ _mm->m_len += _mplen; \ } else \ _mm = m_prepend(_mm, _mplen, __mhow); \ if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ _mm->m_pkthdr.len += _mplen; \ *_mmp = _mm; \ } while (0) /* change mbuf to new type */ #define MCHTYPE(m, t) do { \ struct mbuf *_mm = (m); \ int _mt = (t); \ int _ms = splimp(); \ \ mbtypes[_mm->m_type]--; \ mbtypes[_mt]++; \ splx(_ms); \ _mm->m_type = (_mt); \ } while (0) /* Length to m_copy to copy all. 
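M_ALIGN() and MH_ALIGN() above place a len-byte object at the end of the data area by advancing m_data by the remaining space rounded down to a long boundary. The arithmetic on its own (DATASIZE and align_offset are stand-ins for MLEN/MHLEN and the macro body):

#include <stdio.h>

#define DATASIZE 224		/* stands in for MLEN */

/* Offset at which a len-byte object lands, per the M_ALIGN() arithmetic. */
static unsigned long
align_offset(unsigned long len)
{
	return ((DATASIZE - len) & ~(sizeof(long) - 1));
}

int
main(void)
{
	/* Prints 208 with either 4- or 8-byte longs: 224 - 208 = 16 bytes
	   remain for the 14-byte object, and 208 is long-aligned. */
	printf("%lu\n", align_offset(14));
	return (0);
}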
*/ #define M_COPYALL 1000000000 /* Compatibility with 4.3 */ #define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT) #ifdef _KERNEL extern u_int m_clalloc_wid; /* mbuf cluster wait count */ extern u_int m_mballoc_wid; /* mbuf wait count */ extern int max_linkhdr; /* largest link-level header */ extern int max_protohdr; /* largest protocol header */ extern int max_hdr; /* largest link+protocol header */ extern int max_datalen; /* MHLEN - max_hdr */ extern struct mbstat mbstat; extern u_long mbtypes[MT_NTYPES]; /* per-type mbuf allocations */ extern int mbuf_wait; /* mbuf sleep time */ extern struct mbuf *mbutl; /* virtual address of mclusters */ +extern struct mbuf *mbutltop; /* highest address of mclusters */ extern char *mclrefcnt; /* cluster reference counts */ extern union mcluster *mclfree; extern struct mbuf *mmbfree; extern int nmbclusters; extern int nmbufs; extern int nsfbufs; void m_adj(struct mbuf *, int); void m_cat(struct mbuf *, struct mbuf *); int m_clalloc(int, int); caddr_t m_clalloc_wait(void); void m_copyback(struct mbuf *, int, int, caddr_t); void m_copydata(struct mbuf *, int, int, caddr_t); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_copypacket(struct mbuf *, int); struct mbuf *m_defrag(struct mbuf *, int); struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*copy)(char *, caddr_t, u_int)); struct mbuf *m_dup(struct mbuf *, int); int m_dup_pkthdr(struct mbuf *, struct mbuf *, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_free(struct mbuf *); void m_freem(struct mbuf *); struct mbuf *m_get(int, int); struct mbuf *m_getcl(int how, short type, int flags); struct mbuf *m_getclr(int, int); struct mbuf *m_gethdr(int, int); struct mbuf *m_getm(struct mbuf *, int, int, int); u_int m_length(struct mbuf *, struct mbuf **); int m_mballoc(int, int); struct mbuf *m_mballoc_wait(int, int); void m_move_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_prepend(struct mbuf *, int, int); void m_print(const struct mbuf *m); struct mbuf *m_pulldown(struct mbuf *, int, int, int *); struct mbuf *m_pullup(struct mbuf *, int); struct mbuf *m_retry(int, int); struct mbuf *m_retryhdr(int, int); struct mbuf *m_split(struct mbuf *, int, int); /* * Packets may have annotations attached by affixing a list * of "packet tags" to the pkthdr structure. Packet tags are * dynamically allocated semi-opaque data structures that have * a fixed header (struct m_tag) that specifies the size of the * memory block and a pair that identifies it. * The cookie is a 32-bit unique unsigned value used to identify * a module or ABI. By convention this value is chose as the * date+time that the module is created, expressed as the number of * seconds since the epoch (e.g. using date -u +'%s'). The type value * is an ABI/module-specific value that identifies a particular annotation * and is private to the module. For compatibility with systems * like openbsd that define packet tags w/o an ABI/module cookie, * the value PACKET_ABI_COMPAT is used to implement m_tag_get and * m_tag_find compatibility shim functions and several tag types are * defined below. Users that do not require compatibility should use * a private cookie value so that packet tag-related definitions * can be maintained privately. * * Note that the packet tag returned by m_tag_allocate has the default * memory alignment implemented by malloc. 
To reference private data * one can use a construct like: * * struct m_tag *mtag = m_tag_allocate(...); * struct foo *p = (struct foo *)(mtag+1); * * if the alignment of struct m_tag is sufficient for referencing members * of struct foo. Otherwise it is necessary to embed struct m_tag within * the private data structure to insure proper alignment; e.g. * * struct foo { * struct m_tag tag; * ... * }; * struct foo *p = (struct foo *) m_tag_allocate(...); * struct m_tag *mtag = &p->tag; */ #define PACKET_TAG_NONE 0 /* Nadda */ /* Packet tag for use with PACKET_ABI_COMPAT */ #define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ #define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ #define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ #define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ #define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ #define PACKET_TAG_GIF 8 /* GIF processing done */ #define PACKET_TAG_GRE 9 /* GRE processing done */ #define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ #define PACKET_TAG_ENCAP 11 /* Encap. processing */ #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ /* * As a temporary and low impact solution to replace the even uglier * approach used so far in some parts of the network stack (which relies * on global variables), packet tag-like annotations are stored in MT_TAG * mbufs (or lookalikes) prepended to the actual mbuf chain. * * m_type = MT_TAG * m_flags = m_tag_id * m_next = next buffer in chain. * * BE VERY CAREFUL not to pass these blocks to the mbuf handling routines. */ #define _m_tag_id m_hdr.mh_flags /* Packet tags used in the FreeBSD network stack */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_IPFW 16 /* ipfw classification */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ /* Packet tag routines */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_free(struct m_tag *); void m_tag_prepend(struct mbuf *, struct m_tag *); void m_tag_unlink(struct mbuf *, struct m_tag *); void m_tag_delete(struct mbuf *, struct m_tag *); void m_tag_delete_chain(struct mbuf *, struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); struct m_tag *m_tag_copy(struct m_tag *, int); int m_tag_copy_chain(struct mbuf *, struct mbuf *, int); void m_tag_init(struct mbuf *); struct m_tag *m_tag_first(struct mbuf *); struct m_tag *m_tag_next(struct mbuf *, struct m_tag *); /* these are for openbsd compatibility */ #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { return m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait); } static __inline struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *start) { return m_tag_locate(m, MTAG_ABI_COMPAT, type, start); } #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */
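The packet-tag comment in this header shows two layouts for private data behind a struct m_tag; the embedding form is the one to use when the private structure needs stricter alignment than struct m_tag provides. A small user-space rendering of that embedding trick, with stand-in types (struct hdr plays the role of struct m_tag, struct foo is the hypothetical consumer structure from the comment):

#include <stdio.h>
#include <stdlib.h>

struct hdr {			/* stands in for struct m_tag */
	unsigned short id;
	unsigned short len;
	unsigned int cookie;
};

struct foo {
	struct hdr tag;		/* embed the header first, as the comment suggests */
	double value;		/* member with stricter alignment than struct hdr */
};

int
main(void)
{
	struct foo *p = malloc(sizeof(*p));

	p->tag.id = 1;
	p->tag.len = sizeof(*p) - sizeof(p->tag);
	p->value = 3.14;	/* properly aligned because foo, not hdr, was allocated */
	printf("tag at %p, private data at %p\n", (void *)&p->tag,
	    (void *)&p->value);
	free(p);
	return (0);
}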