Index: head/sys/amd64/amd64/bios.c =================================================================== --- head/sys/amd64/amd64/bios.c (revision 112840) +++ head/sys/amd64/amd64/bios.c (revision 112841) @@ -1,676 +1,680 @@ /*- * Copyright (c) 1997 Michael Smith * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Code for dealing with the BIOS in x86 PC systems. */ #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #include #endif #define BIOS_START 0xe0000 #define BIOS_SIZE 0x20000 /* exported lookup results */ struct bios32_SDentry PCIbios; struct PnPBIOS_table *PnPBIOStable; static u_int bios32_SDCI; /* start fairly early */ static void bios32_init(void *junk); SYSINIT(bios32, SI_SUB_CPU, SI_ORDER_ANY, bios32_init, NULL); /* * bios32_init * * Locate various bios32 entities. 
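 *
 * In outline (see the code below): the BIOS region 0xe0000-0xfffff is
 * scanned for the paragraph-aligned "_32_" signature (the BIOS32
 * Service Directory) and for "$PnP" (the PnP BIOS table); each
 * candidate table is then validated by an 8-bit checksum, i.e. the
 * byte sum over its stated length must be zero modulo 256.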
*/ static void bios32_init(void *junk) { u_long sigaddr; struct bios32_SDheader *sdh; struct PnPBIOS_table *pt; u_int8_t ck, *cv; int i; char *p; /* * BIOS32 Service Directory, PCI BIOS */ /* look for the signature */ if ((sigaddr = bios_sigsearch(0, "_32_", 4, 16, 0)) != 0) { /* get a virtual pointer to the structure */ sdh = (struct bios32_SDheader *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)sdh, ck = 0, i = 0; i < (sdh->len * 16); i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if ((ck == 0) && (BIOS_START <= sdh->entry ) && (sdh->entry < (BIOS_START + BIOS_SIZE))) { bios32_SDCI = BIOS_PADDRTOVADDR(sdh->entry); if (bootverbose) { printf("bios32: Found BIOS32 Service Directory header at %p\n", sdh); printf("bios32: Entry = 0x%x (%x) Rev = %d Len = %d\n", sdh->entry, bios32_SDCI, sdh->revision, sdh->len); } /* Allow user override of PCI BIOS search */ if (((p = getenv("machdep.bios.pci")) == NULL) || strcmp(p, "disable")) { /* See if there's a PCI BIOS entrypoint here */ PCIbios.ident.id = 0x49435024; /* PCI systems should have this */ if (!bios32_SDlookup(&PCIbios) && bootverbose) printf("pcibios: PCI BIOS entry at 0x%x+0x%x\n", PCIbios.base, PCIbios.entry); } if (p != NULL) freeenv(p); } else { printf("bios32: Bad BIOS32 Service Directory\n"); } } /* * PnP BIOS * * Allow user override of PnP BIOS search */ if ((((p = getenv("machdep.bios.pnp")) == NULL) || strcmp(p, "disable")) && ((sigaddr = bios_sigsearch(0, "$PnP", 4, 16, 0)) != 0)) { /* get a virtual pointer to the structure */ pt = (struct PnPBIOS_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)pt, ck = 0, i = 0; i < pt->len; i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if (ck == 0) { PnPBIOStable = pt; if (bootverbose) { printf("pnpbios: Found PnP BIOS data at %p\n", pt); printf("pnpbios: Entry = %x:%x Rev = %d.%d\n", pt->pmentrybase, pt->pmentryoffset, pt->version >> 4, pt->version & 0xf); if ((pt->control & 0x3) == 0x01) printf("pnpbios: Event flag at %x\n", pt->evflagaddr); if (pt->oemdevid != 0) printf("pnpbios: OEM ID %x\n", pt->oemdevid); } } else { printf("pnpbios: Bad PnP BIOS data checksum\n"); } } if (p != NULL) freeenv(p); if (bootverbose) { /* look for other known signatures */ printf("Other BIOS signatures found:\n"); } } /* * bios32_SDlookup * * Query the BIOS32 Service Directory for the service named in (ent), * returns nonzero if the lookup fails. The caller must fill in * (ent->ident), the remainder are populated on a successful lookup. */ int bios32_SDlookup(struct bios32_SDentry *ent) { struct bios_regs args; if (bios32_SDCI == 0) return (1); args.eax = ent->ident.id; /* set up arguments */ args.ebx = args.ecx = args.edx = 0; bios32(&args, bios32_SDCI, GSEL(GCODE_SEL, SEL_KPL)); if ((args.eax & 0xff) == 0) { /* success? */ ent->base = args.ebx; ent->len = args.ecx; ent->entry = args.edx; ent->ventry = BIOS_PADDRTOVADDR(ent->base + ent->entry); return (0); /* all OK */ } return (1); /* failed */ } /* * bios_sigsearch * * Search some or all of the BIOS region for a signature string. * * (start) Optional offset returned from this function * (for searching for multiple matches), or NULL * to start the search from the base of the BIOS. * Note that this will be a _physical_ address in * the range 0xe0000 - 0xfffff. * (sig) is a pointer to the byte(s) of the signature. * (siglen) number of bytes in the signature. * (paralen) signature paragraph (alignment) size. * (sigofs) offset of the signature within the paragraph.
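 *
 * For example, the call bios_sigsearch(0, "_32_", 4, 16, 0) made in
 * bios32_init() above scans the whole BIOS region, testing each
 * 16-byte paragraph for the 4-byte signature "_32_" at offset 0.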
* * Returns the _physical_ address of the found signature, 0 if the * signature was not found. */ u_int32_t bios_sigsearch(u_int32_t start, u_char *sig, int siglen, int paralen, int sigofs) { u_char *sp, *end; /* compute the starting address */ if ((start >= BIOS_START) && (start <= (BIOS_START + BIOS_SIZE))) { sp = (char *)BIOS_PADDRTOVADDR(start); } else if (start == 0) { sp = (char *)BIOS_PADDRTOVADDR(BIOS_START); } else { return 0; /* bogus start address */ } /* compute the end address */ end = (u_char *)BIOS_PADDRTOVADDR(BIOS_START + BIOS_SIZE); /* loop searching */ while ((sp + sigofs + siglen) < end) { /* compare here */ if (!bcmp(sp + sigofs, sig, siglen)) { /* convert back to physical address */ return((u_int32_t)BIOS_VADDRTOPADDR(sp)); } sp += paralen; } return(0); } /* * do not staticize, used by bioscall.s */ union { struct { u_short offset; u_short segment; } vec16; struct { u_int offset; u_short segment; } vec32; } bioscall_vector; /* bios jump vector */ void set_bios_selectors(struct bios_segments *seg, int flags) { struct soft_segment_descriptor ssd = { 0, /* segment base address (overwritten) */ 0, /* length (overwritten) */ SDT_MEMERA, /* segment type (overwritten) */ 0, /* priority level */ 1, /* descriptor present */ 0, 0, 1, /* descriptor size (overwritten) */ 0 /* granularity == byte units */ }; union descriptor *p_gdt; #ifdef SMP p_gdt = &gdt[PCPU_GET(cpuid) * NGDT]; #else p_gdt = gdt; #endif ssd.ssd_base = seg->code32.base; ssd.ssd_limit = seg->code32.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE32_SEL].sd); ssd.ssd_def32 = 0; if (flags & BIOSCODE_FLAG) { ssd.ssd_base = seg->code16.base; ssd.ssd_limit = seg->code16.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE16_SEL].sd); } ssd.ssd_type = SDT_MEMRWA; if (flags & BIOSDATA_FLAG) { ssd.ssd_base = seg->data.base; ssd.ssd_limit = seg->data.limit; ssdtosd(&ssd, &p_gdt[GBIOSDATA_SEL].sd); } if (flags & BIOSUTIL_FLAG) { ssd.ssd_base = seg->util.base; ssd.ssd_limit = seg->util.limit; ssdtosd(&ssd, &p_gdt[GBIOSUTIL_SEL].sd); } if (flags & BIOSARGS_FLAG) { ssd.ssd_base = seg->args.base; ssd.ssd_limit = seg->args.limit; ssdtosd(&ssd, &p_gdt[GBIOSARGS_SEL].sd); } } extern int vm86pa; extern void bios16_jmp(void); /* * this routine is really greedy with selectors, and uses 5: * * 32-bit code selector: to return to kernel * 16-bit code selector: for running code * data selector: for 16-bit data * util selector: extra utility selector * args selector: to handle pointers * * the util selector is set from the util16 entry in bios16_args, if a * "U" specifier is seen. * * See for description of format specifiers */ int bios16(struct bios_args *args, char *fmt, ...) { char *p, *stack, *stack_top; va_list ap; int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; pt_entry_t *pte; pd_entry_t *ptd; arg_start = 0xffffffff; arg_end = 0; /* * Some BIOS entrypoints attempt to copy the largest-case * argument frame (in order to generalise handling for * different entry types). If our argument frame is * smaller than this, the BIOS will reach off the top of * our constructed stack segment. Pad the top of the stack * with some garbage to avoid this. 
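 * (This is why the frame below is built starting 32 bytes short of the
 * top of the one-page stack segment: stack = PAGE_SIZE - 32.)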
*/ stack = (caddr_t)PAGE_SIZE - 32; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); arg_start = min(arg_start, i); arg_end = max(arg_end, i); flags |= BIOSARGS_FLAG; stack -= 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); stack -= 4; break; case 'U': /* 16-bit selector */ flags |= BIOSUTIL_FLAG; /* FALLTHROUGH */ case 'D': /* 16-bit selector */ case 'C': /* 16-bit selector */ stack -= 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); stack -= 2; break; default: return (EINVAL); } } if (flags & BIOSARGS_FLAG) { if (arg_end - arg_start > ctob(16)) return (EACCES); args->seg.args.base = arg_start; args->seg.args.limit = 0xffff; } args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; args->seg.code32.limit = 0xffff; ptd = (pd_entry_t *)rcr3(); - if (ptd == (u_int *)IdlePTD) { +#ifdef PAE + if (ptd == IdlePDPT) { +#else + if (ptd == IdlePTD) { +#endif /* * no page table, so create one and install it. */ pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* * this is a user-level page table */ pte = PTmap; } /* * install pointer to page 0. we don't need to flush the tlb, * since there should not be a previous mapping for page 0. */ *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V; stack_top = stack; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); *(u_int *)stack = (i - arg_start) | (GSEL(GBIOSARGS_SEL, SEL_KPL) << 16); stack += 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); *(u_int *)stack = i; stack += 4; break; case 'U': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSUTIL_SEL, SEL_KPL); stack += 2; break; case 'D': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSDATA_SEL, SEL_KPL); stack += 2; break; case 'C': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSCODE16_SEL, SEL_KPL); stack += 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); *(u_short *)stack = i; stack += 2; break; default: return (EINVAL); } } set_bios_selectors(&args->seg, flags); bioscall_vector.vec16.offset = (u_short)args->entry; bioscall_vector.vec16.segment = GSEL(GBIOSCODE16_SEL, SEL_KPL); i = bios16_call(&args->r, stack_top); if (pte == PTmap) { *pte = 0; /* remove entry */ } else { *ptd = 0; /* remove page table */ free(pte, M_TEMP); /* ... and free it */ } /* * XXX only needs to be invlpg(0) but that doesn't work on the 386 */ pmap_invalidate_all(kernel_pmap); return (i); } #ifdef DEV_ISA /* * PnP BIOS interface; enumerate devices only known to the system * BIOS and save information about them for later use. 
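 *
 * The pnp_sysdev structure below mirrors the device node record that
 * the PnP BIOS "get system device node" call returns: a 16-bit node
 * size, a handle, an EISA-style device ID, a type code and an
 * attribute word, followed by variable-length resource data in
 * devdata[].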
*/ struct pnp_sysdev { u_int16_t size; u_int8_t handle; u_int32_t devid; u_int8_t type[3]; u_int16_t attrib; #define PNPATTR_NODISABLE (1<<0) /* can't be disabled */ #define PNPATTR_NOCONFIG (1<<1) /* can't be configured */ #define PNPATTR_OUTPUT (1<<2) /* can be primary output */ #define PNPATTR_INPUT (1<<3) /* can be primary input */ #define PNPATTR_BOOTABLE (1<<4) /* can be booted from */ #define PNPATTR_DOCK (1<<5) /* is a docking station */ #define PNPATTR_REMOVEABLE (1<<6) /* device is removable */ #define PNPATTR_CONFIG_STATIC (0) #define PNPATTR_CONFIG_DYNAMIC (1) #define PNPATTR_CONFIG_DYNONLY (3) #define PNPATTR_CONFIG(a) (((a) >> 7) & 0x3) /* device-specific data comes here */ u_int8_t devdata[0]; } __packed; /* We have to cluster arguments within a 64k range for the bios16 call */ struct pnp_sysdevargs { u_int16_t next; struct pnp_sysdev node; }; /* * This function is called after the bus has assigned resource * locations for a logical device. */ static void pnpbios_set_config(void *arg, struct isa_config *config, int enable) { } /* * Quiz the PnP BIOS, build a list of PNP IDs and resource data. */ static void pnpbios_identify(driver_t *driver, device_t parent) { struct PnPBIOS_table *pt = PnPBIOStable; struct bios_args args; struct pnp_sysdev *pd; struct pnp_sysdevargs *pda; u_int16_t ndevs, bigdev; int error, currdev; u_int8_t *devnodebuf, tag; u_int32_t *devid, *compid; int idx, left; device_t dev; /* no PnP BIOS information */ if (pt == NULL) return; /* ACPI already active */ if (devclass_get_softc(devclass_find("ACPI"), 0) != NULL) return; /* get count of PnP devices */ bzero(&args, sizeof(args)); args.seg.code16.base = BIOS_PADDRTOVADDR(pt->pmentrybase); args.seg.code16.limit = 0xffff; /* XXX ? */ args.seg.data.base = BIOS_PADDRTOVADDR(pt->pmdataseg); args.seg.data.limit = 0xffff; args.entry = pt->pmentryoffset; if ((error = bios16(&args, PNP_COUNT_DEVNODES, &ndevs, &bigdev)) || (args.r.eax & 0xff)) printf("pnpbios: error %d/%x getting device count/size limit\n", error, args.r.eax); ndevs &= 0xff; /* clear high byte garbage */ if (bootverbose) printf("pnpbios: %d devices, largest %d bytes\n", ndevs, bigdev); devnodebuf = malloc(bigdev + (sizeof(struct pnp_sysdevargs) - sizeof(struct pnp_sysdev)), M_DEVBUF, M_NOWAIT); pda = (struct pnp_sysdevargs *)devnodebuf; pd = &pda->node; for (currdev = 0, left = ndevs; (currdev != 0xff) && (left > 0); left--) { bzero(pd, bigdev); pda->next = currdev; /* get current configuration */ if ((error = bios16(&args, PNP_GET_DEVNODE, &pda->next, &pda->node, 1))) { printf("pnpbios: error %d making BIOS16 call\n", error); break; } if ((error = (args.r.eax & 0xff))) { if (bootverbose) printf("pnpbios: %s 0x%x fetching node %d\n", error & 0x80 ? "error" : "warning", error, currdev); if (error & 0x80) break; } currdev = pda->next; if (pd->size < sizeof(struct pnp_sysdev)) { printf("pnpbios: bogus system node data, aborting scan\n"); break; } /* * If we are in APIC_IO mode, we should ignore the ISA PIC if it * shows up. Likewise, in !APIC_IO mode, we should ignore the * APIC (less important). * This is significant because the ISA PIC will claim IRQ 2 (which * it uses for chaining), while in APIC mode this is a valid IRQ * available for general use.
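 * (PNP0000 is the EISA ID of the AT interrupt controller and PNP0003
 * that of the APIC, hence the pnp_eisaformat() comparisons below.)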
*/ #ifdef APIC_IO if (!strcmp(pnp_eisaformat(pd->devid), "PNP0000")) /* ISA PIC */ continue; #else if (!strcmp(pnp_eisaformat(pd->devid), "PNP0003")) /* APIC */ continue; #endif /* Add the device and parse its resources */ dev = BUS_ADD_CHILD(parent, ISA_ORDER_PNP, NULL, -1); isa_set_vendorid(dev, pd->devid); isa_set_logicalid(dev, pd->devid); /* * It appears that some PnP BIOS doesn't allow us to re-enable * the embedded system device once it is disabled. We shall * mark all system device nodes as "cannot be disabled", regardless * of actual settings in the device attribute byte. * XXX isa_set_configattr(dev, ((pd->attrib & PNPATTR_NODISABLE) ? 0 : ISACFGATTR_CANDISABLE) | ((!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0)); */ isa_set_configattr(dev, (!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0); ISA_SET_CONFIG_CALLBACK(parent, dev, pnpbios_set_config, 0); pnp_parse_resources(dev, &pd->devdata[0], pd->size - sizeof(struct pnp_sysdev), 0); if (!device_get_desc(dev)) device_set_desc_copy(dev, pnp_eisaformat(pd->devid)); /* Find device IDs */ devid = &pd->devid; compid = NULL; /* look for a compatible device ID too */ left = pd->size - sizeof(struct pnp_sysdev); idx = 0; while (idx < left) { tag = pd->devdata[idx++]; if (PNP_RES_TYPE(tag) == 0) { /* Small resource */ switch (PNP_SRES_NUM(tag)) { case PNP_TAG_COMPAT_DEVICE: compid = (u_int32_t *)(pd->devdata + idx); if (bootverbose) printf("pnpbios: node %d compat ID 0x%08x\n", pd->handle, *compid); /* FALLTHROUGH */ case PNP_TAG_END: idx = left; break; default: idx += PNP_SRES_LEN(tag); break; } } else /* Large resource, skip it */ idx += *(u_int16_t *)(pd->devdata + idx) + 2; } if (bootverbose) { printf("pnpbios: handle %d device ID %s (%08x)", pd->handle, pnp_eisaformat(*devid), *devid); if (compid != NULL) printf(" compat ID %s (%08x)", pnp_eisaformat(*compid), *compid); printf("\n"); } } } static device_method_t pnpbios_methods[] = { /* Device interface */ DEVMETHOD(device_identify, pnpbios_identify), { 0, 0 } }; static driver_t pnpbios_driver = { "pnpbios", pnpbios_methods, 1, /* no softc */ }; static devclass_t pnpbios_devclass; DRIVER_MODULE(pnpbios, isa, pnpbios_driver, pnpbios_devclass, 0, 0); #endif /* DEV_ISA */ Index: head/sys/amd64/amd64/locore.S =================================================================== --- head/sys/amd64/amd64/locore.S (revision 112840) +++ head/sys/amd64/amd64/locore.S (revision 112841) @@ -1,892 +1,927 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ * * originally from: locore.s, by William F. Jolitz * * Substantially rewritten by David Greenman, Rod Grimes, * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp * and many others. */ #include "opt_bootp.h" #include "opt_compat.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include "assym.s" /* * XXX * * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. Perhaps newer versions * will have more pleasant appearance. */ /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). * * NOTE: PTDpde, PTmap, and PTD are being defined as address symbols. * In C you access them directly, and not with a '*'. Storage is not being * allocated. They will magically address the correct locations in KVM * which C will treat as normal variables of the type they are defined in * machine/pmap.h, i.e. PTDpde = XX ; to set a PDE entry, NOT *PTDpde = XX; */ .globl PTmap,PTD,PTDpde .set PTmap,(PTDPTDI << PDRSHIFT) .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. * See the note above. It is true here as well. */ .globl APTmap,APTD,APTDpde .set APTmap,APTDPTDI << PDRSHIFT .set APTD,APTmap + (APTDPTDI * PAGE_SIZE) .set APTDpde,PTD + (APTDPTDI * PDESIZE) #ifdef SMP /* * Define layout of per-cpu address space. * This is "constructed" in locore.s on the BSP and in mp_machdep.c * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! 
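 *
 * SMP_prvspace below is the base of that per-cpu window; the local
 * APIC is mapped at its last page, which is what the lapic symbol
 * (SMP_prvspace + (NPTEPG-1) * PAGE_SIZE) encodes.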
*/ .globl SMP_prvspace, lapic .set SMP_prvspace,(MPPTDI << PDRSHIFT) .set lapic,SMP_prvspace + (NPTEPG-1) * PAGE_SIZE #endif /* SMP */ /* * Compiled KERNBASE location */ .globl kernbase .set kernbase,KERNBASE /* * Globals */ .data ALIGN_DATA /* just to be sure */ .globl HIDENAME(tmpstk) .space 0x2000 /* space for tmpstk - temporary stack */ HIDENAME(tmpstk): .globl bootinfo bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ .globl KERNend KERNend: .long 0 /* phys addr end of kernel (just after bss) */ physfree: .long 0 /* phys addr of next free page */ #ifdef SMP .globl cpu0prvpage cpu0pp: .long 0 /* phys addr cpu0 private pg */ cpu0prvpage: .long 0 /* relocated version */ .globl SMPpt SMPptpa: .long 0 /* phys addr SMP page table */ SMPpt: .long 0 /* relocated version */ #endif /* SMP */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif KPTphys: .long 0 /* phys addr of kernel page tables */ .globl proc0uarea, proc0kstack proc0uarea: .long 0 /* address of proc 0 uarea space */ proc0kstack: .long 0 /* address of proc 0 kstack space */ p0upa: .long 0 /* phys addr of proc0's UAREA */ p0kpa: .long 0 /* phys addr of proc0's STACK */ vm86phystk: .long 0 /* PA of vm86/bios stack */ .globl vm86paddr, vm86pa vm86paddr: .long 0 /* address of vm86 region */ vm86pa: .long 0 /* phys addr of vm86 region */ #ifdef PC98 .globl pc98_system_parameter pc98_system_parameter: .space 0x240 #endif /********************************************************************** * * Some handy macros * */ #define R(foo) ((foo)-KERNBASE) #define ALLOCPAGES(foo) \ movl R(physfree), %esi ; \ movl $((foo)*PAGE_SIZE), %eax ; \ addl %esi, %eax ; \ movl %eax, R(physfree) ; \ movl %esi, %edi ; \ movl $((foo)*PAGE_SIZE),%ecx ; \ xorl %eax,%eax ; \ cld ; \ rep ; \ stosb /* * fillkpt * eax = page frame address * ebx = index into page table * ecx = how many pages to map * base = base address of page dir/table * prot = protection bits */ #define fillkpt(base, prot) \ shll $PTESHIFT,%ebx ; \ addl base,%ebx ; \ orl $PG_V,%eax ; \ orl prot,%eax ; \ 1: movl %eax,(%ebx) ; \ addl $PAGE_SIZE,%eax ; /* increment physical address */ \ addl $PTESIZE,%ebx ; /* next pte */ \ loop 1b /* * fillkptphys(prot) * eax = physical address * ecx = how many pages to map * prot = protection bits */ #define fillkptphys(prot) \ movl %eax, %ebx ; \ shrl $PAGE_SHIFT, %ebx ; \ fillkpt(R(KPTphys), prot) .text /********************************************************************** * * This is where the bootblocks start us, set the ball rolling... * */ NON_GPROF_ENTRY(btext) #ifdef PC98 /* save SYSTEM PARAMETER for resume (NS/T or other) */ movl $0xa1400,%esi movl $R(pc98_system_parameter),%edi movl $0x0240,%ecx cld rep movsb #else /* IBM-PC */ /* Tell the bios to warmboot next time */ movw $0x1234,0x472 #endif /* PC98 */ /* Set up a real frame in case the double return in newboot is executed. */ pushl %ebp movl %esp, %ebp /* Don't trust what the BIOS gives for eflags. */ pushl $PSL_KERNEL popfl /* * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap * to set %cs, %ds, %es and %ss. */ mov %ds, %ax mov %ax, %fs mov %ax, %gs call recover_bootinfo /* Get onto a stack that we can trust. */ /* * XXX this step is delayed in case recover_bootinfo needs to return via * the old stack, but it need not be, since recover_bootinfo actually * returns via the old frame. 
movl $R(HIDENAME(tmpstk)),%esp #ifdef PC98 /* pc98_machine_type & M_EPSON_PC98 */ testb $0x02,R(pc98_system_parameter)+220 jz 3f /* epson_machine_id <= 0x0b */ cmpb $0x0b,R(pc98_system_parameter)+224 ja 3f /* count up memory */ movl $0x100000,%eax /* next, tally remaining memory */ movl $0xFFF-0x100,%ecx 1: movl 0(%eax),%ebx /* save location to check */ movl $0xa55a5aa5,0(%eax) /* write test pattern */ cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ jne 2f movl %ebx,0(%eax) /* restore memory */ addl $PAGE_SIZE,%eax loop 1b 2: subl $0x100000,%eax shrl $17,%eax movb %al,R(pc98_system_parameter)+1 3: movw R(pc98_system_parameter+0x86),%ax movw %ax,R(cpu_id) #endif call identify_cpu /* clear bss */ /* * XXX this should be done a little earlier. * * XXX we don't check that there is memory for our bss and page tables * before using it. * * XXX the boot program somewhat bogusly clears the bss. We still have * to do it in case we were unzipped by kzipboot. Then the boot program * only clears kzipboot's bss. * * XXX the gdt and idt are still somewhere in the boot program. We * depend on the convention that the boot program is below 1MB and we * are above 1MB to keep the gdt and idt away from the bss and page * tables. */ movl $R(end),%ecx movl $R(edata),%edi subl %edi,%ecx xorl %eax,%eax cld rep stosb call create_pagetables /* * If the CPU has support for VME, turn it on. */ testl $CPUID_VME, R(cpu_feature) jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high virtualized address */ ret /* now running relocated at KERNBASE where the system is linked to run */ begin: /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ /* bootstrap stack end location */ leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ /* * Clean up the stack in a way that db_numargs() understands, so * that backtraces in ddb don't underrun the stack. Traps for * inaccessible memory are more fatal than usual this early. */ addl $4,%esp call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ addl $0,%esp /* for db_numargs() again */ /* * Signal trampoline, copied to top of user stack */ NON_GPROF_ENTRY(sigcode) calll *SIGF_HANDLER(%esp) leal SIGF_UC(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC_EFLAGS(%eax) jne 1f movl UC_GS(%eax),%gs /* restore %gs */ 1: movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #ifdef COMPAT_FREEBSD4 ALIGN_TEXT freebsd4_sigcode: calll *SIGF_HANDLER(%esp) leal SIGF_UC4(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC4_EFLAGS(%eax) jne 1f movl UC4_GS(%eax),%gs /* restore %gs */ 1: movl $344,%eax /* 4.x SYS_sigreturn */ pushl %eax /* junk to fake return addr.
*/ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #endif #ifdef COMPAT_43 ALIGN_TEXT osigcode: call *SIGF_HANDLER(%esp) /* call signal handler */ lea SIGF_SC(%esp),%eax /* get sigcontext */ pushl %eax testl $PSL_VM,SC_PS(%eax) jne 9f movl SC_GS(%eax),%gs /* restore %gs */ 9: movl $103,%eax /* 3.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ 0: jmp 0b #endif /* COMPAT_43 */ ALIGN_TEXT esigcode: .data .globl szsigcode szsigcode: .long esigcode-sigcode #ifdef COMPAT_FREEBSD4 .globl szfreebsd4_sigcode szfreebsd4_sigcode: .long esigcode-freebsd4_sigcode #endif #ifdef COMPAT_43 .globl szosigcode szosigcode: .long esigcode-osigcode #endif .text /********************************************************************** * * Recover the bootinfo passed to us from the boot program * */ recover_bootinfo: /* * This code is called in different ways depending on what loaded * and started the kernel. This is used to detect how we get the * arguments from the other code and what we do with them. * * Old disk boot blocks: * (*btext)(howto, bootdev, cyloffset, esym); * [return address == 0, and can NOT be returned to] * [cyloffset was not supported by the FreeBSD boot code * and always passed in as 0] * [esym is also known as total in the boot code, and * was never properly supported by the FreeBSD boot code] * * Old diskless netboot code: * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); * [return address != 0, and can NOT be returned to] * If we are being booted by this code it will NOT work, * so we are just going to halt if we find this case. * * New uniform boot code: * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) * [return address != 0, and can be returned to] * * There may seem to be a lot of wasted arguments in here, but * that is so the newer boot code can still load very old kernels * and old boot code can load new kernels. */ /* * The old style disk boot blocks fake a frame on the stack and * did an lret to get here. The frame on the stack has a return * address of 0. */ cmpl $0,4(%ebp) je olddiskboot /* * We have some form of return address, so this is either the * old diskless netboot code, or the new uniform code. That can * be detected by looking at the 5th argument, if it is 0 * we are being booted by the new uniform boot code. */ cmpl $0,24(%ebp) je newboot /* * Seems we have been loaded by the old diskless boot code, we * don't stand a chance of running as the diskless structure * changed considerably between the two, so just halt. */ hlt /* * We have been loaded by the new uniform boot code. * Let's check the bootinfo version, and if we do not understand * it we return to the loader with a status of 1 to indicate this error */ newboot: movl 28(%ebp),%ebx /* &bootinfo.version */ movl BI_VERSION(%ebx),%eax cmpl $1,%eax /* We only understand version 1 */ je 1f movl $1,%eax /* Return status */ leave /* * XXX this returns to our caller's caller (as is required) since * we didn't set up a frame and our caller did. */ ret 1: /* * If we have a kernelname copy it in */ movl BI_KERNELNAME(%ebx),%esi cmpl $0,%esi je 2f /* No kernelname */ movl $MAXPATHLEN,%ecx /* Brute force!!! */ movl $R(kernelname),%edi cmpb $'/',(%esi) /* Make sure it starts with a slash */ je 1f movb $'/',(%edi) incl %edi decl %ecx 1: cld rep movsb 2: /* * Determine the size of the boot loader's copy of the bootinfo * struct. 
This is impossible to do properly because old versions * of the struct don't contain a size field and there are 2 old * versions with the same version number. */ movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ je got_bi_size /* no, sizeless version */ movl BI_SIZE(%ebx),%ecx got_bi_size: /* * Copy the common part of the bootinfo struct */ movl %ebx,%esi movl $R(bootinfo),%edi cmpl $BOOTINFO_SIZE,%ecx jbe got_common_bi_size movl $BOOTINFO_SIZE,%ecx got_common_bi_size: cld rep movsb #ifdef NFS_ROOT #ifndef BOOTP_NFSV3 /* * If we have a nfs_diskless structure copy it in */ movl BI_NFS_DISKLESS(%ebx),%esi cmpl $0,%esi je olddiskboot movl $R(nfs_diskless),%edi movl $NFSDISKLESS_SIZE,%ecx cld rep movsb movl $R(nfs_diskless_valid),%edi movl $1,(%edi) #endif #endif /* * The old style disk boot. * (*btext)(howto, bootdev, cyloffset, esym); * Note that the newer boot code just falls into here to pick * up howto and bootdev, cyloffset and esym are no longer used */ olddiskboot: movl 8(%ebp),%eax movl %eax,R(boothowto) movl 12(%ebp),%eax movl %eax,R(bootdev) ret /********************************************************************** * * Identify the CPU and initialize anything special about it * */ identify_cpu: /* Try to toggle alignment check flag; does not exist on 386. */ pushfl popl %eax movl %eax,%ecx orl $PSL_AC,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_AC,%eax pushl %ecx popfl testl %eax,%eax jnz try486 /* NexGen CPU does not have alignment check flag. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jz trynexgen popfl movl $CPU_386,R(cpu) jmp 3f trynexgen: popfl movl $CPU_NX586,R(cpu) movl $0x4778654e,R(cpu_vendor) # store vendor string movl $0x72446e65,R(cpu_vendor+4) movl $0x6e657669,R(cpu_vendor+8) movl $0,R(cpu_vendor+12) jmp 3f try486: /* Try to toggle identification flag; does not exist on early 486s. */ pushfl popl %eax movl %eax,%ecx xorl $PSL_ID,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_ID,%eax pushl %ecx popfl testl %eax,%eax jnz trycpuid movl $CPU_486,R(cpu) /* * Check Cyrix CPU * Cyrix CPUs do not change the undefined flags following * execution of the divide instruction which divides 5 by 2. * * Note: CPUID is enabled on M2, so it passes another way. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jnc trycyrix popfl jmp 3f /* You may use Intel CPU. */ trycyrix: popfl /* * IBM Blue Lightning CPU also doesn't change the undefined flags. * Because IBM doesn't disclose the information for the Blue Lightning * CPU, we couldn't distinguish it from Cyrix's (including IBM * brand of Cyrix CPUs). */ movl $0x69727943,R(cpu_vendor) # store vendor string movl $0x736e4978,R(cpu_vendor+4) movl $0x64616574,R(cpu_vendor+8) jmp 3f trycpuid: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid # cpuid 0 movl %eax,R(cpu_high) # highest capability movl %ebx,R(cpu_vendor) # store vendor string movl %edx,R(cpu_vendor+4) movl %ecx,R(cpu_vendor+8) movb $0,R(cpu_vendor+12) movl $1,%eax cpuid # cpuid 1 movl %eax,R(cpu_id) # store cpu_id movl %ebx,R(cpu_procinfo) # store cpu_procinfo movl %edx,R(cpu_feature) # store cpu_feature rorl $8,%eax # extract family type andl $15,%eax cmpl $5,%eax jae 1f /* less than Pentium; must be 486 */ movl $CPU_486,R(cpu) jmp 3f 1: /* a Pentium?
*/ cmpl $5,%eax jne 2f movl $CPU_586,R(cpu) jmp 3f 2: /* Greater than Pentium...call it a Pentium Pro */ movl $CPU_686,R(cpu) 3: ret /********************************************************************** * * Create the first page directory and its page tables. * */ create_pagetables: /* Find end of kernel image (rounded up to a page boundary). */ movl $R(_end),%esi /* Include symbols, if any. */ movl R(bootinfo+BI_ESYMTAB),%edi testl %edi,%edi je over_symalloc movl %edi,%esi movl $KERNBASE,%edi addl %edi,R(bootinfo+BI_SYMTAB) addl %edi,R(bootinfo+BI_ESYMTAB) over_symalloc: /* If we are told where the end of the kernel space is, believe it. */ movl R(bootinfo+BI_KERNEND),%edi testl %edi,%edi je no_kernend movl %edi,%esi no_kernend: addl $PAGE_MASK,%esi andl $~PAGE_MASK,%esi movl %esi,R(KERNend) /* save end of kernel */ movl %esi,R(physfree) /* next free page is at end of kernel */ /* Allocate Kernel Page Tables */ ALLOCPAGES(NKPT) movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) /* Allocate UPAGES */ ALLOCPAGES(UAREA_PAGES) movl %esi,R(p0upa) addl $KERNBASE, %esi movl %esi, R(proc0uarea) ALLOCPAGES(KSTACK_PAGES) movl %esi,R(p0kpa) addl $KERNBASE, %esi movl %esi, R(proc0kstack) ALLOCPAGES(1) /* vm86/bios stack */ movl %esi,R(vm86phystk) ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ movl %esi,R(vm86pa) addl $KERNBASE, %esi movl %esi, R(vm86paddr) #ifdef SMP /* Allocate cpu0's private data page */ ALLOCPAGES(1) movl %esi,R(cpu0pp) addl $KERNBASE, %esi movl %esi, R(cpu0prvpage) /* relocated to KVM space */ /* Allocate SMP page table page */ ALLOCPAGES(1) movl %esi,R(SMPptpa) addl $KERNBASE, %esi movl %esi, R(SMPpt) /* relocated to KVM space */ #endif /* SMP */ /* Map read-only from zero to the end of the kernel text section */ xorl %eax, %eax xorl %edx,%edx movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map read-write, data, bss and symbols */ movl $R(etext),%eax addl $PAGE_MASK, %eax andl $~PAGE_MASK, %eax movl $PG_RW,%edx movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) /* Map proc0's UPAGES in the physical way ... */ movl R(p0upa), %eax movl $(UAREA_PAGES), %ecx fillkptphys($PG_RW) /* Map proc0's KSTACK in the physical way ... */ movl R(p0kpa), %eax movl $(KSTACK_PAGES), %ecx fillkptphys($PG_RW) /* Map ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkptphys($PG_RW) /* Map space for the vm86 region */ movl R(vm86phystk), %eax movl $4, %ecx fillkptphys($PG_RW) /* Map page 0 into the vm86 page table */ movl $0, %eax movl $0, %ebx movl $1, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) /* ...likewise for the ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) #ifdef SMP /* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ movl R(cpu0pp), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map SMP page table page into global kmem FWIW */ movl R(SMPptpa), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map the private page into the SMP page table */ movl R(cpu0pp), %eax movl $0, %ebx /* pte offset = 0 */ movl $1, %ecx /* one private page coming right up */ fillkpt(R(SMPptpa), $PG_RW) /* ... 
and put the page table table in the pde. */ movl R(SMPptpa), %eax movl $MPPTDI, %ebx movl $1, %ecx fillkpt(R(IdlePTD), $PG_RW) /* Fakeup VA for the local apic to allow early traps. */ ALLOCPAGES(1) movl %esi, %eax movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ movl R(KPTphys), %eax xorl %ebx, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install pde's for pt's */ movl R(KPTphys), %eax movl $KPTDI, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install a pde recursively mapping page directory as a page table */ movl R(IdlePTD), %eax movl $PTDPTDI, %ebx movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) + +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif ret Index: head/sys/amd64/amd64/locore.s =================================================================== --- head/sys/amd64/amd64/locore.s (revision 112840) +++ head/sys/amd64/amd64/locore.s (revision 112841) @@ -1,892 +1,927 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ * * originally from: locore.s, by William F. Jolitz * * Substantially rewritten by David Greenman, Rod Grimes, * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp * and many others. */ #include "opt_bootp.h" #include "opt_compat.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include "assym.s" /* * XXX * * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. 
Perhaps newer versions * will have more pleasant appearance. */ /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). * * NOTE: PTDpde, PTmap, and PTD are being defined as address symbols. * In C you access them directly, and not with a '*'. Storage is not being * allocated. They will magically address the correct locations in KVM * which C will treat as normal variables of the type they are defined in * machine/pmap.h, i.e. PTDpde = XX ; to set a PDE entry, NOT *PTDpde = XX; */ .globl PTmap,PTD,PTDpde .set PTmap,(PTDPTDI << PDRSHIFT) .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. * See the note above. It is true here as well. */ .globl APTmap,APTD,APTDpde .set APTmap,APTDPTDI << PDRSHIFT .set APTD,APTmap + (APTDPTDI * PAGE_SIZE) .set APTDpde,PTD + (APTDPTDI * PDESIZE) #ifdef SMP /* * Define layout of per-cpu address space. * This is "constructed" in locore.s on the BSP and in mp_machdep.c * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! */ .globl SMP_prvspace, lapic .set SMP_prvspace,(MPPTDI << PDRSHIFT) .set lapic,SMP_prvspace + (NPTEPG-1) * PAGE_SIZE #endif /* SMP */ /* * Compiled KERNBASE location */ .globl kernbase .set kernbase,KERNBASE /* * Globals */ .data ALIGN_DATA /* just to be sure */ .globl HIDENAME(tmpstk) .space 0x2000 /* space for tmpstk - temporary stack */ HIDENAME(tmpstk): .globl bootinfo bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ .globl KERNend KERNend: .long 0 /* phys addr end of kernel (just after bss) */ physfree: .long 0 /* phys addr of next free page */ #ifdef SMP .globl cpu0prvpage cpu0pp: .long 0 /* phys addr cpu0 private pg */ cpu0prvpage: .long 0 /* relocated version */ .globl SMPpt SMPptpa: .long 0 /* phys addr SMP page table */ SMPpt: .long 0 /* relocated version */ #endif /* SMP */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif KPTphys: .long 0 /* phys addr of kernel page tables */ .globl proc0uarea, proc0kstack proc0uarea: .long 0 /* address of proc 0 uarea space */ proc0kstack: .long 0 /* address of proc 0 kstack space */ p0upa: .long 0 /* phys addr of proc0's UAREA */ p0kpa: .long 0 /* phys addr of proc0's STACK */ vm86phystk: .long 0 /* PA of vm86/bios stack */ .globl vm86paddr, vm86pa vm86paddr: .long 0 /* address of vm86 region */ vm86pa: .long 0 /* phys addr of vm86 region */ #ifdef PC98 .globl pc98_system_parameter pc98_system_parameter: .space 0x240 #endif /********************************************************************** * * Some handy macros * */ #define R(foo) ((foo)-KERNBASE) #define ALLOCPAGES(foo) \ movl R(physfree), %esi ; \ movl $((foo)*PAGE_SIZE), %eax ; \ addl %esi, %eax ; \ movl %eax, R(physfree) ; \ movl %esi, %edi ; \ movl $((foo)*PAGE_SIZE),%ecx ; \ xorl %eax,%eax ; \ cld ; \ rep ; \ stosb /* * fillkpt * eax = page frame address * ebx = index into page table * ecx = how many pages to map * base = base address of page dir/table * prot = protection bits */ #define fillkpt(base, prot) \ shll $PTESHIFT,%ebx ; \ addl base,%ebx ; \ orl $PG_V,%eax ; \ orl prot,%eax ; \ 1: movl %eax,(%ebx) ; \ addl $PAGE_SIZE,%eax ; /* increment physical address */ \ addl $PTESIZE,%ebx ; /* next pte */ \ loop 1b /* * fillkptphys(prot) * eax = physical address * ecx = 
how many pages to map * prot = protection bits */ #define fillkptphys(prot) \ movl %eax, %ebx ; \ shrl $PAGE_SHIFT, %ebx ; \ fillkpt(R(KPTphys), prot) .text /********************************************************************** * * This is where the bootblocks start us, set the ball rolling... * */ NON_GPROF_ENTRY(btext) #ifdef PC98 /* save SYSTEM PARAMETER for resume (NS/T or other) */ movl $0xa1400,%esi movl $R(pc98_system_parameter),%edi movl $0x0240,%ecx cld rep movsb #else /* IBM-PC */ /* Tell the bios to warmboot next time */ movw $0x1234,0x472 #endif /* PC98 */ /* Set up a real frame in case the double return in newboot is executed. */ pushl %ebp movl %esp, %ebp /* Don't trust what the BIOS gives for eflags. */ pushl $PSL_KERNEL popfl /* * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap * to set %cs, %ds, %es and %ss. */ mov %ds, %ax mov %ax, %fs mov %ax, %gs call recover_bootinfo /* Get onto a stack that we can trust. */ /* * XXX this step is delayed in case recover_bootinfo needs to return via * the old stack, but it need not be, since recover_bootinfo actually * returns via the old frame. */ movl $R(HIDENAME(tmpstk)),%esp #ifdef PC98 /* pc98_machine_type & M_EPSON_PC98 */ testb $0x02,R(pc98_system_parameter)+220 jz 3f /* epson_machine_id <= 0x0b */ cmpb $0x0b,R(pc98_system_parameter)+224 ja 3f /* count up memory */ movl $0x100000,%eax /* next, talley remaining memory */ movl $0xFFF-0x100,%ecx 1: movl 0(%eax),%ebx /* save location to check */ movl $0xa55a5aa5,0(%eax) /* write test pattern */ cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ jne 2f movl %ebx,0(%eax) /* restore memory */ addl $PAGE_SIZE,%eax loop 1b 2: subl $0x100000,%eax shrl $17,%eax movb %al,R(pc98_system_parameter)+1 3: movw R(pc98_system_parameter+0x86),%ax movw %ax,R(cpu_id) #endif call identify_cpu /* clear bss */ /* * XXX this should be done a little earlier. * * XXX we don't check that there is memory for our bss and page tables * before using it. * * XXX the boot program somewhat bogusly clears the bss. We still have * to do it in case we were unzipped by kzipboot. Then the boot program * only clears kzipboot's bss. * * XXX the gdt and idt are still somewhere in the boot program. We * depend on the convention that the boot program is below 1MB and we * are above 1MB to keep the gdt and idt away from the bss and page * tables. */ movl $R(end),%ecx movl $R(edata),%edi subl %edi,%ecx xorl %eax,%eax cld rep stosb call create_pagetables /* * If the CPU has support for VME, turn it on. */ testl $CPUID_VME, R(cpu_feature) jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! 
*/ pushl $begin /* jump to high virtualized address */ ret /* now running relocated at KERNBASE where the system is linked to run */ begin: /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ /* bootstrap stack end location */ leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ /* * Clean up the stack in a way that db_numargs() understands, so * that backtraces in ddb don't underrun the stack. Traps for * inaccessible memory are more fatal than usual this early. */ addl $4,%esp call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ addl $0,%esp /* for db_numargs() again */ /* * Signal trampoline, copied to top of user stack */ NON_GPROF_ENTRY(sigcode) calll *SIGF_HANDLER(%esp) leal SIGF_UC(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC_EFLAGS(%eax) jne 1f movl UC_GS(%eax),%gs /* restore %gs */ 1: movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #ifdef COMPAT_FREEBSD4 ALIGN_TEXT freebsd4_sigcode: calll *SIGF_HANDLER(%esp) leal SIGF_UC4(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC4_EFLAGS(%eax) jne 1f movl UC4_GS(%eax),%gs /* restore %gs */ 1: movl $344,%eax /* 4.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #endif #ifdef COMPAT_43 ALIGN_TEXT osigcode: call *SIGF_HANDLER(%esp) /* call signal handler */ lea SIGF_SC(%esp),%eax /* get sigcontext */ pushl %eax testl $PSL_VM,SC_PS(%eax) jne 9f movl SC_GS(%eax),%gs /* restore %gs */ 9: movl $103,%eax /* 3.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ 0: jmp 0b #endif /* COMPAT_43 */ ALIGN_TEXT esigcode: .data .globl szsigcode szsigcode: .long esigcode-sigcode #ifdef COMPAT_FREEBSD4 .globl szfreebsd4_sigcode szfreebsd4_sigcode: .long esigcode-freebsd4_sigcode #endif #ifdef COMPAT_43 .globl szosigcode szosigcode: .long esigcode-osigcode #endif .text /********************************************************************** * * Recover the bootinfo passed to us from the boot program * */ recover_bootinfo: /* * This code is called in different ways depending on what loaded * and started the kernel. This is used to detect how we get the * arguments from the other code and what we do with them. * * Old disk boot blocks: * (*btext)(howto, bootdev, cyloffset, esym); * [return address == 0, and can NOT be returned to] * [cyloffset was not supported by the FreeBSD boot code * and always passed in as 0] * [esym is also known as total in the boot code, and * was never properly supported by the FreeBSD boot code] * * Old diskless netboot code: * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); * [return address != 0, and can NOT be returned to] * If we are being booted by this code it will NOT work, * so we are just going to halt if we find this case. * * New uniform boot code: * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) * [return address != 0, and can be returned to] * * There may seem to be a lot of wasted arguments in here, but * that is so the newer boot code can still load very old kernels * and old boot code can load new kernels. 
*/ /* * The old style disk boot blocks fake a frame on the stack and * did an lret to get here. The frame on the stack has a return * address of 0. */ cmpl $0,4(%ebp) je olddiskboot /* * We have some form of return address, so this is either the * old diskless netboot code, or the new uniform code. That can * be detected by looking at the 5th argument, if it is 0 * we are being booted by the new uniform boot code. */ cmpl $0,24(%ebp) je newboot /* * Seems we have been loaded by the old diskless boot code, we * don't stand a chance of running as the diskless structure * changed considerably between the two, so just halt. */ hlt /* * We have been loaded by the new uniform boot code. * Let's check the bootinfo version, and if we do not understand * it we return to the loader with a status of 1 to indicate this error */ newboot: movl 28(%ebp),%ebx /* &bootinfo.version */ movl BI_VERSION(%ebx),%eax cmpl $1,%eax /* We only understand version 1 */ je 1f movl $1,%eax /* Return status */ leave /* * XXX this returns to our caller's caller (as is required) since * we didn't set up a frame and our caller did. */ ret 1: /* * If we have a kernelname copy it in */ movl BI_KERNELNAME(%ebx),%esi cmpl $0,%esi je 2f /* No kernelname */ movl $MAXPATHLEN,%ecx /* Brute force!!! */ movl $R(kernelname),%edi cmpb $'/',(%esi) /* Make sure it starts with a slash */ je 1f movb $'/',(%edi) incl %edi decl %ecx 1: cld rep movsb 2: /* * Determine the size of the boot loader's copy of the bootinfo * struct. This is impossible to do properly because old versions * of the struct don't contain a size field and there are 2 old * versions with the same version number. */ movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ je got_bi_size /* no, sizeless version */ movl BI_SIZE(%ebx),%ecx got_bi_size: /* * Copy the common part of the bootinfo struct */ movl %ebx,%esi movl $R(bootinfo),%edi cmpl $BOOTINFO_SIZE,%ecx jbe got_common_bi_size movl $BOOTINFO_SIZE,%ecx got_common_bi_size: cld rep movsb #ifdef NFS_ROOT #ifndef BOOTP_NFSV3 /* * If we have a nfs_diskless structure copy it in */ movl BI_NFS_DISKLESS(%ebx),%esi cmpl $0,%esi je olddiskboot movl $R(nfs_diskless),%edi movl $NFSDISKLESS_SIZE,%ecx cld rep movsb movl $R(nfs_diskless_valid),%edi movl $1,(%edi) #endif #endif /* * The old style disk boot. * (*btext)(howto, bootdev, cyloffset, esym); * Note that the newer boot code just falls into here to pick * up howto and bootdev, cyloffset and esym are no longer used */ olddiskboot: movl 8(%ebp),%eax movl %eax,R(boothowto) movl 12(%ebp),%eax movl %eax,R(bootdev) ret /********************************************************************** * * Identify the CPU and initialize anything special about it * */ identify_cpu: /* Try to toggle alignment check flag; does not exist on 386. */ pushfl popl %eax movl %eax,%ecx orl $PSL_AC,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_AC,%eax pushl %ecx popfl testl %eax,%eax jnz try486 /* NexGen CPU does not have aligment check flag. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jz trynexgen popfl movl $CPU_386,R(cpu) jmp 3f trynexgen: popfl movl $CPU_NX586,R(cpu) movl $0x4778654e,R(cpu_vendor) # store vendor string movl $0x72446e65,R(cpu_vendor+4) movl $0x6e657669,R(cpu_vendor+8) movl $0,R(cpu_vendor+12) jmp 3f try486: /* Try to toggle identification flag; does not exist on early 486s. 
*/ pushfl popl %eax movl %eax,%ecx xorl $PSL_ID,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_ID,%eax pushl %ecx popfl testl %eax,%eax jnz trycpuid movl $CPU_486,R(cpu) /* * Check Cyrix CPU * Cyrix CPUs do not change the undefined flags following * execution of the divide instruction which divides 5 by 2. * * Note: CPUID is enabled on M2, so it passes another way. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jnc trycyrix popfl jmp 3f /* You may use Intel CPU. */ trycyrix: popfl /* * IBM Bluelighting CPU also doesn't change the undefined flags. * Because IBM doesn't disclose the information for Bluelighting * CPU, we couldn't distinguish it from Cyrix's (including IBM * brand of Cyrix CPUs). */ movl $0x69727943,R(cpu_vendor) # store vendor string movl $0x736e4978,R(cpu_vendor+4) movl $0x64616574,R(cpu_vendor+8) jmp 3f trycpuid: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid # cpuid 0 movl %eax,R(cpu_high) # highest capability movl %ebx,R(cpu_vendor) # store vendor string movl %edx,R(cpu_vendor+4) movl %ecx,R(cpu_vendor+8) movb $0,R(cpu_vendor+12) movl $1,%eax cpuid # cpuid 1 movl %eax,R(cpu_id) # store cpu_id movl %ebx,R(cpu_procinfo) # store cpu_procinfo movl %edx,R(cpu_feature) # store cpu_feature rorl $8,%eax # extract family type andl $15,%eax cmpl $5,%eax jae 1f /* less than Pentium; must be 486 */ movl $CPU_486,R(cpu) jmp 3f 1: /* a Pentium? */ cmpl $5,%eax jne 2f movl $CPU_586,R(cpu) jmp 3f 2: /* Greater than Pentium...call it a Pentium Pro */ movl $CPU_686,R(cpu) 3: ret /********************************************************************** * * Create the first page directory and its page tables. * */ create_pagetables: /* Find end of kernel image (rounded up to a page boundary). */ movl $R(_end),%esi /* Include symbols, if any. */ movl R(bootinfo+BI_ESYMTAB),%edi testl %edi,%edi je over_symalloc movl %edi,%esi movl $KERNBASE,%edi addl %edi,R(bootinfo+BI_SYMTAB) addl %edi,R(bootinfo+BI_ESYMTAB) over_symalloc: /* If we are told where the end of the kernel space is, believe it. 
*/ movl R(bootinfo+BI_KERNEND),%edi testl %edi,%edi je no_kernend movl %edi,%esi no_kernend: addl $PAGE_MASK,%esi andl $~PAGE_MASK,%esi movl %esi,R(KERNend) /* save end of kernel */ movl %esi,R(physfree) /* next free page is at end of kernel */ /* Allocate Kernel Page Tables */ ALLOCPAGES(NKPT) movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) /* Allocate UPAGES */ ALLOCPAGES(UAREA_PAGES) movl %esi,R(p0upa) addl $KERNBASE, %esi movl %esi, R(proc0uarea) ALLOCPAGES(KSTACK_PAGES) movl %esi,R(p0kpa) addl $KERNBASE, %esi movl %esi, R(proc0kstack) ALLOCPAGES(1) /* vm86/bios stack */ movl %esi,R(vm86phystk) ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ movl %esi,R(vm86pa) addl $KERNBASE, %esi movl %esi, R(vm86paddr) #ifdef SMP /* Allocate cpu0's private data page */ ALLOCPAGES(1) movl %esi,R(cpu0pp) addl $KERNBASE, %esi movl %esi, R(cpu0prvpage) /* relocated to KVM space */ /* Allocate SMP page table page */ ALLOCPAGES(1) movl %esi,R(SMPptpa) addl $KERNBASE, %esi movl %esi, R(SMPpt) /* relocated to KVM space */ #endif /* SMP */ /* Map read-only from zero to the end of the kernel text section */ xorl %eax, %eax xorl %edx,%edx movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map read-write, data, bss and symbols */ movl $R(etext),%eax addl $PAGE_MASK, %eax andl $~PAGE_MASK, %eax movl $PG_RW,%edx movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) /* Map proc0's UPAGES in the physical way ... */ movl R(p0upa), %eax movl $(UAREA_PAGES), %ecx fillkptphys($PG_RW) /* Map proc0's KSTACK in the physical way ... */ movl R(p0kpa), %eax movl $(KSTACK_PAGES), %ecx fillkptphys($PG_RW) /* Map ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkptphys($PG_RW) /* Map space for the vm86 region */ movl R(vm86phystk), %eax movl $4, %ecx fillkptphys($PG_RW) /* Map page 0 into the vm86 page table */ movl $0, %eax movl $0, %ebx movl $1, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) /* ...likewise for the ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) #ifdef SMP /* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ movl R(cpu0pp), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map SMP page table page into global kmem FWIW */ movl R(SMPptpa), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map the private page into the SMP page table */ movl R(cpu0pp), %eax movl $0, %ebx /* pte offset = 0 */ movl $1, %ecx /* one private page coming right up */ fillkpt(R(SMPptpa), $PG_RW) /* ... and put the page table table in the pde. */ movl R(SMPptpa), %eax movl $MPPTDI, %ebx movl $1, %ecx fillkpt(R(IdlePTD), $PG_RW) /* Fakeup VA for the local apic to allow early traps. 
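 *
 * (Aside: a rough C rendering of what the fillkptphys()/fillkpt()
 * macros used throughout this routine do; the names here only
 * approximate the assembler macros defined earlier in the file:
 *
 *	for (i = 0; i < npages; i++)
 *		pt[ndx + i] = (base + i * PAGE_SIZE) | PG_V | prot;
 *
 * i.e. enter npages translations with the given protection bits,
 * with fillkptphys() deriving ndx from the physical address itself.
 * Under PAE the extra top level allocated above, IdlePDPT, is a
 * page-directory pointer table of only 4 entries of 8 bytes each,
 * which is why its XXX note says just 32 bytes of the page are
 * really needed.)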
*/ ALLOCPAGES(1) movl %esi, %eax movl $(NPTEPG-1), %ebx /* pte offset = NPTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ movl R(KPTphys), %eax xorl %ebx, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install pde's for pt's */ movl R(KPTphys), %eax movl $KPTDI, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install a pde recursively mapping page directory as a page table */ movl R(IdlePTD), %eax movl $PTDPTDI, %ebx movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) + +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif ret Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c (revision 112840) +++ head/sys/amd64/amd64/machdep.c (revision 112841) @@ -1,2731 +1,2741 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #include #ifdef PERFMON #include #endif #ifdef SMP #include #include #endif #include #include #include #include #include #include extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #if defined(CPU_DISABLE_SSE) #undef CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif long Maxmem = 0; vm_paddr_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; #ifndef SMP static struct pcpu __pcpu; #endif struct mtx icu_lock; static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); /* * Display any holes after the first chunk of extended memory. 
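 *
 * (Aside: the (uintmax_t) casts around ptoa() above matter once PAE
 * permits more than 4 GB of physical memory, because ptoa() is just
 * a left shift by PAGE_SHIFT and would otherwise truncate to 32
 * bits.  A worked example with 6 GB of RAM:
 *
 *	Maxmem = 0x180000 pages of 4 KB
 *	(u_int)(0x180000 << 12)    = 0x80000000	  wrapped, wrong
 *	(uintmax_t)0x180000 << 12  = 0x180000000  correct
 * )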
*/ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifndef SMP /* For SMP, we delay the cpu_setregs() until after SMP startup. */ cpu_setregs(); #endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. 
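 *
 * (Aside, for orientation only: the two argument lists built above
 * correspond, roughly, to the two handler shapes the process sees:
 *
 *	void handler(int sig, int code, struct sigcontext *scp);
 *						old FreeBSD style
 *	void handler(int sig, siginfo_t *info, void *ucp);
 *						SA_SIGINFO style
 *
 * with the ps_siginfo test above selecting between them.)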
* We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. 
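 *
 * (Aside: PSL_VIF and PSL_VIP are bits 19 and 20 of EFLAGS.  When
 * the CPU lacks VME support the hardware copies are meaningless, so
 * the merge below takes every bit except VIF/VIP from the trapframe
 * and those two bits from the software copy kept in vm86_eflags;
 * schematically:
 *
 *	mc_eflags = (tf_eflags & ~(PSL_VIF | PSL_VIP))
 *		  | (vm86_eflags & (PSL_VIF | PSL_VIP));
 * )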
*/ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); } #endif /* COMPAT_FREEBSD4 */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, sig, mask, code); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. 
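 *
 * (Aside: the "& ~0xF" above rounds the frame pointer down to a
 * 16-byte boundary, e.g. sp = 0xbfbfe7fc yields sfp = 0xbfbfe7f0;
 * rounding down is safe because the stack grows toward lower
 * addresses.)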
*/ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. 
tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (scp->sc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. 
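 *
 * (Aside: EFL_SECURE(), defined near the top of this file, rejects
 * any eflags change outside PSL_USERCHANGE.  For example, a forged
 * frame that tries to grant itself I/O privilege fails the check:
 *
 *	old eflags = 0x00000202		IF set, IOPL = 0
 *	new eflags = 0x00003202		IOPL = 3 requested
 *	new ^ old  = 0x00003000		the IOPL bits, which are
 *					not user-changeable
 *
 * so the syscall returns EINVAL instead of honoring the frame.)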
*/ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
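 *
 * (Aside: CS_SECURE() checks only the privilege level of the
 * selector, ISPL(cs) == SEL_UPL, i.e. the low two bits must be 3:
 *
 *	cs = 0x001f  ->  RPL 3, accepted (a normal user %cs)
 *	cs = 0x0008  ->  RPL 0, rejected with SIGBUS and EINVAL
 *
 * everything else about the selector is left for the hardware to
 * fault on, as noted above.)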
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(void) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) { enable_intr(); } else { /* * we must absolutely guarentee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } } } /* * Clear registers on exec */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. 
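 *
 * (Aside: the enable bits for DR0-DR3 live in the low byte of DR7,
 * so a stale breakpoint, say DR0 still aimed into the old image's
 * text with its enable bit set, would raise spurious debug traps at
 * arbitrary addresses in the newly exec'd program; reset_dbregs()
 * below brings the hardware copies back in line with the zeroed
 * pcb fields.)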
*/ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); /* Initialize the npx (if any) for the current process. */ /* * XXX the above load_cr0() also initializes it and is a layering * violation if NPX is configured. It drops the npx partially * and this would be fatal if we were interrupted now, and decided * to force the state to the pcb, and checked the invariant * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL). * ALL of this can happen except the check. The check used to * happen and be fatal later when we didn't complete the drop * before returning to user mode. This should be fixed properly * soon. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); #ifdef SMP cr0 |= CR0_NE; /* Done by npxinit() */ #endif cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ #ifndef I386_CPU cr0 |= CR0_WP | CR0_AM; #endif load_cr0(cr0); load_gs(_udatasel); } static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); u_long bootdev; /* not a dev_t - encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in dev_t format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0uarea; extern vm_offset_t proc0kstack; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* 
segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment 
base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), 
IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, physmap_idx, pa_indx; int hasbrokenint12; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; char *cp; struct bios_smap *smap; hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Some newer BIOSes has broken INT 12H implementation which cause * kernel panic immediately. In this case, we need to scan SMAP * with INT 15:E820 first, then determine base memory size. */ if (hasbrokenint12) { goto int15e820; } /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; int15e820: /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. 
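 *
 * (Aside, for orientation: each INT 15h/E820 call in the loop below
 * fills in one struct bios_smap, which has the shape
 *
 *	struct bios_smap {
 *		u_int64_t base;		first byte of the region
 *		u_int64_t length;	size in bytes
 *		u_int32_t type;		0x01 means usable RAM
 *	} __packed;
 *
 * with %ebx carrying the BIOS's continuation value from call to
 * call; the do/while loop runs until the BIOS hands back zero.)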
*/ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); /* * get memory map with INT 15:E820 */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; +#ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } +#endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: ; } while (vmf.vmf_ebx != 0); /* * Perform "base memory" related probes & setup based on SMAP */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) { basemem = 640; } if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ i386_mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. 
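 *
 * (Aside, a worked example of the hw.physmem parsing below: the
 * suffix cases deliberately fall through, so 'g' shifts by 30, 'm'
 * by 20 and 'k' by 10.  Setting hw.physmem="512m" gives
 *
 *	AllowMem = 512 << 20 = 0x20000000 bytes
 *	Maxmem   = atop(AllowMem) = 0x20000 pages
 *
 * while a value whose shift overflows is caught by the
 * AllowMem < sanity test and ignored.)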
*/ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.physmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); freeenv(cp); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; pte = CMAP1; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; int *ptr = (int *)CADDR1; /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). 
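 *
 * (Aside: after the page-by-page test above, phys_avail[] holds
 * start/end pairs of good physical memory, terminated by a pair of
 * zeroes.  An illustrative, made-up layout:
 *
 *	phys_avail[0] = 0x00001000	page 0 masked off above
 *	phys_avail[1] = 0x0009f000	end of base memory
 *	phys_avail[2] = 0x00400000	first page past the kernel
 *	phys_avail[3] = 0x1ffff000	end of that chunk
 *	phys_avail[4] = 0, [5] = 0	terminator
 *
 * The trimming below then steals the tail of the last chunk for the
 * message buffer.)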
*/ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, off, x; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif struct pcpu *pc; proc0.p_uarea = proc0uarea; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; atdevbase = ISA_HOLE_START + KERNBASE; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup(&proc0, &ksegrp0, &kse0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); #ifdef SMP pc = &SMP_prvspace[0].pcpu; gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct privatespace) - 1); #else pc = &__pcpu; gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct pcpu) - 1); #endif gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); /* make ldt memory segments */ /* * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... 
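 *
 * (Aside: the atop(0 - 1) used above for the kernel code and data
 * limits evaluates to atop(0xffffffff) = 0xfffff; with the
 * granularity bit set that is a limit of 0x100000 pages of 4 KB,
 * i.e. flat 4 GB segments, which is what the note about text
 * protection being disabled refers to.)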
*/ ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA isa_defaultirq(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
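 *
 * (Aside on the setidt() calls above: SDT_SYS386TGT installs a trap
 * gate and SDT_SYS386IGT an interrupt gate; the latter clears IF on
 * entry, which is why the debug, breakpoint and page fault vectors
 * use it.  The DPL argument controls which rings may raise the
 * vector with an explicit int instruction, e.g.
 *
 *	setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT,
 *	    SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
 *
 * uses SEL_UPL so ring 3 code can execute int $0x80 for system
 * calls, while the fault vectors stay at SEL_KPL.)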
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured, and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb
*pcb; struct trapframe *tp; tp = td->td_frame; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = td->td_pcb; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. 
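*/

/*
 * set_regs() above rejects an unsafe %eflags/%cs pair outright;
 * set_mcontext() below instead merges only the user-changeable flag
 * bits into the live value. A small sketch of that merge; the demo
 * mask is an assumption built from the architectural user flags
 * (CF, PF, AF, ZF, SF, TF, DF, OF), not FreeBSD's PSL_USERCHANGE.
 */
#include <stdint.h>

#define DEMO_USERCHANGE	0x00000dd5	/* CF|PF|AF|ZF|SF|TF|DF|OF */

static uint32_t
merge_user_eflags(uint32_t new_efl, uint32_t cur_efl)
{
	/* Privileged bits (IOPL, IF, ...) always come from cur_efl. */
	return ((new_efl & DEMO_USERCHANGE) | (cur_efl & ~DEMO_USERCHANGE));
}

/*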
*/ int get_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; tp = td->td_frame; mcp->mc_onstack = sigonstack(tp->tf_esp); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_ebx = tp->tf_ebx; mcp->mc_edx = tp->tf_edx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eax = tp->tf_eax; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_eflags = tp->tf_eflags; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. 
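*/

/*
 * The do/while in get_fpcontext() above bumps the pointer in 4-byte
 * steps until it reaches a 16-byte boundary, because fxsave faults on
 * a misaligned save area and the spare fields after mc_fpstate leave
 * room for the shift. The loop is equivalent to this one-liner (a
 * sketch, not the kernel's idiom):
 */
#include <stdint.h>

static void *
align16(void *p)
{
	return ((void *)(((uintptr_t)p + 15) & ~(uintptr_t)15));
}

/*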
*/ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; u_int32_t mask1, mask2; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * results in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; i++, mask1 <<= 2, mask2 <<= 2) if ((dbregs->dr[7] & mask1) == mask2) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (suser(td) != 0) { if (dbregs->dr[7] & 0x3) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise.
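*/

/*
 * The "dr[7] & 0x3" style tests in set_dbregs() above and the
 * mask1/mask2 loop both follow from the DR7 layout: two enable bits
 * (local/global) per debug register in bits 0-15, then a 2-bit R/W
 * field and a 2-bit LEN field per register in bits 16-31, where R/W
 * value 10b is the undefined pattern being rejected. Illustrative
 * helpers making that indexing explicit:
 */
#include <stdint.h>

static int
dr7_bp_enabled(uint32_t dr7, int n)	/* n = 0..3 */
{
	return (((dr7 >> (n * 2)) & 0x3) != 0);
}

static uint32_t
dr7_bp_control(uint32_t dr7, int n)	/* R/W + LEN nibble for bp n */
{
	return ((dr7 >> (16 + n * 4)) & 0xf);
}

/*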
*/ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This makes a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/amd64/amd64/mpboot.S =================================================================== --- head/sys/amd64/amd64/mpboot.S (revision 112840) +++ head/sys/amd64/amd64/mpboot.S (revision 112841) @@ -1,272 +1,282 @@ /* * Copyright (c) 1995, Jack F. Vogel * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Jack F. Vogel * 4. The name of the developer may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * mpboot.s: FreeBSD machine support for the Intel MP Spec * multiprocessor systems. * * $FreeBSD$ */ #include <machine/asmacros.h> /* miscellaneous asm macros */ #include <machine/apic.h> #include <machine/specialreg.h> #include "assym.s" +#define R(x) ((x)-KERNBASE) + /* * this code MUST be enabled here and in mp_machdep.c * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) && !defined(PC98) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define CHECKPOINT(A,D) \ movb $(A),%al ; \ outb %al,$CMOS_REG ; \ movb $(D),%al ; \ outb %al,$CMOS_DATA #else #define CHECKPOINT(A,D) #endif /* CHECK_POINTS */ /* * the APs enter here from their trampoline code (bootMP, below) */ .p2align 4 NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) /* Now enable paging mode */ - movl IdlePTD-KERNBASE, %eax +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else + movl R(IdlePTD), %eax movl %eax,%cr3 +#endif movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ movl bootSTK,%esp /* boot stack end loc. */ pushl $mp_begin /* jump to high mem */ ret /* * Wait for the booting CPU to signal startup */ mp_begin: /* now running relocated at KERNBASE */ CHECKPOINT(0x37, 4) call init_secondary /* load i386 tables */ CHECKPOINT(0x38, 5) /* * If the [BSP] CPU has support for VME, turn it on. */ testl $CPUID_VME, cpu_feature /* XXX WRONG! BSP! */ jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* disable the APIC, just to be SURE */ movl lapic+LA_SVR, %eax /* get spurious vector reg. */ andl $~APIC_SVR_SWEN, %eax /* clear software enable bit */ movl %eax, lapic+LA_SVR /* signal our startup to the BSP */ movl lapic+LA_VER, %eax /* our version reg contents */ movl %eax, cpu_apic_versions /* into [ 0 ] */ incl mp_ncpus /* signal BSP */ CHECKPOINT(0x39, 6) /* Now, let's prepare for some REAL WORK :-) This doesn't return. */ call ap_init /* * This is the embedded trampoline or bootstrap that is * copied into 'real-mode' low memory, it is where the * secondary processor "wakes up". When it is executed * the processor will eventually jump into the routine * MPentry, which resides in normal kernel text above * 1Meg.
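*/

/*
 * What the PAE branch above buys: %cr3 now points at a tiny 4-entry
 * page-directory-pointer table, and a 32-bit virtual address splits
 * 2+9+9+12 instead of the classic 10+10+12, with 64-bit entries that
 * leave room for physical addresses above 4 GB. A sketch of the index
 * arithmetic (function and names are illustrative):
 */
#include <stdint.h>

static void
pae_va_split(uint32_t va, unsigned *pdpt_i, unsigned *pd_i, unsigned *pt_i,
    unsigned *off)
{
	*pdpt_i = va >> 30;		/* 1 of 4 PDPT entries, 1 GB each */
	*pd_i = (va >> 21) & 0x1ff;	/* 1 of 512 PDEs, 2 MB each */
	*pt_i = (va >> 12) & 0x1ff;	/* 1 of 512 PTEs, 4 KB each */
	*off = va & 0xfff;		/* byte offset within the page */
}

/*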
-jackv */ .data ALIGN_DATA /* just to be sure */ BOOTMP1: NON_GPROF_ENTRY(bootMP) .code16 cli CHECKPOINT(0x34, 1) /* First guarantee a 'clean slate' */ xorl %eax, %eax movl %eax, %ebx movl %eax, %ecx movl %eax, %edx movl %eax, %esi movl %eax, %edi /* set up data segments */ mov %cs, %ax mov %ax, %ds mov %ax, %es mov %ax, %fs mov %ax, %gs mov %ax, %ss mov $(boot_stk-bootMP), %esp /* Now load the global descriptor table */ lgdt MP_GDTptr-bootMP /* Enable protected mode */ movl %cr0, %eax orl $CR0_PE, %eax movl %eax, %cr0 /* * make intrasegment jump to flush the processor pipeline and * reload CS register */ pushl $0x18 pushl $(protmode-bootMP) lretl .code32 protmode: CHECKPOINT(0x35, 2) /* * we are NOW running for the first time with %eip * having the full physical address, BUT we still * are using a segment descriptor with the origin * not matching the booting kernel. * * SO NOW... for the BIG Jump into kernel's segment * and physical text above 1 Meg. */ mov $0x10, %ebx movw %bx, %ds movw %bx, %es movw %bx, %fs movw %bx, %gs movw %bx, %ss .globl bigJump bigJump: /* this will be modified by mpInstallTramp() */ ljmp $0x08, $0 /* far jmp to MPentry() */ dead: hlt /* We should never get here */ jmp dead /* * MP boot strap Global Descriptor Table */ .p2align 4 .globl MP_GDT .globl bootCodeSeg .globl bootDataSeg MP_GDT: nulldesc: /* offset = 0x0 */ .word 0x0 .word 0x0 .byte 0x0 .byte 0x0 .byte 0x0 .byte 0x0 kernelcode: /* offset = 0x08 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0K */ .byte 0x9f /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..31 */ kerneldata: /* offset = 0x10 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0k */ .byte 0x93 /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..31 */ bootcode: /* offset = 0x18 */ .word 0xffff /* segment limit 0..15 */ bootCodeSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16..23; set for 0x000xx000 */ .byte 0x9e /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..31 */ bootdata: /* offset = 0x20 */ .word 0xffff bootDataSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16..23; set for 0x000xx000 */ .byte 0x92 .byte 0xcf .byte 0x0 /* * GDT pointer for the lgdt call */ .globl mp_gdtbase MP_GDTptr: mp_gdtlimit: .word 0x0028 mp_gdtbase: /* this will be modified by mpInstallTramp() */ .long 0 .space 0x100 /* space for boot_stk - 1st temporary stack */ boot_stk: BOOTMP2: .globl bootMP_size bootMP_size: .long BOOTMP2 - BOOTMP1 Index: head/sys/amd64/amd64/pmap.c =================================================================== --- head/sys/amd64/amd64/pmap.c (revision 112840) +++ head/sys/amd64/amd64/pmap.c (revision 112841) @@ -1,3425 +1,3473 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD$ */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Manages physical address maps. 
* * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_kstack_pages.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/mman.h> #include <sys/msgbuf.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/sx.h> #include <sys/user.h> #include <sys/vmmeter.h> #include <sys/sysctl.h> #ifdef SMP #include <sys/smp.h> #endif #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_kern.h> #include <vm/vm_page.h> #include <vm/vm_map.h> #include <vm/vm_object.h> #include <vm/vm_extern.h> #include <vm/vm_pageout.h> #include <vm/vm_pager.h> #include <vm/uma.h> #include <machine/cpu.h> #include <machine/cputypes.h> #include <machine/md_var.h> #include <machine/specialreg.h> #if defined(SMP) || defined(APIC_IO) #include <machine/smp.h> #include <machine/apic.h> #include <machine/segments.h> #include <machine/tss.h> #endif /* SMP || APIC_IO */ #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif #define MINPV 2048 #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) static int protection_codes[8]; struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_paddr_t avail_start; /* PA of first available physical page */ vm_paddr_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed?
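*/

/*
 * The pmap_pde()/pdir_pde() macros above are this shift spelled out:
 * without PAE a 32-bit VA splits 10+10+12, PDRSHIFT is 22, and one PDE
 * covers 4 MB. A sketch with the classic constants (names are
 * illustrative):
 */
#include <stdint.h>

#define DEMO_PDRSHIFT	22	/* 10-bit page directory index */
#define DEMO_NPTEPG	1024	/* 4-byte PTEs per page table page */

static void
i386_va_split(uint32_t va, unsigned *pde_i, unsigned *pte_i, unsigned *off)
{
	*pde_i = va >> DEMO_PDRSHIFT;			/* 4 MB per PDE */
	*pte_i = (va >> 12) & (DEMO_NPTEPG - 1);	/* 4 KB per PTE */
	*off = va & 0xfff;
}

/*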
*/ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ static int nkpt; vm_offset_t kernel_vm_end; extern u_int32_t KERNend; +#ifdef PAE +static uma_zone_t pdptzone; +#endif + /* * Data for the pv entry allocation mechanism */ static uma_zone_t pvzone; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; int pmap_pagedaemon_waken; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; static pt_entry_t *CMAP2, *CMAP3, *ptmmap; caddr_t CADDR1 = 0, ptvmmap = 0; static caddr_t CADDR2, CADDR3; static struct mtx CMAPCADDR12_lock; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp = 0; /* * Crashdump maps. */ static pt_entry_t *pt_crashdumpmap; static caddr_t crashdumpmap; #ifdef SMP extern pt_entry_t *SMPpt; #endif static pt_entry_t *PMAP1 = 0; static pt_entry_t *PADDR1 = 0; static PMAP_INLINE void free_pv_entry(pv_entry_t pv); static pv_entry_t get_pv_entry(void); static void i386_protection_init(void); static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va); static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex); static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); -static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#ifdef PAE +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif static pd_entry_t pdir4mb; CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * Move the kernel virtual free pointer to the next * 4MB. This is used to help improve performance * by using a large (4MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) return newaddr; #endif #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); #endif return newaddr; } /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(firstaddr, loadaddr) vm_paddr_t firstaddr; vm_paddr_t loadaddr; { vm_offset_t va; pt_entry_t *pte; int i; avail_start = firstaddr; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. 
It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize protection array. */ i386_protection_init(); /* * Initialize the kernel pmap (which is statically allocated). */ kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the idle process page zeroing. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP2, CADDR2, 1) SYSMAP(caddr_t, CMAP3, CADDR3, 1) mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF); /* * Crashdump maps. */ SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. * XXX ptmmap is not used. */ SYSMAP(caddr_t, ptmmap, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. * XXX msgbufmap is not used. */ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, atop(round_page(MSGBUF_SIZE))) /* * ptemap is used for pmap_pte_quick */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); virtual_avail = va; *CMAP1 = *CMAP2 = 0; for (i = 0; i < NKPT; i++) PTD[i] = 0; pgeflag = 0; #ifndef DISABLE_PG_G if (cpu_feature & CPUID_PGE) pgeflag = PG_G; #endif #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) { printf("Warning: Pentium 4 cpu: PG_G disabled (global flag)\n"); pgeflag = 0; } #endif /* * Initialize the 4MB page size flag */ pseflag = 0; /* * The 4MB page version of the initial * kernel page mapping. */ pdir4mb = 0; #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) pseflag = PG_PS; #endif #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) { printf("Warning: Pentium 4 cpu: PG_PS disabled (4MB pages)\n"); pseflag = 0; } #endif #ifndef DISABLE_PSE if (pseflag) { pd_entry_t ptditmp; /* * Note that we have enabled PSE mode */ ptditmp = *(PTmap + i386_btop(KERNBASE)); ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; } #endif #ifndef SMP /* * Turn on PGE/PSE. SMP does this later on since the * 4K page tables are required for AP boot (for now). * XXX fixme. */ pmap_set_opt(); #endif #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! 
(non-SMP hardware?)"); /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif invltlb(); } /* * Enable 4MB page mode for MP startup. Turn on PG_G support. * BSP will run this after all the AP's have started up. */ void pmap_set_opt(void) { pt_entry_t *pte; vm_offset_t va, endva; if (pgeflag && (cpu_feature & CPUID_PGE)) { load_cr4(rcr4() | CR4_PGE); invltlb(); /* Insurance */ } #ifndef DISABLE_PSE if (pseflag && (cpu_feature & CPUID_PSE)) { load_cr4(rcr4() | CR4_PSE); invltlb(); /* Insurance */ } #endif if (PCPU_GET(cpuid) == 0) { #ifndef DISABLE_PSE if (pdir4mb) { kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; invltlb(); /* Insurance */ } #endif if (pgeflag) { /* Turn on PG_G for text, data, bss pages. */ va = (vm_offset_t)btext; #ifndef DISABLE_PSE if (pseflag && (cpu_feature & CPUID_PSE)) { if (va < KERNBASE + (1 << PDRSHIFT)) va = KERNBASE + (1 << PDRSHIFT); } #endif endva = KERNBASE + KERNend; while (va < endva) { pte = vtopte(va); if (*pte) *pte |= pgeflag; va += PAGE_SIZE; } invltlb(); /* Insurance */ } /* * We do not need to broadcast the invltlb here, because * each AP does it the moment it is released from the boot * lock. See ap_init(). */ } } static void * -pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { *flags = UMA_SLAB_PRIV; return (void *)kmem_alloc(kernel_map, bytes); } +#ifdef PAE +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); +} +#endif + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_paddr_t phys_start, phys_end; { int i; int initial_pvs; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ for(i = 0; i < vm_page_array_size; i++) { vm_page_t m; m = &vm_page_array[i]; TAILQ_INIT(&m->md.pv_list); m->md.pv_list_count = 0; } /* * init the pv free list */ initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); - uma_zone_set_allocf(pvzone, pmap_allocf); + uma_zone_set_allocf(pvzone, pmap_pv_allocf); uma_prealloc(pvzone, initial_pvs); +#ifdef PAE + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif + /* * Now it is safe to enable pv_table recording. */ pmap_initialized = TRUE; } /* * Initialize the address space (zone) for the pv_entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ void pmap_init2() { int shpgperproc = PMAP_SHPGPERPROC; TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_page_array_size; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); } /*************************************************** * Low level helper routines..... 
***************************************************/ #if defined(PMAP_DIAGNOSTIC) /* * This code checks for non-writeable/modified pages. * This should be an invalid condition. */ static int pmap_nw_modified(pt_entry_t ptea) { int pte; pte = (int) ptea; if ((pte & (PG_M|PG_RW)) == PG_M) return 1; else return 0; } #endif /* * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ static PMAP_INLINE int pmap_track_modified(vm_offset_t va) { if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) return 1; else return 0; } #ifdef I386_CPU /* * i386 only has "invalidate everything" and no SMP to worry about. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } #else /* !I386_CPU */ #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { u_int cpumask; u_int other_cpus; critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invlpg(va); if (pmap->pm_active & other_cpus) smp_masked_invlpg(pmap->pm_active & other_cpus, va); } critical_exit(); } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { u_int cpumask; u_int other_cpus; vm_offset_t addr; critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); if (pmap->pm_active & other_cpus) smp_masked_invlpg_range(pmap->pm_active & other_cpus, sva, eva); } critical_exit(); } void pmap_invalidate_all(pmap_t pmap) { u_int cpumask; u_int other_cpus; #ifdef SWTCH_OPTIM_STATS tlb_flush_count++; #endif critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invltlb(); if (pmap->pm_active & other_cpus) smp_masked_invltlb(pmap->pm_active & other_cpus); } critical_exit(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. 
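*/

/*
 * All of the invalidation variants above and below reduce to two
 * hardware primitives: invlpg, which drops a single TLB entry, and a
 * %cr3 reload, which drops every non-global entry. A hedged
 * stand-alone sketch in the inline-asm style this file already uses
 * elsewhere:
 */
static __inline void
demo_invlpg(unsigned int va)
{
	__asm __volatile("invlpg %0" : : "m" (*(char *)va) : "memory");
}

static __inline void
demo_invltlb(void)
{
	unsigned int tmp;

	/* Writing %cr3 back to itself flushes non-global TLB entries. */
	__asm __volatile("movl %%cr3,%0; movl %0,%%cr3" : "=r" (tmp)
	    : : "memory");
}

/*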
*/ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || pmap->pm_active) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } #endif /* !SMP */ #endif /* !I386_CPU */ /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. */ pt_entry_t * pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return vtopte(va); newpf = *pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1); } return PADDR1 + (i386_btop(va) & (NPTEPG - 1)); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; if (pmap == 0) return 0; pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) { rtval = (pde & ~PDRMASK) | (va & PDRMASK); return rtval; } pte = pmap_pte_quick(pmap, va); rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); return rtval; } return 0; } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); *pte = pa | PG_RW | PG_V | pgeflag; } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); *pte = 0; } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; va = sva = *virt; while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. 
*/ void pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; retry: m = vm_page_lookup(object, pindex); if (m != NULL) { vm_page_lock_queues(); if (vm_page_sleep_if_busy(m, FALSE, "pplookp")) goto retry; vm_page_unlock_queues(); } return m; } #ifndef KSTACK_MAX_PAGES #define KSTACK_MAX_PAGES 32 #endif /* * Create the kernel stack (including pcb for i386) for a new thread. * This routine directly affects the fork perf for a process and * create performance for a thread. */ void pmap_new_thread(struct thread *td, int pages) { int i; vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_page_t m; vm_offset_t ks; /* Bounds check */ if (pages <= 1) pages = KSTACK_PAGES; else if (pages > KSTACK_MAX_PAGES) pages = KSTACK_MAX_PAGES; /* * allocate object for the kstack */ ksobj = vm_object_allocate(OBJT_DEFAULT, pages); td->td_kstack_obj = ksobj; /* get a kernel virtual address for the kstack for this thread */ #ifdef KSTACK_GUARD ks = kmem_alloc_nofault(kernel_map, (pages + 1) * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); if (*vtopte(ks) != 0) pmap_qremove(ks, 1); ks += PAGE_SIZE; td->td_kstack = ks; #else /* get a kernel virtual address for the kstack for this thread */ ks = kmem_alloc_nofault(kernel_map, pages * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); td->td_kstack = ks; #endif /* * Knowing the number of pages allocated is useful when you * want to deallocate them. */ td->td_kstack_pages = pages; /* * For the length of the stack, link in a real page of ram for each * page of stack. */ for (i = 0; i < pages; i++) { /* * Get a kernel stack page */ m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED); ma[i] = m; vm_page_lock_queues(); vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); m->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); } pmap_qenter(ks, ma, pages); } /* * Dispose the kernel stack for a thread that has exited. * This routine directly impacts the exit perf of a process and thread. */ void pmap_dispose_thread(td) struct thread *td; { int i; int pages; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; pmap_qremove(ks, pages); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); vm_page_lock_queues(); vm_page_busy(m); vm_page_unwire(m, 0); vm_page_free(m); vm_page_unlock_queues(); } /* * Free the space that this stack was mapped to in the kernel * address map. */ #ifdef KSTACK_GUARD kmem_free(kernel_map, ks - PAGE_SIZE, (pages + 1) * PAGE_SIZE); #else kmem_free(kernel_map, ks, pages * PAGE_SIZE); #endif vm_object_deallocate(ksobj); } /* * Set up a variable sized alternate kstack. Though it may look MI, it may * need to be different on certain arches like ia64. 
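*/

/*
 * The KSTACK_GUARD path in pmap_new_thread() above allocates one page
 * of KVA more than the stack needs and leaves that first page
 * unmapped, so running off the bottom of the stack faults immediately
 * instead of silently corrupting the neighbouring allocation. The
 * layout arithmetic, as a sketch (4 KB pages assumed):
 */
#define DEMO_PAGE_SIZE	4096

static unsigned long
kstack_usable_base(unsigned long raw_kva)
{
	/* raw_kva .. raw_kva + PAGE_SIZE stays unmapped as the guard. */
	return (raw_kva + DEMO_PAGE_SIZE);
}

/*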
*/ void pmap_new_altkstack(struct thread *td, int pages) { /* shuffle the original stack */ td->td_altkstack_obj = td->td_kstack_obj; td->td_altkstack = td->td_kstack; td->td_altkstack_pages = td->td_kstack_pages; pmap_new_thread(td, pages); } void pmap_dispose_altkstack(td) struct thread *td; { pmap_dispose_thread(td); /* restore the original kstack */ td->td_kstack = td->td_altkstack; td->td_kstack_obj = td->td_altkstack_obj; td->td_kstack_pages = td->td_altkstack_pages; td->td_altkstack = 0; td->td_altkstack_obj = NULL; td->td_altkstack_pages = 0; } /* * Allow the Kernel stack for a thread to be prejudicially paged out. */ void pmap_swapout_thread(td) struct thread *td; { int i; int pages; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; pmap_qremove(ks, pages); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_swapout_thread: kstack already missing?"); vm_page_lock_queues(); vm_page_dirty(m); vm_page_unwire(m, 0); vm_page_unlock_queues(); } } /* * Bring the kernel stack for a specified thread back in. */ void pmap_swapin_thread(td) struct thread *td; { int i, rv; int pages; vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; for (i = 0; i < pages; i++) { m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(ksobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid); m = vm_page_lookup(ksobj, i); m->valid = VM_PAGE_BITS_ALL; } ma[i] = m; vm_page_lock_queues(); vm_page_wire(m); vm_page_wakeup(m); vm_page_unlock_queues(); } pmap_qenter(ks, ma, pages); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt")) vm_page_lock_queues(); if (m->hold_count == 0) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if (pmap_is_current(pmap)) { /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); } /* * If the page is finally unwired, simply free it. */ --m->wire_count; if (m->wire_count == 0) { vm_page_busy(m); vm_page_free_zero(m); atomic_subtract_int(&cnt.v_wire_count, 1); } return 1; } return 0; } static PMAP_INLINE int pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { vm_page_unhold(m); if (m->hold_count == 0) return _pmap_unwire_pte_hold(pmap, m); else return 0; } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. 
*/ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) { unsigned ptepindex; if (va >= VM_MAXUSER_ADDRESS) return 0; if (mpte == NULL) { ptepindex = (va >> PDRSHIFT); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; } else { while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL && vm_page_sleep_if_busy(mpte, FALSE, "pulook")) vm_page_lock_queues(); } } return pmap_unwire_pte_hold(pmap, mpte); } void pmap_pinit0(pmap) struct pmap *pmap; { pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg[NPGPTD]; vm_paddr_t pa; int i; /* * No need to allocate page table space yet but we do need a valid * page directory table. */ - if (pmap->pm_pdir == NULL) + if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, NBPTD); +#ifdef PAE + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif + } /* * allocate object for the ptes */ if (pmap->pm_pteobj == NULL) pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + NPGPTD); /* * allocate the page directory page(s) */ for (i = 0; i < NPGPTD; i++) { ptdpg[i] = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); vm_page_lock_queues(); vm_page_flag_clear(ptdpg[i], PG_BUSY); ptdpg[i]->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) { if ((ptdpg[i]->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); } mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* Wire in kernel global address entries. */ /* XXX copies current process, does not fill in MPPTDI */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); #ifdef SMP pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; #endif /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; +#ifdef PAE + pmap->pm_pdpt[i] = pa | PG_V; +#endif } pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Wire in kernel global address entries. To avoid a race condition * between pmap initialization and pmap_growkernel, this procedure * should be called after the vmspace is attached to the process * but before this pmap is activated. */ void pmap_pinit2(pmap) struct pmap *pmap; { /* XXX: Remove this stub when no longer called */ } /* * this routine is called if the page table page is not * mapped correctly. 
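*/

/*
 * The "self-referential" entries installed by pmap_pinit() above are
 * what make vtopte()-style lookups cheap: because one PDE points back
 * at the page directory itself, every page table page shows up in a
 * fixed 4 MB window of KVA, and the address of the PTE mapping va is
 * pure shift-and-add. A sketch with an illustrative self-map slot:
 */
#include <stdint.h>

#define DEMO_PTDPTDI	0x2ff				/* example slot */
#define DEMO_PTMAP	((uint32_t)DEMO_PTDPTDI << 22)	/* window base */

static uint32_t
demo_vtopte(uint32_t va)
{
	return (DEMO_PTMAP + (va >> 12) * sizeof(uint32_t));
}

/*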
*/ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; unsigned ptepindex; { vm_paddr_t ptepa; vm_offset_t pteva; vm_page_t m; /* * Find or fabricate a new pagetable page */ m = vm_page_grab(pmap->pm_pteobj, ptepindex, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY); KASSERT(m->queue == PQ_NONE, ("_pmap_allocpte: %p->queue != PQ_NONE", m)); /* * Increment the hold count for the page table page * (denoting a new mapping.) */ m->hold_count++; /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); /* * Try to use the new mapping, but if we cannot, then * do it with the routine that maps the page explicitly. */ if ((m->flags & PG_ZERO) == 0) { if (pmap_is_current(pmap)) { pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex); bzero((caddr_t) pteva, PAGE_SIZE); } else { pmap_zero_page(m); } } vm_page_lock_queues(); m->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(m, PG_ZERO); vm_page_wakeup(m); vm_page_unlock_queues(); return m; } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va) { unsigned ptepindex; pd_entry_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; pmap_invalidate_all(kernel_pmap); } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { /* * In order to get the page table page, try the * hint first. */ if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { m = pmap->pm_pteobj->root; } else { m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); } m->hold_count++; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
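*/

/*
 * pmap_growkernel() below rounds addr up to the next page directory
 * boundary with roundup2(addr, PAGE_SIZE * NPTEPG), i.e. 4096 * 1024 =
 * 4 MB. The macro only works for power-of-two alignments; a worked
 * sketch:
 */
#define DEMO_ROUNDUP2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

/* Example: DEMO_ROUNDUP2(0xc1234567u, 4u << 20) == 0xc1400000. */

/*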
*/ void pmap_release(pmap_t pmap) { vm_object_t object; vm_page_t m; int i; object = pmap->pm_pteobj; KASSERT(object->ref_count == 1, ("pmap_release: pteobj reference count %d != 1", object->ref_count)); KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); bzero(pmap->pm_pdir + KPTDI, nkpt * sizeof(*pmap->pm_pdir)); for (i = 0; i < NPGPTD; i++) { pmap->pm_pdir[PTDPTDI + i] = 0; pmap->pm_pdir[APTDPTDI + i] = 0; } #ifdef SMP pmap->pm_pdir[MPPTDI] = 0; #endif pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); vm_page_lock_queues(); for (i = 0; i < NPGPTD; i++) { m = TAILQ_FIRST(&object->memq); +#ifdef PAE + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_busy(m); vm_page_free_zero(m); } KASSERT(TAILQ_EMPTY(&object->memq), ("pmap_release: leaking page table pages")); vm_page_unlock_queues(); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct pmap *pmap; int s; vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; s = splhigh(); mtx_assert(&kernel_map->system_mtx, MA_OWNED); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); nkpt++; } } addr = roundup2(addr, PAGE_SIZE * NPTEPG); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } /* * This index is bogus, but out of the way */ nkpg = vm_page_alloc(NULL, nkpt, VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(PTD, kernel_vm_end) = newpdir; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { *pmap_pde(pmap, kernel_vm_end) = newpdir; } mtx_unlock_spin(&allpmaps_lock); kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ static PMAP_INLINE void free_pv_entry(pv_entry_t pv) { pv_entry_count--; uma_zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. 
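 *
 * Aside: pmap_growkernel() above advances kernel_vm_end one page table at
 * a time with the idiom "(x + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1)".
 * With 4K pages and 1024 ptes per page table that is a 4MB granule; note
 * that the expression moves to the *next* boundary even when x is already
 * aligned, which is what the loop wants.  A standalone restatement:
 */

#include <assert.h>
#include <stdint.h>

#define NBPDR 0x400000u		/* 4MB mapped per page table (non-PAE) */

static uint32_t
next_ptpage_boundary(uint32_t x)
{
	return ((x + NBPDR) & ~(NBPDR - 1));
}

int
main(void)
{
	assert(next_ptpage_boundary(0xc0000001) == 0xc0400000);
	/* an already-aligned address still advances a full step */
	assert(next_ptpage_boundary(0xc0400000) == 0xc0800000);
	return (0);
}

/*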
*/ static pv_entry_t get_pv_entry(void) { pv_entry_count++; if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) && (pmap_pagedaemon_waken == 0)) { pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } return uma_zalloc(pvzone, M_NOWAIT); } /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ static int pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { pv_entry_t pv; int rtval; int s; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (m->md.pv_list_count < pmap->pm_stats.resident_count) { TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pmap == pv->pv_pmap && va == pv->pv_va) break; } } else { TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { if (va == pv->pv_va) break; } } rtval = 0; if (pv) { rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_WRITEABLE); TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } splx(s); return rtval; } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) { int s; pv_entry_t pv; s = splvm(); pv = get_pv_entry(); pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_ptem = mpte; TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); m->md.pv_list_count++; splx(s); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) { pt_entry_t oldpte; vm_page_t m; - oldpte = atomic_readandclear_int(ptq); + oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) oldpte)) { printf( "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", va, oldpte); } #endif if (pmap_track_modified(va)) vm_page_dirty(m); } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); return pmap_remove_entry(pmap, m, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va) { pt_entry_t *pte; if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; int anyvalid; if (pmap == NULL) return; if (pmap->pm_stats.resident_count == 0) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva); return; } anyvalid = 0; for (; sva < eva; sva = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. 
*/ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anyvalid = 1; continue; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (; sva != pdnxt; sva += PAGE_SIZE) { if ((pte = pmap_pte_quick(pmap, sva)) == NULL || *pte == 0) continue; anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva)) break; } } if (anyvalid) pmap_invalidate_all(pmap); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { register pv_entry_t pv; pt_entry_t *pte, tpte; int s; #if defined(PMAP_DIAGNOSTIC) /* * XXX This makes pmap_remove_all() illegal for non-managed pages! */ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); } #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - tpte = atomic_readandclear_int(pte); + tpte = pte_load_clear(pte); if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf( "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", pv->pv_va, tpte); } #endif if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } pmap_invalidate_page(pv->pv_pmap, pv->pv_va); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } vm_page_flag_clear(m, PG_WRITEABLE); splx(s); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; int anychanged; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; anychanged = 0; for (; sva < eva; sva = pdnxt) { unsigned pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. 
*/ if ((ptpaddr & PG_PS) != 0) { pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anychanged = 1; continue; } if (pdnxt > eva) pdnxt = eva; for (; sva != pdnxt; sva += PAGE_SIZE) { pt_entry_t pbits; pt_entry_t *pte; vm_page_t m; if ((pte = pmap_pte_quick(pmap, sva)) == NULL) continue; pbits = *pte; if (pbits & PG_MANAGED) { m = NULL; if (pbits & PG_A) { m = PHYS_TO_VM_PAGE(pbits); vm_page_flag_set(m, PG_REFERENCED); pbits &= ~PG_A; } if ((pbits & PG_M) != 0 && pmap_track_modified(sva)) { if (m == NULL) m = PHYS_TO_VM_PAGE(pbits); vm_page_dirty(m); pbits &= ~PG_M; } } pbits &= ~PG_RW; if (pbits != *pte) { *pte = pbits; anychanged = 1; } } } if (anychanged) pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { vm_paddr_t pa; register pt_entry_t *pte; vm_paddr_t opa; pt_entry_t origpte, newpte; vm_page_t mpte; if (pmap == NULL) return; va &= PG_FRAME; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va); } #if 0 && defined(PMAP_DIAGNOSTIC) else { pd_entry_t *pdeaddr = pmap_pde(pmap, va); origpte = *pdeaddr; if ((origpte & PG_V) == 0) { panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", pmap->pm_pdir[PTDPTDI], origpte, va); } } #endif pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; origpte = *pte; opa = origpte & PG_FRAME; if (origpte & PG_PS) panic("pmap_enter: attempted pmap_enter on 4MB page"); /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) origpte)) { printf( "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", va, origpte); } #endif /* * Remove extra pte reference */ if (mpte) mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; pmap_invalidate_page(pmap, va); } return; } /* * We might be turning off write access to the page, * so we go ahead and sense modify status. 
*/ if (origpte & PG_MANAGED) { if ((origpte & PG_M) && pmap_track_modified(va)) { vm_page_t om; om = PHYS_TO_VM_PAGE(opa); vm_page_dirty(om); } pa |= PG_MANAGED; } goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { int err; vm_page_lock_queues(); err = pmap_remove_pte(pmap, pte, va); vm_page_unlock_queues(); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_initialized && (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { pmap_insert_entry(pmap, va, mpte, m); pa |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { pmap_invalidate_page(pmap, va); } } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { unsigned ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->hold_count++; } else { retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) panic("pmap_enter_quick: unexpected mapping into 4MB page"); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; } else { mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); } if (mpte == NULL) goto retry; mpte->hold_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte != NULL) { vm_page_lock_queues(); pmap_unwire_pte_hold(pmap, mpte); vm_page_unlock_queues(); } return 0; } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) pmap_insert_entry(pmap, va, mpte, m); /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m); /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) *pte = pa | PG_V | PG_U; else *pte = pa | PG_V | PG_U | PG_MANAGED; return mpte; } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. 
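 *
 * Aside: the "validate:" tail of pmap_enter() above builds the new pte by
 * or-ing flag bits into the page frame address.  On non-PAE i386 the
 * frame occupies bits 12-31 and the low 12 bits are flags; the values
 * below are the architectural bit assignments behind the PG_ macros.  A
 * standalone sketch of the composition:
 */

#include <stdint.h>
#include <stdio.h>

#define PG_V	 0x001		/* present */
#define PG_RW	 0x002		/* writable */
#define PG_U	 0x004		/* user accessible */
#define PG_A	 0x020		/* accessed */
#define PG_FRAME 0xfffff000u	/* frame mask, non-PAE */

int
main(void)
{
	uint32_t pa = 0x12345000;
	uint32_t newpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_U | PG_A;

	printf("pte %#x: frame %#x, flags %#x\n", (unsigned)newpte,
	    (unsigned)(newpte & PG_FRAME), (unsigned)(newpte & ~PG_FRAME));
	return (0);
}

/*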
*/ void * pmap_kenter_temporary(vm_offset_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); #ifndef I386_CPU invlpg(va); #else invltlb(); #endif return ((void *)crashdumpmap); } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int limit) { vm_offset_t tmpidx; int psize; vm_page_t p, mpte; if (pmap == NULL || object == NULL) return; /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ if (pseflag && (object->type == OBJT_DEVICE) && ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { int i; vm_page_t m[1]; unsigned int ptepindex; int npdes; pd_entry_t ptepa; if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) return; retry: p = vm_page_lookup(object, pindex); if (p != NULL) { vm_page_lock_queues(); if (vm_page_sleep_if_busy(p, FALSE, "init4p")) goto retry; } else { p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); if (p == NULL) return; m[0] = p; if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { vm_page_lock_queues(); vm_page_free(p); vm_page_unlock_queues(); return; } p = vm_page_lookup(object, pindex); vm_page_lock_queues(); vm_page_wakeup(p); } vm_page_unlock_queues(); ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) { return; } p->valid = VM_PAGE_BITS_ALL; pmap->pm_stats.resident_count += size >> PAGE_SHIFT; npdes = size >> PDRSHIFT; for(i = 0; i < npdes; i++) { pmap->pm_pdir[ptepindex] = ptepa | PG_U | PG_RW | PG_V | PG_PS; ptepa += NBPDR; ptepindex += 1; } pmap_invalidate_all(kernel_pmap); return; } psize = i386_btop(size); if ((object->type != OBJT_VNODE) || ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) { if (object->size < pindex) return; psize = object->size - pindex; } mpte = NULL; if ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->pindex < pindex) { p = vm_page_splay(pindex, object->root); if ((object->root = p)->pindex < pindex) p = TAILQ_NEXT(p, listq); } } /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. */ for (; p != NULL && (tmpidx = p->pindex - pindex) < psize; p = TAILQ_NEXT(p, listq)) { /* * don't allow a madvise to blow away our really * free pages by allocating pv entries. */ if ((limit & MAP_PREFAULT_MADVISE) && cnt.v_free_count < cnt.v_free_reserved) { break; } vm_page_lock_queues(); if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), p, mpte); vm_page_lock_queues(); vm_page_wakeup(p); } vm_page_unlock_queues(); } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a process's address space. It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time.
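 *
 * Aside: the OBJT_DEVICE fast path in pmap_object_init_pt() above only
 * installs 4MB PG_PS mappings when the virtual address, the size, and
 * (checked later via ptepa) the physical base all sit on superpage
 * boundaries.  The test is the usual power-of-two mask trick, sketched
 * standalone (NBPDR is the non-PAE value):
 */

#include <stdbool.h>
#include <stdint.h>

#define NBPDR 0x400000u			/* 4MB superpage (non-PAE) */

static bool
superpage_ok(uint32_t addr, uint32_t size, uint32_t pa)
{
	/* va, length, and physical base must all be 4MB aligned */
	return ((addr & (NBPDR - 1)) == 0 &&
	    (size & (NBPDR - 1)) == 0 &&
	    (pa & (NBPDR - 1)) == 0);
}

/*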
*/ #define PFBAK 4 #define PFFOR 4 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -1 * PAGE_SIZE, 1 * PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE, -3 * PAGE_SIZE, 3 * PAGE_SIZE, -4 * PAGE_SIZE, 4 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; vm_page_t m, mpte; vm_object_t object; if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; pt_entry_t *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if ((*pmap_pde(pmap, addr)) == 0) continue; pte = vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give up when a page is not in memory */ if (m == NULL) break; vm_page_lock_queues(); if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->busy == 0) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((m->queue - m->pc) == PQ_CACHE) { vm_page_deactivate(m); } vm_page_busy(m); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, addr, m, mpte); vm_page_lock_queues(); vm_page_wakeup(m); } vm_page_unlock_queues(); } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register pt_entry_t *pte; if (pmap == NULL) return; pte = pmap_pte_quick(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; vm_page_t m; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; unsigned ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); /* * Don't let optional prefaulting of pages make us go * way below the low water mark of free pages or way * above the high water mark of used pv entries.
*/ if (cnt.v_free_count < cnt.v_free_reserved || pv_entry_count > pv_entry_high_water) break; pdnxt = (addr + NBPDR) & ~PDRMASK; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } continue; } srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); if ((srcmpte == NULL) || (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) continue; if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { /* * We have to check after allocpte for the * pte still being around... allocpte can * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); dst_pte = pmap_pte_quick(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and * accessed (referenced) bits * during the copy. */ m = PHYS_TO_VM_PAGE(ptetemp); *dst_pte = ptetemp & ~(PG_M | PG_A); dst_pmap->pm_stats.resident_count++; pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { vm_page_lock_queues(); pmap_unwire_pte_hold(dst_pmap, dstmpte); vm_page_unlock_queues(); } if (dstmpte->hold_count >= srcmpte->hold_count) break; } addr += PAGE_SIZE; src_pte++; } } } #ifdef SMP /* * pmap_zpi_switchin*() * * These functions allow us to avoid doing IPIs altogether in certain * temporary page-mapping situations (page zeroing). Instead, to deal * with being preempted and moved onto a different cpu, we invalidate * the page when the scheduler switches us in. This does not occur * very often so we remain relatively optimal with very little effort. */ static void pmap_zpi_switchin12(void) { invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); } static void pmap_zpi_switchin2(void) { invlpg((u_int)CADDR2); } static void pmap_zpi_switchin3(void) { invlpg((u_int)CADDR3); } #endif /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin2; #endif invlpg((u_int)CADDR2); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); else #endif bzero(CADDR2, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page.
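 *
 * Aside: pmap_zero_page() above follows a fixed pattern -- point a
 * reserved pte (CMAP2) at the target frame, flush the stale TLB entry for
 * the matching window address (CADDR2), zero through the window, then
 * unmap.  A compilable outline with stand-ins (the fake_* names are
 * hypothetical; the real code manipulates a live pte and the TLB):
 */

#include <stdint.h>
#include <string.h>

#define PAGE_SIZE 4096
#define PG_V  0x001
#define PG_RW 0x002
#define PG_A  0x020
#define PG_M  0x040

static uint32_t fake_cmap2;		/* stands in for the CMAP2 pte */
static char fake_caddr2[PAGE_SIZE];	/* stands in for the CADDR2 window */

static void
zero_frame_outline(uint32_t pa)
{
	fake_cmap2 = pa | PG_V | PG_RW | PG_A | PG_M;	/* 1. map frame */
	/* 2. the kernel would invlpg the window here */
	memset(fake_caddr2, 0, PAGE_SIZE);		/* 3. zero it */
	fake_cmap2 = 0;					/* 4. unmap */
}

/*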
*/ void pmap_zero_page_area(vm_page_t m, int off, int size) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin2; #endif invlpg((u_int)CADDR2); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); else #endif bzero((char *)CADDR2 + off, size); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) panic("pmap_zero_page: CMAP3 busy"); *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin3; #endif invlpg((u_int)CADDR3); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR3); else #endif bzero(CADDR3, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP3 = 0; } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t src, vm_page_t dst) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*CMAP2) panic("pmap_copy_page: CMAP2 busy"); *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A; *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin12; #endif invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); #endif bcopy(CADDR1, CADDR2, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP1 = 0; *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap, m) pmap_t pmap; vm_page_t m; { pv_entry_t pv; int loops = 0; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } loops++; if (loops >= 16) break; } splx(s); return (FALSE); } #define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from the specified address space; * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space.
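 *
 * Aside: pmap_page_exists_quick() above deliberately stops after 16 pv
 * entries; an approximate answer is fine for page aging, and the cap
 * bounds the cost per page.  The same bounded-scan shape on a plain
 * singly linked list, standalone:
 */

#include <stdbool.h>
#include <stddef.h>

struct pv {
	const void	*pv_pmap;
	struct pv	*pv_next;
};

static bool
exists_quick(const struct pv *head, const void *pmap)
{
	int loops = 0;
	const struct pv *pv;

	for (pv = head; pv != NULL; pv = pv->pv_next) {
		if (pv->pv_pmap == pmap)
			return (true);
		if (++loops >= 16)	/* cap the scan; misses are tolerable */
			break;
	}
	return (false);
}

/*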
*/ void pmap_remove_pages(pmap, sva, eva) pmap_t pmap; vm_offset_t sva, eva; { pt_entry_t *pte, tpte; vm_page_t m; pv_entry_t pv, npv; int s; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); s = splvm(); for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY pte = vtopte(pv->pv_va); #else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif tpte = *pte; if (tpte == 0) { printf("TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; } m = PHYS_TO_VM_PAGE(tpte); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pv->pv_pmap->pm_stats.resident_count--; *pte = 0; /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { vm_page_dirty(m); } npv = TAILQ_NEXT(pv, pv_plist); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); m->md.pv_list_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_FIRST(&m->md.pv_list) == NULL) { vm_page_flag_clear(m, PG_WRITEABLE); } pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } splx(s); pmap_invalidate_all(pmap); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { pv_entry_t pv; pt_entry_t *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ if (!pmap_track_modified(pv->pv_va)) continue; #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (*pte & PG_M) { splx(s); return TRUE; } } splx(s); return (FALSE); } /* * this routine is used to modify bits in ptes */ static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem) { register pv_entry_t pv; register pt_entry_t *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || (!setem && bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) return; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); /* * Loop over all current mappings, setting/clearing as appropriate. If * setting RO, do we need to clear the VAC?
*/ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { /* * don't write protect pager mappings */ if (!setem && (bit == PG_RW)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (setem) { *pte |= bit; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } else { pt_entry_t pbits = *pte; if (pbits & bit) { if (bit == PG_RW) { if (pbits & PG_M) { vm_page_dirty(m); } *pte = pbits & ~(PG_M|PG_RW); } else { *pte = pbits & ~bit; } pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } } } if (!setem && bit == PG_RW) vm_page_flag_clear(m, PG_WRITEABLE); splx(s); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(vm_page_t m, vm_prot_t prot) { if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { pmap_changebit(m, PG_RW, FALSE); } else { pmap_remove_all(m); } } } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { register pv_entry_t pv, pvf, pvn; pt_entry_t *pte; int s; int rtval = 0; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return (rtval); s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); if (!pmap_track_modified(pv->pv_va)) continue; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { *pte &= ~PG_A; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); rtval++; if (rtval > 4) { break; } } } while ((pv = pvn) != NULL && pv != pvf); } splx(s); return (rtval); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { pmap_changebit(m, PG_M, FALSE); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { pmap_changebit(m, PG_A, FALSE); } /* * Miscellaneous support routines follow */ static void i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: /* * Read access is also 0. There isn't any execute bit, * so just make it readable. */ case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = 0; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
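 *
 * Aside: i386_protection_init() above collapses the eight
 * VM_PROT_{READ,WRITE,EXECUTE} combinations into just two pte patterns,
 * because i386 page table entries carry no execute bit: anything readable
 * or executable maps to 0, anything writable adds PG_RW.  The whole table
 * reduces to one test, sketched standalone:
 */

#include <stdio.h>

#define VM_PROT_WRITE 0x2	/* matches the vm_prot_t bit assignment */
#define PG_RW	      0x002

int
main(void)
{
	int protection_codes[8];
	int prot;

	for (prot = 0; prot < 8; prot++)
		protection_codes[prot] = (prot & VM_PROT_WRITE) ? PG_RW : 0;
	for (prot = 0; prot < 8; prot++)
		printf("prot %d -> pte bits %#x\n", prot, protection_codes[prot]);
	return (0);
}

/*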
*/ void * pmap_mapdev(pa, size) vm_paddr_t pa; vm_size_t size; { vm_offset_t va, tmpva, offset; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); GIANT_REQUIRED; va = kmem_alloc_pageable(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; for (tmpva = va; size > 0; ) { pmap_kenter(tmpva, pa); size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, va, tmpva); return ((void *)(va + offset)); } void pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { vm_offset_t base, offset, tmpva; pt_entry_t *pte; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { pte = vtopte(tmpva); *pte = 0; } pmap_invalidate_range(kernel_pmap, va, tmpva); kmem_free(kernel_map, base, size); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { pt_entry_t *ptep, pte; vm_page_t m; int val = 0; ptep = pmap_pte_quick(pmap, addr); if (ptep == 0) { return 0; } if ((pte = *ptep) != 0) { vm_paddr_t pa; val = MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) return val; pa = pte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); /* * Modified by us */ if (pte & PG_M) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; else { /* * Modified by someone else */ vm_page_lock_queues(); if (m->dirty || pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; vm_page_unlock_queues(); } /* * Referenced by us */ if (pte & PG_A) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; else { /* * Referenced by someone else */ vm_page_lock_queues(); if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } vm_page_unlock_queues(); } } return val; } void pmap_activate(struct thread *td) { struct proc *p = td->td_proc; pmap_t pmap; u_int32_t cr3; pmap = vmspace_pmap(td->td_proc->p_vmspace); #if defined(SMP) pmap->pm_active |= PCPU_GET(cpumask); #else pmap->pm_active |= 1; #endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else cr3 = vtophys(pmap->pm_pdir); +#endif /* XXXKSE this is wrong. * pmap_activate is for the current thread on the current cpu */ if (p->p_flag & P_THREADED) { /* Make sure all other cr3 entries are updated. */ /* what if they are running? 
XXXKSE (maybe abort them) */ FOREACH_THREAD_IN_PROC(p, td) { td->td_pcb->pcb_cr3 = cr3; } } else { td->td_pcb->pcb_cr3 = cr3; } load_cr3(cr3); #ifdef SWTCH_OPTIM_STATS tlb_flush_count++; #endif } vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { return addr; } addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return addr; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return npte; } pte = pmap_pte_quick(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return npte; } #endif #if defined(DEBUG) static void pads(pmap_t pm); void pmap_pvdump(vm_offset_t pa); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { int i, j; vm_paddr_t va; pt_entry_t *ptep; if (pm == kernel_pmap) return; for (i = 0; i < NPDEPTD; i++) if (pm->pm_pdir[i]) for (j = 0; j < NPTEPG; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte_quick(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *ptep); }; } void pmap_pvdump(pa) vm_paddr_t pa; { pv_entry_t pv; vm_page_t m; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif Index: head/sys/amd64/amd64/vm_machdep.c =================================================================== --- head/sys/amd64/amd64/vm_machdep.c (revision 112840) +++ head/sys/amd64/amd64/vm_machdep.c (revision 112841) @@ -1,559 +1,567 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD$ */ #include "opt_npx.h" #ifdef PC98 #include "opt_pc98.h" #endif #include "opt_reset.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PC98 #include #else #include #endif static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif extern int _ucodesel, _udatasel; /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * is ready to run and return to user mode. */ void cpu_fork(td1, p2, td2, flags) register struct thread *td1; register struct proc *p2; struct thread *td2; int flags; { register struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; #ifdef DEV_NPX register_t savecrit; #endif p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt = mdp1->md_ldt; if (pldt && pldt->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_free(td1); } } return; } /* Ensure that p1's pcb is up to date. */ #ifdef DEV_NPX if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); savecrit = intr_disable(); if (PCPU_GET(fpcurthread) == td1) npxsave(&td1->td_pcb->pcb_save); intr_restore(savecrit); #endif /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *)(td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td2->td_pcb = pcb2; /* Copy p1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -16 is so we can expand the trapframe if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * Set registers for trampoline to user mode.
Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = 0; /* Copy the LDT, if necessary. */ mtx_lock_spin(&sched_lock); if (mdp2->md_ldt != 0) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&sched_lock); /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_set_fork_handler(td, func, arg) struct thread *td; void (*func)(void *); void *arg; { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { struct mdproc *mdp; mdp = &td->td_proc->p_md; if (mdp->md_ldt) user_ldt_free(td); reset_dbregs(); } void cpu_thread_exit(struct thread *td) { struct pcb *pcb = td->td_pcb; #ifdef DEV_NPX npxexit(td); #endif if (pcb->pcb_flags & PCB_DBREGS) { /* * disable all hardware breakpoints */ reset_dbregs(); pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != 0) { /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */ /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ mtx_lock(&Giant); kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext, ctob(IOPAGES + 1)); mtx_unlock(&Giant); pcb->pcb_ext = 0; } } void cpu_sched_exit(td) register struct thread *td; { } void cpu_thread_setup(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Put enough state in the new thread's PCB to get it to go back to * userret(), where we can intercept it again to set the return (upcall) * address and stack, along with those from upcalls that are from other sources * such as those generated in thread_userret() itself.
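 *
 * Aside: the PAE hunks in cpu_fork() above and cpu_set_upcall() below make
 * the same substitution -- under PAE, %cr3 must hold the physical address
 * of the small page-directory-pointer table rather than the page directory
 * itself.  Sketched with a hypothetical struct whose field names mirror
 * this commit (fake_vtophys is a stand-in, not the kernel interface):
 */

#include <stdint.h>

struct fake_pmap {
	void	*pm_pdir;	/* page directory page(s) */
	void	*pm_pdpt;	/* PAE: page-directory-pointer table */
};

/* stand-in for vtophys(); identity translation, illustration only */
static uint32_t
fake_vtophys(void *va)
{
	return ((uint32_t)(uintptr_t)va);
}

static uint32_t
cr3_for(struct fake_pmap *pm, int pae)
{
	return (pae ? fake_vtophys(pm->pm_pdpt) : fake_vtophys(pm->pm_pdir));
}

/*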
*/ void cpu_set_upcall(struct thread *td, void *pcb) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. * * XXXKSE It might be a good idea to simply skip this as * the values of the other registers may be unimportant. * This would remove any requirement for knowing the KSE * at this time (see the matching comment below for * more analysis) (need a good safe default). */ bcopy(pcb, pcb2, sizeof(*pcb2)); /* * Create a new fresh stack for the new thread. * The -16 is so we can expand the trapframe if we go to vm86. * Don't forget to set this stack value into whatever supplies * the address for the fault handlers. * The contexts are filled in at the time we actually DO the * upcall as only then do we know which KSE we got. */ td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. XXXKSE ??? * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the uts * function. */ td->td_frame->tf_esp = (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16; td->td_frame->tf_eip = (int)ku->ku_func; /* * Pass the address of the mailbox for this kse to the uts * function as a parameter on the stack. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)ku->ku_mailbox); } void cpu_wait(p) struct proc *p; { } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } /* * Force reset the processor by invalidating the entire address space! */ #ifdef SMP static void cpu_reset_proxy() { cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ stop_cpus((1<" */ invltlb(); /* NOTREACHED */ while(1); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. 
* Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(addr) vm_offset_t addr; { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } Index: head/sys/amd64/include/bus_amd64.h =================================================================== --- head/sys/amd64/include/bus_amd64.h (revision 112840) +++ head/sys/amd64/include/bus_amd64.h (revision 112841) @@ -1,1216 +1,1224 @@ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _I386_BUS_AT386_H_ #define _I386_BUS_AT386_H_ #include /* * To remain compatible with NetBSD's interface, default to both memio and * pio when neither of them is defined. */ #if !defined(_I386_BUS_PIO_H_) && !defined(_I386_BUS_MEMIO_H_) #define _I386_BUS_PIO_H_ #define _I386_BUS_MEMIO_H_ #endif /* * Values for the i386 bus space tag, not to be used directly by MI code. */ #define I386_BUS_SPACE_IO 0 /* space is i/o space */ #define I386_BUS_SPACE_MEM 1 /* space is mem space */ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) /* * Access methods for bus resources and address space. */ typedef int bus_space_tag_t; typedef u_int bus_space_handle_t; /* * Map a region of device bus space into CPU virtual address space. */ #define BUS_SPACE_MAP_CACHEABLE 0x01 #define BUS_SPACE_MAP_LINEAR 0x02 int bus_space_map(bus_space_tag_t t, bus_addr_t addr, bus_size_t size, int flags, bus_space_handle_t *bshp); /* * Unmap a region of device bus space. */ static __inline void bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_unmap(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } /* * Get a new handle for a subregion of an already-mapped area of bus space. */ static __inline int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp); static __inline int bus_space_subregion(bus_space_tag_t t __unused, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size __unused, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } /* * Allocate a region of memory that is accessible to devices in bus space. */ int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t align, bus_size_t boundary, int flags, bus_addr_t *addrp, bus_space_handle_t *bshp); /* * Free a region of bus space accessible memory. 
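 *
 * Aside: the bus_addr_t change above is the MI-visible half of PAE --
 * device addresses handed to busdma may now exceed 32 bits, while
 * bus_size_t stays 32-bit since no single mapping can outgrow the 4GB
 * virtual address space.  A standalone restatement of the typedefs:
 */

#include <stdint.h>
#include <stdio.h>

#ifdef PAE
typedef uint64_t bus_addr_t;
#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL
#else
typedef uint32_t bus_addr_t;
#define BUS_SPACE_MAXADDR 0xFFFFFFFFu
#endif
typedef uint32_t bus_size_t;

int
main(void)
{
	printf("bus_addr_t: %zu bytes, max address %#jx\n",
	    sizeof(bus_addr_t), (uintmax_t)BUS_SPACE_MAXADDR);
	return (0);
}

/*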
*/ static __inline void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_free(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } #if defined(_I386_BUS_PIO_H_) || defined(_I386_BUS_MEMIO_H_) /* * Read a 1, 2, 4, or 8 byte quantity from bus space * described by tag/handle/offset. */ static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined (_I386_BUS_PIO_H_) #if defined (_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inb(handle + offset)); #endif #if defined (_I386_BUS_MEMIO_H_) return (*(volatile u_int8_t *)(handle + offset)); #endif } static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inw(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int16_t *)(handle + offset)); #endif } static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inl(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int32_t *)(handle + offset)); #endif } #if 0 /* Cause a link error for bus_space_read_8 */ #define bus_space_read_8(t, h, o) !!! bus_space_read_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle/offset and copy into buffer provided. 
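 *
 * The _multi_ forms re-read the same bus location `count' times, which is
 * what a FIFO-style data register wants.  A sketch with hypothetical
 * driver state `bst'/`bsh' and an assumed data-port offset of 0x10:
 *
 *	u_int32_t buf[16];
 *
 *	bus_space_read_multi_4(bst, bsh, 0x10, buf, 16);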
*/ static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movb (%2),%%al \n\ stosb \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movw (%2),%%ax \n\ stosw \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movl (%2),%%eax \n\ stosl \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_multi_8 */ #define bus_space_read_multi_8 !!! bus_space_read_multi_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle and starting at `offset' and copy into * buffer provided. 
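 *
 * Unlike the _multi_ forms, the _region_ forms advance the bus offset for
 * each datum, copying a window of consecutive device locations.  Under
 * the same hypothetical names as above:
 *
 *	u_int8_t shadow[64];
 *
 *	bus_space_read_region_1(bst, bsh, 0, shadow, sizeof(shadow));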
*/ static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inb %w2,%%al \n\ stosb \n\ incl %2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inw %w2,%%ax \n\ stosw \n\ addl $2,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inl %w2,%%eax \n\ stosl \n\ addl $4,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_region_8 */ #define bus_space_read_region_8 !!! bus_space_read_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `value' to bus space * described by tag/handle/offset. 
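 *
 * A one-line usage sketch, with the handle `bsh' and the 0x04
 * command-register offset both hypothetical:
 *
 *	bus_space_write_2(bst, bsh, 0x04, 0x0001);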
*/ static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value); static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value); static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value); static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outb(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int8_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outw(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int16_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outl(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int32_t *)(bsh + offset) = value; #endif } #if 0 /* Cause a link error for bus_space_write_8 */ #define bus_space_write_8 !!! bus_space_write_8 not implemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer * provided to bus space described by tag/handle/offset. */ static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ movb %%al,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ movw %%ax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if 
defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ movl %%eax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_write_multi_8 */ #define bus_space_write_multi_8(t, h, o, a, c) \ !!! bus_space_write_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer provided * to bus space described by tag/handle starting at `offset'. */ static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ outb %%al,%w0 \n\ incl %0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ outw %%ax,%w0 \n\ addl $2,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ outl %%eax,%w0 \n\ addl $4,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for 
bus_space_write_region_8 */ #define bus_space_write_region_8 \ !!! bus_space_write_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle/offset `count' times. */ static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_multi_8 */ #define bus_space_set_multi_8 !!! bus_space_set_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle starting at `offset'. 
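 *
 * A sketch: clear an assumed 4KB frame-buffer window through a
 * hypothetical memory-space handle `memh', one 32-bit word at a time:
 *
 *	bus_space_set_region_4(I386_BUS_SPACE_MEM, memh, 0, 0, 0x1000 >> 2);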
*/ static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr++) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr++) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 2) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 2) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 4) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 4) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_region_8 */ #define bus_space_set_region_8 !!! bus_space_set_region_8 unimplemented !!! #endif /* * Copy `count' 1, 2, 4, or 8 byte values from bus space starting * at tag/bsh1/off1 to bus space starting at tag/bsh2/off2. 
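 *
 * The copy is overlap-safe in the memmove() sense: the implementation
 * below picks the copy direction from the two addresses.  A sketch that
 * scrolls an assumed 80x25 text screen up by one row (handle `vgah'
 * hypothetical):
 *
 *	bus_space_copy_region_2(I386_BUS_SPACE_MEM, vgah, 80 * 2, vgah, 0,
 *	    80 * 24);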
*/ static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) outb(addr2, inb(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; 
count--, addr1 += 4, addr2 += 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } } #endif } #endif /* defined(_I386_BUS_PIO_H_) || defined(_I386_MEM_IO_H_) */ #if 0 /* Cause a link error for bus_space_copy_8 */ #define bus_space_copy_region_8 !!! bus_space_copy_region_8 unimplemented !!! #endif /* * Bus read/write barrier methods. * * void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh, * bus_size_t offset, bus_size_t len, int flags); * * * Note that BUS_SPACE_BARRIER_WRITE doesn't do anything other than * prevent reordering by the compiler; all Intel x86 processors currently * retire operations outside the CPU in program order. */ #define BUS_SPACE_BARRIER_READ 0x01 /* force read barrier */ #define BUS_SPACE_BARRIER_WRITE 0x02 /* force write barrier */ static __inline void bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused, bus_size_t offset __unused, bus_size_t len __unused, int flags) { #ifdef __GNUC__ if (flags & BUS_SPACE_BARRIER_READ) __asm __volatile("lock; addl $0,0(%%esp)" : : : "memory"); else __asm __volatile("" : : : "memory"); #endif } #endif /* _I386_BUS_AT386_H_ */ Index: head/sys/amd64/include/bus_at386.h =================================================================== --- head/sys/amd64/include/bus_at386.h (revision 112840) +++ head/sys/amd64/include/bus_at386.h (revision 112841) @@ -1,1216 +1,1224 @@ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _I386_BUS_AT386_H_ #define _I386_BUS_AT386_H_ #include /* * To remain compatible with NetBSD's interface, default to both memio and * pio when neither of them is defined. */ #if !defined(_I386_BUS_PIO_H_) && !defined(_I386_BUS_MEMIO_H_) #define _I386_BUS_PIO_H_ #define _I386_BUS_MEMIO_H_ #endif /* * Values for the i386 bus space tag, not to be used directly by MI code. */ #define I386_BUS_SPACE_IO 0 /* space is i/o space */ #define I386_BUS_SPACE_MEM 1 /* space is mem space */ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) /* * Access methods for bus resources and address space. */ typedef int bus_space_tag_t; typedef u_int bus_space_handle_t; /* * Map a region of device bus space into CPU virtual address space. 
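 *
 * A hedged sketch of mapping a device's registers, with the physical
 * address and size invented for the example:
 *
 *	bus_space_handle_t memh;
 *
 *	if (bus_space_map(I386_BUS_SPACE_MEM, 0xfe000000, 0x1000, 0,
 *	    &memh) != 0)
 *		return (ENXIO);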
*/ #define BUS_SPACE_MAP_CACHEABLE 0x01 #define BUS_SPACE_MAP_LINEAR 0x02 int bus_space_map(bus_space_tag_t t, bus_addr_t addr, bus_size_t size, int flags, bus_space_handle_t *bshp); /* * Unmap a region of device bus space. */ static __inline void bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_unmap(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } /* * Get a new handle for a subregion of an already-mapped area of bus space. */ static __inline int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp); static __inline int bus_space_subregion(bus_space_tag_t t __unused, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size __unused, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } /* * Allocate a region of memory that is accessible to devices in bus space. */ int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t align, bus_size_t boundary, int flags, bus_addr_t *addrp, bus_space_handle_t *bshp); /* * Free a region of bus space accessible memory. */ static __inline void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_free(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } #if defined(_I386_BUS_PIO_H_) || defined(_I386_BUS_MEMIO_H_) /* * Read a 1, 2, 4, or 8 byte quantity from bus space * described by tag/handle/offset. */ static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined (_I386_BUS_PIO_H_) #if defined (_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inb(handle + offset)); #endif #if defined (_I386_BUS_MEMIO_H_) return (*(volatile u_int8_t *)(handle + offset)); #endif } static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inw(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int16_t *)(handle + offset)); #endif } static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inl(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int32_t *)(handle + offset)); #endif } #if 0 /* Cause a link error for bus_space_read_8 */ #define bus_space_read_8(t, h, o) !!! bus_space_read_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle/offset and copy into buffer provided. 
*/ static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movb (%2),%%al \n\ stosb \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movw (%2),%%ax \n\ stosw \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movl (%2),%%eax \n\ stosl \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_multi_8 */ #define bus_space_read_multi_8 !!! bus_space_read_multi_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle and starting at `offset' and copy into * buffer provided. 
*/ static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inb %w2,%%al \n\ stosb \n\ incl %2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inw %w2,%%ax \n\ stosw \n\ addl $2,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inl %w2,%%eax \n\ stosl \n\ addl $4,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_region_8 */ #define bus_space_read_region_8 !!! bus_space_read_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `value' to bus space * described by tag/handle/offset. 
*/ static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value); static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value); static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value); static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outb(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int8_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outw(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int16_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outl(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int32_t *)(bsh + offset) = value; #endif } #if 0 /* Cause a link error for bus_space_write_8 */ #define bus_space_write_8 !!! bus_space_write_8 not implemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer * provided to bus space described by tag/handle/offset. */ static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ movb %%al,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ movw %%ax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if 
defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ movl %%eax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_write_multi_8 */ #define bus_space_write_multi_8(t, h, o, a, c) \ !!! bus_space_write_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer provided * to bus space described by tag/handle starting at `offset'. */ static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ outb %%al,%w0 \n\ incl %0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ outw %%ax,%w0 \n\ addl $2,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ outl %%eax,%w0 \n\ addl $4,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for 
bus_space_write_region_8 */ #define bus_space_write_region_8 \ !!! bus_space_write_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle/offset `count' times. */ static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_multi_8 */ #define bus_space_set_multi_8 !!! bus_space_set_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle starting at `offset'. 
*/ static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr++) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr++) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 2) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 2) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 4) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 4) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_region_8 */ #define bus_space_set_region_8 !!! bus_space_set_region_8 unimplemented !!! #endif /* * Copy `count' 1, 2, 4, or 8 byte values from bus space starting * at tag/bsh1/off1 to bus space starting at tag/bsh2/off2. 
*/ static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) outb(addr2, inb(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; 
count--, addr1 += 4, addr2 += 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) *(volatile u_int32_t *)(addr2) = *(volatile u_int32_t *)(addr1); } } #endif } #endif /* defined(_I386_BUS_PIO_H_) || defined(_I386_MEM_IO_H_) */ #if 0 /* Cause a link error for bus_space_copy_8 */ #define bus_space_copy_region_8 !!! bus_space_copy_region_8 unimplemented !!! #endif /* * Bus read/write barrier methods. * * void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh, * bus_size_t offset, bus_size_t len, int flags); * * * Note that BUS_SPACE_BARRIER_WRITE doesn't do anything other than * prevent reordering by the compiler; all Intel x86 processors currently * retire operations outside the CPU in program order. */ #define BUS_SPACE_BARRIER_READ 0x01 /* force read barrier */ #define BUS_SPACE_BARRIER_WRITE 0x02 /* force write barrier */ static __inline void bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused, bus_size_t offset __unused, bus_size_t len __unused, int flags) { #ifdef __GNUC__ if (flags & BUS_SPACE_BARRIER_READ) __asm __volatile("lock; addl $0,0(%%esp)" : : : "memory"); else __asm __volatile("" : : : "memory"); #endif } #endif /* _I386_BUS_AT386_H_ */ Index: head/sys/amd64/include/pmap.h =================================================================== --- head/sys/amd64/include/pmap.h (revision 112840) +++ head/sys/amd64/include/pmap.h (revision 112841) @@ -1,265 +1,317 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ /* * Page-directory and page-table entries follow this format, with a few * of the fields not present here and there, depending on a lot of things. */ /* ---- Intel Nomenclature ---- */ #define PG_V 0x001 /* P Valid */ #define PG_RW 0x002 /* R/W Read/Write */ #define PG_U 0x004 /* U/S User/Supervisor */ #define PG_NC_PWT 0x008 /* PWT Write through */ #define PG_NC_PCD 0x010 /* PCD Cache disable */ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ #define PG_MANAGED PG_AVAIL2 #define PG_FRAME (~((vm_paddr_t)PAGE_MASK)) #define PG_PROT (PG_RW|PG_U) /* all protection bits. */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* * Page Protection Exception bits */ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ /* * Size of Kernel address space. This is the number of page table pages * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). */ #ifndef KVA_PAGES +#ifdef PAE +#define KVA_PAGES 512 +#else #define KVA_PAGES 256 #endif +#endif /* * Pte related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT))) /* * The *PTDI values control the layout of virtual memory * * SMP_PRIVPAGES: The per-cpu address space is 0xff800000 -> 0xffbfffff */ #define APTDPTDI (NPDEPTD-NPGPTD) /* alt ptd entry that points to APTD */ #ifdef SMP #define MPPTDI (APTDPTDI-1) /* per cpu ptd entry */ #define KPTDI (MPPTDI-NKPDE) /* start of kernel virtual pde's */ #else #define KPTDI (APTDPTDI-NKPDE) /* start of kernel virtual pde's */ #endif /* SMP */ #define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */ /* * XXX doesn't really belong here I guess... */ #define ISA_HOLE_START 0xa0000 #define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) #ifndef LOCORE #include <sys/queue.h> -typedef u_int32_t pd_entry_t; -typedef u_int32_t pt_entry_t; +#ifdef PAE +typedef uint64_t pdpt_entry_t; +typedef uint64_t pd_entry_t; +typedef uint64_t pt_entry_t; + +#define PTESHIFT (3) +#define PDESHIFT (3) + +#else + +typedef uint32_t pd_entry_t; +typedef uint32_t pt_entry_t; + #define PTESHIFT (2) #define PDESHIFT (2) +#endif + /* * Address of current and alternate address space page table maps * and directories.
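 *
 * Because the map is recursive (one pde points at the page directory
 * itself, per the header comment above), the pte mapping a given virtual
 * address sits at a fixed, computable spot inside PTmap, which is all
 * that vtopte() below does:
 *
 *	pte = PTmap + i386_btop(va)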
#ifdef _KERNEL extern pt_entry_t PTmap[], APTmap[]; extern pd_entry_t PTD[], APTD[]; extern pd_entry_t PTDpde[], APTDpde[]; +#ifdef PAE +extern pdpt_entry_t *IdlePDPT; +#endif extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ #endif #ifdef _KERNEL /* * virtual address to page table entry and * to physical address. Likewise for alternate address space. * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. */ #define vtopte(va) (PTmap + i386_btop(va)) #define avtopte(va) (APTmap + i386_btop(va)) /* * Routine: pmap_kextract * Function: * Extract the physical page address associated with a * kernel virtual address. */ static __inline vm_paddr_t pmap_kextract(vm_offset_t va) { vm_paddr_t pa; if ((pa = (vm_offset_t) PTD[va >> PDRSHIFT]) & PG_PS) { pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1)); } else { pa = *vtopte(va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); } return pa; } #define vtophys(va) pmap_kextract(((vm_offset_t) (va))) + +#ifdef PAE + +static __inline pt_entry_t +pte_load_clear(pt_entry_t *pte) +{ + pt_entry_t r; + + r = *pte; + __asm __volatile( + "1:\n" + "\tcmpxchg8b %1\n" + "\tjnz 1b" + : "+A" (r) + : "m" (*pte), "b" (0), "c" (0)); + return (r); +} + +#else + +#define pte_load_clear(pte) atomic_readandclear_int(pte) + #endif +#endif + /* * Pmap stuff */ struct pv_entry; struct md_page { int pv_list_count; TAILQ_HEAD(,pv_entry) pv_list; }; struct pmap { pd_entry_t *pm_pdir; /* KVA of page directory */ vm_object_t pm_pteobj; /* Container for pte's */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ +#ifdef PAE + pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer + table */ +#endif }; #define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) #define pmap_resident_count(pmap) (pmap)->pm_stats.resident_count typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #endif /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_table. */ typedef struct pv_entry { pmap_t pv_pmap; /* pmap where mapping lies */ vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_list; TAILQ_ENTRY(pv_entry) pv_plist; vm_page_t pv_ptem; /* VM page for pte */ } *pv_entry_t; #ifdef _KERNEL #define NPPROVMTRR 8 #define PPRO_VMTRRphysBase0 0x200 #define PPRO_VMTRRphysMask0 0x201 struct ppro_vmtrr { u_int64_t base, mask; }; extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR]; extern caddr_t CADDR1; extern pt_entry_t *CMAP1; extern vm_paddr_t avail_end; extern vm_paddr_t avail_start; extern vm_offset_t clean_eva; extern vm_offset_t clean_sva; extern vm_paddr_t phys_avail[]; extern char *ptvmmap; /* poor name!
*/ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; void pmap_bootstrap(vm_paddr_t, vm_paddr_t); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void pmap_kremove(vm_offset_t); void *pmap_mapdev(vm_paddr_t, vm_size_t); void pmap_unmapdev(vm_offset_t, vm_size_t); pt_entry_t *pmap_pte_quick(pmap_t, vm_offset_t) __pure2; void pmap_set_opt(void); void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 (revision 112840) +++ head/sys/conf/options.i386 (revision 112841) @@ -1,184 +1,187 @@ # $FreeBSD$ # Options specific to the i386 platform kernels MATH_EMULATE opt_math_emulate.h GPL_MATH_EMULATE opt_math_emulate.h DISABLE_PSE opt_pmap.h PMAP_SHPGPERPROC opt_pmap.h DISABLE_PG_G opt_pmap.h PPC_PROBE_CHIPSET opt_ppc.h PPC_DEBUG opt_ppc.h MAXMEM PERFMON opt_perfmon.h POWERFAIL_NMI opt_trap.h AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h I586_PMC_GUPROF opt_i586_guprof.h COMPAT_OLDISA BROKEN_KEYBOARD_RESET opt_reset.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h IBCS2 opt_dontuse.h COMPAT_LINUX opt_dontuse.h COMPAT_SVR4 opt_dontuse.h DEBUG_SVR4 opt_svr4.h PECOFF_SUPPORT opt_dontuse.h PECOFF_DEBUG opt_pecoff.h # i386 SMP options APIC_IO opt_global.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h +# Physical address extensions and support for >4G ram. As above. +PAE opt_global.h + CLK_CALIBRATION_LOOP opt_clock.h CLK_USE_I8254_CALIBRATION opt_clock.h CLK_USE_TSC_CALIBRATION opt_clock.h TIMER_FREQ opt_clock.h NO_F00F_HACK opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h CPU_ELAN opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h NO_MEMORY_HOLE opt_cpu.h CPU_ENABLE_SSE opt_cpu.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_DISABLE_SSE opt_cpu.h CPU_DISABLE_CMPXCHG opt_global.h # Options for the AMD Elan CPU ELAN_PPS opt_cpu.h ELAN_XTAL opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. 
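For reference, the PAE knob added above is spelled like any other global option in a kernel configuration file; a hypothetical config fragment (not part of this change) would be:

    options         PAE
    options         KVA_PAGES=512   # optional; 512 is already the PAE default

Since both land in opt_global.h, changing either means recompiling the whole kernel, which is what the "As above" remark is pointing at.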
I386_CPU opt_global.h I486_CPU opt_global.h I586_CPU opt_global.h I686_CPU opt_global.h MAXCONS opt_syscons.h SC_ALT_MOUSE_IMAGE opt_syscons.h SC_CUT_SPACES2TABS opt_syscons.h SC_CUT_SEPCHARS opt_syscons.h SC_DEBUG_LEVEL opt_syscons.h SC_DFLT_FONT opt_syscons.h SC_DISABLE_DDBKEY opt_syscons.h SC_DISABLE_REBOOT opt_syscons.h SC_HISTORY_SIZE opt_syscons.h SC_KERNEL_CONS_ATTR opt_syscons.h SC_KERNEL_CONS_REV_ATTR opt_syscons.h SC_MOUSE_CHAR opt_syscons.h SC_NO_CUTPASTE opt_syscons.h SC_NO_FONT_LOADING opt_syscons.h SC_NO_HISTORY opt_syscons.h SC_NO_SYSMOUSE opt_syscons.h SC_NORM_ATTR opt_syscons.h SC_NORM_REV_ATTR opt_syscons.h SC_PIXEL_MODE opt_syscons.h SC_RENDER_DEBUG opt_syscons.h SC_TWOBUTTON_MOUSE opt_syscons.h SC_NO_SUSPEND_VTYSWITCH opt_syscons.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA opt_vesa.h VESA_DEBUG opt_vesa.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h PSM_DEBUG opt_psm.h ATKBD_DFLT_KEYMAP opt_atkbd.h KBD_DISABLE_KEYMAP_LOAD opt_kbd.h KBD_INSTALL_CDEV opt_kbd.h KBD_MAXRETRY opt_kbd.h KBD_MAXWAIT opt_kbd.h KBD_RESETDELAY opt_kbd.h KBDIO_DEBUG opt_kbd.h EISA_SLOTS opt_eisa.h # pcvt(4) has a bunch of options FAT_CURSOR opt_pcvt.h XSERVER opt_pcvt.h PCVT_24LINESDEF opt_pcvt.h PCVT_CTRL_ALT_DEL opt_pcvt.h PCVT_META_ESC opt_pcvt.h PCVT_NSCREENS opt_pcvt.h PCVT_PRETTYSCRNS opt_pcvt.h PCVT_SCANSET opt_pcvt.h PCVT_SCREENSAVER opt_pcvt.h PCVT_USEKBDSEC opt_pcvt.h PCVT_VT220KEYB opt_pcvt.h PCVT_GREENSAVER opt_pcvt.h # Video spigot SPIGOT_UNSECURE opt_spigot.h # ------------------------------- # isdn4bsd: passive ISA cards # ------------------------------- TEL_S0_8 opt_i4b.h TEL_S0_16 opt_i4b.h TEL_S0_16_3 opt_i4b.h AVM_A1 opt_i4b.h USR_STI opt_i4b.h ITKIX1 opt_i4b.h ELSA_PCC16 opt_i4b.h # ------------------------------- # isdn4bsd: passive ISA PnP cards # ------------------------------- CRTX_S0_P opt_i4b.h DRN_NGO opt_i4b.h TEL_S0_16_3_P opt_i4b.h SEDLBAUER opt_i4b.h DYNALINK opt_i4b.h ASUSCOM_IPAC opt_i4b.h ELSA_QS1ISA opt_i4b.h SIEMENS_ISURF2 opt_i4b.h EICON_DIVA opt_i4b.h COMPAQ_M610 opt_i4b.h # ------------------------------- # isdn4bsd: passive PCI cards # ------------------------------- ELSA_QS1PCI opt_i4b.h # ------------------------------- # isdn4bsd: misc options # ------------------------------- # temporary workaround for SMP machines I4B_SMP_WORKAROUND opt_i4b.h # enable VJ compression code for ipr i/f IPR_VJ opt_i4b.h IPR_LOG opt_i4b.h # Device options DEV_NPX opt_npx.h DEV_SPLASH opt_splash.h # ------------------------------- # EOF # ------------------------------- Index: head/sys/i386/i386/bios.c =================================================================== --- head/sys/i386/i386/bios.c (revision 112840) +++ head/sys/i386/i386/bios.c (revision 112841) @@ -1,676 +1,680 @@ /*- * Copyright (c) 1997 Michael Smith * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
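Both header probes in bios32_init() below rely on the same validation scheme: the BIOS32 Service Directory ("_32_") and PnP ("$PnP") structures carry a checksum byte chosen so that every byte of the structure sums to zero mod 256. A standalone sketch of that check:

    #include <stdint.h>

    /* Returns nonzero when the structure checksums to zero mod 256. */
    static int
    bios_cksum_ok(const uint8_t *p, int len)
    {
            uint8_t ck = 0;

            while (len-- > 0)
                    ck += *p++;
            return (ck == 0);
    }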
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Code for dealing with the BIOS in x86 PC systems. */ #include "opt_isa.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_ISA #include #include #include #endif #define BIOS_START 0xe0000 #define BIOS_SIZE 0x20000 /* exported lookup results */ struct bios32_SDentry PCIbios; struct PnPBIOS_table *PnPBIOStable; static u_int bios32_SDCI; /* start fairly early */ static void bios32_init(void *junk); SYSINIT(bios32, SI_SUB_CPU, SI_ORDER_ANY, bios32_init, NULL); /* * bios32_init * * Locate various bios32 entities. */ static void bios32_init(void *junk) { u_long sigaddr; struct bios32_SDheader *sdh; struct PnPBIOS_table *pt; u_int8_t ck, *cv; int i; char *p; /* * BIOS32 Service Directory, PCI BIOS */ /* look for the signature */ if ((sigaddr = bios_sigsearch(0, "_32_", 4, 16, 0)) != 0) { /* get a virtual pointer to the structure */ sdh = (struct bios32_SDheader *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)sdh, ck = 0, i = 0; i < (sdh->len * 16); i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if ((ck == 0) && (BIOS_START <= sdh->entry ) && (sdh->entry < (BIOS_START + BIOS_SIZE))) { bios32_SDCI = BIOS_PADDRTOVADDR(sdh->entry); if (bootverbose) { printf("bios32: Found BIOS32 Service Directory header at %p\n", sdh); printf("bios32: Entry = 0x%x (%x) Rev = %d Len = %d\n", sdh->entry, bios32_SDCI, sdh->revision, sdh->len); } /* Allow user override of PCI BIOS search */ if (((p = getenv("machdep.bios.pci")) == NULL) || strcmp(p, "disable")) { /* See if there's a PCI BIOS entrypoint here */ PCIbios.ident.id = 0x49435024; /* PCI systems should have this */ if (!bios32_SDlookup(&PCIbios) && bootverbose) printf("pcibios: PCI BIOS entry at 0x%x+0x%x\n", PCIbios.base, PCIbios.entry); } if (p != NULL) freeenv(p); } else { printf("bios32: Bad BIOS32 Service Directory\n"); } } /* * PnP BIOS * * Allow user override of PnP BIOS search */ if ((((p = getenv("machdep.bios.pnp")) == NULL) || strcmp(p, "disable")) && ((sigaddr = bios_sigsearch(0, "$PnP", 4, 16, 0)) != 0)) { /* get a virtual pointer to the structure */ pt = (struct PnPBIOS_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); for (cv = (u_int8_t *)pt, ck = 0, i = 0; i < pt->len; i++) { ck += cv[i]; } /* If checksum is OK, enable use of the entrypoint */ if (ck == 0) { PnPBIOStable = pt; if (bootverbose) { printf("pnpbios: Found PnP BIOS data at %p\n", pt); printf("pnpbios: Entry = %x:%x Rev = %d.%d\n", pt->pmentrybase, pt->pmentryoffset, pt->version >> 4, pt->version & 0xf); if ((pt->control & 0x3) == 0x01) printf("pnpbios: Event flag at %x\n", pt->evflagaddr); if (pt->oemdevid != 0) printf("pnpbios: OEM ID %x\n", pt->oemdevid); } } else { printf("pnpbios: Bad PnP BIOS 
data checksum\n"); } } if (p != NULL) freeenv(p); if (bootverbose) { /* look for other know signatures */ printf("Other BIOS signatures found:\n"); } } /* * bios32_SDlookup * * Query the BIOS32 Service Directory for the service named in (ent), * returns nonzero if the lookup fails. The caller must fill in * (ent->ident), the remainder are populated on a successful lookup. */ int bios32_SDlookup(struct bios32_SDentry *ent) { struct bios_regs args; if (bios32_SDCI == 0) return (1); args.eax = ent->ident.id; /* set up arguments */ args.ebx = args.ecx = args.edx = 0; bios32(&args, bios32_SDCI, GSEL(GCODE_SEL, SEL_KPL)); if ((args.eax & 0xff) == 0) { /* success? */ ent->base = args.ebx; ent->len = args.ecx; ent->entry = args.edx; ent->ventry = BIOS_PADDRTOVADDR(ent->base + ent->entry); return (0); /* all OK */ } return (1); /* failed */ } /* * bios_sigsearch * * Search some or all of the BIOS region for a signature string. * * (start) Optional offset returned from this function * (for searching for multiple matches), or NULL * to start the search from the base of the BIOS. * Note that this will be a _physical_ address in * the range 0xe0000 - 0xfffff. * (sig) is a pointer to the byte(s) of the signature. * (siglen) number of bytes in the signature. * (paralen) signature paragraph (alignment) size. * (sigofs) offset of the signature within the paragraph. * * Returns the _physical_ address of the found signature, 0 if the * signature was not found. */ u_int32_t bios_sigsearch(u_int32_t start, u_char *sig, int siglen, int paralen, int sigofs) { u_char *sp, *end; /* compute the starting address */ if ((start >= BIOS_START) && (start <= (BIOS_START + BIOS_SIZE))) { sp = (char *)BIOS_PADDRTOVADDR(start); } else if (start == 0) { sp = (char *)BIOS_PADDRTOVADDR(BIOS_START); } else { return 0; /* bogus start address */ } /* compute the end address */ end = (u_char *)BIOS_PADDRTOVADDR(BIOS_START + BIOS_SIZE); /* loop searching */ while ((sp + sigofs + siglen) < end) { /* compare here */ if (!bcmp(sp + sigofs, sig, siglen)) { /* convert back to physical address */ return((u_int32_t)BIOS_VADDRTOPADDR(sp)); } sp += paralen; } return(0); } /* * do not staticize, used by bioscall.s */ union { struct { u_short offset; u_short segment; } vec16; struct { u_int offset; u_short segment; } vec32; } bioscall_vector; /* bios jump vector */ void set_bios_selectors(struct bios_segments *seg, int flags) { struct soft_segment_descriptor ssd = { 0, /* segment base address (overwritten) */ 0, /* length (overwritten) */ SDT_MEMERA, /* segment type (overwritten) */ 0, /* priority level */ 1, /* descriptor present */ 0, 0, 1, /* descriptor size (overwritten) */ 0 /* granularity == byte units */ }; union descriptor *p_gdt; #ifdef SMP p_gdt = &gdt[PCPU_GET(cpuid) * NGDT]; #else p_gdt = gdt; #endif ssd.ssd_base = seg->code32.base; ssd.ssd_limit = seg->code32.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE32_SEL].sd); ssd.ssd_def32 = 0; if (flags & BIOSCODE_FLAG) { ssd.ssd_base = seg->code16.base; ssd.ssd_limit = seg->code16.limit; ssdtosd(&ssd, &p_gdt[GBIOSCODE16_SEL].sd); } ssd.ssd_type = SDT_MEMRWA; if (flags & BIOSDATA_FLAG) { ssd.ssd_base = seg->data.base; ssd.ssd_limit = seg->data.limit; ssdtosd(&ssd, &p_gdt[GBIOSDATA_SEL].sd); } if (flags & BIOSUTIL_FLAG) { ssd.ssd_base = seg->util.base; ssd.ssd_limit = seg->util.limit; ssdtosd(&ssd, &p_gdt[GBIOSUTIL_SEL].sd); } if (flags & BIOSARGS_FLAG) { ssd.ssd_base = seg->args.base; ssd.ssd_limit = seg->args.limit; ssdtosd(&ssd, &p_gdt[GBIOSARGS_SEL].sd); } } extern int vm86pa; extern void 
bios16_jmp(void); /* * this routine is really greedy with selectors, and uses 5: * * 32-bit code selector: to return to kernel * 16-bit code selector: for running code * data selector: for 16-bit data * util selector: extra utility selector * args selector: to handle pointers * * the util selector is set from the util16 entry in bios16_args, if a * "U" specifier is seen. * * See for description of format specifiers */ int bios16(struct bios_args *args, char *fmt, ...) { char *p, *stack, *stack_top; va_list ap; int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; pt_entry_t *pte; pd_entry_t *ptd; arg_start = 0xffffffff; arg_end = 0; /* * Some BIOS entrypoints attempt to copy the largest-case * argument frame (in order to generalise handling for * different entry types). If our argument frame is * smaller than this, the BIOS will reach off the top of * our constructed stack segment. Pad the top of the stack * with some garbage to avoid this. */ stack = (caddr_t)PAGE_SIZE - 32; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); arg_start = min(arg_start, i); arg_end = max(arg_end, i); flags |= BIOSARGS_FLAG; stack -= 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); stack -= 4; break; case 'U': /* 16-bit selector */ flags |= BIOSUTIL_FLAG; /* FALLTHROUGH */ case 'D': /* 16-bit selector */ case 'C': /* 16-bit selector */ stack -= 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); stack -= 2; break; default: return (EINVAL); } } if (flags & BIOSARGS_FLAG) { if (arg_end - arg_start > ctob(16)) return (EACCES); args->seg.args.base = arg_start; args->seg.args.limit = 0xffff; } args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; args->seg.code32.limit = 0xffff; ptd = (pd_entry_t *)rcr3(); - if (ptd == (u_int *)IdlePTD) { +#ifdef PAE + if (ptd == IdlePDPT) { +#else + if (ptd == IdlePTD) { +#endif /* * no page table, so create one and install it. */ pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)ptd + KERNBASE); + ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE); *ptd = vtophys(pte) | PG_RW | PG_V; } else { /* * this is a user-level page table */ pte = PTmap; } /* * install pointer to page 0. we don't need to flush the tlb, * since there should not be a previous mapping for page 0. */ *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V; stack_top = stack; va_start(ap, fmt); for (p = fmt; p && *p; p++) { switch (*p) { case 'p': /* 32-bit pointer */ i = va_arg(ap, u_int); *(u_int *)stack = (i - arg_start) | (GSEL(GBIOSARGS_SEL, SEL_KPL) << 16); stack += 4; break; case 'i': /* 32-bit integer */ i = va_arg(ap, u_int); *(u_int *)stack = i; stack += 4; break; case 'U': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSUTIL_SEL, SEL_KPL); stack += 2; break; case 'D': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSDATA_SEL, SEL_KPL); stack += 2; break; case 'C': /* 16-bit selector */ *(u_short *)stack = GSEL(GBIOSCODE16_SEL, SEL_KPL); stack += 2; break; case 's': /* 16-bit integer passed as an int */ i = va_arg(ap, int); *(u_short *)stack = i; stack += 2; break; default: return (EINVAL); } } set_bios_selectors(&args->seg, flags); bioscall_vector.vec16.offset = (u_short)args->entry; bioscall_vector.vec16.segment = GSEL(GBIOSCODE16_SEL, SEL_KPL); i = bios16_call(&args->r, stack_top); if (pte == PTmap) { *pte = 0; /* remove entry */ } else { *ptd = 0; /* remove page table */ free(pte, M_TEMP); /* ... 
and free it */ } /* * XXX only needs to be invlpg(0) but that doesn't work on the 386 */ pmap_invalidate_all(kernel_pmap); return (i); } #ifdef DEV_ISA /* * PnP BIOS interface; enumerate devices only known to the system * BIOS and save information about them for later use. */ struct pnp_sysdev { u_int16_t size; u_int8_t handle; u_int32_t devid; u_int8_t type[3]; u_int16_t attrib; #define PNPATTR_NODISABLE (1<<0) /* can't be disabled */ #define PNPATTR_NOCONFIG (1<<1) /* can't be configured */ #define PNPATTR_OUTPUT (1<<2) /* can be primary output */ #define PNPATTR_INPUT (1<<3) /* can be primary input */ #define PNPATTR_BOOTABLE (1<<4) /* can be booted from */ #define PNPATTR_DOCK (1<<5) /* is a docking station */ #define PNPATTR_REMOVEABLE (1<<6) /* device is removeable */ #define PNPATTR_CONFIG_STATIC (0) #define PNPATTR_CONFIG_DYNAMIC (1) #define PNPATTR_CONFIG_DYNONLY (3) #define PNPATTR_CONFIG(a) (((a) >> 7) & 0x3) /* device-specific data comes here */ u_int8_t devdata[0]; } __packed; /* We have to cluster arguments within a 64k range for the bios16 call */ struct pnp_sysdevargs { u_int16_t next; struct pnp_sysdev node; }; /* * This function is called after the bus has assigned resource * locations for a logical device. */ static void pnpbios_set_config(void *arg, struct isa_config *config, int enable) { } /* * Quiz the PnP BIOS, build a list of PNP IDs and resource data. */ static void pnpbios_identify(driver_t *driver, device_t parent) { struct PnPBIOS_table *pt = PnPBIOStable; struct bios_args args; struct pnp_sysdev *pd; struct pnp_sysdevargs *pda; u_int16_t ndevs, bigdev; int error, currdev; u_int8_t *devnodebuf, tag; u_int32_t *devid, *compid; int idx, left; device_t dev; /* no PnP BIOS information */ if (pt == NULL) return; /* ACPI already active */ if (devclass_get_softc(devclass_find("ACPI"), 0) != NULL) return; /* get count of PnP devices */ bzero(&args, sizeof(args)); args.seg.code16.base = BIOS_PADDRTOVADDR(pt->pmentrybase); args.seg.code16.limit = 0xffff; /* XXX ? */ args.seg.data.base = BIOS_PADDRTOVADDR(pt->pmdataseg); args.seg.data.limit = 0xffff; args.entry = pt->pmentryoffset; if ((error = bios16(&args, PNP_COUNT_DEVNODES, &ndevs, &bigdev)) || (args.r.eax & 0xff)) printf("pnpbios: error %d/%x getting device count/size limit\n", error, args.r.eax); ndevs &= 0xff; /* clear high byte garbage */ if (bootverbose) printf("pnpbios: %d devices, largest %d bytes\n", ndevs, bigdev); devnodebuf = malloc(bigdev + (sizeof(struct pnp_sysdevargs) - sizeof(struct pnp_sysdev)), M_DEVBUF, M_NOWAIT); pda = (struct pnp_sysdevargs *)devnodebuf; pd = &pda->node; for (currdev = 0, left = ndevs; (currdev != 0xff) && (left > 0); left--) { bzero(pd, bigdev); pda->next = currdev; /* get current configuration */ if ((error = bios16(&args, PNP_GET_DEVNODE, &pda->next, &pda->node, 1))) { printf("pnpbios: error %d making BIOS16 call\n", error); break; } if ((error = (args.r.eax & 0xff))) { if (bootverbose) printf("pnpbios: %s 0x%x fetching node %d\n", error & 0x80 ? "error" : "warning", error, currdev); if (error & 0x80) break; } currdev = pda->next; if (pd->size < sizeof(struct pnp_sysdev)) { printf("pnpbios: bogus system node data, aborting scan\n"); break; } /* * If we are in APIC_IO mode, we should ignore the ISA PIC if it * shows up. Likewise, in !APIC_IO mode, we should ignore the * APIC (less important). * This is significant because the ISA PIC will claim IRQ 2 (which * it uses for chaining), while in APIC mode this is a valid IRQ * available for general use. 
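The two format-string walks in bios16() above do the same accounting twice: once to size the 16-bit stack frame handed to the BIOS and once to fill it in. A sketch of just the sizing pass (a hypothetical helper, with the same specifier meanings as the real code, counting the 32 bytes of guard padding left at the top of the stack):

    #include <stddef.h>

    /* 'p' and 'i' consume 32-bit slots; 'U', 'D', 'C' and 's' consume
     * 16-bit slots; anything else is invalid (bios16() returns EINVAL). */
    static size_t
    bios16_frame_bytes(const char *fmt)
    {
            size_t n = 32;          /* guard padding above the frame */

            for (; fmt != NULL && *fmt != '\0'; fmt++) {
                    switch (*fmt) {
                    case 'p':
                    case 'i':
                            n += 4;
                            break;
                    case 'U':
                    case 'D':
                    case 'C':
                    case 's':
                            n += 2;
                            break;
                    default:
                            return (0);
                    }
            }
            return (n);
    }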
*/ #ifdef APIC_IO if (!strcmp(pnp_eisaformat(pd->devid), "PNP0000")) /* ISA PIC */ continue; #else if (!strcmp(pnp_eisaformat(pd->devid), "PNP0003")) /* APIC */ continue; #endif /* Add the device and parse its resources */ dev = BUS_ADD_CHILD(parent, ISA_ORDER_PNP, NULL, -1); isa_set_vendorid(dev, pd->devid); isa_set_logicalid(dev, pd->devid); /* * It appears that some PnP BIOS doesn't allow us to re-enable * the embedded system device once it is disabled. We shall * mark all system device nodes as "cannot be disabled", regardless * of actual settings in the device attribute byte. * XXX isa_set_configattr(dev, ((pd->attrib & PNPATTR_NODISABLE) ? 0 : ISACFGATTR_CANDISABLE) | ((!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0)); */ isa_set_configattr(dev, (!(pd->attrib & PNPATTR_NOCONFIG) && PNPATTR_CONFIG(pd->attrib) != PNPATTR_CONFIG_STATIC) ? ISACFGATTR_DYNAMIC : 0); ISA_SET_CONFIG_CALLBACK(parent, dev, pnpbios_set_config, 0); pnp_parse_resources(dev, &pd->devdata[0], pd->size - sizeof(struct pnp_sysdev), 0); if (!device_get_desc(dev)) device_set_desc_copy(dev, pnp_eisaformat(pd->devid)); /* Find device IDs */ devid = &pd->devid; compid = NULL; /* look for a compatible device ID too */ left = pd->size - sizeof(struct pnp_sysdev); idx = 0; while (idx < left) { tag = pd->devdata[idx++]; if (PNP_RES_TYPE(tag) == 0) { /* Small resource */ switch (PNP_SRES_NUM(tag)) { case PNP_TAG_COMPAT_DEVICE: compid = (u_int32_t *)(pd->devdata + idx); if (bootverbose) printf("pnpbios: node %d compat ID 0x%08x\n", pd->handle, *compid); /* FALLTHROUGH */ case PNP_TAG_END: idx = left; break; default: idx += PNP_SRES_LEN(tag); break; } } else /* Large resource, skip it */ idx += *(u_int16_t *)(pd->devdata + idx) + 2; } if (bootverbose) { printf("pnpbios: handle %d device ID %s (%08x)", pd->handle, pnp_eisaformat(*devid), *devid); if (compid != NULL) printf(" compat ID %s (%08x)", pnp_eisaformat(*compid), *compid); printf("\n"); } } } static device_method_t pnpbios_methods[] = { /* Device interface */ DEVMETHOD(device_identify, pnpbios_identify), { 0, 0 } }; static driver_t pnpbios_driver = { "pnpbios", pnpbios_methods, 1, /* no softc */ }; static devclass_t pnpbios_devclass; DRIVER_MODULE(pnpbios, isa, pnpbios_driver, pnpbios_devclass, 0, 0); #endif /* DEV_ISA */ Index: head/sys/i386/i386/locore.s =================================================================== --- head/sys/i386/i386/locore.s (revision 112840) +++ head/sys/i386/i386/locore.s (revision 112841) @@ -1,892 +1,927 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 * $FreeBSD$ * * originally from: locore.s, by William F. Jolitz * * Substantially rewritten by David Greenman, Rod Grimes, * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp * and many others. */ #include "opt_bootp.h" #include "opt_compat.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include "assym.s" /* * XXX * * Note: This version greatly munged to avoid various assembler errors * that may be fixed in newer versions of gas. Perhaps newer versions * will have more pleasant appearance. */ /* * PTmap is recursive pagemap at top of virtual address space. * Within PTmap, the page directory can be found (third indirection). * * NOTE: PTDpde, PTmap, and PTD are being defined as address symbols. * In C you access them directly, and not with a '*'. Storage is not being * allocated. They will magically address the correct locations in KVM * which C will treat as normal variables of the type they are defined in * machine/pmap.h, i.e. PTDpde = XX ; to set a PDE entry, NOT *PTDpde = XX; */ .globl PTmap,PTD,PTDpde .set PTmap,(PTDPTDI << PDRSHIFT) .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) .set PTDpde,PTD + (PTDPTDI * PDESIZE) /* * APTmap, APTD is the alternate recursive pagemap. * It's used when modifying another process's page tables. * See the note above. It is true here as well. */ .globl APTmap,APTD,APTDpde .set APTmap,APTDPTDI << PDRSHIFT .set APTD,APTmap + (APTDPTDI * PAGE_SIZE) .set APTDpde,PTD + (APTDPTDI * PDESIZE) #ifdef SMP /* * Define layout of per-cpu address space. * This is "constructed" in locore.s on the BSP and in mp_machdep.c * for each AP. DO NOT REORDER THESE WITHOUT UPDATING THE REST! 
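The .set lines above implement the recursive map the header comment describes: installing the page directory as one of its own entries makes every page-table entry visible at a fixed virtual address, so vtopte() is pure arithmetic with no table walk. A sketch for the non-PAE layout, using a hypothetical slot number in place of PTDPTDI (the real value depends on the KVA layout):

    #include <stdint.h>

    typedef uint32_t pt_entry_t;

    #define PDRSHIFT        22              /* non-PAE: one pde maps 4MB */
    #define PAGE_SHIFT      12
    #define RECURSIVE_SLOT  0x3fdu          /* hypothetical pde index */

    /* With the PD mapped into itself, all ptes form one linear array. */
    #define PTmap_sketch    ((pt_entry_t *)(RECURSIVE_SLOT << PDRSHIFT))
    #define vtopte_sketch(va) (&PTmap_sketch[(uint32_t)(va) >> PAGE_SHIFT])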
*/ .globl SMP_prvspace, lapic .set SMP_prvspace,(MPPTDI << PDRSHIFT) .set lapic,SMP_prvspace + (NPTEPG-1) * PAGE_SIZE #endif /* SMP */ /* * Compiled KERNBASE location */ .globl kernbase .set kernbase,KERNBASE /* * Globals */ .data ALIGN_DATA /* just to be sure */ .globl HIDENAME(tmpstk) .space 0x2000 /* space for tmpstk - temporary stack */ HIDENAME(tmpstk): .globl bootinfo bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ .globl KERNend KERNend: .long 0 /* phys addr end of kernel (just after bss) */ physfree: .long 0 /* phys addr of next free page */ #ifdef SMP .globl cpu0prvpage cpu0pp: .long 0 /* phys addr cpu0 private pg */ cpu0prvpage: .long 0 /* relocated version */ .globl SMPpt SMPptpa: .long 0 /* phys addr SMP page table */ SMPpt: .long 0 /* relocated version */ #endif /* SMP */ .globl IdlePTD IdlePTD: .long 0 /* phys addr of kernel PTD */ +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + #ifdef SMP .globl KPTphys #endif KPTphys: .long 0 /* phys addr of kernel page tables */ .globl proc0uarea, proc0kstack proc0uarea: .long 0 /* address of proc 0 uarea space */ proc0kstack: .long 0 /* address of proc 0 kstack space */ p0upa: .long 0 /* phys addr of proc0's UAREA */ p0kpa: .long 0 /* phys addr of proc0's STACK */ vm86phystk: .long 0 /* PA of vm86/bios stack */ .globl vm86paddr, vm86pa vm86paddr: .long 0 /* address of vm86 region */ vm86pa: .long 0 /* phys addr of vm86 region */ #ifdef PC98 .globl pc98_system_parameter pc98_system_parameter: .space 0x240 #endif /********************************************************************** * * Some handy macros * */ #define R(foo) ((foo)-KERNBASE) #define ALLOCPAGES(foo) \ movl R(physfree), %esi ; \ movl $((foo)*PAGE_SIZE), %eax ; \ addl %esi, %eax ; \ movl %eax, R(physfree) ; \ movl %esi, %edi ; \ movl $((foo)*PAGE_SIZE),%ecx ; \ xorl %eax,%eax ; \ cld ; \ rep ; \ stosb /* * fillkpt * eax = page frame address * ebx = index into page table * ecx = how many pages to map * base = base address of page dir/table * prot = protection bits */ #define fillkpt(base, prot) \ shll $PTESHIFT,%ebx ; \ addl base,%ebx ; \ orl $PG_V,%eax ; \ orl prot,%eax ; \ 1: movl %eax,(%ebx) ; \ addl $PAGE_SIZE,%eax ; /* increment physical address */ \ addl $PTESIZE,%ebx ; /* next pte */ \ loop 1b /* * fillkptphys(prot) * eax = physical address * ecx = how many pages to map * prot = protection bits */ #define fillkptphys(prot) \ movl %eax, %ebx ; \ shrl $PAGE_SHIFT, %ebx ; \ fillkpt(R(KPTphys), prot) .text /********************************************************************** * * This is where the bootblocks start us, set the ball rolling... * */ NON_GPROF_ENTRY(btext) #ifdef PC98 /* save SYSTEM PARAMETER for resume (NS/T or other) */ movl $0xa1400,%esi movl $R(pc98_system_parameter),%edi movl $0x0240,%ecx cld rep movsb #else /* IBM-PC */ /* Tell the bios to warmboot next time */ movw $0x1234,0x472 #endif /* PC98 */ /* Set up a real frame in case the double return in newboot is executed. */ pushl %ebp movl %esp, %ebp /* Don't trust what the BIOS gives for eflags. */ pushl $PSL_KERNEL popfl /* * Don't trust what the BIOS gives for %fs and %gs. Trust the bootstrap * to set %cs, %ds, %es and %ss. */ mov %ds, %ax mov %ax, %fs mov %ax, %gs call recover_bootinfo /* Get onto a stack that we can trust. */ /* * XXX this step is delayed in case recover_bootinfo needs to return via * the old stack, but it need not be, since recover_bootinfo actually * returns via the old frame. 
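fillkpt() above is the workhorse of create_pagetables(): the PTESHIFT scaling of the index is what lets one macro write 4-byte or 8-byte entries, and fillkptphys() is the same loop with the index derived from the physical address. Roughly what the assembly does, as self-contained C (a sketch; constants are illustrative):

    #include <stdint.h>

    typedef uint32_t pt_entry_t;    /* 64-bit under PAE */
    typedef uint64_t vm_paddr_t;

    #define PAGE_SIZE_EX    4096
    #define PG_V_EX         0x001

    /* Write `count' consecutive entries starting at index `idx',
     * mapping ascending physical pages with the valid bit plus `prot'. */
    static void
    fillkpt_sketch(pt_entry_t *base, uint32_t idx, vm_paddr_t pa,
        uint32_t count, uint32_t prot)
    {
            while (count-- > 0) {
                    base[idx++] = (pt_entry_t)(pa | PG_V_EX | prot);
                    pa += PAGE_SIZE_EX;
            }
    }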
*/ movl $R(HIDENAME(tmpstk)),%esp #ifdef PC98 /* pc98_machine_type & M_EPSON_PC98 */ testb $0x02,R(pc98_system_parameter)+220 jz 3f /* epson_machine_id <= 0x0b */ cmpb $0x0b,R(pc98_system_parameter)+224 ja 3f /* count up memory */ movl $0x100000,%eax /* next, talley remaining memory */ movl $0xFFF-0x100,%ecx 1: movl 0(%eax),%ebx /* save location to check */ movl $0xa55a5aa5,0(%eax) /* write test pattern */ cmpl $0xa55a5aa5,0(%eax) /* does not check yet for rollover */ jne 2f movl %ebx,0(%eax) /* restore memory */ addl $PAGE_SIZE,%eax loop 1b 2: subl $0x100000,%eax shrl $17,%eax movb %al,R(pc98_system_parameter)+1 3: movw R(pc98_system_parameter+0x86),%ax movw %ax,R(cpu_id) #endif call identify_cpu /* clear bss */ /* * XXX this should be done a little earlier. * * XXX we don't check that there is memory for our bss and page tables * before using it. * * XXX the boot program somewhat bogusly clears the bss. We still have * to do it in case we were unzipped by kzipboot. Then the boot program * only clears kzipboot's bss. * * XXX the gdt and idt are still somewhere in the boot program. We * depend on the convention that the boot program is below 1MB and we * are above 1MB to keep the gdt and idt away from the bss and page * tables. */ movl $R(end),%ecx movl $R(edata),%edi subl %edi,%ecx xorl %eax,%eax cld rep stosb call create_pagetables /* * If the CPU has support for VME, turn it on. */ testl $CPUID_VME, R(cpu_feature) jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* Now enable paging */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ +#endif movl %cr0,%eax /* get control word */ orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high virtualized address */ ret /* now running relocated at KERNBASE where the system is linked to run */ begin: /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ /* bootstrap stack end location */ leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else movl IdlePTD,%esi +#endif movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) pushl physfree /* value of first for init386(first) */ call init386 /* wire 386 chip for unix operation */ /* * Clean up the stack in a way that db_numargs() understands, so * that backtraces in ddb don't underrun the stack. Traps for * inaccessible memory are more fatal than usual this early. */ addl $4,%esp call mi_startup /* autoconfiguration, mountroot etc */ /* NOTREACHED */ addl $0,%esp /* for db_numargs() again */ /* * Signal trampoline, copied to top of user stack */ NON_GPROF_ENTRY(sigcode) calll *SIGF_HANDLER(%esp) leal SIGF_UC(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC_EFLAGS(%eax) jne 1f movl UC_GS(%eax),%gs /* restore %gs */ 1: movl $SYS_sigreturn,%eax pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #ifdef COMPAT_FREEBSD4 ALIGN_TEXT freebsd4_sigcode: calll *SIGF_HANDLER(%esp) leal SIGF_UC4(%esp),%eax /* get ucontext */ pushl %eax testl $PSL_VM,UC4_EFLAGS(%eax) jne 1f movl UC4_GS(%eax),%gs /* restore %gs */ 1: movl $344,%eax /* 4.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. 
*/ int $0x80 /* enter kernel with args */ /* on stack */ 1: jmp 1b #endif #ifdef COMPAT_43 ALIGN_TEXT osigcode: call *SIGF_HANDLER(%esp) /* call signal handler */ lea SIGF_SC(%esp),%eax /* get sigcontext */ pushl %eax testl $PSL_VM,SC_PS(%eax) jne 9f movl SC_GS(%eax),%gs /* restore %gs */ 9: movl $103,%eax /* 3.x SYS_sigreturn */ pushl %eax /* junk to fake return addr. */ int $0x80 /* enter kernel with args */ 0: jmp 0b #endif /* COMPAT_43 */ ALIGN_TEXT esigcode: .data .globl szsigcode szsigcode: .long esigcode-sigcode #ifdef COMPAT_FREEBSD4 .globl szfreebsd4_sigcode szfreebsd4_sigcode: .long esigcode-freebsd4_sigcode #endif #ifdef COMPAT_43 .globl szosigcode szosigcode: .long esigcode-osigcode #endif .text /********************************************************************** * * Recover the bootinfo passed to us from the boot program * */ recover_bootinfo: /* * This code is called in different ways depending on what loaded * and started the kernel. This is used to detect how we get the * arguments from the other code and what we do with them. * * Old disk boot blocks: * (*btext)(howto, bootdev, cyloffset, esym); * [return address == 0, and can NOT be returned to] * [cyloffset was not supported by the FreeBSD boot code * and always passed in as 0] * [esym is also known as total in the boot code, and * was never properly supported by the FreeBSD boot code] * * Old diskless netboot code: * (*btext)(0,0,0,0,&nfsdiskless,0,0,0); * [return address != 0, and can NOT be returned to] * If we are being booted by this code it will NOT work, * so we are just going to halt if we find this case. * * New uniform boot code: * (*btext)(howto, bootdev, 0, 0, 0, &bootinfo) * [return address != 0, and can be returned to] * * There may seem to be a lot of wasted arguments in here, but * that is so the newer boot code can still load very old kernels * and old boot code can load new kernels. */ /* * The old style disk boot blocks fake a frame on the stack and * did an lret to get here. The frame on the stack has a return * address of 0. */ cmpl $0,4(%ebp) je olddiskboot /* * We have some form of return address, so this is either the * old diskless netboot code, or the new uniform code. That can * be detected by looking at the 5th argument, if it is 0 * we are being booted by the new uniform boot code. */ cmpl $0,24(%ebp) je newboot /* * Seems we have been loaded by the old diskless boot code, we * don't stand a chance of running as the diskless structure * changed considerably between the two, so just halt. */ hlt /* * We have been loaded by the new uniform boot code. * Let's check the bootinfo version, and if we do not understand * it we return to the loader with a status of 1 to indicate this error */ newboot: movl 28(%ebp),%ebx /* &bootinfo.version */ movl BI_VERSION(%ebx),%eax cmpl $1,%eax /* We only understand version 1 */ je 1f movl $1,%eax /* Return status */ leave /* * XXX this returns to our caller's caller (as is required) since * we didn't set up a frame and our caller did. */ ret 1: /* * If we have a kernelname copy it in */ movl BI_KERNELNAME(%ebx),%esi cmpl $0,%esi je 2f /* No kernelname */ movl $MAXPATHLEN,%ecx /* Brute force!!! */ movl $R(kernelname),%edi cmpb $'/',(%esi) /* Make sure it starts with a slash */ je 1f movb $'/',(%edi) incl %edi decl %ecx 1: cld rep movsb 2: /* * Determine the size of the boot loader's copy of the bootinfo * struct. 
This is impossible to do properly because old versions * of the struct don't contain a size field and there are 2 old * versions with the same version number. */ movl $BI_ENDCOMMON,%ecx /* prepare for sizeless version */ testl $RB_BOOTINFO,8(%ebp) /* bi_size (and bootinfo) valid? */ je got_bi_size /* no, sizeless version */ movl BI_SIZE(%ebx),%ecx got_bi_size: /* * Copy the common part of the bootinfo struct */ movl %ebx,%esi movl $R(bootinfo),%edi cmpl $BOOTINFO_SIZE,%ecx jbe got_common_bi_size movl $BOOTINFO_SIZE,%ecx got_common_bi_size: cld rep movsb #ifdef NFS_ROOT #ifndef BOOTP_NFSV3 /* * If we have a nfs_diskless structure copy it in */ movl BI_NFS_DISKLESS(%ebx),%esi cmpl $0,%esi je olddiskboot movl $R(nfs_diskless),%edi movl $NFSDISKLESS_SIZE,%ecx cld rep movsb movl $R(nfs_diskless_valid),%edi movl $1,(%edi) #endif #endif /* * The old style disk boot. * (*btext)(howto, bootdev, cyloffset, esym); * Note that the newer boot code just falls into here to pick * up howto and bootdev, cyloffset and esym are no longer used */ olddiskboot: movl 8(%ebp),%eax movl %eax,R(boothowto) movl 12(%ebp),%eax movl %eax,R(bootdev) ret /********************************************************************** * * Identify the CPU and initialize anything special about it * */ identify_cpu: /* Try to toggle alignment check flag; does not exist on 386. */ pushfl popl %eax movl %eax,%ecx orl $PSL_AC,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_AC,%eax pushl %ecx popfl testl %eax,%eax jnz try486 /* NexGen CPU does not have aligment check flag. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jz trynexgen popfl movl $CPU_386,R(cpu) jmp 3f trynexgen: popfl movl $CPU_NX586,R(cpu) movl $0x4778654e,R(cpu_vendor) # store vendor string movl $0x72446e65,R(cpu_vendor+4) movl $0x6e657669,R(cpu_vendor+8) movl $0,R(cpu_vendor+12) jmp 3f try486: /* Try to toggle identification flag; does not exist on early 486s. */ pushfl popl %eax movl %eax,%ecx xorl $PSL_ID,%eax pushl %eax popfl pushfl popl %eax xorl %ecx,%eax andl $PSL_ID,%eax pushl %ecx popfl testl %eax,%eax jnz trycpuid movl $CPU_486,R(cpu) /* * Check Cyrix CPU * Cyrix CPUs do not change the undefined flags following * execution of the divide instruction which divides 5 by 2. * * Note: CPUID is enabled on M2, so it passes another way. */ pushfl movl $0x5555, %eax xorl %edx, %edx movl $2, %ecx clc divl %ecx jnc trycyrix popfl jmp 3f /* You may use Intel CPU. */ trycyrix: popfl /* * IBM Bluelighting CPU also doesn't change the undefined flags. * Because IBM doesn't disclose the information for Bluelighting * CPU, we couldn't distinguish it from Cyrix's (including IBM * brand of Cyrix CPUs). */ movl $0x69727943,R(cpu_vendor) # store vendor string movl $0x736e4978,R(cpu_vendor+4) movl $0x64616574,R(cpu_vendor+8) jmp 3f trycpuid: /* Use the `cpuid' instruction. */ xorl %eax,%eax cpuid # cpuid 0 movl %eax,R(cpu_high) # highest capability movl %ebx,R(cpu_vendor) # store vendor string movl %edx,R(cpu_vendor+4) movl %ecx,R(cpu_vendor+8) movb $0,R(cpu_vendor+12) movl $1,%eax cpuid # cpuid 1 movl %eax,R(cpu_id) # store cpu_id movl %ebx,R(cpu_procinfo) # store cpu_procinfo movl %edx,R(cpu_feature) # store cpu_feature rorl $8,%eax # extract family type andl $15,%eax cmpl $5,%eax jae 1f /* less than Pentium; must be 486 */ movl $CPU_486,R(cpu) jmp 3f 1: /* a Pentium? 
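identify_cpu above tells 386, 486, NexGen and Cyrix parts apart by their EFLAGS and division side effects before falling back to cpuid; the classification at the end ("rorl $8; andl $15") is a plain bit-field extraction of the family. The same decode in C (a sketch):

    #include <stdint.h>

    /* cpuid leaf 1 returns the processor family in bits 11:8 of %eax. */
    static unsigned
    cpuid_family(uint32_t cpu_id)
    {
            return ((cpu_id >> 8) & 0xf);   /* <5: 486, ==5: 586, >5: 686 */
    }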
*/ cmpl $5,%eax jne 2f movl $CPU_586,R(cpu) jmp 3f 2: /* Greater than Pentium...call it a Pentium Pro */ movl $CPU_686,R(cpu) 3: ret /********************************************************************** * * Create the first page directory and its page tables. * */ create_pagetables: /* Find end of kernel image (rounded up to a page boundary). */ movl $R(_end),%esi /* Include symbols, if any. */ movl R(bootinfo+BI_ESYMTAB),%edi testl %edi,%edi je over_symalloc movl %edi,%esi movl $KERNBASE,%edi addl %edi,R(bootinfo+BI_SYMTAB) addl %edi,R(bootinfo+BI_ESYMTAB) over_symalloc: /* If we are told where the end of the kernel space is, believe it. */ movl R(bootinfo+BI_KERNEND),%edi testl %edi,%edi je no_kernend movl %edi,%esi no_kernend: addl $PAGE_MASK,%esi andl $~PAGE_MASK,%esi movl %esi,R(KERNend) /* save end of kernel */ movl %esi,R(physfree) /* next free page is at end of kernel */ /* Allocate Kernel Page Tables */ ALLOCPAGES(NKPT) movl %esi,R(KPTphys) /* Allocate Page Table Directory */ +#ifdef PAE + /* XXX only need 32 bytes (easier for now) */ + ALLOCPAGES(1) + movl %esi,R(IdlePDPT) +#endif ALLOCPAGES(NPGPTD) movl %esi,R(IdlePTD) /* Allocate UPAGES */ ALLOCPAGES(UAREA_PAGES) movl %esi,R(p0upa) addl $KERNBASE, %esi movl %esi, R(proc0uarea) ALLOCPAGES(KSTACK_PAGES) movl %esi,R(p0kpa) addl $KERNBASE, %esi movl %esi, R(proc0kstack) ALLOCPAGES(1) /* vm86/bios stack */ movl %esi,R(vm86phystk) ALLOCPAGES(3) /* pgtable + ext + IOPAGES */ movl %esi,R(vm86pa) addl $KERNBASE, %esi movl %esi, R(vm86paddr) #ifdef SMP /* Allocate cpu0's private data page */ ALLOCPAGES(1) movl %esi,R(cpu0pp) addl $KERNBASE, %esi movl %esi, R(cpu0prvpage) /* relocated to KVM space */ /* Allocate SMP page table page */ ALLOCPAGES(1) movl %esi,R(SMPptpa) addl $KERNBASE, %esi movl %esi, R(SMPpt) /* relocated to KVM space */ #endif /* SMP */ /* Map read-only from zero to the end of the kernel text section */ xorl %eax, %eax xorl %edx,%edx movl $R(etext),%ecx addl $PAGE_MASK,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map read-write, data, bss and symbols */ movl $R(etext),%eax addl $PAGE_MASK, %eax andl $~PAGE_MASK, %eax movl $PG_RW,%edx movl R(KERNend),%ecx subl %eax,%ecx shrl $PAGE_SHIFT,%ecx fillkptphys(%edx) /* Map page directory. */ +#ifdef PAE + movl R(IdlePDPT), %eax + movl $1, %ecx + fillkptphys($PG_RW) +#endif + movl R(IdlePTD), %eax movl $NPGPTD, %ecx fillkptphys($PG_RW) /* Map proc0's UPAGES in the physical way ... */ movl R(p0upa), %eax movl $(UAREA_PAGES), %ecx fillkptphys($PG_RW) /* Map proc0's KSTACK in the physical way ... */ movl R(p0kpa), %eax movl $(KSTACK_PAGES), %ecx fillkptphys($PG_RW) /* Map ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkptphys($PG_RW) /* Map space for the vm86 region */ movl R(vm86phystk), %eax movl $4, %ecx fillkptphys($PG_RW) /* Map page 0 into the vm86 page table */ movl $0, %eax movl $0, %ebx movl $1, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) /* ...likewise for the ISA hole */ movl $ISA_HOLE_START, %eax movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx fillkpt(R(vm86pa), $PG_RW|PG_U) #ifdef SMP /* Map cpu0's private page into global kmem (4K @ cpu0prvpage) */ movl R(cpu0pp), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map SMP page table page into global kmem FWIW */ movl R(SMPptpa), %eax movl $1, %ecx fillkptphys($PG_RW) /* Map the private page into the SMP page table */ movl R(cpu0pp), %eax movl $0, %ebx /* pte offset = 0 */ movl $1, %ecx /* one private page coming right up */ fillkpt(R(SMPptpa), $PG_RW) /* ... 
and put the page table table in the pde. */ movl R(SMPptpa), %eax movl $MPPTDI, %ebx movl $1, %ecx fillkpt(R(IdlePTD), $PG_RW) /* Fakeup VA for the local apic to allow early traps. */ ALLOCPAGES(1) movl %esi, %eax movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ movl R(KPTphys), %eax xorl %ebx, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install pde's for pt's */ movl R(KPTphys), %eax movl $KPTDI, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) /* install a pde recursively mapping page directory as a page table */ movl R(IdlePTD), %eax movl $PTDPTDI, %ebx movl $NPGPTD,%ecx fillkpt(R(IdlePTD), $PG_RW) + +#ifdef PAE + movl R(IdlePTD), %eax + xorl %ebx, %ebx + movl $NPGPTD, %ecx + fillkpt(R(IdlePDPT), $0x0) +#endif ret Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 112840) +++ head/sys/i386/i386/machdep.c (revision 112841) @@ -1,2731 +1,2741 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
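The final locore.s hunk above fills in the PDPT: each of the NPGPTD entries points at one page-directory page, and a PDPT entry may not carry the R/W and U/S bits a normal pde has, which is why fillkpt() is invoked with a protection of $0x0 (the macro ors in PG_V itself). The same structure in self-contained C (a sketch; the constants are illustrative):

    #include <stdint.h>

    #define PAGE_SIZE_EX    4096
    #define NPGPTD_EX       4       /* PAE: four 1GB page directories */

    /* Four 8-byte entries on a 32-byte boundary; the allocation above
     * notes that a whole page is more than strictly needed. */
    static uint64_t pdpt[NPGPTD_EX] __attribute__((aligned(32)));

    static void
    pdpt_init(uint64_t pd_phys)     /* physical base of the PD pages */
    {
            int i;

            for (i = 0; i < NPGPTD_EX; i++)
                    pdpt[i] = (pd_phys + (uint64_t)i * PAGE_SIZE_EX) | 0x1;
    }

Note also the ordering earlier in btext: %cr3 is loaded with the PDPT rather than the page directory, and CR4.PAE is set before CR0.PG, since enabling paging first would have the CPU interpret the PDPT as a 32-bit page directory.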
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * $FreeBSD$ */ #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* pcb.h included via sys/user.h */ #include #ifdef PERFMON #include #endif #ifdef SMP #include #include #endif #include #include #include #include #include #include extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_ENABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #if defined(CPU_DISABLE_SSE) #undef CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, CTLFLAG_RD, &swtch_optim_stats, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code); #endif long Maxmem = 0; vm_paddr_t phys_avail[10]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; #ifndef SMP static struct pcpu __pcpu; #endif struct mtx icu_lock; static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); /* * Display any holes after the first chunk of extended memory. 
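The uintmax_t casts in cpu_startup() below are load-bearing once PAE allows more than 4GB of RAM: a byte count no longer fits in 32 bits, so ptoa() must be applied to a widened value. A self-contained illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT_EX   12

    int
    main(void)
    {
            uint32_t maxmem = 0x200000;     /* page count for 8GB of RAM */

            /* Wrong: the 32-bit shift wraps (the result needs 34 bits). */
            uint32_t bad = maxmem << PAGE_SHIFT_EX;
            /* Right: widen first, as (uintmax_t)Maxmem does below. */
            uintmax_t good = (uintmax_t)maxmem << PAGE_SHIFT_EX;

            printf("bad=%u good=%ju\n", bad, good); /* bad=0, good=8589934592 */
            return (0);
    }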
*/ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifndef SMP /* For SMP, we delay the cpu_setregs() until after SMP startup. */ cpu_setregs(); #endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. 
* We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. 
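The eflags splice in the vm86 branch above deserves a note: without VME, the virtual-interrupt bits PSL_VIF and PSL_VIP are maintained in software, so the signal frame gets the hardware eflags from the trapframe merged with the emulated bits from the vm86 state. The merge in isolation (a sketch with the real PSL bit values):

    #include <stdint.h>

    #define PSL_VIF 0x00080000      /* virtual interrupt flag */
    #define PSL_VIP 0x00100000      /* virtual interrupt pending */

    /* Hardware flags from the trapframe, virtual bits from vm86 state. */
    static uint32_t
    merge_vm86_eflags(uint32_t tf_eflags, uint32_t vm86_eflags)
    {
            return ((tf_eflags & ~(PSL_VIF | PSL_VIP)) |
                (vm86_eflags & (PSL_VIF | PSL_VIP)));
    }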
*/ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); } #endif /* COMPAT_FREEBSD4 */ void sendsig(catcher, sig, mask, code) sig_t catcher; int sig; sigset_t *mask; u_long code; { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, sig, mask, code); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, sig, mask, code); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = p->p_sigstk; sf.sf_uc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((p->p_flag & P_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = p->p_sigstk.ss_sp + p->p_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) p->p_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); PROC_UNLOCK(p); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; PROC_LOCK(p); if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = code; sf.sf_si.si_addr = (void *)regs->tf_err; } else { /* Old FreeBSD-style arguments. 
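
sendsig() above places the frame on the alternate stack when one is configured, armed for this signal, and not already in use, otherwise just below the interrupted %esp, and then rounds down to a 16-byte boundary with "& ~0xF". A standalone sketch of that placement arithmetic; the frame size and addresses are invented.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct fake_sigframe { char bytes[344]; };	/* size is illustrative */

static uintptr_t
place_frame(uintptr_t usp, uintptr_t altsp, size_t altsize, int use_altstack)
{
	uintptr_t sp;

	if (use_altstack)
		sp = altsp + altsize - sizeof(struct fake_sigframe);
	else
		sp = usp - sizeof(struct fake_sigframe);
	return (sp & ~(uintptr_t)0xF);		/* the "& ~0xF" above */
}

int
main(void)
{
	printf("frame at %#jx\n",
	    (uintmax_t)place_frame(0xbfbfe123, 0, 0, 0));
	printf("frame at %#jx\n",
	    (uintmax_t)place_frame(0, 0xbfa00000, 65536, 1));
	return (0);
}
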
*/ sf.sf_siginfo = code; sf.sf_addr = regs->tf_err; sf.sf_ahu.sf_handler = catcher; } PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~PSL_T; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = &sc; eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too.
tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (scp->sc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(p->p_sigmask, scp->sc_mask); SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. 
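
The EFL_SECURE() test used by all the sigreturn paths accepts a proposed eflags value only if it differs from the current trapframe value in user-changeable bits, with PSL_RF masked out of both sides first as the comment above explains. A standalone model of the check; the exact mask below is an assumption standing in for the kernel's PSL_USERCHANGE.

#include <stdio.h>

/* assumed user-changeable set: CF PF AF ZF SF TF DF OF NT AC ID */
#define USERCHANGE_MASK	0x00244dd5u

static int
eflags_secure(unsigned int new_ef, unsigned int cur_ef)
{
	/* reject any difference outside the user-changeable bits */
	return (((new_ef ^ cur_ef) & ~USERCHANGE_MASK) == 0);
}

int
main(void)
{
	printf("%d\n", eflags_secure(0x00000246u, 0x00000202u)); /* 1: ok */
	printf("%d\n", eflags_secure(0x00003246u, 0x00000202u)); /* 0: IOPL */
	return (0);
}
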
*/ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) trapsignal(p, SIGBUS, 0); if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); trapsignal(p, SIGBUS, T_PROTFLT); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) || defined(COMPAT_SUNOS) if (ucp->uc_mcontext.mc_onstack & 1) p->p_sigstk.ss_flags |= SS_ONSTACK; else p->p_sigstk.ss_flags &= ~SS_ONSTACK; #endif p->p_sigmask = ucp->uc_sigmask; SIG_CANTMASK(p->p_sigmask); signotify(p); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(void) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) { enable_intr(); } else { /* * we must absolutely guarentee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } } } /* * Clear registers on exec */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. 
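
The cpu_idle() comment above describes a lost-wakeup window: the runnable check must happen with interrupts disabled, and "sti; hlt" works because sti takes effect only after the following instruction, so a pending wakeup interrupts the hlt rather than firing in the gap before it. A rough userland model of that ordering, with flags standing in for real interrupts.

#include <stdio.h>

static int runnable;

static void
idle_once(int wakeup_arrives_in_window)
{
	/* disable_intr(): from here on no wakeup can be taken */
	if (runnable) {
		/* enable_intr() and go schedule */
		printf("schedule\n");
		return;
	}
	if (wakeup_arrives_in_window)
		runnable = 1;	/* lost with a separate "enable; halt" pair */
	/*
	 * "sti; hlt": sti takes effect after the next instruction, so a
	 * pending wakeup interrupts the hlt instead of firing before it.
	 */
	printf(runnable ? "hlt woken immediately\n" : "hlt until interrupt\n");
}

int
main(void)
{
	idle_once(1);
	return (0);
}
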
*/ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; /* * Arrange to trap the next npx or `fwait' instruction (see npx.c * for why fwait must be trapped at least if there is an npx or an * emulator). This is mainly to handle the case where npx0 is not * configured, since the npx routines normally set up the trap * otherwise. It should be done only at boot time, but doing it * here allows modifying `npx_exists' for testing the emulator on * systems with an npx. */ load_cr0(rcr0() | CR0_MP | CR0_TS); /* Initialize the npx (if any) for the current process. */ /* * XXX the above load_cr0() also initializes it and is a layering * violation if NPX is configured. It drops the npx partially * and this would be fatal if we were interrupted now, and decided * to force the state to the pcb, and checked the invariant * (CR0_TS clear) if and only if PCPU_GET(fpcurthread) != NULL). * ALL of this can happen except the check. The check used to * happen and be fatal later when we didn't complete the drop * before returning to user mode. This should be fixed properly * soon. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); #ifdef SMP cr0 |= CR0_NE; /* Done by npxinit() */ #endif cr0 |= CR0_MP | CR0_TS; /* Done at every execve() too. */ #ifndef I386_CPU cr0 |= CR0_WP | CR0_AM; #endif load_cr0(cr0); load_gs(_udatasel); } static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", ""); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, ""); SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, ""); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, ""); u_long bootdev; /* not a dev_t - encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in dev_t format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #ifdef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif int private_tss; /* flag indicating private tss */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0uarea; extern vm_offset_t proc0kstack; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* 
segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 3 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 4 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 5 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 6 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GTGATE_SEL 7 Null Descriptor - Placeholder */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 9 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 10 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 11 BIOS 32-bit interface (16bit Code) */ { 0, /* segment 
base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 12 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 13 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 14 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), 
IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #define PHYSMAP_SIZE (2 * 8) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, physmap_idx, pa_indx; int hasbrokenint12; u_int basemem, extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; char *cp; struct bios_smap *smap; hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(struct vm86frame)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Some newer BIOSes has broken INT 12H implementation which cause * kernel panic immediately. In this case, we need to scan SMAP * with INT 15:E820 first, then determine base memory size. */ if (hasbrokenint12) { goto int15e820; } /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; int15e820: /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. 
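
The fixup above marks the pages between the reported base-memory top and 640K present, writable, and user-accessible in the vm86 page table, so the BIOS can scribble on them. A standalone sketch of the PTE arithmetic; the PG_* values are written out to match the i386 ones, and the basemem figure is invented.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PG_V		0x001u	/* valid */
#define PG_RW		0x002u	/* writable */
#define PG_U		0x004u	/* user-accessible */

int
main(void)
{
	uint32_t pte[160];		/* identity map of the first 640K */
	unsigned basemem_kb = 632;	/* hypothetical INT 12h answer */
	unsigned i;

	/* basemem is in KB; dividing by 4 gives the first page index */
	for (i = basemem_kb / 4; i < 160; i++)
		pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U;
	printf("first fixed-up pte: index %u -> %#x\n",
	    basemem_kb / 4, pte[basemem_kb / 4]);
	return (0);
}
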
*/ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); /* * get memory map with INT 15:E820 */ vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); physmap_idx = 0; vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != 0x01) goto next_run; if (smap->length == 0) goto next_run; +#ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); goto next_run; } +#endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-montonic memory region, ignoring second region\n"); goto next_run; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; goto next_run; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; next_run: ; } while (vmf.vmf_ebx != 0); /* * Perform "base memory" related probes & setup based on SMAP */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) { basemem = 640; } if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); /* look for the MP hardware - needed for apic addresses */ i386_mp_probe(); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. 
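
The E820 loop above grows physmap[] under three rules: regions must arrive in ascending order, a region starting exactly at the end of the previous chunk extends it, and anything overlapping is dropped. A standalone model with invented sample regions; the capacity check is slightly restructured but equivalent.

#include <stdio.h>
#include <stdint.h>

#define PHYSMAP_SIZE	(2 * 8)

static uint64_t physmap[PHYSMAP_SIZE];
static int physmap_idx;

static void
add_region(uint64_t base, uint64_t length)
{
	int i;

	for (i = 0; i <= physmap_idx; i += 2)
		if (base < physmap[i + 1]) {	/* overlap/non-monotonic */
			printf("region %#jx ignored\n", (uintmax_t)base);
			return;
		}
	if (base == physmap[physmap_idx + 1]) {	/* extends the last chunk */
		physmap[physmap_idx + 1] += length;
		return;
	}
	if (physmap_idx + 2 == PHYSMAP_SIZE) {
		printf("too many segments\n");
		return;
	}
	physmap_idx += 2;
	physmap[physmap_idx] = base;
	physmap[physmap_idx + 1] = base + length;
}

int
main(void)
{
	int i;

	add_region(0x0, 0x9f000);		/* base memory */
	add_region(0x100000, 0x1fe00000);	/* extended memory */
	add_region(0x100000, 0x1000);		/* overlaps: dropped */
	for (i = 0; i <= physmap_idx; i += 2)
		printf("%#jx - %#jx\n", (uintmax_t)physmap[i],
		    (uintmax_t)physmap[i + 1]);
	return (0);
}
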
*/ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif /* * hw.physmem is a size in bytes; we also allow k, m, and g suffixes * for the appropriate modifiers. This overrides MAXMEM. */ if ((cp = getenv("hw.physmem")) != NULL) { u_int64_t AllowMem, sanity; char *ep; sanity = AllowMem = strtouq(cp, &ep, 0); if ((ep != cp) && (*ep != 0)) { switch(*ep) { case 'g': case 'G': AllowMem <<= 10; case 'm': case 'M': AllowMem <<= 10; case 'k': case 'K': AllowMem <<= 10; break; default: AllowMem = sanity = 0; } if (AllowMem < sanity) AllowMem = 0; } if (AllowMem == 0) printf("Ignoring invalid memory size of '%s'\n", cp); else Maxmem = atop(AllowMem); freeenv(cp); } if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first, 0); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; pte = CMAP1; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad; int *ptr = (int *)CADDR1; /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) continue; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) { page_bad = TRUE; } /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) { page_bad = TRUE; } /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) { page_bad = TRUE; } /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) { page_bad = TRUE; } /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) { continue; } /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; break; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). 
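
The hw.physmem parsing above leans on deliberate switch fall-through: 'g' shifts by 10 three times, 'm' twice, 'k' once, and the wrap-around comparison catches values shifted past 64 bits. A userland sketch of the same logic using strtoull in place of strtouq.

#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>

static uint64_t
parse_physmem(const char *cp)
{
	char *ep;
	uint64_t allow, sanity;

	sanity = allow = strtoull(cp, &ep, 0);
	if (ep != cp && *ep != '\0') {
		switch (*ep) {
		case 'g': case 'G':
			allow <<= 10;	/* FALLTHROUGH */
		case 'm': case 'M':
			allow <<= 10;	/* FALLTHROUGH */
		case 'k': case 'K':
			allow <<= 10;
			break;
		default:
			allow = sanity = 0;
		}
		if (allow < sanity)	/* shifted past 2^64: overflow */
			allow = 0;
	}
	return (allow);			/* 0 means "invalid, ignore" */
}

int
main(void)
{
	printf("%" PRIu64 "\n", parse_physmem("512m"));	/* 536870912 */
	printf("%" PRIu64 "\n", parse_physmem("1x"));	/* 0 (invalid) */
	return (0);
}
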
*/ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); avail_end = phys_avail[pa_indx]; } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, off, x; #ifndef SMP /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; #endif struct pcpu *pc; proc0.p_uarea = proc0uarea; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; atdevbase = ISA_HOLE_START + KERNBASE; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup(&proc0, &ksegrp0, &kse0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments, the code segment goes up to end of the * page with etext in it, the data segment goes to the end of * the address space */ /* * XXX text protection is temporarily (?) disabled. The limit was * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); #ifdef SMP pc = &SMP_prvspace[0].pcpu; gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct privatespace) - 1); #else pc = &__pcpu; gdt_segs[GPRIV_SEL].ssd_limit = atop(sizeof(struct pcpu) - 1); #endif gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_RECURSE); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); /* make ldt memory segments */ /* * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. And it * should be spelled ...MAX_USER... 
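
ssdtosd() and its inverse shuffle a 32-bit base (split 24+8) and a 20-bit limit (split 16+4) between the soft descriptor and the packed i386 layout. A standalone round-trip model; the struct names here are illustrative, not the segments.h definitions.

#include <stdio.h>
#include <stdint.h>

struct soft_desc { uint32_t base, limit; };
struct split_desc { uint32_t lobase, hibase, lolimit, hilimit; };

static struct split_desc
pack(struct soft_desc s)
{
	struct split_desc d;

	d.lobase = s.base & 0xffffff;	/* low 24 bits of the base */
	d.hibase = s.base >> 24;	/* high 8 bits */
	d.lolimit = s.limit & 0xffff;	/* low 16 bits of the limit */
	d.hilimit = s.limit >> 16;	/* high 4 bits */
	return (d);
}

static struct soft_desc
unpack(struct split_desc d)		/* mirrors sdtossd() */
{
	struct soft_desc s;

	s.base = (d.hibase << 24) | d.lobase;
	s.limit = (d.hilimit << 16) | d.lolimit;
	return (s);
}

int
main(void)
{
	struct soft_desc s = { 0x12345678, 0xfffff };	/* page granular 4G */
	struct soft_desc r = unpack(pack(s));

	printf("base %#x limit %#x\n", r.base, r.limit);
	return (0);
}
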
*/ ldt_segs[LUCODE_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(VM_MAXUSER_ADDRESS - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(1, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(2, &IDTVEC(nmi), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(3, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(4, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(5, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(7, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(8, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(9, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(10, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(11, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(12, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(14, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(15, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(0x80, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA isa_defaultirq(); #endif #ifdef DDB kdb_init(); if (boothowto & RB_KDB) Debugger("Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(6, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(13, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
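
setidt() above splits the 32-bit handler address into low and high 16-bit halves around the selector, type, and DPL fields; the DPL is SEL_UPL on gates such as int 0x80 so that user code may invoke them. A standalone model of that packing; the field names echo gate_descriptor but the struct itself is illustrative.

#include <stdio.h>
#include <stdint.h>

struct model_gate {
	uint16_t looffset, selector;
	uint8_t type, dpl, present;
	uint16_t hioffset;
};

static struct model_gate
make_gate(uint32_t func, uint16_t sel, uint8_t typ, uint8_t dpl)
{
	struct model_gate g;

	g.looffset = func & 0xffff;	/* low half of handler address */
	g.selector = sel;		/* kernel code selector */
	g.type = typ;			/* trap vs. interrupt gate */
	g.dpl = dpl;			/* 3 lets user mode "int" through */
	g.present = 1;
	g.hioffset = func >> 16;	/* high half of handler address */
	return (g);
}

int
main(void)
{
	struct model_gate g = make_gate(0xc0301234, 0x08, 15, 3);

	printf("offset = %#x\n",
	    ((uint32_t)g.hioffset << 16) | g.looffset);
	return (0);
}
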
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = LSEL(LUCODE_SEL, SEL_UPL); _udatasel = LSEL(LUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; #ifndef SMP struct region_descriptor r_idt; #endif vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); r_idt.rd_limit = sizeof(idt0) - 1; tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); if (((unsigned int)tmp & (PAGE_SIZE-1)) != 0) panic("kmem_alloc returned non-page-aligned memory"); /* Put the first seven entries in the lower page */ new_idt = (struct gate_descriptor*)(tmp + PAGE_SIZE - (7*8)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); return; } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb 
*pcb; struct trapframe *tp; tp = td->td_frame; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = td->td_pcb; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. 
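
fill_fpregs_xmm()/set_fpregs_xmm() above translate between the fxsave image, where each 80-bit x87 register is padded to 16 bytes, and the fnsave image, where the registers are packed at 10 bytes, while the control/status words carry over unchanged. A simplified standalone model; the layouts are stand-ins, not the real save87/savexmm.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct env { uint16_t cw, sw, tw; };	/* shared control/status/tag words */
struct fp87 { struct env e; uint8_t ac[8][10]; };	/* fnsave-like */
struct fpxmm {
	struct env e;
	struct { uint8_t acc[10], pad[6]; } fp[8];	/* fxsave-like */
};

static void
xmm_to_87(const struct fpxmm *x, struct fp87 *s)
{
	int i;

	memset(s, 0, sizeof(*s));
	s->e = x->e;			/* copy control/status words */
	for (i = 0; i < 8; i++)		/* drop 6 pad bytes per register */
		memcpy(s->ac[i], x->fp[i].acc, 10);
}

int
main(void)
{
	struct fpxmm x = { { 0x037f, 0, 0xffff }, { { { 1 } } } };
	struct fp87 s;

	xmm_to_87(&x, &s);
	printf("cw=%#x first byte=%d\n", s.e.cw, s.ac[0][0]);
	return (0);
}
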
*/ int get_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; tp = td->td_frame; mcp->mc_onstack = sigonstack(tp->tf_esp); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_ebx = tp->tf_ebx; mcp->mc_edx = tp->tf_edx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eax = tp->tf_eax; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_eflags = tp->tf_eflags; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. 
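
The alignment workaround above relies on the fpstate buffer being at least 4-byte aligned: bumping the pointer in 4-byte steps is then guaranteed to reach a 16-byte boundary (which fxsave requires) within the spare slack, after which the state is copied back. A standalone demonstration of the stepping.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
	uint32_t buf[(512 + 16) / 4];	/* state plus spare trailing slack */
	char *addr = (char *)buf + 4;	/* 4-aligned, deliberately not 16 */

	while ((uintptr_t)addr & 0xF)	/* same stepping as the do/while */
		addr += 4;
	printf("aligned at offset %td\n", addr - (char *)buf);
	memset(addr, 0, 512);		/* stand-in for the fxsave-area use */
	return (0);
}
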
*/ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; u_int32_t mask1, mask2; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 8; i++, mask1 <<= 2, mask2 <<= 2) if ((dbregs->dr[7] & mask1) == mask2) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space, unless, perhaps, we were called by * uid 0. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (suser(td) != 0) { if (dbregs->dr[7] & 0x3) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<2)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<4)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (dbregs->dr[7] & (0x3<<6)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
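
The %dr7 sanity check in set_dbregs() above walks the eight 2-bit R/W and LEN fields in bits 16-31 and rejects the undefined pattern 10 in any of them; separately, breakpoints enabled by non-superuser callers must point below VM_MAXUSER_ADDRESS. A standalone model of the bit-pattern walk.

#include <stdio.h>

static int
dr7_ok(unsigned int dr7)
{
	unsigned int mask1, mask2;
	int i;

	for (i = 0, mask1 = 0x3 << 16, mask2 = 0x2 << 16; i < 8;
	    i++, mask1 <<= 2, mask2 <<= 2)
		if ((dr7 & mask1) == mask2)	/* field == 10: undefined */
			return (0);
	return (1);
}

int
main(void)
{
	printf("%d\n", dr7_ok(0x00000003));	/* dr0 enabled, execute: ok */
	printf("%d\n", dr7_ok(0x00020003));	/* R/W0 == 10: rejected */
	return (0);
}
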
*/ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i=0; i<nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifndef DDB void Debugger(const char *msg) { printf("Debugger(\"%s\") called.\n", msg); } #endif /* no DDB */ #ifdef DDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called inside DDB. * * The actual code is stolen from <machine/cpufunc.h>, and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* DDB */ Index: head/sys/i386/i386/mpboot.s =================================================================== --- head/sys/i386/i386/mpboot.s (revision 112840) +++ head/sys/i386/i386/mpboot.s (revision 112841) @@ -1,272 +1,282 @@ /* * Copyright (c) 1995, Jack F. Vogel * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Jack F. Vogel * 4. The name of the developer may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * mpboot.s: FreeBSD machine support for the Intel MP Spec * multiprocessor systems. * * $FreeBSD$ */ #include /* miscellaneous asm macros */ #include #include #include "assym.s" +#define R(x) ((x)-KERNBASE) + /* * this code MUST be enabled here and in mp_machdep.c * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) && !defined(PC98) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define CHECKPOINT(A,D) \ movb $(A),%al ; \ outb %al,$CMOS_REG ; \ movb $(D),%al ; \ outb %al,$CMOS_DATA #else #define CHECKPOINT(A,D) #endif /* CHECK_POINTS */ /* * the APs enter here from their trampoline code (bootMP, below) */ .p2align 4 NON_GPROF_ENTRY(MPentry) CHECKPOINT(0x36, 3) /* Now enable paging mode */ - movl IdlePTD-KERNBASE, %eax +#ifdef PAE + movl R(IdlePDPT), %eax + movl %eax, %cr3 + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 +#else + movl R(IdlePTD), %eax movl %eax,%cr3 +#endif movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ movl bootSTK,%esp /* boot stack end loc. */ pushl $mp_begin /* jump to high mem */ ret /* * Wait for the booting CPU to signal startup */ mp_begin: /* now running relocated at KERNBASE */ CHECKPOINT(0x37, 4) call init_secondary /* load i386 tables */ CHECKPOINT(0x38, 5) /* * If the [BSP] CPU has support for VME, turn it on. */ testl $CPUID_VME, cpu_feature /* XXX WRONG! BSP! */ jz 1f movl %cr4, %eax orl $CR4_VME, %eax movl %eax, %cr4 1: /* disable the APIC, just to be SURE */ movl lapic+LA_SVR, %eax /* get spurious vector reg. */ andl $~APIC_SVR_SWEN, %eax /* clear software enable bit */ movl %eax, lapic+LA_SVR /* signal our startup to the BSP */ movl lapic+LA_VER, %eax /* our version reg contents */ movl %eax, cpu_apic_versions /* into [ 0 ] */ incl mp_ncpus /* signal BSP */ CHECKPOINT(0x39, 6) /* Now, let's prepare for some REAL WORK :-) This doesn't return. */ call ap_init /* * This is the embedded trampoline or bootstrap that is * copied into 'real-mode' low memory, it is where the * secondary processor "wakes up". When it is executed * the processor will eventually jump into the routine * MPentry, which resides in normal kernel text above * 1Meg. 
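*/

/*
 * For illustration only (not part of this diff): the virtual-to-physical
 * rebasing done by the R(x) macro added above.  Until CR0.PG is set the
 * AP can only use physical addresses, so linked kernel symbols must be
 * adjusted down by KERNBASE; on the PAE path, %cr3 is loaded with the
 * PDPT and CR4.PAE is set before paging is enabled.  The sample address
 * below is hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define KERNBASE 0xc0000000u	/* conventional i386 kernel base */
#define R(x) ((x) - KERNBASE)	/* same rebase as the macro above */

int
main(void)
{
	uint32_t idleptd_link_addr = 0xc0401000u;	/* hypothetical linked VA */

	printf("physical: 0x%08x\n", R(idleptd_link_addr));
	return (0);
}

/*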
-jackv */ .data ALIGN_DATA /* just to be sure */ BOOTMP1: NON_GPROF_ENTRY(bootMP) .code16 cli CHECKPOINT(0x34, 1) /* First guarantee a 'clean slate' */ xorl %eax, %eax movl %eax, %ebx movl %eax, %ecx movl %eax, %edx movl %eax, %esi movl %eax, %edi /* set up data segments */ mov %cs, %ax mov %ax, %ds mov %ax, %es mov %ax, %fs mov %ax, %gs mov %ax, %ss mov $(boot_stk-bootMP), %esp /* Now load the global descriptor table */ lgdt MP_GDTptr-bootMP /* Enable protected mode */ movl %cr0, %eax orl $CR0_PE, %eax movl %eax, %cr0 /* * make intrasegment jump to flush the processor pipeline and * reload CS register */ pushl $0x18 pushl $(protmode-bootMP) lretl .code32 protmode: CHECKPOINT(0x35, 2) /* * we are NOW running for the first time with %eip * having the full physical address, BUT we still * are using a segment descriptor with the origin * not matching the booting kernel. * * SO NOW... for the BIG Jump into kernel's segment * and physical text above 1 Meg. */ mov $0x10, %ebx movw %bx, %ds movw %bx, %es movw %bx, %fs movw %bx, %gs movw %bx, %ss .globl bigJump bigJump: /* this will be modified by mpInstallTramp() */ ljmp $0x08, $0 /* far jmp to MPentry() */ dead: hlt /* We should never get here */ jmp dead /* * MP boot strap Global Descriptor Table */ .p2align 4 .globl MP_GDT .globl bootCodeSeg .globl bootDataSeg MP_GDT: nulldesc: /* offset = 0x0 */ .word 0x0 .word 0x0 .byte 0x0 .byte 0x0 .byte 0x0 .byte 0x0 kernelcode: /* offset = 0x08 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0K */ .byte 0x9f /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..32 */ kerneldata: /* offset = 0x10 */ .word 0xffff /* segment limit 0..15 */ .word 0x0000 /* segment base 0..15 */ .byte 0x0 /* segment base 16..23; set for 0k */ .byte 0x93 /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /* segment base 24..32 */ bootcode: /* offset = 0x18 */ .word 0xffff /* segment limit 0..15 */ bootCodeSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ .byte 0x9e /* flags; Type */ .byte 0xcf /* flags; Limit */ .byte 0x0 /*segment base 24..32 */ bootdata: /* offset = 0x20 */ .word 0xffff bootDataSeg: /* this will be modified by mpInstallTramp() */ .word 0x0000 /* segment base 0..15 */ .byte 0x00 /* segment base 16...23; set for 0x000xx000 */ .byte 0x92 .byte 0xcf .byte 0x0 /* * GDT pointer for the lgdt call */ .globl mp_gdtbase MP_GDTptr: mp_gdtlimit: .word 0x0028 mp_gdtbase: /* this will be modified by mpInstallTramp() */ .long 0 .space 0x100 /* space for boot_stk - 1st temporary stack */ boot_stk: BOOTMP2: .globl bootMP_size bootMP_size: .long BOOTMP2 - BOOTMP1 Index: head/sys/i386/i386/pmap.c =================================================================== --- head/sys/i386/i386/pmap.c (revision 112840) +++ head/sys/i386/i386/pmap.c (revision 112841) @@ -1,3425 +1,3473 @@ /* * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 * $FreeBSD$ */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Manages physical address maps. 
* * In addition to hardware address maps, this * module is called upon to provide software-use-only * maps which may or may not be stored in the same * form as hardware maps. These pseudo-maps are * used to store intermediate results from copy * operations to and from address spaces. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_pmap.h" #include "opt_msgbuf.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) || defined(APIC_IO) #include #include #include #include #endif /* SMP || APIC_IO */ #define PMAP_KEEP_PDIRS #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if defined(DIAGNOSTIC) #define PMAP_DIAGNOSTIC #endif #define MINPV 2048 #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) /* * Given a map and a machine independent protection code, * convert to a vax protection code. */ #define pte_prot(m, p) (protection_codes[p]) static int protection_codes[8]; struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_paddr_t avail_start; /* PA of first available physical page */ vm_paddr_t avail_end; /* PA of last available physical page */ vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? 
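*/

/*
 * For illustration only (not part of this diff): how the pmap_pde()-
 * style macros above split an i386 virtual address.  The constants
 * mirror the classic non-PAE layout (4KB pages, 4MB per page table).
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PDRSHIFT   22

int
main(void)
{
	uint32_t va = 0xc0412345u;	/* arbitrary example address */

	printf("pde index: %u\n", (unsigned)(va >> PDRSHIFT));
	printf("pte index: %u\n", (unsigned)((va >> PAGE_SHIFT) & 0x3ffu));
	printf("offset:    0x%03x\n", (unsigned)(va & 0xfffu));
	return (0);
}

/*
 * Boot-detected paging feature flags follow.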
*/ static int pgeflag; /* PG_G or-in */ static int pseflag; /* PG_PS or-in */ static int nkpt; vm_offset_t kernel_vm_end; extern u_int32_t KERNend; +#ifdef PAE +static uma_zone_t pdptzone; +#endif + /* * Data for the pv entry allocation mechanism */ static uma_zone_t pvzone; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; int pmap_pagedaemon_waken; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; static pt_entry_t *CMAP2, *CMAP3, *ptmmap; caddr_t CADDR1 = 0, ptvmmap = 0; static caddr_t CADDR2, CADDR3; static struct mtx CMAPCADDR12_lock; static pt_entry_t *msgbufmap; struct msgbuf *msgbufp = 0; /* * Crashdump maps. */ static pt_entry_t *pt_crashdumpmap; static caddr_t crashdumpmap; #ifdef SMP extern pt_entry_t *SMPpt; #endif static pt_entry_t *PMAP1 = 0; static pt_entry_t *PADDR1 = 0; static PMAP_INLINE void free_pv_entry(pv_entry_t pv); static pv_entry_t get_pv_entry(void); static void i386_protection_init(void); static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem); static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); static int pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va); static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex); static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); -static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#ifdef PAE +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif static pd_entry_t pdir4mb; CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * Move the kernel virtual free pointer to the next * 4MB. This is used to help improve performance * by using a large (4MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) return newaddr; #endif #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); #endif return newaddr; } /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(firstaddr, loadaddr) vm_paddr_t firstaddr; vm_paddr_t loadaddr; { vm_offset_t va; pt_entry_t *pte; int i; avail_start = firstaddr; /* * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. 
It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel * page tables are NOT double mapped and thus should not be included * in this calculation. */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize protection array. */ i386_protection_init(); /* * Initialize the kernel pmap (which is statically allocated). */ kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif kernel_pmap->pm_active = -1; /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvlist); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); nkpt = NKPT; /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the idle process page zeroing. */ SYSMAP(caddr_t, CMAP1, CADDR1, 1) SYSMAP(caddr_t, CMAP2, CADDR2, 1) SYSMAP(caddr_t, CMAP3, CADDR3, 1) mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF); /* * Crashdump maps. */ SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS); /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. * XXX ptmmap is not used. */ SYSMAP(caddr_t, ptmmap, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. * XXX msgbufmap is not used. */ SYSMAP(struct msgbuf *, msgbufmap, msgbufp, atop(round_page(MSGBUF_SIZE))) /* * ptemap is used for pmap_pte_quick */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); virtual_avail = va; *CMAP1 = *CMAP2 = 0; for (i = 0; i < NKPT; i++) PTD[i] = 0; pgeflag = 0; #ifndef DISABLE_PG_G if (cpu_feature & CPUID_PGE) pgeflag = PG_G; #endif #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) { printf("Warning: Pentium 4 cpu: PG_G disabled (global flag)\n"); pgeflag = 0; } #endif /* * Initialize the 4MB page size flag */ pseflag = 0; /* * The 4MB page version of the initial * kernel page mapping. */ pdir4mb = 0; #ifndef DISABLE_PSE if (cpu_feature & CPUID_PSE) pseflag = PG_PS; #endif #ifdef I686_CPU_not /* Problem seems to have gone away */ /* Deal with un-resolved Pentium4 issues */ if (cpu_class == CPUCLASS_686 && strcmp(cpu_vendor, "GenuineIntel") == 0 && (cpu_id & 0xf00) == 0xf00) { printf("Warning: Pentium 4 cpu: PG_PS disabled (4MB pages)\n"); pseflag = 0; } #endif #ifndef DISABLE_PSE if (pseflag) { pd_entry_t ptditmp; /* * Note that we have enabled PSE mode */ ptditmp = *(PTmap + i386_btop(KERNBASE)); ptditmp &= ~(NBPDR - 1); ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; pdir4mb = ptditmp; } #endif #ifndef SMP /* * Turn on PGE/PSE. SMP does this later on since the * 4K page tables are required for AP boot (for now). * XXX fixme. */ pmap_set_opt(); #endif #ifdef SMP if (cpu_apic_address == 0) panic("pmap_bootstrap: no local apic! 
(non-SMP hardware?)"); /* local apic is mapped on last page */ SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | (cpu_apic_address & PG_FRAME)); #endif invltlb(); } /* * Enable 4MB page mode for MP startup. Turn on PG_G support. * BSP will run this after all the AP's have started up. */ void pmap_set_opt(void) { pt_entry_t *pte; vm_offset_t va, endva; if (pgeflag && (cpu_feature & CPUID_PGE)) { load_cr4(rcr4() | CR4_PGE); invltlb(); /* Insurance */ } #ifndef DISABLE_PSE if (pseflag && (cpu_feature & CPUID_PSE)) { load_cr4(rcr4() | CR4_PSE); invltlb(); /* Insurance */ } #endif if (PCPU_GET(cpuid) == 0) { #ifndef DISABLE_PSE if (pdir4mb) { kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb; invltlb(); /* Insurance */ } #endif if (pgeflag) { /* Turn on PG_G for text, data, bss pages. */ va = (vm_offset_t)btext; #ifndef DISABLE_PSE if (pseflag && (cpu_feature & CPUID_PSE)) { if (va < KERNBASE + (1 << PDRSHIFT)) va = KERNBASE + (1 << PDRSHIFT); } #endif endva = KERNBASE + KERNend; while (va < endva) { pte = vtopte(va); if (*pte) *pte |= pgeflag; va += PAGE_SIZE; } invltlb(); /* Insurance */ } /* * We do not need to broadcast the invltlb here, because * each AP does it the moment it is released from the boot * lock. See ap_init(). */ } } static void * -pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { *flags = UMA_SLAB_PRIV; return (void *)kmem_alloc(kernel_map, bytes); } +#ifdef PAE +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0)); +} +#endif + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. * pmap_init has been enhanced to support in a fairly consistant * way, discontiguous physical memory. */ void pmap_init(phys_start, phys_end) vm_paddr_t phys_start, phys_end; { int i; int initial_pvs; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ for(i = 0; i < vm_page_array_size; i++) { vm_page_t m; m = &vm_page_array[i]; TAILQ_INIT(&m->md.pv_list); m->md.pv_list_count = 0; } /* * init the pv free list */ initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); - uma_zone_set_allocf(pvzone, pmap_allocf); + uma_zone_set_allocf(pvzone, pmap_pv_allocf); uma_prealloc(pvzone, initial_pvs); +#ifdef PAE + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif + /* * Now it is safe to enable pv_table recording. */ pmap_initialized = TRUE; } /* * Initialize the address space (zone) for the pv_entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ void pmap_init2() { int shpgperproc = PMAP_SHPGPERPROC; TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_page_array_size; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); } /*************************************************** * Low level helper routines..... 
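***************************************************/

/*
 * For illustration only (not part of this diff): the sizing arithmetic
 * pmap_init2() uses above.  The maxproc and page-count values below are
 * assumed purely to get a concrete number.
 */
#include <stdio.h>

int
main(void)
{
	int shpgperproc = 200;			/* the PMAP_SHPGPERPROC default */
	int maxproc = 6164;			/* assumed tunable value */
	int vm_page_array_size = 262144;	/* assumed: 1GB of 4KB pages */
	int pv_entry_max, pv_entry_high_water;

	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	pv_entry_high_water = 9 * (pv_entry_max / 10);	/* recover at 90% */
	printf("max %d, high water %d\n", pv_entry_max, pv_entry_high_water);
	return (0);
}

/***************************************************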
***************************************************/ #if defined(PMAP_DIAGNOSTIC) /* * This code checks for non-writeable/modified pages. * This should be an invalid condition. */ static int pmap_nw_modified(pt_entry_t ptea) { int pte; pte = (int) ptea; if ((pte & (PG_M|PG_RW)) == PG_M) return 1; else return 0; } #endif /* * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ static PMAP_INLINE int pmap_track_modified(vm_offset_t va) { if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) return 1; else return 0; } #ifdef I386_CPU /* * i386 only has "invalidate everything" and no SMP to worry about. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } #else /* !I386_CPU */ #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { u_int cpumask; u_int other_cpus; critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invlpg(va); if (pmap->pm_active & other_cpus) smp_masked_invlpg(pmap->pm_active & other_cpus, va); } critical_exit(); } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { u_int cpumask; u_int other_cpus; vm_offset_t addr; critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); if (pmap->pm_active & other_cpus) smp_masked_invlpg_range(pmap->pm_active & other_cpus, sva, eva); } critical_exit(); } void pmap_invalidate_all(pmap_t pmap) { u_int cpumask; u_int other_cpus; #ifdef SWTCH_OPTIM_STATS tlb_flush_count++; #endif critical_enter(); /* * We need to disable interrupt preemption but MUST NOT have * interrupts disabled here. * XXX we may need to hold schedlock to get a coherent pm_active */ if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); if (pmap->pm_active & cpumask) invltlb(); if (pmap->pm_active & other_cpus) smp_masked_invltlb(pmap->pm_active & other_cpus); } critical_exit(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. 
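*/

/*
 * For illustration only (not part of this diff): the dispatch decision
 * shared by the SMP pmap_invalidate_*() functions above, reduced to
 * plain C.  The invlpg and IPI operations are stubbed with prints, and
 * the mask width is assumed to be 32 bits.
 */
#include <stdio.h>

typedef unsigned int cpumask_t;

static void
invalidate_page_dispatch(cpumask_t pm_active, cpumask_t self,
    cpumask_t all_cpus, unsigned int va)
{
	cpumask_t other_cpus = all_cpus & ~self;

	if (pm_active == (cpumask_t)-1 || pm_active == all_cpus) {
		printf("invlpg(0x%x) locally, IPI to all\n", va);
	} else {
		if (pm_active & self)
			printf("invlpg(0x%x) locally\n", va);
		if (pm_active & other_cpus)
			printf("IPI for 0x%x to mask 0x%x\n", va,
			    pm_active & other_cpus);
	}
}

int
main(void)
{
	invalidate_page_dispatch(0x1, 0x1, 0xf, 0xdeadb000u);
	invalidate_page_dispatch((cpumask_t)-1, 0x1, 0xf, 0xdeadb000u);
	return (0);
}

/*
 * Uniprocessor variants: a local invlpg/invltlb always suffices.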
*/ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || pmap->pm_active) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || pmap->pm_active) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap || pmap->pm_active) invltlb(); } #endif /* !SMP */ #endif /* !I386_CPU */ /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. */ pt_entry_t * pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return vtopte(va); newpf = *pde & PG_FRAME; if (((*PMAP1) & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1); } return PADDR1 + (i386_btop(va) & (NPTEPG - 1)); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap, va) register pmap_t pmap; vm_offset_t va; { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; if (pmap == 0) return 0; pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) { rtval = (pde & ~PDRMASK) | (va & PDRMASK); return rtval; } pte = pmap_pte_quick(pmap, va); rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK)); return rtval; } return 0; } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); *pte = pa | PG_RW | PG_V | pgeflag; } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); *pte = 0; } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; va = sva = *virt; while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. 
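*/

/*
 * For illustration only (not part of this diff): the batching pattern
 * pmap_qenter() uses below.  Filling N PTEs costs a single ranged
 * shootdown rather than N single-page IPIs; map_one() and the printf
 * stand in for the PTE store and the shootdown.
 */
#include <stdio.h>

#define PAGE_SIZE 4096u

static void
map_one(unsigned int va, unsigned int pa)
{
	(void)va; (void)pa;	/* would write pa | PG_RW | PG_V into the PTE */
}

static void
qenter_sketch(unsigned int sva, const unsigned int *pa, int count)
{
	unsigned int va = sva;
	int i;

	for (i = 0; i < count; i++, va += PAGE_SIZE)
		map_one(va, pa[i]);	/* no TLB work yet */
	printf("shootdown [0x%x, 0x%x)\n", sva, va);	/* one ranged IPI */
}

int
main(void)
{
	unsigned int pages[3] = { 0x1000, 0x8000, 0x3000 };

	qenter_sketch(0xc1000000u, pages, 3);
	return (0);
}

/*
 * pmap_qenter() and pmap_qremove() below both follow this pattern.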
*/ void pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); va += PAGE_SIZE; m++; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; retry: m = vm_page_lookup(object, pindex); if (m != NULL) { vm_page_lock_queues(); if (vm_page_sleep_if_busy(m, FALSE, "pplookp")) goto retry; vm_page_unlock_queues(); } return m; } #ifndef KSTACK_MAX_PAGES #define KSTACK_MAX_PAGES 32 #endif /* * Create the kernel stack (including pcb for i386) for a new thread. * This routine directly affects the fork perf for a process and * create performance for a thread. */ void pmap_new_thread(struct thread *td, int pages) { int i; vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_page_t m; vm_offset_t ks; /* Bounds check */ if (pages <= 1) pages = KSTACK_PAGES; else if (pages > KSTACK_MAX_PAGES) pages = KSTACK_MAX_PAGES; /* * allocate object for the kstack */ ksobj = vm_object_allocate(OBJT_DEFAULT, pages); td->td_kstack_obj = ksobj; /* get a kernel virtual address for the kstack for this thread */ #ifdef KSTACK_GUARD ks = kmem_alloc_nofault(kernel_map, (pages + 1) * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); if (*vtopte(ks) != 0) pmap_qremove(ks, 1); ks += PAGE_SIZE; td->td_kstack = ks; #else /* get a kernel virtual address for the kstack for this thread */ ks = kmem_alloc_nofault(kernel_map, pages * PAGE_SIZE); if (ks == 0) panic("pmap_new_thread: kstack allocation failed"); td->td_kstack = ks; #endif /* * Knowing the number of pages allocated is useful when you * want to deallocate them. */ td->td_kstack_pages = pages; /* * For the length of the stack, link in a real page of ram for each * page of stack. */ for (i = 0; i < pages; i++) { /* * Get a kernel stack page */ m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED); ma[i] = m; vm_page_lock_queues(); vm_page_wakeup(m); vm_page_flag_clear(m, PG_ZERO); m->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); } pmap_qenter(ks, ma, pages); } /* * Dispose the kernel stack for a thread that has exited. * This routine directly impacts the exit perf of a process and thread. */ void pmap_dispose_thread(td) struct thread *td; { int i; int pages; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; pmap_qremove(ks, pages); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_dispose_thread: kstack already missing?"); vm_page_lock_queues(); vm_page_busy(m); vm_page_unwire(m, 0); vm_page_free(m); vm_page_unlock_queues(); } /* * Free the space that this stack was mapped to in the kernel * address map. */ #ifdef KSTACK_GUARD kmem_free(kernel_map, ks - PAGE_SIZE, (pages + 1) * PAGE_SIZE); #else kmem_free(kernel_map, ks, pages * PAGE_SIZE); #endif vm_object_deallocate(ksobj); } /* * Set up a variable sized alternate kstack. Though it may look MI, it may * need to be different on certain arches like ia64. 
*/ void pmap_new_altkstack(struct thread *td, int pages) { /* shuffle the original stack */ td->td_altkstack_obj = td->td_kstack_obj; td->td_altkstack = td->td_kstack; td->td_altkstack_pages = td->td_kstack_pages; pmap_new_thread(td, pages); } void pmap_dispose_altkstack(td) struct thread *td; { pmap_dispose_thread(td); /* restore the original kstack */ td->td_kstack = td->td_altkstack; td->td_kstack_obj = td->td_altkstack_obj; td->td_kstack_pages = td->td_altkstack_pages; td->td_altkstack = 0; td->td_altkstack_obj = NULL; td->td_altkstack_pages = 0; } /* * Allow the Kernel stack for a thread to be prejudicially paged out. */ void pmap_swapout_thread(td) struct thread *td; { int i; int pages; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; pmap_qremove(ks, pages); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("pmap_swapout_thread: kstack already missing?"); vm_page_lock_queues(); vm_page_dirty(m); vm_page_unwire(m, 0); vm_page_unlock_queues(); } } /* * Bring the kernel stack for a specified thread back in. */ void pmap_swapin_thread(td) struct thread *td; { int i, rv; int pages; vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_offset_t ks; vm_page_t m; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; for (i = 0; i < pages; i++) { m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(ksobj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("pmap_swapin_thread: cannot get kstack for proc: %d\n", td->td_proc->p_pid); m = vm_page_lookup(ksobj, i); m->valid = VM_PAGE_BITS_ALL; } ma[i] = m; vm_page_lock_queues(); vm_page_wire(m); vm_page_wakeup(m); vm_page_unlock_queues(); } pmap_qenter(ks, ma, pages); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * This routine unholds page table pages, and if the hold count * drops to zero, then it decrements the wire count. */ static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt")) vm_page_lock_queues(); if (m->hold_count == 0) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; if (pmap_is_current(pmap)) { /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); } /* * If the page is finally unwired, simply free it. */ --m->wire_count; if (m->wire_count == 0) { vm_page_busy(m); vm_page_free_zero(m); atomic_subtract_int(&cnt.v_wire_count, 1); } return 1; } return 0; } static PMAP_INLINE int pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { vm_page_unhold(m); if (m->hold_count == 0) return _pmap_unwire_pte_hold(pmap, m); else return 0; } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. 
*/ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte) { unsigned ptepindex; if (va >= VM_MAXUSER_ADDRESS) return 0; if (mpte == NULL) { ptepindex = (va >> PDRSHIFT); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; } else { while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL && vm_page_sleep_if_busy(mpte, FALSE, "pulook")) vm_page_lock_queues(); } } return pmap_unwire_pte_hold(pmap, mpte); } void pmap_pinit0(pmap) struct pmap *pmap; { pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ void pmap_pinit(pmap) register struct pmap *pmap; { vm_page_t ptdpg[NPGPTD]; vm_paddr_t pa; int i; /* * No need to allocate page table space yet but we do need a valid * page directory table. */ - if (pmap->pm_pdir == NULL) + if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map, NBPTD); +#ifdef PAE + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif + } /* * allocate object for the ptes */ if (pmap->pm_pteobj == NULL) pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + NPGPTD); /* * allocate the page directory page(s) */ for (i = 0; i < NPGPTD; i++) { ptdpg[i] = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); vm_page_lock_queues(); vm_page_flag_clear(ptdpg[i], PG_BUSY); ptdpg[i]->valid = VM_PAGE_BITS_ALL; vm_page_unlock_queues(); } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) { if ((ptdpg[i]->flags & PG_ZERO) == 0) bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); } mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* Wire in kernel global address entries. */ /* XXX copies current process, does not fill in MPPTDI */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); #ifdef SMP pmap->pm_pdir[MPPTDI] = PTD[MPPTDI]; #endif /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; +#ifdef PAE + pmap->pm_pdpt[i] = pa | PG_V; +#endif } pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Wire in kernel global address entries. To avoid a race condition * between pmap initialization and pmap_growkernel, this procedure * should be called after the vmspace is attached to the process * but before this pmap is activated. */ void pmap_pinit2(pmap) struct pmap *pmap; { /* XXX: Remove this stub when no longer called */ } /* * this routine is called if the page table page is not * mapped correctly. 
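*/

/*
 * For illustration only (not part of this diff): the two PAE constraints
 * that pmap_pinit() asserts above.  The four-entry PDPT must be 32-byte
 * aligned (in PAE mode %cr3 keeps only bits 31:5 of its base) and must
 * sit below 4GB, since %cr3 is still a 32-bit register; that is why the
 * pdptzone allocates through contigmalloc with a 4GB upper bound.
 */
#include <stdio.h>
#include <stdint.h>

#define NPGPTD 4		/* PAE: four page-directory pages */
typedef uint64_t pdpt_entry_t;	/* PAE entries are 64 bits wide */

static int
pdpt_ok(uint64_t pdpt_pa)
{
	if (pdpt_pa & (NPGPTD * sizeof(pdpt_entry_t) - 1))
		return (0);	/* misaligned: low five bits in use */
	if (pdpt_pa >= (4ULL << 30))
		return (0);	/* unreachable through a 32-bit %cr3 */
	return (1);
}

int
main(void)
{
	printf("%d %d\n", pdpt_ok(0x1000), pdpt_ok(0x1010));
	return (0);
}

/*
 * Allocate and wire the page table page covering ptepindex.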
*/ static vm_page_t _pmap_allocpte(pmap, ptepindex) pmap_t pmap; unsigned ptepindex; { vm_paddr_t ptepa; vm_offset_t pteva; vm_page_t m; /* * Find or fabricate a new pagetable page */ m = vm_page_grab(pmap->pm_pteobj, ptepindex, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY); KASSERT(m->queue == PQ_NONE, ("_pmap_allocpte: %p->queue != PQ_NONE", m)); /* * Increment the hold count for the page table page * (denoting a new mapping.) */ m->hold_count++; /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); /* * Try to use the new mapping, but if we cannot, then * do it with the routine that maps the page explicitly. */ if ((m->flags & PG_ZERO) == 0) { if (pmap_is_current(pmap)) { pteva = VM_MAXUSER_ADDRESS + i386_ptob(ptepindex); bzero((caddr_t) pteva, PAGE_SIZE); } else { pmap_zero_page(m); } } vm_page_lock_queues(); m->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(m, PG_ZERO); vm_page_wakeup(m); vm_page_unlock_queues(); return m; } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va) { unsigned ptepindex; pd_entry_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; /* * Get the page directory entry */ ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { pmap->pm_pdir[ptepindex] = 0; ptepa = 0; pmap_invalidate_all(kernel_pmap); } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { /* * In order to get the page table page, try the * hint first. */ if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { m = pmap->pm_pteobj->root; } else { m = pmap_page_lookup(pmap->pm_pteobj, ptepindex); } m->hold_count++; return m; } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ return _pmap_allocpte(pmap, ptepindex); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap_t pmap) { vm_object_t object; vm_page_t m; int i; object = pmap->pm_pteobj; KASSERT(object->ref_count == 1, ("pmap_release: pteobj reference count %d != 1", object->ref_count)); KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); bzero(pmap->pm_pdir + KPTDI, nkpt * sizeof(*pmap->pm_pdir)); for (i = 0; i < NPGPTD; i++) { pmap->pm_pdir[PTDPTDI + i] = 0; pmap->pm_pdir[APTDPTDI + i] = 0; } #ifdef SMP pmap->pm_pdir[MPPTDI] = 0; #endif pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); vm_page_lock_queues(); for (i = 0; i < NPGPTD; i++) { m = TAILQ_FIRST(&object->memq); +#ifdef PAE + KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif m->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); vm_page_busy(m); vm_page_free_zero(m); } KASSERT(TAILQ_EMPTY(&object->memq), ("pmap_release: leaking page table pages")); vm_page_unlock_queues(); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { struct pmap *pmap; int s; vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; s = splhigh(); mtx_assert(&kernel_map->system_mtx, MA_OWNED); if (kernel_vm_end == 0) { kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); nkpt++; } } addr = roundup2(addr, PAGE_SIZE * NPTEPG); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } /* * This index is bogus, but out of the way */ nkpg = vm_page_alloc(NULL, nkpt, VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(PTD, kernel_vm_end) = newpdir; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { *pmap_pde(pmap, kernel_vm_end) = newpdir; } mtx_unlock_spin(&allpmaps_lock); kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } /*************************************************** * page management routines. ***************************************************/ /* * free the pv_entry back to the free list */ static PMAP_INLINE void free_pv_entry(pv_entry_t pv) { pv_entry_count--; uma_zfree(pvzone, pv); } /* * get a new pv_entry, allocating a block from the system * when needed. * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. 
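*/

/*
 * For illustration only (not part of this diff): the boundary arithmetic
 * pmap_growkernel() relies on above.  Each page-table page maps
 * PAGE_SIZE * NPTEPG bytes of KVA (4MB in the non-PAE layout assumed
 * here); NBPT is a local shorthand, not a kernel name.
 */
#include <stdio.h>

#define PAGE_SIZE 4096u
#define NPTEPG    1024u			/* non-PAE: 1024 PTEs per page */
#define NBPT      (PAGE_SIZE * NPTEPG)	/* KVA mapped per PT page */

#define roundup2(x, y) (((x) + ((y) - 1)) & ~((y) - 1))	/* y: power of 2 */

int
main(void)
{
	unsigned int addr = 0xc2345678u;

	printf("grow to 0x%08x\n", roundup2(addr, NBPT));
	printf("next pt 0x%08x\n", (addr + NBPT) & ~(NBPT - 1u));
	return (0);
}

/*
 * pv entries are allocated M_NOWAIT, and the pagedaemon is woken the
 * first time the count crosses the high-water mark.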
*/ static pv_entry_t get_pv_entry(void) { pv_entry_count++; if (pv_entry_high_water && (pv_entry_count > pv_entry_high_water) && (pmap_pagedaemon_waken == 0)) { pmap_pagedaemon_waken = 1; wakeup (&vm_pages_needed); } return uma_zalloc(pvzone, M_NOWAIT); } /* * If it is the first entry on the list, it is actually * in the header and we must copy the following entry up * to the header. Otherwise we must search the list for * the entry. In either case we free the now unused entry. */ static int pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { pv_entry_t pv; int rtval; int s; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if (m->md.pv_list_count < pmap->pm_stats.resident_count) { TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pmap == pv->pv_pmap && va == pv->pv_va) break; } } else { TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { if (va == pv->pv_va) break; } } rtval = 0; if (pv) { rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_flag_clear(m, PG_WRITEABLE); TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); free_pv_entry(pv); } splx(s); return rtval; } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m) { int s; pv_entry_t pv; s = splvm(); pv = get_pv_entry(); pv->pv_va = va; pv->pv_pmap = pmap; pv->pv_ptem = mpte; TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); m->md.pv_list_count++; splx(s); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va) { pt_entry_t oldpte; vm_page_t m; - oldpte = atomic_readandclear_int(ptq); + oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte); if (oldpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) oldpte)) { printf( "pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n", va, oldpte); } #endif if (pmap_track_modified(va)) vm_page_dirty(m); } if (oldpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); return pmap_remove_entry(pmap, m, va); } else { return pmap_unuse_pt(pmap, va, NULL); } return 0; } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va) { pt_entry_t *pte; if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; int anyvalid; if (pmap == NULL) return; if (pmap->pm_stats.resident_count == 0) return; /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva); return; } anyvalid = 0; for (; sva < eva; sva = pdnxt) { unsigned pdirindex; /* * Calculate index for next page table. 
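*/

/*
 * For illustration only (not part of this diff): the contract behind the
 * pte_load_clear() calls introduced above.  The old entry must be read
 * and the slot zeroed in one atomic step, or a hardware PG_A/PG_M update
 * racing between the load and the store could be lost; with PAE enabled
 * a pt_entry_t is 64 bits wide, so the old 32-bit
 * atomic_readandclear_int() no longer covers the whole entry.  The
 * compiler builtin below is only a stand-in for the real primitive.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t pt_entry_t;	/* PAE-sized page table entry */

static pt_entry_t
pte_load_clear_sketch(pt_entry_t *ptep)
{
	/* atomically fetch the old PTE while storing zero */
	return (__atomic_exchange_n(ptep, (pt_entry_t)0, __ATOMIC_SEQ_CST));
}

int
main(void)
{
	pt_entry_t pte = 0x0000000123456067ULL;	/* sample 64-bit entry */
	pt_entry_t old = pte_load_clear_sketch(&pte);

	printf("old %llx now %llx\n", (unsigned long long)old,
	    (unsigned long long)pte);
	return (0);
}

/*
 * Advance sva to the first address covered by the next page table.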
*/ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { pmap->pm_pdir[pdirindex] = 0; pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anyvalid = 1; continue; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (; sva != pdnxt; sva += PAGE_SIZE) { if ((pte = pmap_pte_quick(pmap, sva)) == NULL || *pte == 0) continue; anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva)) break; } } if (anyvalid) pmap_invalidate_all(pmap); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { register pv_entry_t pv; pt_entry_t *pte, tpte; int s; #if defined(PMAP_DIAGNOSTIC) /* * XXX This makes pmap_remove_all() illegal for non-managed pages! */ if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x", VM_PAGE_TO_PHYS(m)); } #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); s = splvm(); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pv->pv_pmap->pm_stats.resident_count--; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - tpte = atomic_readandclear_int(pte); + tpte = pte_load_clear(pte); if (tpte & PG_W) pv->pv_pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_flag_set(m, PG_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf( "pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n", pv->pv_va, tpte); } #endif if (pmap_track_modified(pv->pv_va)) vm_page_dirty(m); } pmap_invalidate_page(pv->pv_pmap, pv->pv_va); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } vm_page_flag_clear(m, PG_WRITEABLE); splx(s); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; int anychanged; if (pmap == NULL) return; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) return; anychanged = 0; for (; sva < eva; sva = pdnxt) { unsigned pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. 
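*/

/*
 * For illustration only (not part of this diff): the write-protection
 * step the small-page loop below applies, reduced to its essentials.
 * Referenced/modified state is harvested into the vm_page before PG_A,
 * PG_M and write access are cleared; the bit values mirror the i386 PTE
 * layout.
 */
#include <stdio.h>
#include <stdint.h>

#define PG_RW 0x002u
#define PG_A  0x020u
#define PG_M  0x040u

static uint32_t
protect_pte(uint32_t pbits, int *referenced, int *dirty)
{
	*referenced = (pbits & PG_A) != 0;	/* becomes PG_REFERENCED */
	*dirty = (pbits & PG_M) != 0;		/* becomes vm_page_dirty() */
	return (pbits & ~(PG_A | PG_M | PG_RW));
}

int
main(void)
{
	int r, d;
	uint32_t pte = protect_pte(0x00123067u, &r, &d);

	printf("pte 0x%08x referenced %d dirty %d\n", pte, r, d);
	return (0);
}

/*
 * A 4MB mapping is write-protected in place by clearing PG_RW and PG_M
 * in its PDE.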
*/ if ((ptpaddr & PG_PS) != 0) { pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; anychanged = 1; continue; } if (pdnxt > eva) pdnxt = eva; for (; sva != pdnxt; sva += PAGE_SIZE) { pt_entry_t pbits; pt_entry_t *pte; vm_page_t m; if ((pte = pmap_pte_quick(pmap, sva)) == NULL) continue; pbits = *pte; if (pbits & PG_MANAGED) { m = NULL; if (pbits & PG_A) { m = PHYS_TO_VM_PAGE(pbits); vm_page_flag_set(m, PG_REFERENCED); pbits &= ~PG_A; } if ((pbits & PG_M) != 0 && pmap_track_modified(sva)) { if (m == NULL) m = PHYS_TO_VM_PAGE(pbits); vm_page_dirty(m); pbits &= ~PG_M; } } pbits &= ~PG_RW; if (pbits != *pte) { *pte = pbits; anychanged = 1; } } } if (anychanged) pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, boolean_t wired) { vm_paddr_t pa; register pt_entry_t *pte; vm_paddr_t opa; pt_entry_t origpte, newpte; vm_page_t mpte; if (pmap == NULL) return; va &= PG_FRAME; #ifdef PMAP_DIAGNOSTIC if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); #endif mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va); } #if 0 && defined(PMAP_DIAGNOSTIC) else { pd_entry_t *pdeaddr = pmap_pde(pmap, va); origpte = *pdeaddr; if ((origpte & PG_V) == 0) { panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", pmap->pm_pdir[PTDPTDI], origpte, va); } } #endif pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; origpte = *pte; opa = origpte & PG_FRAME; if (origpte & PG_PS) panic("pmap_enter: attempted pmap_enter on 4MB page"); /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) origpte)) { printf( "pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n", va, origpte); } #endif /* * Remove extra pte reference */ if (mpte) mpte->hold_count--; if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((origpte & PG_RW) == 0) { *pte |= PG_RW; pmap_invalidate_page(pmap, va); } return; } /* * We might be turning off write access to the page, * so we go ahead and sense modify status. 
*/ if (origpte & PG_MANAGED) { if ((origpte & PG_M) && pmap_track_modified(va)) { vm_page_t om; om = PHYS_TO_VM_PAGE(opa); vm_page_dirty(om); } pa |= PG_MANAGED; } goto validate; } /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { int err; vm_page_lock_queues(); err = pmap_remove_pte(pmap, pte, va); vm_page_unlock_queues(); if (err) panic("pmap_enter: pte vanished, va: 0x%x", va); } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if (pmap_initialized && (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { pmap_insert_entry(pmap, va, mpte, m); pa |= PG_MANAGED; } /* * Increment counters */ pmap->pm_stats.resident_count++; if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V); if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. */ if ((origpte & ~(PG_M|PG_A)) != newpte) { *pte = newpte | PG_A; /*if (origpte)*/ { pmap_invalidate_page(pmap, va); } } } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * 5. Tlbflush is deferred to calling procedure. * 6. Page IS managed. * but is *MUCH* faster than pmap_enter... */ static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { unsigned ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->hold_count++; } else { retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) panic("pmap_enter_quick: unexpected mapping into 4MB page"); if (pmap->pm_pteobj->root && (pmap->pm_pteobj->root->pindex == ptepindex)) { mpte = pmap->pm_pteobj->root; } else { mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex); } if (mpte == NULL) goto retry; mpte->hold_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte != NULL) { vm_page_lock_queues(); pmap_unwire_pte_hold(pmap, mpte); vm_page_unlock_queues(); } return 0; } /* * Enter on the PV list if part of our managed memory. Note that we * raise IPL while manipulating pv_table since pmap_enter can be * called at interrupt time. */ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) pmap_insert_entry(pmap, va, mpte, m); /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m); /* * Now validate mapping with RO protection */ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) *pte = pa | PG_V | PG_U; else *pte = pa | PG_V | PG_U | PG_MANAGED; return mpte; } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. 
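*/

/*
 * For illustration only (not part of this diff): how the "validate:"
 * step of pmap_enter() above composes the new PTE.  Bit values mirror
 * the i386 layout; PG_W is the software wired bit kept in one of the
 * PTE's available bits.
 */
#include <stdio.h>
#include <stdint.h>

#define PG_V  0x001u
#define PG_RW 0x002u
#define PG_U  0x004u
#define PG_W  0x200u	/* software-defined wired bit */

static uint32_t
make_pte(uint32_t pa, int writable, int wired, int user, uint32_t pgeflag)
{
	uint32_t newpte = pa | PG_V | (writable ? PG_RW : 0u);

	if (wired)
		newpte |= PG_W;
	if (user)
		newpte |= PG_U;
	else
		newpte |= pgeflag;	/* kernel mappings may be global */
	return (newpte);
}

int
main(void)
{
	printf("0x%08x\n", make_pte(0x00123000u, 1, 0, 1, 0u));
	return (0);
}

/*
 * The mapping handed back lives in the crashdumpmap window.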
*/ void * pmap_kenter_temporary(vm_offset_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); #ifndef I386_CPU invlpg(va); #else invltlb(); #endif return ((void *)crashdumpmap); } #define MAX_INIT_PT (96) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size, int limit) { vm_offset_t tmpidx; int psize; vm_page_t p, mpte; if (pmap == NULL || object == NULL) return; /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ if (pseflag && (object->type == OBJT_DEVICE) && ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { int i; vm_page_t m[1]; unsigned int ptepindex; int npdes; pd_entry_t ptepa; if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) return; retry: p = vm_page_lookup(object, pindex); if (p != NULL) { vm_page_lock_queues(); if (vm_page_sleep_if_busy(p, FALSE, "init4p")) goto retry; } else { p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); if (p == NULL) return; m[0] = p; if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { vm_page_lock_queues(); vm_page_free(p); vm_page_unlock_queues(); return; } p = vm_page_lookup(object, pindex); vm_page_lock_queues(); vm_page_wakeup(p); } vm_page_unlock_queues(); ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) { return; } p->valid = VM_PAGE_BITS_ALL; pmap->pm_stats.resident_count += size >> PAGE_SHIFT; npdes = size >> PDRSHIFT; for(i = 0; i < npdes; i++) { pmap->pm_pdir[ptepindex] = ptepa | PG_U | PG_RW | PG_V | PG_PS; ptepa += NBPDR; ptepindex += 1; } pmap_invalidate_all(kernel_pmap); return; } psize = i386_btop(size); if ((object->type != OBJT_VNODE) || ((limit & MAP_PREFAULT_PARTIAL) && (psize > MAX_INIT_PT) && (object->resident_page_count > MAX_INIT_PT))) { return; } if (psize + pindex > object->size) { if (object->size < pindex) return; psize = object->size - pindex; } mpte = NULL; if ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->pindex < pindex) { p = vm_page_splay(pindex, object->root); if ((object->root = p)->pindex < pindex) p = TAILQ_NEXT(p, listq); } } /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex * or (2) NULL. */ for (; p != NULL && (tmpidx = p->pindex - pindex) < psize; p = TAILQ_NEXT(p, listq)) { /* * don't allow an madvise to blow away our really * free pages allocating pv entries. */ if ((limit & MAP_PREFAULT_MADVISE) && cnt.v_free_count < cnt.v_free_reserved) { break; } vm_page_lock_queues(); if ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((p->queue - p->pc) == PQ_CACHE) vm_page_deactivate(p); vm_page_busy(p); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), p, mpte); vm_page_lock_queues(); vm_page_wakeup(p); } vm_page_unlock_queues(); } return; } /* * pmap_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of pmap_object_init_pt, except it runs at page fault time instead * of mmap time. 
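 *
 * The pageorder table defined just below makes pmap_prefault() probe
 * addresses that alternate around the faulting address, nearest
 * first; a minimal sketch of the visit order (illustrative only):
 *
 *	for (i = 0; i < PAGEORDER_SIZE; i++)
 *		candidate = addra + pmap_prefault_pageorder[i];
 *
 * which yields addra-4096, addra+4096, addra-8192, addra+8192, ...
 * so the pages closest to the fault are tried first.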
*/ #define PFBAK 4 #define PFFOR 4 #define PAGEORDER_SIZE (PFBAK+PFFOR) static int pmap_prefault_pageorder[] = { -1 * PAGE_SIZE, 1 * PAGE_SIZE, -2 * PAGE_SIZE, 2 * PAGE_SIZE, -3 * PAGE_SIZE, 3 * PAGE_SIZE, -4 * PAGE_SIZE, 4 * PAGE_SIZE }; void pmap_prefault(pmap, addra, entry) pmap_t pmap; vm_offset_t addra; vm_map_entry_t entry; { int i; vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; vm_page_t m, mpte; vm_object_t object; if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) return; object = entry->object.vm_object; starta = addra - PFBAK * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; pt_entry_t *pte; addr = addra + pmap_prefault_pageorder[i]; if (addr > addra + (PFFOR * PAGE_SIZE)) addr = 0; if (addr < starta || addr >= entry->end) continue; if ((*pmap_pde(pmap, addr)) == 0) continue; pte = vtopte(addr); if (*pte) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = object; for (m = vm_page_lookup(lobject, pindex); (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); lobject = lobject->backing_object) { if (lobject->backing_object_offset & PAGE_MASK) break; pindex += (lobject->backing_object_offset >> PAGE_SHIFT); m = vm_page_lookup(lobject->backing_object, pindex); } /* * give-up when a page is not in memory */ if (m == NULL) break; vm_page_lock_queues(); if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (m->busy == 0) && (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { if ((m->queue - m->pc) == PQ_CACHE) { vm_page_deactivate(m); } vm_page_busy(m); vm_page_unlock_queues(); mpte = pmap_enter_quick(pmap, addr, m, mpte); vm_page_lock_queues(); vm_page_wakeup(m); } vm_page_unlock_queues(); } } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap, va, wired) register pmap_t pmap; vm_offset_t va; boolean_t wired; { register pt_entry_t *pte; if (pmap == NULL) return; pte = pmap_pte_quick(pmap, va); if (wired && !pmap_pte_w(pte)) pmap->pm_stats.wired_count++; else if (!wired && pmap_pte_w(pte)) pmap->pm_stats.wired_count--; /* * Wiring is not a hardware characteristic so there is no need to * invalidate TLB. */ pmap_pte_set_w(pte, wired); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; vm_page_t m; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; unsigned ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); /* * Don't let optional prefaulting of pages make us go * way below the low water mark of free pages or way * above high water mark of used pv entries. 
*/ if (cnt.v_free_count < cnt.v_free_reserved || pv_entry_count > pv_entry_high_water) break; pdnxt = (addr + NBPDR) & ~PDRMASK; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if (dst_pmap->pm_pdir[ptepindex] == 0) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; } continue; } srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); if ((srcmpte == NULL) || (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY)) continue; if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { /* * We have to check after allocpte for the * pte still being around... allocpte can * block. */ dstmpte = pmap_allocpte(dst_pmap, addr); dst_pte = pmap_pte_quick(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* * Clear the modified and * accessed (referenced) bits * during the copy. */ m = PHYS_TO_VM_PAGE(ptetemp); *dst_pte = ptetemp & ~(PG_M | PG_A); dst_pmap->pm_stats.resident_count++; pmap_insert_entry(dst_pmap, addr, dstmpte, m); } else { vm_page_lock_queues(); pmap_unwire_pte_hold(dst_pmap, dstmpte); vm_page_unlock_queues(); } if (dstmpte->hold_count >= srcmpte->hold_count) break; } addr += PAGE_SIZE; src_pte++; } } } #ifdef SMP /* * pmap_zpi_switchin*() * * These functions allow us to avoid doing IPIs alltogether in certain * temporary page-mapping situations (page zeroing). Instead to deal * with being preempted and moved onto a different cpu we invalidate * the page when the scheduler switches us in. This does not occur * very often so we remain relatively optimal with very little effort. */ static void pmap_zpi_switchin12(void) { invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); } static void pmap_zpi_switchin2(void) { invlpg((u_int)CADDR2); } static void pmap_zpi_switchin3(void) { invlpg((u_int)CADDR3); } #endif /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin2; #endif invlpg((u_int)CADDR2); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR2); else #endif bzero(CADDR2, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. 
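 *
 * The mapping idiom shared by the pmap_zero_page*() and
 * pmap_copy_page() routines, reduced to a sketch (locking and the
 * I386_CPU/SMP invalidation variants omitted): install a PTE in a
 * reserved kernel slot, work through the matching VA, then unmap.
 */
static void
zero_via_window(pt_entry_t *cmap, caddr_t caddr, vm_page_t m)
{

	*cmap = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
	invlpg((u_int)caddr);		/* discard any stale TLB entry */
	bzero(caddr, PAGE_SIZE);
	*cmap = 0;			/* tear the window down */
}
/*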
*/ void pmap_zero_page_area(vm_page_t m, int off, int size) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP2) panic("pmap_zero_page: CMAP2 busy"); *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin2; #endif invlpg((u_int)CADDR2); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) i686_pagezero(CADDR2); else #endif bzero((char *)CADDR2 + off, size); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) panic("pmap_zero_page: CMAP3 busy"); *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin3; #endif invlpg((u_int)CADDR3); #endif #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) i686_pagezero(CADDR3); else #endif bzero(CADDR3, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP3 = 0; } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t src, vm_page_t dst) { mtx_lock(&CMAPCADDR12_lock); if (*CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*CMAP2) panic("pmap_copy_page: CMAP2 busy"); *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A; *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M; #ifdef I386_CPU invltlb(); #else #ifdef SMP curthread->td_switchin = pmap_zpi_switchin12; #endif invlpg((u_int)CADDR1); invlpg((u_int)CADDR2); #endif bcopy(CADDR1, CADDR2, PAGE_SIZE); #ifdef SMP curthread->td_switchin = NULL; #endif *CMAP1 = 0; *CMAP2 = 0; mtx_unlock(&CMAPCADDR12_lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap, m) pmap_t pmap; vm_page_t m; { pv_entry_t pv; int loops = 0; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; } loops++; if (loops >= 16) break; } splx(s); return (FALSE); } #define PMAP_REMOVE_PAGES_CURPROC_ONLY /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. 
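 *
 * Caller-side sketch, assuming the usual process-teardown context
 * (the helper name and exact bounds are illustrative, not taken from
 * the actual exit path):
 */
static void
reclaim_user_mappings(struct proc *p)
{

	/* Must be the current process's pmap; see the warning below. */
	pmap_remove_pages(vmspace_pmap(p->p_vmspace), 0, VM_MAXUSER_ADDRESS);
}
/*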
*/ void pmap_remove_pages(pmap, sva, eva) pmap_t pmap; vm_offset_t sva, eva; { pt_entry_t *pte, tpte; vm_page_t m; pv_entry_t pv, npv; int s; #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } #endif mtx_assert(&vm_page_queue_mtx, MA_OWNED); s = splvm(); for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { if (pv->pv_va >= eva || pv->pv_va < sva) { npv = TAILQ_NEXT(pv, pv_plist); continue; } #ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY pte = vtopte(pv->pv_va); #else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); #endif tpte = *pte; if (tpte == 0) { printf("TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { npv = TAILQ_NEXT(pv, pv_plist); continue; } m = PHYS_TO_VM_PAGE(tpte); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT(m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pv->pv_pmap->pm_stats.resident_count--; *pte = 0; /* * Update the vm_page_t clean and reference bits. */ if (tpte & PG_M) { vm_page_dirty(m); } npv = TAILQ_NEXT(pv, pv_plist); TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); m->md.pv_list_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_FIRST(&m->md.pv_list) == NULL) { vm_page_flag_clear(m, PG_WRITEABLE); } pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } splx(s); pmap_invalidate_all(pmap); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { pv_entry_t pv; pt_entry_t *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return FALSE; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ if (!pmap_track_modified(pv->pv_va)) continue; #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (*pte & PG_M) { splx(s); return TRUE; } } splx(s); return (FALSE); } /* * this routine is used to modify bits in ptes */ static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem) { register pv_entry_t pv; register pt_entry_t *pte; int s; if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || (!setem && bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) return; s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); /* * Loop over all current mappings setting/clearing as appropos If * setting RO do we need to clear the VAC? 
*/ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { /* * don't write protect pager mappings */ if (!setem && (bit == PG_RW)) { if (!pmap_track_modified(pv->pv_va)) continue; } #if defined(PMAP_DIAGNOSTIC) if (!pv->pv_pmap) { printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va); continue; } #endif pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (setem) { *pte |= bit; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } else { pt_entry_t pbits = *pte; if (pbits & bit) { if (bit == PG_RW) { if (pbits & PG_M) { vm_page_dirty(m); } *pte = pbits & ~(PG_M|PG_RW); } else { *pte = pbits & ~bit; } pmap_invalidate_page(pv->pv_pmap, pv->pv_va); } } } if (!setem && bit == PG_RW) vm_page_flag_clear(m, PG_WRITEABLE); splx(s); } /* * pmap_page_protect: * * Lower the permission for all mappings to a given page. */ void pmap_page_protect(vm_page_t m, vm_prot_t prot) { if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { pmap_changebit(m, PG_RW, FALSE); } else { pmap_remove_all(m); } } } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { register pv_entry_t pv, pvf, pvn; pt_entry_t *pte; int s; int rtval = 0; if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) return (rtval); s = splvm(); mtx_assert(&vm_page_queue_mtx, MA_OWNED); if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pvf = pv; do { pvn = TAILQ_NEXT(pv, pv_list); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); if (!pmap_track_modified(pv->pv_va)) continue; pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte && (*pte & PG_A)) { *pte &= ~PG_A; pmap_invalidate_page(pv->pv_pmap, pv->pv_va); rtval++; if (rtval > 4) { break; } } } while ((pv = pvn) != NULL && pv != pvf); } splx(s); return (rtval); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { pmap_changebit(m, PG_M, FALSE); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { pmap_changebit(m, PG_A, FALSE); } /* * Miscellaneous support routines follow */ static void i386_protection_init() { register int *kp, prot; kp = protection_codes; for (prot = 0; prot < 8; prot++) { switch (prot) { case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE: /* * Read access is also 0. There isn't any execute bit, * so just make it readable. */ case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE: case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE: *kp++ = 0; break; case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE: case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE: *kp++ = PG_RW; break; } } } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. 
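 *
 * Typical driver-side usage, sketched with a made-up physical base
 * and register offset; pmap_mapdev()/pmap_unmapdev() below are the
 * real interface.
 */
static u_int32_t
read_device_reg(vm_paddr_t bar, vm_size_t len, int reg)
{
	void *va;
	u_int32_t v;

	va = pmap_mapdev(bar, len);
	v = *(volatile u_int32_t *)((char *)va + reg);
	pmap_unmapdev((vm_offset_t)va, len);
	return (v);
}
/*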
*/ void * pmap_mapdev(pa, size) vm_paddr_t pa; vm_size_t size; { vm_offset_t va, tmpva, offset; offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); GIANT_REQUIRED; va = kmem_alloc_pageable(kernel_map, size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); pa = pa & PG_FRAME; for (tmpva = va; size > 0; ) { pmap_kenter(tmpva, pa); size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, va, tmpva); return ((void *)(va + offset)); } void pmap_unmapdev(va, size) vm_offset_t va; vm_size_t size; { vm_offset_t base, offset, tmpva; pt_entry_t *pte; base = va & PG_FRAME; offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { pte = vtopte(tmpva); *pte = 0; } pmap_invalidate_range(kernel_pmap, va, tmpva); kmem_free(kernel_map, base, size); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap, addr) pmap_t pmap; vm_offset_t addr; { pt_entry_t *ptep, pte; vm_page_t m; int val = 0; ptep = pmap_pte_quick(pmap, addr); if (ptep == 0) { return 0; } if ((pte = *ptep) != 0) { vm_paddr_t pa; val = MINCORE_INCORE; if ((pte & PG_MANAGED) == 0) return val; pa = pte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); /* * Modified by us */ if (pte & PG_M) val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; else { /* * Modified by someone else */ vm_page_lock_queues(); if (m->dirty || pmap_is_modified(m)) val |= MINCORE_MODIFIED_OTHER; vm_page_unlock_queues(); } /* * Referenced by us */ if (pte & PG_A) val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; else { /* * Referenced by someone else */ vm_page_lock_queues(); if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(m)) { val |= MINCORE_REFERENCED_OTHER; vm_page_flag_set(m, PG_REFERENCED); } vm_page_unlock_queues(); } } return val; } void pmap_activate(struct thread *td) { struct proc *p = td->td_proc; pmap_t pmap; u_int32_t cr3; pmap = vmspace_pmap(td->td_proc->p_vmspace); #if defined(SMP) pmap->pm_active |= PCPU_GET(cpumask); #else pmap->pm_active |= 1; #endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else cr3 = vtophys(pmap->pm_pdir); +#endif /* XXXKSE this is wrong. * pmap_activate is for the current thread on the current cpu */ if (p->p_flag & P_THREADED) { /* Make sure all other cr3 entries are updated. */ /* what if they are running? 
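 * (as for cr3 itself: under PAE the register must hold the physical
 * address of the 32-byte page-directory-pointer table, which is what
 * the #ifdef above selects.  Distilled into a hypothetical helper,
 * not a function the source defines:)
 */
static __inline u_int32_t
pmap_cr3(pmap_t pmap)
{
#ifdef PAE
	/*
	 * The PDPT must reside below 4GB, so a 32-bit quantity is
	 * safe here; each of its four entries covers 1GB of VA.
	 */
	return (vtophys(pmap->pm_pdpt));
#else
	return (vtophys(pmap->pm_pdir));
#endif
}
/*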
XXXKSE (maybe abort them) */ FOREACH_THREAD_IN_PROC(p, td) { td->td_pcb->pcb_cr3 = cr3; } } else { td->td_pcb->pcb_cr3 = cr3; } load_cr3(cr3); #ifdef SWTCH_OPTIM_STATS tlb_flush_count++; #endif } vm_offset_t pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) { if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { return addr; } addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); return addr; } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); LIST_FOREACH(p, &allproc, p_list) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return npte; } pte = pmap_pte_quick(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return npte; } #endif #if defined(DEBUG) static void pads(pmap_t pm); void pmap_pvdump(vm_offset_t pa); /* print address space of pmap*/ static void pads(pm) pmap_t pm; { int i, j; vm_paddr_t va; pt_entry_t *ptep; if (pm == kernel_pmap) return; for (i = 0; i < NPDEPTD; i++) if (pm->pm_pdir[i]) for (j = 0; j < NPTEPG; j++) { va = (i << PDRSHIFT) + (j << PAGE_SHIFT); if (pm == kernel_pmap && va < KERNBASE) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; ptep = pmap_pte_quick(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *ptep); }; } void pmap_pvdump(pa) vm_paddr_t pa; { pv_entry_t pv; vm_page_t m; printf("pa %x", pa); m = PHYS_TO_VM_PAGE(pa); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); pads(pv->pv_pmap); } printf(" "); } #endif Index: head/sys/i386/i386/vm86bios.s =================================================================== --- head/sys/i386/i386/vm86bios.s (revision 112840) +++ head/sys/i386/i386/vm86bios.s (revision 112841) @@ -1,175 +1,178 @@ /*- * Copyright (c) 1998 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_npx.h" #include /* miscellaneous asm macros */ #include #include "assym.s" #define SCR_NEWPTD PCB_ESI /* readability macros */ #define SCR_VMFRAME PCB_EBP /* see vm86.c for explanation */ #define SCR_STACK PCB_ESP #define SCR_PGTABLE PCB_EBX #define SCR_ARGFRAME PCB_EIP #define SCR_TSS0 PCB_SPARE #define SCR_TSS1 (PCB_SPARE+4) .data ALIGN_DATA .globl vm86pcb vm86pcb: .long 0 .text /* * vm86_bioscall(struct trapframe_vm86 *vm86) */ ENTRY(vm86_bioscall) movl vm86pcb,%edx /* scratch data area */ movl 4(%esp),%eax movl %eax,SCR_ARGFRAME(%edx) /* save argument pointer */ pushl %ebx pushl %ebp pushl %esi pushl %edi pushl %gs #ifdef DEV_NPX pushfl cli movl PCPU(CURTHREAD),%ecx cmpl %ecx,PCPU(FPCURTHREAD) /* do we need to save fp? */ jne 1f testl %ecx,%ecx je 1f /* no curproc/npxproc */ pushl %edx movl TD_PCB(%ecx),%ecx addl $PCB_SAVEFPU,%ecx pushl %ecx call npxsave popl %ecx popl %edx /* recover our pcb */ 1: popfl #endif movl SCR_VMFRAME(%edx),%ebx /* target frame location */ movl %ebx,%edi /* destination */ movl SCR_ARGFRAME(%edx),%esi /* source (set on entry) */ movl $VM86_FRAMESIZE/4,%ecx /* sizeof(struct vm86frame)/4 */ cld rep movsl /* copy frame to new stack */ movl PCPU(CURPCB),%eax pushl %eax /* save curpcb */ movl %edx,PCPU(CURPCB) /* set curpcb to vm86pcb */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl 0(%ebx),%eax movl %eax,SCR_TSS0(%edx) /* save first word */ movl 4(%ebx),%eax andl $~0x200, %eax /* flip 386BSY -> 386TSS */ movl %eax,SCR_TSS1(%edx) /* save second word */ movl PCB_EXT(%edx),%edi /* vm86 tssd entry */ movl 0(%edi),%eax movl %eax,0(%ebx) movl 4(%edi),%eax movl %eax,4(%ebx) movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si movl %cr3,%eax pushl %eax /* save address space */ movl IdlePTD,%ecx movl %ecx,%ebx addl $KERNBASE,%ebx /* va of Idle PTD */ movl 0(%ebx),%eax pushl %eax /* old ptde != 0 when booting */ pushl %ebx /* keep for reuse */ movl %esp,SCR_STACK(%edx) /* save current stack location */ movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */ movl %eax,0(%ebx) /* ... 
install as PTD entry 0 */ +#ifdef PAE + movl IdlePDPT,%ecx +#endif movl %ecx,%cr3 /* new page tables */ movl SCR_VMFRAME(%edx),%esp /* switch to new stack */ call vm86_prepcall /* finish setup */ /* * Return via doreti */ MEXITCOUNT jmp doreti /* * vm86_biosret(struct trapframe_vm86 *vm86) */ ENTRY(vm86_biosret) movl vm86pcb,%edx /* data area */ movl 4(%esp),%esi /* source */ movl SCR_ARGFRAME(%edx),%edi /* destination */ movl $VM86_FRAMESIZE/4,%ecx /* size */ cld rep movsl /* copy frame to original frame */ movl SCR_STACK(%edx),%esp /* back to old stack */ popl %ebx /* saved va of Idle PTD */ popl %eax movl %eax,0(%ebx) /* restore old pte */ popl %eax movl %eax,%cr3 /* install old page table */ movl PCPU(TSS_GDT),%ebx /* entry in GDT */ movl SCR_TSS0(%edx),%eax movl %eax,0(%ebx) /* restore first word */ movl SCR_TSS1(%edx),%eax movl %eax,4(%ebx) /* restore second word */ movl $GPROC0_SEL*8,%esi /* GSEL(entry, SEL_KPL) */ ltr %si popl PCPU(CURPCB) /* restore curpcb/curproc */ movl SCR_ARGFRAME(%edx),%edx /* original stack frame */ movl TF_TRAPNO(%edx),%eax /* return (trapno) */ popl %gs popl %edi popl %esi popl %ebp popl %ebx ret /* back to our normal program */ Index: head/sys/i386/i386/vm_machdep.c =================================================================== --- head/sys/i386/i386/vm_machdep.c (revision 112840) +++ head/sys/i386/i386/vm_machdep.c (revision 112841) @@ -1,559 +1,567 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * $FreeBSD$ */ #include "opt_npx.h" #ifdef PC98 #include "opt_pc98.h" #endif #include "opt_reset.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PC98 #include #else #include #endif static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif extern int _ucodesel, _udatasel; /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(td1, p2, td2, flags) register struct thread *td1; register struct proc *p2; struct thread *td2; int flags; { register struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; #ifdef DEV_NPX register_t savecrit; #endif p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt = mdp1->md_ldt; if (pldt && pldt->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_free(td1); } } return; } /* Ensure that p1's pcb is up to date. */ #ifdef DEV_NPX if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); savecrit = intr_disable(); if (PCPU_GET(fpcurthread) == td1) npxsave(&td1->td_pcb->pcb_save); intr_restore(savecrit); #endif /* Point the pcb to the top of the stack */ pcb2 = (struct pcb *)(td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td2->td_pcb = pcb2; /* Copy p1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -16 is so we can expand the trapframe if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = 0; /* Copy the LDT, if necessary. 
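 *
 * (Stepping back: the pcb/trapframe placement computed earlier in
 * this function, and again in cpu_thread_setup(), gives the kernel
 * stack the layout below, top of stack first.  frame_for() is a
 * hypothetical restatement of that arithmetic, not a routine this
 * file defines:
 *
 *	td_kstack + KSTACK_PAGES * PAGE_SIZE
 *	    struct pcb		(td_pcb)
 *	    16 spare bytes	(room to grow into a vm86 trapframe)
 *	    struct trapframe	(td_frame)
 * )
 */
static struct trapframe *
frame_for(struct thread *td)
{
	struct pcb *pcb;

	pcb = (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
	return ((struct trapframe *)((caddr_t)pcb - 16) - 1);
}
/*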
*/ mtx_lock_spin(&sched_lock); if (mdp2->md_ldt != 0) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&sched_lock); /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_set_fork_handler(td, func, arg) struct thread *td; void (*func)(void *); void *arg; { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { struct mdproc *mdp; mdp = &td->td_proc->p_md; if (mdp->md_ldt) user_ldt_free(td); reset_dbregs(); } void cpu_thread_exit(struct thread *td) { struct pcb *pcb = td->td_pcb; #ifdef DEV_NPX npxexit(td); #endif if (pcb->pcb_flags & PCB_DBREGS) { /* * disable all hardware breakpoints */ reset_dbregs(); pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != 0) { /* XXXKSE XXXSMP not SMP SAFE.. what locks do we have? */ /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ mtx_lock(&Giant); kmem_free(kernel_map, (vm_offset_t)pcb->pcb_ext, ctob(IOPAGES + 1)); mtx_unlock(&Giant); pcb->pcb_ext = 0; } } void cpu_sched_exit(td) register struct thread *td; { } void cpu_thread_setup(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Pu t enough state in the new thread's PCB to get it to go back * userret(), where we can intercept it again to set the return (upcall) * Address and stack, along with those from upcals that are from other sources * such as those generated in thread_userret() itself. */ void cpu_set_upcall(struct thread *td, void *pcb) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. * * XXXKSE It might be a good idea to simply skip this as * the values of the other registers may be unimportant. * This would remove any requirement for knowing the KSE * at this time (see the matching comment below for * more analysis) (need a good safe default). */ bcopy(pcb, pcb2, sizeof(*pcb2)); /* * Create a new fresh stack for the new thread. * The -16 is so we can expand the trapframe if we go to vm86. * Don't forget to set this stack value into whatever supplies * the address for the fault handlers. * The contexts are filled in at the time we actually DO the * upcall as only then do we know which KSE we got. */ td->td_frame = (struct trapframe *)((caddr_t)pcb2 - 16) - 1; /* * Set registers for trampoline to user mode. 
Leave space for the * return address on stack. These are the kernel mode register values. */ +#ifdef PAE + pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt); +#else pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir); +#endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. XXXKSE ??? * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the uts * function. */ td->td_frame->tf_esp = (int)ku->ku_stack.ss_sp + ku->ku_stack.ss_size - 16; td->td_frame->tf_eip = (int)ku->ku_func; /* * Pass the address of the mailbox for this kse to the uts * function as a parameter on the stack. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)ku->ku_mailbox); } void cpu_wait(p) struct proc *p; { } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } /* * Force reset the processor by invalidating the entire address space! */ #ifdef SMP static void cpu_reset_proxy() { cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ stop_cpus((1<" */ invltlb(); /* NOTREACHED */ while(1); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. * Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(addr) vm_offset_t addr; { #ifdef DEV_ISA /* The ISA ``memory hole''. */ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } Index: head/sys/i386/include/_types.h =================================================================== --- head/sys/i386/include/_types.h (revision 112840) +++ head/sys/i386/include/_types.h (revision 112841) @@ -1,122 +1,126 @@ /*- * Copyright (c) 2002 Mike Barcroft * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)ansi.h 8.2 (Berkeley) 1/4/94 * From: @(#)types.h 8.3 (Berkeley) 1/5/94 * $FreeBSD$ */ #ifndef _MACHINE__TYPES_H_ #define _MACHINE__TYPES_H_ /* * Basic types upon which most other types are built. */ typedef __signed char __int8_t; typedef unsigned char __uint8_t; typedef short __int16_t; typedef unsigned short __uint16_t; typedef int __int32_t; typedef unsigned int __uint32_t; #if defined(lint) /* LONGLONG */ typedef long long __int64_t; /* LONGLONG */ typedef unsigned long long __uint64_t; #elif defined(__GNUC__) typedef int __attribute__((__mode__(__DI__))) __int64_t; typedef unsigned int __attribute__((__mode__(__DI__))) __uint64_t; #else /* LONGLONG */ typedef long long __int64_t; /* LONGLONG */ typedef unsigned long long __uint64_t; #endif /* * Standard type definitions. */ typedef unsigned long __clock_t; /* clock()... */ typedef __int32_t __critical_t; typedef double __double_t; typedef double __float_t; typedef __int32_t __intfptr_t; typedef __int64_t __intmax_t; typedef __int32_t __intptr_t; typedef __int32_t __int_fast8_t; typedef __int32_t __int_fast16_t; typedef __int32_t __int_fast32_t; typedef __int64_t __int_fast64_t; typedef __int8_t __int_least8_t; typedef __int16_t __int_least16_t; typedef __int32_t __int_least32_t; typedef __int64_t __int_least64_t; typedef __int32_t __ptrdiff_t; /* ptr1 - ptr2 */ typedef __int32_t __register_t; typedef __int32_t __segsz_t; /* segment size (in pages) */ typedef __uint32_t __size_t; /* sizeof() */ typedef __int32_t __ssize_t; /* byte count or error */ typedef __int32_t __time_t; /* time()... 
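 *
 * (Note: once PAE widens __vm_paddr_t to 64 bits a few lines below,
 * physical addresses no longer fit an int-sized printf format; the
 * convention this commit uses elsewhere is a uintmax_t cast, e.g.:
 *
 *	printf("pa %#jx\n", (uintmax_t)pa);
 *
 * rather than the old "%x".)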
*/ typedef __uint32_t __uintfptr_t; typedef __uint64_t __uintmax_t; typedef __uint32_t __uintptr_t; typedef __uint32_t __uint_fast8_t; typedef __uint32_t __uint_fast16_t; typedef __uint32_t __uint_fast32_t; typedef __uint64_t __uint_fast64_t; typedef __uint8_t __uint_least8_t; typedef __uint16_t __uint_least16_t; typedef __uint32_t __uint_least32_t; typedef __uint64_t __uint_least64_t; typedef __uint32_t __u_register_t; typedef __uint32_t __vm_offset_t; typedef __int64_t __vm_ooffset_t; +#ifdef PAE +typedef __uint64_t __vm_paddr_t; +#else typedef __uint32_t __vm_paddr_t; +#endif typedef __uint64_t __vm_pindex_t; typedef __uint32_t __vm_size_t; /* * Unusual type definitions. */ #if defined(__GNUC__) && (__GNUC__ == 2 && __GNUC_MINOR__ > 95 || __GNUC__ >= 3) typedef __builtin_va_list __va_list; /* internally known to gcc */ #else typedef char * __va_list; #endif /* post GCC 2.95 */ #if defined __GNUC__ && !defined(__GNUC_VA_LIST) && !defined(__NO_GNUC_VA_LIST) #define __GNUC_VA_LIST typedef __va_list __gnuc_va_list; /* compatibility w/GNU headers*/ #endif #endif /* !_MACHINE__TYPES_H_ */ Index: head/sys/i386/include/bus_at386.h =================================================================== --- head/sys/i386/include/bus_at386.h (revision 112840) +++ head/sys/i386/include/bus_at386.h (revision 112841) @@ -1,1216 +1,1224 @@ /* $NetBSD: bus.h,v 1.12 1997/10/01 08:25:15 fvdl Exp $ */ /*- * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, * NASA Ames Research Center. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1996 Charles M. Hannum. All rights reserved. * Copyright (c) 1996 Christopher G. Demetriou. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christopher G. Demetriou * for the NetBSD Project. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _I386_BUS_AT386_H_ #define _I386_BUS_AT386_H_ #include /* * To remain compatible with NetBSD's interface, default to both memio and * pio when neither of them is defined. */ #if !defined(_I386_BUS_PIO_H_) && !defined(_I386_BUS_MEMIO_H_) #define _I386_BUS_PIO_H_ #define _I386_BUS_MEMIO_H_ #endif /* * Values for the i386 bus space tag, not to be used directly by MI code. */ #define I386_BUS_SPACE_IO 0 /* space is i/o space */ #define I386_BUS_SPACE_MEM 1 /* space is mem space */ /* * Bus address and size types */ -typedef u_int bus_addr_t; -typedef u_int bus_size_t; +#ifdef PAE +typedef uint64_t bus_addr_t; +#else +typedef uint32_t bus_addr_t; +#endif +typedef uint32_t bus_size_t; #define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF #define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF #define BUS_SPACE_MAXSIZE 0xFFFFFFFF #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF +#ifdef PAE +#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL +#else #define BUS_SPACE_MAXADDR 0xFFFFFFFF +#endif #define BUS_SPACE_UNRESTRICTED (~0) /* * Access methods for bus resources and address space. */ typedef int bus_space_tag_t; typedef u_int bus_space_handle_t; /* * Map a region of device bus space into CPU virtual address space. */ #define BUS_SPACE_MAP_CACHEABLE 0x01 #define BUS_SPACE_MAP_LINEAR 0x02 int bus_space_map(bus_space_tag_t t, bus_addr_t addr, bus_size_t size, int flags, bus_space_handle_t *bshp); /* * Unmap a region of device bus space. */ static __inline void bus_space_unmap(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_unmap(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } /* * Get a new handle for a subregion of an already-mapped area of bus space. 
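 *
 * (A caveat following from the widened types above: with PAE a
 * bus_addr_t can exceed 32 bits, so code serving devices without
 * 64-bit addressing should compare against the 32-bit limit
 * explicitly.  A minimal illustrative predicate, not part of this
 * header:)
 */
static __inline int
bus_addr_fits_32bit(bus_addr_t pa)
{

	/* Addresses above BUS_SPACE_MAXADDR_32BIT need bouncing. */
	return (pa <= BUS_SPACE_MAXADDR_32BIT);
}
/*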
*/ static __inline int bus_space_subregion(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size, bus_space_handle_t *nbshp); static __inline int bus_space_subregion(bus_space_tag_t t __unused, bus_space_handle_t bsh, bus_size_t offset, bus_size_t size __unused, bus_space_handle_t *nbshp) { *nbshp = bsh + offset; return (0); } /* * Allocate a region of memory that is accessible to devices in bus space. */ int bus_space_alloc(bus_space_tag_t t, bus_addr_t rstart, bus_addr_t rend, bus_size_t size, bus_size_t align, bus_size_t boundary, int flags, bus_addr_t *addrp, bus_space_handle_t *bshp); /* * Free a region of bus space accessible memory. */ static __inline void bus_space_free(bus_space_tag_t t, bus_space_handle_t bsh, bus_size_t size); static __inline void bus_space_free(bus_space_tag_t t __unused, bus_space_handle_t bsh __unused, bus_size_t size __unused) { } #if defined(_I386_BUS_PIO_H_) || defined(_I386_BUS_MEMIO_H_) /* * Read a 1, 2, 4, or 8 byte quantity from bus space * described by tag/handle/offset. */ static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset); static __inline u_int8_t bus_space_read_1(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined (_I386_BUS_PIO_H_) #if defined (_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inb(handle + offset)); #endif #if defined (_I386_BUS_MEMIO_H_) return (*(volatile u_int8_t *)(handle + offset)); #endif } static __inline u_int16_t bus_space_read_2(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inw(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int16_t *)(handle + offset)); #endif } static __inline u_int32_t bus_space_read_4(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif return (inl(handle + offset)); #endif #if defined(_I386_BUS_MEMIO_H_) return (*(volatile u_int32_t *)(handle + offset)); #endif } #if 0 /* Cause a link error for bus_space_read_8 */ #define bus_space_read_8(t, h, o) !!! bus_space_read_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle/offset and copy into buffer provided. 
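 *
 * Usage sketch for the routines declared below: the "multi" forms
 * re-read the same offset each iteration, which suits FIFO-style
 * data registers.  The offset 0 used here is made up.
 */
static __inline void
drain_rx_fifo(bus_space_tag_t t, bus_space_handle_t h, u_int32_t *buf,
    size_t n)
{

	bus_space_read_multi_4(t, h, 0, buf, n);
}
/*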
*/ static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movb (%2),%%al \n\ stosb \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movw (%2),%%ax \n\ stosw \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } static __inline void bus_space_read_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif insl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: movl (%2),%%eax \n\ stosl \n\ loop 1b" : "=D" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_multi_8 */ #define bus_space_read_multi_8 !!! bus_space_read_multi_8 unimplemented !!! #endif /* * Read `count' 1, 2, 4, or 8 byte quantities from bus space * described by tag/handle and starting at `offset' and copy into * buffer provided. 
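 *
 * In contrast to the "multi" forms, the "region" forms below advance
 * the offset by the access width per datum, copying a window of bus
 * space rather than polling a single register.  Illustrative
 * counterpart to the FIFO sketch above:
 */
static __inline void
copy_reg_window(bus_space_tag_t t, bus_space_handle_t h, u_int32_t *buf,
    size_t n)
{

	/* Reads offsets 0, 4, 8, ... (the offset choice is made up). */
	bus_space_read_region_4(t, h, 0, buf, n);
}
/*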
*/ static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count); static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count); static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count); static __inline void bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inb %w2,%%al \n\ stosb \n\ incl %2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inw %w2,%%ax \n\ stosw \n\ addl $2,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } static __inline void bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: inl %w2,%%eax \n\ stosl \n\ addl $4,%2 \n\ loop 1b" : "=D" (addr), "=c" (count), "=d" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (addr), "=c" (count), "=S" (_port_) : "0" (addr), "1" (count), "2" (_port_) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_read_region_8 */ #define bus_space_read_region_8 !!! bus_space_read_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `value' to bus space * described by tag/handle/offset. 
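 *
 * Usage sketch for the write side; the register offset and the
 * write-1-to-clear semantics are invented for illustration.
 */
static __inline void
ack_device_intr(bus_space_tag_t t, bus_space_handle_t h)
{

	bus_space_write_4(t, h, 0x04, 0xffffffff);
}
/*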
*/ static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value); static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value); static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value); static __inline void bus_space_write_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outb(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int8_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outw(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int16_t *)(bsh + offset) = value; #endif } static __inline void bus_space_write_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outl(bsh + offset, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif *(volatile u_int32_t *)(bsh + offset) = value; #endif } #if 0 /* Cause a link error for bus_space_write_8 */ #define bus_space_write_8 !!! bus_space_write_8 not implemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer * provided to bus space described by tag/handle/offset. */ static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsb(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ movb %%al,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsw(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ movw %%ax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } static __inline void bus_space_write_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if 
defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif outsl(bsh + offset, addr, count); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ movl %%eax,(%2) \n\ loop 1b" : "=S" (addr), "=c" (count) : "r" (bsh + offset), "0" (addr), "1" (count) : "%eax", "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for bus_space_write_multi_8 */ #define bus_space_write_multi_8(t, h, o, a, c) \ !!! bus_space_write_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte quantities from the buffer provided * to bus space described by tag/handle starting at `offset'. */ static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count); static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count); static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count); static __inline void bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int8_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsb \n\ outb %%al,%w0 \n\ incl %0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsb" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int16_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsw \n\ outw %%ax,%w0 \n\ addl $2,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsw" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } static __inline void bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, const u_int32_t *addr, size_t count) { #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ 1: lodsl \n\ outl %%eax,%w0 \n\ addl $4,%0 \n\ loop 1b" : "=d" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "%eax", "memory", "cc"); #endif } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { int _port_ = bsh + offset; #ifdef __GNUC__ __asm __volatile(" \n\ cld \n\ repne \n\ movsl" : "=D" (_port_), "=S" (addr), "=c" (count) : "0" (_port_), "1" (addr), "2" (count) : "memory", "cc"); #endif } #endif } #if 0 /* Cause a link error for 
bus_space_write_region_8 */ #define bus_space_write_region_8 \ !!! bus_space_write_region_8 unimplemented !!! #endif /* * Write the 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle/offset `count' times. */ static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_multi_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_multi_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif while (count--) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif while (count--) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_multi_8 */ #define bus_space_set_multi_8 !!! bus_space_set_multi_8 unimplemented !!! #endif /* * Write `count' 1, 2, 4, or 8 byte value `val' to bus space described * by tag/handle starting at `offset'. 
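 *
 * A minimal sketch (the handle and sizes are hypothetical): clearing a
 * 4KB memory-mapped window one 32-bit word at a time could be written
 *
 *	bus_space_set_region_4(sc->sc_bt, sc->sc_bh, 0, 0, 4096 / 4);
 *
 * In contrast to the _set_multi_ functions above, which hit the same
 * address `count' times, the _set_region_ functions below step the
 * address by the access size on each iteration.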
*/ static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count); static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count); static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count); static __inline void bus_space_set_region_1(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int8_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr++) outb(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr++) *(volatile u_int8_t *)(addr) = value; #endif } static __inline void bus_space_set_region_2(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int16_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 2) outw(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 2) *(volatile u_int16_t *)(addr) = value; #endif } static __inline void bus_space_set_region_4(bus_space_tag_t tag, bus_space_handle_t bsh, bus_size_t offset, u_int32_t value, size_t count) { bus_space_handle_t addr = bsh + offset; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif for (; count != 0; count--, addr += 4) outl(addr, value); #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif for (; count != 0; count--, addr += 4) *(volatile u_int32_t *)(addr) = value; #endif } #if 0 /* Cause a link error for bus_space_set_region_8 */ #define bus_space_set_region_8 !!! bus_space_set_region_8 unimplemented !!! #endif /* * Copy `count' 1, 2, 4, or 8 byte values from bus space starting * at tag/bsh1/off1 to bus space starting at tag/bsh2/off2. 
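 *
 * For instance (the offsets are illustrative), scrolling an 80x25
 * text-mode screen up by one line within a single handle might be
 *
 *	bus_space_copy_region_2(tag, bsh, 80 * 2, bsh, 0, 80 * 24);
 *
 * The implementations below compare source and destination addresses
 * and pick a copy direction, so overlapping regions behave like
 * memmove(3) rather than memcpy(3).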
*/ static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count); static __inline void bus_space_copy_region_1(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) outb(addr2, inb(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) outb(addr2, inb(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1++, addr2++) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += (count - 1), addr2 += (count - 1); count != 0; count--, addr1--, addr2--) *(volatile u_int8_t *)(addr2) = *(volatile u_int8_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_2(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) outw(addr2, inw(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) outw(addr2, inw(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 2, addr2 += 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } else { /* dest after src: copy backwards */ for (addr1 += 2 * (count - 1), addr2 += 2 * (count - 1); count != 0; count--, addr1 -= 2, addr2 -= 2) *(volatile u_int16_t *)(addr2) = *(volatile u_int16_t *)(addr1); } } #endif } static __inline void bus_space_copy_region_4(bus_space_tag_t tag, bus_space_handle_t bsh1, bus_size_t off1, bus_space_handle_t bsh2, bus_size_t off2, size_t count) { bus_space_handle_t addr1 = bsh1 + off1; bus_space_handle_t addr2 = bsh2 + off2; #if defined(_I386_BUS_PIO_H_) #if defined(_I386_BUS_MEMIO_H_) if (tag == I386_BUS_SPACE_IO) #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; count--, addr1 += 4, addr2 += 4) outl(addr2, inl(addr1)); } else { /* dest after src: copy backwards */ for (addr1 += 4 * (count - 1), addr2 += 4 * (count - 1); count != 0; count--, addr1 -= 4, addr2 -= 4) outl(addr2, inl(addr1)); } } #endif #if defined(_I386_BUS_MEMIO_H_) #if defined(_I386_BUS_PIO_H_) else #endif { if (addr1 >= addr2) { /* src after dest: copy forward */ for (; count != 0; 
				count--, addr1 += 4, addr2 += 4)
				*(volatile u_int32_t *)(addr2) =
				    *(volatile u_int32_t *)(addr1);
		} else {
			/* dest after src: copy backwards */
			for (addr1 += 4 * (count - 1),
			    addr2 += 4 * (count - 1); count != 0;
			    count--, addr1 -= 4, addr2 -= 4)
				*(volatile u_int32_t *)(addr2) =
				    *(volatile u_int32_t *)(addr1);
		}
	}
#endif
}
#endif /* defined(_I386_BUS_PIO_H_) || defined(_I386_BUS_MEMIO_H_) */

#if 0	/* Cause a link error for bus_space_copy_region_8 */
#define bus_space_copy_region_8	!!! bus_space_copy_region_8 unimplemented !!!
#endif

/*
 * Bus read/write barrier methods.
 *
 *	void bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh,
 *	    bus_size_t offset, bus_size_t len, int flags);
 *
 * Note that BUS_SPACE_BARRIER_WRITE doesn't do anything other than
 * prevent reordering by the compiler; all Intel x86 processors currently
 * retire operations outside the CPU in program order.
 */
#define BUS_SPACE_BARRIER_READ	0x01	/* force read barrier */
#define BUS_SPACE_BARRIER_WRITE	0x02	/* force write barrier */

static __inline void
bus_space_barrier(bus_space_tag_t tag __unused, bus_space_handle_t bsh __unused,
    bus_size_t offset __unused, bus_size_t len __unused, int flags)
{
#ifdef __GNUC__
	if (flags & BUS_SPACE_BARRIER_READ)
		__asm __volatile("lock; addl $0,0(%%esp)" : : : "memory");
	else
		__asm __volatile("" : : : "memory");
#endif
}

#endif /* _I386_BUS_AT386_H_ */
Index: head/sys/i386/include/param.h
===================================================================
--- head/sys/i386/include/param.h	(revision 112840)
+++ head/sys/i386/include/param.h	(revision 112841)
@@ -1,142 +1,147 @@
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
 *
 * from: @(#)param.h	5.8 (Berkeley) 6/28/91
 * $FreeBSD$
 */

/*
 * Machine dependent constants for Intel 386.
 */

/*
 * Round p (pointer or byte index) up to a correctly-aligned value
 * for all data types (int, long, ...).  The result is unsigned int
 * and must be cast to any desired pointer type.
 */
#ifndef _ALIGNBYTES
#define _ALIGNBYTES	(sizeof(int) - 1)
#endif
#ifndef _ALIGN
#define _ALIGN(p)	(((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES)
#endif

#ifndef _MACHINE
#define _MACHINE	i386
#endif
#ifndef _MACHINE_ARCH
#define _MACHINE_ARCH	i386
#endif

#ifndef _NO_NAMESPACE_POLLUTION

#ifndef _MACHINE_PARAM_H_
#define _MACHINE_PARAM_H_

#ifndef MACHINE
#define MACHINE		"i386"
#endif
#ifndef MACHINE_ARCH
#define MACHINE_ARCH	"i386"
#endif
#define MID_MACHINE	MID_I386

#ifdef SMP
#define MAXCPU		16
#else
#define MAXCPU		1
#endif /* SMP */

#define ALIGNBYTES	_ALIGNBYTES
#define ALIGN(p)	_ALIGN(p)

#define PAGE_SHIFT	12		/* LOG2(PAGE_SIZE) */
#define PAGE_SIZE	(1<<PAGE_SHIFT)	/* bytes/page */
#define PAGE_MASK	(PAGE_SIZE-1)

#define atop(x)		((x) >> PAGE_SHIFT)
#define ptoa(x)		((x) << PAGE_SHIFT)

#define i386_btop(x)	((x) >> PAGE_SHIFT)
#define i386_ptob(x)	((x) << PAGE_SHIFT)

#define pgtok(x)	((x) * (PAGE_SIZE / 1024))

#endif /* !_MACHINE_PARAM_H_ */

#endif /* !_NO_NAMESPACE_POLLUTION */
Index: head/sys/i386/include/pmap.h
===================================================================
--- head/sys/i386/include/pmap.h	(revision 112840)
+++ head/sys/i386/include/pmap.h	(revision 112841)
@@ -1,265 +1,317 @@
 /*
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and William Jolitz of UUNET Technologies Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
 *
 * Derived from hp300 version by Mike Hibler, this version by William
 * Jolitz uses a recursive map [a pde points to the page directory] to
 * map the page tables using the pagetables themselves. This is done to
 * reduce the impact on kernel virtual memory for lots of sparse address
 * space, and to reduce the cost of memory to each process.
 *
 * from: hp300: @(#)pmap.h	7.2 (Berkeley) 12/16/90
 * from: @(#)pmap.h	7.4 (Berkeley) 5/12/91
 * $FreeBSD$
 */

#ifndef _MACHINE_PMAP_H_
#define _MACHINE_PMAP_H_

/*
 * Page-directory and page-table entries follow this format, with a few
 * of the fields not present here and there, depending on a lot of things.
 */
				/* ---- Intel Nomenclature ---- */
#define PG_V		0x001	/* P	Valid			*/
#define PG_RW		0x002	/* R/W	Read/Write		*/
#define PG_U		0x004	/* U/S	User/Supervisor		*/
#define PG_NC_PWT	0x008	/* PWT	Write through		*/
#define PG_NC_PCD	0x010	/* PCD	Cache disable		*/
#define PG_A		0x020	/* A	Accessed		*/
#define PG_M		0x040	/* D	Dirty			*/
#define PG_PS		0x080	/* PS	Page size (0=4k,1=4M)	*/
#define PG_G		0x100	/* G	Global			*/
#define PG_AVAIL1	0x200	/*    /	Available for system	*/
#define PG_AVAIL2	0x400	/*   <	programmers use		*/
#define PG_AVAIL3	0x800	/*    \				*/

/* Our various interpretations of the above */
#define PG_W		PG_AVAIL1	/* "Wired" pseudoflag */
#define PG_MANAGED	PG_AVAIL2
#define PG_FRAME	(~((vm_paddr_t)PAGE_MASK))
#define PG_PROT		(PG_RW|PG_U)	/* all protection bits */
#define PG_N		(PG_NC_PWT|PG_NC_PCD)	/* Non-cacheable */

/*
 * Page Protection Exception bits
 */
#define PGEX_P		0x01	/* Protection violation vs. not present */
#define PGEX_W		0x02	/* during a Write cycle */
#define PGEX_U		0x04	/* access from User mode (UPL) */

/*
 * Size of Kernel address space.  This is the number of page table pages
 * (4MB each) to use for the kernel.  256 pages == 1 Gigabyte.
 * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
 */
#ifndef KVA_PAGES
+#ifdef PAE
+#define KVA_PAGES	512
+#else
#define KVA_PAGES	256
#endif
+#endif

/*
 * Pte related macros
 */
#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT)))

/*
 * The *PTDI values control the layout of virtual memory.
 * SMP_PRIVPAGES: the per-cpu address space is 0xff800000 -> 0xffbfffff.
 */
#define APTDPTDI	(NPDEPTD-NPGPTD) /* alt ptd entry that points to APTD */
#ifdef SMP
#define MPPTDI		(APTDPTDI-1)	/* per cpu ptd entry */
#define KPTDI		(MPPTDI-NKPDE)	/* start of kernel virtual pde's */
#else
#define KPTDI		(APTDPTDI-NKPDE) /* start of kernel virtual pde's */
#endif	/* SMP */
#define PTDPTDI		(KPTDI-NPGPTD)	/* ptd entry that points to ptd! */

/*
 * XXX doesn't really belong here I guess...
 */
#define ISA_HOLE_START		0xa0000
#define ISA_HOLE_LENGTH		(0x100000-ISA_HOLE_START)

#ifndef LOCORE

#include <sys/queue.h>

-typedef u_int32_t pd_entry_t;
-typedef u_int32_t pt_entry_t;
+#ifdef PAE
+typedef uint64_t pdpt_entry_t;
+typedef uint64_t pd_entry_t;
+typedef uint64_t pt_entry_t;
+
+#define PTESHIFT	(3)
+#define PDESHIFT	(3)
+
+#else
+
+typedef uint32_t pd_entry_t;
+typedef uint32_t pt_entry_t;
+
 #define PTESHIFT	(2)
 #define PDESHIFT	(2)
 
+#endif
+
/*
 * Address of current and alternate address space page table maps
 * and directories.
 */
#ifdef _KERNEL
extern pt_entry_t PTmap[], APTmap[];
extern pd_entry_t PTD[], APTD[];
extern pd_entry_t PTDpde[], APTDpde[];
+#ifdef PAE
+extern pdpt_entry_t *IdlePDPT;
+#endif
extern pd_entry_t *IdlePTD;	/* physical address of "Idle" state directory */
#endif

#ifdef _KERNEL
/*
 * virtual address to page table entry and
 * to physical address. Likewise for alternate address space.
 * Note: these work recursively, thus vtopte of a pte will give
 * the corresponding pde that in turn maps it.
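 *
 * As a concrete non-PAE example of that recursion: with 4-byte ptes,
 * PTmap is a 4MB virtual window through which every pte is visible,
 * and
 *
 *	vtopte(va) == PTmap + i386_btop(va)
 *
 * so handing vtopte() an address that itself lies inside PTmap yields
 * an entry in the page directory, which PTDpde maps as if it were just
 * another page table.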
 */
#define vtopte(va)	(PTmap + i386_btop(va))
#define avtopte(va)	(APTmap + i386_btop(va))

/*
 *	Routine:	pmap_kextract
 *	Function:
 *		Extract the physical page address associated with the
 *		given kernel virtual address.
 */
static __inline vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	vm_paddr_t pa;

	if ((pa = (vm_offset_t) PTD[va >> PDRSHIFT]) & PG_PS) {
		pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1));
	} else {
		pa = *vtopte(va);
		pa = (pa & PG_FRAME) | (va & PAGE_MASK);
	}
	return pa;
}

#define vtophys(va)	pmap_kextract(((vm_offset_t) (va)))
+
+#ifdef PAE
+
+static __inline pt_entry_t
+pte_load_clear(pt_entry_t *pte)
+{
+	pt_entry_t r;
+
+	r = *pte;
+	__asm __volatile(
+	    "1:\n"
+	    "\tcmpxchg8b %1\n"
+	    "\tjnz 1b"
+	    : "+A" (r)
+	    : "m" (*pte), "b" (0), "c" (0));
+	return (r);
+}
+
+#else
+
+#define pte_load_clear(pte)	atomic_readandclear_int(pte)
+
 #endif
+#endif
+
/*
 * Pmap stuff
 */
struct pv_entry;

struct md_page {
	int			pv_list_count;
	TAILQ_HEAD(,pv_entry)	pv_list;
};

struct pmap {
	pd_entry_t		*pm_pdir;	/* KVA of page directory */
	vm_object_t		pm_pteobj;	/* Container for pte's */
	TAILQ_HEAD(,pv_entry)	pm_pvlist;	/* list of mappings in pmap */
	int			pm_active;	/* active on cpus */
	struct pmap_statistics	pm_stats;	/* pmap statistics */
	LIST_ENTRY(pmap)	pm_list;	/* List of all pmaps */
+#ifdef PAE
+	pdpt_entry_t		*pm_pdpt;	/* KVA of page directory pointer
+						   table */
+#endif
};

#define pmap_page_is_mapped(m)	(!TAILQ_EMPTY(&(m)->md.pv_list))
#define pmap_resident_count(pmap) (pmap)->pm_stats.resident_count

typedef struct pmap	*pmap_t;

#ifdef _KERNEL
extern struct pmap	kernel_pmap_store;
#define kernel_pmap	(&kernel_pmap_store)
#endif

/*
 * For each vm_page_t, there is a list of all currently valid virtual
 * mappings of that page.  An entry is a pv_entry_t, the list is pv_table.
 */
typedef struct pv_entry {
	pmap_t		pv_pmap;	/* pmap where mapping lies */
	vm_offset_t	pv_va;		/* virtual address for mapping */
	TAILQ_ENTRY(pv_entry)	pv_list;
	TAILQ_ENTRY(pv_entry)	pv_plist;
	vm_page_t	pv_ptem;	/* VM page for pte */
} *pv_entry_t;

#ifdef _KERNEL

#define NPPROVMTRR		8
#define PPRO_VMTRRphysBase0	0x200
#define PPRO_VMTRRphysMask0	0x201
struct ppro_vmtrr {
	u_int64_t base, mask;
};
extern struct ppro_vmtrr PPro_vmtrr[NPPROVMTRR];

extern caddr_t	CADDR1;
extern pt_entry_t *CMAP1;
extern vm_paddr_t avail_end;
extern vm_paddr_t avail_start;
extern vm_offset_t clean_eva;
extern vm_offset_t clean_sva;
extern vm_paddr_t phys_avail[];
extern char *ptvmmap;		/* poor name! */
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;

void	pmap_bootstrap(vm_paddr_t, vm_paddr_t);
void	pmap_kenter(vm_offset_t va, vm_paddr_t pa);
void	pmap_kremove(vm_offset_t);
void	*pmap_mapdev(vm_paddr_t, vm_size_t);
void	pmap_unmapdev(vm_offset_t, vm_size_t);
pt_entry_t *pmap_pte_quick(pmap_t, vm_offset_t) __pure2;
void	pmap_set_opt(void);
void	pmap_invalidate_page(pmap_t, vm_offset_t);
void	pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void	pmap_invalidate_all(pmap_t);

#endif /* _KERNEL */

#endif /* !LOCORE */

#endif /* !_MACHINE_PMAP_H_ */
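/*
 * Why pte_load_clear() needs cmpxchg8b under PAE: a 64-bit pte cannot
 * be read and cleared atomically with 32-bit loads and stores, so the
 * loop above retries until it swaps the full 8-byte entry for zero in
 * one shot.  The following stand-alone sketch of the same pattern is
 * illustrative only, not part of the header above; it compiles and runs
 * in user space on i386 (or with -m32).
 */
#include <stdint.h>
#include <stdio.h>

static inline uint64_t
pte_load_clear_demo(volatile uint64_t *pte)
{
	uint64_t r;

	r = *pte;			/* initial guess at the old value */
	__asm__ __volatile__(
	    "1:\n"
	    "\tcmpxchg8b %1\n"		/* if (*pte == edx:eax) *pte = ecx:ebx */
	    "\tjnz 1b"			/* else edx:eax = *pte; retry */
	    : "+A" (r), "+m" (*pte)
	    : "b" (0), "c" (0));
	return (r);			/* the pte as it was before the clear */
}

int
main(void)
{
	volatile uint64_t pte = 0x80000000003ULL;	/* arbitrary test value */
	uint64_t old;

	old = pte_load_clear_demo(&pte);
	printf("old pte %#llx, now %#llx\n",
	    (unsigned long long)old, (unsigned long long)pte);
	return (0);
}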