Index: head/share/man/man4/xen.4
===================================================================
--- head/share/man/man4/xen.4	(revision 282273)
+++ head/share/man/man4/xen.4	(revision 282274)
@@ -1,182 +1,149 @@
 .\" Copyright (c) 2010 Robert N. M. Watson
 .\" All rights reserved.
 .\"
 .\" This software was developed by SRI International and the University of
 .\" Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
 .\" ("CTSRD"), as part of the DARPA CRASH research program.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
 .Dd April 30, 2015
 .Dt XEN 4
 .Os
 .Sh NAME
 .Nm xen
 .Nd Xen Hypervisor Guest (DomU) Support
 .Sh SYNOPSIS
-To compile para-virtualized (PV) Xen guest support into an i386 kernel, place
-the following lines in your kernel configuration file:
-.Bd -ragged -offset indent
-.Cd "options PAE"
-.Cd "options XEN"
-.Cd "nooptions NATIVE"
-.Ed
-.Pp
 To compile hardware-assisted virtualization (HVM) Xen guest support with
 para-virtualized drivers into an amd64 or i386 kernel, place the following
 lines in your kernel configuration file:
 .Bd -ragged -offset indent
 .Cd "options XENHVM"
 .Cd "device xenpci"
 .Ed
 .Sh DESCRIPTION
 The Xen Hypervisor allows multiple virtual machines to be run on a single
 computer system.
 When first released, Xen required that i386 kernels be compiled
 "para-virtualized" as the x86 instruction set was not fully virtualizable.
 Primarily, para-virtualization modifies the virtual memory system to use
 hypervisor calls (hypercalls) rather than direct hardware instructions to
 modify the TLB, although para-virtualized device drivers were also required
 to access resources such as virtual network interfaces and disk devices.
 .Pp
 With later instruction set extensions from AMD and Intel to support fully
 virtualizable instructions, unmodified virtual memory systems can also be
 supported; this is referred to as hardware-assisted virtualization (HVM).
 HVM configurations may either rely on transparently emulated hardware
 peripherals, or para-virtualized drivers, which are aware of virtualization,
 and hence able to optimize certain behaviors to improve performance or
 semantics.
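As a minimal sketch of how the two SYNOPSIS lines above are typically used, the
fragment below shows a custom kernel configuration derived from GENERIC.  Only
the XENHVM and xenpci lines come from the manual page; the configuration name
XENHVMGUEST and the include/ident lines are illustrative conventions, not part
of this commit.

    # Hypothetical amd64 kernel configuration for a Xen HVM guest.
    include         GENERIC
    ident           XENHVMGUEST

    options         XENHVM          # Xen HVM guest support
    device          xenpci          # Xen PCI device; attaches the PV drivers

Such a configuration would normally be built with something like
"make buildkernel KERNCONF=XENHVMGUEST".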
 .Pp
 .Fx
-supports a fully para-virtualized (PV) kernel on the i386 architecture using
-.Cd "options XEN"
-and
-.Cd "nooptions NATIVE" ;
-currently, this requires use of a PAE kernel, enabled via
-.Cd "options PAE" .
+supports hardware-assisted virtualization (HVM) on both i386 and amd64
+kernels.
 .Pp
-.Fx
-supports hardware-assisted virtualization (HVM) on both the i386 and amd64
-kernels; however, PV device drivers with an HVM kernel are only supported on
-the amd64 architecture, and require
-.Cd "options XENHVM"
-and
-.Cd "device xenpci" .
-.Pp
 Para-virtualized device drivers are required in order to support certain
 functionality, such as processing management requests, returning idle
 physical memory pages to the hypervisor, etc.
 .Ss Xen DomU device drivers
-Xen para-virtualized drivers are automatically added to the kernel if a PV
-kernel is compiled using
-.Cd "options XEN" ;
-for HVM environments,
-.Cd "options XENHVM"
-and
-.Cd "device xenpci"
-are required.
-The follow drivers are supported:
+These para-virtualized drivers are supported:
 .Bl -hang -offset indent -width blkfront
 .It Nm balloon
 Allow physical memory pages to be returned to the hypervisor as a result of
 manual tuning or automatic policy.
 .It Nm blkback
 Exports local block devices or files to other Xen domains where they can
 then be imported via
 .Nm blkfront .
 .It Nm blkfront
 Import block devices from other Xen domains as local block devices, to be
 used for file systems, swap, etc.
 .It Nm console
 Export the low-level system console via the Xen console service.
 .It Nm control
 Process management operations from Domain 0, including power off, reboot,
 suspend, crash, and halt requests.
 .It Nm evtchn
 Expose Xen events via the
 .Pa /dev/xen/evtchn
 special device.
 .It Nm netback
 Export local network interfaces to other Xen domains where they can be
 imported via
 .Nm netfront .
 .It Nm netfront
 Import network interfaces from other Xen domains as local network
 interfaces, which may be used for IPv4, IPv6, etc.
 .It Nm pcifront
 Allow physical PCI devices to be passed through into a PV domain.
 .It Nm xenpci
 Represents the Xen PCI device, an emulated PCI device that is exposed to
 HVM domains.
 This device allows detection of the Xen hypervisor, and provides interrupt
 and shared memory services required to interact with the hypervisor.
 .El
 .Ss Performance considerations
 In general, PV drivers will perform better than emulated hardware, and are
 the recommended configuration for HVM installations.
 .Pp
 Using a hypervisor introduces a second layer of scheduling that may limit
 the effectiveness of certain
 .Fx
 scheduling optimisations.
 Among these is adaptive locking, which is no longer able to determine
 whether a thread holding a lock is in execution.
 It is recommended that adaptive locking be disabled when using Xen:
 .Bd -unfilled -offset indent
 .Cd "options NO_ADAPTIVE_MUTEXES"
 .Cd "options NO_ADAPTIVE_RWLOCKS"
 .Cd "options NO_ADAPTIVE_SX"
 .Ed
-.Sh SEE ALSO
-.Xr pae 4
 .Sh HISTORY
 Support for
 .Nm
 first appeared in
 .Fx 8.1 .
 .Sh AUTHORS
 .An -nosplit
 .Fx
 support for Xen was first added by
 .An Kip Macy Aq Mt kmacy@FreeBSD.org
 and
 .An Doug Rabson Aq Mt dfr@FreeBSD.org .
 Further refinements were made by
 .An Justin Gibbs Aq Mt gibbs@FreeBSD.org ,
 .An Adrian Chadd Aq Mt adrian@FreeBSD.org ,
 and
 .An Colin Percival Aq Mt cperciva@FreeBSD.org .
 This manual page was written by
 .An Robert Watson Aq Mt rwatson@FreeBSD.org .
 .Sh BUGS
 .Fx
 is only able to run as a Xen guest (DomU) and not as a Xen host (Dom0).
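The xenpci entry in the driver list above notes that it allows detection of the
Xen hypervisor from within an HVM kernel.  As a rough userland illustration of
how a guest can recognize Xen, the sketch below uses the generic x86 CPUID
hypervisor leaf; this is an assumption-laden example for orientation only and
is not the mechanism the xenpci driver or this commit describes.

    /*
     * Illustrative userland check for Xen via the CPUID hypervisor leaf
     * (0x40000000); a generic x86 technique, not a xenpci interface.
     */
    #include <cpuid.h>
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
            unsigned int eax, ebx, ecx, edx;
            char sig[13];

            /* Leaf 1, ECX bit 31: "running under a hypervisor". */
            __cpuid(1, eax, ebx, ecx, edx);
            if ((ecx & (1u << 31)) == 0) {
                    printf("no hypervisor detected\n");
                    return (1);
            }

            /* The vendor leaf returns a 12-byte signature in EBX/ECX/EDX. */
            __cpuid(0x40000000, eax, ebx, ecx, edx);
            memcpy(sig, &ebx, 4);
            memcpy(sig + 4, &ecx, 4);
            memcpy(sig + 8, &edx, 4);
            sig[12] = '\0';

            printf("hypervisor signature: %s\n", sig);
            return (strcmp(sig, "XenVMMXenVMM") == 0 ? 0 : 1);
    }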
-.Pp -A fully para-virtualized (PV) kernel is only supported on i386, and not -amd64. .Pp As of this release, Xen PV DomU support is not heavily tested; instability has been reported during VM migration of PV kernels. .Pp Certain PV driver features, such as the balloon driver, are under-exercised. Index: head/sys/amd64/include/xen/xenpmap.h =================================================================== --- head/sys/amd64/include/xen/xenpmap.h (revision 282273) +++ head/sys/amd64/include/xen/xenpmap.h (nonexistent) @@ -1,227 +0,0 @@ -/* - * - * Copyright (c) 2004 Christian Limpach. - * Copyright (c) 2004,2005 Kip Macy - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - -#ifndef _XEN_XENPMAP_H_ -#define _XEN_XENPMAP_H_ - -#include - -void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int); -void xen_pt_switch(vm_paddr_t); -void xen_set_ldt(vm_paddr_t, unsigned long); -void xen_pgdpt_pin(vm_paddr_t); -void xen_pgd_pin(vm_paddr_t); -void xen_pgd_unpin(vm_paddr_t); -void xen_pt_pin(vm_paddr_t); -void xen_pt_unpin(vm_paddr_t); -void xen_flush_queue(void); -void xen_check_queue(void); -#if 0 -void pmap_ref(pt_entry_t *pte, vm_paddr_t ma); -#endif - -#ifdef INVARIANTS -#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__) -#else -#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0) -#endif - -#ifdef PMAP_DEBUG -#define PMAP_REF pmap_ref -#define PMAP_DEC_REF_PAGE pmap_dec_ref_page -#define PMAP_MARK_PRIV pmap_mark_privileged -#define PMAP_MARK_UNPRIV pmap_mark_unprivileged -#else -#define PMAP_MARK_PRIV(a) -#define PMAP_MARK_UNPRIV(a) -#define PMAP_REF(a, b) -#define PMAP_DEC_REF_PAGE(a) -#endif - -#define ALWAYS_SYNC 0 - -#ifdef PT_DEBUG -#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__) -#else -#define PT_LOG() -#endif - -#define INVALID_P2M_ENTRY (~0UL) - -#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? 
*/ - -#define SH_PD_SET_VA 1 -#define SH_PD_SET_VA_MA 2 -#define SH_PD_SET_VA_CLEAR 3 - -struct pmap; -void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type); -#ifdef notyet -static vm_paddr_t -vptetomachpte(vm_paddr_t *pte) -{ - vm_offset_t offset, ppte; - vm_paddr_t pgoffset, retval, *pdir_shadow_ptr; - int pgindex; - - ppte = (vm_offset_t)pte; - pgoffset = (ppte & PAGE_MASK); - offset = ppte - (vm_offset_t)PTmap; - pgindex = ppte >> PDRSHIFT; - - pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow); - retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset; - return (retval); -} -#endif -#define PT_GET(_ptp) \ - (pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0)) - -#ifdef WRITABLE_PAGETABLES - -#define PT_SET_VA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - PT_LOG(); \ - *(_ptp) = xpmap_ptom((_npte)); \ -} while (/*CONSTCOND*/0) -#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - PT_LOG(); \ - *(_ptp) = (_npte); \ -} while (/*CONSTCOND*/0) -#define PT_CLEAR_VA(_ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - PT_LOG(); \ - *(_ptp) = 0; \ -} while (/*CONSTCOND*/0) - -#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#else /* !WRITABLE_PAGETABLES */ - -#define PT_SET_VA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - xen_queue_pt_update(vtomach(_ptp), \ - xpmap_ptom(_npte)); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - xen_queue_pt_update(vtomach(_ptp), _npte); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PT_CLEAR_VA(_ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - xen_queue_pt_update(vtomach(_ptp), 0); \ - if (sync || ALWAYS_SYNC) \ - xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#endif - -#define PT_SET_MA(_va, _ma) \ -do { \ - PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\ - (_ma), \ - UVMF_INVLPG| UVMF_ALL) < 0); \ -} while (/*CONSTCOND*/0) - -#define PT_UPDATES_FLUSH() do { \ - xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -static __inline vm_paddr_t -xpmap_mtop(vm_paddr_t mpa) -{ - vm_paddr_t tmp = (mpa & PG_FRAME); - - return machtophys(tmp) | (mpa & 
~PG_FRAME); -} - -static __inline vm_paddr_t -xpmap_ptom(vm_paddr_t ppa) -{ - vm_paddr_t tmp = (ppa & PG_FRAME); - - return phystomach(tmp) | (ppa & ~PG_FRAME); -} - -static __inline void -set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ -#ifdef notyet - PANIC_IF(max_mapnr && pfn >= max_mapnr); -#endif - if (xen_feature(XENFEAT_auto_translated_physmap)) { -#ifdef notyet - PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY)); -#endif - return; - } - xen_phys_machine[pfn] = mfn; -} - - - - -#endif /* _XEN_XENPMAP_H_ */ Property changes on: head/sys/amd64/include/xen/xenpmap.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/amd64/include/xen/xenfunc.h =================================================================== --- head/sys/amd64/include/xen/xenfunc.h (revision 282273) +++ head/sys/amd64/include/xen/xenfunc.h (revision 282274) @@ -1,82 +1,73 @@ /*- * Copyright (c) 2004, 2005 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _XEN_XENFUNC_H_ #define _XEN_XENFUNC_H_ -#ifdef XENHVM #include -#else -#include -#include -#endif #define BKPT __asm__("int3"); #define XPQ_CALL_DEPTH 5 #define XPQ_CALL_COUNT 2 #define PG_PRIV PG_AVAIL3 typedef struct { unsigned long pt_ref; unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH]; } pteinfo_t; extern pteinfo_t *pteinfo_list; #ifdef XENDEBUG_LOW #define __PRINTK(x) printk x #else #define __PRINTK(x) #endif char *xen_setbootenv(char *cmd_line); int xen_boothowto(char *envp); void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line); #ifdef INVARIANTS #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__) #else #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0) #endif - -#ifndef XENHVM -void xen_update_descriptor(union descriptor *, union descriptor *); -#endif extern struct mtx balloon_lock; #if 0 #define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags) #define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags) #else #define balloon_lock(__flags) __flags = 1 #define balloon_unlock(__flags) __flags = 0 #endif #endif /* _XEN_XENFUNC_H_ */ Index: head/sys/amd64/include/xen/xenvar.h =================================================================== --- head/sys/amd64/include/xen/xenvar.h (revision 282273) +++ head/sys/amd64/include/xen/xenvar.h (revision 282274) @@ -1,120 +1,59 @@ /*- * Copyright (c) 2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef XENVAR_H_ #define XENVAR_H_ #define XBOOTUP 0x1 #define XPMAP 0x2 extern int xendebug_flags; #ifndef NOXENDEBUG #define XENPRINTF printk #else #define XENPRINTF printf #endif #include #if 0 #define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__) #define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__) #define TRACE_DEBUG(argflags, _f, _a...) \ if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a); #else #define TRACE_ENTER #define TRACE_EXIT #define TRACE_DEBUG(argflags, _f, _a...) 
#endif -#ifdef XENHVM - -static inline vm_paddr_t -phystomach(vm_paddr_t pa) -{ - - return (pa); -} - -static inline vm_paddr_t -machtophys(vm_paddr_t ma) -{ - - return (ma); -} - #define vtomach(va) pmap_kextract((vm_offset_t) (va)) -#define PFNTOMFN(pa) (pa) -#define MFNTOPFN(ma) (ma) - -#define set_phys_to_machine(pfn, mfn) ((void)0) -#define phys_to_machine_mapping_valid(pfn) (TRUE) -#define PT_UPDATES_FLUSH() ((void)0) - -#else - -extern xen_pfn_t *xen_phys_machine; - - -extern xen_pfn_t *xen_machine_phys; -/* Xen starts physical pages after the 4MB ISA hole - - * FreeBSD doesn't - */ - - -#undef ADD_ISA_HOLE /* XXX */ - -#ifdef ADD_ISA_HOLE -#define ISA_INDEX_OFFSET 1024 -#define ISA_PDR_OFFSET 1 -#else -#define ISA_INDEX_OFFSET 0 -#define ISA_PDR_OFFSET 0 -#endif - - -#define PFNTOMFN(i) (xen_phys_machine[(i)]) -#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)]) - -#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE) -#define PTOV(x) (((uintptr_t)(x)) + KERNBASE) - -#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT) -#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT) - -#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT) -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) - -#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT) -#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT) - -#endif void xpq_init(void); int xen_create_contiguous_region(vm_page_t pages, int npages); void xen_destroy_contiguous_region(void * addr, int npages); #endif Index: head/sys/conf/files =================================================================== --- head/sys/conf/files (revision 282273) +++ head/sys/conf/files (revision 282274) @@ -1,4058 +1,4058 @@ # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. 
# acpi_quirks.h optional acpi \ dependency "$S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \ compile-with "${AWK} -f $S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \ no-obj no-implicit-rule before-depend \ clean "acpi_quirks.h" # # The 'fdt_dtb_file' target covers an actual DTB file name, which is derived # from the specified source (DTS) file: .dts -> .dtb # fdt_dtb_file optional fdt fdt_dtb_static \ compile-with "sh -c 'MACHINE=${MACHINE} $S/tools/fdt/make_dtb.sh $S ${FDT_DTS_FILE} ${.CURDIR}'" \ no-obj no-implicit-rule before-depend \ clean "${FDT_DTS_FILE:R}.dtb" fdt_static_dtb.h optional fdt fdt_dtb_static \ compile-with "sh -c 'MACHINE=${MACHINE} $S/tools/fdt/make_dtbh.sh ${FDT_DTS_FILE} ${.CURDIR}'" \ dependency "fdt_dtb_file" \ no-obj no-implicit-rule before-depend \ clean "fdt_static_dtb.h" feeder_eq_gen.h optional sound \ dependency "$S/tools/sound/feeder_eq_mkfilter.awk" \ compile-with "${AWK} -f $S/tools/sound/feeder_eq_mkfilter.awk -- ${FEEDER_EQ_PRESETS} > feeder_eq_gen.h" \ no-obj no-implicit-rule before-depend \ clean "feeder_eq_gen.h" feeder_rate_gen.h optional sound \ dependency "$S/tools/sound/feeder_rate_mkfilter.awk" \ compile-with "${AWK} -f $S/tools/sound/feeder_rate_mkfilter.awk -- ${FEEDER_RATE_PRESETS} > feeder_rate_gen.h" \ no-obj no-implicit-rule before-depend \ clean "feeder_rate_gen.h" snd_fxdiv_gen.h optional sound \ dependency "$S/tools/sound/snd_fxdiv_gen.awk" \ compile-with "${AWK} -f $S/tools/sound/snd_fxdiv_gen.awk -- > snd_fxdiv_gen.h" \ no-obj no-implicit-rule before-depend \ clean "snd_fxdiv_gen.h" miidevs.h optional miibus | mii \ dependency "$S/tools/miidevs2h.awk $S/dev/mii/miidevs" \ compile-with "${AWK} -f $S/tools/miidevs2h.awk $S/dev/mii/miidevs" \ no-obj no-implicit-rule before-depend \ clean "miidevs.h" pccarddevs.h standard \ dependency "$S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \ compile-with "${AWK} -f $S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \ no-obj no-implicit-rule before-depend \ clean "pccarddevs.h" teken_state.h optional sc | vt \ dependency "$S/teken/gensequences $S/teken/sequences" \ compile-with "${AWK} -f $S/teken/gensequences $S/teken/sequences > teken_state.h" \ no-obj no-implicit-rule before-depend \ clean "teken_state.h" usbdevs.h optional usb \ dependency "$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \ compile-with "${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -h" \ no-obj no-implicit-rule before-depend \ clean "usbdevs.h" usbdevs_data.h optional usb \ dependency "$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \ compile-with "${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -d" \ no-obj no-implicit-rule before-depend \ clean "usbdevs_data.h" cam/cam.c optional scbus cam/cam_compat.c optional scbus cam/cam_periph.c optional scbus cam/cam_queue.c optional scbus cam/cam_sim.c optional scbus cam/cam_xpt.c optional scbus cam/ata/ata_all.c optional scbus cam/ata/ata_xpt.c optional scbus cam/ata/ata_pmp.c optional scbus cam/scsi/scsi_xpt.c optional scbus cam/scsi/scsi_all.c optional scbus cam/scsi/scsi_cd.c optional cd cam/scsi/scsi_ch.c optional ch cam/ata/ata_da.c optional ada | da cam/ctl/ctl.c optional ctl cam/ctl/ctl_backend.c optional ctl cam/ctl/ctl_backend_block.c optional ctl cam/ctl/ctl_backend_ramdisk.c optional ctl cam/ctl/ctl_cmd_table.c optional ctl cam/ctl/ctl_frontend.c optional ctl cam/ctl/ctl_frontend_cam_sim.c optional ctl cam/ctl/ctl_frontend_internal.c optional ctl cam/ctl/ctl_frontend_iscsi.c optional ctl cam/ctl/ctl_scsi_all.c optional ctl cam/ctl/ctl_tpc.c optional ctl 
cam/ctl/ctl_tpc_local.c optional ctl cam/ctl/ctl_error.c optional ctl cam/ctl/ctl_util.c optional ctl cam/ctl/scsi_ctl.c optional ctl cam/scsi/scsi_da.c optional da cam/scsi/scsi_low.c optional ct | ncv | nsp | stg cam/scsi/scsi_pass.c optional pass cam/scsi/scsi_pt.c optional pt cam/scsi/scsi_sa.c optional sa cam/scsi/scsi_enc.c optional ses cam/scsi/scsi_enc_ses.c optional ses cam/scsi/scsi_enc_safte.c optional ses cam/scsi/scsi_sg.c optional sg cam/scsi/scsi_targ_bh.c optional targbh cam/scsi/scsi_target.c optional targ cam/scsi/smp_all.c optional scbus # shared between zfs and dtrace cddl/compat/opensolaris/kern/opensolaris.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_cmn_err.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kmem.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_misc.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_sunddi.c optional zfs compile-with "${ZFS_C}" # zfs specific cddl/compat/opensolaris/kern/opensolaris_acl.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_dtrace.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kobj.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kstat.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_lookup.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_policy.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_string.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_sysevent.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_taskq.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_uio.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_vfs.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_vm.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_zone.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/acl/acl_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/avl/avl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/fnvpair.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/nvpair.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/nvpair_alloc_fixed.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/unicode/u8_textprep.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfeature_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_comutil.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_deleg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zpool_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zprop_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/gfs.c optional zfs compile-with "${ZFS_C}" 
cddl/contrib/opensolaris/uts/common/fs/vnode.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/blkptr.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/ddt_zap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c optional zfs compile-with "${ZFS_C}" \ warning "kernel contains CDDL licensed ZFS filesystem" cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/gzip.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/lzjb.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/range_tree.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/sha256.c optional zfs 
compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/space_reftree.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/unique.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_debug.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c optional zfs compile-with "${ZFS_C}" 
cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zle.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/callb.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/fm.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/list.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/adler32.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/deflate.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inffast.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inflate.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inftrees.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/opensolaris_crc32.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/trees.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zmod.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zmod_subr.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zutil.c optional zfs compile-with "${ZFS_C}" compat/freebsd32/freebsd32_capability.c optional compat_freebsd32 compat/freebsd32/freebsd32_ioctl.c optional compat_freebsd32 compat/freebsd32/freebsd32_misc.c optional compat_freebsd32 compat/freebsd32/freebsd32_syscalls.c optional compat_freebsd32 compat/freebsd32/freebsd32_sysent.c optional compat_freebsd32 contrib/dev/acpica/common/ahids.c optional acpi acpi_debug contrib/dev/acpica/common/ahuuids.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbcmds.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbconvert.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbdisply.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbexec.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbfileio.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbhistry.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbinput.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbmethod.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbnames.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbstats.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbtest.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbutils.c optional acpi acpi_debug 
contrib/dev/acpica/components/debugger/dbxface.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmbuffer.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmcstyle.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmdeferred.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmnames.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmopcode.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmobject.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrc.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcl.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcl2.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcs.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmutils.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmwalk.c optional acpi acpi_debug contrib/dev/acpica/components/dispatcher/dsargs.c optional acpi contrib/dev/acpica/components/dispatcher/dscontrol.c optional acpi contrib/dev/acpica/components/dispatcher/dsfield.c optional acpi contrib/dev/acpica/components/dispatcher/dsinit.c optional acpi contrib/dev/acpica/components/dispatcher/dsmethod.c optional acpi contrib/dev/acpica/components/dispatcher/dsmthdat.c optional acpi contrib/dev/acpica/components/dispatcher/dsobject.c optional acpi contrib/dev/acpica/components/dispatcher/dsopcode.c optional acpi contrib/dev/acpica/components/dispatcher/dsutils.c optional acpi contrib/dev/acpica/components/dispatcher/dswexec.c optional acpi contrib/dev/acpica/components/dispatcher/dswload.c optional acpi contrib/dev/acpica/components/dispatcher/dswload2.c optional acpi contrib/dev/acpica/components/dispatcher/dswscope.c optional acpi contrib/dev/acpica/components/dispatcher/dswstate.c optional acpi contrib/dev/acpica/components/events/evevent.c optional acpi contrib/dev/acpica/components/events/evglock.c optional acpi contrib/dev/acpica/components/events/evgpe.c optional acpi contrib/dev/acpica/components/events/evgpeblk.c optional acpi contrib/dev/acpica/components/events/evgpeinit.c optional acpi contrib/dev/acpica/components/events/evgpeutil.c optional acpi contrib/dev/acpica/components/events/evhandler.c optional acpi contrib/dev/acpica/components/events/evmisc.c optional acpi contrib/dev/acpica/components/events/evregion.c optional acpi contrib/dev/acpica/components/events/evrgnini.c optional acpi contrib/dev/acpica/components/events/evsci.c optional acpi contrib/dev/acpica/components/events/evxface.c optional acpi contrib/dev/acpica/components/events/evxfevnt.c optional acpi contrib/dev/acpica/components/events/evxfgpe.c optional acpi contrib/dev/acpica/components/events/evxfregn.c optional acpi contrib/dev/acpica/components/executer/exconfig.c optional acpi contrib/dev/acpica/components/executer/exconvrt.c optional acpi contrib/dev/acpica/components/executer/excreate.c optional acpi contrib/dev/acpica/components/executer/exdebug.c optional acpi contrib/dev/acpica/components/executer/exdump.c optional acpi contrib/dev/acpica/components/executer/exfield.c optional acpi contrib/dev/acpica/components/executer/exfldio.c optional acpi contrib/dev/acpica/components/executer/exmisc.c optional acpi contrib/dev/acpica/components/executer/exmutex.c optional acpi contrib/dev/acpica/components/executer/exnames.c optional acpi contrib/dev/acpica/components/executer/exoparg1.c optional acpi 
contrib/dev/acpica/components/executer/exoparg2.c optional acpi contrib/dev/acpica/components/executer/exoparg3.c optional acpi contrib/dev/acpica/components/executer/exoparg6.c optional acpi contrib/dev/acpica/components/executer/exprep.c optional acpi contrib/dev/acpica/components/executer/exregion.c optional acpi contrib/dev/acpica/components/executer/exresnte.c optional acpi contrib/dev/acpica/components/executer/exresolv.c optional acpi contrib/dev/acpica/components/executer/exresop.c optional acpi contrib/dev/acpica/components/executer/exstore.c optional acpi contrib/dev/acpica/components/executer/exstoren.c optional acpi contrib/dev/acpica/components/executer/exstorob.c optional acpi contrib/dev/acpica/components/executer/exsystem.c optional acpi contrib/dev/acpica/components/executer/exutils.c optional acpi contrib/dev/acpica/components/hardware/hwacpi.c optional acpi contrib/dev/acpica/components/hardware/hwesleep.c optional acpi contrib/dev/acpica/components/hardware/hwgpe.c optional acpi contrib/dev/acpica/components/hardware/hwpci.c optional acpi contrib/dev/acpica/components/hardware/hwregs.c optional acpi contrib/dev/acpica/components/hardware/hwsleep.c optional acpi contrib/dev/acpica/components/hardware/hwtimer.c optional acpi contrib/dev/acpica/components/hardware/hwvalid.c optional acpi contrib/dev/acpica/components/hardware/hwxface.c optional acpi contrib/dev/acpica/components/hardware/hwxfsleep.c optional acpi contrib/dev/acpica/components/namespace/nsaccess.c optional acpi contrib/dev/acpica/components/namespace/nsalloc.c optional acpi contrib/dev/acpica/components/namespace/nsarguments.c optional acpi contrib/dev/acpica/components/namespace/nsconvert.c optional acpi contrib/dev/acpica/components/namespace/nsdump.c optional acpi contrib/dev/acpica/components/namespace/nseval.c optional acpi contrib/dev/acpica/components/namespace/nsinit.c optional acpi contrib/dev/acpica/components/namespace/nsload.c optional acpi contrib/dev/acpica/components/namespace/nsnames.c optional acpi contrib/dev/acpica/components/namespace/nsobject.c optional acpi contrib/dev/acpica/components/namespace/nsparse.c optional acpi contrib/dev/acpica/components/namespace/nspredef.c optional acpi contrib/dev/acpica/components/namespace/nsprepkg.c optional acpi contrib/dev/acpica/components/namespace/nsrepair.c optional acpi contrib/dev/acpica/components/namespace/nsrepair2.c optional acpi contrib/dev/acpica/components/namespace/nssearch.c optional acpi contrib/dev/acpica/components/namespace/nsutils.c optional acpi contrib/dev/acpica/components/namespace/nswalk.c optional acpi contrib/dev/acpica/components/namespace/nsxfeval.c optional acpi contrib/dev/acpica/components/namespace/nsxfname.c optional acpi contrib/dev/acpica/components/namespace/nsxfobj.c optional acpi contrib/dev/acpica/components/parser/psargs.c optional acpi contrib/dev/acpica/components/parser/psloop.c optional acpi contrib/dev/acpica/components/parser/psobject.c optional acpi contrib/dev/acpica/components/parser/psopcode.c optional acpi contrib/dev/acpica/components/parser/psopinfo.c optional acpi contrib/dev/acpica/components/parser/psparse.c optional acpi contrib/dev/acpica/components/parser/psscope.c optional acpi contrib/dev/acpica/components/parser/pstree.c optional acpi contrib/dev/acpica/components/parser/psutils.c optional acpi contrib/dev/acpica/components/parser/pswalk.c optional acpi contrib/dev/acpica/components/parser/psxface.c optional acpi contrib/dev/acpica/components/resources/rsaddr.c optional acpi 
contrib/dev/acpica/components/resources/rscalc.c optional acpi contrib/dev/acpica/components/resources/rscreate.c optional acpi contrib/dev/acpica/components/resources/rsdump.c optional acpi contrib/dev/acpica/components/resources/rsdumpinfo.c optional acpi contrib/dev/acpica/components/resources/rsinfo.c optional acpi contrib/dev/acpica/components/resources/rsio.c optional acpi contrib/dev/acpica/components/resources/rsirq.c optional acpi contrib/dev/acpica/components/resources/rslist.c optional acpi contrib/dev/acpica/components/resources/rsmemory.c optional acpi contrib/dev/acpica/components/resources/rsmisc.c optional acpi contrib/dev/acpica/components/resources/rsserial.c optional acpi contrib/dev/acpica/components/resources/rsutils.c optional acpi contrib/dev/acpica/components/resources/rsxface.c optional acpi contrib/dev/acpica/components/tables/tbdata.c optional acpi contrib/dev/acpica/components/tables/tbfadt.c optional acpi contrib/dev/acpica/components/tables/tbfind.c optional acpi contrib/dev/acpica/components/tables/tbinstal.c optional acpi contrib/dev/acpica/components/tables/tbprint.c optional acpi contrib/dev/acpica/components/tables/tbutils.c optional acpi contrib/dev/acpica/components/tables/tbxface.c optional acpi contrib/dev/acpica/components/tables/tbxfload.c optional acpi contrib/dev/acpica/components/tables/tbxfroot.c optional acpi contrib/dev/acpica/components/utilities/utaddress.c optional acpi contrib/dev/acpica/components/utilities/utalloc.c optional acpi contrib/dev/acpica/components/utilities/utbuffer.c optional acpi contrib/dev/acpica/components/utilities/utcache.c optional acpi contrib/dev/acpica/components/utilities/utcopy.c optional acpi contrib/dev/acpica/components/utilities/utdebug.c optional acpi contrib/dev/acpica/components/utilities/utdecode.c optional acpi contrib/dev/acpica/components/utilities/utdelete.c optional acpi contrib/dev/acpica/components/utilities/uterror.c optional acpi contrib/dev/acpica/components/utilities/uteval.c optional acpi contrib/dev/acpica/components/utilities/utexcep.c optional acpi contrib/dev/acpica/components/utilities/utglobal.c optional acpi contrib/dev/acpica/components/utilities/uthex.c optional acpi contrib/dev/acpica/components/utilities/utids.c optional acpi contrib/dev/acpica/components/utilities/utinit.c optional acpi contrib/dev/acpica/components/utilities/utlock.c optional acpi contrib/dev/acpica/components/utilities/utmath.c optional acpi contrib/dev/acpica/components/utilities/utmisc.c optional acpi contrib/dev/acpica/components/utilities/utmutex.c optional acpi contrib/dev/acpica/components/utilities/utobject.c optional acpi contrib/dev/acpica/components/utilities/utosi.c optional acpi contrib/dev/acpica/components/utilities/utownerid.c optional acpi contrib/dev/acpica/components/utilities/utpredef.c optional acpi contrib/dev/acpica/components/utilities/utresrc.c optional acpi contrib/dev/acpica/components/utilities/utstate.c optional acpi contrib/dev/acpica/components/utilities/utstring.c optional acpi contrib/dev/acpica/components/utilities/utuuid.c optional acpi acpi_debug contrib/dev/acpica/components/utilities/utxface.c optional acpi contrib/dev/acpica/components/utilities/utxferror.c optional acpi contrib/dev/acpica/components/utilities/utxfinit.c optional acpi #contrib/dev/acpica/components/utilities/utxfmutex.c optional acpi contrib/ipfilter/netinet/fil.c optional ipfilter inet \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_auth.c 
optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_fil_freebsd.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_frag.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_log.c optional ipfilter inet \ compile-with "${NORMAL_C} -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_nat.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_proxy.c optional ipfilter inet \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_state.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_lookup.c optional ipfilter inet \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -Wno-error -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_pool.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_htable.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_sync.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/mlfk_ipl.c optional ipfilter inet \ compile-with "${NORMAL_C} -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_nat6.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_rules.c optional ipfilter inet \ compile-with "${NORMAL_C} -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_scan.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/ip_dstlist.c optional ipfilter inet \ compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter" contrib/ipfilter/netinet/radix_ipf.c optional ipfilter inet \ compile-with "${NORMAL_C} -I$S/contrib/ipfilter" contrib/libfdt/fdt.c optional fdt contrib/libfdt/fdt_ro.c optional fdt contrib/libfdt/fdt_rw.c optional fdt contrib/libfdt/fdt_strerror.c optional fdt contrib/libfdt/fdt_sw.c optional fdt contrib/libfdt/fdt_wip.c optional fdt contrib/ngatm/netnatm/api/cc_conn.c optional ngatm_ccatm \ compile-with "${NORMAL_C_NOWERROR} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_data.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_dump.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_port.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_sig.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/cc_user.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/api/unisap.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/misc/straddr.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/misc/unimsg_common.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/msg/traffic.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/msg/uni_ie.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/msg/uni_msg.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" 
contrib/ngatm/netnatm/saal/saal_sscfu.c optional ngatm_sscfu \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/saal/saal_sscop.c optional ngatm_sscop \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_call.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_coord.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_party.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_print.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_reset.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_uni.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_unimsgcpy.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_verify.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" crypto/blowfish/bf_ecb.c optional ipsec crypto/blowfish/bf_skey.c optional crypto | ipsec crypto/camellia/camellia.c optional crypto | ipsec crypto/camellia/camellia-api.c optional crypto | ipsec crypto/des/des_ecb.c optional crypto | ipsec | netsmb crypto/des/des_setkey.c optional crypto | ipsec | netsmb crypto/rc4/rc4.c optional netgraph_mppc_encryption | kgssapi crypto/rijndael/rijndael-alg-fst.c optional crypto | geom_bde | \ ipsec | random | wlan_ccmp crypto/rijndael/rijndael-api-fst.c optional geom_bde | random crypto/rijndael/rijndael-api.c optional crypto | ipsec | wlan_ccmp crypto/sha1.c optional carp | crypto | ipsec | \ netgraph_mppc_encryption | sctp crypto/sha2/sha2.c optional crypto | geom_bde | ipsec | random | \ sctp | zfs crypto/sha2/sha256c.c optional crypto | geom_bde | ipsec | random | \ sctp | zfs crypto/siphash/siphash.c optional inet | inet6 crypto/siphash/siphash_test.c optional inet | inet6 ddb/db_access.c optional ddb ddb/db_break.c optional ddb ddb/db_capture.c optional ddb ddb/db_command.c optional ddb ddb/db_examine.c optional ddb ddb/db_expr.c optional ddb ddb/db_input.c optional ddb ddb/db_lex.c optional ddb ddb/db_main.c optional ddb ddb/db_output.c optional ddb ddb/db_print.c optional ddb ddb/db_ps.c optional ddb ddb/db_run.c optional ddb ddb/db_script.c optional ddb ddb/db_sym.c optional ddb ddb/db_thread.c optional ddb ddb/db_textdump.c optional ddb ddb/db_variables.c optional ddb ddb/db_watch.c optional ddb ddb/db_write_cmd.c optional ddb dev/aac/aac.c optional aac dev/aac/aac_cam.c optional aacp aac dev/aac/aac_debug.c optional aac dev/aac/aac_disk.c optional aac dev/aac/aac_linux.c optional aac compat_linux dev/aac/aac_pci.c optional aac pci dev/aacraid/aacraid.c optional aacraid dev/aacraid/aacraid_cam.c optional aacraid scbus dev/aacraid/aacraid_debug.c optional aacraid dev/aacraid/aacraid_linux.c optional aacraid compat_linux dev/aacraid/aacraid_pci.c optional aacraid pci dev/acpi_support/acpi_wmi.c optional acpi_wmi acpi dev/acpi_support/acpi_asus.c optional acpi_asus acpi dev/acpi_support/acpi_asus_wmi.c optional acpi_asus_wmi acpi dev/acpi_support/acpi_fujitsu.c optional acpi_fujitsu acpi dev/acpi_support/acpi_hp.c optional acpi_hp acpi dev/acpi_support/acpi_ibm.c optional acpi_ibm acpi dev/acpi_support/acpi_panasonic.c optional acpi_panasonic acpi dev/acpi_support/acpi_sony.c optional acpi_sony acpi dev/acpi_support/acpi_toshiba.c optional acpi_toshiba acpi dev/acpi_support/atk0110.c optional aibs acpi 
dev/acpica/Osd/OsdDebug.c optional acpi dev/acpica/Osd/OsdHardware.c optional acpi dev/acpica/Osd/OsdInterrupt.c optional acpi dev/acpica/Osd/OsdMemory.c optional acpi dev/acpica/Osd/OsdSchedule.c optional acpi dev/acpica/Osd/OsdStream.c optional acpi dev/acpica/Osd/OsdSynch.c optional acpi dev/acpica/Osd/OsdTable.c optional acpi dev/acpica/acpi.c optional acpi dev/acpica/acpi_acad.c optional acpi dev/acpica/acpi_battery.c optional acpi dev/acpica/acpi_button.c optional acpi dev/acpica/acpi_cmbat.c optional acpi dev/acpica/acpi_cpu.c optional acpi dev/acpica/acpi_ec.c optional acpi dev/acpica/acpi_isab.c optional acpi isa dev/acpica/acpi_lid.c optional acpi dev/acpica/acpi_package.c optional acpi dev/acpica/acpi_pci.c optional acpi pci dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/acpica/acpi_pcib_acpi.c optional acpi pci dev/acpica/acpi_pcib_pci.c optional acpi pci dev/acpica/acpi_perf.c optional acpi dev/acpica/acpi_powerres.c optional acpi dev/acpica/acpi_quirk.c optional acpi dev/acpica/acpi_resource.c optional acpi dev/acpica/acpi_smbat.c optional acpi dev/acpica/acpi_thermal.c optional acpi dev/acpica/acpi_throttle.c optional acpi dev/acpica/acpi_timer.c optional acpi dev/acpica/acpi_video.c optional acpi_video acpi dev/acpica/acpi_dock.c optional acpi_dock acpi dev/adlink/adlink.c optional adlink dev/advansys/adv_eisa.c optional adv eisa dev/advansys/adv_pci.c optional adv pci dev/advansys/advansys.c optional adv dev/advansys/advlib.c optional adv dev/advansys/advmcode.c optional adv dev/advansys/adw_pci.c optional adw pci dev/advansys/adwcam.c optional adw dev/advansys/adwlib.c optional adw dev/advansys/adwmcode.c optional adw dev/ae/if_ae.c optional ae pci dev/age/if_age.c optional age pci dev/agp/agp.c optional agp pci dev/agp/agp_if.m optional agp pci dev/aha/aha.c optional aha dev/aha/aha_isa.c optional aha isa dev/aha/aha_mca.c optional aha mca dev/ahb/ahb.c optional ahb eisa dev/ahci/ahci.c optional ahci dev/ahci/ahciem.c optional ahci dev/ahci/ahci_pci.c optional ahci pci dev/aic/aic.c optional aic dev/aic/aic_pccard.c optional aic pccard dev/aic7xxx/ahc_eisa.c optional ahc eisa dev/aic7xxx/ahc_isa.c optional ahc isa dev/aic7xxx/ahc_pci.c optional ahc pci \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/aic7xxx/ahd_pci.c optional ahd pci \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/aic7xxx/aic7770.c optional ahc dev/aic7xxx/aic79xx.c optional ahd pci dev/aic7xxx/aic79xx_osm.c optional ahd pci dev/aic7xxx/aic79xx_pci.c optional ahd pci dev/aic7xxx/aic79xx_reg_print.c optional ahd pci ahd_reg_pretty_print dev/aic7xxx/aic7xxx.c optional ahc dev/aic7xxx/aic7xxx_93cx6.c optional ahc dev/aic7xxx/aic7xxx_osm.c optional ahc dev/aic7xxx/aic7xxx_pci.c optional ahc pci dev/aic7xxx/aic7xxx_reg_print.c optional ahc ahc_reg_pretty_print dev/alc/if_alc.c optional alc pci dev/ale/if_ale.c optional ale pci dev/alpm/alpm.c optional alpm pci dev/altera/avgen/altera_avgen.c optional altera_avgen dev/altera/avgen/altera_avgen_fdt.c optional altera_avgen fdt dev/altera/avgen/altera_avgen_nexus.c optional altera_avgen dev/altera/sdcard/altera_sdcard.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_disk.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_io.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_fdt.c optional altera_sdcard fdt dev/altera/sdcard/altera_sdcard_nexus.c optional altera_sdcard dev/altera/pio/pio.c optional altera_pio dev/altera/pio/pio_if.m optional altera_pio dev/amdpm/amdpm.c optional 
amdpm pci | nfpm pci dev/amdsmb/amdsmb.c optional amdsmb pci dev/amr/amr.c optional amr dev/amr/amr_cam.c optional amrp amr dev/amr/amr_disk.c optional amr dev/amr/amr_linux.c optional amr compat_linux dev/amr/amr_pci.c optional amr pci dev/an/if_an.c optional an dev/an/if_an_isa.c optional an isa dev/an/if_an_pccard.c optional an pccard dev/an/if_an_pci.c optional an pci # dev/ata/ata_if.m optional ata | atacore dev/ata/ata-all.c optional ata | atacore dev/ata/ata-dma.c optional ata | atacore dev/ata/ata-lowlevel.c optional ata | atacore dev/ata/ata-sata.c optional ata | atacore dev/ata/ata-card.c optional ata pccard | atapccard dev/ata/ata-cbus.c optional ata pc98 | atapc98 dev/ata/ata-isa.c optional ata isa | ataisa dev/ata/ata-pci.c optional ata pci | atapci dev/ata/chipsets/ata-acard.c optional ata pci | ataacard dev/ata/chipsets/ata-acerlabs.c optional ata pci | ataacerlabs dev/ata/chipsets/ata-amd.c optional ata pci | ataamd dev/ata/chipsets/ata-ati.c optional ata pci | ataati dev/ata/chipsets/ata-cenatek.c optional ata pci | atacenatek dev/ata/chipsets/ata-cypress.c optional ata pci | atacypress dev/ata/chipsets/ata-cyrix.c optional ata pci | atacyrix dev/ata/chipsets/ata-highpoint.c optional ata pci | atahighpoint dev/ata/chipsets/ata-intel.c optional ata pci | ataintel dev/ata/chipsets/ata-ite.c optional ata pci | ataite dev/ata/chipsets/ata-jmicron.c optional ata pci | atajmicron dev/ata/chipsets/ata-marvell.c optional ata pci | atamarvell dev/ata/chipsets/ata-micron.c optional ata pci | atamicron dev/ata/chipsets/ata-national.c optional ata pci | atanational dev/ata/chipsets/ata-netcell.c optional ata pci | atanetcell dev/ata/chipsets/ata-nvidia.c optional ata pci | atanvidia dev/ata/chipsets/ata-promise.c optional ata pci | atapromise dev/ata/chipsets/ata-serverworks.c optional ata pci | ataserverworks dev/ata/chipsets/ata-siliconimage.c optional ata pci | atasiliconimage | ataati dev/ata/chipsets/ata-sis.c optional ata pci | atasis dev/ata/chipsets/ata-via.c optional ata pci | atavia # dev/ath/if_ath_pci.c optional ath_pci pci \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/if_ath_ahb.c optional ath_ahb \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/if_ath.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_alq.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_beacon.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_btcoex.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_debug.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_keycache.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_led.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_lna_div.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx_edma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx_ht.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tdma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_sysctl.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_rx.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_rx_edma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_spectral.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ah_osdep.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/ath_hal/ah.c optional 
ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v1.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v3.c optional ath_hal | ath_ar5211 | ath_ar5212 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v14.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v4k.c \ optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_9287.c \ optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_regdomain.c optional ath \ compile-with "${NORMAL_C} ${NO_WSHIFT_COUNT_NEGATIVE} ${NO_WSHIFT_COUNT_OVERFLOW} -I$S/dev/ath" # ar5210 dev/ath/ath_hal/ar5210/ar5210_attach.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_beacon.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_interrupts.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_keycache.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_misc.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_phy.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_power.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_recv.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_reset.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_xmit.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5211 dev/ath/ath_hal/ar5211/ar5211_attach.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_beacon.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_interrupts.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_keycache.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_misc.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_phy.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_power.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_recv.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_reset.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_xmit.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5212 dev/ath/ath_hal/ar5212/ar5212_ani.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ 
compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_attach.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_beacon.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_eeprom.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_gpio.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_interrupts.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_keycache.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_misc.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_phy.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_power.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_recv.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_reset.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_rfgain.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_xmit.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5416 (depends on ar5212) dev/ath/ath_hal/ar5416/ar5416_ani.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_attach.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_beacon.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_btcoex.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ 
compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_iq.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcgain.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcdc.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_eeprom.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_gpio.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_interrupts.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_keycache.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_misc.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_phy.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_power.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_radar.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_recv.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_reset.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_spectral.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_xmit.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9130 (depends upon ar5416) - also requires AH_SUPPORT_AR9130 # # Since this is an embedded MAC SoC, there's no need to compile it into the # default HAL. 
dev/ath/ath_hal/ar9001/ar9130_attach.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9001/ar9130_phy.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9001/ar9130_eeprom.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9160 (depends on ar5416) dev/ath/ath_hal/ar9001/ar9160_attach.c optional ath_hal | ath_ar9160 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9280 (depends on ar5416) dev/ath/ath_hal/ar9002/ar9280_attach.c optional ath_hal | ath_ar9280 | \ ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9280_olc.c optional ath_hal | ath_ar9280 | \ ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9285 (depends on ar5416 and ar9280) dev/ath/ath_hal/ar9002/ar9285_attach.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_btcoex.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_reset.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_cal.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_phy.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_diversity.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9287 (depends on ar5416) dev/ath/ath_hal/ar9002/ar9287_attach.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_reset.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_cal.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_olc.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9300 contrib/dev/ath/ath_hal/ar9300/ar9300_ani.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_attach.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_eeprom.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WCONSTANT_CONVERSION}" contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_interrupts.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_keycache.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} 
-I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_mci.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_paprd.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_phy.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_power.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_radar.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_radio.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_recv.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_recv_ds.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WSOMETIMES_UNINITIALIZED} -Wno-unused-function" contrib/dev/ath/ath_hal/ar9300/ar9300_stub.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_stub_funcs.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_timer.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_xmit_ds.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" # rf backends dev/ath/ath_hal/ar5212/ar2316.c optional ath_rf2316 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2317.c optional ath_rf2317 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2413.c optional ath_hal | ath_rf2413 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2425.c optional ath_hal | ath_rf2425 | ath_rf2417 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5111.c optional ath_hal | ath_rf5111 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5112.c optional ath_hal | ath_rf5112 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5413.c optional ath_hal | ath_rf5413 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar2133.c 
optional ath_hal | ath_ar5416 | \ ath_ar9130 | ath_ar9160 | ath_ar9280 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9280.c optional ath_hal | ath_ar9280 | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ath rate control algorithms dev/ath/ath_rate/amrr/amrr.c optional ath_rate_amrr \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/onoe/onoe.c optional ath_rate_onoe \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/sample/sample.c optional ath_rate_sample \ compile-with "${NORMAL_C} -I$S/dev/ath" # ath DFS modules dev/ath/ath_dfs/null/dfs_null.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/bce/if_bce.c optional bce dev/bfe/if_bfe.c optional bfe dev/bge/if_bge.c optional bge dev/bktr/bktr_audio.c optional bktr pci dev/bktr/bktr_card.c optional bktr pci dev/bktr/bktr_core.c optional bktr pci dev/bktr/bktr_i2c.c optional bktr pci smbus dev/bktr/bktr_os.c optional bktr pci dev/bktr/bktr_tuner.c optional bktr pci dev/bktr/msp34xx.c optional bktr pci dev/buslogic/bt.c optional bt dev/buslogic/bt_eisa.c optional bt eisa dev/buslogic/bt_isa.c optional bt isa dev/buslogic/bt_mca.c optional bt mca dev/buslogic/bt_pci.c optional bt pci dev/bwi/bwimac.c optional bwi dev/bwi/bwiphy.c optional bwi dev/bwi/bwirf.c optional bwi dev/bwi/if_bwi.c optional bwi dev/bwi/if_bwi_pci.c optional bwi pci # XXX Work around clang warning, until maintainer approves fix. dev/bwn/if_bwn.c optional bwn siba_bwn \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/cardbus/cardbus.c optional cardbus dev/cardbus/cardbus_cis.c optional cardbus dev/cardbus/cardbus_device.c optional cardbus dev/cas/if_cas.c optional cas dev/cfi/cfi_bus_fdt.c optional cfi fdt dev/cfi/cfi_bus_nexus.c optional cfi dev/cfi/cfi_core.c optional cfi dev/cfi/cfi_dev.c optional cfi dev/cfi/cfi_disk.c optional cfid dev/ciss/ciss.c optional ciss dev/cm/smc90cx6.c optional cm dev/cmx/cmx.c optional cmx dev/cmx/cmx_pccard.c optional cmx pccard dev/cpufreq/ichss.c optional cpufreq dev/cs/if_cs.c optional cs dev/cs/if_cs_isa.c optional cs isa dev/cs/if_cs_pccard.c optional cs pccard dev/cxgb/cxgb_main.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_sge.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mc5.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc7323.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc8211.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_ael1002.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_aq100x.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mv88e1xxx.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_xgmac.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_t3_hw.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_tn1010.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/sys/uipc_mvec.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \ compile-with 
"${NORMAL_C} -I$S/dev/cxgb" dev/cxgbe/t4_mp_ring.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_main.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_netmap.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_sge.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_l2t.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_tracer.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/common/t4_hw.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" t4fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk t4fw_cfg.fw:t4fw_cfg t4fw_cfg_uwire.fw:t4fw_cfg_uwire t4fw.fw:t4fw -mt4fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t4fw_cfg.c" t4fw_cfg.fwo optional cxgbe \ dependency "t4fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw_cfg.fwo" t4fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t4fw_cfg.fw" t4fw_cfg_uwire.fwo optional cxgbe \ dependency "t4fw_cfg_uwire.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw_cfg_uwire.fwo" t4fw_cfg_uwire.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw_cfg_uwire.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t4fw_cfg_uwire.fw" t4fw.fwo optional cxgbe \ dependency "t4fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw.fwo" t4fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw-1.11.27.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t4fw.fw" t5fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk t5fw_cfg.fw:t5fw_cfg t5fw.fw:t5fw -mt5fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t5fw_cfg.c" t5fw_cfg.fwo optional cxgbe \ dependency "t5fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t5fw_cfg.fwo" t5fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t5fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t5fw_cfg.fw" t5fw.fwo optional cxgbe \ dependency "t5fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t5fw.fwo" t5fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t5fw-1.11.27.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t5fw.fw" dev/cy/cy.c optional cy dev/cy/cy_isa.c optional cy isa dev/cy/cy_pci.c optional cy pci dev/dc/if_dc.c optional dc pci dev/dc/dcphy.c optional dc pci dev/dc/pnphy.c optional dc pci dev/dcons/dcons.c optional dcons dev/dcons/dcons_crom.c optional dcons_crom dev/dcons/dcons_os.c optional dcons dev/de/if_de.c optional de pci dev/digi/CX.c optional digi_CX dev/digi/CX_PCI.c optional digi_CX_PCI dev/digi/EPCX.c optional digi_EPCX dev/digi/EPCX_PCI.c optional digi_EPCX_PCI dev/digi/Xe.c optional digi_Xe dev/digi/Xem.c optional digi_Xem dev/digi/Xr.c optional digi_Xr dev/digi/digi.c optional digi dev/digi/digi_isa.c optional digi isa dev/digi/digi_pci.c optional digi pci dev/dpt/dpt_eisa.c optional dpt eisa dev/dpt/dpt_pci.c optional dpt pci dev/dpt/dpt_scsi.c optional dpt dev/drm/ati_pcigart.c optional drm dev/drm/drm_agpsupport.c optional drm dev/drm/drm_auth.c optional drm dev/drm/drm_bufs.c optional drm dev/drm/drm_context.c optional drm dev/drm/drm_dma.c optional drm dev/drm/drm_drawable.c optional drm dev/drm/drm_drv.c optional 
drm dev/drm/drm_fops.c optional drm dev/drm/drm_hashtab.c optional drm dev/drm/drm_ioctl.c optional drm dev/drm/drm_irq.c optional drm dev/drm/drm_lock.c optional drm dev/drm/drm_memory.c optional drm dev/drm/drm_mm.c optional drm dev/drm/drm_pci.c optional drm dev/drm/drm_scatter.c optional drm dev/drm/drm_sman.c optional drm dev/drm/drm_sysctl.c optional drm dev/drm/drm_vm.c optional drm dev/drm/i915_dma.c optional i915drm dev/drm/i915_drv.c optional i915drm dev/drm/i915_irq.c optional i915drm dev/drm/i915_mem.c optional i915drm dev/drm/i915_suspend.c optional i915drm dev/drm/mach64_dma.c optional mach64drm dev/drm/mach64_drv.c optional mach64drm dev/drm/mach64_irq.c optional mach64drm dev/drm/mach64_state.c optional mach64drm dev/drm/mga_dma.c optional mgadrm dev/drm/mga_drv.c optional mgadrm dev/drm/mga_irq.c optional mgadrm dev/drm/mga_state.c optional mgadrm dev/drm/mga_warp.c optional mgadrm dev/drm/r128_cce.c optional r128drm \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/drm/r128_drv.c optional r128drm dev/drm/r128_irq.c optional r128drm dev/drm/r128_state.c optional r128drm dev/drm/r300_cmdbuf.c optional radeondrm dev/drm/r600_blit.c optional radeondrm dev/drm/r600_cp.c optional radeondrm \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/drm/radeon_cp.c optional radeondrm \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/drm/radeon_cs.c optional radeondrm dev/drm/radeon_drv.c optional radeondrm dev/drm/radeon_irq.c optional radeondrm dev/drm/radeon_mem.c optional radeondrm dev/drm/radeon_state.c optional radeondrm dev/drm/savage_bci.c optional savagedrm dev/drm/savage_drv.c optional savagedrm dev/drm/savage_state.c optional savagedrm dev/drm/sis_drv.c optional sisdrm dev/drm/sis_ds.c optional sisdrm dev/drm/sis_mm.c optional sisdrm dev/drm/tdfx_drv.c optional tdfxdrm dev/drm/via_dma.c optional viadrm dev/drm/via_dmablit.c optional viadrm dev/drm/via_drv.c optional viadrm dev/drm/via_irq.c optional viadrm dev/drm/via_map.c optional viadrm dev/drm/via_mm.c optional viadrm dev/drm/via_verifier.c optional viadrm dev/drm/via_video.c optional viadrm dev/ed/if_ed.c optional ed dev/ed/if_ed_novell.c optional ed dev/ed/if_ed_rtl80x9.c optional ed dev/ed/if_ed_pccard.c optional ed pccard dev/ed/if_ed_pci.c optional ed pci dev/eisa/eisa_if.m standard dev/eisa/eisaconf.c optional eisa dev/e1000/if_em.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/if_lem.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/if_igb.c optional igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_80003es2lan.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82540.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82541.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82542.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82543.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82571.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82575.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_ich8lan.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_i210.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_api.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_mac.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" 
dev/e1000/e1000_manage.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_nvm.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_phy.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_vf.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_mbx.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_osdep.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/et/if_et.c optional et dev/en/if_en_pci.c optional en pci dev/en/midway.c optional en dev/ep/if_ep.c optional ep dev/ep/if_ep_eisa.c optional ep eisa dev/ep/if_ep_isa.c optional ep isa dev/ep/if_ep_mca.c optional ep mca dev/ep/if_ep_pccard.c optional ep pccard dev/esp/esp_pci.c optional esp pci dev/esp/ncr53c9x.c optional esp dev/etherswitch/arswitch/arswitch.c optional arswitch dev/etherswitch/arswitch/arswitch_reg.c optional arswitch dev/etherswitch/arswitch/arswitch_phy.c optional arswitch dev/etherswitch/arswitch/arswitch_8216.c optional arswitch dev/etherswitch/arswitch/arswitch_8226.c optional arswitch dev/etherswitch/arswitch/arswitch_8316.c optional arswitch dev/etherswitch/arswitch/arswitch_8327.c optional arswitch dev/etherswitch/arswitch/arswitch_7240.c optional arswitch dev/etherswitch/arswitch/arswitch_9340.c optional arswitch dev/etherswitch/arswitch/arswitch_vlans.c optional arswitch dev/etherswitch/etherswitch.c optional etherswitch dev/etherswitch/etherswitch_if.m optional etherswitch dev/etherswitch/ip17x/ip17x.c optional ip17x dev/etherswitch/ip17x/ip175c.c optional ip17x dev/etherswitch/ip17x/ip175d.c optional ip17x dev/etherswitch/ip17x/ip17x_phy.c optional ip17x dev/etherswitch/ip17x/ip17x_vlans.c optional ip17x dev/etherswitch/mdio_if.m optional miiproxy dev/etherswitch/mdio.c optional miiproxy dev/etherswitch/miiproxy.c optional miiproxy dev/etherswitch/rtl8366/rtl8366rb.c optional rtl8366rb dev/etherswitch/ukswitch/ukswitch.c optional ukswitch dev/ex/if_ex.c optional ex dev/ex/if_ex_isa.c optional ex isa dev/ex/if_ex_pccard.c optional ex pccard dev/exca/exca.c optional cbb dev/fatm/if_fatm.c optional fatm pci dev/fb/fbd.c optional fbd | vt dev/fb/fb_if.m standard dev/fb/splash.c optional sc splash dev/fdt/fdt_clock.c optional fdt fdt_clock dev/fdt/fdt_clock_if.m optional fdt fdt_clock dev/fdt/fdt_common.c optional fdt dev/fdt/fdt_pinctrl.c optional fdt fdt_pinctrl dev/fdt/fdt_pinctrl_if.m optional fdt fdt_pinctrl dev/fdt/fdt_slicer.c optional fdt cfi | fdt nand dev/fdt/fdt_static_dtb.S optional fdt fdt_dtb_static \ dependency "$S/boot/fdt/dts/${MACHINE}/${FDT_DTS_FILE}" dev/fdt/simplebus.c optional fdt dev/fe/if_fe.c optional fe dev/fe/if_fe_pccard.c optional fe pccard dev/filemon/filemon.c optional filemon dev/firewire/firewire.c optional firewire dev/firewire/fwcrom.c optional firewire dev/firewire/fwdev.c optional firewire dev/firewire/fwdma.c optional firewire dev/firewire/fwmem.c optional firewire dev/firewire/fwohci.c optional firewire dev/firewire/fwohci_pci.c optional firewire pci dev/firewire/if_fwe.c optional fwe dev/firewire/if_fwip.c optional fwip dev/firewire/sbp.c optional sbp dev/firewire/sbp_targ.c optional sbp_targ dev/flash/at45d.c optional at45d dev/flash/mx25l.c optional mx25l dev/fxp/if_fxp.c optional fxp dev/fxp/inphy.c optional fxp dev/gem/if_gem.c optional gem dev/gem/if_gem_pci.c optional gem pci dev/gem/if_gem_sbus.c optional gem sbus dev/gpio/gpiobus.c optional gpio \ dependency "gpiobus_if.h" dev/gpio/gpioc.c optional gpio \ dependency 
"gpio_if.h" dev/gpio/gpioiic.c optional gpioiic dev/gpio/gpioled.c optional gpioled dev/gpio/gpio_if.m optional gpio dev/gpio/gpiobus_if.m optional gpio dev/gpio/ofw_gpiobus.c optional fdt gpio dev/hatm/if_hatm.c optional hatm pci dev/hatm/if_hatm_intr.c optional hatm pci dev/hatm/if_hatm_ioctl.c optional hatm pci dev/hatm/if_hatm_rx.c optional hatm pci dev/hatm/if_hatm_tx.c optional hatm pci dev/hifn/hifn7751.c optional hifn dev/hme/if_hme.c optional hme dev/hme/if_hme_pci.c optional hme pci dev/hme/if_hme_sbus.c optional hme sbus dev/hptiop/hptiop.c optional hptiop scbus dev/hwpmc/hwpmc_logging.c optional hwpmc dev/hwpmc/hwpmc_mod.c optional hwpmc dev/hwpmc/hwpmc_soft.c optional hwpmc dev/ichsmb/ichsmb.c optional ichsmb dev/ichsmb/ichsmb_pci.c optional ichsmb pci dev/ida/ida.c optional ida dev/ida/ida_disk.c optional ida dev/ida/ida_eisa.c optional ida eisa dev/ida/ida_pci.c optional ida pci dev/ie/if_ie.c optional ie isa nowerror dev/ie/if_ie_isa.c optional ie isa dev/iicbus/ad7418.c optional ad7418 dev/iicbus/ds1307.c optional ds1307 dev/iicbus/ds133x.c optional ds133x dev/iicbus/ds1374.c optional ds1374 dev/iicbus/ds1672.c optional ds1672 dev/iicbus/ds3231.c optional ds3231 dev/iicbus/icee.c optional icee dev/iicbus/if_ic.c optional ic dev/iicbus/iic.c optional iic dev/iicbus/iicbb.c optional iicbb dev/iicbus/iicbb_if.m optional iicbb dev/iicbus/iicbus.c optional iicbus dev/iicbus/iicbus_if.m optional iicbus dev/iicbus/iiconf.c optional iicbus dev/iicbus/iicsmb.c optional iicsmb \ dependency "iicbus_if.h" dev/iicbus/iicoc.c optional iicoc dev/iicbus/lm75.c optional lm75 dev/iicbus/pcf8563.c optional pcf8563 dev/iicbus/s35390a.c optional s35390a dev/iir/iir.c optional iir dev/iir/iir_ctrl.c optional iir dev/iir/iir_pci.c optional iir pci dev/intpm/intpm.c optional intpm pci # XXX Work around clang warning, until maintainer approves fix. 
dev/ips/ips.c optional ips \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/ips/ips_commands.c optional ips dev/ips/ips_disk.c optional ips dev/ips/ips_ioctl.c optional ips dev/ips/ips_pci.c optional ips pci dev/ipw/if_ipw.c optional ipw ipwbssfw.c optional ipwbssfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_bss.fw:ipw_bss:130 -lintel_ipw -mipw_bss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwbssfw.c" ipw_bss.fwo optional ipwbssfw | ipwfw \ dependency "ipw_bss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_bss.fwo" ipw_bss.fw optional ipwbssfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_bss.fw" ipwibssfw.c optional ipwibssfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_ibss.fw:ipw_ibss:130 -lintel_ipw -mipw_ibss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwibssfw.c" ipw_ibss.fwo optional ipwibssfw | ipwfw \ dependency "ipw_ibss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_ibss.fwo" ipw_ibss.fw optional ipwibssfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3-i.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_ibss.fw" ipwmonitorfw.c optional ipwmonitorfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_monitor.fw:ipw_monitor:130 -lintel_ipw -mipw_monitor -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwmonitorfw.c" ipw_monitor.fwo optional ipwmonitorfw | ipwfw \ dependency "ipw_monitor.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_monitor.fwo" ipw_monitor.fw optional ipwmonitorfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3-p.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_monitor.fw" dev/iscsi/icl.c optional iscsi | ctl dev/iscsi/icl_conn_if.m optional iscsi | ctl dev/iscsi/icl_proxy.c optional iscsi | ctl dev/iscsi/icl_soft.c optional iscsi | ctl dev/iscsi/iscsi.c optional iscsi scbus dev/iscsi_initiator/iscsi.c optional iscsi_initiator scbus dev/iscsi_initiator/iscsi_subr.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_cam.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_soc.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_sm.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_subr.c optional iscsi_initiator scbus dev/ismt/ismt.c optional ismt dev/isp/isp.c optional isp dev/isp/isp_freebsd.c optional isp dev/isp/isp_library.c optional isp dev/isp/isp_pci.c optional isp pci dev/isp/isp_sbus.c optional isp sbus dev/isp/isp_target.c optional isp dev/ispfw/ispfw.c optional ispfw dev/iwi/if_iwi.c optional iwi iwibssfw.c optional iwibssfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_bss.fw:iwi_bss:300 -lintel_iwi -miwi_bss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwibssfw.c" iwi_bss.fwo optional iwibssfw | iwifw \ dependency "iwi_bss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_bss.fwo" iwi_bss.fw optional iwibssfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-bss.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_bss.fw" iwiibssfw.c optional iwiibssfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_ibss.fw:iwi_ibss:300 -lintel_iwi -miwi_ibss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwiibssfw.c" iwi_ibss.fwo optional iwiibssfw | iwifw \ dependency "iwi_ibss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean 
"iwi_ibss.fwo" iwi_ibss.fw optional iwiibssfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-ibss.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_ibss.fw" iwimonitorfw.c optional iwimonitorfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_monitor.fw:iwi_monitor:300 -lintel_iwi -miwi_monitor -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwimonitorfw.c" iwi_monitor.fwo optional iwimonitorfw | iwifw \ dependency "iwi_monitor.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_monitor.fwo" iwi_monitor.fw optional iwimonitorfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-sniffer.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_monitor.fw" dev/iwn/if_iwn.c optional iwn iwn1000fw.c optional iwn1000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn1000.fw:iwn1000fw -miwn1000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn1000fw.c" iwn1000fw.fwo optional iwn1000fw | iwnfw \ dependency "iwn1000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn1000fw.fwo" iwn1000.fw optional iwn1000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-1000-39.31.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn1000.fw" iwn100fw.c optional iwn100fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn100.fw:iwn100fw -miwn100fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn100fw.c" iwn100fw.fwo optional iwn100fw | iwnfw \ dependency "iwn100.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn100fw.fwo" iwn100.fw optional iwn100fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-100-39.31.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn100.fw" iwn105fw.c optional iwn105fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn105.fw:iwn105fw -miwn105fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn105fw.c" iwn105fw.fwo optional iwn105fw | iwnfw \ dependency "iwn105.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn105fw.fwo" iwn105.fw optional iwn105fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-105-6-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn105.fw" iwn135fw.c optional iwn135fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn135.fw:iwn135fw -miwn135fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn135fw.c" iwn135fw.fwo optional iwn135fw | iwnfw \ dependency "iwn135.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn135fw.fwo" iwn135.fw optional iwn135fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-135-6-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn135.fw" iwn2000fw.c optional iwn2000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn2000.fw:iwn2000fw -miwn2000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn2000fw.c" iwn2000fw.fwo optional iwn2000fw | iwnfw \ dependency "iwn2000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn2000fw.fwo" iwn2000.fw optional iwn2000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-2000-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn2000.fw" iwn2030fw.c optional iwn2030fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn2030.fw:iwn2030fw -miwn2030fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn2030fw.c" iwn2030fw.fwo optional iwn2030fw | iwnfw \ dependency 
"iwn2030.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn2030fw.fwo" iwn2030.fw optional iwn2030fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwnwifi-2030-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn2030.fw" iwn4965fw.c optional iwn4965fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn4965.fw:iwn4965fw -miwn4965fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn4965fw.c" iwn4965fw.fwo optional iwn4965fw | iwnfw \ dependency "iwn4965.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn4965fw.fwo" iwn4965.fw optional iwn4965fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-4965-228.61.2.24.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn4965.fw" iwn5000fw.c optional iwn5000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn5000.fw:iwn5000fw -miwn5000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn5000fw.c" iwn5000fw.fwo optional iwn5000fw | iwnfw \ dependency "iwn5000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn5000fw.fwo" iwn5000.fw optional iwn5000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-5000-8.83.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn5000.fw" iwn5150fw.c optional iwn5150fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn5150.fw:iwn5150fw -miwn5150fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn5150fw.c" iwn5150fw.fwo optional iwn5150fw | iwnfw \ dependency "iwn5150.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn5150fw.fwo" iwn5150.fw optional iwn5150fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-5150-8.24.2.2.fw.uu"\ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn5150.fw" iwn6000fw.c optional iwn6000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000.fw:iwn6000fw -miwn6000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000fw.c" iwn6000fw.fwo optional iwn6000fw | iwnfw \ dependency "iwn6000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000fw.fwo" iwn6000.fw optional iwn6000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000-9.221.4.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000.fw" iwn6000g2afw.c optional iwn6000g2afw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000g2a.fw:iwn6000g2afw -miwn6000g2afw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000g2afw.c" iwn6000g2afw.fwo optional iwn6000g2afw | iwnfw \ dependency "iwn6000g2a.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000g2afw.fwo" iwn6000g2a.fw optional iwn6000g2afw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000g2a-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000g2a.fw" iwn6000g2bfw.c optional iwn6000g2bfw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000g2b.fw:iwn6000g2bfw -miwn6000g2bfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000g2bfw.c" iwn6000g2bfw.fwo optional iwn6000g2bfw | iwnfw \ dependency "iwn6000g2b.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000g2bfw.fwo" iwn6000g2b.fw optional iwn6000g2bfw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000g2b-18.168.6.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000g2b.fw" iwn6050fw.c optional iwn6050fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6050.fw:iwn6050fw -miwn6050fw 
-c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6050fw.c" iwn6050fw.fwo optional iwn6050fw | iwnfw \ dependency "iwn6050.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6050fw.fwo" iwn6050.fw optional iwn6050fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6050-41.28.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6050.fw" dev/ixgb/if_ixgb.c optional ixgb dev/ixgb/ixgb_ee.c optional ixgb dev/ixgb/ixgb_hw.c optional ixgb dev/ixgbe/if_ix.c optional ix inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP" dev/ixgbe/if_ixv.c optional ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP" dev/ixgbe/ix_txrx.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP" dev/ixgbe/ixgbe_phy.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_api.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_common.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_mbx.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_vf.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_82598.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_82599.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_x540.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82598.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82599.c optional ix ixv inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/jme/if_jme.c optional jme pci dev/joy/joy.c optional joy dev/joy/joy_isa.c optional joy isa dev/kbdmux/kbdmux.c optional kbdmux dev/ksyms/ksyms.c optional ksyms dev/le/am7990.c optional le dev/le/am79900.c optional le dev/le/if_le_pci.c optional le pci dev/le/lance.c optional le dev/led/led.c standard dev/lge/if_lge.c optional lge dev/lmc/if_lmc.c optional lmc dev/malo/if_malo.c optional malo dev/malo/if_malohal.c optional malo dev/malo/if_malo_pci.c optional malo pci dev/mc146818/mc146818.c optional mc146818 dev/mca/mca_bus.c optional mca dev/mcd/mcd.c optional mcd isa nowerror dev/mcd/mcd_isa.c optional mcd isa nowerror dev/md/md.c optional md dev/mem/memdev.c optional mem dev/mem/memutil.c optional mem dev/mfi/mfi.c optional mfi dev/mfi/mfi_debug.c optional mfi dev/mfi/mfi_pci.c optional mfi pci dev/mfi/mfi_disk.c optional mfi dev/mfi/mfi_syspd.c optional mfi dev/mfi/mfi_tbolt.c optional mfi dev/mfi/mfi_linux.c optional mfi compat_linux dev/mfi/mfi_cam.c optional mfip scbus dev/mii/acphy.c optional miibus | acphy dev/mii/amphy.c optional miibus | amphy dev/mii/atphy.c optional miibus | atphy dev/mii/axphy.c optional miibus | axphy dev/mii/bmtphy.c optional miibus | bmtphy dev/mii/brgphy.c optional miibus | brgphy dev/mii/ciphy.c optional miibus | ciphy dev/mii/e1000phy.c optional miibus | e1000phy dev/mii/gentbi.c optional miibus | gentbi dev/mii/icsphy.c optional miibus | icsphy dev/mii/ip1000phy.c optional miibus | ip1000phy dev/mii/jmphy.c optional miibus | jmphy dev/mii/lxtphy.c optional miibus | lxtphy dev/mii/mii.c optional miibus | mii dev/mii/mii_bitbang.c optional miibus | mii_bitbang dev/mii/mii_physubr.c optional miibus | mii dev/mii/miibus_if.m optional miibus | mii dev/mii/mlphy.c optional miibus | mlphy dev/mii/nsgphy.c optional miibus | nsgphy dev/mii/nsphy.c optional 
miibus | nsphy dev/mii/nsphyter.c optional miibus | nsphyter dev/mii/pnaphy.c optional miibus | pnaphy dev/mii/qsphy.c optional miibus | qsphy dev/mii/rdcphy.c optional miibus | rdcphy dev/mii/rgephy.c optional miibus | rgephy dev/mii/rlphy.c optional miibus | rlphy dev/mii/rlswitch.c optional rlswitch dev/mii/smcphy.c optional miibus | smcphy dev/mii/smscphy.c optional miibus | smscphy dev/mii/tdkphy.c optional miibus | tdkphy dev/mii/tlphy.c optional miibus | tlphy dev/mii/truephy.c optional miibus | truephy dev/mii/ukphy.c optional miibus | mii dev/mii/ukphy_subr.c optional miibus | mii dev/mii/xmphy.c optional miibus | xmphy dev/mk48txx/mk48txx.c optional mk48txx dev/mlx/mlx.c optional mlx dev/mlx/mlx_disk.c optional mlx dev/mlx/mlx_pci.c optional mlx pci dev/mly/mly.c optional mly dev/mmc/mmc.c optional mmc dev/mmc/mmcbr_if.m standard dev/mmc/mmcbus_if.m standard dev/mmc/mmcsd.c optional mmcsd dev/mn/if_mn.c optional mn pci dev/mpr/mpr.c optional mpr dev/mpr/mpr_config.c optional mpr # XXX Work around clang warning, until maintainer approves fix. dev/mpr/mpr_mapping.c optional mpr \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/mpr/mpr_pci.c optional mpr pci dev/mpr/mpr_sas.c optional mpr \ compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}" dev/mpr/mpr_sas_lsi.c optional mpr dev/mpr/mpr_table.c optional mpr dev/mpr/mpr_user.c optional mpr dev/mps/mps.c optional mps dev/mps/mps_config.c optional mps # XXX Work around clang warning, until maintainer approves fix. dev/mps/mps_mapping.c optional mps \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/mps/mps_pci.c optional mps pci dev/mps/mps_sas.c optional mps \ compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}" dev/mps/mps_sas_lsi.c optional mps dev/mps/mps_table.c optional mps dev/mps/mps_user.c optional mps dev/mpt/mpt.c optional mpt dev/mpt/mpt_cam.c optional mpt dev/mpt/mpt_debug.c optional mpt dev/mpt/mpt_pci.c optional mpt pci dev/mpt/mpt_raid.c optional mpt dev/mpt/mpt_user.c optional mpt dev/mrsas/mrsas.c optional mrsas dev/mrsas/mrsas_cam.c optional mrsas dev/mrsas/mrsas_ioctl.c optional mrsas dev/mrsas/mrsas_fp.c optional mrsas dev/msk/if_msk.c optional msk dev/mvs/mvs.c optional mvs dev/mvs/mvs_if.m optional mvs dev/mvs/mvs_pci.c optional mvs pci dev/mwl/if_mwl.c optional mwl dev/mwl/if_mwl_pci.c optional mwl pci dev/mwl/mwlhal.c optional mwl mwlfw.c optional mwlfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk mw88W8363.fw:mw88W8363fw mwlboot.fw:mwlboot -mmwl -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "mwlfw.c" mw88W8363.fwo optional mwlfw \ dependency "mw88W8363.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "mw88W8363.fwo" mw88W8363.fw optional mwlfw \ dependency "$S/contrib/dev/mwl/mw88W8363.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "mw88W8363.fw" mwlboot.fwo optional mwlfw \ dependency "mwlboot.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "mwlboot.fwo" mwlboot.fw optional mwlfw \ dependency "$S/contrib/dev/mwl/mwlboot.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "mwlboot.fw" dev/mxge/if_mxge.c optional mxge pci dev/mxge/mxge_eth_z8e.c optional mxge pci dev/mxge/mxge_ethp_z8e.c optional mxge pci dev/mxge/mxge_rss_eth_z8e.c optional mxge pci dev/mxge/mxge_rss_ethp_z8e.c optional mxge pci dev/my/if_my.c optional my dev/nand/nand.c optional nand dev/nand/nand_bbt.c optional nand dev/nand/nand_cdev.c optional nand dev/nand/nand_generic.c optional nand dev/nand/nand_geom.c optional 
nand dev/nand/nand_id.c optional nand dev/nand/nandbus.c optional nand dev/nand/nandbus_if.m optional nand dev/nand/nand_if.m optional nand dev/nand/nandsim.c optional nandsim nand dev/nand/nandsim_chip.c optional nandsim nand dev/nand/nandsim_ctrl.c optional nandsim nand dev/nand/nandsim_log.c optional nandsim nand dev/nand/nandsim_swap.c optional nandsim nand dev/nand/nfc_if.m optional nand dev/ncr/ncr.c optional ncr pci dev/ncv/ncr53c500.c optional ncv dev/ncv/ncr53c500_pccard.c optional ncv pccard dev/netmap/netmap.c optional netmap dev/netmap/netmap_freebsd.c optional netmap dev/netmap/netmap_generic.c optional netmap dev/netmap/netmap_mbq.c optional netmap dev/netmap/netmap_mem2.c optional netmap dev/netmap/netmap_monitor.c optional netmap dev/netmap/netmap_offloadings.c optional netmap dev/netmap/netmap_pipe.c optional netmap dev/netmap/netmap_vale.c optional netmap # compile-with "${NORMAL_C} -Wconversion -Wextra" dev/nfsmb/nfsmb.c optional nfsmb pci dev/nge/if_nge.c optional nge dev/nxge/if_nxge.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-device.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-mm.c optional nxge dev/nxge/xgehal/xge-queue.c optional nxge dev/nxge/xgehal/xgehal-driver.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-ring.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-channel.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-fifo.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-stats.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-config.c optional nxge dev/nxge/xgehal/xgehal-mgmt.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nmdm/nmdm.c optional nmdm dev/nsp/nsp.c optional nsp dev/nsp/nsp_pccard.c optional nsp pccard dev/null/null.c standard dev/oce/oce_hw.c optional oce pci dev/oce/oce_if.c optional oce pci dev/oce/oce_mbox.c optional oce pci dev/oce/oce_queue.c optional oce pci dev/oce/oce_sysctl.c optional oce pci dev/oce/oce_util.c optional oce pci dev/ofw/ofw_bus_if.m optional fdt dev/ofw/ofw_bus_subr.c optional fdt dev/ofw/ofw_fdt.c optional fdt dev/ofw/ofw_if.m optional fdt dev/ofw/ofw_iicbus.c optional fdt iicbus dev/ofw/ofwbus.c optional fdt dev/ofw/openfirm.c optional fdt dev/ofw/openfirmio.c optional fdt dev/patm/if_patm.c optional patm pci dev/patm/if_patm_attach.c optional patm pci dev/patm/if_patm_intr.c optional patm pci dev/patm/if_patm_ioctl.c optional patm pci dev/patm/if_patm_rtables.c optional patm pci dev/patm/if_patm_rx.c optional patm pci dev/patm/if_patm_tx.c optional patm pci dev/pbio/pbio.c optional pbio isa dev/pccard/card_if.m standard dev/pccard/pccard.c optional pccard dev/pccard/pccard_cis.c optional pccard dev/pccard/pccard_cis_quirks.c optional pccard dev/pccard/pccard_device.c optional pccard dev/pccard/power_if.m standard dev/pccbb/pccbb.c optional cbb dev/pccbb/pccbb_isa.c optional cbb isa dev/pccbb/pccbb_pci.c optional cbb pci dev/pcf/pcf.c optional pcf dev/pci/eisa_pci.c optional pci eisa dev/pci/fixup_pci.c optional pci dev/pci/hostb_pci.c optional pci dev/pci/ignore_pci.c optional pci dev/pci/isa_pci.c optional pci isa dev/pci/pci.c optional pci dev/pci/pci_if.m standard dev/pci/pci_iov.c optional pci pci_iov dev/pci/pci_iov_schema.c optional pci pci_iov dev/pci/pci_pci.c optional pci dev/pci/pci_subr.c optional pci dev/pci/pci_user.c 
optional pci dev/pci/pcib_if.m standard dev/pci/pcib_support.c standard dev/pci/vga_pci.c optional pci dev/pcn/if_pcn.c optional pcn pci dev/pdq/if_fea.c optional fea eisa dev/pdq/if_fpa.c optional fpa pci dev/pdq/pdq.c optional nowerror fea eisa | fpa pci dev/pdq/pdq_ifsubr.c optional nowerror fea eisa | fpa pci dev/ppbus/if_plip.c optional plip dev/ppbus/immio.c optional vpo dev/ppbus/lpbb.c optional lpbb dev/ppbus/lpt.c optional lpt dev/ppbus/pcfclock.c optional pcfclock dev/ppbus/ppb_1284.c optional ppbus dev/ppbus/ppb_base.c optional ppbus dev/ppbus/ppb_msq.c optional ppbus dev/ppbus/ppbconf.c optional ppbus dev/ppbus/ppbus_if.m optional ppbus dev/ppbus/ppi.c optional ppi dev/ppbus/pps.c optional pps dev/ppbus/vpo.c optional vpo dev/ppbus/vpoio.c optional vpo dev/ppc/ppc.c optional ppc dev/ppc/ppc_acpi.c optional ppc acpi dev/ppc/ppc_isa.c optional ppc isa dev/ppc/ppc_pci.c optional ppc pci dev/ppc/ppc_puc.c optional ppc puc dev/pst/pst-iop.c optional pst dev/pst/pst-pci.c optional pst pci dev/pst/pst-raid.c optional pst dev/pty/pty.c optional pty dev/puc/puc.c optional puc dev/puc/puc_cfg.c optional puc dev/puc/puc_pccard.c optional puc pccard dev/puc/puc_pci.c optional puc pci dev/puc/pucdata.c optional puc pci dev/quicc/quicc_core.c optional quicc dev/ral/rt2560.c optional ral dev/ral/rt2661.c optional ral dev/ral/rt2860.c optional ral dev/ral/if_ral_pci.c optional ral pci rt2561fw.c optional rt2561fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2561.fw:rt2561fw -mrt2561 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2561fw.c" rt2561fw.fwo optional rt2561fw | ralfw \ dependency "rt2561.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2561fw.fwo" rt2561.fw optional rt2561fw | ralfw \ dependency "$S/contrib/dev/ral/rt2561.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2561.fw" rt2561sfw.c optional rt2561sfw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2561s.fw:rt2561sfw -mrt2561s -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2561sfw.c" rt2561sfw.fwo optional rt2561sfw | ralfw \ dependency "rt2561s.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2561sfw.fwo" rt2561s.fw optional rt2561sfw | ralfw \ dependency "$S/contrib/dev/ral/rt2561s.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2561s.fw" rt2661fw.c optional rt2661fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2661.fw:rt2661fw -mrt2661 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2661fw.c" rt2661fw.fwo optional rt2661fw | ralfw \ dependency "rt2661.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2661fw.fwo" rt2661.fw optional rt2661fw | ralfw \ dependency "$S/contrib/dev/ral/rt2661.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2661.fw" rt2860fw.c optional rt2860fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2860.fw:rt2860fw -mrt2860 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2860fw.c" rt2860fw.fwo optional rt2860fw | ralfw \ dependency "rt2860.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2860fw.fwo" rt2860.fw optional rt2860fw | ralfw \ dependency "$S/contrib/dev/ral/rt2860.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2860.fw" dev/random/randomdev.c standard dev/random/random_adaptors.c standard dev/random/dummy_rng.c standard dev/random/live_entropy_sources.c standard dev/random/random_harvestq.c standard 
dev/random/randomdev_soft.c optional random dev/random/yarrow.c optional random dev/random/fortuna.c optional random dev/random/hash.c optional random dev/rc/rc.c optional rc dev/re/if_re.c optional re dev/rl/if_rl.c optional rl pci dev/rndtest/rndtest.c optional rndtest dev/rp/rp.c optional rp dev/rp/rp_isa.c optional rp isa dev/rp/rp_pci.c optional rp pci dev/safe/safe.c optional safe dev/scc/scc_if.m optional scc dev/scc/scc_bfe_ebus.c optional scc ebus dev/scc/scc_bfe_quicc.c optional scc quicc dev/scc/scc_bfe_sbus.c optional scc fhc | scc sbus dev/scc/scc_core.c optional scc dev/scc/scc_dev_quicc.c optional scc quicc dev/scc/scc_dev_sab82532.c optional scc dev/scc/scc_dev_z8530.c optional scc dev/scd/scd.c optional scd isa dev/scd/scd_isa.c optional scd isa dev/sdhci/sdhci.c optional sdhci dev/sdhci/sdhci_if.m optional sdhci dev/sdhci/sdhci_pci.c optional sdhci pci dev/sf/if_sf.c optional sf pci dev/sfxge/common/efx_bootcfg.c optional sfxge pci dev/sfxge/common/efx_ev.c optional sfxge pci dev/sfxge/common/efx_filter.c optional sfxge pci dev/sfxge/common/efx_intr.c optional sfxge pci dev/sfxge/common/efx_mac.c optional sfxge pci dev/sfxge/common/efx_mcdi.c optional sfxge pci dev/sfxge/common/efx_mon.c optional sfxge pci dev/sfxge/common/efx_nic.c optional sfxge pci dev/sfxge/common/efx_nvram.c optional sfxge pci dev/sfxge/common/efx_phy.c optional sfxge pci dev/sfxge/common/efx_port.c optional sfxge pci dev/sfxge/common/efx_rx.c optional sfxge pci dev/sfxge/common/efx_sram.c optional sfxge pci dev/sfxge/common/efx_tx.c optional sfxge pci dev/sfxge/common/efx_vpd.c optional sfxge pci dev/sfxge/common/efx_wol.c optional sfxge pci dev/sfxge/common/siena_mac.c optional sfxge pci dev/sfxge/common/siena_mon.c optional sfxge pci dev/sfxge/common/siena_nic.c optional sfxge pci dev/sfxge/common/siena_nvram.c optional sfxge pci dev/sfxge/common/siena_phy.c optional sfxge pci dev/sfxge/common/siena_sram.c optional sfxge pci dev/sfxge/common/siena_vpd.c optional sfxge pci dev/sfxge/sfxge.c optional sfxge pci dev/sfxge/sfxge_dma.c optional sfxge pci dev/sfxge/sfxge_ev.c optional sfxge pci dev/sfxge/sfxge_intr.c optional sfxge pci dev/sfxge/sfxge_mcdi.c optional sfxge pci dev/sfxge/sfxge_port.c optional sfxge pci dev/sfxge/sfxge_rx.c optional sfxge pci dev/sfxge/sfxge_tx.c optional sfxge pci dev/sge/if_sge.c optional sge pci dev/si/si.c optional si dev/si/si2_z280.c optional si dev/si/si3_t225.c optional si dev/si/si_eisa.c optional si eisa dev/si/si_isa.c optional si isa dev/si/si_pci.c optional si pci dev/siba/siba.c optional siba dev/siba/siba_bwn.c optional siba_bwn pci dev/siba/siba_cc.c optional siba dev/siba/siba_core.c optional siba | siba_bwn pci dev/siba/siba_pcib.c optional siba pci dev/siis/siis.c optional siis pci dev/sis/if_sis.c optional sis pci dev/sk/if_sk.c optional sk pci dev/smbus/smb.c optional smb dev/smbus/smbconf.c optional smbus dev/smbus/smbus.c optional smbus dev/smbus/smbus_if.m optional smbus dev/smc/if_smc.c optional smc dev/smc/if_smc_fdt.c optional smc fdt dev/sn/if_sn.c optional sn dev/sn/if_sn_isa.c optional sn isa dev/sn/if_sn_pccard.c optional sn pccard dev/snp/snp.c optional snp dev/sound/clone.c optional sound dev/sound/unit.c optional sound dev/sound/isa/ad1816.c optional snd_ad1816 isa dev/sound/isa/ess.c optional snd_ess isa dev/sound/isa/gusc.c optional snd_gusc isa dev/sound/isa/mss.c optional snd_mss isa dev/sound/isa/sb16.c optional snd_sb16 isa dev/sound/isa/sb8.c optional snd_sb8 isa dev/sound/isa/sbc.c optional snd_sbc isa dev/sound/isa/sndbuf_dma.c 
optional sound isa dev/sound/pci/als4000.c optional snd_als4000 pci dev/sound/pci/atiixp.c optional snd_atiixp pci dev/sound/pci/cmi.c optional snd_cmi pci dev/sound/pci/cs4281.c optional snd_cs4281 pci dev/sound/pci/csa.c optional snd_csa pci dev/sound/pci/csapcm.c optional snd_csa pci dev/sound/pci/ds1.c optional snd_ds1 pci dev/sound/pci/emu10k1.c optional snd_emu10k1 pci dev/sound/pci/emu10kx.c optional snd_emu10kx pci dev/sound/pci/emu10kx-pcm.c optional snd_emu10kx pci dev/sound/pci/emu10kx-midi.c optional snd_emu10kx pci dev/sound/pci/envy24.c optional snd_envy24 pci dev/sound/pci/envy24ht.c optional snd_envy24ht pci dev/sound/pci/es137x.c optional snd_es137x pci dev/sound/pci/fm801.c optional snd_fm801 pci dev/sound/pci/ich.c optional snd_ich pci dev/sound/pci/maestro.c optional snd_maestro pci dev/sound/pci/maestro3.c optional snd_maestro3 pci dev/sound/pci/neomagic.c optional snd_neomagic pci dev/sound/pci/solo.c optional snd_solo pci dev/sound/pci/spicds.c optional snd_spicds pci dev/sound/pci/t4dwave.c optional snd_t4dwave pci dev/sound/pci/via8233.c optional snd_via8233 pci dev/sound/pci/via82c686.c optional snd_via82c686 pci dev/sound/pci/vibes.c optional snd_vibes pci dev/sound/pci/hda/hdaa.c optional snd_hda pci dev/sound/pci/hda/hdaa_patches.c optional snd_hda pci dev/sound/pci/hda/hdac.c optional snd_hda pci dev/sound/pci/hda/hdac_if.m optional snd_hda pci dev/sound/pci/hda/hdacc.c optional snd_hda pci dev/sound/pci/hdspe.c optional snd_hdspe pci dev/sound/pci/hdspe-pcm.c optional snd_hdspe pci dev/sound/pcm/ac97.c optional sound dev/sound/pcm/ac97_if.m optional sound dev/sound/pcm/ac97_patch.c optional sound dev/sound/pcm/buffer.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/channel.c optional sound dev/sound/pcm/channel_if.m optional sound dev/sound/pcm/dsp.c optional sound dev/sound/pcm/feeder.c optional sound dev/sound/pcm/feeder_chain.c optional sound dev/sound/pcm/feeder_eq.c optional sound \ dependency "feeder_eq_gen.h" \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_if.m optional sound dev/sound/pcm/feeder_format.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_matrix.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_mixer.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_rate.c optional sound \ dependency "feeder_rate_gen.h" \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_volume.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/mixer.c optional sound dev/sound/pcm/mixer_if.m optional sound dev/sound/pcm/sndstat.c optional sound dev/sound/pcm/sound.c optional sound dev/sound/pcm/vchan.c optional sound dev/sound/usb/uaudio.c optional snd_uaudio usb dev/sound/usb/uaudio_pcm.c optional snd_uaudio usb dev/sound/midi/midi.c optional sound dev/sound/midi/mpu401.c optional sound dev/sound/midi/mpu_if.m optional sound dev/sound/midi/mpufoi_if.m optional sound dev/sound/midi/sequencer.c optional sound dev/sound/midi/synth_if.m optional sound dev/spibus/ofw_spibus.c optional fdt spibus dev/spibus/spibus.c optional spibus \ dependency "spibus_if.h" dev/spibus/spibus_if.m optional spibus dev/ste/if_ste.c optional ste pci dev/stg/tmc18c30.c optional stg dev/stg/tmc18c30_isa.c optional stg isa dev/stg/tmc18c30_pccard.c optional stg pccard dev/stg/tmc18c30_pci.c optional stg pci dev/stg/tmc18c30_subr.c optional stg dev/stge/if_stge.c optional stge dev/streams/streams.c optional streams dev/sym/sym_hipd.c optional sym \ dependency "$S/dev/sym/sym_{conf,defs}.h" 
dev/syscons/blank/blank_saver.c optional blank_saver dev/syscons/daemon/daemon_saver.c optional daemon_saver dev/syscons/dragon/dragon_saver.c optional dragon_saver dev/syscons/fade/fade_saver.c optional fade_saver dev/syscons/fire/fire_saver.c optional fire_saver dev/syscons/green/green_saver.c optional green_saver dev/syscons/logo/logo.c optional logo_saver dev/syscons/logo/logo_saver.c optional logo_saver dev/syscons/rain/rain_saver.c optional rain_saver dev/syscons/schistory.c optional sc dev/syscons/scmouse.c optional sc dev/syscons/scterm.c optional sc dev/syscons/scvidctl.c optional sc dev/syscons/snake/snake_saver.c optional snake_saver dev/syscons/star/star_saver.c optional star_saver dev/syscons/syscons.c optional sc dev/syscons/sysmouse.c optional sc dev/syscons/warp/warp_saver.c optional warp_saver dev/tdfx/tdfx_linux.c optional tdfx_linux tdfx compat_linux dev/tdfx/tdfx_pci.c optional tdfx pci dev/ti/if_ti.c optional ti pci dev/tl/if_tl.c optional tl pci dev/trm/trm.c optional trm dev/twa/tw_cl_init.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_intr.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_io.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_misc.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_osl_cam.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_osl_freebsd.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twe/twe.c optional twe dev/twe/twe_freebsd.c optional twe dev/tws/tws.c optional tws dev/tws/tws_cam.c optional tws dev/tws/tws_hdm.c optional tws dev/tws/tws_services.c optional tws dev/tws/tws_user.c optional tws dev/tx/if_tx.c optional tx dev/txp/if_txp.c optional txp dev/uart/uart_bus_acpi.c optional uart acpi #dev/uart/uart_bus_cbus.c optional uart cbus dev/uart/uart_bus_ebus.c optional uart ebus dev/uart/uart_bus_fdt.c optional uart fdt dev/uart/uart_bus_isa.c optional uart isa dev/uart/uart_bus_pccard.c optional uart pccard dev/uart/uart_bus_pci.c optional uart pci dev/uart/uart_bus_puc.c optional uart puc dev/uart/uart_bus_scc.c optional uart scc dev/uart/uart_core.c optional uart dev/uart/uart_dbg.c optional uart gdb dev/uart/uart_dev_ns8250.c optional uart uart_ns8250 dev/uart/uart_dev_pl011.c optional uart pl011 dev/uart/uart_dev_quicc.c optional uart quicc dev/uart/uart_dev_sab82532.c optional uart uart_sab82532 dev/uart/uart_dev_sab82532.c optional uart scc dev/uart/uart_dev_z8530.c optional uart uart_z8530 dev/uart/uart_dev_z8530.c optional uart scc dev/uart/uart_if.m optional uart dev/uart/uart_subr.c optional uart dev/uart/uart_tty.c optional uart dev/ubsec/ubsec.c optional ubsec # # USB controller drivers # dev/usb/controller/at91dci.c optional at91dci dev/usb/controller/at91dci_atmelarm.c optional at91dci at91rm9200 dev/usb/controller/musb_otg.c optional musb dev/usb/controller/musb_otg_atmelarm.c optional musb at91rm9200 dev/usb/controller/dwc_otg.c optional dwcotg dev/usb/controller/dwc_otg_fdt.c optional dwcotg fdt dev/usb/controller/ehci.c optional ehci dev/usb/controller/ehci_pci.c optional ehci pci dev/usb/controller/ohci.c optional ohci dev/usb/controller/ohci_atmelarm.c optional ohci at91rm9200 dev/usb/controller/ohci_pci.c optional ohci pci dev/usb/controller/uhci.c optional uhci dev/usb/controller/uhci_pci.c optional uhci pci dev/usb/controller/xhci.c optional xhci dev/usb/controller/xhci_pci.c optional xhci pci dev/usb/controller/saf1761_otg.c optional saf1761otg dev/usb/controller/saf1761_otg_fdt.c optional saf1761otg 
fdt dev/usb/controller/uss820dci.c optional uss820dci dev/usb/controller/uss820dci_atmelarm.c optional uss820dci at91rm9200 dev/usb/controller/usb_controller.c optional usb # # USB storage drivers # dev/usb/storage/umass.c optional umass dev/usb/storage/urio.c optional urio dev/usb/storage/ustorage_fs.c optional usfs # # USB core # dev/usb/usb_busdma.c optional usb dev/usb/usb_compat_linux.c optional usb dev/usb/usb_core.c optional usb dev/usb/usb_debug.c optional usb dev/usb/usb_dev.c optional usb dev/usb/usb_device.c optional usb dev/usb/usb_dynamic.c optional usb dev/usb/usb_error.c optional usb dev/usb/usb_generic.c optional usb dev/usb/usb_handle_request.c optional usb dev/usb/usb_hid.c optional usb dev/usb/usb_hub.c optional usb dev/usb/usb_if.m optional usb dev/usb/usb_lookup.c optional usb dev/usb/usb_mbuf.c optional usb dev/usb/usb_msctest.c optional usb dev/usb/usb_parse.c optional usb dev/usb/usb_pf.c optional usb dev/usb/usb_process.c optional usb dev/usb/usb_request.c optional usb dev/usb/usb_transfer.c optional usb dev/usb/usb_util.c optional usb # # USB network drivers # dev/usb/net/if_aue.c optional aue dev/usb/net/if_axe.c optional axe dev/usb/net/if_axge.c optional axge dev/usb/net/if_cdce.c optional cdce dev/usb/net/if_cue.c optional cue dev/usb/net/if_ipheth.c optional ipheth dev/usb/net/if_kue.c optional kue dev/usb/net/if_mos.c optional mos dev/usb/net/if_rue.c optional rue dev/usb/net/if_smsc.c optional smsc dev/usb/net/if_udav.c optional udav dev/usb/net/if_usie.c optional usie dev/usb/net/if_urndis.c optional urndis dev/usb/net/ruephy.c optional rue dev/usb/net/usb_ethernet.c optional aue | axe | axge | cdce | cue | kue | \ mos | rue | smsc | udav | ipheth | \ urndis dev/usb/net/uhso.c optional uhso # # USB WLAN drivers # dev/usb/wlan/if_rsu.c optional rsu rsu-rtl8712fw.c optional rsu-rtl8712fw | rsufw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rsu-rtl8712fw.fw:rsu-rtl8712fw:120 -mrsu-rtl8712fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rsu-rtl8712fw.c" rsu-rtl8712fw.fwo optional rsu-rtl8712fw | rsufw \ dependency "rsu-rtl8712fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rsu-rtl8712fw.fwo" rsu-rtl8712fw.fw optional rsu-rtl8712.fw | rsufw \ dependency "$S/contrib/dev/rsu/rsu-rtl8712fw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rsu-rtl8712fw.fw" dev/usb/wlan/if_rum.c optional rum dev/usb/wlan/if_run.c optional run runfw.c optional runfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk run.fw:runfw -mrunfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "runfw.c" runfw.fwo optional runfw \ dependency "run.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "runfw.fwo" run.fw optional runfw \ dependency "$S/contrib/dev/run/rt2870.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "run.fw" dev/usb/wlan/if_uath.c optional uath dev/usb/wlan/if_upgt.c optional upgt dev/usb/wlan/if_ural.c optional ural dev/usb/wlan/if_urtw.c optional urtw dev/usb/wlan/if_urtwn.c optional urtwn urtwn-rtl8188eufw.c optional urtwn-rtl8188eufw | urtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8188eufw.fw:urtwn-rtl8188eufw:111 -murtwn-rtl8188eufw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "urtwn-rtl8188eufw.c" urtwn-rtl8188eufw.fwo optional urtwn-rtl8188eufw | urtwnfw \ dependency "urtwn-rtl8188eufw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "urtwn-rtl8188eufw.fwo" urtwn-rtl8188eufw.fw optional urtwn-rtl8188eufw | urtwnfw \ 
dependency "$S/contrib/dev/urtwn/urtwn-rtl8188eufw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "urtwn-rtl8188eufw.fw" urtwn-rtl8192cfwT.c optional urtwn-rtl8192cfwT | urtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwT.fw:urtwn-rtl8192cfwT:111 -murtwn-rtl8192cfwT -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "urtwn-rtl8192cfwT.c" urtwn-rtl8192cfwT.fwo optional urtwn-rtl8192cfwT | urtwnfw \ dependency "urtwn-rtl8192cfwT.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "urtwn-rtl8192cfwT.fwo" urtwn-rtl8192cfwT.fw optional urtwn-rtl8192cfwT | urtwnfw \ dependency "$S/contrib/dev/urtwn/urtwn-rtl8192cfwT.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "urtwn-rtl8192cfwT.fw" urtwn-rtl8192cfwU.c optional urtwn-rtl8192cfwU | urtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwU.fw:urtwn-rtl8192cfwU:111 -murtwn-rtl8192cfwU -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "urtwn-rtl8192cfwU.c" urtwn-rtl8192cfwU.fwo optional urtwn-rtl8192cfwU | urtwnfw \ dependency "urtwn-rtl8192cfwU.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "urtwn-rtl8192cfwU.fwo" urtwn-rtl8192cfwU.fw optional urtwn-rtl8192cfwU | urtwnfw \ dependency "$S/contrib/dev/urtwn/urtwn-rtl8192cfwU.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "urtwn-rtl8192cfwU.fw" dev/usb/wlan/if_zyd.c optional zyd # # USB serial and parallel port drivers # dev/usb/serial/u3g.c optional u3g dev/usb/serial/uark.c optional uark dev/usb/serial/ubsa.c optional ubsa dev/usb/serial/ubser.c optional ubser dev/usb/serial/uchcom.c optional uchcom dev/usb/serial/ucycom.c optional ucycom dev/usb/serial/ufoma.c optional ufoma dev/usb/serial/uftdi.c optional uftdi dev/usb/serial/ugensa.c optional ugensa dev/usb/serial/uipaq.c optional uipaq dev/usb/serial/ulpt.c optional ulpt dev/usb/serial/umcs.c optional umcs dev/usb/serial/umct.c optional umct dev/usb/serial/umodem.c optional umodem dev/usb/serial/umoscom.c optional umoscom dev/usb/serial/uplcom.c optional uplcom dev/usb/serial/uslcom.c optional uslcom dev/usb/serial/uvisor.c optional uvisor dev/usb/serial/uvscom.c optional uvscom dev/usb/serial/usb_serial.c optional ucom | u3g | uark | ubsa | ubser | \ uchcom | ucycom | ufoma | uftdi | \ ugensa | uipaq | umcs | umct | \ umodem | umoscom | uplcom | usie | \ uslcom | uvisor | uvscom # # USB misc drivers # dev/usb/misc/ufm.c optional ufm dev/usb/misc/udbp.c optional udbp dev/usb/misc/uled.c optional uled # # USB input drivers # dev/usb/input/atp.c optional atp dev/usb/input/uep.c optional uep dev/usb/input/uhid.c optional uhid dev/usb/input/ukbd.c optional ukbd dev/usb/input/ums.c optional ums dev/usb/input/wsp.c optional wsp # # USB quirks # dev/usb/quirk/usb_quirk.c optional usb # # USB templates # dev/usb/template/usb_template.c optional usb_template dev/usb/template/usb_template_audio.c optional usb_template dev/usb/template/usb_template_cdce.c optional usb_template dev/usb/template/usb_template_kbd.c optional usb_template dev/usb/template/usb_template_modem.c optional usb_template dev/usb/template/usb_template_mouse.c optional usb_template dev/usb/template/usb_template_msc.c optional usb_template dev/usb/template/usb_template_mtp.c optional usb_template dev/usb/template/usb_template_phone.c optional usb_template dev/usb/template/usb_template_serialnet.c optional usb_template # # USB video drivers # dev/usb/video/udl.c optional udl # # USB END # dev/videomode/videomode.c optional videomode 
dev/videomode/edid.c optional videomode
dev/videomode/pickmode.c optional videomode
dev/videomode/vesagtf.c optional videomode
dev/utopia/idtphy.c optional utopia
dev/utopia/suni.c optional utopia
dev/utopia/utopia.c optional utopia
dev/vge/if_vge.c optional vge
dev/viapm/viapm.c optional viapm pci
dev/virtio/virtio.c optional virtio
dev/virtio/virtqueue.c optional virtio
dev/virtio/virtio_bus_if.m optional virtio
dev/virtio/virtio_if.m optional virtio
dev/virtio/pci/virtio_pci.c optional virtio_pci
dev/virtio/mmio/virtio_mmio.c optional virtio_mmio
dev/virtio/mmio/virtio_mmio_if.m optional virtio_mmio
dev/virtio/network/if_vtnet.c optional vtnet
dev/virtio/block/virtio_blk.c optional virtio_blk
dev/virtio/balloon/virtio_balloon.c optional virtio_balloon
dev/virtio/scsi/virtio_scsi.c optional virtio_scsi
dev/virtio/random/virtio_random.c optional virtio_random
dev/virtio/console/virtio_console.c optional virtio_console
dev/vkbd/vkbd.c optional vkbd
dev/vr/if_vr.c optional vr pci
dev/vt/colors/vt_termcolors.c optional vt
dev/vt/font/vt_font_default.c optional vt
dev/vt/font/vt_mouse_cursor.c optional vt
dev/vt/hw/efifb/efifb.c optional vt_efifb
dev/vt/hw/fb/vt_fb.c optional vt
dev/vt/hw/vga/vt_vga.c optional vt vt_vga
dev/vt/logo/logo_freebsd.c optional vt splash
dev/vt/vt_buf.c optional vt
dev/vt/vt_consolectl.c optional vt
dev/vt/vt_core.c optional vt
dev/vt/vt_font.c optional vt
dev/vt/vt_sysmouse.c optional vt
dev/vte/if_vte.c optional vte pci
dev/vx/if_vx.c optional vx
dev/vx/if_vx_eisa.c optional vx eisa
dev/vx/if_vx_pci.c optional vx pci
dev/vxge/vxge.c optional vxge
dev/vxge/vxgehal/vxgehal-ifmsg.c optional vxge
dev/vxge/vxgehal/vxgehal-mrpcim.c optional vxge
dev/vxge/vxgehal/vxge-queue.c optional vxge
dev/vxge/vxgehal/vxgehal-ring.c optional vxge
dev/vxge/vxgehal/vxgehal-swapper.c optional vxge
dev/vxge/vxgehal/vxgehal-mgmt.c optional vxge
dev/vxge/vxgehal/vxgehal-srpcim.c optional vxge
dev/vxge/vxgehal/vxgehal-config.c optional vxge
dev/vxge/vxgehal/vxgehal-blockpool.c optional vxge
dev/vxge/vxgehal/vxgehal-doorbells.c optional vxge
dev/vxge/vxgehal/vxgehal-mgmtaux.c optional vxge
dev/vxge/vxgehal/vxgehal-device.c optional vxge
dev/vxge/vxgehal/vxgehal-mm.c optional vxge
dev/vxge/vxgehal/vxgehal-driver.c optional vxge
dev/vxge/vxgehal/vxgehal-virtualpath.c optional vxge
dev/vxge/vxgehal/vxgehal-channel.c optional vxge
dev/vxge/vxgehal/vxgehal-fifo.c optional vxge
dev/watchdog/watchdog.c standard
dev/wb/if_wb.c optional wb pci
dev/wds/wd7000.c optional wds isa
dev/wi/if_wi.c optional wi
dev/wi/if_wi_pccard.c optional wi pccard
dev/wi/if_wi_pci.c optional wi pci
dev/wl/if_wl.c optional wl isa
dev/wpi/if_wpi.c optional wpi pci
wpifw.c optional wpifw \
    compile-with "${AWK} -f $S/tools/fw_stub.awk wpi.fw:wpifw:153229 -mwpi -c${.TARGET}" \
    no-implicit-rule before-depend local \
    clean "wpifw.c"
wpifw.fwo optional wpifw \
    dependency "wpi.fw" \
    compile-with "${NORMAL_FWO}" \
    no-implicit-rule \
    clean "wpifw.fwo"
wpi.fw optional wpifw \
    dependency "$S/contrib/dev/wpi/iwlwifi-3945-15.32.2.9.fw.uu" \
    compile-with "${NORMAL_FW}" \
    no-obj no-implicit-rule \
    clean "wpi.fw"
dev/xe/if_xe.c optional xe
dev/xe/if_xe_pccard.c optional xe pccard
-dev/xen/balloon/balloon.c optional xen | xenhvm
-dev/xen/blkfront/blkfront.c optional xen | xenhvm
-dev/xen/blkback/blkback.c optional xen | xenhvm
-dev/xen/console/console.c optional xen | xenhvm
-dev/xen/console/xencons_ring.c optional xen | xenhvm
-dev/xen/control/control.c optional xen | xenhvm
-dev/xen/grant_table/grant_table.c optional xen | xenhvm
-dev/xen/netback/netback.c optional xen | xenhvm
-dev/xen/netfront/netfront.c optional xen | xenhvm
+dev/xen/balloon/balloon.c optional xenhvm
+dev/xen/blkfront/blkfront.c optional xenhvm
+dev/xen/blkback/blkback.c optional xenhvm
+dev/xen/console/console.c optional xenhvm
+dev/xen/console/xencons_ring.c optional xenhvm
+dev/xen/control/control.c optional xenhvm
+dev/xen/grant_table/grant_table.c optional xenhvm
+dev/xen/netback/netback.c optional xenhvm
+dev/xen/netfront/netfront.c optional xenhvm
dev/xen/xenpci/xenpci.c optional xenpci
-dev/xen/timer/timer.c optional xen | xenhvm
-dev/xen/pvcpu/pvcpu.c optional xen | xenhvm
-dev/xen/xenstore/xenstore.c optional xen | xenhvm
-dev/xen/xenstore/xenstore_dev.c optional xen | xenhvm
-dev/xen/xenstore/xenstored_dev.c optional xen | xenhvm
-dev/xen/evtchn/evtchn_dev.c optional xen | xenhvm
-dev/xen/privcmd/privcmd.c optional xen | xenhvm
-dev/xen/debug/debug.c optional xen | xenhvm
+dev/xen/timer/timer.c optional xenhvm
+dev/xen/pvcpu/pvcpu.c optional xenhvm
+dev/xen/xenstore/xenstore.c optional xenhvm
+dev/xen/xenstore/xenstore_dev.c optional xenhvm
+dev/xen/xenstore/xenstored_dev.c optional xenhvm
+dev/xen/evtchn/evtchn_dev.c optional xenhvm
+dev/xen/privcmd/privcmd.c optional xenhvm
+dev/xen/debug/debug.c optional xenhvm
dev/xl/if_xl.c optional xl pci
dev/xl/xlphy.c optional xl pci
fs/autofs/autofs.c optional autofs
fs/autofs/autofs_vfsops.c optional autofs
fs/autofs/autofs_vnops.c optional autofs
fs/deadfs/dead_vnops.c standard
fs/devfs/devfs_devs.c standard
fs/devfs/devfs_dir.c standard
fs/devfs/devfs_rule.c standard
fs/devfs/devfs_vfsops.c standard
fs/devfs/devfs_vnops.c standard
fs/fdescfs/fdesc_vfsops.c optional fdescfs
fs/fdescfs/fdesc_vnops.c optional fdescfs
fs/fifofs/fifo_vnops.c standard
fs/cuse/cuse.c optional cuse
fs/fuse/fuse_device.c optional fuse
fs/fuse/fuse_file.c optional fuse
fs/fuse/fuse_internal.c optional fuse
fs/fuse/fuse_io.c optional fuse
fs/fuse/fuse_ipc.c optional fuse
fs/fuse/fuse_main.c optional fuse
fs/fuse/fuse_node.c optional fuse
fs/fuse/fuse_vfsops.c optional fuse
fs/fuse/fuse_vnops.c optional fuse
fs/msdosfs/msdosfs_conv.c optional msdosfs
fs/msdosfs/msdosfs_denode.c optional msdosfs
fs/msdosfs/msdosfs_fat.c optional msdosfs
fs/msdosfs/msdosfs_fileno.c optional msdosfs
fs/msdosfs/msdosfs_iconv.c optional msdosfs_iconv
fs/msdosfs/msdosfs_lookup.c optional msdosfs
fs/msdosfs/msdosfs_vfsops.c optional msdosfs
fs/msdosfs/msdosfs_vnops.c optional msdosfs
fs/nandfs/bmap.c optional nandfs
fs/nandfs/nandfs_alloc.c optional nandfs
fs/nandfs/nandfs_bmap.c optional nandfs
fs/nandfs/nandfs_buffer.c optional nandfs
fs/nandfs/nandfs_cleaner.c optional nandfs
fs/nandfs/nandfs_cpfile.c optional nandfs
fs/nandfs/nandfs_dat.c optional nandfs
fs/nandfs/nandfs_dir.c optional nandfs
fs/nandfs/nandfs_ifile.c optional nandfs
fs/nandfs/nandfs_segment.c optional nandfs
fs/nandfs/nandfs_subr.c optional nandfs
fs/nandfs/nandfs_sufile.c optional nandfs
fs/nandfs/nandfs_vfsops.c optional nandfs
fs/nandfs/nandfs_vnops.c optional nandfs
fs/nfs/nfs_commonkrpc.c optional nfscl | nfsd
fs/nfs/nfs_commonsubs.c optional nfscl | nfsd
fs/nfs/nfs_commonport.c optional nfscl | nfsd
fs/nfs/nfs_commonacl.c optional nfscl | nfsd
fs/nfsclient/nfs_clcomsubs.c optional nfscl
fs/nfsclient/nfs_clsubs.c optional nfscl
fs/nfsclient/nfs_clstate.c optional nfscl
fs/nfsclient/nfs_clkrpc.c optional nfscl
fs/nfsclient/nfs_clrpcops.c optional nfscl
fs/nfsclient/nfs_clvnops.c optional nfscl
fs/nfsclient/nfs_clnode.c optional nfscl
fs/nfsclient/nfs_clvfsops.c
optional nfscl fs/nfsclient/nfs_clport.c optional nfscl fs/nfsclient/nfs_clbio.c optional nfscl fs/nfsclient/nfs_clnfsiod.c optional nfscl fs/nfsserver/nfs_fha_new.c optional nfsd inet fs/nfsserver/nfs_nfsdsocket.c optional nfsd inet fs/nfsserver/nfs_nfsdsubs.c optional nfsd inet fs/nfsserver/nfs_nfsdstate.c optional nfsd inet fs/nfsserver/nfs_nfsdkrpc.c optional nfsd inet fs/nfsserver/nfs_nfsdserv.c optional nfsd inet fs/nfsserver/nfs_nfsdport.c optional nfsd inet fs/nfsserver/nfs_nfsdcache.c optional nfsd inet fs/nullfs/null_subr.c optional nullfs fs/nullfs/null_vfsops.c optional nullfs fs/nullfs/null_vnops.c optional nullfs fs/procfs/procfs.c optional procfs fs/procfs/procfs_ctl.c optional procfs fs/procfs/procfs_dbregs.c optional procfs fs/procfs/procfs_fpregs.c optional procfs fs/procfs/procfs_ioctl.c optional procfs fs/procfs/procfs_map.c optional procfs fs/procfs/procfs_mem.c optional procfs fs/procfs/procfs_note.c optional procfs fs/procfs/procfs_osrel.c optional procfs fs/procfs/procfs_regs.c optional procfs fs/procfs/procfs_rlimit.c optional procfs fs/procfs/procfs_status.c optional procfs fs/procfs/procfs_type.c optional procfs fs/pseudofs/pseudofs.c optional pseudofs fs/pseudofs/pseudofs_fileno.c optional pseudofs fs/pseudofs/pseudofs_vncache.c optional pseudofs fs/pseudofs/pseudofs_vnops.c optional pseudofs fs/smbfs/smbfs_io.c optional smbfs fs/smbfs/smbfs_node.c optional smbfs fs/smbfs/smbfs_smb.c optional smbfs fs/smbfs/smbfs_subr.c optional smbfs fs/smbfs/smbfs_vfsops.c optional smbfs fs/smbfs/smbfs_vnops.c optional smbfs fs/udf/osta.c optional udf fs/udf/udf_iconv.c optional udf_iconv fs/udf/udf_vfsops.c optional udf fs/udf/udf_vnops.c optional udf fs/unionfs/union_subr.c optional unionfs fs/unionfs/union_vfsops.c optional unionfs fs/unionfs/union_vnops.c optional unionfs fs/tmpfs/tmpfs_vnops.c optional tmpfs fs/tmpfs/tmpfs_fifoops.c optional tmpfs fs/tmpfs/tmpfs_vfsops.c optional tmpfs fs/tmpfs/tmpfs_subr.c optional tmpfs gdb/gdb_cons.c optional gdb gdb/gdb_main.c optional gdb gdb/gdb_packet.c optional gdb geom/bde/g_bde.c optional geom_bde geom/bde/g_bde_crypt.c optional geom_bde geom/bde/g_bde_lock.c optional geom_bde geom/bde/g_bde_work.c optional geom_bde geom/cache/g_cache.c optional geom_cache geom/concat/g_concat.c optional geom_concat geom/eli/g_eli.c optional geom_eli geom/eli/g_eli_crypto.c optional geom_eli geom/eli/g_eli_ctl.c optional geom_eli geom/eli/g_eli_integrity.c optional geom_eli geom/eli/g_eli_key.c optional geom_eli geom/eli/g_eli_key_cache.c optional geom_eli geom/eli/g_eli_privacy.c optional geom_eli geom/eli/pkcs5v2.c optional geom_eli geom/gate/g_gate.c optional geom_gate geom/geom_aes.c optional geom_aes geom/geom_bsd.c optional geom_bsd geom/geom_bsd_enc.c optional geom_bsd | geom_part_bsd geom/geom_ccd.c optional ccd | geom_ccd geom/geom_ctl.c standard geom/geom_dev.c standard geom/geom_disk.c standard geom/geom_dump.c standard geom/geom_event.c standard geom/geom_fox.c optional geom_fox geom/geom_flashmap.c optional fdt cfi | fdt nand geom/geom_io.c standard geom/geom_kern.c standard geom/geom_map.c optional geom_map geom/geom_mbr.c optional geom_mbr geom/geom_mbr_enc.c optional geom_mbr geom/geom_pc98.c optional geom_pc98 geom/geom_pc98_enc.c optional geom_pc98 geom/geom_redboot.c optional geom_redboot geom/geom_slice.c standard geom/geom_subr.c standard geom/geom_sunlabel.c optional geom_sunlabel geom/geom_sunlabel_enc.c optional geom_sunlabel geom/geom_vfs.c standard geom/geom_vol_ffs.c optional geom_vol geom/journal/g_journal.c optional 
geom_journal geom/journal/g_journal_ufs.c optional geom_journal geom/label/g_label.c optional geom_label | geom_label_gpt geom/label/g_label_ext2fs.c optional geom_label geom/label/g_label_iso9660.c optional geom_label geom/label/g_label_msdosfs.c optional geom_label geom/label/g_label_ntfs.c optional geom_label geom/label/g_label_reiserfs.c optional geom_label geom/label/g_label_ufs.c optional geom_label geom/label/g_label_gpt.c optional geom_label | geom_label_gpt geom/label/g_label_disk_ident.c optional geom_label geom/linux_lvm/g_linux_lvm.c optional geom_linux_lvm geom/mirror/g_mirror.c optional geom_mirror geom/mirror/g_mirror_ctl.c optional geom_mirror geom/mountver/g_mountver.c optional geom_mountver geom/multipath/g_multipath.c optional geom_multipath geom/nop/g_nop.c optional geom_nop geom/part/g_part.c standard geom/part/g_part_if.m standard geom/part/g_part_apm.c optional geom_part_apm geom/part/g_part_bsd.c optional geom_part_bsd geom/part/g_part_bsd64.c optional geom_part_bsd64 geom/part/g_part_ebr.c optional geom_part_ebr geom/part/g_part_gpt.c optional geom_part_gpt geom/part/g_part_ldm.c optional geom_part_ldm geom/part/g_part_mbr.c optional geom_part_mbr geom/part/g_part_pc98.c optional geom_part_pc98 geom/part/g_part_vtoc8.c optional geom_part_vtoc8 geom/raid/g_raid.c optional geom_raid geom/raid/g_raid_ctl.c optional geom_raid geom/raid/g_raid_md_if.m optional geom_raid geom/raid/g_raid_tr_if.m optional geom_raid geom/raid/md_ddf.c optional geom_raid geom/raid/md_intel.c optional geom_raid geom/raid/md_jmicron.c optional geom_raid geom/raid/md_nvidia.c optional geom_raid geom/raid/md_promise.c optional geom_raid geom/raid/md_sii.c optional geom_raid geom/raid/tr_concat.c optional geom_raid geom/raid/tr_raid0.c optional geom_raid geom/raid/tr_raid1.c optional geom_raid geom/raid/tr_raid1e.c optional geom_raid geom/raid/tr_raid5.c optional geom_raid geom/raid3/g_raid3.c optional geom_raid3 geom/raid3/g_raid3_ctl.c optional geom_raid3 geom/shsec/g_shsec.c optional geom_shsec geom/stripe/g_stripe.c optional geom_stripe geom/uncompress/g_uncompress.c optional geom_uncompress contrib/xz-embedded/freebsd/xz_malloc.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_crc32.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_bcj.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_lzma2.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_stream.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" geom/uzip/g_uzip.c optional geom_uzip geom/vinum/geom_vinum.c optional geom_vinum geom/vinum/geom_vinum_create.c optional geom_vinum geom/vinum/geom_vinum_drive.c optional geom_vinum geom/vinum/geom_vinum_plex.c optional geom_vinum 
geom/vinum/geom_vinum_volume.c optional geom_vinum geom/vinum/geom_vinum_subr.c optional geom_vinum geom/vinum/geom_vinum_raid5.c optional geom_vinum geom/vinum/geom_vinum_share.c optional geom_vinum geom/vinum/geom_vinum_list.c optional geom_vinum geom/vinum/geom_vinum_rm.c optional geom_vinum geom/vinum/geom_vinum_init.c optional geom_vinum geom/vinum/geom_vinum_state.c optional geom_vinum geom/vinum/geom_vinum_rename.c optional geom_vinum geom/vinum/geom_vinum_move.c optional geom_vinum geom/vinum/geom_vinum_events.c optional geom_vinum geom/virstor/binstream.c optional geom_virstor geom/virstor/g_virstor.c optional geom_virstor geom/virstor/g_virstor_md.c optional geom_virstor geom/zero/g_zero.c optional geom_zero fs/ext2fs/ext2_alloc.c optional ext2fs fs/ext2fs/ext2_balloc.c optional ext2fs fs/ext2fs/ext2_bmap.c optional ext2fs fs/ext2fs/ext2_extents.c optional ext2fs fs/ext2fs/ext2_inode.c optional ext2fs fs/ext2fs/ext2_inode_cnv.c optional ext2fs fs/ext2fs/ext2_lookup.c optional ext2fs fs/ext2fs/ext2_subr.c optional ext2fs fs/ext2fs/ext2_vfsops.c optional ext2fs fs/ext2fs/ext2_vnops.c optional ext2fs gnu/fs/reiserfs/reiserfs_hashes.c optional reiserfs \ warning "kernel contains GPL contaminated ReiserFS filesystem" gnu/fs/reiserfs/reiserfs_inode.c optional reiserfs gnu/fs/reiserfs/reiserfs_item_ops.c optional reiserfs gnu/fs/reiserfs/reiserfs_namei.c optional reiserfs gnu/fs/reiserfs/reiserfs_prints.c optional reiserfs gnu/fs/reiserfs/reiserfs_stree.c optional reiserfs gnu/fs/reiserfs/reiserfs_vfsops.c optional reiserfs gnu/fs/reiserfs/reiserfs_vnops.c optional reiserfs # isa/isa_if.m standard isa/isa_common.c optional isa isa/isahint.c optional isa isa/pnp.c optional isa isapnp isa/pnpparse.c optional isa isapnp fs/cd9660/cd9660_bmap.c optional cd9660 fs/cd9660/cd9660_lookup.c optional cd9660 fs/cd9660/cd9660_node.c optional cd9660 fs/cd9660/cd9660_rrip.c optional cd9660 fs/cd9660/cd9660_util.c optional cd9660 fs/cd9660/cd9660_vfsops.c optional cd9660 fs/cd9660/cd9660_vnops.c optional cd9660 fs/cd9660/cd9660_iconv.c optional cd9660_iconv kern/bus_if.m standard kern/clock_if.m standard kern/cpufreq_if.m standard kern/device_if.m standard kern/imgact_binmisc.c optional imagact_binmisc kern/imgact_elf.c standard kern/imgact_elf32.c optional compat_freebsd32 kern/imgact_shell.c standard kern/inflate.c optional gzip kern/init_main.c standard kern/init_sysent.c standard kern/ksched.c optional _kposix_priority_scheduling kern/kern_acct.c standard kern/kern_alq.c optional alq kern/kern_clock.c standard kern/kern_condvar.c standard kern/kern_conf.c standard kern/kern_cons.c standard kern/kern_cpu.c standard kern/kern_cpuset.c standard kern/kern_context.c standard kern/kern_descrip.c standard kern/kern_dtrace.c optional kdtrace_hooks kern/kern_dump.c standard kern/kern_environment.c standard kern/kern_et.c standard kern/kern_event.c standard kern/kern_exec.c standard kern/kern_exit.c standard kern/kern_fail.c standard kern/kern_ffclock.c standard kern/kern_fork.c standard kern/kern_gzio.c optional gzio kern/kern_hhook.c standard kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard kern/kern_khelp.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr kern/kern_ktrace.c standard kern/kern_linker.c standard kern/kern_lock.c standard kern/kern_lockf.c standard kern/kern_lockstat.c optional kdtrace_hooks kern/kern_loginclass.c standard kern/kern_malloc.c standard kern/kern_mbuf.c standard kern/kern_mib.c standard kern/kern_module.c standard 
kern/kern_mtxpool.c standard kern/kern_mutex.c standard kern/kern_ntptime.c standard kern/kern_osd.c standard kern/kern_physio.c standard kern/kern_pmc.c standard kern/kern_poll.c optional device_polling kern/kern_priv.c standard kern/kern_proc.c standard kern/kern_procctl.c standard kern/kern_prot.c standard kern/kern_racct.c standard kern/kern_rangelock.c standard kern/kern_rctl.c standard kern/kern_resource.c standard kern/kern_rmlock.c standard kern/kern_rwlock.c standard kern/kern_sdt.c optional kdtrace_hooks kern/kern_sema.c standard kern/kern_sharedpage.c standard kern/kern_shutdown.c standard kern/kern_sig.c standard kern/kern_switch.c standard kern/kern_sx.c standard kern/kern_synch.c standard kern/kern_syscalls.c standard kern/kern_sysctl.c standard kern/kern_tc.c standard kern/kern_thr.c standard kern/kern_thread.c standard kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_umtx.c standard kern/kern_uuid.c standard kern/kern_xxx.c standard kern/link_elf.c standard kern/linker_if.m standard kern/md4c.c optional netsmb kern/md5c.c standard kern/p1003_1b.c standard kern/posix4_mib.c standard kern/sched_4bsd.c optional sched_4bsd kern/sched_ule.c optional sched_ule kern/serdev_if.m standard kern/stack_protector.c standard \ compile-with "${NORMAL_C:N-fstack-protector*}" kern/subr_acl_nfs4.c optional ufs_acl | zfs kern/subr_acl_posix1e.c optional ufs_acl kern/subr_autoconf.c standard kern/subr_blist.c standard kern/subr_bus.c standard kern/subr_bus_dma.c standard kern/subr_bufring.c standard kern/subr_capability.c standard kern/subr_clock.c standard kern/subr_counter.c standard kern/subr_devstat.c standard kern/subr_disk.c standard kern/subr_dnvlist.c standard kern/subr_eventhandler.c standard kern/subr_fattime.c standard kern/subr_firmware.c optional firmware kern/subr_hash.c standard kern/subr_hints.c standard kern/subr_kdb.c standard kern/subr_kobj.c standard kern/subr_lock.c standard kern/subr_log.c standard kern/subr_mbpool.c optional libmbpool kern/subr_mchain.c optional libmchain kern/subr_module.c standard kern/subr_msgbuf.c standard kern/subr_nvlist.c standard kern/subr_nvpair.c standard kern/subr_param.c standard kern/subr_pcpu.c standard kern/subr_pctrie.c standard kern/subr_power.c standard kern/subr_prf.c standard kern/subr_prof.c standard kern/subr_rman.c standard kern/subr_rtc.c standard kern/subr_sbuf.c standard kern/subr_scanf.c standard kern/subr_sglist.c standard kern/subr_sleepqueue.c standard kern/subr_smp.c standard kern/subr_stack.c optional ddb | stack | ktr kern/subr_taskqueue.c standard kern/subr_terminal.c optional vt kern/subr_trap.c standard kern/subr_turnstile.c standard kern/subr_uio.c standard kern/subr_unit.c standard kern/subr_vmem.c standard kern/subr_witness.c optional witness kern/sys_capability.c standard kern/sys_generic.c standard kern/sys_pipe.c standard kern/sys_procdesc.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/syscalls.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg kern/sysv_sem.c optional sysvsem kern/sysv_shm.c optional sysvshm kern/tty.c standard kern/tty_compat.c optional compat_43tty kern/tty_info.c standard kern/tty_inq.c standard kern/tty_outq.c standard kern/tty_pts.c standard kern/tty_tty.c standard kern/tty_ttydisc.c standard kern/uipc_accf.c standard kern/uipc_debug.c optional ddb kern/uipc_domain.c standard kern/uipc_mbuf.c standard kern/uipc_mbuf2.c standard kern/uipc_mbufhash.c standard kern/uipc_mqueue.c optional p1003_1b_mqueue kern/uipc_sem.c optional 
p1003_1b_semaphores kern/uipc_shm.c standard kern/uipc_sockbuf.c standard kern/uipc_socket.c standard kern/uipc_syscalls.c standard kern/uipc_usrreq.c standard kern/vfs_acl.c standard kern/vfs_aio.c optional vfs_aio kern/vfs_bio.c standard kern/vfs_cache.c standard kern/vfs_cluster.c standard kern/vfs_default.c standard kern/vfs_export.c standard kern/vfs_extattr.c standard kern/vfs_hash.c standard kern/vfs_init.c standard kern/vfs_lookup.c standard kern/vfs_mount.c standard kern/vfs_mountroot.c standard kern/vfs_subr.c standard kern/vfs_syscalls.c standard kern/vfs_vnops.c standard # # Kernel GSS-API # gssd.h optional kgssapi \ dependency "$S/kgssapi/gssd.x" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -hM $S/kgssapi/gssd.x | grep -v pthread.h > gssd.h" \ no-obj no-implicit-rule before-depend local \ clean "gssd.h" gssd_xdr.c optional kgssapi \ dependency "$S/kgssapi/gssd.x gssd.h" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -c $S/kgssapi/gssd.x -o gssd_xdr.c" \ no-implicit-rule before-depend local \ clean "gssd_xdr.c" gssd_clnt.c optional kgssapi \ dependency "$S/kgssapi/gssd.x gssd.h" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -lM $S/kgssapi/gssd.x | grep -v string.h > gssd_clnt.c" \ no-implicit-rule before-depend local \ clean "gssd_clnt.c" kgssapi/gss_accept_sec_context.c optional kgssapi kgssapi/gss_add_oid_set_member.c optional kgssapi kgssapi/gss_acquire_cred.c optional kgssapi kgssapi/gss_canonicalize_name.c optional kgssapi kgssapi/gss_create_empty_oid_set.c optional kgssapi kgssapi/gss_delete_sec_context.c optional kgssapi kgssapi/gss_display_status.c optional kgssapi kgssapi/gss_export_name.c optional kgssapi kgssapi/gss_get_mic.c optional kgssapi kgssapi/gss_init_sec_context.c optional kgssapi kgssapi/gss_impl.c optional kgssapi kgssapi/gss_import_name.c optional kgssapi kgssapi/gss_names.c optional kgssapi kgssapi/gss_pname_to_uid.c optional kgssapi kgssapi/gss_release_buffer.c optional kgssapi kgssapi/gss_release_cred.c optional kgssapi kgssapi/gss_release_name.c optional kgssapi kgssapi/gss_release_oid_set.c optional kgssapi kgssapi/gss_set_cred_option.c optional kgssapi kgssapi/gss_test_oid_set_member.c optional kgssapi kgssapi/gss_unwrap.c optional kgssapi kgssapi/gss_verify_mic.c optional kgssapi kgssapi/gss_wrap.c optional kgssapi kgssapi/gss_wrap_size_limit.c optional kgssapi kgssapi/gssd_prot.c optional kgssapi kgssapi/krb5/krb5_mech.c optional kgssapi kgssapi/krb5/kcrypto.c optional kgssapi kgssapi/krb5/kcrypto_aes.c optional kgssapi kgssapi/krb5/kcrypto_arcfour.c optional kgssapi kgssapi/krb5/kcrypto_des.c optional kgssapi kgssapi/krb5/kcrypto_des3.c optional kgssapi kgssapi/kgss_if.m optional kgssapi kgssapi/gsstest.c optional kgssapi_debug # These files in libkern/ are those needed by all architectures. Some # of the files in libkern/ are only needed on some architectures, e.g., # libkern/divdi3.c is needed by i386 but not alpha. Also, some of these # routines may be optimized for a particular platform. In either case, # the file should be moved to conf/files. from here. 
# libkern/arc4random.c standard libkern/asprintf.c standard libkern/bcd.c standard libkern/bsearch.c standard libkern/crc32.c standard libkern/explicit_bzero.c standard libkern/fnmatch.c standard libkern/iconv.c optional libiconv libkern/iconv_converter_if.m optional libiconv libkern/iconv_ucs.c optional libiconv libkern/iconv_xlat.c optional libiconv libkern/iconv_xlat16.c optional libiconv libkern/inet_aton.c standard libkern/inet_ntoa.c standard libkern/inet_ntop.c standard libkern/inet_pton.c standard libkern/jenkins_hash.c standard libkern/murmur3_32.c standard libkern/mcount.c optional profiling-routine libkern/memcchr.c standard libkern/memchr.c standard libkern/memcmp.c standard libkern/memmem.c optional gdb libkern/qsort.c standard libkern/qsort_r.c standard libkern/random.c standard libkern/scanc.c standard libkern/strcasecmp.c standard libkern/strcat.c standard libkern/strchr.c standard libkern/strcmp.c standard libkern/strcpy.c standard libkern/strcspn.c standard libkern/strdup.c standard libkern/strndup.c standard libkern/strlcat.c standard libkern/strlcpy.c standard libkern/strlen.c standard libkern/strncmp.c standard libkern/strncpy.c standard libkern/strnlen.c standard libkern/strrchr.c standard libkern/strsep.c standard libkern/strspn.c standard libkern/strstr.c standard libkern/strtol.c standard libkern/strtoq.c standard libkern/strtoul.c standard libkern/strtouq.c standard libkern/strvalid.c standard libkern/timingsafe_bcmp.c standard libkern/zlib.c optional crypto | geom_uzip | ipsec | \ mxge | netgraph_deflate | \ ddb_ctf | gzio | geom_uncompress net/altq/altq_cbq.c optional altq net/altq/altq_cdnr.c optional altq net/altq/altq_hfsc.c optional altq net/altq/altq_priq.c optional altq net/altq/altq_red.c optional altq net/altq/altq_rio.c optional altq net/altq/altq_rmclass.c optional altq net/altq/altq_subr.c optional altq net/bpf.c standard net/bpf_buffer.c optional bpf net/bpf_jitter.c optional bpf_jitter net/bpf_filter.c optional bpf | netgraph_bpf net/bpf_zerocopy.c optional bpf net/bridgestp.c optional bridge | if_bridge net/flowtable.c optional flowtable inet | flowtable inet6 net/ieee8023ad_lacp.c optional lagg net/if.c standard net/if_arcsubr.c optional arcnet net/if_atmsubr.c optional atm net/if_bridge.c optional bridge inet | if_bridge inet net/if_clone.c standard net/if_dead.c standard net/if_debug.c optional ddb net/if_disc.c optional disc net/if_edsc.c optional edsc net/if_enc.c optional enc ipsec inet | enc ipsec inet6 net/if_epair.c optional epair net/if_ethersubr.c optional ether net/if_fddisubr.c optional fddi net/if_fwsubr.c optional fwip net/if_gif.c optional gif inet | gif inet6 | \ netgraph_gif inet | netgraph_gif inet6 net/if_gre.c optional gre inet | gre inet6 net/if_iso88025subr.c optional token net/if_lagg.c optional lagg net/if_loop.c optional loop net/if_llatbl.c standard net/if_me.c optional me inet net/if_media.c standard net/if_mib.c standard net/if_spppfr.c optional sppp | netgraph_sppp net/if_spppsubr.c optional sppp | netgraph_sppp net/if_stf.c optional stf inet inet6 net/if_tun.c optional tun net/if_tap.c optional tap net/if_vlan.c optional vlan net/if_vxlan.c optional vxlan inet | vxlan inet6 net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression net/netisr.c standard net/pfil.c optional ether | inet net/radix.c standard net/radix_mpath.c standard net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard net/rss_config.c optional inet rss | inet6 rss net/rtsock.c standard 
net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp net/toeplitz.c optional inet rss | inet6 rss net/vnet.c optional vimage net80211/ieee80211.c optional wlan net80211/ieee80211_acl.c optional wlan wlan_acl net80211/ieee80211_action.c optional wlan net80211/ieee80211_ageq.c optional wlan net80211/ieee80211_adhoc.c optional wlan \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_ageq.c optional wlan net80211/ieee80211_amrr.c optional wlan | wlan_amrr net80211/ieee80211_crypto.c optional wlan \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_crypto_ccmp.c optional wlan wlan_ccmp net80211/ieee80211_crypto_none.c optional wlan net80211/ieee80211_crypto_tkip.c optional wlan wlan_tkip net80211/ieee80211_crypto_wep.c optional wlan wlan_wep net80211/ieee80211_ddb.c optional wlan ddb net80211/ieee80211_dfs.c optional wlan net80211/ieee80211_freebsd.c optional wlan net80211/ieee80211_hostap.c optional wlan \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_ht.c optional wlan net80211/ieee80211_hwmp.c optional wlan ieee80211_support_mesh net80211/ieee80211_input.c optional wlan net80211/ieee80211_ioctl.c optional wlan net80211/ieee80211_mesh.c optional wlan ieee80211_support_mesh \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_monitor.c optional wlan net80211/ieee80211_node.c optional wlan net80211/ieee80211_output.c optional wlan net80211/ieee80211_phy.c optional wlan net80211/ieee80211_power.c optional wlan net80211/ieee80211_proto.c optional wlan net80211/ieee80211_radiotap.c optional wlan net80211/ieee80211_ratectl.c optional wlan net80211/ieee80211_ratectl_none.c optional wlan net80211/ieee80211_regdomain.c optional wlan net80211/ieee80211_rssadapt.c optional wlan wlan_rssadapt net80211/ieee80211_scan.c optional wlan net80211/ieee80211_scan_sta.c optional wlan net80211/ieee80211_sta.c optional wlan \ compile-with "${NORMAL_C} -Wno-unused-function" net80211/ieee80211_superg.c optional wlan ieee80211_support_superg net80211/ieee80211_scan_sw.c optional wlan net80211/ieee80211_tdma.c optional wlan ieee80211_support_tdma net80211/ieee80211_wds.c optional wlan net80211/ieee80211_xauth.c optional wlan wlan_xauth net80211/ieee80211_alq.c optional wlan ieee80211_alq netgraph/atm/ccatm/ng_ccatm.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/ng_atm.c optional ngatm_atm netgraph/atm/ngatmbase.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/sscfu/ng_sscfu.c optional ngatm_sscfu \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/sscop/ng_sscop.c optional ngatm_sscop \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/uni/ng_uni.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/bluetooth/common/ng_bluetooth.c optional netgraph_bluetooth netgraph/bluetooth/drivers/bt3c/ng_bt3c_pccard.c optional netgraph_bluetooth_bt3c netgraph/bluetooth/drivers/h4/ng_h4.c optional netgraph_bluetooth_h4 netgraph/bluetooth/drivers/ubt/ng_ubt.c optional netgraph_bluetooth_ubt usb netgraph/bluetooth/drivers/ubtbcmfw/ubtbcmfw.c optional netgraph_bluetooth_ubtbcmfw usb netgraph/bluetooth/hci/ng_hci_cmds.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_evnt.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_main.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_misc.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_ulpi.c optional netgraph_bluetooth_hci 
netgraph/bluetooth/l2cap/ng_l2cap_cmds.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_evnt.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_llpi.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_main.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_misc.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/socket/ng_btsocket.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_hci_raw.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_l2cap.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_rfcomm.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_sco.c optional netgraph_bluetooth_socket netgraph/netflow/netflow.c optional netgraph_netflow netgraph/netflow/netflow_v9.c optional netgraph_netflow netgraph/netflow/ng_netflow.c optional netgraph_netflow netgraph/ng_UI.c optional netgraph_UI netgraph/ng_async.c optional netgraph_async netgraph/ng_atmllc.c optional netgraph_atmllc netgraph/ng_base.c optional netgraph netgraph/ng_bpf.c optional netgraph_bpf netgraph/ng_bridge.c optional netgraph_bridge netgraph/ng_car.c optional netgraph_car netgraph/ng_cisco.c optional netgraph_cisco netgraph/ng_deflate.c optional netgraph_deflate netgraph/ng_device.c optional netgraph_device netgraph/ng_echo.c optional netgraph_echo netgraph/ng_eiface.c optional netgraph_eiface netgraph/ng_ether.c optional netgraph_ether netgraph/ng_ether_echo.c optional netgraph_ether_echo netgraph/ng_frame_relay.c optional netgraph_frame_relay netgraph/ng_gif.c optional netgraph_gif inet6 | netgraph_gif inet netgraph/ng_gif_demux.c optional netgraph_gif_demux netgraph/ng_hole.c optional netgraph_hole netgraph/ng_iface.c optional netgraph_iface netgraph/ng_ip_input.c optional netgraph_ip_input netgraph/ng_ipfw.c optional netgraph_ipfw inet ipfirewall netgraph/ng_ksocket.c optional netgraph_ksocket netgraph/ng_l2tp.c optional netgraph_l2tp netgraph/ng_lmi.c optional netgraph_lmi netgraph/ng_mppc.c optional netgraph_mppc_compression | \ netgraph_mppc_encryption netgraph/ng_nat.c optional netgraph_nat inet libalias netgraph/ng_one2many.c optional netgraph_one2many netgraph/ng_parse.c optional netgraph netgraph/ng_patch.c optional netgraph_patch netgraph/ng_pipe.c optional netgraph_pipe netgraph/ng_ppp.c optional netgraph_ppp netgraph/ng_pppoe.c optional netgraph_pppoe netgraph/ng_pptpgre.c optional netgraph_pptpgre netgraph/ng_pred1.c optional netgraph_pred1 netgraph/ng_rfc1490.c optional netgraph_rfc1490 netgraph/ng_socket.c optional netgraph_socket netgraph/ng_split.c optional netgraph_split netgraph/ng_sppp.c optional netgraph_sppp netgraph/ng_tag.c optional netgraph_tag netgraph/ng_tcpmss.c optional netgraph_tcpmss netgraph/ng_tee.c optional netgraph_tee netgraph/ng_tty.c optional netgraph_tty netgraph/ng_vjc.c optional netgraph_vjc netgraph/ng_vlan.c optional netgraph_vlan netinet/accf_data.c optional accept_filter_data inet netinet/accf_dns.c optional accept_filter_dns inet netinet/accf_http.c optional accept_filter_http inet netinet/if_atm.c optional atm netinet/if_ether.c optional inet ether netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_debug.c optional inet ddb netinet/in_kdtrace.c optional inet | inet6 netinet/ip_carp.c optional inet carp | inet6 carp 
netinet/in_gif.c optional gif inet | netgraph_gif inet netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet netinet/in_mcast.c optional inet netinet/in_pcb.c optional inet | inet6 netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup netinet/in_proto.c optional inet | inet6 netinet/in_rmx.c optional inet netinet/in_rss.c optional inet rss netinet/ip_divert.c optional inet ipdivert ipfirewall netinet/ip_ecn.c optional inet | inet6 netinet/ip_encap.c optional inet | inet6 netinet/ip_fastfwd.c optional inet netinet/ip_icmp.c optional inet | inet6 netinet/ip_input.c optional inet netinet/ip_ipsec.c optional inet ipsec netinet/ip_mroute.c optional mrouting inet netinet/ip_options.c optional inet netinet/ip_output.c optional inet netinet/ip_reass.c optional inet netinet/raw_ip.c optional inet | inet6 netinet/cc/cc.c optional inet | inet6 netinet/cc/cc_newreno.c optional inet | inet6 netinet/sctp_asconf.c optional inet sctp | inet6 sctp netinet/sctp_auth.c optional inet sctp | inet6 sctp netinet/sctp_bsd_addr.c optional inet sctp | inet6 sctp netinet/sctp_cc_functions.c optional inet sctp | inet6 sctp netinet/sctp_crc32.c optional inet sctp | inet6 sctp netinet/sctp_indata.c optional inet sctp | inet6 sctp netinet/sctp_input.c optional inet sctp | inet6 sctp netinet/sctp_output.c optional inet sctp | inet6 sctp netinet/sctp_pcb.c optional inet sctp | inet6 sctp netinet/sctp_peeloff.c optional inet sctp | inet6 sctp netinet/sctp_ss_functions.c optional inet sctp | inet6 sctp netinet/sctp_syscalls.c optional inet sctp | inet6 sctp netinet/sctp_sysctl.c optional inet sctp | inet6 sctp netinet/sctp_timer.c optional inet sctp | inet6 sctp netinet/sctp_usrreq.c optional inet sctp | inet6 sctp netinet/sctputil.c optional inet sctp | inet6 sctp netinet/siftr.c optional inet siftr alq | inet6 siftr alq netinet/tcp_debug.c optional tcpdebug netinet/tcp_hostcache.c optional inet | inet6 netinet/tcp_input.c optional inet | inet6 netinet/tcp_lro.c optional inet | inet6 netinet/tcp_output.c optional inet | inet6 netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6 netinet/tcp_reass.c optional inet | inet6 netinet/tcp_sack.c optional inet | inet6 netinet/tcp_subr.c optional inet | inet6 netinet/tcp_syncache.c optional inet | inet6 netinet/tcp_timer.c optional inet | inet6 netinet/tcp_timewait.c optional inet | inet6 netinet/tcp_usrreq.c optional inet | inet6 netinet/udp_usrreq.c optional inet | inet6 netinet/libalias/alias.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_db.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_mod.c optional libalias | netgraph_nat netinet/libalias/alias_proxy.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_util.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_sctp.c optional libalias inet | netgraph_nat inet netinet6/dest6.c optional inet6 netinet6/frag6.c optional inet6 netinet6/icmp6.c optional inet6 netinet6/in6.c optional inet6 netinet6/in6_cksum.c optional inet6 netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6 netinet6/in6_ifattach.c optional inet6 netinet6/in6_mcast.c optional inet6 netinet6/in6_pcb.c optional inet6 netinet6/in6_pcbgroup.c optional inet6 pcbgroup netinet6/in6_proto.c optional inet6 netinet6/in6_rmx.c optional inet6 netinet6/in6_rss.c optional inet6 rss netinet6/in6_src.c optional inet6 netinet6/ip6_forward.c optional inet6 netinet6/ip6_gre.c optional gre inet6 netinet6/ip6_id.c optional inet6 netinet6/ip6_input.c optional inet6 
netinet6/ip6_mroute.c optional mrouting inet6 netinet6/ip6_output.c optional inet6 netinet6/ip6_ipsec.c optional inet6 ipsec netinet6/mld6.c optional inet6 netinet6/nd6.c optional inet6 netinet6/nd6_nbr.c optional inet6 netinet6/nd6_rtr.c optional inet6 netinet6/raw_ip6.c optional inet6 netinet6/route6.c optional inet6 netinet6/scope6.c optional inet6 netinet6/sctp6_usrreq.c optional inet6 sctp netinet6/udp6_usrreq.c optional inet6 netipsec/ipsec.c optional ipsec inet | ipsec inet6 netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 netipsec/key.c optional ipsec inet | ipsec inet6 netipsec/key_debug.c optional ipsec inet | ipsec inet6 netipsec/keysock.c optional ipsec inet | ipsec inet6 netipsec/xform_ah.c optional ipsec inet | ipsec inet6 netipsec/xform_esp.c optional ipsec inet | ipsec inet6 netipsec/xform_ipcomp.c optional ipsec inet | ipsec inet6 netipsec/xform_tcp.c optional ipsec inet tcp_signature | \ ipsec inet6 tcp_signature netnatm/natm.c optional natm netnatm/natm_pcb.c optional natm netnatm/natm_proto.c optional natm netpfil/ipfw/dn_heap.c optional inet dummynet netpfil/ipfw/dn_sched_fifo.c optional inet dummynet netpfil/ipfw/dn_sched_prio.c optional inet dummynet netpfil/ipfw/dn_sched_qfq.c optional inet dummynet netpfil/ipfw/dn_sched_rr.c optional inet dummynet netpfil/ipfw/dn_sched_wf2q.c optional inet dummynet netpfil/ipfw/ip_dummynet.c optional inet dummynet netpfil/ipfw/ip_dn_io.c optional inet dummynet netpfil/ipfw/ip_dn_glue.c optional inet dummynet netpfil/ipfw/ip_fw2.c optional inet ipfirewall netpfil/ipfw/ip_fw_dynamic.c optional inet ipfirewall netpfil/ipfw/ip_fw_log.c optional inet ipfirewall netpfil/ipfw/ip_fw_pfil.c optional inet ipfirewall netpfil/ipfw/ip_fw_sockopt.c optional inet ipfirewall netpfil/ipfw/ip_fw_table.c optional inet ipfirewall netpfil/ipfw/ip_fw_table_algo.c optional inet ipfirewall netpfil/ipfw/ip_fw_table_value.c optional inet ipfirewall netpfil/ipfw/ip_fw_iface.c optional inet ipfirewall netpfil/ipfw/ip_fw_nat.c optional inet ipfirewall_nat netpfil/pf/if_pflog.c optional pflog pf inet netpfil/pf/if_pfsync.c optional pfsync pf inet netpfil/pf/pf.c optional pf inet netpfil/pf/pf_if.c optional pf inet netpfil/pf/pf_ioctl.c optional pf inet netpfil/pf/pf_lb.c optional pf inet netpfil/pf/pf_norm.c optional pf inet netpfil/pf/pf_osfp.c optional pf inet netpfil/pf/pf_ruleset.c optional pf inet netpfil/pf/pf_table.c optional pf inet netpfil/pf/in4_cksum.c optional pf inet netsmb/smb_conn.c optional netsmb netsmb/smb_crypt.c optional netsmb netsmb/smb_dev.c optional netsmb netsmb/smb_iod.c optional netsmb netsmb/smb_rq.c optional netsmb netsmb/smb_smb.c optional netsmb netsmb/smb_subr.c optional netsmb netsmb/smb_trantcp.c optional netsmb netsmb/smb_usr.c optional netsmb nfs/bootp_subr.c optional bootp nfscl nfs/krpc_subr.c optional bootp nfscl nfs/nfs_diskless.c optional nfscl nfs_root nfs/nfs_fha.c optional nfsd nfs/nfs_lock.c optional nfscl | nfslockd | nfsd nfs/nfs_nfssvc.c optional nfscl | nfsd nlm/nlm_advlock.c optional nfslockd | nfsd nlm/nlm_prot_clnt.c optional nfslockd | nfsd nlm/nlm_prot_impl.c optional nfslockd | nfsd nlm/nlm_prot_server.c optional nfslockd | nfsd nlm/nlm_prot_svc.c optional nfslockd | nfsd nlm/nlm_prot_xdr.c optional nfslockd | nfsd nlm/sm_inter_xdr.c optional nfslockd | nfsd # Linux Kernel Compatibility API ofed/include/linux/linux_kmod.c optional ofed | compat_linuxapi \ no-depend compile-with 
"${OFED_C}" ofed/include/linux/linux_compat.c optional ofed | compat_linuxapi \ no-depend compile-with "${OFED_C}" ofed/include/linux/linux_pci.c optional ofed | compat_linuxapi \ no-depend compile-with "${OFED_C}" ofed/include/linux/linux_idr.c optional ofed | compat_linuxapi \ no-depend compile-with "${OFED_C}" ofed/include/linux/linux_radix.c optional ofed | compat_linuxapi \ no-depend compile-with "${OFED_C}" # OpenFabrics Enterprise Distribution (Infiniband) ofed/drivers/infiniband/core/addr.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/agent.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cache.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" # XXX Mad.c must be ordered before cm.c for sysinit sets to occur in # the correct order. ofed/drivers/infiniband/core/mad.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/ -Wno-unused-function" ofed/drivers/infiniband/core/cma.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/device.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/fmr_pool.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/iwcm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/mad_rmpp.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/multicast.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/packer.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/peer_mem.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sa_query.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/smi.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sysfs.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ucm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ucma.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ud_header.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/umem.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/user_mad.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_cmd.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_main.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_marshall.c optional ofed \ no-depend \ 
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/verbs.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" #ofed/drivers/infiniband/ulp/ipoib/ipoib_fs.c optional ipoib \ # no-depend \ # compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" #ofed/drivers/infiniband/ulp/ipoib/ipoib_vlan.c optional ipoib \ # no-depend \ # compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_main.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_rx.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_cma.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_tx.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/hw/mlx4/alias_GUID.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mcg.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/sysfs.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/cm.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/ah.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/cq.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/doorbell.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mad.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/main.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mr.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ 
compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/qp.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/srq.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/wc.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/net/mlx4/alloc.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/catas.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/cmd.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/cq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/eq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/fw.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/icm.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/intf.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/main.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/mcg.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/ -Wno-unused" ofed/drivers/net/mlx4/mr.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/pd.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/port.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/profile.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/qp.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/reset.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/sense.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/srq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/resource_tracker.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/sys_tune.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_cq.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with 
"${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_main.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_netdev.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_port.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_resources.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_rx.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_tx.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/infiniband/hw/mthca/mthca_allocator.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_av.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_catas.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cmd.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_eq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mad.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_main.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mcg.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_memfree.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mr.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_pd.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_profile.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_provider.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_qp.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_reset.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_srq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_uar.c optional mthca \ no-depend compile-with "${OFED_C}" # crypto support opencrypto/cast.c optional crypto | ipsec opencrypto/criov.c optional crypto opencrypto/crypto.c optional crypto opencrypto/cryptodev.c optional cryptodev opencrypto/cryptodev_if.m optional crypto opencrypto/cryptosoft.c optional crypto opencrypto/cryptodeflate.c optional crypto opencrypto/gmac.c optional crypto opencrypto/gfmult.c optional crypto opencrypto/rmd160.c optional crypto | ipsec opencrypto/skipjack.c optional crypto opencrypto/xform.c optional crypto rpc/auth_none.c optional krpc | nfslockd | nfscl | nfsd rpc/auth_unix.c optional krpc | nfslockd | nfscl | nfsd rpc/authunix_prot.c optional krpc | nfslockd | nfscl | nfsd rpc/clnt_bck.c optional krpc | nfslockd | nfscl | nfsd rpc/clnt_dg.c optional krpc | nfslockd | nfscl | nfsd rpc/clnt_rc.c optional krpc | nfslockd | nfscl | nfsd rpc/clnt_vc.c optional krpc | nfslockd | nfscl | nfsd rpc/getnetconfig.c optional krpc | 
nfslockd | nfscl | nfsd rpc/replay.c optional krpc | nfslockd | nfscl | nfsd rpc/rpc_callmsg.c optional krpc | nfslockd | nfscl | nfsd rpc/rpc_generic.c optional krpc | nfslockd | nfscl | nfsd rpc/rpc_prot.c optional krpc | nfslockd | nfscl | nfsd rpc/rpcb_clnt.c optional krpc | nfslockd | nfscl | nfsd rpc/rpcb_prot.c optional krpc | nfslockd | nfscl | nfsd rpc/svc.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_auth.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_auth_unix.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_dg.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_generic.c optional krpc | nfslockd | nfscl | nfsd rpc/svc_vc.c optional krpc | nfslockd | nfscl | nfsd rpc/rpcsec_gss/rpcsec_gss.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_conf.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_misc.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_prot.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/svc_rpcsec_gss.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi security/audit/audit.c optional audit security/audit/audit_arg.c optional audit security/audit/audit_bsm.c optional audit security/audit/audit_bsm_klib.c optional audit security/audit/audit_pipe.c optional audit security/audit/audit_syscalls.c standard security/audit/audit_trigger.c optional audit security/audit/audit_worker.c optional audit security/audit/bsm_domain.c optional audit security/audit/bsm_errno.c optional audit security/audit/bsm_fcntl.c optional audit security/audit/bsm_socket_type.c optional audit security/audit/bsm_token.c optional audit security/mac/mac_audit.c optional mac audit security/mac/mac_cred.c optional mac security/mac/mac_framework.c optional mac security/mac/mac_inet.c optional mac inet | mac inet6 security/mac/mac_inet6.c optional mac inet6 security/mac/mac_label.c optional mac security/mac/mac_net.c optional mac security/mac/mac_pipe.c optional mac security/mac/mac_posix_sem.c optional mac security/mac/mac_posix_shm.c optional mac security/mac/mac_priv.c optional mac security/mac/mac_process.c optional mac security/mac/mac_socket.c optional mac security/mac/mac_syscalls.c standard security/mac/mac_system.c optional mac security/mac/mac_sysv_msg.c optional mac security/mac/mac_sysv_sem.c optional mac security/mac/mac_sysv_shm.c optional mac security/mac/mac_vfs.c optional mac security/mac_biba/mac_biba.c optional mac_biba security/mac_bsdextended/mac_bsdextended.c optional mac_bsdextended security/mac_bsdextended/ugidfw_system.c optional mac_bsdextended security/mac_bsdextended/ugidfw_vnode.c optional mac_bsdextended security/mac_ifoff/mac_ifoff.c optional mac_ifoff security/mac_lomac/mac_lomac.c optional mac_lomac security/mac_mls/mac_mls.c optional mac_mls security/mac_none/mac_none.c optional mac_none security/mac_partition/mac_partition.c optional mac_partition security/mac_portacl/mac_portacl.c optional mac_portacl security/mac_seeotheruids/mac_seeotheruids.c optional mac_seeotheruids security/mac_stub/mac_stub.c optional mac_stub security/mac_test/mac_test.c optional mac_test teken/teken.c optional sc | vt ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_inode.c optional ffs ufs/ffs/ffs_snapshot.c optional ffs ufs/ffs/ffs_softdep.c optional ffs ufs/ffs/ffs_subr.c optional ffs ufs/ffs/ffs_tables.c optional ffs ufs/ffs/ffs_vfsops.c optional 
ffs
ufs/ffs/ffs_vnops.c optional ffs
ufs/ffs/ffs_rawread.c optional ffs directio
ufs/ffs/ffs_suspend.c optional ffs
ufs/ufs/ufs_acl.c optional ffs
ufs/ufs/ufs_bmap.c optional ffs
ufs/ufs/ufs_dirhash.c optional ffs
ufs/ufs/ufs_extattr.c optional ffs
ufs/ufs/ufs_gjournal.c optional ffs UFS_GJOURNAL
ufs/ufs/ufs_inode.c optional ffs
ufs/ufs/ufs_lookup.c optional ffs
ufs/ufs/ufs_quota.c optional ffs
ufs/ufs/ufs_vfsops.c optional ffs
ufs/ufs/ufs_vnops.c optional ffs
vm/default_pager.c standard
vm/device_pager.c standard
vm/phys_pager.c standard
vm/redzone.c optional DEBUG_REDZONE
vm/sg_pager.c standard
vm/swap_pager.c standard
vm/uma_core.c standard
vm/uma_dbg.c standard
vm/memguard.c optional DEBUG_MEMGUARD
vm/vm_fault.c standard
vm/vm_glue.c standard
vm/vm_init.c standard
vm/vm_kern.c standard
vm/vm_map.c standard
vm/vm_meter.c standard
vm/vm_mmap.c standard
vm/vm_object.c standard
vm/vm_page.c standard
vm/vm_pageout.c standard
vm/vm_pager.c standard
vm/vm_phys.c standard
vm/vm_radix.c standard
vm/vm_reserv.c standard
vm/vm_unix.c standard
vm/vm_zeroidle.c standard
vm/vnode_pager.c standard
-xen/features.c optional xen | xenhvm
-xen/xenbus/xenbus_if.m optional xen | xenhvm
-xen/xenbus/xenbus.c optional xen | xenhvm
-xen/xenbus/xenbusb_if.m optional xen | xenhvm
-xen/xenbus/xenbusb.c optional xen | xenhvm
-xen/xenbus/xenbusb_front.c optional xen | xenhvm
-xen/xenbus/xenbusb_back.c optional xen | xenhvm
+xen/features.c optional xenhvm
+xen/xenbus/xenbus_if.m optional xenhvm
+xen/xenbus/xenbus.c optional xenhvm
+xen/xenbus/xenbusb_if.m optional xenhvm
+xen/xenbus/xenbusb.c optional xenhvm
+xen/xenbus/xenbusb_front.c optional xenhvm
+xen/xenbus/xenbusb_back.c optional xenhvm
xdr/xdr.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_array.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_mbuf.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_mem.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_reference.c optional krpc | nfslockd | nfscl | nfsd
xdr/xdr_sizeof.c optional krpc | nfslockd | nfscl | nfsd
Index: head/sys/conf/files.amd64
===================================================================
--- head/sys/conf/files.amd64 (revision 282273)
+++ head/sys/conf/files.amd64 (revision 282274)
@@ -1,587 +1,587 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
# $FreeBSD$
#
# The long compile-with and dependency lines are required because of
# limitations in config: backslash-newline doesn't work in strings, and
# dependency lines other than the first are silently ignored.
# # linux32_genassym.o optional compat_linux32 \ dependency "$S/amd64/linux32/linux32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux32_genassym.o" # linux32_assym.h optional compat_linux32 \ dependency "$S/kern/genassym.sh linux32_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux32_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux32_assym.h" # ia32_genassym.o standard \ dependency "$S/compat/ia32/ia32_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "ia32_genassym.o" # ia32_assym.h standard \ dependency "$S/kern/genassym.sh ia32_genassym.o" \ compile-with "env NM='${NM}' sh $S/kern/genassym.sh ia32_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "ia32_assym.h" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/amd64-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/amd64-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/amd64-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/amd64-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/amd64-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/amd64-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ no-implicit-rule # amd64/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo 
"#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # amd64/amd64/amd64_mem.c optional mem #amd64/amd64/apic_vector.S standard amd64/amd64/atomic.c standard amd64/amd64/autoconf.c standard amd64/amd64/bios.c standard amd64/amd64/bpf_jit_machdep.c optional bpf_jitter amd64/amd64/cpu_switch.S standard amd64/amd64/db_disasm.c optional ddb amd64/amd64/db_interface.c optional ddb amd64/amd64/db_trace.c optional ddb amd64/amd64/elf_machdep.c standard amd64/amd64/exception.S standard amd64/amd64/fpu.c standard amd64/amd64/gdb_machdep.c optional gdb amd64/amd64/in_cksum.c optional inet | inet6 amd64/amd64/initcpu.c standard amd64/amd64/io.c optional io amd64/amd64/locore.S standard no-obj amd64/amd64/xen-locore.S optional xenhvm amd64/amd64/machdep.c standard amd64/amd64/mem.c optional mem amd64/amd64/minidump_machdep.c standard amd64/amd64/mp_machdep.c optional smp amd64/amd64/mp_watchdog.c optional mp_watchdog smp amd64/amd64/mpboot.S optional smp amd64/amd64/pmap.c standard amd64/amd64/prof_machdep.c optional profiling-routine amd64/amd64/ptrace_machdep.c standard amd64/amd64/sigtramp.S standard amd64/amd64/stack_machdep.c optional ddb | stack amd64/amd64/support.S standard amd64/amd64/sys_machdep.c standard amd64/amd64/trap.c standard amd64/amd64/uio_machdep.c standard amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard amd64/pci/pci_cfgreg.c optional pci cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" crypto/aesni/aeskeys_amd64.S optional aesni crypto/aesni/aesni.c optional aesni aesni_ghash.o optional aesni \ dependency "$S/crypto/aesni/aesni_ghash.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/acpica/acpi_if.m standard dev/acpica/acpi_hpet.c optional acpi dev/acpi_support/acpi_wmi_if.m standard dev/agp/agp_amd64.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_via.c optional agp dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/bxe/bxe.c optional bxe pci dev/bxe/bxe_stats.c optional bxe pci dev/bxe/bxe_debug.c optional bxe pci dev/bxe/ecore_sp.c optional bxe pci dev/bxe/bxe_elink.c optional bxe pci dev/bxe/57710_init_values.c optional bxe pci dev/bxe/57711_init_values.c optional bxe pci dev/bxe/57712_init_values.c optional bxe pci dev/coretemp/coretemp.c optional coretemp dev/cpuctl/cpuctl.c optional cpuctl dev/dpms/dpms.c optional dpms # There are no systems with isa slots, so all ed isa entries should go.. 
dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux32 dev/ixl/if_ixl.c optional ixl pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/if_ixlv.c optional ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixlvc.c optional ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/ixl_txrx.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_osdep.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_lan_hmc.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_hmc.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_common.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_nvm.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/ixl/i40e_adminq.c optional ixl pci | ixlv pci \ compile-with "${NORMAL_C} -I$S/dev/ixl" dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fdt/fdt_x86.c optional fdt dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/netvsc/hv_net_vsc.c optional hyperv dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c optional hyperv dev/hyperv/netvsc/hv_rndis_filter.c optional hyperv dev/hyperv/stordisengage/hv_ata_pci_disengage.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv dev/hyperv/utilities/hv_util.c optional hyperv dev/hyperv/vmbus/hv_channel.c optional hyperv dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv dev/hyperv/vmbus/hv_connection.c optional hyperv dev/hyperv/vmbus/hv_hv.c optional hyperv dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv dev/kbd/kbd.c optional atkbd | sc | ukbd | vt dev/nfe/if_nfe.c optional nfe pci 
dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_hw/ntb_hw.c optional if_ntb ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/random/ivy.c optional rdrand_rng dev/random/nehemiah.c optional padlock_rng dev/qlxge/qls_dbg.c optional qlxge pci dev/qlxge/qls_dump.c optional qlxge pci dev/qlxge/qls_hw.c optional qlxge pci dev/qlxge/qls_ioctl.c optional qlxge pci dev/qlxge/qls_isr.c optional qlxge pci dev/qlxge/qls_os.c optional qlxge pci dev/qlxgb/qla_dbg.c optional qlxgb pci dev/qlxgb/qla_hw.c optional qlxgb pci dev/qlxgb/qla_ioctl.c optional qlxgb pci dev/qlxgb/qla_isr.c optional qlxgb pci dev/qlxgb/qla_misc.c optional qlxgb pci dev/qlxgb/qla_os.c optional qlxgb pci dev/qlxgbe/ql_dbg.c optional qlxgbe pci dev/qlxgbe/ql_hw.c optional qlxgbe pci dev/qlxgbe/ql_ioctl.c optional qlxgbe pci dev/qlxgbe/ql_isr.c optional qlxgbe pci dev/qlxgbe/ql_misc.c optional qlxgbe pci dev/qlxgbe/ql_os.c optional qlxgbe pci dev/qlxgbe/ql_reset.c optional qlxgbe pci dev/sfxge/common/efx_bootcfg.c optional sfxge inet pci dev/sfxge/common/efx_ev.c optional sfxge inet pci dev/sfxge/common/efx_filter.c optional sfxge inet pci dev/sfxge/common/efx_intr.c optional sfxge inet pci dev/sfxge/common/efx_mac.c optional sfxge inet pci dev/sfxge/common/efx_mcdi.c optional sfxge inet pci dev/sfxge/common/efx_mon.c optional sfxge inet pci dev/sfxge/common/efx_nic.c optional sfxge inet pci dev/sfxge/common/efx_nvram.c optional sfxge inet pci dev/sfxge/common/efx_phy.c optional sfxge inet pci dev/sfxge/common/efx_port.c optional sfxge inet pci dev/sfxge/common/efx_rx.c optional sfxge inet pci dev/sfxge/common/efx_sram.c optional sfxge inet pci dev/sfxge/common/efx_tx.c optional sfxge inet pci dev/sfxge/common/efx_vpd.c optional sfxge inet pci dev/sfxge/common/efx_wol.c optional sfxge inet pci dev/sfxge/common/siena_mac.c optional sfxge inet pci dev/sfxge/common/siena_mon.c optional sfxge inet pci dev/sfxge/common/siena_nic.c optional sfxge inet pci dev/sfxge/common/siena_nvram.c optional sfxge inet pci dev/sfxge/common/siena_phy.c optional sfxge inet pci dev/sfxge/common/siena_sram.c optional sfxge inet pci dev/sfxge/common/siena_vpd.c optional sfxge inet pci dev/sfxge/sfxge.c optional sfxge inet pci dev/sfxge/sfxge_dma.c optional sfxge inet pci dev/sfxge/sfxge_ev.c optional sfxge inet pci dev/sfxge/sfxge_intr.c optional sfxge inet pci dev/sfxge/sfxge_mcdi.c optional sfxge inet pci dev/sfxge/sfxge_port.c optional sfxge inet pci dev/sfxge/sfxge_rx.c optional sfxge inet pci dev/sfxge/sfxge_tx.c optional sfxge inet pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional 
wpi dev/xen/pci/xen_acpi_pci.c optional xenhvm dev/xen/pci/xen_pci.c optional xenhvm dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional 
isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/link_elf_obj.c standard # # IA32 binary support # #amd64/ia32/ia32_exception.S optional compat_freebsd32 amd64/ia32/ia32_reg.c optional compat_freebsd32 amd64/ia32/ia32_signal.c optional compat_freebsd32 amd64/ia32/ia32_sigtramp.S optional compat_freebsd32 amd64/ia32/ia32_syscall.c optional compat_freebsd32 amd64/ia32/ia32_misc.c optional compat_freebsd32 compat/ia32/ia32_sysvec.c optional compat_freebsd32 compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs # # Linux/i386 binary support # amd64/linux32/linux32_dummy.c optional compat_linux32 amd64/linux32/linux32_locore.s optional compat_linux32 \ dependency "linux32_assym.h" amd64/linux32/linux32_machdep.c optional compat_linux32 amd64/linux32/linux32_support.s optional compat_linux32 \ dependency "linux32_assym.h" amd64/linux32/linux32_sysent.c optional compat_linux32 amd64/linux32/linux32_sysvec.c optional compat_linux32 compat/linux/linux_emul.c optional compat_linux32 compat/linux/linux_file.c optional compat_linux32 compat/linux/linux_fork.c optional compat_linux32 compat/linux/linux_futex.c optional compat_linux32 compat/linux/linux_getcwd.c optional compat_linux32 compat/linux/linux_ioctl.c optional compat_linux32 compat/linux/linux_ipc.c optional compat_linux32 compat/linux/linux_mib.c optional compat_linux32 compat/linux/linux_misc.c optional compat_linux32 
compat/linux/linux_signal.c optional compat_linux32
compat/linux/linux_socket.c optional compat_linux32
compat/linux/linux_stats.c optional compat_linux32
compat/linux/linux_sysctl.c optional compat_linux32
compat/linux/linux_time.c optional compat_linux32
compat/linux/linux_timer.c optional compat_linux32
compat/linux/linux_uid16.c optional compat_linux32
compat/linux/linux_util.c optional compat_linux32
dev/amr/amr_linux.c optional compat_linux32 amr
dev/mfi/mfi_linux.c optional compat_linux32 mfi
#
# Windows NDIS driver support
#
compat/ndis/kern_ndis.c optional ndisapi pci
compat/ndis/kern_windrv.c optional ndisapi pci
compat/ndis/subr_hal.c optional ndisapi pci
compat/ndis/subr_ndis.c optional ndisapi pci
compat/ndis/subr_ntoskrnl.c optional ndisapi pci
compat/ndis/subr_pe.c optional ndisapi pci
compat/ndis/subr_usbd.c optional ndisapi pci
compat/ndis/winx64_wrap.S optional ndisapi pci
#
libkern/memmove.c standard
libkern/memset.c standard
#
# x86 real mode BIOS emulator, required by atkbdc/dpms/pci/vesa
#
compat/x86bios/x86bios.c optional x86bios | atkbd | dpms | pci | vesa
contrib/x86emu/x86emu.c optional x86bios | atkbd | dpms | pci | vesa
#
# bvm console
#
dev/bvm/bvm_console.c optional bvmconsole
dev/bvm/bvm_dbg.c optional bvmdebug
#
# x86 shared code between IA32, AMD64 and PC98 architectures
#
x86/acpica/OsdEnvironment.c optional acpi
x86/acpica/acpi_apm.c optional acpi
x86/acpica/acpi_wakeup.c optional acpi
x86/acpica/madt.c optional acpi
x86/acpica/srat.c optional acpi
x86/bios/smbios.c optional smbios
x86/bios/vpd.c optional vpd
x86/cpufreq/powernow.c optional cpufreq
x86/cpufreq/est.c optional cpufreq
x86/cpufreq/hwpstate.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci
x86/iommu/intel_ctx.c optional acpi acpi_dmar pci
x86/iommu/intel_drv.c optional acpi acpi_dmar pci
x86/iommu/intel_fault.c optional acpi acpi_dmar pci
x86/iommu/intel_gas.c optional acpi acpi_dmar pci
x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci
x86/iommu/intel_intrmap.c optional acpi acpi_dmar pci
x86/iommu/intel_qi.c optional acpi acpi_dmar pci
x86/iommu/intel_quirks.c optional acpi acpi_dmar pci
x86/iommu/intel_utils.c optional acpi acpi_dmar pci
x86/isa/atpic.c optional atpic isa
x86/isa/atrtc.c standard
x86/isa/clock.c standard
x86/isa/elcr.c optional atpic isa | mptable
x86/isa/isa.c standard
x86/isa/isa_dma.c standard
x86/isa/nmi.c standard
x86/isa/orm.c optional isa
x86/pci/pci_bus.c optional pci
x86/pci/qpi.c optional pci
x86/x86/busdma_bounce.c standard
x86/x86/busdma_machdep.c standard
x86/x86/cpu_machdep.c standard
x86/x86/dump_machdep.c standard
x86/x86/fdt_machdep.c optional fdt
x86/x86/identcpu.c standard
x86/x86/intr_machdep.c standard
x86/x86/io_apic.c standard
x86/x86/legacy.c standard
x86/x86/local_apic.c standard
x86/x86/mca.c standard
x86/x86/mptable.c optional mptable
x86/x86/mptable_pci.c optional mptable pci
x86/x86/mp_x86.c optional smp
x86/x86/msi.c optional pci
x86/x86/nexus.c standard
x86/x86/pvclock.c standard
x86/x86/tsc.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm
-x86/xen/xen_intr.c optional xen | xenhvm
+x86/xen/xen_intr.c optional xenhvm
x86/xen/pv.c optional xenhvm
x86/xen/pvcpu_enum.c optional xenhvm
x86/xen/xen_apic.c optional xenhvm
x86/xen/xenpv.c optional xenhvm
x86/xen/xen_nexus.c optional xenhvm
x86/xen/xen_msi.c optional xenhvm
x86/xen/xen_pci_bus.c optional xenhvm
Index: head/sys/conf/files.i386
===================================================================
---
head/sys/conf/files.i386 (revision 282273) +++ head/sys/conf/files.i386 (revision 282274) @@ -1,602 +1,595 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # linux_genassym.o optional compat_linux \ dependency "$S/i386/linux/linux_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux_genassym.o" # linux_assym.h optional compat_linux \ dependency "$S/kern/genassym.sh linux_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux_assym.h" # svr4_genassym.o optional compat_svr4 \ dependency "$S/i386/svr4/svr4_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "svr4_genassym.o" # svr4_assym.h optional compat_svr4 \ dependency "$S/kern/genassym.sh svr4_genassym.o" \ compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "svr4_assym.h" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/i386-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/i386-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ no-implicit-rule # cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux compat/linux/linux_futex.c optional compat_linux 
compat/linux/linux_getcwd.c optional compat_linux compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux compat/linux/linux_sysctl.c optional compat_linux compat/linux/linux_time.c optional compat_linux compat/linux/linux_timer.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci compat/ndis/subr_ndis.c optional ndisapi pci compat/ndis/subr_ntoskrnl.c optional ndisapi pci compat/ndis/subr_pe.c optional ndisapi pci compat/ndis/subr_usbd.c optional ndisapi pci compat/ndis/winx32_wrap.S optional ndisapi pci compat/svr4/imgact_svr4.c optional compat_svr4 compat/svr4/svr4_fcntl.c optional compat_svr4 compat/svr4/svr4_filio.c optional compat_svr4 compat/svr4/svr4_ioctl.c optional compat_svr4 compat/svr4/svr4_ipc.c optional compat_svr4 compat/svr4/svr4_misc.c optional compat_svr4 compat/svr4/svr4_resource.c optional compat_svr4 compat/svr4/svr4_signal.c optional compat_svr4 compat/svr4/svr4_socket.c optional compat_svr4 compat/svr4/svr4_sockio.c optional compat_svr4 compat/svr4/svr4_stat.c optional compat_svr4 compat/svr4/svr4_stream.c optional compat_svr4 compat/svr4/svr4_syscallnames.c optional compat_svr4 compat/svr4/svr4_sysent.c optional compat_svr4 compat/svr4/svr4_sysvec.c optional compat_svr4 compat/svr4/svr4_termios.c optional compat_svr4 bf_enc.o optional crypto | ipsec \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni aesni_ghash.o optional aesni \ dependency "$S/crypto/aesni/aesni_ghash.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/advansys/adv_isa.c optional adv isa dev/agp/agp_ali.c optional agp dev/agp/agp_amd.c optional agp dev/agp/agp_amd64.c optional agp dev/agp/agp_ati.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_nvidia.c optional agp dev/agp/agp_sis.c optional agp dev/agp/agp_via.c optional agp dev/aic/aic_isa.c optional aic isa dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional 
psm atkbdc dev/bxe/bxe.c optional bxe pci dev/bxe/bxe_stats.c optional bxe pci dev/bxe/bxe_debug.c optional bxe pci dev/bxe/ecore_sp.c optional bxe pci dev/bxe/bxe_elink.c optional bxe pci dev/bxe/57710_init_values.c optional bxe pci dev/bxe/57711_init_values.c optional bxe pci dev/bxe/57712_init_values.c optional bxe pci dev/ce/ceddk.c optional ce dev/ce/if_ce.c optional ce dev/ce/tau32-ddk.c optional ce \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/cm/if_cm_isa.c optional cm isa dev/coretemp/coretemp.c optional coretemp dev/cp/cpddk.c optional cp dev/cp/if_cp.c optional cp dev/cpuctl/cpuctl.c optional cpuctl dev/ctau/ctau.c optional ctau dev/ctau/ctddk.c optional ctau dev/ctau/if_ct.c optional ctau dev/cx/csigma.c optional cx dev/cx/cxddk.c optional cx dev/cx/if_cx.c optional cx dev/dpms/dpms.c optional dpms dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fdt/fdt_x86.c optional fdt dev/fe/if_fe_isa.c optional fe isa dev/glxiic/glxiic.c optional glxiic dev/glxsb/glxsb.c optional glxsb dev/glxsb/glxsb_hash.c optional glxsb dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_pentium.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_ppro.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/netvsc/hv_net_vsc.c optional hyperv dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c optional hyperv dev/hyperv/netvsc/hv_rndis_filter.c optional hyperv dev/hyperv/stordisengage/hv_ata_pci_disengage.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv dev/hyperv/utilities/hv_util.c optional hyperv dev/hyperv/vmbus/hv_channel.c optional hyperv dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv dev/hyperv/vmbus/hv_connection.c optional hyperv dev/hyperv/vmbus/hv_hv.c optional hyperv dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c 
optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux dev/kbd/kbd.c optional atkbd | sc | ukbd | vt dev/le/if_le_isa.c optional le isa dev/mse/mse.c optional mse dev/mse/mse_isa.c optional mse isa dev/nfe/if_nfe.c optional nfe pci dev/nvd/nvd.c optional nvd nvme dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/pcf/pcf_isa.c optional pcf dev/random/ivy.c optional rdrand_rng dev/random/nehemiah.c optional padlock_rng dev/sbni/if_sbni.c optional sbni dev/sbni/if_sbni_isa.c optional sbni isa dev/sbni/if_sbni_pci.c optional sbni pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/acpica/acpi_if.m standard dev/acpica/acpi_hpet.c optional acpi dev/acpi_support/acpi_wmi_if.m standard dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional wpi dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci 
dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci 
dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci i386/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/i386/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # i386/bios/apm.c optional apm i386/bios/mca_machdep.c optional mca i386/bios/smapi.c optional smapi i386/bios/smapi_bios.S optional smapi #i386/i386/apic_vector.s optional apic i386/i386/atomic.c standard \ compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" i386/i386/autoconf.c standard -i386/i386/bios.c optional native -i386/i386/bioscall.s optional native +i386/i386/bios.c standard +i386/i386/bioscall.s standard i386/i386/bpf_jit_machdep.c optional bpf_jitter i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris i386/i386/elf_machdep.c standard -i386/i386/exception.s optional native -i386/xen/exception.s optional xen +i386/i386/exception.s standard i386/i386/gdb_machdep.c optional gdb i386/i386/geode.c optional cpu_geode i386/i386/i686_mem.c optional mem i386/i386/in_cksum.c optional inet | inet6 i386/i386/initcpu.c standard i386/i386/io.c optional io i386/i386/k6_mem.c optional mem -i386/i386/locore.s optional native no-obj -i386/xen/locore.s optional xen no-obj +i386/i386/locore.s standard no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard -i386/xen/xen_machdep.c optional xen i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard i386/i386/mp_clock.c optional smp -i386/i386/mp_machdep.c optional native smp -i386/xen/mp_machdep.c optional xen smp +i386/i386/mp_machdep.c optional smp i386/i386/mp_watchdog.c optional mp_watchdog smp -i386/i386/mpboot.s optional smp native -i386/xen/mptable.c optional apic xen +i386/i386/mpboot.s optional smp i386/i386/perfmon.c optional perfmon -i386/i386/pmap.c optional native -i386/xen/pmap.c optional xen +i386/i386/pmap.c standard i386/i386/ptrace_machdep.c standard i386/i386/stack_machdep.c optional ddb | stack i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard i386/ibcs2/ibcs2_errno.c optional ibcs2 i386/ibcs2/ibcs2_fcntl.c optional ibcs2 i386/ibcs2/ibcs2_ioctl.c optional ibcs2 i386/ibcs2/ibcs2_ipc.c optional ibcs2 i386/ibcs2/ibcs2_isc.c optional ibcs2 i386/ibcs2/ibcs2_isc_sysent.c optional ibcs2 i386/ibcs2/ibcs2_misc.c optional ibcs2 i386/ibcs2/ibcs2_msg.c optional ibcs2 i386/ibcs2/ibcs2_other.c optional ibcs2 i386/ibcs2/ibcs2_signal.c 
optional ibcs2 i386/ibcs2/ibcs2_socksys.c optional ibcs2 i386/ibcs2/ibcs2_stat.c optional ibcs2 i386/ibcs2/ibcs2_sysent.c optional ibcs2 i386/ibcs2/ibcs2_sysi86.c optional ibcs2 i386/ibcs2/ibcs2_sysvec.c optional ibcs2 i386/ibcs2/ibcs2_util.c optional ibcs2 i386/ibcs2/ibcs2_xenix.c optional ibcs2 i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2 i386/ibcs2/imgact_coff.c optional ibcs2 -i386/xen/clock.c optional xen i386/isa/elink.c optional ep | ie i386/isa/npx.c optional npx i386/isa/pmtimer.c optional pmtimer i386/isa/prof_machdep.c optional profiling-routine i386/isa/spic.c optional spic i386/linux/imgact_linux.c optional compat_linux i386/linux/linux_dummy.c optional compat_linux i386/linux/linux_locore.s optional compat_linux \ dependency "linux_assym.h" i386/linux/linux_machdep.c optional compat_linux i386/linux/linux_ptrace.c optional compat_linux i386/linux/linux_support.s optional compat_linux \ dependency "linux_assym.h" i386/linux/linux_sysent.c optional compat_linux i386/linux/linux_sysvec.c optional compat_linux i386/pci/pci_cfgreg.c optional pci i386/pci/pci_pir.c optional pci i386/svr4/svr4_locore.s optional compat_svr4 \ dependency "svr4_assym.h" \ warning "COMPAT_SVR4 is broken and should be avoided" i386/svr4/svr4_machdep.c optional compat_svr4 # isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip kern/subr_sfbuf.c standard libkern/divdi3.c standard libkern/flsll.c standard libkern/memmove.c standard libkern/memset.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard i386/xbox/xbox.c optional xbox i386/xbox/xboxfb.c optional xboxfb dev/fb/boot_font.c optional xboxfb i386/xbox/pic16l.s optional xbox # # x86 real mode BIOS support, required by atkbdc/dpms/pci/vesa # compat/x86bios/x86bios.c optional x86bios | atkbd | dpms | pci | vesa # # bvm console # dev/bvm/bvm_console.c optional bvmconsole dev/bvm/bvm_dbg.c optional bvmdebug # # x86 shared code between IA32, AMD64 and PC98 architectures # x86/acpica/OsdEnvironment.c optional acpi x86/acpica/acpi_apm.c optional acpi x86/acpica/acpi_wakeup.c optional acpi x86/acpica/madt.c optional acpi apic x86/acpica/srat.c optional acpi x86/bios/smbios.c optional smbios x86/bios/vpd.c optional vpd x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/smist.c optional cpufreq x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci x86/iommu/intel_ctx.c optional acpi acpi_dmar pci x86/iommu/intel_drv.c optional acpi acpi_dmar pci x86/iommu/intel_fault.c optional acpi acpi_dmar pci x86/iommu/intel_gas.c optional acpi acpi_dmar pci x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci x86/iommu/intel_intrmap.c optional acpi acpi_dmar pci x86/iommu/intel_qi.c optional acpi acpi_dmar pci x86/iommu/intel_quirks.c optional acpi acpi_dmar pci x86/iommu/intel_utils.c optional acpi acpi_dmar pci x86/isa/atpic.c optional atpic -x86/isa/atrtc.c optional native -x86/isa/clock.c optional native -x86/isa/elcr.c optional atpic | apic native +x86/isa/atrtc.c standard +x86/isa/clock.c standard +x86/isa/elcr.c optional atpic | apic x86/isa/isa.c optional isa x86/isa/isa_dma.c optional isa x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c 
standard x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard x86/x86/io_apic.c optional apic -x86/x86/legacy.c optional native +x86/x86/legacy.c standard x86/x86/local_apic.c optional apic x86/x86/mca.c standard -x86/x86/mptable.c optional apic native -x86/x86/mptable_pci.c optional apic native pci -x86/x86/mp_x86.c optional native smp +x86/x86/mptable.c optional apic +x86/x86/mptable_pci.c optional apic pci +x86/x86/mp_x86.c optional smp x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/tsc.c standard x86/x86/pvclock.c standard x86/x86/delay.c standard x86/xen/hvm.c optional xenhvm -x86/xen/xen_intr.c optional xen | xenhvm +x86/xen/xen_intr.c optional xenhvm x86/xen/xen_apic.c optional xenhvm -x86/xen/xenpv.c optional xen | xenhvm -x86/xen/xen_nexus.c optional xen | xenhvm -x86/xen/xen_msi.c optional xen | xenhvm +x86/xen/xenpv.c optional xenhvm +x86/xen/xen_nexus.c optional xenhvm +x86/xen/xen_msi.c optional xenhvm Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 (revision 282273) +++ head/sys/conf/options.i386 (revision 282274) @@ -1,131 +1,129 @@ # $FreeBSD$ # Options specific to the i386 platform kernels AUTO_EOI_1 opt_auto_eoi.h AUTO_EOI_2 opt_auto_eoi.h BROKEN_KEYBOARD_RESET opt_reset.h COUNT_XINVLTLB_HITS opt_smp.h COUNT_IPIS opt_smp.h DISABLE_PG_G opt_pmap.h DISABLE_PSE opt_pmap.h I586_PMC_GUPROF opt_i586_guprof.h MAXMEM MPTABLE_FORCE_HTT MP_WATCHDOG NKPT opt_pmap.h PERFMON PMAP_SHPGPERPROC opt_pmap.h POWERFAIL_NMI opt_trap.h PV_STATS opt_pmap.h # Options for emulators. These should only be used at config time, so # they are handled like options for static filesystems # (see src/sys/conf/options), except for broken debugging options. COMPAT_AOUT opt_dontuse.h IBCS2 opt_dontuse.h COMPAT_LINUX opt_dontuse.h COMPAT_SVR4 opt_dontuse.h DEBUG_SVR4 opt_svr4.h LINPROCFS opt_dontuse.h LINSYSFS opt_dontuse.h NDISAPI opt_dontuse.h # Change KVM size. Changes things all over the kernel. KVA_PAGES opt_global.h # Physical address extensions and support for >4G ram. As above. PAE opt_global.h # Use PAE page tables, but limit memory support to 4GB. # This keeps the i386 non-PAE KBI, in particular, drivers see # 32bit vm_paddr_t. PAE_TABLES opt_global.h TIMER_FREQ opt_clock.h CPU_ATHLON_SSE_HACK opt_cpu.h CPU_BLUELIGHTNING_3X opt_cpu.h CPU_BLUELIGHTNING_FPU_OP_CACHE opt_cpu.h CPU_BTB_EN opt_cpu.h CPU_CYRIX_NO_LOCK opt_cpu.h CPU_DIRECT_MAPPED_CACHE opt_cpu.h CPU_DISABLE_5X86_LSSER opt_cpu.h CPU_DISABLE_CMPXCHG opt_global.h # XXX global, unlike other CPU_* CPU_DISABLE_SSE opt_cpu.h CPU_ELAN opt_cpu.h CPU_ELAN_PPS opt_cpu.h CPU_ELAN_XTAL opt_cpu.h CPU_ENABLE_LONGRUN opt_cpu.h CPU_FASTER_5X86_FPU opt_cpu.h CPU_GEODE opt_cpu.h CPU_I486_ON_386 opt_cpu.h CPU_IORT opt_cpu.h CPU_L2_LATENCY opt_cpu.h CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SOEKRIS opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h NO_F00F_HACK opt_cpu.h NO_MEMORY_HOLE opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. 
I486_CPU opt_global.h I586_CPU opt_global.h I686_CPU opt_global.h # options for serial support COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h CONSPEED opt_sio.h GDBSPEED opt_sio.h COM_NO_ACPI opt_sio.h VGA_ALT_SEQACCESS opt_vga.h VGA_DEBUG opt_vga.h VGA_NO_FONT_LOADING opt_vga.h VGA_NO_MODE_CHANGE opt_vga.h VGA_SLOW_IOACCESS opt_vga.h VGA_WIDTH90 opt_vga.h VESA VESA_DEBUG opt_vesa.h # AGP debugging support AGP_DEBUG opt_agp.h PSM_DEBUG opt_psm.h PSM_HOOKRESUME opt_psm.h PSM_RESETAFTERSUSPEND opt_psm.h ATKBD_DFLT_KEYMAP opt_atkbd.h # Video spigot SPIGOT_UNSECURE opt_spigot.h # Enables NETGRAPH support for Cronyx adapters NETGRAPH_CRONYX opt_ng_cronyx.h # Device options DEV_APIC opt_apic.h DEV_ATPIC opt_atpic.h DEV_NPX opt_npx.h # Debugging NPX_DEBUG opt_npx.h # BPF just-in-time compiler BPF_JITTER opt_bpf.h -NATIVE opt_global.h -XEN opt_global.h XENHVM opt_global.h HYPERV opt_global.h # options for the Intel C600 SAS driver (isci) ISCI_LOGGING opt_isci.h Index: head/sys/dev/xen/balloon/balloon.c =================================================================== --- head/sys/dev/xen/balloon/balloon.c (revision 282273) +++ head/sys/dev/xen/balloon/balloon.c (revision 282274) @@ -1,453 +1,423 @@ /****************************************************************************** * balloon.c * * Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); /* Convert from KB (as fetched from xenstore) to number of PAGES */ #define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10) struct mtx balloon_mutex; /* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ unsigned long current_pages; unsigned long target_pages; /* We may hit the hard limit in Xen. If we do then we remember it. 
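	 * set_new_target() resets it to ~0UL (i.e. no limit),
	 * increase_reservation() records a new value whenever
	 * XENMEM_populate_physmap hands back fewer extents than requested,
	 * and current_target() clamps the target allocation against it.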
*/ unsigned long hard_limit; /* * Drivers may alter the memory reservation independently, but they * must inform the balloon driver so we avoid hitting the hard limit. */ unsigned long driver_pages; /* Number of pages in high- and low-memory balloons. */ unsigned long balloon_low; unsigned long balloon_high; }; static struct balloon_stats balloon_stats; #define bs balloon_stats SYSCTL_DECL(_dev_xen); static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, &bs.current_pages, 0, "Current allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, &bs.target_pages, 0, "Target allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, &bs.driver_pages, 0, "Driver pages"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, &bs.hard_limit, 0, "Xen hard limit"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, &bs.balloon_low, 0, "Low-mem balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, &bs.balloon_high, 0, "High-mem balloon"); /* List of ballooned pages, threaded through the mem_map array. */ static TAILQ_HEAD(,vm_page) ballooned_pages; /* Main work function, always executed in process context. */ static void balloon_process(void *unused); #define IPRINTK(fmt, args...) \ printk(KERN_INFO "xen_mem: " fmt, ##args) #define WPRINTK(fmt, args...) \ printk(KERN_WARNING "xen_mem: " fmt, ##args) static unsigned long current_target(void) { unsigned long target = min(bs.target_pages, bs.hard_limit); if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) target = bs.current_pages + bs.balloon_low + bs.balloon_high; return (target); } static unsigned long minimum_target(void) { -#ifdef XENHVM -#define max_pfn realmem -#else -#define max_pfn HYPERVISOR_shared_info->arch.max_pfn -#endif unsigned long min_pages, curr_pages = current_target(); #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) /* * Simple continuous piecewiese linear function: * max MiB -> min MiB gradient * 0 0 * 16 16 * 32 24 * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) * 8192 552 (1/32) * 32768 1320 * 131072 4392 */ - if (max_pfn < MB2PAGES(128)) - min_pages = MB2PAGES(8) + (max_pfn >> 1); - else if (max_pfn < MB2PAGES(512)) - min_pages = MB2PAGES(40) + (max_pfn >> 2); - else if (max_pfn < MB2PAGES(2048)) - min_pages = MB2PAGES(104) + (max_pfn >> 3); + if (realmem < MB2PAGES(128)) + min_pages = MB2PAGES(8) + (realmem >> 1); + else if (realmem < MB2PAGES(512)) + min_pages = MB2PAGES(40) + (realmem >> 2); + else if (realmem < MB2PAGES(2048)) + min_pages = MB2PAGES(104) + (realmem >> 3); else - min_pages = MB2PAGES(296) + (max_pfn >> 5); + min_pages = MB2PAGES(296) + (realmem >> 5); #undef MB2PAGES -#undef max_pfn /* Don't enforce growth */ return (min(min_pages, curr_pages)); } static int increase_reservation(unsigned long nr_pages) { unsigned long pfn, i; vm_page_t page; long rc; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; mtx_assert(&balloon_mutex, MA_OWNED); if (nr_pages > nitems(frame_list)) nr_pages = nitems(frame_list); for (page = TAILQ_FIRST(&ballooned_pages), i = 0; i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) { KASSERT(page != NULL, ("ballooned_pages list corrupt")); frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, 
&reservation); if (rc < nr_pages) { if (rc > 0) { int ret; /* We hit the Xen hard limit: reprobe. */ reservation.nr_extents = rc; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); } if (rc >= 0) bs.hard_limit = (bs.current_pages + rc - bs.driver_pages); goto out; } for (i = 0; i < nr_pages; i++) { page = TAILQ_FIRST(&ballooned_pages); KASSERT(page != NULL, ("Unable to get ballooned page")); TAILQ_REMOVE(&ballooned_pages, page, plinks.q); bs.balloon_low--; pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); - KASSERT((xen_feature(XENFEAT_auto_translated_physmap) || - !phys_to_machine_mapping_valid(pfn)), + KASSERT(xen_feature(XENFEAT_auto_translated_physmap), ("auto translated physmap but mapping is valid")); - set_phys_to_machine(pfn, frame_list[i]); - vm_page_free(page); } bs.current_pages += nr_pages; out: return (0); } static int decrease_reservation(unsigned long nr_pages) { unsigned long pfn, i; vm_page_t page; int need_sleep = 0; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; mtx_assert(&balloon_mutex, MA_OWNED); if (nr_pages > nitems(frame_list)) nr_pages = nitems(frame_list); for (i = 0; i < nr_pages; i++) { if ((page = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_ZERO)) == NULL) { nr_pages = i; need_sleep = 1; break; } if ((page->flags & PG_ZERO) == 0) { /* * Zero the page, or else we might be leaking * important data to other domains on the same * host. Xen doesn't scrub ballooned out memory * pages, the guest is in charge of making * sure that no information is leaked. */ pmap_zero_page(page); } pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); - frame_list[i] = PFNTOMFN(pfn); + frame_list[i] = pfn; - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); bs.balloon_low++; } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); bs.current_pages -= nr_pages; return (need_sleep); } /* * We avoid multiple worker processes conflicting via the balloon mutex. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ static void balloon_process(void *unused) { int need_sleep = 0; long credit; mtx_lock(&balloon_mutex); for (;;) { int sleep_time; do { credit = current_target() - bs.current_pages; if (credit > 0) need_sleep = (increase_reservation(credit) != 0); if (credit < 0) need_sleep = (decrease_reservation(-credit) != 0); } while ((credit != 0) && !need_sleep); /* Schedule more work if there is some still to be done. */ if (current_target() != bs.current_pages) sleep_time = hz; else sleep_time = 0; msleep(balloon_process, &balloon_mutex, 0, "balloon", sleep_time); } mtx_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ static void set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. 
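	 * The max() below keeps the stored target no lower than
	 * minimum_target(): for a 1 GiB guest the piecewise table above gives
	 * a floor of MB2PAGES(104) + realmem/8, i.e. roughly 232 MiB worth of
	 * pages, never exceeding the current target.  The wakeup() pairs with
	 * the msleep() in balloon_process(), which recomputes its credit from
	 * current_target() on every iteration.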
*/ bs.hard_limit = ~0UL; bs.target_pages = max(target, minimum_target()); wakeup(balloon_process); } static struct xs_watch target_watch = { .node = "memory/target" }; /* React to a change in the target key */ static void watch_target(struct xs_watch *watch, const char **vec, unsigned int len) { unsigned long long new_target; int err; err = xs_scanf(XST_NIL, "memory", "target", NULL, "%llu", &new_target); if (err) { /* This is ok (for domain0 at least) - so just return */ return; } /* * The given memory/target value is in KiB, so it needs converting to * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. */ set_new_target(new_target >> KB_TO_PAGE_SHIFT); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void xenballoon_identify(driver_t *driver __unused, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existance of the Xen Balloon device * * \param dev NewBus device_t for this Xen control instance. * * \return Always returns 0 indicating success. */ static int xenballoon_probe(device_t dev) { device_set_desc(dev, "Xen Balloon Device"); return (0); } /** * \brief Attach the Xen Balloon device. * * \param dev NewBus device_t for this Xen control instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenballoon_attach(device_t dev) { int err; -#ifndef XENHVM - vm_page_t page; - unsigned long pfn; -#define max_pfn HYPERVISOR_shared_info->arch.max_pfn -#endif - mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); -#ifndef XENHVM - bs.current_pages = min(xen_start_info->nr_pages, max_pfn); -#else bs.current_pages = xen_pv_domain() ? HYPERVISOR_start_info->nr_pages : realmem; -#endif bs.target_pages = bs.current_pages; bs.balloon_low = 0; bs.balloon_high = 0; bs.driver_pages = 0UL; bs.hard_limit = ~0UL; kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); -#ifndef XENHVM - /* Initialise the balloon with excess memory space. */ - for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { - page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); - TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); - bs.balloon_low++; - } -#undef max_pfn -#endif - target_watch.callback = watch_target; err = xs_register_watch(&target_watch); if (err) device_printf(dev, "xenballon: failed to set balloon watcher\n"); return (err); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenballoon_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xenballoon_identify), DEVMETHOD(device_probe, xenballoon_probe), DEVMETHOD(device_attach, xenballoon_attach), DEVMETHOD_END }; DEFINE_CLASS_0(xenballoon, xenballoon_driver, xenballoon_methods, 0); devclass_t xenballoon_devclass; DRIVER_MODULE(xenballoon, xenstore, xenballoon_driver, xenballoon_devclass, NULL, NULL); Index: head/sys/dev/xen/blkback/blkback.c =================================================================== --- head/sys/dev/xen/blkback/blkback.c (revision 282273) +++ head/sys/dev/xen/blkback/blkback.c (revision 282274) @@ -1,4034 +1,4021 @@ /*- * Copyright (c) 2009-2012 Spectra Logic Corporation * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * Ken Merry (Spectra Logic Corporation) */ #include __FBSDID("$FreeBSD$"); /** * \file blkback.c * * \brief Device driver supporting the vending of block storage from * a FreeBSD domain to other domains. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*--------------------------- Compile-time Tunables --------------------------*/ /** * The maximum number of outstanding request blocks (request headers plus * additional segment blocks) we will allow in a negotiated block-front/back * communication channel. */ #define XBB_MAX_REQUESTS 256 /** * \brief Define to force all I/O to be performed on memory owned by the * backend device, with a copy-in/out to the remote domain's memory. * * \note This option is currently required when this driver's domain is * operating in HVM mode on a system using an IOMMU. * * This driver uses Xen's grant table API to gain access to the memory of * the remote domains it serves. When our domain is operating in PV mode, * the grant table mechanism directly updates our domain's page table entries * to point to the physical pages of the remote domain. This scheme guarantees * that blkback and the backing devices it uses can safely perform DMA * operations to satisfy requests. In HVM mode, Xen may use a HW IOMMU to * insure that our domain cannot DMA to pages owned by another domain. As * of Xen 4.0, IOMMU mappings for HVM guests are not updated via the grant * table API. For this reason, in HVM mode, we must bounce all requests into * memory that is mapped into our domain at domain startup and thus has * valid IOMMU mappings. */ #define XBB_USE_BOUNCE_BUFFERS /** * \brief Define to enable rudimentary request logging to the console. */ #undef XBB_DEBUG /*---------------------------------- Macros ----------------------------------*/ /** * Custom malloc type for all driver allocations. 
*/ static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data"); #ifdef XBB_DEBUG #define DPRINTF(fmt, args...) \ printf("xbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while(0) #endif /** * The maximum mapped region size per request we will allow in a negotiated * block-front/back communication channel. */ #define XBB_MAX_REQUEST_SIZE \ MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) /** * The maximum number of segments (within a request header and accompanying * segment blocks) per request we will allow in a negotiated block-front/back * communication channel. */ #define XBB_MAX_SEGMENTS_PER_REQUEST \ (MIN(UIO_MAXIOV, \ MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1))) /** * The maximum number of shared memory ring pages we will allow in a * negotiated block-front/back communication channel. Allow enough * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd. */ #define XBB_MAX_RING_PAGES \ BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \ * XBB_MAX_REQUESTS) /** * The maximum number of ring pages that we can allow per request list. * We limit this to the maximum number of segments per request, because * that is already a reasonable number of segments to aggregate. This * number should never be smaller than XBB_MAX_SEGMENTS_PER_REQUEST, * because that would leave situations where we can't dispatch even one * large request. */ #define XBB_MAX_SEGMENTS_PER_REQLIST XBB_MAX_SEGMENTS_PER_REQUEST /*--------------------------- Forward Declarations ---------------------------*/ struct xbb_softc; struct xbb_xen_req; static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) __attribute__((format(printf, 3, 4))); static int xbb_shutdown(struct xbb_softc *xbb); static int xbb_detach(device_t dev); /*------------------------------ Data Structures -----------------------------*/ STAILQ_HEAD(xbb_xen_req_list, xbb_xen_req); typedef enum { XBB_REQLIST_NONE = 0x00, XBB_REQLIST_MAPPED = 0x01 } xbb_reqlist_flags; struct xbb_xen_reqlist { /** * Back reference to the parent block back instance for this * request. Used during bio_done handling. */ struct xbb_softc *xbb; /** * BLKIF_OP code for this request. */ int operation; /** * Set to BLKIF_RSP_* to indicate request status. * * This field allows an error status to be recorded even if the * delivery of this status must be deferred. Deferred reporting * is necessary, for example, when an error is detected during * completion processing of one bio when other bios for this * request are still outstanding. */ int status; /** * Number of 512 byte sectors not transferred. */ int residual_512b_sectors; /** * Starting sector number of the first request in the list. */ off_t starting_sector_number; /** * If we're going to coalesce, the next contiguous sector would be * this one. */ off_t next_contig_sector; /** * Number of child requests in the list. */ int num_children; /** * Number of I/O requests still pending on the backend. */ int pendcnt; /** * Total number of segments for requests in the list. */ int nr_segments; /** * Flags for this particular request list. */ xbb_reqlist_flags flags; /** * Kernel virtual address space reserved for this request * list structure and used to map the remote domain's pages for * this I/O, into our domain's address space. */ uint8_t *kva; /** * Base, psuedo-physical address, corresponding to the start * of this request's kva region. 
*/ uint64_t gnt_base; #ifdef XBB_USE_BOUNCE_BUFFERS /** * Pre-allocated domain local memory used to proxy remote * domain memory during I/O operations. */ uint8_t *bounce; #endif /** * Array of grant handles (one per page) used to map this request. */ grant_handle_t *gnt_handles; /** * Device statistics request ordering type (ordered or simple). */ devstat_tag_type ds_tag_type; /** * Device statistics request type (read, write, no_data). */ devstat_trans_flags ds_trans_type; /** * The start time for this request. */ struct bintime ds_t0; /** * Linked list of contiguous requests with the same operation type. */ struct xbb_xen_req_list contig_req_list; /** * Linked list links used to aggregate idle requests in the * request list free pool (xbb->reqlist_free_stailq) and pending * requests waiting for execution (xbb->reqlist_pending_stailq). */ STAILQ_ENTRY(xbb_xen_reqlist) links; }; STAILQ_HEAD(xbb_xen_reqlist_list, xbb_xen_reqlist); /** * \brief Object tracking an in-flight I/O from a Xen VBD consumer. */ struct xbb_xen_req { /** * Linked list links used to aggregate requests into a reqlist * and to store them in the request free pool. */ STAILQ_ENTRY(xbb_xen_req) links; /** * The remote domain's identifier for this I/O request. */ uint64_t id; /** * The number of pages currently mapped for this request. */ int nr_pages; /** * The number of 512 byte sectors comprising this requests. */ int nr_512b_sectors; /** * BLKIF_OP code for this request. */ int operation; /** * Storage used for non-native ring requests. */ blkif_request_t ring_req_storage; /** * Pointer to the Xen request in the ring. */ blkif_request_t *ring_req; /** * Consumer index for this request. */ RING_IDX req_ring_idx; /** * The start time for this request. */ struct bintime ds_t0; /** * Pointer back to our parent request list. */ struct xbb_xen_reqlist *reqlist; }; SLIST_HEAD(xbb_xen_req_slist, xbb_xen_req); /** * \brief Configuration data for the shared memory request ring * used to communicate with the front-end client of this * this driver. */ struct xbb_ring_config { /** KVA address where ring memory is mapped. */ vm_offset_t va; /** The pseudo-physical address where ring memory is mapped.*/ uint64_t gnt_addr; /** * Grant table handles, one per-ring page, returned by the * hyperpervisor upon mapping of the ring and required to * unmap it when a connection is torn down. */ grant_handle_t handle[XBB_MAX_RING_PAGES]; /** * The device bus address returned by the hypervisor when * mapping the ring and required to unmap it when a connection * is torn down. */ uint64_t bus_addr[XBB_MAX_RING_PAGES]; /** The number of ring pages mapped for the current connection. */ u_int ring_pages; /** * The grant references, one per-ring page, supplied by the * front-end, allowing us to reference the ring pages in the * front-end's domain and to map these pages into our own domain. */ grant_ref_t ring_ref[XBB_MAX_RING_PAGES]; /** The interrupt driven even channel used to signal ring events. */ evtchn_port_t evtchn; }; /** * Per-instance connection state flags. */ typedef enum { /** * The front-end requested a read-only mount of the * back-end device/file. */ XBBF_READ_ONLY = 0x01, /** Communication with the front-end has been established. */ XBBF_RING_CONNECTED = 0x02, /** * Front-end requests exist in the ring and are waiting for * xbb_xen_req objects to free up. */ XBBF_RESOURCE_SHORTAGE = 0x04, /** Connection teardown in progress. */ XBBF_SHUTDOWN = 0x08, /** A thread is already performing shutdown processing. 
*/ XBBF_IN_SHUTDOWN = 0x10 } xbb_flag_t; /** Backend device type. */ typedef enum { /** Backend type unknown. */ XBB_TYPE_NONE = 0x00, /** * Backend type disk (access via cdev switch * strategy routine). */ XBB_TYPE_DISK = 0x01, /** Backend type file (access vnode operations.). */ XBB_TYPE_FILE = 0x02 } xbb_type; /** * \brief Structure used to memoize information about a per-request * scatter-gather list. * * The chief benefit of using this data structure is it avoids having * to reparse the possibly discontiguous S/G list in the original * request. Due to the way that the mapping of the memory backing an * I/O transaction is handled by Xen, a second pass is unavoidable. * At least this way the second walk is a simple array traversal. * * \note A single Scatter/Gather element in the block interface covers * at most 1 machine page. In this context a sector (blkif * nomenclature, not what I'd choose) is a 512b aligned unit * of mapping within the machine page referenced by an S/G * element. */ struct xbb_sg { /** The number of 512b data chunks mapped in this S/G element. */ int16_t nsect; /** * The index (0 based) of the first 512b data chunk mapped * in this S/G element. */ uint8_t first_sect; /** * The index (0 based) of the last 512b data chunk mapped * in this S/G element. */ uint8_t last_sect; }; /** * Character device backend specific configuration data. */ struct xbb_dev_data { /** Cdev used for device backend access. */ struct cdev *cdev; /** Cdev switch used for device backend access. */ struct cdevsw *csw; /** Used to hold a reference on opened cdev backend devices. */ int dev_ref; }; /** * File backend specific configuration data. */ struct xbb_file_data { /** Credentials to use for vnode backed (file based) I/O. */ struct ucred *cred; /** * \brief Array of io vectors used to process file based I/O. * * Only a single file based request is outstanding per-xbb instance, * so we only need one of these. */ struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; #ifdef XBB_USE_BOUNCE_BUFFERS /** * \brief Array of io vectors used to handle bouncing of file reads. * * Vnode operations are free to modify uio data during their * exectuion. In the case of a read with bounce buffering active, * we need some of the data from the original uio in order to * bounce-out the read data. This array serves as the temporary * storage for this saved data. */ struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; /** * \brief Array of memoized bounce buffer kva offsets used * in the file based backend. * * Due to the way that the mapping of the memory backing an * I/O transaction is handled by Xen, a second pass through * the request sg elements is unavoidable. We memoize the computed * bounce address here to reduce the cost of the second walk. */ void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQLIST]; #endif /* XBB_USE_BOUNCE_BUFFERS */ }; /** * Collection of backend type specific data. */ union xbb_backend_data { struct xbb_dev_data dev; struct xbb_file_data file; }; /** * Function signature of backend specific I/O handlers. */ typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int flags); /** * Per-instance configuration data. */ struct xbb_softc { /** * Task-queue used to process I/O requests. */ struct taskqueue *io_taskqueue; /** * Single "run the request queue" task enqueued * on io_taskqueue. */ struct task io_task; /** Device type for this instance. */ xbb_type device_type; /** NewBus device corresponding to this instance. 
*/ device_t dev; /** Backend specific dispatch routine for this instance. */ xbb_dispatch_t dispatch_io; /** The number of requests outstanding on the backend device/file. */ int active_request_count; /** Free pool of request tracking structures. */ struct xbb_xen_req_list request_free_stailq; /** Array, sized at connection time, of request tracking structures. */ struct xbb_xen_req *requests; /** Free pool of request list structures. */ struct xbb_xen_reqlist_list reqlist_free_stailq; /** List of pending request lists awaiting execution. */ struct xbb_xen_reqlist_list reqlist_pending_stailq; /** Array, sized at connection time, of request list structures. */ struct xbb_xen_reqlist *request_lists; /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. */ vm_offset_t kva; /** Psuedo-physical address corresponding to kva. */ uint64_t gnt_base_addr; /** The size of the global kva pool. */ int kva_size; /** The size of the KVA area used for request lists. */ int reqlist_kva_size; /** The number of pages of KVA used for request lists */ int reqlist_kva_pages; /** Bitmap of free KVA pages */ bitstr_t *kva_free; /** * \brief Cached value of the front-end's domain id. * * This value is used at once for each mapped page in * a transaction. We cache it to avoid incuring the * cost of an ivar access every time this is needed. */ domid_t otherend_id; /** * \brief The blkif protocol abi in effect. * * There are situations where the back and front ends can * have a different, native abi (e.g. intel x86_64 and * 32bit x86 domains on the same machine). The back-end * always accomodates the front-end's native abi. That * value is pulled from the XenStore and recorded here. */ int abi; /** * \brief The maximum number of requests and request lists allowed * to be in flight at a time. * * This value is negotiated via the XenStore. */ u_int max_requests; /** * \brief The maximum number of segments (1 page per segment) * that can be mapped by a request. * * This value is negotiated via the XenStore. */ u_int max_request_segments; /** * \brief Maximum number of segments per request list. * * This value is derived from and will generally be larger than * max_request_segments. */ u_int max_reqlist_segments; /** * The maximum size of any request to this back-end * device. * * This value is negotiated via the XenStore. */ u_int max_request_size; /** * The maximum size of any request list. This is derived directly * from max_reqlist_segments. */ u_int max_reqlist_size; /** Various configuration and state bit flags. */ xbb_flag_t flags; /** Ring mapping and interrupt configuration data. */ struct xbb_ring_config ring_config; /** Runtime, cross-abi safe, structures for ring access. */ blkif_back_rings_t rings; /** IRQ mapping for the communication ring event channel. */ xen_intr_handle_t xen_intr_handle; /** * \brief Backend access mode flags (e.g. write, or read-only). * * This value is passed to us by the front-end via the XenStore. */ char *dev_mode; /** * \brief Backend device type (e.g. "disk", "cdrom", "floppy"). * * This value is passed to us by the front-end via the XenStore. * Currently unused. */ char *dev_type; /** * \brief Backend device/file identifier. * * This value is passed to us by the front-end via the XenStore. * We expect this to be a POSIX path indicating the file or * device to open. */ char *dev_name; /** * Vnode corresponding to the backend device node or file * we are acessing. 
*/ struct vnode *vn; union xbb_backend_data backend; /** The native sector size of the backend. */ u_int sector_size; /** log2 of sector_size. */ u_int sector_size_shift; /** Size in bytes of the backend device or file. */ off_t media_size; /** * \brief media_size expressed in terms of the backend native * sector size. * * (e.g. xbb->media_size >> xbb->sector_size_shift). */ uint64_t media_num_sectors; /** * \brief Array of memoized scatter gather data computed during the * conversion of blkif ring requests to internal xbb_xen_req * structures. * * Ring processing is serialized so we only need one of these. */ struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQLIST]; /** * Temporary grant table map used in xbb_dispatch_io(). When * XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping this on the * stack could cause a stack overflow. */ struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQLIST]; /** Mutex protecting per-instance data. */ struct mtx lock; -#ifdef XENHVM /** * Resource representing allocated physical address space * associated with our per-instance kva region. */ struct resource *pseudo_phys_res; /** Resource id for allocated physical address space. */ int pseudo_phys_res_id; -#endif /** * I/O statistics from BlockBack dispatch down. These are * coalesced requests, and we start them right before execution. */ struct devstat *xbb_stats; /** * I/O statistics coming into BlockBack. These are the requests as * we get them from BlockFront. They are started as soon as we * receive a request, and completed when the I/O is complete. */ struct devstat *xbb_stats_in; /** Disable sending flush to the backend */ int disable_flush; /** Send a real flush for every N flush requests */ int flush_interval; /** Count of flush requests in the interval */ int flush_count; /** Don't coalesce requests if this is set */ int no_coalesce_reqs; /** Number of requests we have received */ uint64_t reqs_received; /** Number of requests we have completed*/ uint64_t reqs_completed; /** Number of requests we queued but not pushed*/ uint64_t reqs_queued_for_completion; /** Number of requests we completed with an error status*/ uint64_t reqs_completed_with_error; /** How many forced dispatches (i.e. without coalescing) have happend */ uint64_t forced_dispatch; /** How many normal dispatches have happend */ uint64_t normal_dispatch; /** How many total dispatches have happend */ uint64_t total_dispatch; /** How many times we have run out of KVA */ uint64_t kva_shortages; /** How many times we have run out of request structures */ uint64_t request_shortages; }; /*---------------------------- Request Processing ----------------------------*/ /** * Allocate an internal transaction tracking structure from the free pool. * * \param xbb Per-instance xbb configuration structure. * * \return On success, a pointer to the allocated xbb_xen_req structure. * Otherwise NULL. */ static inline struct xbb_xen_req * xbb_get_req(struct xbb_softc *xbb) { struct xbb_xen_req *req; req = NULL; mtx_assert(&xbb->lock, MA_OWNED); if ((req = STAILQ_FIRST(&xbb->request_free_stailq)) != NULL) { STAILQ_REMOVE_HEAD(&xbb->request_free_stailq, links); xbb->active_request_count++; } return (req); } /** * Return an allocated transaction tracking structure to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req The request structure to free. 
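 *
 * A hedged usage sketch (illustrative only, not a verbatim caller from this
 * driver): both xbb_get_req() and xbb_release_req() assert that the caller
 * holds the per-instance lock, so a minimal pairing looks like:
 *
 *	mtx_lock(&xbb->lock);
 *	req = xbb_get_req(xbb);
 *	if (req != NULL) {
 *		... use the request ...
 *		xbb_release_req(xbb, req);
 *	}
 *	mtx_unlock(&xbb->lock);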
*/ static inline void xbb_release_req(struct xbb_softc *xbb, struct xbb_xen_req *req) { mtx_assert(&xbb->lock, MA_OWNED); STAILQ_INSERT_HEAD(&xbb->request_free_stailq, req, links); xbb->active_request_count--; KASSERT(xbb->active_request_count >= 0, ("xbb_release_req: negative active count")); } /** * Return an xbb_xen_req_list of allocated xbb_xen_reqs to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req_list The list of requests to free. * \param nreqs The number of items in the list. */ static inline void xbb_release_reqs(struct xbb_softc *xbb, struct xbb_xen_req_list *req_list, int nreqs) { mtx_assert(&xbb->lock, MA_OWNED); STAILQ_CONCAT(&xbb->request_free_stailq, req_list); xbb->active_request_count -= nreqs; KASSERT(xbb->active_request_count >= 0, ("xbb_release_reqs: negative active count")); } /** * Given a page index and 512b sector offset within that page, * calculate an offset into a request's kva region. * * \param reqlist The request structure whose kva region will be accessed. * \param pagenr The page index used to compute the kva offset. * \param sector The 512b sector index used to compute the page relative * kva offset. * * \return The computed global KVA offset. */ static inline uint8_t * xbb_reqlist_vaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { return (reqlist->kva + (PAGE_SIZE * pagenr) + (sector << 9)); } #ifdef XBB_USE_BOUNCE_BUFFERS /** * Given a page index and 512b sector offset within that page, * calculate an offset into a request's local bounce memory region. * * \param reqlist The request structure whose bounce region will be accessed. * \param pagenr The page index used to compute the bounce offset. * \param sector The 512b sector index used to compute the page relative * bounce offset. * * \return The computed global bounce buffer address. */ static inline uint8_t * xbb_reqlist_bounce_addr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { return (reqlist->bounce + (PAGE_SIZE * pagenr) + (sector << 9)); } #endif /** * Given a page number and 512b sector offset within that page, * calculate an offset into the request's memory region that the * underlying backend device/file should use for I/O. * * \param reqlist The request structure whose I/O region will be accessed. * \param pagenr The page index used to compute the I/O offset. * \param sector The 512b sector index used to compute the page relative * I/O offset. * * \return The computed global I/O address. * * Depending on configuration, this will either be a local bounce buffer * or a pointer to the memory mapped in from the front-end domain for * this request. */ static inline uint8_t * xbb_reqlist_ioaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { #ifdef XBB_USE_BOUNCE_BUFFERS return (xbb_reqlist_bounce_addr(reqlist, pagenr, sector)); #else return (xbb_reqlist_vaddr(reqlist, pagenr, sector)); #endif } /** * Given a page index and 512b sector offset within that page, calculate * an offset into the local psuedo-physical address space used to map a * front-end's request data into a request. * * \param reqlist The request list structure whose pseudo-physical region * will be accessed. * \param pagenr The page index used to compute the pseudo-physical offset. * \param sector The 512b sector index used to compute the page relative * pseudo-physical offset. * * \return The computed global pseudo-phsyical address. 
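 *
 * A worked example (assuming the usual 4 KiB PAGE_SIZE): pagenr 2 and
 * sector 3 contribute (2 * 4096) + (3 << 9) = 8192 + 1536 = 9728 bytes on
 * top of this reqlist's offset within the instance's grant address range.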
* * Depending on configuration, this will either be a local bounce buffer * or a pointer to the memory mapped in from the front-end domain for * this request. */ static inline uintptr_t xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { struct xbb_softc *xbb; xbb = reqlist->xbb; return ((uintptr_t)(xbb->gnt_base_addr + (uintptr_t)(reqlist->kva - xbb->kva) + (PAGE_SIZE * pagenr) + (sector << 9))); } /** * Get Kernel Virtual Address space for mapping requests. * * \param xbb Per-instance xbb configuration structure. * \param nr_pages Number of pages needed. * \param check_only If set, check for free KVA but don't allocate it. * \param have_lock If set, xbb lock is already held. * * \return On success, a pointer to the allocated KVA region. Otherwise NULL. * * Note: This should be unnecessary once we have either chaining or * scatter/gather support for struct bio. At that point we'll be able to * put multiple addresses and lengths in one bio/bio chain and won't need * to map everything into one virtual segment. */ static uint8_t * xbb_get_kva(struct xbb_softc *xbb, int nr_pages) { intptr_t first_clear; intptr_t num_clear; uint8_t *free_kva; int i; KASSERT(nr_pages != 0, ("xbb_get_kva of zero length")); first_clear = 0; free_kva = NULL; mtx_lock(&xbb->lock); /* * Look for the first available page. If there are none, we're done. */ bit_ffc(xbb->kva_free, xbb->reqlist_kva_pages, &first_clear); if (first_clear == -1) goto bailout; /* * Starting at the first available page, look for consecutive free * pages that will satisfy the user's request. */ for (i = first_clear, num_clear = 0; i < xbb->reqlist_kva_pages; i++) { /* * If this is true, the page is used, so we have to reset * the number of clear pages and the first clear page * (since it pointed to a region with an insufficient number * of clear pages). */ if (bit_test(xbb->kva_free, i)) { num_clear = 0; first_clear = -1; continue; } if (first_clear == -1) first_clear = i; /* * If this is true, we've found a large enough free region * to satisfy the request. */ if (++num_clear == nr_pages) { bit_nset(xbb->kva_free, first_clear, first_clear + nr_pages - 1); free_kva = xbb->kva + (uint8_t *)(first_clear * PAGE_SIZE); KASSERT(free_kva >= (uint8_t *)xbb->kva && free_kva + (nr_pages * PAGE_SIZE) <= (uint8_t *)xbb->ring_config.va, ("Free KVA %p len %d out of range, " "kva = %#jx, ring VA = %#jx\n", free_kva, nr_pages * PAGE_SIZE, (uintmax_t)xbb->kva, (uintmax_t)xbb->ring_config.va)); break; } } bailout: if (free_kva == NULL) { xbb->flags |= XBBF_RESOURCE_SHORTAGE; xbb->kva_shortages++; } mtx_unlock(&xbb->lock); return (free_kva); } /** * Free allocated KVA. * * \param xbb Per-instance xbb configuration structure. * \param kva_ptr Pointer to allocated KVA region. * \param nr_pages Number of pages in the KVA region. */ static void xbb_free_kva(struct xbb_softc *xbb, uint8_t *kva_ptr, int nr_pages) { intptr_t start_page; mtx_assert(&xbb->lock, MA_OWNED); start_page = (intptr_t)(kva_ptr - xbb->kva) >> PAGE_SHIFT; bit_nclear(xbb->kva_free, start_page, start_page + nr_pages - 1); } /** * Unmap the front-end pages associated with this I/O request. * * \param req The request structure to unmap. 
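 *
 * \note Rough shape of the unmap performed here (a sketch of what the
 *       function body below does, not additional behaviour): one
 *       gnttab_unmap_grant_ref entry is filled in per mapped segment,
 *       e.g.
 *
 *           unmap[i].host_addr    = xbb_get_gntaddr(reqlist, i, 0);
 *           unmap[i].dev_bus_addr = 0;
 *           unmap[i].handle       = reqlist->gnt_handles[i];
 *
 *       and the whole batch is handed to the hypervisor in a single
 *       GNTTABOP_unmap_grant_ref operation.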
*/ static void xbb_unmap_reqlist(struct xbb_xen_reqlist *reqlist) { struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQLIST]; u_int i; u_int invcount; int error; invcount = 0; for (i = 0; i < reqlist->nr_segments; i++) { if (reqlist->gnt_handles[i] == GRANT_REF_INVALID) continue; unmap[invcount].host_addr = xbb_get_gntaddr(reqlist, i, 0); unmap[invcount].dev_bus_addr = 0; unmap[invcount].handle = reqlist->gnt_handles[i]; reqlist->gnt_handles[i] = GRANT_REF_INVALID; invcount++; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, invcount); KASSERT(error == 0, ("Grant table operation failed")); } /** * Allocate an internal transaction tracking structure from the free pool. * * \param xbb Per-instance xbb configuration structure. * * \return On success, a pointer to the allocated xbb_xen_reqlist structure. * Otherwise NULL. */ static inline struct xbb_xen_reqlist * xbb_get_reqlist(struct xbb_softc *xbb) { struct xbb_xen_reqlist *reqlist; reqlist = NULL; mtx_assert(&xbb->lock, MA_OWNED); if ((reqlist = STAILQ_FIRST(&xbb->reqlist_free_stailq)) != NULL) { STAILQ_REMOVE_HEAD(&xbb->reqlist_free_stailq, links); reqlist->flags = XBB_REQLIST_NONE; reqlist->kva = NULL; reqlist->status = BLKIF_RSP_OKAY; reqlist->residual_512b_sectors = 0; reqlist->num_children = 0; reqlist->nr_segments = 0; STAILQ_INIT(&reqlist->contig_req_list); } return (reqlist); } /** * Return an allocated transaction tracking structure to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req The request list structure to free. * \param wakeup If set, wakeup the work thread if freeing this reqlist * during a resource shortage condition. */ static inline void xbb_release_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int wakeup) { mtx_assert(&xbb->lock, MA_OWNED); if (wakeup) { wakeup = xbb->flags & XBBF_RESOURCE_SHORTAGE; xbb->flags &= ~XBBF_RESOURCE_SHORTAGE; } if (reqlist->kva != NULL) xbb_free_kva(xbb, reqlist->kva, reqlist->nr_segments); xbb_release_reqs(xbb, &reqlist->contig_req_list, reqlist->num_children); STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); if ((xbb->flags & XBBF_SHUTDOWN) != 0) { /* * Shutdown is in progress. See if we can * progress further now that one more request * has completed and been returned to the * free pool. */ xbb_shutdown(xbb); } if (wakeup != 0) taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); } /** * Request resources and do basic request setup. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Pointer to reqlist pointer. * \param ring_req Pointer to a block ring request. * \param ring_index The ring index of this request. * * \return 0 for success, non-zero for failure. */ static int xbb_get_resources(struct xbb_softc *xbb, struct xbb_xen_reqlist **reqlist, blkif_request_t *ring_req, RING_IDX ring_idx) { struct xbb_xen_reqlist *nreqlist; struct xbb_xen_req *nreq; nreqlist = NULL; nreq = NULL; mtx_lock(&xbb->lock); /* * We don't allow new resources to be allocated if we're in the * process of shutting down. */ if ((xbb->flags & XBBF_SHUTDOWN) != 0) { mtx_unlock(&xbb->lock); return (1); } /* * Allocate a reqlist if the caller doesn't have one already. */ if (*reqlist == NULL) { nreqlist = xbb_get_reqlist(xbb); if (nreqlist == NULL) goto bailout_error; } /* We always allocate a request. 
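 * If either allocation below fails, the bailout_error path records the
 * shortage (XBBF_RESOURCE_SHORTAGE, request_shortages++) and returns
 * non-zero so the work loop can sleep until a completion frees resources.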
*/ nreq = xbb_get_req(xbb); if (nreq == NULL) goto bailout_error; mtx_unlock(&xbb->lock); if (*reqlist == NULL) { *reqlist = nreqlist; nreqlist->operation = ring_req->operation; nreqlist->starting_sector_number = ring_req->sector_number; STAILQ_INSERT_TAIL(&xbb->reqlist_pending_stailq, nreqlist, links); } nreq->reqlist = *reqlist; nreq->req_ring_idx = ring_idx; nreq->id = ring_req->id; nreq->operation = ring_req->operation; if (xbb->abi != BLKIF_PROTOCOL_NATIVE) { bcopy(ring_req, &nreq->ring_req_storage, sizeof(*ring_req)); nreq->ring_req = &nreq->ring_req_storage; } else { nreq->ring_req = ring_req; } binuptime(&nreq->ds_t0); devstat_start_transaction(xbb->xbb_stats_in, &nreq->ds_t0); STAILQ_INSERT_TAIL(&(*reqlist)->contig_req_list, nreq, links); (*reqlist)->num_children++; (*reqlist)->nr_segments += ring_req->nr_segments; return (0); bailout_error: /* * We're out of resources, so set the shortage flag. The next time * a request is released, we'll try waking up the work thread to * see if we can allocate more resources. */ xbb->flags |= XBBF_RESOURCE_SHORTAGE; xbb->request_shortages++; if (nreq != NULL) xbb_release_req(xbb, nreq); if (nreqlist != NULL) xbb_release_reqlist(xbb, nreqlist, /*wakeup*/ 0); mtx_unlock(&xbb->lock); return (1); } /** * Create and queue a response to a blkif request. * * \param xbb Per-instance xbb configuration structure. * \param req The request structure to which to respond. * \param status The status code to report. See BLKIF_RSP_* * in sys/xen/interface/io/blkif.h. */ static void xbb_queue_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) { blkif_response_t *resp; /* * The mutex is required here, and should be held across this call * until after the subsequent call to xbb_push_responses(). This * is to guarantee that another context won't queue responses and * push them while we're active. * * That could lead to the other end being notified of responses * before the resources have been freed on this end. The other end * would then be able to queue additional I/O, and we may run out * of resources because we haven't freed them all yet. */ mtx_assert(&xbb->lock, MA_OWNED); /* * Place on the response ring for the relevant domain. * For now, only the spacing between entries is different * in the different ABIs, not the response entry layout. */ switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: resp = RING_GET_RESPONSE(&xbb->rings.native, xbb->rings.native.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_32: resp = (blkif_response_t *) RING_GET_RESPONSE(&xbb->rings.x86_32, xbb->rings.x86_32.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_64: resp = (blkif_response_t *) RING_GET_RESPONSE(&xbb->rings.x86_64, xbb->rings.x86_64.rsp_prod_pvt); break; default: panic("Unexpected blkif protocol ABI."); } resp->id = req->id; resp->operation = req->operation; resp->status = status; if (status != BLKIF_RSP_OKAY) xbb->reqs_completed_with_error++; xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages); xbb->reqs_queued_for_completion++; } /** * Send queued responses to blkif requests. * * \param xbb Per-instance xbb configuration structure. * \param run_taskqueue Flag that is set to 1 if the taskqueue * should be run, 0 if it does not need to be run. * \param notify Flag that is set to 1 if the other end should be * notified via irq, 0 if the other end should not be * notified. */ static void xbb_push_responses(struct xbb_softc *xbb, int *run_taskqueue, int *notify) { int more_to_do; /* * The mutex is required here. 
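 * As with xbb_queue_response(), the caller holds it across the
 * queue/push pair, in the spirit of the following sketch (illustrative
 * only; see xbb_complete_reqlist() for the real caller):
 *
 *     mtx_lock(&xbb->lock);
 *     xbb_queue_response(xbb, nreq, status);
 *     xbb_push_responses(xbb, &run_taskqueue, &notify);
 *     mtx_unlock(&xbb->lock);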
*/ mtx_assert(&xbb->lock, MA_OWNED); more_to_do = 0; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, *notify); if (xbb->rings.common.rsp_prod_pvt == xbb->rings.common.req_cons) { /* * Tail check for pending requests. Allows frontend to avoid * notifications if requests are already in flight (lower * overheads and promotes batching). */ RING_FINAL_CHECK_FOR_REQUESTS(&xbb->rings.common, more_to_do); } else if (RING_HAS_UNCONSUMED_REQUESTS(&xbb->rings.common)) { more_to_do = 1; } xbb->reqs_completed += xbb->reqs_queued_for_completion; xbb->reqs_queued_for_completion = 0; *run_taskqueue = more_to_do; } /** * Complete a request list. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. */ static void xbb_complete_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) { struct xbb_xen_req *nreq; off_t sectors_sent; int notify, run_taskqueue; sectors_sent = 0; if (reqlist->flags & XBB_REQLIST_MAPPED) xbb_unmap_reqlist(reqlist); mtx_lock(&xbb->lock); /* * All I/O is done, send the response. A lock is not necessary * to protect the request list, because all requests have * completed. Therefore this is the only context accessing this * reqlist right now. However, in order to make sure that no one * else queues responses onto the queue or pushes them to the other * side while we're active, we need to hold the lock across the * calls to xbb_queue_response() and xbb_push_responses(). */ STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { off_t cur_sectors_sent; /* Put this response on the ring, but don't push yet */ xbb_queue_response(xbb, nreq, reqlist->status); /* We don't report bytes sent if there is an error. */ if (reqlist->status == BLKIF_RSP_OKAY) cur_sectors_sent = nreq->nr_512b_sectors; else cur_sectors_sent = 0; sectors_sent += cur_sectors_sent; devstat_end_transaction(xbb->xbb_stats_in, /*bytes*/cur_sectors_sent << 9, reqlist->ds_tag_type, reqlist->ds_trans_type, /*now*/NULL, /*then*/&nreq->ds_t0); } /* * Take out any sectors not sent. If we wind up negative (which * might happen if an error is reported as well as a residual), just * report 0 sectors sent. */ sectors_sent -= reqlist->residual_512b_sectors; if (sectors_sent < 0) sectors_sent = 0; devstat_end_transaction(xbb->xbb_stats, /*bytes*/ sectors_sent << 9, reqlist->ds_tag_type, reqlist->ds_trans_type, /*now*/NULL, /*then*/&reqlist->ds_t0); xbb_release_reqlist(xbb, reqlist, /*wakeup*/ 1); xbb_push_responses(xbb, &run_taskqueue, ¬ify); mtx_unlock(&xbb->lock); if (run_taskqueue) taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); if (notify) xen_intr_signal(xbb->xen_intr_handle); } /** * Completion handler for buffer I/O requests issued by the device * backend driver. * * \param bio The buffer I/O request on which to perform completion * processing. */ static void xbb_bio_done(struct bio *bio) { struct xbb_softc *xbb; struct xbb_xen_reqlist *reqlist; reqlist = bio->bio_caller1; xbb = reqlist->xbb; reqlist->residual_512b_sectors += bio->bio_resid >> 9; /* * This is a bit imprecise. With aggregated I/O a single * request list can contain multiple front-end requests and * a multiple bios may point to a single request. By carefully * walking the request list, we could map residuals and errors * back to the original front-end request, but the interface * isn't sufficiently rich for us to properly report the error. * So, we just treat the entire request list as having failed if an * error occurs on any part. 
And, if an error occurs, we treat * the amount of data transferred as 0. * * For residuals, we report it on the overall aggregated device, * but not on the individual requests, since we don't currently * do the work to determine which front-end request to which the * residual applies. */ if (bio->bio_error) { DPRINTF("BIO returned error %d for operation on device %s\n", bio->bio_error, xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; if (bio->bio_error == ENXIO && xenbus_get_state(xbb->dev) == XenbusStateConnected) { /* * Backend device has disappeared. Signal the * front-end that we (the device proxy) want to * go away. */ xenbus_set_state(xbb->dev, XenbusStateClosing); } } #ifdef XBB_USE_BOUNCE_BUFFERS if (bio->bio_cmd == BIO_READ) { vm_offset_t kva_offset; kva_offset = (vm_offset_t)bio->bio_data - (vm_offset_t)reqlist->bounce; memcpy((uint8_t *)reqlist->kva + kva_offset, bio->bio_data, bio->bio_bcount); } #endif /* XBB_USE_BOUNCE_BUFFERS */ /* * Decrement the pending count for the request list. When we're * done with the requests, send status back for all of them. */ if (atomic_fetchadd_int(&reqlist->pendcnt, -1) == 1) xbb_complete_reqlist(xbb, reqlist); g_destroy_bio(bio); } /** * Parse a blkif request into an internal request structure and send * it to the backend for processing. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. * * \return On success, 0. For resource shortages, non-zero. * * This routine performs the backend common aspects of request parsing * including compiling an internal request structure, parsing the S/G * list and any secondary ring requests in which they may reside, and * the mapping of front-end I/O pages into our domain. */ static int xbb_dispatch_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) { struct xbb_sg *xbb_sg; struct gnttab_map_grant_ref *map; struct blkif_request_segment *sg; struct blkif_request_segment *last_block_sg; struct xbb_xen_req *nreq; u_int nseg; u_int seg_idx; u_int block_segs; int nr_sects; int total_sects; int operation; uint8_t bio_flags; int error; reqlist->ds_tag_type = DEVSTAT_TAG_SIMPLE; bio_flags = 0; total_sects = 0; nr_sects = 0; /* * First determine whether we have enough free KVA to satisfy this * request list. If not, tell xbb_run_queue() so it can go to * sleep until we have more KVA. */ reqlist->kva = NULL; if (reqlist->nr_segments != 0) { reqlist->kva = xbb_get_kva(xbb, reqlist->nr_segments); if (reqlist->kva == NULL) { /* * If we're out of KVA, return ENOMEM. */ return (ENOMEM); } } binuptime(&reqlist->ds_t0); devstat_start_transaction(xbb->xbb_stats, &reqlist->ds_t0); switch (reqlist->operation) { case BLKIF_OP_WRITE_BARRIER: bio_flags |= BIO_ORDERED; reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; /* FALLTHROUGH */ case BLKIF_OP_WRITE: operation = BIO_WRITE; reqlist->ds_trans_type = DEVSTAT_WRITE; if ((xbb->flags & XBBF_READ_ONLY) != 0) { DPRINTF("Attempt to write to read only device %s\n", xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } break; case BLKIF_OP_READ: operation = BIO_READ; reqlist->ds_trans_type = DEVSTAT_READ; break; case BLKIF_OP_FLUSH_DISKCACHE: /* * If this is true, the user has requested that we disable * flush support. So we just complete the requests * successfully. */ if (xbb->disable_flush != 0) { goto send_response; } /* * The user has requested that we only send a real flush * for every N flush requests. So keep count, and either * complete the request immediately or queue it for the * backend. 
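 *
 * For example (illustrative numbers), with flush_interval == 10 the
 * first nine BLKIF_OP_FLUSH_DISKCACHE requests in an interval are
 * completed immediately, and only the tenth is dispatched as a real
 * BIO_FLUSH, after which flush_count is reset to 0.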
*/ if (xbb->flush_interval != 0) { if (++(xbb->flush_count) < xbb->flush_interval) { goto send_response; } else xbb->flush_count = 0; } operation = BIO_FLUSH; reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; reqlist->ds_trans_type = DEVSTAT_NO_DATA; goto do_dispatch; /*NOTREACHED*/ default: DPRINTF("error: unknown block io operation [%d]\n", reqlist->operation); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } reqlist->xbb = xbb; xbb_sg = xbb->xbb_sgs; map = xbb->maps; seg_idx = 0; STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { blkif_request_t *ring_req; RING_IDX req_ring_idx; u_int req_seg_idx; ring_req = nreq->ring_req; req_ring_idx = nreq->req_ring_idx; nr_sects = 0; nseg = ring_req->nr_segments; nreq->nr_pages = nseg; nreq->nr_512b_sectors = 0; req_seg_idx = 0; sg = NULL; /* Check that number of segments is sane. */ if (__predict_false(nseg == 0) || __predict_false(nseg > xbb->max_request_segments)) { DPRINTF("Bad number of segments in request (%d)\n", nseg); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } block_segs = MIN(nreq->nr_pages, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); sg = ring_req->seg; last_block_sg = sg + block_segs; while (1) { while (sg < last_block_sg) { KASSERT(seg_idx < XBB_MAX_SEGMENTS_PER_REQLIST, ("seg_idx %d is too large, max " "segs %d\n", seg_idx, XBB_MAX_SEGMENTS_PER_REQLIST)); xbb_sg->first_sect = sg->first_sect; xbb_sg->last_sect = sg->last_sect; xbb_sg->nsect = (int8_t)(sg->last_sect - sg->first_sect + 1); if ((sg->last_sect >= (PAGE_SIZE >> 9)) || (xbb_sg->nsect <= 0)) { reqlist->status = BLKIF_RSP_ERROR; goto send_response; } nr_sects += xbb_sg->nsect; map->host_addr = xbb_get_gntaddr(reqlist, seg_idx, /*sector*/0); KASSERT(map->host_addr + PAGE_SIZE <= xbb->ring_config.gnt_addr, ("Host address %#jx len %d overlaps " "ring address %#jx\n", (uintmax_t)map->host_addr, PAGE_SIZE, (uintmax_t)xbb->ring_config.gnt_addr)); map->flags = GNTMAP_host_map; map->ref = sg->gref; map->dom = xbb->otherend_id; if (operation == BIO_WRITE) map->flags |= GNTMAP_readonly; sg++; map++; xbb_sg++; seg_idx++; req_seg_idx++; } block_segs = MIN(nseg - req_seg_idx, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); if (block_segs == 0) break; /* * Fetch the next request block full of SG elements. * For now, only the spacing between entries is * different in the different ABIs, not the sg entry * layout. 
*/ req_ring_idx++; switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.native, req_ring_idx); break; case BLKIF_PROTOCOL_X86_32: { sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_32, req_ring_idx); break; } case BLKIF_PROTOCOL_X86_64: { sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_64, req_ring_idx); break; } default: panic("Unexpected blkif protocol ABI."); /* NOTREACHED */ } last_block_sg = sg + block_segs; } /* Convert to the disk's sector size */ nreq->nr_512b_sectors = nr_sects; nr_sects = (nr_sects << 9) >> xbb->sector_size_shift; total_sects += nr_sects; if ((nreq->nr_512b_sectors & ((xbb->sector_size >> 9) - 1)) != 0) { device_printf(xbb->dev, "%s: I/O size (%d) is not " "a multiple of the backing store sector " "size (%d)\n", __func__, nreq->nr_512b_sectors << 9, xbb->sector_size); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } } error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, xbb->maps, reqlist->nr_segments); if (error != 0) panic("Grant table operation failed (%d)", error); reqlist->flags |= XBB_REQLIST_MAPPED; for (seg_idx = 0, map = xbb->maps; seg_idx < reqlist->nr_segments; seg_idx++, map++){ if (__predict_false(map->status != 0)) { DPRINTF("invalid buffer -- could not remap " "it (%d)\n", map->status); DPRINTF("Mapping(%d): Host Addr 0x%lx, flags " "0x%x ref 0x%x, dom %d\n", seg_idx, map->host_addr, map->flags, map->ref, map->dom); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } reqlist->gnt_handles[seg_idx] = map->handle; } if (reqlist->starting_sector_number + total_sects > xbb->media_num_sectors) { DPRINTF("%s of [%" PRIu64 ",%" PRIu64 "] " "extends past end of device %s\n", operation == BIO_READ ? "read" : "write", reqlist->starting_sector_number, reqlist->starting_sector_number + total_sects, xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } do_dispatch: error = xbb->dispatch_io(xbb, reqlist, operation, bio_flags); if (error != 0) { reqlist->status = BLKIF_RSP_ERROR; goto send_response; } return (0); send_response: xbb_complete_reqlist(xbb, reqlist); return (0); } static __inline int xbb_count_sects(blkif_request_t *ring_req) { int i; int cur_size = 0; for (i = 0; i < ring_req->nr_segments; i++) { int nsect; nsect = (int8_t)(ring_req->seg[i].last_sect - ring_req->seg[i].first_sect + 1); if (nsect <= 0) break; cur_size += nsect; } return (cur_size); } /** * Process incoming requests from the shared communication ring in response * to a signal on the ring's event channel. * * \param context Callback argument registerd during task initialization - * the xbb_softc for this instance. * \param pending The number of taskqueue_enqueue events that have * occurred since this handler was last run. */ static void xbb_run_queue(void *context, int pending) { struct xbb_softc *xbb; blkif_back_rings_t *rings; RING_IDX rp; uint64_t cur_sector; int cur_operation; struct xbb_xen_reqlist *reqlist; xbb = (struct xbb_softc *)context; rings = &xbb->rings; /* * Work gather and dispatch loop. Note that we have a bias here * towards gathering I/O sent by blockfront. We first gather up * everything in the ring, as long as we have resources. Then we * dispatch one request, and then attempt to gather up any * additional requests that have come in while we were dispatching * the request. * * This allows us to get a clearer picture (via devstat) of how * many requests blockfront is queueing to us at any given time. 
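 *
 * In outline (a simplified sketch of the loop below, not a separate
 * algorithm):
 *
 *     for (;;) {
 *             gather requests from the ring into the pending reqlist
 *                 queue, coalescing contiguous, same-operation I/O;
 *             if (no pending reqlist)
 *                     break;          (sleep until the next event)
 *             dispatch the reqlist at the head of the queue;
 *             if (the dispatch hit a resource shortage)
 *                     requeue it at the head and break;
 *     }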
*/ for (;;) { int retval; /* * Initialize reqlist to the last element in the pending * queue, if there is one. This allows us to add more * requests to that request list, if we have room. */ reqlist = STAILQ_LAST(&xbb->reqlist_pending_stailq, xbb_xen_reqlist, links); if (reqlist != NULL) { cur_sector = reqlist->next_contig_sector; cur_operation = reqlist->operation; } else { cur_operation = 0; cur_sector = 0; } /* * Cache req_prod to avoid accessing a cache line shared * with the frontend. */ rp = rings->common.sring->req_prod; /* Ensure we see queued requests up to 'rp'. */ rmb(); /** * Run so long as there is work to consume and the generation * of a response will not overflow the ring. * * @note There's a 1 to 1 relationship between requests and * responses, so an overflow should never occur. This * test is to protect our domain from digesting bogus * data. Shouldn't we log this? */ while (rings->common.req_cons != rp && RING_REQUEST_CONS_OVERFLOW(&rings->common, rings->common.req_cons) == 0){ blkif_request_t ring_req_storage; blkif_request_t *ring_req; int cur_size; switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: ring_req = RING_GET_REQUEST(&xbb->rings.native, rings->common.req_cons); break; case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_request *ring_req32; ring_req32 = RING_GET_REQUEST( &xbb->rings.x86_32, rings->common.req_cons); blkif_get_x86_32_req(&ring_req_storage, ring_req32); ring_req = &ring_req_storage; break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_request *ring_req64; ring_req64 =RING_GET_REQUEST(&xbb->rings.x86_64, rings->common.req_cons); blkif_get_x86_64_req(&ring_req_storage, ring_req64); ring_req = &ring_req_storage; break; } default: panic("Unexpected blkif protocol ABI."); /* NOTREACHED */ } /* * Check for situations that would require closing * off this I/O for further coalescing: * - Coalescing is turned off. * - Current I/O is out of sequence with the previous * I/O. * - Coalesced I/O would be too large. */ if ((reqlist != NULL) && ((xbb->no_coalesce_reqs != 0) || ((xbb->no_coalesce_reqs == 0) && ((ring_req->sector_number != cur_sector) || (ring_req->operation != cur_operation) || ((ring_req->nr_segments + reqlist->nr_segments) > xbb->max_reqlist_segments))))) { reqlist = NULL; } /* * Grab and check for all resources in one shot. * If we can't get all of the resources we need, * the shortage is noted and the thread will get * woken up when more resources are available. */ retval = xbb_get_resources(xbb, &reqlist, ring_req, xbb->rings.common.req_cons); if (retval != 0) { /* * Resource shortage has been recorded. * We'll be scheduled to run once a request * object frees up due to a completion. */ break; } /* * Signify that we can overwrite this request with * a response by incrementing our consumer index. * The response won't be generated until after * we've already consumed all necessary data out * of the version of the request in the ring buffer * (for native mode). We must update the consumer * index before issueing back-end I/O so there is * no possibility that it will complete and a * response be generated before we make room in * the queue for that response. 
*/ xbb->rings.common.req_cons += BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments); xbb->reqs_received++; cur_size = xbb_count_sects(ring_req); cur_sector = ring_req->sector_number + cur_size; reqlist->next_contig_sector = cur_sector; cur_operation = ring_req->operation; } /* Check for I/O to dispatch */ reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); if (reqlist == NULL) { /* * We're out of work to do, put the task queue to * sleep. */ break; } /* * Grab the first request off the queue and attempt * to dispatch it. */ STAILQ_REMOVE_HEAD(&xbb->reqlist_pending_stailq, links); retval = xbb_dispatch_io(xbb, reqlist); if (retval != 0) { /* * xbb_dispatch_io() returns non-zero only when * there is a resource shortage. If that's the * case, re-queue this request on the head of the * queue, and go to sleep until we have more * resources. */ STAILQ_INSERT_HEAD(&xbb->reqlist_pending_stailq, reqlist, links); break; } else { /* * If we still have anything on the queue after * removing the head entry, that is because we * met one of the criteria to create a new * request list (outlined above), and we'll call * that a forced dispatch for statistical purposes. * * Otherwise, if there is only one element on the * queue, we coalesced everything available on * the ring and we'll call that a normal dispatch. */ reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); if (reqlist != NULL) xbb->forced_dispatch++; else xbb->normal_dispatch++; xbb->total_dispatch++; } } } /** * Interrupt handler bound to the shared ring's event channel. * * \param arg Callback argument registerd during event channel * binding - the xbb_softc for this instance. */ static int xbb_filter(void *arg) { struct xbb_softc *xbb; /* Defer to taskqueue thread. */ xbb = (struct xbb_softc *)arg; taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); return (FILTER_HANDLED); } SDT_PROVIDER_DEFINE(xbb); SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, "int"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, "int", "uint64_t", "uint64_t"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, "int", "uint64_t", "uint64_t"); /*----------------------------- Backend Handlers -----------------------------*/ /** * Backend handler for character device access. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. * \param operation BIO_* I/O operation code. * \param bio_flags Additional bio_flag data to pass to any generated * bios (e.g. BIO_ORDERED).. * * \return 0 for success, errno codes for failure. 
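 *
 * \note Illustrative outline of the mapping performed below: a
 *       BIO_FLUSH request becomes a single ordered bio, while read and
 *       write requests walk the memoized scatter/gather list and start
 *       a new bio whenever the mapped KVA stops being contiguous, e.g.
 *
 *           bio->bio_offset = bio_offset;
 *           bio->bio_data   = xbb_reqlist_ioaddr(reqlist, seg_idx,
 *                                                xbb_sg->first_sect);
 *           bio->bio_pblkno = bio_offset >> xbb->sector_size_shift;
 *
 *       before being pushed to the backend device via d_strategy().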
*/ static int xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int bio_flags) { struct xbb_dev_data *dev_data; struct bio *bios[XBB_MAX_SEGMENTS_PER_REQLIST]; off_t bio_offset; struct bio *bio; struct xbb_sg *xbb_sg; u_int nbio; u_int bio_idx; u_int nseg; u_int seg_idx; int error; dev_data = &xbb->backend.dev; bio_offset = (off_t)reqlist->starting_sector_number << xbb->sector_size_shift; error = 0; nbio = 0; bio_idx = 0; if (operation == BIO_FLUSH) { bio = g_new_bio(); if (__predict_false(bio == NULL)) { DPRINTF("Unable to allocate bio for BIO_FLUSH\n"); error = ENOMEM; return (error); } bio->bio_cmd = BIO_FLUSH; bio->bio_flags |= BIO_ORDERED; bio->bio_dev = dev_data->cdev; bio->bio_offset = 0; bio->bio_data = 0; bio->bio_done = xbb_bio_done; bio->bio_caller1 = reqlist; bio->bio_pblkno = 0; reqlist->pendcnt = 1; SDT_PROBE1(xbb, kernel, xbb_dispatch_dev, flush, device_get_unit(xbb->dev)); (*dev_data->csw->d_strategy)(bio); return (0); } xbb_sg = xbb->xbb_sgs; bio = NULL; nseg = reqlist->nr_segments; for (seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { /* * KVA will not be contiguous, so any additional * I/O will need to be represented in a new bio. */ if ((bio != NULL) && (xbb_sg->first_sect != 0)) { if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { printf("%s: Discontiguous I/O request " "from domain %d ends on " "non-sector boundary\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } bio = NULL; } if (bio == NULL) { /* * Make sure that the start of this bio is * aligned to a device sector. */ if ((bio_offset & (xbb->sector_size - 1)) != 0){ printf("%s: Misaligned I/O request " "from domain %d\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } bio = bios[nbio++] = g_new_bio(); if (__predict_false(bio == NULL)) { error = ENOMEM; goto fail_free_bios; } bio->bio_cmd = operation; bio->bio_flags |= bio_flags; bio->bio_dev = dev_data->cdev; bio->bio_offset = bio_offset; bio->bio_data = xbb_reqlist_ioaddr(reqlist, seg_idx, xbb_sg->first_sect); bio->bio_done = xbb_bio_done; bio->bio_caller1 = reqlist; bio->bio_pblkno = bio_offset >> xbb->sector_size_shift; } bio->bio_length += xbb_sg->nsect << 9; bio->bio_bcount = bio->bio_length; bio_offset += xbb_sg->nsect << 9; if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) { if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { printf("%s: Discontiguous I/O request " "from domain %d ends on " "non-sector boundary\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } /* * KVA will not be contiguous, so any additional * I/O will need to be represented in a new bio. 
*/ bio = NULL; } } reqlist->pendcnt = nbio; for (bio_idx = 0; bio_idx < nbio; bio_idx++) { #ifdef XBB_USE_BOUNCE_BUFFERS vm_offset_t kva_offset; kva_offset = (vm_offset_t)bios[bio_idx]->bio_data - (vm_offset_t)reqlist->bounce; if (operation == BIO_WRITE) { memcpy(bios[bio_idx]->bio_data, (uint8_t *)reqlist->kva + kva_offset, bios[bio_idx]->bio_bcount); } #endif if (operation == BIO_READ) { SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, read, device_get_unit(xbb->dev), bios[bio_idx]->bio_offset, bios[bio_idx]->bio_length); } else if (operation == BIO_WRITE) { SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, write, device_get_unit(xbb->dev), bios[bio_idx]->bio_offset, bios[bio_idx]->bio_length); } (*dev_data->csw->d_strategy)(bios[bio_idx]); } return (error); fail_free_bios: for (bio_idx = 0; bio_idx < (nbio-1); bio_idx++) g_destroy_bio(bios[bio_idx]); return (error); } SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, "int"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, "int", "uint64_t", "uint64_t"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, "int", "uint64_t", "uint64_t"); /** * Backend handler for file access. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list. * \param operation BIO_* I/O operation code. * \param flags Additional bio_flag data to pass to any generated bios * (e.g. BIO_ORDERED).. * * \return 0 for success, errno codes for failure. */ static int xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int flags) { struct xbb_file_data *file_data; u_int seg_idx; u_int nseg; off_t sectors_sent; struct uio xuio; struct xbb_sg *xbb_sg; struct iovec *xiovec; #ifdef XBB_USE_BOUNCE_BUFFERS void **p_vaddr; int saved_uio_iovcnt; #endif /* XBB_USE_BOUNCE_BUFFERS */ int error; file_data = &xbb->backend.file; sectors_sent = 0; error = 0; bzero(&xuio, sizeof(xuio)); switch (operation) { case BIO_READ: xuio.uio_rw = UIO_READ; break; case BIO_WRITE: xuio.uio_rw = UIO_WRITE; break; case BIO_FLUSH: { struct mount *mountpoint; SDT_PROBE1(xbb, kernel, xbb_dispatch_file, flush, device_get_unit(xbb->dev)); (void) vn_start_write(xbb->vn, &mountpoint, V_WAIT); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(xbb->vn, MNT_WAIT, curthread); VOP_UNLOCK(xbb->vn, 0); vn_finished_write(mountpoint); goto bailout_send_response; /* NOTREACHED */ } default: panic("invalid operation %d", operation); /* NOTREACHED */ } xuio.uio_offset = (vm_offset_t)reqlist->starting_sector_number << xbb->sector_size_shift; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = file_data->xiovecs; xuio.uio_iovcnt = 0; xbb_sg = xbb->xbb_sgs; nseg = reqlist->nr_segments; for (xiovec = NULL, seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { /* * If the first sector is not 0, the KVA will * not be contiguous and we'll need to go on * to another segment. */ if (xbb_sg->first_sect != 0) xiovec = NULL; if (xiovec == NULL) { xiovec = &file_data->xiovecs[xuio.uio_iovcnt]; xiovec->iov_base = xbb_reqlist_ioaddr(reqlist, seg_idx, xbb_sg->first_sect); #ifdef XBB_USE_BOUNCE_BUFFERS /* * Store the address of the incoming * buffer at this particular offset * as well, so we can do the copy * later without having to do more * work to recalculate this address. 
*/ p_vaddr = &file_data->xiovecs_vaddr[xuio.uio_iovcnt]; *p_vaddr = xbb_reqlist_vaddr(reqlist, seg_idx, xbb_sg->first_sect); #endif /* XBB_USE_BOUNCE_BUFFERS */ xiovec->iov_len = 0; xuio.uio_iovcnt++; } xiovec->iov_len += xbb_sg->nsect << 9; xuio.uio_resid += xbb_sg->nsect << 9; /* * If the last sector is not the full page * size count, the next segment will not be * contiguous in KVA and we need a new iovec. */ if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) xiovec = NULL; } xuio.uio_td = curthread; #ifdef XBB_USE_BOUNCE_BUFFERS saved_uio_iovcnt = xuio.uio_iovcnt; if (operation == BIO_WRITE) { /* Copy the write data to the local buffer. */ for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, xiovec = xuio.uio_iov; seg_idx < xuio.uio_iovcnt; seg_idx++, xiovec++, p_vaddr++) { memcpy(xiovec->iov_base, *p_vaddr, xiovec->iov_len); } } else { /* * We only need to save off the iovecs in the case of a * read, because the copy for the read happens after the * VOP_READ(). (The uio will get modified in that call * sequence.) */ memcpy(file_data->saved_xiovecs, xuio.uio_iov, xuio.uio_iovcnt * sizeof(xuio.uio_iov[0])); } #endif /* XBB_USE_BOUNCE_BUFFERS */ switch (operation) { case BIO_READ: SDT_PROBE3(xbb, kernel, xbb_dispatch_file, read, device_get_unit(xbb->dev), xuio.uio_offset, xuio.uio_resid); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); /* * UFS pays attention to IO_DIRECT for reads. If the * DIRECTIO option is configured into the kernel, it calls * ffs_rawread(). But that only works for single-segment * uios with user space addresses. In our case, with a * kernel uio, it still reads into the buffer cache, but it * will just try to release the buffer from the cache later * on in ffs_read(). * * ZFS does not pay attention to IO_DIRECT for reads. * * UFS does not pay attention to IO_SYNC for reads. * * ZFS pays attention to IO_SYNC (which translates into the * Solaris define FRSYNC for zfs_read()) for reads. It * attempts to sync the file before reading. * * So, to attempt to provide some barrier semantics in the * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. */ error = VOP_READ(xbb->vn, &xuio, (flags & BIO_ORDERED) ? (IO_DIRECT|IO_SYNC) : 0, file_data->cred); VOP_UNLOCK(xbb->vn, 0); break; case BIO_WRITE: { struct mount *mountpoint; SDT_PROBE3(xbb, kernel, xbb_dispatch_file, write, device_get_unit(xbb->dev), xuio.uio_offset, xuio.uio_resid); (void)vn_start_write(xbb->vn, &mountpoint, V_WAIT); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); /* * UFS pays attention to IO_DIRECT for writes. The write * is done asynchronously. (Normally the write would just * get put into cache. * * UFS pays attention to IO_SYNC for writes. It will * attempt to write the buffer out synchronously if that * flag is set. * * ZFS does not pay attention to IO_DIRECT for writes. * * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) * for writes. It will flush the transaction from the * cache before returning. * * So if we've got the BIO_ORDERED flag set, we want * IO_SYNC in either the UFS or ZFS case. */ error = VOP_WRITE(xbb->vn, &xuio, (flags & BIO_ORDERED) ? 
IO_SYNC : 0, file_data->cred); VOP_UNLOCK(xbb->vn, 0); vn_finished_write(mountpoint); break; } default: panic("invalid operation %d", operation); /* NOTREACHED */ } #ifdef XBB_USE_BOUNCE_BUFFERS /* We only need to copy here for read operations */ if (operation == BIO_READ) { for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, xiovec = file_data->saved_xiovecs; seg_idx < saved_uio_iovcnt; seg_idx++, xiovec++, p_vaddr++) { /* * Note that we have to use the copy of the * io vector we made above. uiomove() modifies * the uio and its referenced vector as uiomove * performs the copy, so we can't rely on any * state from the original uio. */ memcpy(*p_vaddr, xiovec->iov_base, xiovec->iov_len); } } #endif /* XBB_USE_BOUNCE_BUFFERS */ bailout_send_response: if (error != 0) reqlist->status = BLKIF_RSP_ERROR; xbb_complete_reqlist(xbb, reqlist); return (0); } /*--------------------------- Backend Configuration --------------------------*/ /** * Close and cleanup any backend device/file specific state for this * block back instance. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_close_backend(struct xbb_softc *xbb) { DROP_GIANT(); DPRINTF("closing dev=%s\n", xbb->dev_name); if (xbb->vn) { int flags = FREAD; if ((xbb->flags & XBBF_READ_ONLY) == 0) flags |= FWRITE; switch (xbb->device_type) { case XBB_TYPE_DISK: if (xbb->backend.dev.csw) { dev_relthread(xbb->backend.dev.cdev, xbb->backend.dev.dev_ref); xbb->backend.dev.csw = NULL; xbb->backend.dev.cdev = NULL; } break; case XBB_TYPE_FILE: break; case XBB_TYPE_NONE: default: panic("Unexpected backend type."); break; } (void)vn_close(xbb->vn, flags, NOCRED, curthread); xbb->vn = NULL; switch (xbb->device_type) { case XBB_TYPE_DISK: break; case XBB_TYPE_FILE: if (xbb->backend.file.cred != NULL) { crfree(xbb->backend.file.cred); xbb->backend.file.cred = NULL; } break; case XBB_TYPE_NONE: default: panic("Unexpected backend type."); break; } } PICKUP_GIANT(); } /** * Open a character device to be used for backend I/O. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. */ static int xbb_open_dev(struct xbb_softc *xbb) { struct vattr vattr; struct cdev *dev; struct cdevsw *devsw; int error; xbb->device_type = XBB_TYPE_DISK; xbb->dispatch_io = xbb_dispatch_dev; xbb->backend.dev.cdev = xbb->vn->v_rdev; xbb->backend.dev.csw = dev_refthread(xbb->backend.dev.cdev, &xbb->backend.dev.dev_ref); if (xbb->backend.dev.csw == NULL) panic("Unable to retrieve device switch"); error = VOP_GETATTR(xbb->vn, &vattr, NOCRED); if (error) { xenbus_dev_fatal(xbb->dev, error, "error getting " "vnode attributes for device %s", xbb->dev_name); return (error); } dev = xbb->vn->v_rdev; devsw = dev->si_devsw; if (!devsw->d_ioctl) { xenbus_dev_fatal(xbb->dev, ENODEV, "no d_ioctl for " "device %s!", xbb->dev_name); return (ENODEV); } error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&xbb->sector_size, FREAD, curthread); if (error) { xenbus_dev_fatal(xbb->dev, error, "error calling ioctl DIOCGSECTORSIZE " "for device %s", xbb->dev_name); return (error); } error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&xbb->media_size, FREAD, curthread); if (error) { xenbus_dev_fatal(xbb->dev, error, "error calling ioctl DIOCGMEDIASIZE " "for device %s", xbb->dev_name); return (error); } return (0); } /** * Open a file to be used for backend I/O. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. 
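 *
 * \note File-backed instances always advertise a 512 byte sector size
 *       to the front-end, regardless of vattr.va_blocksize; see the
 *       comment in the function body below for the rationale.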
*/ static int xbb_open_file(struct xbb_softc *xbb) { struct xbb_file_data *file_data; struct vattr vattr; int error; file_data = &xbb->backend.file; xbb->device_type = XBB_TYPE_FILE; xbb->dispatch_io = xbb_dispatch_file; error = VOP_GETATTR(xbb->vn, &vattr, curthread->td_ucred); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "error calling VOP_GETATTR()" "for file %s", xbb->dev_name); return (error); } /* * Verify that we have the ability to upgrade to exclusive * access on this file so we can trap errors at open instead * of reporting them during first access. */ if (VOP_ISLOCKED(xbb->vn) != LK_EXCLUSIVE) { vn_lock(xbb->vn, LK_UPGRADE | LK_RETRY); if (xbb->vn->v_iflag & VI_DOOMED) { error = EBADF; xenbus_dev_fatal(xbb->dev, error, "error locking file %s", xbb->dev_name); return (error); } } file_data->cred = crhold(curthread->td_ucred); xbb->media_size = vattr.va_size; /* * XXX KDM vattr.va_blocksize may be larger than 512 bytes here. * With ZFS, it is 131072 bytes. Block sizes that large don't work * with disklabel and UFS on FreeBSD at least. Large block sizes * may not work with other OSes as well. So just export a sector * size of 512 bytes, which should work with any OS or * application. Since our backing is a file, any block size will * work fine for the backing store. */ #if 0 xbb->sector_size = vattr.va_blocksize; #endif xbb->sector_size = 512; /* * Sanity check. The media size has to be at least one * sector long. */ if (xbb->media_size < xbb->sector_size) { error = EINVAL; xenbus_dev_fatal(xbb->dev, error, "file %s size %ju < block size %u", xbb->dev_name, (uintmax_t)xbb->media_size, xbb->sector_size); } return (error); } /** * Open the backend provider for this connection. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. */ static int xbb_open_backend(struct xbb_softc *xbb) { struct nameidata nd; int flags; int error; flags = FREAD; error = 0; DPRINTF("opening dev=%s\n", xbb->dev_name); if (rootvnode == NULL) { xenbus_dev_fatal(xbb->dev, ENOENT, "Root file system not mounted"); return (ENOENT); } if ((xbb->flags & XBBF_READ_ONLY) == 0) flags |= FWRITE; if (!curthread->td_proc->p_fd->fd_cdir) { curthread->td_proc->p_fd->fd_cdir = rootvnode; VREF(rootvnode); } if (!curthread->td_proc->p_fd->fd_rdir) { curthread->td_proc->p_fd->fd_rdir = rootvnode; VREF(rootvnode); } if (!curthread->td_proc->p_fd->fd_jdir) { curthread->td_proc->p_fd->fd_jdir = rootvnode; VREF(rootvnode); } again: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, xbb->dev_name, curthread); error = vn_open(&nd, &flags, 0, NULL); if (error) { /* * This is the only reasonable guess we can make as far as * path if the user doesn't give us a fully qualified path. * If they want to specify a file, they need to specify the * full path. */ if (xbb->dev_name[0] != '/') { char *dev_path = "/dev/"; char *dev_name; /* Try adding device path at beginning of name */ dev_name = malloc(strlen(xbb->dev_name) + strlen(dev_path) + 1, M_XENBLOCKBACK, M_NOWAIT); if (dev_name) { sprintf(dev_name, "%s%s", dev_path, xbb->dev_name); free(xbb->dev_name, M_XENBLOCKBACK); xbb->dev_name = dev_name; goto again; } } xenbus_dev_fatal(xbb->dev, error, "error opening device %s", xbb->dev_name); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); xbb->vn = nd.ni_vp; /* We only support disks and files. 
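 *
 * In sketch form (mirroring the checks below):
 *
 *     if (vn_isdisk(xbb->vn, &error))     -> xbb_open_dev(xbb)
 *     else if (xbb->vn->v_type == VREG)   -> xbb_open_file(xbb)
 *     else                                -> EINVAL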
*/ if (vn_isdisk(xbb->vn, &error)) { error = xbb_open_dev(xbb); } else if (xbb->vn->v_type == VREG) { error = xbb_open_file(xbb); } else { error = EINVAL; xenbus_dev_fatal(xbb->dev, error, "%s is not a disk " "or file", xbb->dev_name); } VOP_UNLOCK(xbb->vn, 0); if (error != 0) { xbb_close_backend(xbb); return (error); } xbb->sector_size_shift = fls(xbb->sector_size) - 1; xbb->media_num_sectors = xbb->media_size >> xbb->sector_size_shift; DPRINTF("opened %s=%s sector_size=%u media_size=%" PRId64 "\n", (xbb->device_type == XBB_TYPE_DISK) ? "dev" : "file", xbb->dev_name, xbb->sector_size, xbb->media_size); return (0); } /*------------------------ Inter-Domain Communication ------------------------*/ /** * Free dynamically allocated KVA or pseudo-physical address allocations. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_free_communication_mem(struct xbb_softc *xbb) { if (xbb->kva != 0) { -#ifndef XENHVM - kva_free(xbb->kva, xbb->kva_size); -#else if (xbb->pseudo_phys_res != NULL) { bus_release_resource(xbb->dev, SYS_RES_MEMORY, xbb->pseudo_phys_res_id, xbb->pseudo_phys_res); xbb->pseudo_phys_res = NULL; } -#endif } xbb->kva = 0; xbb->gnt_base_addr = 0; if (xbb->kva_free != NULL) { free(xbb->kva_free, M_XENBLOCKBACK); xbb->kva_free = NULL; } } /** * Cleanup all inter-domain communication mechanisms. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_disconnect(struct xbb_softc *xbb) { struct gnttab_unmap_grant_ref ops[XBB_MAX_RING_PAGES]; struct gnttab_unmap_grant_ref *op; u_int ring_idx; int error; DPRINTF("\n"); if ((xbb->flags & XBBF_RING_CONNECTED) == 0) return (0); xen_intr_unbind(&xbb->xen_intr_handle); mtx_unlock(&xbb->lock); taskqueue_drain(xbb->io_taskqueue, &xbb->io_task); mtx_lock(&xbb->lock); /* * No new interrupts can generate work, but we must wait * for all currently active requests to drain. */ if (xbb->active_request_count != 0) return (EAGAIN); for (ring_idx = 0, op = ops; ring_idx < xbb->ring_config.ring_pages; ring_idx++, op++) { op->host_addr = xbb->ring_config.gnt_addr + (ring_idx * PAGE_SIZE); op->dev_bus_addr = xbb->ring_config.bus_addr[ring_idx]; op->handle = xbb->ring_config.handle[ring_idx]; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, ops, xbb->ring_config.ring_pages); if (error != 0) panic("Grant table op failed (%d)", error); xbb_free_communication_mem(xbb); if (xbb->requests != NULL) { free(xbb->requests, M_XENBLOCKBACK); xbb->requests = NULL; } if (xbb->request_lists != NULL) { struct xbb_xen_reqlist *reqlist; int i; /* There is one request list for ever allocated request. */ for (i = 0, reqlist = xbb->request_lists; i < xbb->max_requests; i++, reqlist++){ #ifdef XBB_USE_BOUNCE_BUFFERS if (reqlist->bounce != NULL) { free(reqlist->bounce, M_XENBLOCKBACK); reqlist->bounce = NULL; } #endif if (reqlist->gnt_handles != NULL) { free(reqlist->gnt_handles, M_XENBLOCKBACK); reqlist->gnt_handles = NULL; } } free(xbb->request_lists, M_XENBLOCKBACK); xbb->request_lists = NULL; } xbb->flags &= ~XBBF_RING_CONNECTED; return (0); } /** * Map shared memory ring into domain local address space, initialize * ring control structures, and bind an interrupt to the event channel * used to notify us of ring changes. * * \param xbb Per-instance xbb configuration structure. 
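 *
 * \note Rough order of operations in the function below (a summary,
 *       not extra behaviour): grant-map each shared ring page with
 *       GNTTABOP_map_grant_ref, initialize the ABI-specific back ring
 *       over the mapped pages with BACK_RING_INIT(), and finally bind
 *       the front-end's event channel to xbb_filter() via
 *       xen_intr_bind_remote_port().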
*/ static int xbb_connect_ring(struct xbb_softc *xbb) { struct gnttab_map_grant_ref gnts[XBB_MAX_RING_PAGES]; struct gnttab_map_grant_ref *gnt; u_int ring_idx; int error; if ((xbb->flags & XBBF_RING_CONNECTED) != 0) return (0); /* * Kva for our ring is at the tail of the region of kva allocated * by xbb_alloc_communication_mem(). */ xbb->ring_config.va = xbb->kva + (xbb->kva_size - (xbb->ring_config.ring_pages * PAGE_SIZE)); xbb->ring_config.gnt_addr = xbb->gnt_base_addr + (xbb->kva_size - (xbb->ring_config.ring_pages * PAGE_SIZE)); for (ring_idx = 0, gnt = gnts; ring_idx < xbb->ring_config.ring_pages; ring_idx++, gnt++) { gnt->host_addr = xbb->ring_config.gnt_addr + (ring_idx * PAGE_SIZE); gnt->flags = GNTMAP_host_map; gnt->ref = xbb->ring_config.ring_ref[ring_idx]; gnt->dom = xbb->otherend_id; } error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, gnts, xbb->ring_config.ring_pages); if (error) panic("blkback: Ring page grant table op failed (%d)", error); for (ring_idx = 0, gnt = gnts; ring_idx < xbb->ring_config.ring_pages; ring_idx++, gnt++) { if (gnt->status != 0) { xbb->ring_config.va = 0; xenbus_dev_fatal(xbb->dev, EACCES, "Ring shared page mapping failed. " "Status %d.", gnt->status); return (EACCES); } xbb->ring_config.handle[ring_idx] = gnt->handle; xbb->ring_config.bus_addr[ring_idx] = gnt->dev_bus_addr; } /* Initialize the ring based on ABI. */ switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: { blkif_sring_t *sring; sring = (blkif_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.native, sring, xbb->ring_config.ring_pages * PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_32: { blkif_x86_32_sring_t *sring_x86_32; sring_x86_32 = (blkif_x86_32_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.x86_32, sring_x86_32, xbb->ring_config.ring_pages * PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_64: { blkif_x86_64_sring_t *sring_x86_64; sring_x86_64 = (blkif_x86_64_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.x86_64, sring_x86_64, xbb->ring_config.ring_pages * PAGE_SIZE); break; } default: panic("Unexpected blkif protocol ABI."); } xbb->flags |= XBBF_RING_CONNECTED; error = xen_intr_bind_remote_port(xbb->dev, xbb->otherend_id, xbb->ring_config.evtchn, xbb_filter, /*ithread_handler*/NULL, /*arg*/xbb, INTR_TYPE_BIO | INTR_MPSAFE, &xbb->xen_intr_handle); if (error) { (void)xbb_disconnect(xbb); xenbus_dev_fatal(xbb->dev, error, "binding event channel"); return (error); } DPRINTF("rings connected!\n"); return 0; } /* Needed to make bit_alloc() macro work */ #define calloc(count, size) malloc((count)*(size), M_XENBLOCKBACK, \ M_NOWAIT|M_ZERO); /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our * communication ring. * * \param xbb Per-instance xbb configuration structure. * * These address spaces are used to dynamically map pages in the * front-end's domain into our own. 
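 *
 * \note Sizing sketch (illustrative numbers): the function below
 *       reserves
 *
 *           reqlist_kva_pages = max_requests * max_request_segments;
 *           kva_size          = reqlist_kva_pages * PAGE_SIZE
 *                             + ring_config.ring_pages * PAGE_SIZE;
 *
 *       so, for example, 32 requests of 8 segments each with a single
 *       4 KiB ring page would reserve (32 * 8 + 1) pages of address
 *       space.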
*/ static int xbb_alloc_communication_mem(struct xbb_softc *xbb) { xbb->reqlist_kva_pages = xbb->max_requests * xbb->max_request_segments; xbb->reqlist_kva_size = xbb->reqlist_kva_pages * PAGE_SIZE; xbb->kva_size = xbb->reqlist_kva_size + (xbb->ring_config.ring_pages * PAGE_SIZE); xbb->kva_free = bit_alloc(xbb->reqlist_kva_pages); if (xbb->kva_free == NULL) return (ENOMEM); DPRINTF("%s: kva_size = %d, reqlist_kva_size = %d\n", device_get_nameunit(xbb->dev), xbb->kva_size, xbb->reqlist_kva_size); -#ifndef XENHVM - xbb->kva = kva_alloc(xbb->kva_size); - if (xbb->kva == 0) - return (ENOMEM); - xbb->gnt_base_addr = xbb->kva; -#else /* XENHVM */ /* * Reserve a range of pseudo physical memory that we can map * into kva. These pages will only be backed by machine * pages ("real memory") during the lifetime of front-end requests * via grant table operations. */ xbb->pseudo_phys_res_id = 0; xbb->pseudo_phys_res = bus_alloc_resource(xbb->dev, SYS_RES_MEMORY, &xbb->pseudo_phys_res_id, 0, ~0, xbb->kva_size, RF_ACTIVE); if (xbb->pseudo_phys_res == NULL) { xbb->kva = 0; return (ENOMEM); } xbb->kva = (vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res); xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res); -#endif /* XENHVM */ DPRINTF("%s: kva: %#jx, gnt_base_addr: %#jx\n", device_get_nameunit(xbb->dev), (uintmax_t)xbb->kva, (uintmax_t)xbb->gnt_base_addr); return (0); } /** * Collect front-end information from the XenStore. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_collect_frontend_info(struct xbb_softc *xbb) { char protocol_abi[64]; const char *otherend_path; int error; u_int ring_idx; u_int ring_page_order; size_t ring_size; otherend_path = xenbus_get_otherend_path(xbb->dev); /* * Protocol defaults valid even if all negotiation fails. */ xbb->ring_config.ring_pages = 1; xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE; /* * Mandatory data (used in all versions of the protocol) first. */ error = xs_scanf(XST_NIL, otherend_path, "event-channel", NULL, "%" PRIu32, &xbb->ring_config.evtchn); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to retrieve event-channel information " "from frontend %s. Unable to connect.", xenbus_get_otherend_path(xbb->dev)); return (error); } /* * These fields are initialized to legacy protocol defaults * so we only need to fail if reading the updated value succeeds * and the new value is outside of its allowed range. * * \note xs_gather() returns on the first encountered error, so * we must use independant calls in order to guarantee * we don't miss information in a sparsly populated front-end * tree. * * \note xs_scanf() does not update variables for unmatched * fields. 
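 *
 * \note The optional front-end nodes consulted below are, for example:
 *
 *           ring-page-order / num-ring-pages
 *           max-requests
 *           max-request-segments
 *           max-request-size
 *           protocol
 *
 *       each read with an independent xs_scanf()/xs_gather() call, so a
 *       missing node simply leaves the legacy default in place.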
*/ ring_page_order = 0; (void)xs_scanf(XST_NIL, otherend_path, "ring-page-order", NULL, "%u", &ring_page_order); xbb->ring_config.ring_pages = 1 << ring_page_order; (void)xs_scanf(XST_NIL, otherend_path, "num-ring-pages", NULL, "%u", &xbb->ring_config.ring_pages); ring_size = PAGE_SIZE * xbb->ring_config.ring_pages; xbb->max_requests = BLKIF_MAX_RING_REQUESTS(ring_size); (void)xs_scanf(XST_NIL, otherend_path, "max-requests", NULL, "%u", &xbb->max_requests); (void)xs_scanf(XST_NIL, otherend_path, "max-request-segments", NULL, "%u", &xbb->max_request_segments); (void)xs_scanf(XST_NIL, otherend_path, "max-request-size", NULL, "%u", &xbb->max_request_size); if (xbb->ring_config.ring_pages > XBB_MAX_RING_PAGES) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified ring-pages of %u " "exceeds backend limit of %zu. " "Unable to connect.", xbb->ring_config.ring_pages, XBB_MAX_RING_PAGES); return (EINVAL); } else if (xbb->max_requests > XBB_MAX_REQUESTS) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_requests of %u " "exceeds backend limit of %u. " "Unable to connect.", xbb->max_requests, XBB_MAX_REQUESTS); return (EINVAL); } else if (xbb->max_request_segments > XBB_MAX_SEGMENTS_PER_REQUEST) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_requests_segments " "of %u exceeds backend limit of %u. " "Unable to connect.", xbb->max_request_segments, XBB_MAX_SEGMENTS_PER_REQUEST); return (EINVAL); } else if (xbb->max_request_size > XBB_MAX_REQUEST_SIZE) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_request_size " "of %u exceeds backend limit of %u. " "Unable to connect.", xbb->max_request_size, XBB_MAX_REQUEST_SIZE); return (EINVAL); } if (xbb->ring_config.ring_pages == 1) { error = xs_gather(XST_NIL, otherend_path, "ring-ref", "%" PRIu32, &xbb->ring_config.ring_ref[0], NULL); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to retrieve ring information " "from frontend %s. Unable to " "connect.", xenbus_get_otherend_path(xbb->dev)); return (error); } } else { /* Multi-page ring format. */ for (ring_idx = 0; ring_idx < xbb->ring_config.ring_pages; ring_idx++) { char ring_ref_name[]= "ring_refXX"; snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", ring_idx); error = xs_scanf(XST_NIL, otherend_path, ring_ref_name, NULL, "%" PRIu32, &xbb->ring_config.ring_ref[ring_idx]); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Failed to retriev grant " "reference for page %u of " "shared ring. Unable " "to connect.", ring_idx); return (error); } } } error = xs_gather(XST_NIL, otherend_path, "protocol", "%63s", protocol_abi, NULL); if (error != 0 || !strcmp(protocol_abi, XEN_IO_PROTO_ABI_NATIVE)) { /* * Assume native if the frontend has not * published ABI data or it has published and * matches our own ABI. */ xbb->abi = BLKIF_PROTOCOL_NATIVE; } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_32)) { xbb->abi = BLKIF_PROTOCOL_X86_32; } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_64)) { xbb->abi = BLKIF_PROTOCOL_X86_64; } else { xenbus_dev_fatal(xbb->dev, EINVAL, "Unknown protocol ABI (%s) published by " "frontend. Unable to connect.", protocol_abi); return (EINVAL); } return (0); } /** * Allocate per-request data structures given request size and number * information negotiated with the front-end. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_alloc_requests(struct xbb_softc *xbb) { struct xbb_xen_req *req; struct xbb_xen_req *last_req; /* * Allocate request book keeping datastructures. 
*/ xbb->requests = malloc(xbb->max_requests * sizeof(*xbb->requests), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (xbb->requests == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request structures"); return (ENOMEM); } req = xbb->requests; last_req = &xbb->requests[xbb->max_requests - 1]; STAILQ_INIT(&xbb->request_free_stailq); while (req <= last_req) { STAILQ_INSERT_TAIL(&xbb->request_free_stailq, req, links); req++; } return (0); } static int xbb_alloc_request_lists(struct xbb_softc *xbb) { struct xbb_xen_reqlist *reqlist; int i; /* * If no requests can be merged, we need 1 request list per * in flight request. */ xbb->request_lists = malloc(xbb->max_requests * sizeof(*xbb->request_lists), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (xbb->request_lists == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request list structures"); return (ENOMEM); } STAILQ_INIT(&xbb->reqlist_free_stailq); STAILQ_INIT(&xbb->reqlist_pending_stailq); for (i = 0; i < xbb->max_requests; i++) { int seg; reqlist = &xbb->request_lists[i]; reqlist->xbb = xbb; #ifdef XBB_USE_BOUNCE_BUFFERS reqlist->bounce = malloc(xbb->max_reqlist_size, M_XENBLOCKBACK, M_NOWAIT); if (reqlist->bounce == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request " "bounce buffers"); return (ENOMEM); } #endif /* XBB_USE_BOUNCE_BUFFERS */ reqlist->gnt_handles = malloc(xbb->max_reqlist_segments * sizeof(*reqlist->gnt_handles), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (reqlist->gnt_handles == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request " "grant references"); return (ENOMEM); } for (seg = 0; seg < xbb->max_reqlist_segments; seg++) reqlist->gnt_handles[seg] = GRANT_REF_INVALID; STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); } return (0); } /** * Supply information about the physical device to the frontend * via XenBus. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_publish_backend_info(struct xbb_softc *xbb) { struct xs_transaction xst; const char *our_path; const char *leaf; int error; our_path = xenbus_get_node(xbb->dev); while (1) { error = xs_transaction_start(&xst); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Error publishing backend info " "(start transaction)"); return (error); } leaf = "sectors"; error = xs_printf(xst, our_path, leaf, "%"PRIu64, xbb->media_num_sectors); if (error != 0) break; /* XXX Support all VBD attributes here. */ leaf = "info"; error = xs_printf(xst, our_path, leaf, "%u", xbb->flags & XBBF_READ_ONLY ? VDISK_READONLY : 0); if (error != 0) break; leaf = "sector-size"; error = xs_printf(xst, our_path, leaf, "%u", xbb->sector_size); if (error != 0) break; error = xs_transaction_end(xst, 0); if (error == 0) { return (0); } else if (error != EAGAIN) { xenbus_dev_fatal(xbb->dev, error, "ending transaction"); return (error); } } xenbus_dev_fatal(xbb->dev, error, "writing %s/%s", our_path, leaf); xs_transaction_end(xst, 1); return (error); } /** * Connect to our blkfront peer now that it has completed publishing * its configuration into the XenStore. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_connect(struct xbb_softc *xbb) { int error; if (xenbus_get_state(xbb->dev) == XenbusStateConnected) return; if (xbb_collect_frontend_info(xbb) != 0) return; xbb->flags &= ~XBBF_SHUTDOWN; /* * We limit the maximum number of reqlist segments to the maximum * number of segments in the ring, or our absolute maximum, * whichever is smaller. 
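 * * For illustration: a front-end advertising S segments per request and * R concurrent requests can have at most S * R segments in flight; the * MIN() below clamps that product to XBB_MAX_SEGMENTS_PER_REQLIST * before max_reqlist_size is derived from it.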
*/ xbb->max_reqlist_segments = MIN(xbb->max_request_segments * xbb->max_requests, XBB_MAX_SEGMENTS_PER_REQLIST); /* * The maximum size is simply a function of the number of segments * we can handle. */ xbb->max_reqlist_size = xbb->max_reqlist_segments * PAGE_SIZE; /* Allocate resources whose size depends on front-end configuration. */ error = xbb_alloc_communication_mem(xbb); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to allocate communication memory"); return; } error = xbb_alloc_requests(xbb); if (error != 0) { /* Specific errors are reported by xbb_alloc_requests(). */ return; } error = xbb_alloc_request_lists(xbb); if (error != 0) { /* Specific errors are reported by xbb_alloc_request_lists(). */ return; } /* * Connect communication channel. */ error = xbb_connect_ring(xbb); if (error != 0) { /* Specific errors are reported by xbb_connect_ring(). */ return; } if (xbb_publish_backend_info(xbb) != 0) { /* * If we can't publish our data, we cannot participate * in this connection, and waiting for a front-end state * change will not help the situation. */ (void)xbb_disconnect(xbb); return; } /* Ready for I/O. */ xenbus_set_state(xbb->dev, XenbusStateConnected); } /*-------------------------- Device Teardown Support -------------------------*/ /** * Perform device shutdown functions. * * \param xbb Per-instance xbb configuration structure. * * Mark this instance as shutting down, wait for any active I/O on the * backend device/file to drain, disconnect from the front-end, and notify * any waiters (e.g. a thread invoking our detach method) that detach can * now proceed. */ static int xbb_shutdown(struct xbb_softc *xbb) { XenbusState frontState; int error; DPRINTF("\n"); /* * Due to the need to drop our mutex during some * xenbus operations, it is possible for two threads * to attempt to close out shutdown processing at * the same time. Tell the caller that hits this * race to try back later. */ if ((xbb->flags & XBBF_IN_SHUTDOWN) != 0) return (EAGAIN); xbb->flags |= XBBF_IN_SHUTDOWN; mtx_unlock(&xbb->lock); if (xenbus_get_state(xbb->dev) < XenbusStateClosing) xenbus_set_state(xbb->dev, XenbusStateClosing); frontState = xenbus_get_otherend_state(xbb->dev); mtx_lock(&xbb->lock); xbb->flags &= ~XBBF_IN_SHUTDOWN; /* The front can submit I/O until entering the closed state. */ if (frontState < XenbusStateClosed) return (EAGAIN); DPRINTF("\n"); /* Indicate shutdown is in progress. */ xbb->flags |= XBBF_SHUTDOWN; /* Disconnect from the front-end. */ error = xbb_disconnect(xbb); if (error != 0) { /* * Requests still outstanding. We'll be called again * once they complete. */ KASSERT(error == EAGAIN, ("%s: Unexpected xbb_disconnect() failure %d", __func__, error)); return (error); } DPRINTF("\n"); /* Indicate to xbb_detach() that is it safe to proceed. */ wakeup(xbb); return (0); } /** * Report an attach time error to the console and Xen, and cleanup * this instance by forcing immediate detach processing. * * \param xbb Per-instance xbb configuration structure. * \param err Errno describing the error. * \param fmt Printf style format and arguments */ static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) 
{ va_list ap; va_list ap_hotplug; va_start(ap, fmt); va_copy(ap_hotplug, ap); xs_vprintf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-error", fmt, ap_hotplug); va_end(ap_hotplug); xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-status", "error"); xenbus_dev_vfatal(xbb->dev, err, fmt, ap); va_end(ap); xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "online", "0"); xbb_detach(xbb->dev); } /*---------------------------- NewBus Entrypoints ----------------------------*/ /** * Inspect a XenBus device and claim it if is of the appropriate type. * * \param dev NewBus device object representing a candidate XenBus device. * * \return 0 for success, errno codes for failure. */ static int xbb_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vbd")) { device_set_desc(dev, "Backend Virtual Block Device"); device_quiet(dev); return (0); } return (ENXIO); } /** * Setup sysctl variables to control various Block Back parameters. * * \param xbb Xen Block Back softc. * */ static void xbb_setup_sysctl(struct xbb_softc *xbb) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; sysctl_ctx = device_get_sysctl_ctx(xbb->dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xbb->dev); if (sysctl_tree == NULL) return; SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_flush", CTLFLAG_RW, &xbb->disable_flush, 0, "fake the flush command"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "flush_interval", CTLFLAG_RW, &xbb->flush_interval, 0, "send a real flush for N flush requests"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "no_coalesce_reqs", CTLFLAG_RW, &xbb->no_coalesce_reqs,0, "Don't coalesce contiguous requests"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "reqs_received", CTLFLAG_RW, &xbb->reqs_received, "how many I/O requests we have received"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "reqs_completed", CTLFLAG_RW, &xbb->reqs_completed, "how many I/O requests have been completed"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "reqs_queued_for_completion", CTLFLAG_RW, &xbb->reqs_queued_for_completion, "how many I/O requests queued but not yet pushed"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "reqs_completed_with_error", CTLFLAG_RW, &xbb->reqs_completed_with_error, "how many I/O requests completed with error status"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "forced_dispatch", CTLFLAG_RW, &xbb->forced_dispatch, "how many I/O dispatches were forced"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "normal_dispatch", CTLFLAG_RW, &xbb->normal_dispatch, "how many I/O dispatches were normal"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "total_dispatch", CTLFLAG_RW, &xbb->total_dispatch, "total number of I/O dispatches"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "kva_shortages", CTLFLAG_RW, &xbb->kva_shortages, "how many times we have run out of KVA"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "request_shortages", CTLFLAG_RW, &xbb->request_shortages, "how many times we have run out of requests"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_requests", CTLFLAG_RD, &xbb->max_requests, 0, "maximum outstanding requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_request_segments", 
CTLFLAG_RD, &xbb->max_request_segments, 0, "maximum number of pages per requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_request_size", CTLFLAG_RD, &xbb->max_request_size, 0, "maximum size in bytes of a request (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "ring_pages", CTLFLAG_RD, &xbb->ring_config.ring_pages, 0, "communication channel pages (negotiated)"); } /** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. */ static int xbb_attach(device_t dev) { struct xbb_softc *xbb; int error; u_int max_ring_page_order; DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); /* * Basic initialization. * After this block it is safe to call xbb_detach() * to clean up any allocated data for this instance. */ xbb = device_get_softc(dev); xbb->dev = dev; xbb->otherend_id = xenbus_get_otherend_id(dev); TASK_INIT(&xbb->io_task, /*priority*/0, xbb_run_queue, xbb); mtx_init(&xbb->lock, device_get_nameunit(dev), NULL, MTX_DEF); /* * Publish protocol capabilities for consumption by the * front-end. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "feature-barrier", "1"); if (error) { xbb_attach_failed(xbb, error, "writing %s/feature-barrier", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "feature-flush-cache", "1"); if (error) { xbb_attach_failed(xbb, error, "writing %s/feature-flush-cache", xenbus_get_node(xbb->dev)); return (error); } /* * Amazon EC2 client compatility. They refer to max-ring-pages * instead of to max-ring-page-order. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-ring-pages", "%zu", XBB_MAX_RING_PAGES); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-ring-pages", xenbus_get_node(xbb->dev)); return (error); } max_ring_page_order = flsl(XBB_MAX_RING_PAGES) - 1; error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-ring-page-order", "%u", max_ring_page_order); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-ring-page-order", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-requests", "%u", XBB_MAX_REQUESTS); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-requests", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-request-segments", "%u", XBB_MAX_SEGMENTS_PER_REQUEST); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-request-segments", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-request-size", "%u", XBB_MAX_REQUEST_SIZE); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-request-size", xenbus_get_node(xbb->dev)); return (error); } /* Collect physical device information. */ error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev), "device-type", NULL, &xbb->dev_type, NULL); if (error != 0) xbb->dev_type = NULL; error = xs_gather(XST_NIL, xenbus_get_node(dev), "mode", NULL, &xbb->dev_mode, "params", NULL, &xbb->dev_name, NULL); if (error != 0) { xbb_attach_failed(xbb, error, "reading backend fields at %s", xenbus_get_node(dev)); return (ENXIO); } /* Parse fopen style mode flags. */ if (strchr(xbb->dev_mode, 'w') == NULL) xbb->flags |= XBBF_READ_ONLY; /* * Verify the physical device is present and can support * the desired I/O mode. 
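 * * Whether write access is required follows from the fopen-style "mode" * node parsed above; the absence of a 'w' flag has already marked this * vbd XBBF_READ_ONLY.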
*/ DROP_GIANT(); error = xbb_open_backend(xbb); PICKUP_GIANT(); if (error != 0) { xbb_attach_failed(xbb, error, "Unable to open %s", xbb->dev_name); return (ENXIO); } /* Use devstat(9) for recording statistics. */ xbb->xbb_stats = devstat_new_entry("xbb", device_get_unit(xbb->dev), xbb->sector_size, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); xbb->xbb_stats_in = devstat_new_entry("xbbi", device_get_unit(xbb->dev), xbb->sector_size, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); /* * Setup sysctl variables. */ xbb_setup_sysctl(xbb); /* * Create a taskqueue for doing work that must occur from a * thread context. */ xbb->io_taskqueue = taskqueue_create_fast(device_get_nameunit(dev), M_NOWAIT, taskqueue_thread_enqueue, /*context*/&xbb->io_taskqueue); if (xbb->io_taskqueue == NULL) { xbb_attach_failed(xbb, error, "Unable to create taskqueue"); return (ENOMEM); } taskqueue_start_threads(&xbb->io_taskqueue, /*num threads*/1, /*priority*/PWAIT, /*thread name*/ "%s taskq", device_get_nameunit(dev)); /* Update hot-plug status to satisfy xend. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-status", "connected"); if (error) { xbb_attach_failed(xbb, error, "writing %s/hotplug-status", xenbus_get_node(xbb->dev)); return (error); } /* Tell the front end that we are ready to connect. */ xenbus_set_state(dev, XenbusStateInitWait); return (0); } /** * Detach from a block back device instance. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. * * \note A block back device may be detached at any time in its life-cycle, * including part way through the attach process. For this reason, * initialization order and the initialization state checks in this * routine must be carefully coupled so that attach time failures * are gracefully handled. */ static int xbb_detach(device_t dev) { struct xbb_softc *xbb; DPRINTF("\n"); xbb = device_get_softc(dev); mtx_lock(&xbb->lock); while (xbb_shutdown(xbb) == EAGAIN) { msleep(xbb, &xbb->lock, /*wakeup prio unchanged*/0, "xbb_shutdown", 0); } mtx_unlock(&xbb->lock); DPRINTF("\n"); if (xbb->io_taskqueue != NULL) taskqueue_free(xbb->io_taskqueue); if (xbb->xbb_stats != NULL) devstat_remove_entry(xbb->xbb_stats); if (xbb->xbb_stats_in != NULL) devstat_remove_entry(xbb->xbb_stats_in); xbb_close_backend(xbb); if (xbb->dev_mode != NULL) { free(xbb->dev_mode, M_XENSTORE); xbb->dev_mode = NULL; } if (xbb->dev_type != NULL) { free(xbb->dev_type, M_XENSTORE); xbb->dev_type = NULL; } if (xbb->dev_name != NULL) { free(xbb->dev_name, M_XENSTORE); xbb->dev_name = NULL; } mtx_destroy(&xbb->lock); return (0); } /** * Prepare this block back device for suspension of this VM. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. */ static int xbb_suspend(device_t dev) { #ifdef NOT_YET struct xbb_softc *sc = device_get_softc(dev); /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xb_io_lock); sc->connected = BLKIF_STATE_SUSPENDED; mtx_unlock(&sc->xb_io_lock); #endif return (0); } /** * Perform any processing required to recover from a suspended state. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure.
*/ static int xbb_resume(device_t dev) { return (0); } /** * Handle state changes expressed via the XenStore by our front-end peer. * * \param dev NewBus device object representing this Xen * Block Back instance. * \param frontend_state The new state of the front-end. * * \return 0 for success, errno codes for failure. */ static void xbb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xbb_softc *xbb = device_get_softc(dev); DPRINTF("frontend_state=%s, xbb_state=%s\n", xenbus_strstate(frontend_state), xenbus_strstate(xenbus_get_state(xbb->dev))); switch (frontend_state) { case XenbusStateInitialising: break; case XenbusStateInitialised: case XenbusStateConnected: xbb_connect(xbb); break; case XenbusStateClosing: case XenbusStateClosed: mtx_lock(&xbb->lock); xbb_shutdown(xbb); mtx_unlock(&xbb->lock); if (frontend_state == XenbusStateClosed) xenbus_set_state(xbb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xbb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xbb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xbb_probe), DEVMETHOD(device_attach, xbb_attach), DEVMETHOD(device_detach, xbb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xbb_suspend), DEVMETHOD(device_resume, xbb_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xbb_frontend_changed), { 0, 0 } }; static driver_t xbb_driver = { "xbbd", xbb_methods, sizeof(struct xbb_softc), }; devclass_t xbb_devclass; DRIVER_MODULE(xbbd, xenbusb_back, xbb_driver, xbb_devclass, 0, 0); Index: head/sys/dev/xen/control/control.c =================================================================== --- head/sys/dev/xen/control/control.c (revision 282273) +++ head/sys/dev/xen/control/control.c (revision 282274) @@ -1,567 +1,437 @@ /*- * Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. */ /*- * PV suspend/resume support: * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christian Limpach. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * HVM suspend/resume support: * * Copyright (c) 2008 Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /** * \file control.c * * \brief Device driver to repond to control domain events that impact * this VM. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef XENHVM #include -#endif #include #include #include #include #include /*--------------------------- Forward Declarations --------------------------*/ /** Function signature for shutdown event handlers. */ typedef void (xctrl_shutdown_handler_t)(void); static xctrl_shutdown_handler_t xctrl_poweroff; static xctrl_shutdown_handler_t xctrl_reboot; static xctrl_shutdown_handler_t xctrl_suspend; static xctrl_shutdown_handler_t xctrl_crash; /*-------------------------- Private Data Structures -------------------------*/ /** Element type for lookup table of event name to handler. */ struct xctrl_shutdown_reason { const char *name; xctrl_shutdown_handler_t *handler; }; /** Lookup table for shutdown event name to handler. */ static const struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = { { "poweroff", xctrl_poweroff }, { "reboot", xctrl_reboot }, { "suspend", xctrl_suspend }, { "crash", xctrl_crash }, { "halt", xctrl_poweroff }, }; struct xctrl_softc { struct xs_watch xctrl_watch; }; /*------------------------------ Event Handlers ------------------------------*/ static void xctrl_poweroff() { shutdown_nice(RB_POWEROFF|RB_HALT); } static void xctrl_reboot() { shutdown_nice(0); } -#ifndef XENHVM -extern void xencons_suspend(void); -extern void xencons_resume(void); - -/* Full PV mode suspension. */ static void xctrl_suspend() { - int i, j, k, fpp, suspend_cancelled; - unsigned long max_pfn, start_info_mfn; - - EVENTHANDLER_INVOKE(power_suspend); - #ifdef SMP - struct thread *td; - cpuset_t map; - u_int cpuid; - - /* - * Bind us to CPU 0 and stop any other VCPUs. - */ - td = curthread; - thread_lock(td); - sched_bind(td, 0); - thread_unlock(td); - cpuid = PCPU_GET(cpuid); - KASSERT(cpuid == 0, ("xen_suspend: not running on cpu 0")); - - map = all_cpus; - CPU_CLR(cpuid, &map); - CPU_NAND(&map, &stopped_cpus); - if (!CPU_EMPTY(&map)) - stop_cpus(map); -#endif - - /* - * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE - * drivers need this. - */ - mtx_lock(&Giant); - if (DEVICE_SUSPEND(root_bus) != 0) { - mtx_unlock(&Giant); - printf("%s: device_suspend failed\n", __func__); -#ifdef SMP - if (!CPU_EMPTY(&map)) - restart_cpus(map); -#endif - return; - } - mtx_unlock(&Giant); - - local_irq_disable(); - - xencons_suspend(); - gnttab_suspend(); - intr_suspend(); - - max_pfn = HYPERVISOR_shared_info->arch.max_pfn; - - void *shared_info = HYPERVISOR_shared_info; - HYPERVISOR_shared_info = NULL; - pmap_kremove((vm_offset_t) shared_info); - PT_UPDATES_FLUSH(); - - xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn); - xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn); - - /* - * We'll stop somewhere inside this hypercall. When it returns, - * we'll start resuming after the restore. 
- */ - start_info_mfn = VTOMFN(xen_start_info); - pmap_suspend(); - suspend_cancelled = HYPERVISOR_suspend(start_info_mfn); - pmap_resume(); - - pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info); - HYPERVISOR_shared_info = shared_info; - - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = - VTOMFN(xen_pfn_to_mfn_frame_list_list); - - fpp = PAGE_SIZE/sizeof(unsigned long); - for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) { - if ((j % fpp) == 0) { - k++; - xen_pfn_to_mfn_frame_list_list[k] = - VTOMFN(xen_pfn_to_mfn_frame_list[k]); - j = 0; - } - xen_pfn_to_mfn_frame_list[k][j] = - VTOMFN(&xen_phys_machine[i]); - } - HYPERVISOR_shared_info->arch.max_pfn = max_pfn; - - gnttab_resume(NULL); - intr_resume(suspend_cancelled != 0); - local_irq_enable(); - xencons_resume(); - -#ifdef CONFIG_SMP - for_each_cpu(i) - vcpu_prepare(i); - -#endif - - /* - * Only resume xenbus /after/ we've prepared our VCPUs; otherwise - * the VCPU hotplug callback can race with our vcpu_prepare - */ - mtx_lock(&Giant); - DEVICE_RESUME(root_bus); - mtx_unlock(&Giant); - -#ifdef SMP - thread_lock(curthread); - sched_unbind(curthread); - thread_unlock(curthread); - if (!CPU_EMPTY(&map)) - restart_cpus(map); -#endif - EVENTHANDLER_INVOKE(power_resume); -} - -#else - -/* HVM mode suspension. */ -static void -xctrl_suspend() -{ -#ifdef SMP cpuset_t cpu_suspend_map; #endif int suspend_cancelled; EVENTHANDLER_INVOKE(power_suspend); if (smp_started) { thread_lock(curthread); sched_bind(curthread, 0); thread_unlock(curthread); } KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0")); /* * Clear our XenStore node so the toolstack knows we are * responding to the suspend request. */ xs_write(XST_NIL, "control", "shutdown", ""); /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE * drivers need this. */ mtx_lock(&Giant); if (DEVICE_SUSPEND(root_bus) != 0) { mtx_unlock(&Giant); printf("%s: device_suspend failed\n", __func__); return; } mtx_unlock(&Giant); #ifdef SMP CPU_ZERO(&cpu_suspend_map); /* silence gcc */ if (smp_started) { /* * Suspend other CPUs. This prevents IPIs while we * are resuming, and will allow us to reset per-cpu * vcpu_info on resume. */ cpu_suspend_map = all_cpus; CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map); if (!CPU_EMPTY(&cpu_suspend_map)) suspend_cpus(cpu_suspend_map); } #endif /* * Prevent any races with evtchn_interrupt() handler. */ disable_intr(); intr_suspend(); xen_hvm_suspend(); suspend_cancelled = HYPERVISOR_suspend(0); xen_hvm_resume(suspend_cancelled != 0); intr_resume(suspend_cancelled != 0); enable_intr(); /* * Reset grant table info. */ gnttab_resume(NULL); #ifdef SMP if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) { /* * Now that event channels have been initialized, * resume CPUs. */ resume_cpus(cpu_suspend_map); } #endif /* * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or * similar. */ mtx_lock(&Giant); DEVICE_RESUME(root_bus); mtx_unlock(&Giant); if (smp_started) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } EVENTHANDLER_INVOKE(power_resume); if (bootverbose) printf("System resumed after suspension\n"); } -#endif static void xctrl_crash() { panic("Xen directed crash"); } static void xen_pv_shutdown_final(void *arg, int howto) { /* * Inform the hypervisor that shutdown is complete. * This is not necessary in HVM domains since Xen * emulates ACPI in that mode and FreeBSD's ACPI * support will request this transition. 
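 * * For that reason the shutdown_final handler is only registered when * running as a PV domain (see the xen_pv_domain() check in * xctrl_attach() below).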
*/ if (howto & (RB_HALT | RB_POWEROFF)) HYPERVISOR_shutdown(SHUTDOWN_poweroff); else HYPERVISOR_shutdown(SHUTDOWN_reboot); } /*------------------------------ Event Reception -----------------------------*/ static void xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len) { const struct xctrl_shutdown_reason *reason; const struct xctrl_shutdown_reason *last_reason; char *result; int error; int result_len; error = xs_read(XST_NIL, "control", "shutdown", &result_len, (void **)&result); if (error != 0) return; reason = xctrl_shutdown_reasons; last_reason = reason + nitems(xctrl_shutdown_reasons); while (reason < last_reason) { if (!strcmp(result, reason->name)) { reason->handler(); break; } reason++; } free(result, M_XENSTORE); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void xctrl_identify(driver_t *driver __unused, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existance of the Xen Control device * * \param dev NewBus device_t for this Xen control instance. * * \return Always returns 0 indicating success. */ static int xctrl_probe(device_t dev) { device_set_desc(dev, "Xen Control Device"); return (0); } /** * \brief Attach the Xen control device. * * \param dev NewBus device_t for this Xen control instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xctrl_attach(device_t dev) { struct xctrl_softc *xctrl; xctrl = device_get_softc(dev); /* Activate watch */ xctrl->xctrl_watch.node = "control/shutdown"; xctrl->xctrl_watch.callback = xctrl_on_watch_event; xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl; xs_register_watch(&xctrl->xctrl_watch); if (xen_pv_domain()) EVENTHANDLER_REGISTER(shutdown_final, xen_pv_shutdown_final, NULL, SHUTDOWN_PRI_LAST); return (0); } /** * \brief Detach the Xen control device. * * \param dev NewBus device_t for this Xen control device instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xctrl_detach(device_t dev) { struct xctrl_softc *xctrl; xctrl = device_get_softc(dev); /* Release watch */ xs_unregister_watch(&xctrl->xctrl_watch); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xctrl_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xctrl_identify), DEVMETHOD(device_probe, xctrl_probe), DEVMETHOD(device_attach, xctrl_attach), DEVMETHOD(device_detach, xctrl_detach), DEVMETHOD_END }; DEFINE_CLASS_0(xctrl, xctrl_driver, xctrl_methods, sizeof(struct xctrl_softc)); devclass_t xctrl_devclass; DRIVER_MODULE(xctrl, xenstore, xctrl_driver, xctrl_devclass, NULL, NULL); Index: head/sys/dev/xen/grant_table/grant_table.c =================================================================== --- head/sys/dev/xen/grant_table/grant_table.c (revision 282273) +++ head/sys/dev/xen/grant_table/grant_table.c (revision 282274) @@ -1,776 +1,706 @@ /****************************************************************************** * gnttab.c * * Two sets of functionality: * 1. Granting foreign access to our memory reservation. * 2. 
Accessing others' memory reservations via grant references. * (i.e., mechanisms for both sender and recipient of grant references) * * Copyright (c) 2005, Christopher Clark * Copyright (c) 2004, K A Fraser */ #include __FBSDID("$FreeBSD$"); #include "opt_pmap.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c)) /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; static unsigned int boot_max_nr_grant_frames; static int gnttab_free_count; static grant_ref_t gnttab_free_head; static struct mtx gnttab_list_lock; -#ifdef XENHVM /* * Resource representing allocated physical address space * for the grant table metainfo */ static struct resource *gnttab_pseudo_phys_res; /* Resource id for allocated physical address space. */ static int gnttab_pseudo_phys_res_id; -#endif static grant_entry_t *shared; static struct gnttab_free_callback *gnttab_free_callback_list = NULL; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) static int get_free_entries(int count, int *entries) { int ref, error; grant_ref_t head; mtx_lock(&gnttab_list_lock); if ((gnttab_free_count < count) && ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { mtx_unlock(&gnttab_list_lock); return (error); } ref = head = gnttab_free_head; gnttab_free_count -= count; while (count-- > 1) head = gnttab_entry(head); gnttab_free_head = gnttab_entry(head); gnttab_entry(head) = GNTTAB_LIST_END; mtx_unlock(&gnttab_list_lock); *entries = ref; return (0); } static void do_free_callbacks(void) { struct gnttab_free_callback *callback, *next; callback = gnttab_free_callback_list; gnttab_free_callback_list = NULL; while (callback != NULL) { next = callback->next; if (gnttab_free_count >= callback->count) { callback->next = NULL; callback->fn(callback->arg); } else { callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; } callback = next; } } static inline void check_free_callbacks(void) { if (__predict_false(gnttab_free_callback_list != NULL)) do_free_callbacks(); } static void put_free_entry(grant_ref_t ref) { mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } /* * Public grant-issuing interface functions */ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); if (__predict_false(error)) return (error); shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); if (result) *result = ref; return (0); } void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) { shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? 
GTF_readonly : 0); } int gnttab_query_foreign_access(grant_ref_t ref) { uint16_t nflags; nflags = shared[ref].flags; return (nflags & (GTF_reading|GTF_writing)); } int gnttab_end_foreign_access_ref(grant_ref_t ref) { uint16_t flags, nflags; nflags = shared[ref].flags; do { if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { printf("%s: WARNING: g.e. still in use!\n", __func__); return (0); } } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != flags); return (1); } void gnttab_end_foreign_access(grant_ref_t ref, void *page) { if (gnttab_end_foreign_access_ref(ref)) { put_free_entry(ref); if (page != NULL) { free(page, M_DEVBUF); } } else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. and page still in use!\n", __func__); } } void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) { grant_ref_t *last_ref; grant_ref_t head; grant_ref_t tail; head = GNTTAB_LIST_END; tail = *refs; last_ref = refs + count; while (refs != last_ref) { if (gnttab_end_foreign_access_ref(*refs)) { gnttab_entry(*refs) = head; head = *refs; } else { /* * XXX This needs to be fixed so that the ref * is placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. still in use!\n", __func__); count--; } refs++; } if (count != 0) { mtx_lock(&gnttab_list_lock); gnttab_free_count += count; gnttab_entry(tail) = gnttab_free_head; gnttab_free_head = head; mtx_unlock(&gnttab_list_lock); } } int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); if (__predict_false(error)) return (error); gnttab_grant_foreign_transfer_ref(ref, domid, pfn); *result = ref; return (0); } void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { shared[ref].frame = pfn; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_accept_transfer; } unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { unsigned long frame; uint16_t flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) return (0); cpu_relax(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { flags = shared[ref].flags; cpu_relax(); } /* Read the frame number /after/ reading completion status. 
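 * The rmb() that follows enforces this ordering; without it the read of * shared[ref].frame could be satisfied before the completion flag is * observed, yielding a stale frame number.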
*/ rmb(); frame = shared[ref].frame; KASSERT(frame != 0, ("grant table inconsistent")); return (frame); } unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) { unsigned long frame = gnttab_end_foreign_transfer_ref(ref); put_free_entry(ref); return (frame); } void gnttab_free_grant_reference(grant_ref_t ref) { put_free_entry(ref); } void gnttab_free_grant_references(grant_ref_t head) { grant_ref_t ref; int count = 1; if (head == GNTTAB_LIST_END) return; ref = head; while (gnttab_entry(ref) != GNTTAB_LIST_END) { ref = gnttab_entry(ref); count++; } mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) { int ref, error; error = get_free_entries(count, &ref); if (__predict_false(error)) return (error); *head = ref; return (0); } int gnttab_empty_grant_references(const grant_ref_t *private_head) { return (*private_head == GNTTAB_LIST_END); } int gnttab_claim_grant_reference(grant_ref_t *private_head) { grant_ref_t g = *private_head; if (__predict_false(g == GNTTAB_LIST_END)) return (g); *private_head = gnttab_entry(g); return (g); } void gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) { gnttab_entry(release) = *private_head; *private_head = release; } void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count) { mtx_lock(&gnttab_list_lock); if (callback->next) goto out; callback->fn = fn; callback->arg = arg; callback->count = count; callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; check_free_callbacks(); out: mtx_unlock(&gnttab_list_lock); } void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) { struct gnttab_free_callback **pcb; mtx_lock(&gnttab_list_lock); for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { if (*pcb == callback) { *pcb = callback->next; break; } } mtx_unlock(&gnttab_list_lock); } static int grow_gnttab_list(unsigned int more_frames) { unsigned int new_nr_grant_frames, extra_entries, i; new_nr_grant_frames = nr_grant_frames + more_frames; extra_entries = more_frames * GREFS_PER_GRANT_FRAME; for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (!gnttab_list[i]) goto grow_nomem; } for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(i) = gnttab_free_head; gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; gnttab_free_count += extra_entries; nr_grant_frames = new_nr_grant_frames; check_free_callbacks(); return (0); grow_nomem: for ( ; i >= nr_grant_frames; i--) free(gnttab_list[i], M_DEVBUF); return (ENOMEM); } static unsigned int __max_nr_grant_frames(void) { struct gnttab_query_size query; int rc; query.dom = DOMID_SELF; rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); if ((rc < 0) || (query.status != GNTST_okay)) return (4); /* Legacy max supported number of frames */ return (query.max_nr_frames); } static inline unsigned int max_nr_grant_frames(void) { unsigned int xen_max = __max_nr_grant_frames(); if (xen_max > boot_max_nr_grant_frames) return (boot_max_nr_grant_frames); return (xen_max); } #ifdef notyet /* * XXX needed for backend support * */ static int map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void 
*data) { unsigned long **frames = (unsigned long **)data; set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); (*frames)++; return 0; } static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } #endif -#ifndef XENHVM - -static int -gnttab_map(unsigned int start_idx, unsigned int end_idx) -{ - struct gnttab_setup_table setup; - u_long *frames; - - unsigned int nr_gframes = end_idx + 1; - int i, rc; - - frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT); - if (!frames) - return (ENOMEM); - - setup.dom = DOMID_SELF; - setup.nr_frames = nr_gframes; - set_xen_guest_handle(setup.frame_list, frames); - - rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); - if (rc == -ENOSYS) { - free(frames, M_DEVBUF); - return (ENOSYS); - } - KASSERT(!(rc || setup.status), - ("unexpected result from grant_table_op")); - - if (shared == NULL) { - vm_offset_t area; - - area = kva_alloc(PAGE_SIZE * max_nr_grant_frames()); - KASSERT(area, ("can't allocate VM space for grant table")); - shared = (grant_entry_t *)area; - } - - for (i = 0; i < nr_gframes; i++) - PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE, - ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V); - - free(frames, M_DEVBUF); - - return (0); -} - -int -gnttab_resume(device_t dev) -{ - - if (max_nr_grant_frames() < nr_grant_frames) - return (ENOSYS); - return (gnttab_map(0, nr_grant_frames - 1)); -} - -int -gnttab_suspend(void) -{ - int i; - - for (i = 0; i < nr_grant_frames; i++) - pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE); - - return (0); -} - -#else /* XENHVM */ - static vm_paddr_t resume_frames; static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct xen_add_to_physmap xatp; unsigned int i = end_idx; /* * Loop backwards, so that the first hypercall has the largest index, * ensuring that the table will grow only once. 
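 * (Mapping the highest index first causes Xen to size the grant table * for the whole range on the first call, so the remaining * XENMEM_add_to_physmap operations do not trigger further growth.)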
*/ do { xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed to map gnttab"); } while (i-- > start_idx); if (shared == NULL) { vm_offset_t area; area = kva_alloc(PAGE_SIZE * max_nr_grant_frames()); KASSERT(area, ("can't allocate VM space for grant table")); shared = (grant_entry_t *)area; } for (i = start_idx; i <= end_idx; i++) { pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE, resume_frames + i * PAGE_SIZE); } return (0); } int gnttab_resume(device_t dev) { unsigned int max_nr_gframes, nr_gframes; nr_gframes = nr_grant_frames; max_nr_gframes = max_nr_grant_frames(); if (max_nr_gframes < nr_gframes) return (ENOSYS); if (!resume_frames) { KASSERT(dev != NULL, ("No resume frames and no device provided")); gnttab_pseudo_phys_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &gnttab_pseudo_phys_res_id, 0, ~0, PAGE_SIZE * max_nr_gframes, RF_ACTIVE); if (gnttab_pseudo_phys_res == NULL) panic("Unable to reserve physical memory for gnttab"); resume_frames = rman_get_start(gnttab_pseudo_phys_res); } return (gnttab_map(0, nr_gframes - 1)); } - -#endif static int gnttab_expand(unsigned int req_entries) { int error; unsigned int cur, extra; cur = nr_grant_frames; extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / GREFS_PER_GRANT_FRAME); if (cur + extra > max_nr_grant_frames()) return (ENOSPC); error = gnttab_map(cur, cur + extra - 1); if (!error) error = grow_gnttab_list(extra); return (error); } MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF); /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void granttable_identify(driver_t *driver __unused, device_t parent) { KASSERT(xen_domain(), ("Trying to attach grant-table device on non Xen domain")); /* * A single device instance for our driver is always present * in a system operating under Xen. */ if (BUS_ADD_CHILD(parent, 0, driver->name, 0) == NULL) panic("unable to attach Xen Grant-table device"); } /** * \brief Probe for the existence of the Xen Grant-table device * * \param dev NewBus device_t for this instance. * * \return Always returns 0 indicating success. */ static int granttable_probe(device_t dev) { device_set_desc(dev, "Xen Grant-table Device"); return (BUS_PROBE_NOWILDCARD); } /** * \brief Attach the Xen Grant-table device. * * \param dev NewBus device_t for this instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int granttable_attach(device_t dev) { int i; unsigned int max_nr_glist_frames; unsigned int nr_init_grefs; nr_grant_frames = 1; boot_max_nr_grant_frames = __max_nr_grant_frames(); /* Determine the maximum number of frames required for the * grant reference free list on the current hypervisor. 
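 * For example, assuming 4 KiB pages and the v1 grant entry layout * (8-byte grant_entry_t, 4-byte grant_ref_t): each grant frame holds * 512 entries while each free-list page tracks 1024 references, so * roughly one list page is needed for every two grant frames. These * numbers are only illustrative; the formula below is what is used.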
*/ max_nr_glist_frames = (boot_max_nr_grant_frames * GREFS_PER_GRANT_FRAME / (PAGE_SIZE / sizeof(grant_ref_t))); gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), M_DEVBUF, M_NOWAIT); if (gnttab_list == NULL) return (ENOMEM); for (i = 0; i < nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (gnttab_list[i] == NULL) goto ini_nomem; } if (gnttab_resume(dev)) return (ENODEV); nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; if (bootverbose) printf("Grant table initialized\n"); return (0); ini_nomem: for (i--; i >= 0; i--) free(gnttab_list[i], M_DEVBUF); free(gnttab_list, M_DEVBUF); return (ENOMEM); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t granttable_methods[] = { /* Device interface */ DEVMETHOD(device_identify, granttable_identify), DEVMETHOD(device_probe, granttable_probe), DEVMETHOD(device_attach, granttable_attach), DEVMETHOD_END }; DEFINE_CLASS_0(granttable, granttable_driver, granttable_methods, 0); devclass_t granttable_devclass; DRIVER_MODULE_ORDERED(granttable, xenpv, granttable_driver, granttable_devclass, NULL, NULL, SI_ORDER_FIRST); Index: head/sys/dev/xen/netback/netback.c =================================================================== --- head/sys/dev/xen/netback/netback.c (revision 282273) +++ head/sys/dev/xen/netback/netback.c (revision 282274) @@ -1,2535 +1,2523 @@ /*- * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * Alan Somers (Spectra Logic Corporation) * John Suykerbuyk (Spectra Logic Corporation) */ #include __FBSDID("$FreeBSD$"); /** * \file netback.c * * \brief Device driver supporting the vending of network access * from this FreeBSD domain to other domains. 
*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 700000 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*--------------------------- Compile-time Tunables --------------------------*/ /*---------------------------------- Macros ----------------------------------*/ /** * Custom malloc type for all driver allocations. */ static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data"); #define XNB_SG 1 /* netback driver supports feature-sg */ #define XNB_GSO_TCPV4 0 /* netback driver supports feature-gso-tcpv4 */ #define XNB_RX_COPY 1 /* netback driver supports feature-rx-copy */ #define XNB_RX_FLIP 0 /* netback driver does not support feature-rx-flip */ #undef XNB_DEBUG #define XNB_DEBUG /* hardcode on during development */ #ifdef XNB_DEBUG #define DPRINTF(fmt, args...) \ printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while (0) #endif /* Default length for stack-allocated grant tables */ #define GNTTAB_LEN (64) /* Features supported by all backends. TSO and LRO can be negotiated */ #define XNB_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) /** * Two argument version of the standard macro. Second argument is a tentative * value of req_cons */ #define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({ \ unsigned int req = (_r)->sring->req_prod - cons; \ unsigned int rsp = RING_SIZE(_r) - \ (cons - (_r)->rsp_prod_pvt); \ req < rsp ? req : rsp; \ }) #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) #define virt_to_offset(x) ((x) & (PAGE_SIZE - 1)) /** * Predefined array type of grant table copy descriptors. Used to pass around * statically allocated memory structures. */ typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN]; /*--------------------------- Forward Declarations ---------------------------*/ struct xnb_softc; struct xnb_pkt; static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) 
__printflike(3,4); static int xnb_shutdown(struct xnb_softc *xnb); static int create_netdev(device_t dev); static int xnb_detach(device_t dev); static int xnb_ifmedia_upd(struct ifnet *ifp); static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static void xnb_intr(void *arg); static int xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab); static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, struct ifnet *ifnet, gnttab_copy_table gnttab); static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start); static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error); static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp); static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id); static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries); static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space); static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id); static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring); static void xnb_stop(struct xnb_softc*); static int xnb_ioctl(struct ifnet*, u_long, caddr_t); static void xnb_start_locked(struct ifnet*); static void xnb_start(struct ifnet*); static void xnb_ifinit_locked(struct xnb_softc*); static void xnb_ifinit(void*); #ifdef XNB_DEBUG static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS); static int xnb_dump_rings(SYSCTL_HANDLER_ARGS); #endif #if defined(INET) || defined(INET6) static void xnb_add_mbuf_cksum(struct mbuf *mbufc); #endif /*------------------------------ Data Structures -----------------------------*/ /** * Representation of a xennet packet. Simplified version of a packet as * stored in the Xen tx ring. Applicable to both RX and TX packets */ struct xnb_pkt{ /** * Array index of the first data-bearing (eg, not extra info) entry * for this packet */ RING_IDX car; /** * Array index of the second data-bearing entry for this packet. * Invalid if the packet has only one data-bearing entry. If the * packet has more than two data-bearing entries, then the second * through the last will be sequential modulo the ring size */ RING_IDX cdr; /** * Optional extra info. Only valid if flags contains * NETTXF_extra_info. Note that extra.type will always be * XEN_NETIF_EXTRA_TYPE_GSO. Currently, no known netfront or netback * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_* */ netif_extra_info_t extra; /** Size of entire packet in bytes. */ uint16_t size; /** The size of the first entry's data in bytes */ uint16_t car_size; /** * Either NETTXF_ or NETRXF_ flags. Note that the flag values are * not the same for TX and RX packets */ uint16_t flags; /** * The number of valid data-bearing entries (either netif_tx_request's * or netif_rx_response's) in the packet. If this is 0, it means the * entire packet is invalid. 
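 * In other words, a list_len of 1 means the packet is fully described * by the 'car' entry; 'cdr' (and any subsequent entries) is only * meaningful when list_len is 2 or more.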
*/ uint16_t list_len; /** There was an error processing the packet */ uint8_t error; }; /** xnb_pkt method: initialize it */ static inline void xnb_pkt_initialize(struct xnb_pkt *pxnb) { bzero(pxnb, sizeof(*pxnb)); } /** xnb_pkt method: mark the packet as valid */ static inline void xnb_pkt_validate(struct xnb_pkt *pxnb) { pxnb->error = 0; }; /** xnb_pkt method: mark the packet as invalid */ static inline void xnb_pkt_invalidate(struct xnb_pkt *pxnb) { pxnb->error = 1; }; /** xnb_pkt method: Check whether the packet is valid */ static inline int xnb_pkt_is_valid(const struct xnb_pkt *pxnb) { return (! pxnb->error); } #ifdef XNB_DEBUG /** xnb_pkt method: print the packet's contents in human-readable format*/ static void __unused xnb_dump_pkt(const struct xnb_pkt *pkt) { if (pkt == NULL) { DPRINTF("Was passed a null pointer.\n"); return; } DPRINTF("pkt address= %p\n", pkt); DPRINTF("pkt->size=%d\n", pkt->size); DPRINTF("pkt->car_size=%d\n", pkt->car_size); DPRINTF("pkt->flags=0x%04x\n", pkt->flags); DPRINTF("pkt->list_len=%d\n", pkt->list_len); /* DPRINTF("pkt->extra"); TODO */ DPRINTF("pkt->car=%d\n", pkt->car); DPRINTF("pkt->cdr=%d\n", pkt->cdr); DPRINTF("pkt->error=%d\n", pkt->error); } #endif /* XNB_DEBUG */ static void xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq) { if (txreq != NULL) { DPRINTF("netif_tx_request index =%u\n", idx); DPRINTF("netif_tx_request.gref =%u\n", txreq->gref); DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset); DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags); DPRINTF("netif_tx_request.id =%hu\n", txreq->id); DPRINTF("netif_tx_request.size =%hu\n", txreq->size); } } /** * \brief Configuration data for a shared memory request ring * used to communicate with the front-end client of this * this driver. */ struct xnb_ring_config { /** * Runtime structures for ring access. Unfortunately, TX and RX rings * use different data structures, and that cannot be changed since it * is part of the interdomain protocol. */ union{ netif_rx_back_ring_t rx_ring; netif_tx_back_ring_t tx_ring; } back_ring; /** * The device bus address returned by the hypervisor when * mapping the ring and required to unmap it when a connection * is torn down. */ uint64_t bus_addr; /** The pseudo-physical address where ring memory is mapped.*/ uint64_t gnt_addr; /** KVA address where ring memory is mapped. */ vm_offset_t va; /** * Grant table handles, one per-ring page, returned by the * hyperpervisor upon mapping of the ring and required to * unmap it when a connection is torn down. */ grant_handle_t handle; /** The number of ring pages mapped for the current connection. */ unsigned ring_pages; /** * The grant references, one per-ring page, supplied by the * front-end, allowing us to reference the ring pages in the * front-end's domain and to map these pages into our own domain. */ grant_ref_t ring_ref; }; /** * Per-instance connection state flags. */ typedef enum { /** Communication with the front-end has been established. */ XNBF_RING_CONNECTED = 0x01, /** * Front-end requests exist in the ring and are waiting for * xnb_xen_req objects to free up. */ XNBF_RESOURCE_SHORTAGE = 0x02, /** Connection teardown has started. */ XNBF_SHUTDOWN = 0x04, /** A thread is already performing shutdown processing. */ XNBF_IN_SHUTDOWN = 0x08 } xnb_flag_t; /** * Types of rings. 
Used for array indices and to identify a ring's control * data structure type */ typedef enum{ XNB_RING_TYPE_TX = 0, /* ID of TX rings, used for array indices */ XNB_RING_TYPE_RX = 1, /* ID of RX rings, used for array indices */ XNB_NUM_RING_TYPES } xnb_ring_type_t; /** * Per-instance configuration data. */ struct xnb_softc { /** NewBus device corresponding to this instance. */ device_t dev; /* Media related fields */ /** Generic network media state */ struct ifmedia sc_media; /** Media carrier info */ struct ifnet *xnb_ifp; /** Our own private carrier state */ unsigned carrier; /** Device MAC Address */ uint8_t mac[ETHER_ADDR_LEN]; /* Xen related fields */ /** * \brief The netif protocol abi in effect. * * There are situations where the back and front ends can * have a different, native abi (e.g. intel x86_64 and * 32bit x86 domains on the same machine). The back-end * always accomodates the front-end's native abi. That * value is pulled from the XenStore and recorded here. */ int abi; /** * Name of the bridge to which this VIF is connected, if any * This field is dynamically allocated by xenbus and must be free()ed * when no longer needed */ char *bridge; /** The interrupt driven even channel used to signal ring events. */ evtchn_port_t evtchn; /** Xen device handle.*/ long handle; /** Handle to the communication ring event channel. */ xen_intr_handle_t xen_intr_handle; /** * \brief Cached value of the front-end's domain id. * * This value is used at once for each mapped page in * a transaction. We cache it to avoid incuring the * cost of an ivar access every time this is needed. */ domid_t otherend_id; /** * Undocumented frontend feature. Has something to do with * scatter/gather IO */ uint8_t can_sg; /** Undocumented frontend feature */ uint8_t gso; /** Undocumented frontend feature */ uint8_t gso_prefix; /** Can checksum TCP/UDP over IPv4 */ uint8_t ip_csum; /* Implementation related fields */ /** * Preallocated grant table copy descriptor for RX operations. * Access must be protected by rx_lock */ gnttab_copy_table rx_gnttab; /** * Preallocated grant table copy descriptor for TX operations. * Access must be protected by tx_lock */ gnttab_copy_table tx_gnttab; -#ifdef XENHVM /** * Resource representing allocated physical address space * associated with our per-instance kva region. */ struct resource *pseudo_phys_res; /** Resource id for allocated physical address space. */ int pseudo_phys_res_id; -#endif /** Ring mapping and interrupt configuration data. */ struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES]; /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. */ vm_offset_t kva; /** Psuedo-physical address corresponding to kva. */ uint64_t gnt_base_addr; /** Various configuration and state bit flags. */ xnb_flag_t flags; /** Mutex protecting per-instance data in the receive path. */ struct mtx rx_lock; /** Mutex protecting per-instance data in the softc structure. */ struct mtx sc_lock; /** Mutex protecting per-instance data in the transmit path. */ struct mtx tx_lock; /** The size of the global kva pool. 
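	 * Expressed in bytes; the sum of the TX and RX ring sizes as
	 * computed by xnb_alloc_communication_mem().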
*/ int kva_size; /** Name of the interface */ char if_name[IFNAMSIZ]; }; /*---------------------------- Debugging functions ---------------------------*/ #ifdef XNB_DEBUG static void __unused xnb_dump_gnttab_copy(const struct gnttab_copy *entry) { if (entry == NULL) { printf("NULL grant table pointer\n"); return; } if (entry->flags & GNTCOPY_dest_gref) printf("gnttab dest ref=\t%u\n", entry->dest.u.ref); else printf("gnttab dest gmfn=\t%lu\n", entry->dest.u.gmfn); printf("gnttab dest offset=\t%hu\n", entry->dest.offset); printf("gnttab dest domid=\t%hu\n", entry->dest.domid); if (entry->flags & GNTCOPY_source_gref) printf("gnttab source ref=\t%u\n", entry->source.u.ref); else printf("gnttab source gmfn=\t%lu\n", entry->source.u.gmfn); printf("gnttab source offset=\t%hu\n", entry->source.offset); printf("gnttab source domid=\t%hu\n", entry->source.domid); printf("gnttab len=\t%hu\n", entry->len); printf("gnttab flags=\t%hu\n", entry->flags); printf("gnttab status=\t%hd\n", entry->status); } static int xnb_dump_rings(SYSCTL_HANDLER_ARGS) { static char results[720]; struct xnb_softc const* xnb = (struct xnb_softc*)arg1; netif_rx_back_ring_t const* rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; netif_tx_back_ring_t const* txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; /* empty the result strings */ results[0] = 0; if ( !txb || !txb->sring || !rxb || !rxb->sring ) return (SYSCTL_OUT(req, results, strnlen(results, 720))); snprintf(results, 720, "\n\t%35s %18s\n" /* TX, RX */ "\t%16s %18d %18d\n" /* req_cons */ "\t%16s %18d %18d\n" /* nr_ents */ "\t%16s %18d %18d\n" /* rsp_prod_pvt */ "\t%16s %18p %18p\n" /* sring */ "\t%16s %18d %18d\n" /* req_prod */ "\t%16s %18d %18d\n" /* req_event */ "\t%16s %18d %18d\n" /* rsp_prod */ "\t%16s %18d %18d\n", /* rsp_event */ "TX", "RX", "req_cons", txb->req_cons, rxb->req_cons, "nr_ents", txb->nr_ents, rxb->nr_ents, "rsp_prod_pvt", txb->rsp_prod_pvt, rxb->rsp_prod_pvt, "sring", txb->sring, rxb->sring, "sring->req_prod", txb->sring->req_prod, rxb->sring->req_prod, "sring->req_event", txb->sring->req_event, rxb->sring->req_event, "sring->rsp_prod", txb->sring->rsp_prod, rxb->sring->rsp_prod, "sring->rsp_event", txb->sring->rsp_event, rxb->sring->rsp_event); return (SYSCTL_OUT(req, results, strnlen(results, 720))); } static void __unused xnb_dump_mbuf(const struct mbuf *m) { int len; uint8_t *d; if (m == NULL) return; printf("xnb_dump_mbuf:\n"); if (m->m_flags & M_PKTHDR) { printf(" flowid=%10d, csum_flags=%#8x, csum_data=%#8x, " "tso_segsz=%5hd\n", m->m_pkthdr.flowid, (int)m->m_pkthdr.csum_flags, m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz); printf(" rcvif=%16p, len=%19d\n", m->m_pkthdr.rcvif, m->m_pkthdr.len); } printf(" m_next=%16p, m_nextpk=%16p, m_data=%16p\n", m->m_next, m->m_nextpkt, m->m_data); printf(" m_len=%17d, m_flags=%#15x, m_type=%18u\n", m->m_len, m->m_flags, m->m_type); len = m->m_len; d = mtod(m, uint8_t*); while (len > 0) { int i; printf(" "); for (i = 0; (i < 16) && (len > 0); i++, len--) { printf("%02hhx ", *(d++)); } printf("\n"); } } #endif /* XNB_DEBUG */ /*------------------------ Inter-Domain Communication ------------------------*/ /** * Free dynamically allocated KVA or pseudo-physical address allocations. * * \param xnb Per-instance xnb configuration structure. 
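 *
 * Called from xnb_disconnect() after the ring pages have been unmapped;
 * releases the pseudo-physical address range backing the shared rings.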
*/ static void xnb_free_communication_mem(struct xnb_softc *xnb) { if (xnb->kva != 0) { -#ifndef XENHVM - kva_free(xnb->kva, xnb->kva_size); -#else if (xnb->pseudo_phys_res != NULL) { bus_release_resource(xnb->dev, SYS_RES_MEMORY, xnb->pseudo_phys_res_id, xnb->pseudo_phys_res); xnb->pseudo_phys_res = NULL; } -#endif /* XENHVM */ } xnb->kva = 0; xnb->gnt_base_addr = 0; } /** * Cleanup all inter-domain communication mechanisms. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_disconnect(struct xnb_softc *xnb) { struct gnttab_unmap_grant_ref gnts[XNB_NUM_RING_TYPES]; int error; int i; if (xnb->xen_intr_handle != NULL) xen_intr_unbind(&xnb->xen_intr_handle); /* * We may still have another thread currently processing requests. We * must acquire the rx and tx locks to make sure those threads are done, * but we can release those locks as soon as we acquire them, because no * more interrupts will be arriving. */ mtx_lock(&xnb->tx_lock); mtx_unlock(&xnb->tx_lock); mtx_lock(&xnb->rx_lock); mtx_unlock(&xnb->rx_lock); /* Free malloc'd softc member variables */ if (xnb->bridge != NULL) { free(xnb->bridge, M_XENSTORE); xnb->bridge = NULL; } /* All request processing has stopped, so unmap the rings */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { gnts[i].host_addr = xnb->ring_configs[i].gnt_addr; gnts[i].dev_bus_addr = xnb->ring_configs[i].bus_addr; gnts[i].handle = xnb->ring_configs[i].handle; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, gnts, XNB_NUM_RING_TYPES); KASSERT(error == 0, ("Grant table unmap op failed (%d)", error)); xnb_free_communication_mem(xnb); /* * Zero the ring config structs because the pointers, handles, and * grant refs contained therein are no longer valid. */ bzero(&xnb->ring_configs[XNB_RING_TYPE_TX], sizeof(struct xnb_ring_config)); bzero(&xnb->ring_configs[XNB_RING_TYPE_RX], sizeof(struct xnb_ring_config)); xnb->flags &= ~XNBF_RING_CONNECTED; return (0); } /** * Map a single shared memory ring into domain local address space and * initialize its control structure * * \param xnb Per-instance xnb configuration structure * \param ring_type Array index of this ring in the xnb's array of rings * \return An errno */ static int xnb_connect_ring(struct xnb_softc *xnb, xnb_ring_type_t ring_type) { struct gnttab_map_grant_ref gnt; struct xnb_ring_config *ring = &xnb->ring_configs[ring_type]; int error; /* TX ring type = 0, RX =1 */ ring->va = xnb->kva + ring_type * PAGE_SIZE; ring->gnt_addr = xnb->gnt_base_addr + ring_type * PAGE_SIZE; gnt.host_addr = ring->gnt_addr; gnt.flags = GNTMAP_host_map; gnt.ref = ring->ring_ref; gnt.dom = xnb->otherend_id; error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &gnt, 1); if (error != 0) panic("netback: Ring page grant table op failed (%d)", error); if (gnt.status != 0) { ring->va = 0; error = EACCES; xenbus_dev_fatal(xnb->dev, error, "Ring shared page mapping failed. " "Status %d.", gnt.status); } else { ring->handle = gnt.handle; ring->bus_addr = gnt.dev_bus_addr; if (ring_type == XNB_RING_TYPE_TX) { BACK_RING_INIT(&ring->back_ring.tx_ring, (netif_tx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else if (ring_type == XNB_RING_TYPE_RX) { BACK_RING_INIT(&ring->back_ring.rx_ring, (netif_rx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else { xenbus_dev_fatal(xnb->dev, error, "Unknown ring type %d", ring_type); } } return error; } /** * Setup the shared memory rings and bind an interrupt to the event channel * used to notify us of ring changes. 
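 * Maps the TX and RX shared rings into local address space via
 * xnb_connect_ring() and then binds the frontend's event channel to
 * xnb_intr() so that ring activity generates interrupts.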
* * \param xnb Per-instance xnb configuration structure. */ static int xnb_connect_comms(struct xnb_softc *xnb) { int error; xnb_ring_type_t i; if ((xnb->flags & XNBF_RING_CONNECTED) != 0) return (0); /* * Kva for our rings are at the tail of the region of kva allocated * by xnb_alloc_communication_mem(). */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { error = xnb_connect_ring(xnb, i); if (error != 0) return error; } xnb->flags |= XNBF_RING_CONNECTED; error = xen_intr_bind_remote_port(xnb->dev, xnb->otherend_id, xnb->evtchn, /*filter*/NULL, xnb_intr, /*arg*/xnb, INTR_TYPE_BIO | INTR_MPSAFE, &xnb->xen_intr_handle); if (error != 0) { (void)xnb_disconnect(xnb); xenbus_dev_fatal(xnb->dev, error, "binding event channel"); return (error); } DPRINTF("rings connected!\n"); return (0); } /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our * communication ring. * * \param xnb Per-instance xnb configuration structure. * * These address spaces are used to dynamically map pages in the * front-end's domain into our own. */ static int xnb_alloc_communication_mem(struct xnb_softc *xnb) { xnb_ring_type_t i; xnb->kva_size = 0; for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE; } -#ifndef XENHVM - xnb->kva = kva_alloc(xnb->kva_size); - if (xnb->kva == 0) - return (ENOMEM); - xnb->gnt_base_addr = xnb->kva; -#else /* defined XENHVM */ + /* * Reserve a range of pseudo physical memory that we can map * into kva. These pages will only be backed by machine * pages ("real memory") during the lifetime of front-end requests * via grant table operations. We will map the netif tx and rx rings * into this space. */ xnb->pseudo_phys_res_id = 0; xnb->pseudo_phys_res = bus_alloc_resource(xnb->dev, SYS_RES_MEMORY, &xnb->pseudo_phys_res_id, 0, ~0, xnb->kva_size, RF_ACTIVE); if (xnb->pseudo_phys_res == NULL) { xnb->kva = 0; return (ENOMEM); } xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res); xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res); -#endif /* !defined XENHVM */ return (0); } /** * Collect information from the XenStore related to our device and its frontend * * \param xnb Per-instance xnb configuration structure. */ static int xnb_collect_xenstore_info(struct xnb_softc *xnb) { /** * \todo Linux collects the following info. We should collect most * of this, too: * "feature-rx-notify" */ const char *otherend_path; const char *our_path; int err; unsigned int rx_copy, bridge_len; uint8_t no_csum_offload; otherend_path = xenbus_get_otherend_path(xnb->dev); our_path = xenbus_get_node(xnb->dev); /* Collect the critical communication parameters */ err = xs_gather(XST_NIL, otherend_path, "tx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_TX].ring_ref, "rx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_RX].ring_ref, "event-channel", "%" PRIu32, &xnb->evtchn, NULL); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Unable to retrieve ring information from " "frontend %s. Unable to connect.", otherend_path); return (err); } /* Collect the handle from xenstore */ err = xs_scanf(XST_NIL, our_path, "handle", NULL, "%li", &xnb->handle); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Error reading handle from frontend %s. " "Unable to connect.", otherend_path); } /* * Collect the bridgename, if any. 
We do not need bridge_len; we just * throw it away */ err = xs_read(XST_NIL, our_path, "bridge", &bridge_len, (void**)&xnb->bridge); if (err != 0) xnb->bridge = NULL; /* * Does the frontend request that we use rx copy? If not, return an * error because this driver only supports rx copy. */ err = xs_scanf(XST_NIL, otherend_path, "request-rx-copy", NULL, "%" PRIu32, &rx_copy); if (err == ENOENT) { err = 0; rx_copy = 0; } if (err < 0) { xenbus_dev_fatal(xnb->dev, err, "reading %s/request-rx-copy", otherend_path); return err; } /** * \todo: figure out the exact meaning of this feature, and when * the frontend will set it to true. It should be set to true * at some point */ /* if (!rx_copy)*/ /* return EOPNOTSUPP;*/ /** \todo Collect the rx notify feature */ /* Collect the feature-sg. */ if (xs_scanf(XST_NIL, otherend_path, "feature-sg", NULL, "%hhu", &xnb->can_sg) < 0) xnb->can_sg = 0; /* Collect remaining frontend features */ if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4", NULL, "%hhu", &xnb->gso) < 0) xnb->gso = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4-prefix", NULL, "%hhu", &xnb->gso_prefix) < 0) xnb->gso_prefix = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-no-csum-offload", NULL, "%hhu", &no_csum_offload) < 0) no_csum_offload = 0; xnb->ip_csum = (no_csum_offload == 0); return (0); } /** * Supply information about the physical device to the frontend * via XenBus. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_publish_backend_info(struct xnb_softc *xnb) { struct xs_transaction xst; const char *our_path; int error; our_path = xenbus_get_node(xnb->dev); do { error = xs_transaction_start(&xst); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Error publishing backend info " "(start transaction)"); break; } error = xs_printf(xst, our_path, "feature-sg", "%d", XNB_SG); if (error != 0) break; error = xs_printf(xst, our_path, "feature-gso-tcpv4", "%d", XNB_GSO_TCPV4); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-copy", "%d", XNB_RX_COPY); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-flip", "%d", XNB_RX_FLIP); if (error != 0) break; error = xs_transaction_end(xst, 0); if (error != 0 && error != EAGAIN) { xenbus_dev_fatal(xnb->dev, error, "ending transaction"); break; } } while (error == EAGAIN); return (error); } /** * Connect to our netfront peer now that it has completed publishing * its configuration into the XenStore. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_connect(struct xnb_softc *xnb) { int error; if (xenbus_get_state(xnb->dev) == XenbusStateConnected) return; if (xnb_collect_xenstore_info(xnb) != 0) return; xnb->flags &= ~XNBF_SHUTDOWN; /* Read front end configuration. */ /* Allocate resources whose size depends on front-end configuration. */ error = xnb_alloc_communication_mem(xnb); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Unable to allocate communication memory"); return; } /* * Connect communication channel. */ error = xnb_connect_comms(xnb); if (error != 0) { /* Specific errors are reported by xnb_connect_comms(). */ return; } xnb->carrier = 1; /* Ready for I/O. */ xenbus_set_state(xnb->dev, XenbusStateConnected); } /*-------------------------- Device Teardown Support -------------------------*/ /** * Perform device shutdown functions. * * \param xnb Per-instance xnb configuration structure. * * Mark this instance as shutting down, wait for any active requests * to drain, disconnect from the front-end, and notify any waiters (e.g. 
* a thread invoking our detach method) that detach can now proceed. */ static int xnb_shutdown(struct xnb_softc *xnb) { /* * Due to the need to drop our mutex during some * xenbus operations, it is possible for two threads * to attempt to close out shutdown processing at * the same time. Tell the caller that hits this * race to try back later. */ if ((xnb->flags & XNBF_IN_SHUTDOWN) != 0) return (EAGAIN); xnb->flags |= XNBF_SHUTDOWN; xnb->flags |= XNBF_IN_SHUTDOWN; mtx_unlock(&xnb->sc_lock); /* Free the network interface */ xnb->carrier = 0; if (xnb->xnb_ifp != NULL) { ether_ifdetach(xnb->xnb_ifp); if_free(xnb->xnb_ifp); xnb->xnb_ifp = NULL; } mtx_lock(&xnb->sc_lock); xnb_disconnect(xnb); mtx_unlock(&xnb->sc_lock); if (xenbus_get_state(xnb->dev) < XenbusStateClosing) xenbus_set_state(xnb->dev, XenbusStateClosing); mtx_lock(&xnb->sc_lock); xnb->flags &= ~XNBF_IN_SHUTDOWN; /* Indicate to xnb_detach() that is it safe to proceed. */ wakeup(xnb); return (0); } /** * Report an attach time error to the console and Xen, and cleanup * this instance by forcing immediate detach processing. * * \param xnb Per-instance xnb configuration structure. * \param err Errno describing the error. * \param fmt Printf style format and arguments */ static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) { va_list ap; va_list ap_hotplug; va_start(ap, fmt); va_copy(ap_hotplug, ap); xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-error", fmt, ap_hotplug); va_end(ap_hotplug); xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "error"); xenbus_dev_vfatal(xnb->dev, err, fmt, ap); va_end(ap); xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "online", "0"); xnb_detach(xnb->dev); } /*---------------------------- NewBus Entrypoints ----------------------------*/ /** * Inspect a XenBus device and claim it if is of the appropriate type. * * \param dev NewBus device object representing a candidate XenBus device. * * \return 0 for success, errno codes for failure. */ static int xnb_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { DPRINTF("Claiming device %d, %s\n", device_get_unit(dev), devclass_get_name(device_get_devclass(dev))); device_set_desc(dev, "Backend Virtual Network Device"); device_quiet(dev); return (0); } return (ENXIO); } /** * Setup sysctl variables to control various Network Back parameters. * * \param xnb Xen Net Back softc. * */ static void xnb_setup_sysctl(struct xnb_softc *xnb) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; sysctl_ctx = device_get_sysctl_ctx(xnb->dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xnb->dev); if (sysctl_tree == NULL) return; #ifdef XNB_DEBUG SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "unit_test_results", CTLTYPE_STRING | CTLFLAG_RD, xnb, 0, xnb_unit_test_main, "A", "Results of builtin unit tests"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "dump_rings", CTLTYPE_STRING | CTLFLAG_RD, xnb, 0, xnb_dump_rings, "A", "Xennet Back Rings"); #endif /* XNB_DEBUG */ } /** * Create a network device. 
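 *
 * Initializes the per-instance locks, media state, and ifnet structure.
 * The interface is named "xnb<frontend domid>.<handle>", where the handle
 * is read from the XenStore.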
* @param handle device handle */ int create_netdev(device_t dev) { struct ifnet *ifp; struct xnb_softc *xnb; int err = 0; uint32_t handle; xnb = device_get_softc(dev); mtx_init(&xnb->sc_lock, "xnb_softc", "xen netback softc lock", MTX_DEF); mtx_init(&xnb->tx_lock, "xnb_tx", "xen netback tx lock", MTX_DEF); mtx_init(&xnb->rx_lock, "xnb_rx", "xen netback rx lock", MTX_DEF); xnb->dev = dev; ifmedia_init(&xnb->sc_media, 0, xnb_ifmedia_upd, xnb_ifmedia_sts); ifmedia_add(&xnb->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&xnb->sc_media, IFM_ETHER|IFM_MANUAL); /* * Set the MAC address to a dummy value (00:00:00:00:00), * if the MAC address of the host-facing interface is set * to the same as the guest-facing one (the value found in * xenstore), the bridge would stop delivering packets to * us because it would see that the destination address of * the packet is the same as the interface, and so the bridge * would expect the packet has already been delivered locally * (and just drop it). */ bzero(&xnb->mac[0], sizeof(xnb->mac)); /* The interface will be named using the following nomenclature: * * xnb. * * Where handle is the oder of the interface referred to the guest. */ err = xs_scanf(XST_NIL, xenbus_get_node(xnb->dev), "handle", NULL, "%" PRIu32, &handle); if (err != 0) return (err); snprintf(xnb->if_name, IFNAMSIZ, "xnb%" PRIu16 ".%" PRIu32, xenbus_get_otherend_id(dev), handle); if (err == 0) { /* Set up ifnet structure */ ifp = xnb->xnb_ifp = if_alloc(IFT_ETHER); ifp->if_softc = xnb; if_initname(ifp, xnb->if_name, IF_DUNIT_NONE); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xnb_ioctl; ifp->if_output = ether_output; ifp->if_start = xnb_start; #ifdef notyet ifp->if_watchdog = xnb_watchdog; #endif ifp->if_init = xnb_ifinit; ifp->if_mtu = ETHERMTU; ifp->if_snd.ifq_maxlen = NET_RX_RING_SIZE - 1; ifp->if_hwassist = XNB_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_capenable = IFCAP_HWCSUM; ether_ifattach(ifp, xnb->mac); xnb->carrier = 0; } return err; } /** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_attach(device_t dev) { struct xnb_softc *xnb; int error; xnb_ring_type_t i; error = create_netdev(dev); if (error != 0) { xenbus_dev_fatal(dev, error, "creating netdev"); return (error); } DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); /* * Basic initialization. * After this block it is safe to call xnb_detach() * to clean up any allocated data for this instance. */ xnb = device_get_softc(dev); xnb->otherend_id = xenbus_get_otherend_id(dev); for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->ring_configs[i].ring_pages = 1; } /* * Setup sysctl variables. */ xnb_setup_sysctl(xnb); /* Update hot-plug status to satisfy xend. */ error = xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "connected"); if (error != 0) { xnb_attach_failed(xnb, error, "writing %s/hotplug-status", xenbus_get_node(xnb->dev)); return (error); } if ((error = xnb_publish_backend_info(xnb)) != 0) { /* * If we can't publish our data, we cannot participate * in this connection, and waiting for a front-end state * change will not help the situation. */ xnb_attach_failed(xnb, error, "Publishing backend status for %s", xenbus_get_node(xnb->dev)); return error; } /* Tell the front end that we are ready to connect. 
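	 * Entering XenbusStateInitWait invites the frontend to publish its
	 * ring references and event channel; the connection itself is
	 * completed later from xnb_frontend_changed().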
*/ xenbus_set_state(dev, XenbusStateInitWait); return (0); } /** * Detach from a net back device instance. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. * * \note A net back device may be detached at any time in its life-cycle, * including part way through the attach process. For this reason, * initialization order and the intialization state checks in this * routine must be carefully coupled so that attach time failures * are gracefully handled. */ static int xnb_detach(device_t dev) { struct xnb_softc *xnb; DPRINTF("\n"); xnb = device_get_softc(dev); mtx_lock(&xnb->sc_lock); while (xnb_shutdown(xnb) == EAGAIN) { msleep(xnb, &xnb->sc_lock, /*wakeup prio unchanged*/0, "xnb_shutdown", 0); } mtx_unlock(&xnb->sc_lock); DPRINTF("\n"); mtx_destroy(&xnb->tx_lock); mtx_destroy(&xnb->rx_lock); mtx_destroy(&xnb->sc_lock); return (0); } /** * Prepare this net back device for suspension of this VM. * * \param dev NewBus device object representing this Xen net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_suspend(device_t dev) { return (0); } /** * Perform any processing required to recover from a suspended state. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_resume(device_t dev) { return (0); } /** * Handle state changes expressed via the XenStore by our front-end peer. * * \param dev NewBus device object representing this Xen * Net Back instance. * \param frontend_state The new state of the front-end. * * \return 0 for success, errno codes for failure. */ static void xnb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xnb_softc *xnb; xnb = device_get_softc(dev); DPRINTF("frontend_state=%s, xnb_state=%s\n", xenbus_strstate(frontend_state), xenbus_strstate(xenbus_get_state(xnb->dev))); switch (frontend_state) { case XenbusStateInitialising: break; case XenbusStateInitialised: case XenbusStateConnected: xnb_connect(xnb); break; case XenbusStateClosing: case XenbusStateClosed: mtx_lock(&xnb->sc_lock); xnb_shutdown(xnb); mtx_unlock(&xnb->sc_lock); if (frontend_state == XenbusStateClosed) xenbus_set_state(xnb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xnb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /*---------------------------- Request Processing ----------------------------*/ /** * Interrupt handler bound to the shared ring's event channel. * Entry point for the xennet transmit path in netback * Transfers packets from the Xen ring to the host's generic networking stack * * \param arg Callback argument registerd during event channel * binding - the xnb_softc for this instance. 
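 *
 * Runs with the tx_lock held while draining the TX ring: each mbuf chain
 * assembled by xnb_recv() is handed to the host network stack through
 * if_input, and xnb_start() is then called to push any queued RX traffic
 * back to the frontend.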
*/ static void xnb_intr(void *arg) { struct xnb_softc *xnb; struct ifnet *ifp; netif_tx_back_ring_t *txb; RING_IDX req_prod_local; xnb = (struct xnb_softc *)arg; ifp = xnb->xnb_ifp; txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; mtx_lock(&xnb->tx_lock); do { int notify; req_prod_local = txb->sring->req_prod; xen_rmb(); for (;;) { struct mbuf *mbufc; int err; err = xnb_recv(txb, xnb->otherend_id, &mbufc, ifp, xnb->tx_gnttab); if (err || (mbufc == NULL)) break; /* Send the packet to the generic network stack */ (*xnb->xnb_ifp->if_input)(xnb->xnb_ifp, mbufc); } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify); if (notify != 0) xen_intr_signal(xnb->xen_intr_handle); txb->sring->req_event = txb->req_cons + 1; xen_mb(); } while (txb->sring->req_prod != req_prod_local) ; mtx_unlock(&xnb->tx_lock); xnb_start(ifp); } /** * Build a struct xnb_pkt based on netif_tx_request's from a netif tx ring. * Will read exactly 0 or 1 packets from the ring; never a partial packet. * \param[out] pkt The returned packet. If there is an error building * the packet, pkt.list_len will be set to 0. * \param[in] tx_ring Pointer to the Ring that is the input to this function * \param[in] start The ring index of the first potential request * \return The number of requests consumed to build this packet */ static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start) { /* * Outline: * 1) Initialize pkt * 2) Read the first request of the packet * 3) Read the extras * 4) Set cdr * 5) Loop on the remainder of the packet * 6) Finalize pkt (stuff like car_size and list_len) */ int idx = start; int discard = 0; /* whether to discard the packet */ int more_data = 0; /* there are more request past the last one */ uint16_t cdr_size = 0; /* accumulated size of requests 2 through n */ xnb_pkt_initialize(pkt); /* Read the first request */ if (RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->size = tx->size; pkt->flags = tx->flags & ~NETTXF_more_data; more_data = tx->flags & NETTXF_more_data; pkt->list_len++; pkt->car = idx; idx++; } /* Read the extra info */ if ((pkt->flags & NETTXF_extra_info) && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_extra_info_t *ext = (netif_extra_info_t*) RING_GET_REQUEST(tx_ring, idx); pkt->extra.type = ext->type; switch (pkt->extra.type) { case XEN_NETIF_EXTRA_TYPE_GSO: pkt->extra.u.gso = ext->u.gso; break; default: /* * The reference Linux netfront driver will * never set any other extra.type. So we don't * know what to do with it. Let's print an * error, then consume and discard the packet */ printf("xnb(%s:%d): Unknown extra info type %d." " Discarding packet\n", __func__, __LINE__, pkt->extra.type); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; break; } pkt->extra.flags = ext->flags; if (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE) { /* * The reference linux netfront driver never sets this * flag (nor does any other known netfront). So we * will discard the packet. */ printf("xnb(%s:%d): Request sets " "XEN_NETIF_EXTRA_FLAG_MORE, but we can't handle " "that\n", __func__, __LINE__); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; } idx++; } /* Set cdr. 
If there is not more data, cdr is invalid */ pkt->cdr = idx; /* Loop on remainder of packet */ while (more_data && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->list_len++; cdr_size += tx->size; if (tx->flags & ~NETTXF_more_data) { /* There should be no other flags set at this point */ printf("xnb(%s:%d): Request sets unknown flags %d " "after the 1st request in the packet.\n", __func__, __LINE__, tx->flags); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); } more_data = tx->flags & NETTXF_more_data; idx++; } /* Finalize packet */ if (more_data != 0) { /* The ring ran out of requests before finishing the packet */ xnb_pkt_invalidate(pkt); idx = start; /* tell caller that we consumed no requests */ } else { /* Calculate car_size */ pkt->car_size = pkt->size - cdr_size; } if (discard != 0) { xnb_pkt_invalidate(pkt); } return idx - start; } /** * Respond to all the requests that constituted pkt. Builds the responses and * writes them to the ring, but doesn't push them to the shared ring. * \param[in] pkt the packet that needs a response * \param[in] error true if there was an error handling the packet, such * as in the hypervisor copy op or mbuf allocation * \param[out] ring Responses go here */ static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error) { /* * Outline: * 1) Respond to the first request * 2) Respond to the extra info reques * Loop through every remaining request in the packet, generating * responses that copy those requests' ids and sets the status * appropriately. */ netif_tx_request_t *tx; netif_tx_response_t *rsp; int i; uint16_t status; status = (xnb_pkt_is_valid(pkt) == 0) || error ? NETIF_RSP_ERROR : NETIF_RSP_OKAY; KASSERT((pkt->list_len == 0) || (ring->rsp_prod_pvt == pkt->car), ("Cannot respond to ring requests out of order")); if (pkt->list_len >= 1) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; if (pkt->flags & NETRXF_extra_info) { rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->status = NETIF_RSP_NULL; ring->rsp_prod_pvt++; } } for (i=0; i < pkt->list_len - 1; i++) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; } } /** * Create an mbuf chain to represent a packet. Initializes all of the headers * in the mbuf chain, but does not copy the data. The returned chain must be * free()'d when no longer needed * \param[in] pkt A packet to model the mbuf chain after * \return A newly allocated mbuf chain, possibly with clusters attached. * NULL on failure */ static struct mbuf* xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp) { /** * \todo consider using a memory pool for mbufs instead of * reallocating them for every packet */ /** \todo handle extra data */ struct mbuf *m; m = m_getm(NULL, pkt->size, M_NOWAIT, MT_DATA); if (m != NULL) { m->m_pkthdr.rcvif = ifp; if (pkt->flags & NETTXF_data_validated) { /* * We lie to the host OS and always tell it that the * checksums are ok, because the packet is unlikely to * get corrupted going across domains. 
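			 * The actual IP/TCP/UDP checksums are recomputed in
			 * software by xnb_add_mbuf_cksum() after the data has
			 * been copied in (see xnb_update_mbufc()).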
*/ m->m_pkthdr.csum_flags = ( CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR ); m->m_pkthdr.csum_data = 0xffff; } } return m; } /** * Build a gnttab_copy table that can be used to copy data from a pkt * to an mbufc. Does not actually perform the copy. Always uses gref's on * the packet side. * \param[in] pkt pkt's associated requests form the src for * the copy operation * \param[in] mbufc mbufc's storage forms the dest for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] txb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id) { const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into tx ring buffer */ int r_ofs = 0; /* offset of next data within tx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining = pkt->size; while (size_remaining > 0) { const netif_tx_request_t *txq = RING_GET_REQUEST(txb, r_idx); const size_t mbuf_space = M_TRAILINGSPACE(mbuf) - m_ofs; const size_t req_size = r_idx == pkt->car ? pkt->car_size : txq->size; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicking */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].source.u.ref = txq->gref; gnttab[gnt_idx].source.domid = otherend_id; gnttab[gnt_idx].source.offset = txq->offset + r_ofs; gnttab[gnt_idx].dest.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_source_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next tx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (M_TRAILINGSPACE(mbuf) - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Check the status of the grant copy operations, and update mbufs various * non-data fields to reflect the data present. * \param[in,out] mbufc mbuf chain to update. The chain must be valid and of * the correct length, and data should already be present * \param[in] gnttab A grant table for a just completed copy op * \param[in] n_entries The number of valid entries in the grant table */ static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries) { struct mbuf *mbuf = mbufc; int i; size_t total_size = 0; for (i = 0; i < n_entries; i++) { KASSERT(gnttab[i].status == GNTST_okay, ("Some gnttab_copy entry had error status %hd\n", gnttab[i].status)); mbuf->m_len += gnttab[i].len; total_size += gnttab[i].len; if (M_TRAILINGSPACE(mbuf) <= 0) { mbuf = mbuf->m_next; } } mbufc->m_pkthdr.len = total_size; #if defined(INET) || defined(INET6) xnb_add_mbuf_cksum(mbufc); #endif } /** * Dequeue at most one packet from the shared ring * \param[in,out] txb Netif tx ring. 
A packet will be removed from it, and * its private indices will be updated. But the indices * will not be pushed to the shared ring. * \param[in] ifnet Interface to which the packet will be sent * \param[in] otherend Domain ID of the other end of the ring * \param[out] mbufc The assembled mbuf chain, ready to send to the generic * networking stack * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. * \return An error code */ static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, struct ifnet *ifnet, gnttab_copy_table gnttab) { struct xnb_pkt pkt; /* number of tx requests consumed to build the last packet */ int num_consumed; int nr_ents; *mbufc = NULL; num_consumed = xnb_ring2pkt(&pkt, txb, txb->req_cons); if (num_consumed == 0) return 0; /* Nothing to receive */ /* update statistics independent of errors */ if_inc_counter(ifnet, IFCOUNTER_IPACKETS, 1); /* * if we got here, then 1 or more requests was consumed, but the packet * is not necessarily valid. */ if (xnb_pkt_is_valid(&pkt) == 0) { /* got a garbage packet, respond and drop it */ xnb_txpkt2rsp(&pkt, txb, 1); txb->req_cons += num_consumed; DPRINTF("xnb_intr: garbage packet, num_consumed=%d\n", num_consumed); if_inc_counter(ifnet, IFCOUNTER_IERRORS, 1); return EINVAL; } *mbufc = xnb_pkt2mbufc(&pkt, ifnet); if (*mbufc == NULL) { /* * Couldn't allocate mbufs. Respond and drop the packet. Do * not consume the requests */ xnb_txpkt2rsp(&pkt, txb, 1); DPRINTF("xnb_intr: Couldn't allocate mbufs, num_consumed=%d\n", num_consumed); if_inc_counter(ifnet, IFCOUNTER_IQDROPS, 1); return ENOMEM; } nr_ents = xnb_txpkt2gnttab(&pkt, *mbufc, gnttab, txb, otherend); if (nr_ents > 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, nr_ents); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); xnb_update_mbufc(*mbufc, gnttab, nr_ents); } xnb_txpkt2rsp(&pkt, txb, 0); txb->req_cons += num_consumed; return 0; } /** * Create an xnb_pkt based on the contents of an mbuf chain. * \param[in] mbufc mbuf chain to transform into a packet * \param[out] pkt Storage for the newly generated xnb_pkt * \param[in] start The ring index of the first available slot in the rx * ring * \param[in] space The number of free slots in the rx ring * \retval 0 Success * \retval EINVAL mbufc was corrupt or not convertible into a pkt * \retval EAGAIN There was not enough space in the ring to queue the * packet */ static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space) { int retval = 0; if ((mbufc == NULL) || ( (mbufc->m_flags & M_PKTHDR) == 0) || (mbufc->m_pkthdr.len == 0)) { xnb_pkt_invalidate(pkt); retval = EINVAL; } else { int slots_required; xnb_pkt_validate(pkt); pkt->flags = 0; pkt->size = mbufc->m_pkthdr.len; pkt->car = start; pkt->car_size = mbufc->m_len; if (mbufc->m_pkthdr.csum_flags & CSUM_TSO) { pkt->flags |= NETRXF_extra_info; pkt->extra.u.gso.size = mbufc->m_pkthdr.tso_segsz; pkt->extra.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; pkt->extra.u.gso.pad = 0; pkt->extra.u.gso.features = 0; pkt->extra.type = XEN_NETIF_EXTRA_TYPE_GSO; pkt->extra.flags = 0; pkt->cdr = start + 2; } else { pkt->cdr = start + 1; } if (mbufc->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA)) { pkt->flags |= (NETRXF_csum_blank | NETRXF_data_validated); } /* * Each ring response can have up to PAGE_SIZE of data. 
* Assume that we can defragment the mbuf chain efficiently * into responses so that each response but the last uses all * PAGE_SIZE bytes. */ pkt->list_len = (pkt->size + PAGE_SIZE - 1) / PAGE_SIZE; if (pkt->list_len > 1) { pkt->flags |= NETRXF_more_data; } slots_required = pkt->list_len + (pkt->flags & NETRXF_extra_info ? 1 : 0); if (slots_required > space) { xnb_pkt_invalidate(pkt); retval = EAGAIN; } } return retval; } /** * Build a gnttab_copy table that can be used to copy data from an mbuf chain * to the frontend's shared buffers. Does not actually perform the copy. * Always uses gref's on the other end's side. * \param[in] pkt pkt's associated responses form the dest for the copy * operatoin * \param[in] mbufc The source for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] rxb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id) { const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into rx ring buffer */ int r_ofs = 0; /* offset of next data within rx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining; size_remaining = (xnb_pkt_is_valid(pkt) != 0) ? pkt->size : 0; while (size_remaining > 0) { const netif_rx_request_t *rxq = RING_GET_REQUEST(rxb, r_idx); const size_t mbuf_space = mbuf->m_len - m_ofs; /* Xen shared pages have an implied size of PAGE_SIZE */ const size_t req_size = PAGE_SIZE; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicing */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].dest.u.ref = rxq->gref; gnttab[gnt_idx].dest.domid = otherend_id; gnttab[gnt_idx].dest.offset = r_ofs; gnttab[gnt_idx].source.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_dest_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next rx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (mbuf->m_len - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Generates responses for all the requests that constituted pkt. Builds * responses and writes them to the ring, but doesn't push the shared ring * indices. * \param[in] pkt the packet that needs a response * \param[in] gnttab The grant copy table corresponding to this packet. * Used to determine how many rsp->netif_rx_response_t's to * generate. 
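 *                    Entries must be grouped by destination gref; each
 *                    distinct gref produces exactly one response.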
* \param[in] n_entries Number of relevant entries in the grant table * \param[out] ring Responses go here * \return The number of RX requests that were consumed to generate * the responses */ static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring) { /* * This code makes the following assumptions: * * All entries in gnttab set GNTCOPY_dest_gref * * The entries in gnttab are grouped by their grefs: any two * entries with the same gref must be adjacent */ int error = 0; int gnt_idx, i; int n_responses = 0; grant_ref_t last_gref = GRANT_REF_INVALID; RING_IDX r_idx; KASSERT(gnttab != NULL, ("Received a null granttable copy")); /* * In the event of an error, we only need to send one response to the * netfront. In that case, we musn't write any data to the responses * after the one we send. So we must loop all the way through gnttab * looking for errors before we generate any responses * * Since we're looping through the grant table anyway, we'll count the * number of different gref's in it, which will tell us how many * responses to generate */ for (gnt_idx = 0; gnt_idx < n_entries; gnt_idx++) { int16_t status = gnttab[gnt_idx].status; if (status != GNTST_okay) { DPRINTF( "Got error %d for hypervisor gnttab_copy status\n", status); error = 1; break; } if (gnttab[gnt_idx].dest.u.ref != last_gref) { n_responses++; last_gref = gnttab[gnt_idx].dest.u.ref; } } if (error != 0) { uint16_t id; netif_rx_response_t *rsp; id = RING_GET_REQUEST(ring, ring->rsp_prod_pvt)->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = NETIF_RSP_ERROR; n_responses = 1; } else { gnt_idx = 0; const int has_extra = pkt->flags & NETRXF_extra_info; if (has_extra != 0) n_responses++; for (i = 0; i < n_responses; i++) { netif_rx_request_t rxq; netif_rx_response_t *rsp; r_idx = ring->rsp_prod_pvt + i; /* * We copy the structure of rxq instead of making a * pointer because it shares the same memory as rsp. */ rxq = *(RING_GET_REQUEST(ring, r_idx)); rsp = RING_GET_RESPONSE(ring, r_idx); if (has_extra && (i == 1)) { netif_extra_info_t *ext = (netif_extra_info_t*)rsp; ext->type = XEN_NETIF_EXTRA_TYPE_GSO; ext->flags = 0; ext->u.gso.size = pkt->extra.u.gso.size; ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; ext->u.gso.pad = 0; ext->u.gso.features = 0; } else { rsp->id = rxq.id; rsp->status = GNTST_okay; rsp->offset = 0; rsp->flags = 0; if (i < pkt->list_len - 1) rsp->flags |= NETRXF_more_data; if ((i == 0) && has_extra) rsp->flags |= NETRXF_extra_info; if ((i == 0) && (pkt->flags & NETRXF_data_validated)) { rsp->flags |= NETRXF_data_validated; rsp->flags |= NETRXF_csum_blank; } rsp->status = 0; for (; gnttab[gnt_idx].dest.u.ref == rxq.gref; gnt_idx++) { rsp->status += gnttab[gnt_idx].len; } } } } ring->req_cons += n_responses; ring->rsp_prod_pvt += n_responses; return n_responses; } #if defined(INET) || defined(INET6) /** * Add IP, TCP, and/or UDP checksums to every mbuf in a chain. The first mbuf * in the chain must start with a struct ether_header. * * XXX This function will perform incorrectly on UDP packets that are split up * into multiple ethernet frames. 
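 *
 * For TCP and UDP the transport checksum is seeded with the IPv4
 * pseudo-header sum, e.g. for TCP:
 *
 *	th_sum = in_pseudo(ip_src, ip_dst, htons(IPPROTO_TCP + tcplen));
 *
 * and then completed over the payload with in_cksum_skip(), skipping the
 * Ethernet and IP headers.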
*/ static void xnb_add_mbuf_cksum(struct mbuf *mbufc) { struct ether_header *eh; struct ip *iph; uint16_t ether_type; eh = mtod(mbufc, struct ether_header*); ether_type = ntohs(eh->ether_type); if (ether_type != ETHERTYPE_IP) { /* Nothing to calculate */ return; } iph = (struct ip*)(eh + 1); if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { iph->ip_sum = 0; iph->ip_sum = in_cksum_hdr(iph); } switch (iph->ip_p) { case IPPROTO_TCP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t tcplen = ntohs(iph->ip_len) - sizeof(struct ip); struct tcphdr *th = (struct tcphdr*)(iph + 1); th->th_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_TCP + tcplen)); th->th_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; case IPPROTO_UDP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t udplen = ntohs(iph->ip_len) - sizeof(struct ip); struct udphdr *uh = (struct udphdr*)(iph + 1); uh->uh_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_UDP + udplen)); uh->uh_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; default: break; } } #endif /* INET || INET6 */ static void xnb_stop(struct xnb_softc *xnb) { struct ifnet *ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); ifp = xnb->xnb_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } static int xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct xnb_softc *xnb = ifp->if_softc; struct ifreq *ifr = (struct ifreq*) data; #ifdef INET struct ifaddr *ifa = (struct ifaddr*)data; #endif int error = 0; switch (cmd) { case SIOCSIFFLAGS: mtx_lock(&xnb->sc_lock); if (ifp->if_flags & IFF_UP) { xnb_ifinit_locked(xnb); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xnb_stop(xnb); } } /* * Note: netfront sets a variable named xn_if_flags * here, but that variable is never read */ mtx_unlock(&xnb->sc_lock); break; case SIOCSIFADDR: #ifdef INET mtx_lock(&xnb->sc_lock); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } arp_ifinit(ifp, ifa); mtx_unlock(&xnb->sc_lock); } else { mtx_unlock(&xnb->sc_lock); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFCAP: mtx_lock(&xnb->sc_lock); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= XNB_CSUM_FEATURES; } else { ifp->if_capenable &= ~(IFCAP_TXCSUM); ifp->if_hwassist &= ~(XNB_CSUM_FEATURES); } if ((ifr->ifr_reqcap & IFCAP_RXCSUM)) { ifp->if_capenable |= IFCAP_RXCSUM; } else { ifp->if_capenable &= ~(IFCAP_RXCSUM); } /* * TODO enable TSO4 and LRO once we no longer need * to calculate checksums in software */ #if 0 if (ifr->if_reqcap |= IFCAP_TSO4) { if (IFCAP_TXCSUM & ifp->if_capenable) { printf("xnb: Xen netif requires that " "TXCSUM be enabled in order " "to use TSO4\n"); error = EINVAL; } else { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } } else { ifp->if_capenable &= ~(IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TSO); } if (ifr->ifreqcap |= IFCAP_LRO) { ifp->if_capenable |= IFCAP_LRO; } else { ifp->if_capenable &= ~(IFCAP_LRO); } #endif mtx_unlock(&xnb->sc_lock); break; case 
SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xnb_ifinit(xnb); break; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &xnb->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void xnb_start_locked(struct ifnet *ifp) { netif_rx_back_ring_t *rxb; struct xnb_softc *xnb; struct mbuf *mbufc; RING_IDX req_prod_local; xnb = ifp->if_softc; rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; if (!xnb->carrier) return; do { int out_of_space = 0; int notify; req_prod_local = rxb->sring->req_prod; xen_rmb(); for (;;) { int error; IF_DEQUEUE(&ifp->if_snd, mbufc); if (mbufc == NULL) break; error = xnb_send(rxb, xnb->otherend_id, mbufc, xnb->rx_gnttab); switch (error) { case EAGAIN: /* * Insufficient space in the ring. * Requeue pkt and send when space is * available. */ IF_PREPEND(&ifp->if_snd, mbufc); /* * Perhaps the frontend missed an IRQ * and went to sleep. Notify it to wake * it up. */ out_of_space = 1; break; case EINVAL: /* OS gave a corrupt packet. Drop it.*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); /* FALLTHROUGH */ default: /* Send succeeded, or packet had error. * Free the packet */ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (mbufc) m_freem(mbufc); break; } if (out_of_space != 0) break; } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify); if ((notify != 0) || (out_of_space != 0)) xen_intr_signal(xnb->xen_intr_handle); rxb->sring->req_event = req_prod_local + 1; xen_mb(); } while (rxb->sring->req_prod != req_prod_local) ; } /** * Sends one packet to the ring. Blocks until the packet is on the ring * \param[in] mbufc Contains one packet to send. Caller must free * \param[in,out] rxb The packet will be pushed onto this ring, but the * otherend will not be notified. * \param[in] otherend The domain ID of the other end of the connection * \retval EAGAIN The ring did not have enough space for the packet. * The ring has not been modified * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. 
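 * \retval 0      The packet was copied to the ring and responses were
 *                queued; any grant copy errors are reported to the
 *                frontend through those responses rather than here.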
* \retval EINVAL mbufc was corrupt or not convertible into a pkt */ static int xnb_send(netif_rx_back_ring_t *ring, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab) { struct xnb_pkt pkt; int error, n_entries, n_reqs; RING_IDX space; space = ring->sring->req_prod - ring->req_cons; error = xnb_mbufc2pkt(mbufc, &pkt, ring->rsp_prod_pvt, space); if (error != 0) return error; n_entries = xnb_rxpkt2gnttab(&pkt, mbufc, gnttab, ring, otherend); if (n_entries != 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, n_entries); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); } n_reqs = xnb_rxpkt2rsp(&pkt, gnttab, n_entries, ring); return 0; } static void xnb_start(struct ifnet *ifp) { struct xnb_softc *xnb; xnb = ifp->if_softc; mtx_lock(&xnb->rx_lock); xnb_start_locked(ifp); mtx_unlock(&xnb->rx_lock); } /* equivalent of network_open() in Linux */ static void xnb_ifinit_locked(struct xnb_softc *xnb) { struct ifnet *ifp; ifp = xnb->xnb_ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; xnb_stop(xnb); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } static void xnb_ifinit(void *xsc) { struct xnb_softc *xnb = xsc; mtx_lock(&xnb->sc_lock); xnb_ifinit_locked(xnb); mtx_unlock(&xnb->sc_lock); } /** * Callback used by the generic networking code to tell us when our carrier * state has changed. Since we don't have a physical carrier, we don't care */ static int xnb_ifmedia_upd(struct ifnet *ifp) { return (0); } /** * Callback used by the generic networking code to ask us what our carrier * state is. Since we don't have a physical carrier, this is very simple */ static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xnb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xnb_probe), DEVMETHOD(device_attach, xnb_attach), DEVMETHOD(device_detach, xnb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xnb_suspend), DEVMETHOD(device_resume, xnb_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xnb_frontend_changed), { 0, 0 } }; static driver_t xnb_driver = { "xnb", xnb_methods, sizeof(struct xnb_softc), }; devclass_t xnb_devclass; DRIVER_MODULE(xnb, xenbusb_back, xnb_driver, xnb_devclass, 0, 0); /*-------------------------- Unit Tests -------------------------------------*/ #ifdef XNB_DEBUG #include "netback_unit_tests.c" #endif Index: head/sys/dev/xen/netfront/netfront.c =================================================================== --- head/sys/dev/xen/netfront/netfront.c (revision 282273) +++ head/sys/dev/xen/netfront/netfront.c (revision 282274) @@ -1,2223 +1,2219 @@ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 700000 #include #include #endif #include #include #include /* for DELAY */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "xenbus_if.h" /* Features supported by all backends. TSO and LRO can be negotiated */ #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) #if __FreeBSD_version >= 700000 /* * Should the driver do LRO on the RX end * this can be toggled on the fly, but the * interface must be reset (down/up) for it * to take effect. */ static int xn_enable_lro = 1; TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); #else #define IFCAP_TSO4 0 #define CSUM_TSO 0 #endif #ifdef CONFIG_XEN static int MODPARM_rx_copy = 0; module_param_named(rx_copy, MODPARM_rx_copy, bool, 0); MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)"); static int MODPARM_rx_flip = 0; module_param_named(rx_flip, MODPARM_rx_flip, bool, 0); MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)"); #else static const int MODPARM_rx_copy = 1; static const int MODPARM_rx_flip = 0; #endif /** * \brief The maximum allowed data fragments in a single transmit * request. * * This limit is imposed by the backend driver. We assume here that * we are dealing with a Linux driver domain and have set our limit * to mirror the Linux MAX_SKB_FRAGS constant. 
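 *
 * Worked example (illustrative, not part of the original comment): with
 * the usual 4 KiB x86 PAGE_SIZE, the definition below evaluates to
 * 65536 / 4096 + 2 = 18 data fragments per transmit request.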
*/ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) #define RX_COPY_THRESHOLD 256 #define net_ratelimit() 0 struct netfront_info; struct netfront_rx_info; static void xn_txeof(struct netfront_info *); static void xn_rxeof(struct netfront_info *); static void network_alloc_rx_buffers(struct netfront_info *); static void xn_tick_locked(struct netfront_info *); static void xn_tick(void *); static void xn_intr(void *); static inline int xn_count_frags(struct mbuf *m); static int xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head); static void xn_start_locked(struct ifnet *); static void xn_start(struct ifnet *); static int xn_ioctl(struct ifnet *, u_long, caddr_t); static void xn_ifinit_locked(struct netfront_info *); static void xn_ifinit(void *); static void xn_stop(struct netfront_info *); static void xn_query_features(struct netfront_info *np); static int xn_configure_features(struct netfront_info *np); #ifdef notyet static void xn_watchdog(struct ifnet *); #endif #ifdef notyet static void netfront_closing(device_t dev); #endif static void netif_free(struct netfront_info *info); static int netfront_detach(device_t dev); static int talk_to_backend(device_t dev, struct netfront_info *info); static int create_netdev(device_t dev); static void netif_disconnect_backend(struct netfront_info *info); static int setup_device(device_t dev, struct netfront_info *info); static void free_ring(int *ref, void *ring_ptr_ref); static int xn_ifmedia_upd(struct ifnet *ifp); static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); /* Xenolinux helper functions */ int network_connect(struct netfront_info *); static void xn_free_rx_ring(struct netfront_info *); static void xn_free_tx_ring(struct netfront_info *); static int xennet_get_responses(struct netfront_info *np, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list, int *pages_flipped_p); #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) #define INVALID_P2M_ENTRY (~0UL) /* * Mbuf pointers. We need these to keep track of the virtual addresses * of our mbuf chains since we can only convert from virtual to physical, * not the other way around. The size must track the free index arrays. 
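 *
 * (Illustrative note, not part of the original source: the tx chain
 * declared below is sized NET_TX_RING_SIZE + 1 because entry 0 is
 * reserved as the free-list head; see add_id_to_freelist() and
 * get_id_from_freelist() further down.)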
*/ struct xn_chain_data { struct mbuf *xn_tx_chain[NET_TX_RING_SIZE+1]; int xn_tx_chain_cnt; struct mbuf *xn_rx_chain[NET_RX_RING_SIZE+1]; }; struct net_device_stats { u_long rx_packets; /* total packets received */ u_long tx_packets; /* total packets transmitted */ u_long rx_bytes; /* total bytes received */ u_long tx_bytes; /* total bytes transmitted */ u_long rx_errors; /* bad packets received */ u_long tx_errors; /* packet transmit problems */ u_long rx_dropped; /* no space in linux buffers */ u_long tx_dropped; /* no space available in linux */ u_long multicast; /* multicast packets received */ u_long collisions; /* detailed rx_errors: */ u_long rx_length_errors; u_long rx_over_errors; /* receiver ring buff overflow */ u_long rx_crc_errors; /* recved pkt with crc error */ u_long rx_frame_errors; /* recv'd frame alignment error */ u_long rx_fifo_errors; /* recv'r fifo overrun */ u_long rx_missed_errors; /* receiver missed packet */ /* detailed tx_errors */ u_long tx_aborted_errors; u_long tx_carrier_errors; u_long tx_fifo_errors; u_long tx_heartbeat_errors; u_long tx_window_errors; /* for cslip etc */ u_long rx_compressed; u_long tx_compressed; }; struct netfront_info { struct ifnet *xn_ifp; #if __FreeBSD_version >= 700000 struct lro_ctrl xn_lro; #endif struct net_device_stats stats; u_int tx_full; netif_tx_front_ring_t tx; netif_rx_front_ring_t rx; struct mtx tx_lock; struct mtx rx_lock; struct mtx sc_lock; xen_intr_handle_t xen_intr_handle; u_int copying_receiver; u_int carrier; u_int maxfrags; /* Receive-ring batched refills. */ #define RX_MIN_TARGET 32 #define RX_MAX_TARGET NET_RX_RING_SIZE int rx_min_target; int rx_max_target; int rx_target; grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; device_t xbdev; int tx_ring_ref; int rx_ring_ref; uint8_t mac[ETHER_ADDR_LEN]; struct xn_chain_data xn_cdata; /* mbufs */ struct mbufq xn_rx_batch; /* batch queue */ int xn_if_flags; struct callout xn_stat_ch; u_long rx_pfn_array[NET_RX_RING_SIZE]; multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; mmu_update_t rx_mmu[NET_RX_RING_SIZE]; struct ifmedia sc_media; }; #define rx_mbufs xn_cdata.xn_rx_chain #define tx_mbufs xn_cdata.xn_tx_chain #define XN_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \ mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF); \ mtx_init(&(_sc)->sc_lock, #_name"_sc", "netfront softc lock", MTX_DEF) #define XN_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_lock) #define XN_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_lock) #define XN_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_lock) #define XN_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_lock) #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); #define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED); #define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED); #define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_lock); \ mtx_destroy(&(_sc)->tx_lock); \ mtx_destroy(&(_sc)->sc_lock); struct netfront_rx_info { struct netif_rx_response rx; struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; #define netfront_carrier_on(netif) ((netif)->carrier = 1) #define netfront_carrier_off(netif) ((netif)->carrier = 0) #define netfront_carrier_ok(netif) ((netif)->carrier) /* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. 
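 *
 * Illustrative walk-through (not part of the original source), using a
 * tiny three-entry version of the table that create_netdev() builds:
 *
 *	list[0] = 1, list[1] = 2, list[2] = 0	(0 terminates the chain)
 *	id = get_id_from_freelist(list);	returns 1; list[0] becomes 2
 *	list[id] = <real mbuf pointer>;		slot 1 now tracks an mbuf
 *	add_id_to_freelist(list, id);		list[1] = 2, list[0] = 1 again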
*/ static inline void add_id_to_freelist(struct mbuf **list, uintptr_t id) { KASSERT(id != 0, ("%s: the head item (0) must always be free.", __func__)); list[id] = list[0]; list[0] = (struct mbuf *)id; } static inline unsigned short get_id_from_freelist(struct mbuf **list) { uintptr_t id; id = (uintptr_t)list[0]; KASSERT(id != 0, ("%s: the head item (0) must always remain free.", __func__)); list[0] = list[id]; return (id); } static inline int xennet_rxidx(RING_IDX idx) { return idx & (NET_RX_RING_SIZE - 1); } static inline struct mbuf * xennet_get_rx_mbuf(struct netfront_info *np, RING_IDX ri) { int i = xennet_rxidx(ri); struct mbuf *m; m = np->rx_mbufs[i]; np->rx_mbufs[i] = NULL; return (m); } static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri) { int i = xennet_rxidx(ri); grant_ref_t ref = np->grant_rx_ref[i]; KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); np->grant_rx_ref[i] = GRANT_REF_INVALID; return ref; } #define IPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #ifdef INVARIANTS #define WPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #else #define WPRINTK(fmt, args...) #endif #ifdef DEBUG #define DPRINTK(fmt, args...) \ printf("[XEN] %s: " fmt, __func__, ##args) #else #define DPRINTK(fmt, args...) #endif /** * Read the 'mac' node at the given device's node in the store, and parse that * as colon-separated octets, placing result the given mac array. mac must be * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). * Return 0 on success, or errno on error. */ static int xen_net_read_mac(device_t dev, uint8_t mac[]) { int error, i; char *s, *e, *macstr; const char *path; path = xenbus_get_node(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); if (error == ENOENT) { /* * Deal with missing mac XenStore nodes on devices with * HVM emulation (the 'ioemu' configuration attribute) * enabled. * * The HVM emulator may execute in a stub device model * domain which lacks the permission, only given to Dom0, * to update the guest's XenStore tree. For this reason, * the HVM emulator doesn't even attempt to write the * front-side mac node, even when operating in Dom0. * However, there should always be a mac listed in the * backend tree. Fallback to this version if our query * of the front side XenStore location doesn't find * anything. */ path = xenbus_get_otherend_path(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); } if (error != 0) { xenbus_dev_fatal(dev, error, "parsing %s/mac", path); return (error); } s = macstr; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac[i] = strtoul(s, &e, 16); if (s == e || (e[0] != ':' && e[0] != 0)) { free(macstr, M_XENBUS); return (ENOENT); } s = &e[1]; } free(macstr, M_XENBUS); return (0); } /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and * inform the backend of the appropriate details for those. Switch to * Connected state. 
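 *
 * Illustrative sketch (not part of the original comment): the details
 * handed over by talk_to_backend() below end up as XenStore nodes under
 * the frontend's device path, roughly:
 *
 *	tx-ring-ref = <grant ref>	rx-ring-ref = <grant ref>
 *	event-channel = <port>		request-rx-copy = <0|1>
 *	feature-rx-notify = 1		feature-sg = 1
 *	feature-gso-tcpv4 = 1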
*/ static int netfront_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { device_set_desc(dev, "Virtual Network Interface"); return (0); } return (ENXIO); } static int netfront_attach(device_t dev) { int err; err = create_netdev(dev); if (err) { xenbus_dev_fatal(dev, err, "creating netdev"); return (err); } #if __FreeBSD_version >= 700000 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_lro", CTLFLAG_RW, &xn_enable_lro, 0, "Large Receive Offload"); #endif return (0); } static int netfront_suspend(device_t dev) { struct netfront_info *info = device_get_softc(dev); XN_RX_LOCK(info); XN_TX_LOCK(info); netfront_carrier_off(info); XN_TX_UNLOCK(info); XN_RX_UNLOCK(info); return (0); } /** * We are reconnecting to the backend, due to a suspend/resume, or a backend * driver restart. We tear down our netif structure and recreate it, but * leave the device-layer structures intact so that this is transparent to the * rest of the kernel. */ static int netfront_resume(device_t dev) { struct netfront_info *info = device_get_softc(dev); netif_disconnect_backend(info); return (0); } /* Common code used when first setting up, and when resuming. */ static int talk_to_backend(device_t dev, struct netfront_info *info) { const char *message; struct xs_transaction xst; const char *node = xenbus_get_node(dev); int err; err = xen_net_read_mac(dev, info->mac); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", node); goto out; } /* Create shared ring, alloc event channel. */ err = setup_device(dev, info); if (err) goto out; again: err = xs_transaction_start(&xst); if (err) { xenbus_dev_fatal(dev, err, "starting transaction"); goto destroy_ring; } err = xs_printf(xst, node, "tx-ring-ref","%u", info->tx_ring_ref); if (err) { message = "writing tx ring-ref"; goto abort_transaction; } err = xs_printf(xst, node, "rx-ring-ref","%u", info->rx_ring_ref); if (err) { message = "writing rx ring-ref"; goto abort_transaction; } err = xs_printf(xst, node, "event-channel", "%u", xen_intr_port(info->xen_intr_handle)); if (err) { message = "writing event-channel"; goto abort_transaction; } err = xs_printf(xst, node, "request-rx-copy", "%u", info->copying_receiver); if (err) { message = "writing request-rx-copy"; goto abort_transaction; } err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); if (err) { message = "writing feature-rx-notify"; goto abort_transaction; } err = xs_printf(xst, node, "feature-sg", "%d", 1); if (err) { message = "writing feature-sg"; goto abort_transaction; } #if __FreeBSD_version >= 700000 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); if (err) { message = "writing feature-gso-tcpv4"; goto abort_transaction; } #endif err = xs_transaction_end(xst, 0); if (err) { if (err == EAGAIN) goto again; xenbus_dev_fatal(dev, err, "completing transaction"); goto destroy_ring; } return 0; abort_transaction: xs_transaction_end(xst, 1); xenbus_dev_fatal(dev, err, "%s", message); destroy_ring: netif_free(info); out: return err; } static int setup_device(device_t dev, struct netfront_info *info) { netif_tx_sring_t *txs; netif_rx_sring_t *rxs; int error; struct ifnet *ifp; ifp = info->xn_ifp; info->tx_ring_ref = GRANT_REF_INVALID; info->rx_ring_ref = GRANT_REF_INVALID; info->rx.sring = NULL; info->tx.sring = NULL; txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!txs) { error = ENOMEM; xenbus_dev_fatal(dev, error, "allocating tx ring page"); goto fail; } SHARED_RING_INIT(txs); FRONT_RING_INIT(&info->tx, 
txs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref); if (error) goto fail; rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!rxs) { error = ENOMEM; xenbus_dev_fatal(dev, error, "allocating rx ring page"); goto fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref); if (error) goto fail; error = xen_intr_alloc_and_bind_local_port(dev, xenbus_get_otherend_id(dev), /*filter*/NULL, xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, &info->xen_intr_handle); if (error) { xenbus_dev_fatal(dev, error, "xen_intr_alloc_and_bind_local_port failed"); goto fail; } return (0); fail: netif_free(info); return (error); } #ifdef INET /** * If this interface has an ipv4 address, send an arp for it. This * helps to get the network going again after migrating hosts. */ static void netfront_send_fake_arp(device_t dev, struct netfront_info *info) { struct ifnet *ifp; struct ifaddr *ifa; ifp = info->xn_ifp; TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) { arp_ifinit(ifp, ifa); } } } #endif /** * Callback received when the backend's state changes. */ static void netfront_backend_changed(device_t dev, XenbusState newstate) { struct netfront_info *sc = device_get_softc(dev); DPRINTK("newstate=%d\n", newstate); switch (newstate) { case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateConnected: case XenbusStateUnknown: case XenbusStateClosed: case XenbusStateReconfigured: case XenbusStateReconfiguring: break; case XenbusStateInitWait: if (xenbus_get_state(dev) != XenbusStateInitialising) break; if (network_connect(sc) != 0) break; xenbus_set_state(dev, XenbusStateConnected); #ifdef INET netfront_send_fake_arp(dev, sc); #endif break; case XenbusStateClosing: xenbus_set_state(dev, XenbusStateClosed); break; } } static void xn_free_rx_ring(struct netfront_info *sc) { #if 0 int i; for (i = 0; i < NET_RX_RING_SIZE; i++) { if (sc->xn_cdata.rx_mbufs[i] != NULL) { m_freem(sc->rx_mbufs[i]); sc->rx_mbufs[i] = NULL; } } sc->rx.rsp_cons = 0; sc->xn_rx_if->req_prod = 0; sc->xn_rx_if->event = sc->rx.rsp_cons ; #endif } static void xn_free_tx_ring(struct netfront_info *sc) { #if 0 int i; for (i = 0; i < NET_TX_RING_SIZE; i++) { if (sc->tx_mbufs[i] != NULL) { m_freem(sc->tx_mbufs[i]); sc->xn_cdata.xn_tx_chain[i] = NULL; } } return; #endif } /** * \brief Verify that there is sufficient space in the Tx ring * buffer for a maximally sized request to be enqueued. * * A transmit request requires a transmit descriptor for each packet * fragment, plus up to 2 entries for "options" (e.g. TSO). */ static inline int xn_tx_slot_available(struct netfront_info *np) { return (RING_FREE_REQUESTS(&np->tx) > (MAX_TX_REQ_FRAGS + 2)); } static void netif_release_tx_bufs(struct netfront_info *np) { int i; for (i = 1; i <= NET_TX_RING_SIZE; i++) { struct mbuf *m; m = np->tx_mbufs[i]; /* * We assume that no kernel addresses are * less than NET_TX_RING_SIZE. Any entry * in the table that is below this number * must be an index from free-list tracking. 
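 *
 * (Illustrative note, not from the original comment: with 4 KiB pages
 * the tx ring typically holds 256 entries, so a stored value such as 3
 * can only be a free-list link, while any real mbuf sits at a much
 * higher kernel virtual address.)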
*/ if (((uintptr_t)m) <= NET_TX_RING_SIZE) continue; gnttab_end_foreign_access_ref(np->grant_tx_ref[i]); gnttab_release_grant_reference(&np->gref_tx_head, np->grant_tx_ref[i]); np->grant_tx_ref[i] = GRANT_REF_INVALID; add_id_to_freelist(np->tx_mbufs, i); np->xn_cdata.xn_tx_chain_cnt--; if (np->xn_cdata.xn_tx_chain_cnt < 0) { panic("%s: tx_chain_cnt must be >= 0", __func__); } m_free(m); } } static void network_alloc_rx_buffers(struct netfront_info *sc) { int otherend_id = xenbus_get_otherend_id(sc->xbdev); unsigned short id; struct mbuf *m_new; int i, batch_target, notify; RING_IDX req_prod; struct xen_memory_reservation reservation; grant_ref_t ref; int nr_flips; netif_rx_request_t *req; vm_offset_t vaddr; u_long pfn; req_prod = sc->rx.req_prod_pvt; if (__predict_false(sc->carrier == 0)) return; /* * Allocate mbufs greedily, even though we batch updates to the * receive ring. This creates a less bursty demand on the memory * allocator, and so should reduce the chance of failed allocation * requests both for ourself and for other kernel subsystems. * * Here we attempt to maintain rx_target buffers in flight, counting * buffers that we have yet to process in the receive ring. */ batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons); for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) { m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); if (m_new == NULL) { if (i != 0) goto refill; /* * XXX set timer */ break; } m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; /* queue the mbufs allocated */ (void )mbufq_enqueue(&sc->xn_rx_batch, m_new); } /* * If we've allocated at least half of our target number of entries, * submit them to the backend - we have enough to make the overhead * of submission worthwhile. Otherwise wait for more mbufs and * request entries to become available. */ if (i < (sc->rx_target/2)) { if (req_prod >sc->rx.sring->req_prod) goto push; return; } /* * Double floating fill target if we risked having the backend * run out of empty buffers for receive traffic. We define "running * low" as having less than a fourth of our target buffers free * at the time we refilled the queue. */ if ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) { sc->rx_target *= 2; if (sc->rx_target > sc->rx_max_target) sc->rx_target = sc->rx_max_target; } refill: for (nr_flips = i = 0; ; i++) { if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL) break; m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)( vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); id = xennet_rxidx(req_prod + i); KASSERT(sc->rx_mbufs[id] == NULL, ("non-NULL xm_rx_chain")); sc->rx_mbufs[id] = m_new; ref = gnttab_claim_grant_reference(&sc->gref_rx_head); KASSERT(ref != GNTTAB_LIST_END, ("reserved grant references exhuasted")); sc->grant_rx_ref[id] = ref; vaddr = mtod(m_new, vm_offset_t); pfn = vtophys(vaddr) >> PAGE_SHIFT; req = RING_GET_REQUEST(&sc->rx, req_prod + i); if (sc->copying_receiver == 0) { gnttab_grant_foreign_transfer_ref(ref, otherend_id, pfn); - sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn); + sc->rx_pfn_array[nr_flips] = pfn; if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* Remove this page before passing * back to Xen. 
*/ - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); MULTI_update_va_mapping(&sc->rx_mcl[i], vaddr, 0, 0); } nr_flips++; } else { gnttab_grant_foreign_access_ref(ref, otherend_id, - PFNTOMFN(pfn), 0); + pfn, 0); } req->id = id; req->gref = ref; sc->rx_pfn_array[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; } KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed")); /* * We may have allocated buffers which have entries outstanding * in the page * update queue -- make sure we flush those first! */ - PT_UPDATES_FLUSH(); if (nr_flips != 0) { #ifdef notyet /* Tell the ballon driver what is going on. */ balloon_update_driver_allowance(i); #endif set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array); reservation.nr_extents = i; reservation.extent_order = 0; reservation.address_bits = 0; reservation.domid = DOMID_SELF; if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* After all PTEs have been zapped, flush the TLB. */ sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; /* Give away a batch of pages. */ sc->rx_mcl[i].op = __HYPERVISOR_memory_op; sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation; sc->rx_mcl[i].args[1] = (u_long)&reservation; /* Zap PTEs and give away pages in one big multicall. */ (void)HYPERVISOR_multicall(sc->rx_mcl, i+1); if (__predict_false(sc->rx_mcl[i].result != i || HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != i)) panic("%s: unable to reduce memory " "reservation\n", __func__); } } else { wmb(); } /* Above is a suitable barrier to ensure backend will see requests. */ sc->rx.req_prod_pvt = req_prod + i; push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); if (notify) xen_intr_signal(sc->xen_intr_handle); } static void xn_rxeof(struct netfront_info *np) { struct ifnet *ifp; #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) struct lro_ctrl *lro = &np->xn_lro; struct lro_entry *queued; #endif struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; multicall_entry_t *mcl; struct mbuf *m; struct mbufq rxq, errq; int err, pages_flipped = 0, work_to_do; do { XN_RX_LOCK_ASSERT(np); if (!netfront_carrier_ok(np)) return; /* XXX: there should be some sane limit. */ mbufq_init(&errq, INT_MAX); mbufq_init(&rxq, INT_MAX); ifp = np->xn_ifp; rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ i = np->rx.rsp_cons; while ((i != rp)) { memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); memset(extras, 0, sizeof(rinfo.extras)); m = NULL; err = xennet_get_responses(np, &rinfo, rp, &i, &m, &pages_flipped); if (__predict_false(err)) { if (m) (void )mbufq_enqueue(&errq, m); np->stats.rx_errors++; continue; } m->m_pkthdr.rcvif = ifp; if ( rx->flags & NETRXF_data_validated ) { /* Tell the stack the checksums are okay */ /* * XXX this isn't necessarily the case - need to add * check */ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } np->stats.rx_packets++; np->stats.rx_bytes += m->m_pkthdr.len; (void )mbufq_enqueue(&rxq, m); np->rx.rsp_cons = i; } if (pages_flipped) { /* Some pages are no longer absent... */ #ifdef notyet balloon_update_driver_allowance(-pages_flipped); #endif /* Do all the remapping work, and M->P updates, in one big * hypercall. 
*/ if (!!xen_feature(XENFEAT_auto_translated_physmap)) { mcl = np->rx_mcl + pages_flipped; mcl->op = __HYPERVISOR_mmu_update; mcl->args[0] = (u_long)np->rx_mmu; mcl->args[1] = pages_flipped; mcl->args[2] = 0; mcl->args[3] = DOMID_SELF; (void)HYPERVISOR_multicall(np->rx_mcl, pages_flipped + 1); } } mbufq_drain(&errq); /* * Process all the mbufs after the remapping is complete. * Break the mbuf chain first though. */ while ((m = mbufq_dequeue(&rxq)) != NULL) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* * Do we really need to drop the rx lock? */ XN_RX_UNLOCK(np); #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) /* Use LRO if possible */ if ((ifp->if_capenable & IFCAP_LRO) == 0 || lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { /* * If LRO fails, pass up to the stack * directly. */ (*ifp->if_input)(ifp, m); } #else (*ifp->if_input)(ifp, m); #endif XN_RX_LOCK(np); } np->rx.rsp_cons = i; #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) /* * Flush any outstanding LRO work */ while (!SLIST_EMPTY(&lro->lro_active)) { queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif #if 0 /* If we get a callback with very few responses, reduce fill target. */ /* NB. Note exponential increase, linear decrease. */ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) np->rx_target = np->rx_min_target; #endif network_alloc_rx_buffers(np); RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do); } while (work_to_do); } static void xn_txeof(struct netfront_info *np) { RING_IDX i, prod; unsigned short id; struct ifnet *ifp; netif_tx_response_t *txr; struct mbuf *m; XN_TX_LOCK_ASSERT(np); if (!netfront_carrier_ok(np)) return; ifp = np->xn_ifp; do { prod = np->tx.sring->rsp_prod; rmb(); /* Ensure we see responses up to 'rp'. */ for (i = np->tx.rsp_cons; i != prod; i++) { txr = RING_GET_RESPONSE(&np->tx, i); if (txr->status == NETIF_RSP_NULL) continue; if (txr->status != NETIF_RSP_OKAY) { printf("%s: WARNING: response is %d!\n", __func__, txr->status); } id = txr->id; m = np->tx_mbufs[id]; KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); KASSERT((uintptr_t)m > NET_TX_RING_SIZE, ("mbuf already on the free list, but we're " "trying to free it again!")); M_ASSERTVALID(m); /* * Increment packet count if this is the last * mbuf of the chain. */ if (!m->m_next) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (__predict_false(gnttab_query_foreign_access( np->grant_tx_ref[id]) != 0)) { panic("%s: grant id %u still in use by the " "backend", __func__, id); } gnttab_end_foreign_access_ref( np->grant_tx_ref[id]); gnttab_release_grant_reference( &np->gref_tx_head, np->grant_tx_ref[id]); np->grant_tx_ref[id] = GRANT_REF_INVALID; np->tx_mbufs[id] = NULL; add_id_to_freelist(np->tx_mbufs, id); np->xn_cdata.xn_tx_chain_cnt--; m_free(m); /* Only mark the queue active if we've freed up at least one slot to try */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } np->tx.rsp_cons = prod; /* * Set a new event, then check for race with update of * tx_cons. Note that it is essential to schedule a * callback, no matter how few buffers are pending. Even if * there is space in the transmit ring, higher layers may * be blocked because too much data is outstanding: in such * cases notification from Xen is likely to be the only kick * that we'll get. 
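 *
 * (Illustrative note, not part of the original comment: the assignment
 * below sets the event index roughly halfway between the responses
 * processed so far and the current request producer, so the backend
 * will notify us again after about half of the outstanding requests
 * complete; e.g. prod = 10 and req_prod = 18 yield rsp_event = 15.)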
*/ np->tx.sring->rsp_event = prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; mb(); } while (prod != np->tx.sring->rsp_prod); if (np->tx_full && ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { np->tx_full = 0; #if 0 if (np->user_state == UST_OPEN) netif_wake_queue(dev); #endif } } static void xn_intr(void *xsc) { struct netfront_info *np = xsc; struct ifnet *ifp = np->xn_ifp; #if 0 if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod && likely(netfront_carrier_ok(np)) && ifp->if_drv_flags & IFF_DRV_RUNNING)) return; #endif if (RING_HAS_UNCONSUMED_RESPONSES(&np->tx)) { XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); } XN_RX_LOCK(np); xn_rxeof(np); XN_RX_UNLOCK(np); if (ifp->if_drv_flags & IFF_DRV_RUNNING && !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) xn_start(ifp); } static void xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m, grant_ref_t ref) { int new = xennet_rxidx(np->rx.req_prod_pvt); KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL")); np->rx_mbufs[new] = m; np->grant_rx_ref[new] = ref; RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; np->rx.req_prod_pvt++; } static int xennet_get_extras(struct netfront_info *np, struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) { struct netif_extra_info *extra; int err = 0; do { struct mbuf *m; grant_ref_t ref; if (__predict_false(*cons + 1 == rp)) { #if 0 if (net_ratelimit()) WPRINTK("Missing extra info\n"); #endif err = EINVAL; break; } extra = (struct netif_extra_info *) RING_GET_RESPONSE(&np->rx, ++(*cons)); if (__predict_false(!extra->type || extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { #if 0 if (net_ratelimit()) WPRINTK("Invalid extra type: %d\n", extra->type); #endif err = EINVAL; } else { memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); } m = xennet_get_rx_mbuf(np, *cons); ref = xennet_get_rx_ref(np, *cons); xennet_move_rx_slot(np, m, ref); } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); return err; } static int xennet_get_responses(struct netfront_info *np, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list, int *pages_flipped_p) { int pages_flipped = *pages_flipped_p; struct mmu_update *mmu; struct multicall_entry *mcl; struct netif_rx_response *rx = &rinfo->rx; struct netif_extra_info *extras = rinfo->extras; struct mbuf *m, *m0, *m_prev; grant_ref_t ref = xennet_get_rx_ref(np, *cons); RING_IDX ref_cons = *cons; int frags = 1; int err = 0; u_long ret; m0 = m = m_prev = xennet_get_rx_mbuf(np, *cons); if (rx->flags & NETRXF_extra_info) { err = xennet_get_extras(np, extras, rp, cons); } if (m0 != NULL) { m0->m_pkthdr.len = 0; m0->m_next = NULL; } for (;;) { u_long mfn; #if 0 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", rx->status, rx->offset, frags); #endif if (__predict_false(rx->status < 0 || rx->offset + rx->status > PAGE_SIZE)) { #if 0 if (net_ratelimit()) WPRINTK("rx->offset: %x, size: %u\n", rx->offset, rx->status); #endif xennet_move_rx_slot(np, m, ref); if (m0 == m) m0 = NULL; m = NULL; err = EINVAL; goto next_skip_queue; } /* * This definitely indicates a bug, either in this driver or in * the backend driver. In future this should flag the bad * situation to the system controller to reboot the backed. */ if (ref == GRANT_REF_INVALID) { #if 0 if (net_ratelimit()) WPRINTK("Bad rx response id %d.\n", rx->id); #endif printf("%s: Bad rx response id %d.\n", __func__,rx->id); err = EINVAL; goto next; } if (!np->copying_receiver) { /* Memory pressure, insufficient buffer * headroom, ... 
*/ if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n", rx->id, rx->status); xennet_move_rx_slot(np, m, ref); err = ENOMEM; goto next; } if (!xen_feature( XENFEAT_auto_translated_physmap)) { /* Remap the page. */ void *vaddr = mtod(m, void *); uint32_t pfn; mcl = np->rx_mcl + pages_flipped; mmu = np->rx_mmu + pages_flipped; MULTI_update_va_mapping(mcl, (u_long)vaddr, (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW | PG_V | PG_M | PG_A, 0); pfn = (uintptr_t)m->m_ext.ext_arg1; mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu->val = pfn; - - set_phys_to_machine(pfn, mfn); } pages_flipped++; } else { ret = gnttab_end_foreign_access_ref(ref); KASSERT(ret, ("ret != 0")); } gnttab_release_grant_reference(&np->gref_rx_head, ref); next: if (m == NULL) break; m->m_len = rx->status; m->m_data += rx->offset; m0->m_pkthdr.len += rx->status; next_skip_queue: if (!(rx->flags & NETRXF_more_data)) break; if (*cons + frags == rp) { if (net_ratelimit()) WPRINTK("Need more frags\n"); err = ENOENT; printf("%s: cons %u frags %u rp %u, not enough frags\n", __func__, *cons, frags, rp); break; } /* * Note that m can be NULL, if rx->status < 0 or if * rx->offset + rx->status > PAGE_SIZE above. */ m_prev = m; rx = RING_GET_RESPONSE(&np->rx, *cons + frags); m = xennet_get_rx_mbuf(np, *cons + frags); /* * m_prev == NULL can happen if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. */ if (m_prev != NULL) m_prev->m_next = m; /* * m0 can be NULL if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. */ if (m0 == NULL) m0 = m; m->m_next = NULL; ref = xennet_get_rx_ref(np, *cons + frags); ref_cons = *cons + frags; frags++; } *list = m0; *cons += frags; *pages_flipped_p = pages_flipped; return (err); } static void xn_tick_locked(struct netfront_info *sc) { XN_RX_LOCK_ASSERT(sc); callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); /* XXX placeholder for printing debug information */ } static void xn_tick(void *xsc) { struct netfront_info *sc; sc = xsc; XN_RX_LOCK(sc); xn_tick_locked(sc); XN_RX_UNLOCK(sc); } /** * \brief Count the number of fragments in an mbuf chain. * * Surprisingly, there isn't an M* macro for this. */ static inline int xn_count_frags(struct mbuf *m) { int nfrags; for (nfrags = 0; m != NULL; m = m->m_next) nfrags++; return (nfrags); } /** * Given an mbuf chain, make sure we have enough room and then push * it onto the transmit ring. */ static int xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) { struct ifnet *ifp; struct mbuf *m; u_int nfrags; int otherend_id; ifp = sc->xn_ifp; /** * Defragment the mbuf if necessary. */ nfrags = xn_count_frags(m_head); /* * Check to see whether this request is longer than netback * can handle, and try to defrag it. */ /** * It is a bit lame, but the netback driver in Linux can't * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of * the Linux network stack. */ if (nfrags > sc->maxfrags) { m = m_defrag(m_head, M_NOWAIT); if (!m) { /* * Defrag failed, so free the mbuf and * therefore drop the packet. */ m_freem(m_head); return (EMSGSIZE); } m_head = m; } /* Determine how many fragments now exist */ nfrags = xn_count_frags(m_head); /* * Check to see whether the defragmented packet has too many * segments for the Linux netback driver. */ /** * The FreeBSD TCP stack, with TSO enabled, can produce a chain * of mbufs longer than Linux can handle. Make sure we don't * pass a too-long chain over to the other side by dropping the * packet. 
It doesn't look like there is currently a way to * tell the TCP stack to generate a shorter chain of packets. */ if (nfrags > MAX_TX_REQ_FRAGS) { #ifdef DEBUG printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " "won't be able to handle it, dropping\n", __func__, nfrags, MAX_TX_REQ_FRAGS); #endif m_freem(m_head); return (EMSGSIZE); } /* * This check should be redundant. We've already verified that we * have enough slots in the ring to handle a packet of maximum * size, and that our packet is less than the maximum size. Keep * it in here as an assert for now just to make certain that * xn_tx_chain_cnt is accurate. */ KASSERT((sc->xn_cdata.xn_tx_chain_cnt + nfrags) <= NET_TX_RING_SIZE, ("%s: xn_tx_chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " "(%d)!", __func__, (int) sc->xn_cdata.xn_tx_chain_cnt, (int) nfrags, (int) NET_TX_RING_SIZE)); /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; otherend_id = xenbus_get_otherend_id(sc->xbdev); for (m = m_head; m; m = m->m_next) { netif_tx_request_t *tx; uintptr_t id; grant_ref_t ref; u_long mfn; /* XXX Wrong type? */ tx = RING_GET_REQUEST(&sc->tx, sc->tx.req_prod_pvt); id = get_id_from_freelist(sc->tx_mbufs); if (id == 0) panic("%s: was allocated the freelist head!\n", __func__); sc->xn_cdata.xn_tx_chain_cnt++; if (sc->xn_cdata.xn_tx_chain_cnt > NET_TX_RING_SIZE) panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", __func__); sc->tx_mbufs[id] = m; tx->id = id; ref = gnttab_claim_grant_reference(&sc->gref_tx_head); KASSERT((short)ref >= 0, ("Negative ref")); mfn = virt_to_mfn(mtod(m, vm_offset_t)); gnttab_grant_foreign_access_ref(ref, otherend_id, mfn, GNTMAP_readonly); tx->gref = sc->grant_tx_ref[id] = ref; tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); tx->flags = 0; if (m == m_head) { /* * The first fragment has the entire packet * size, subsequent fragments have just the * fragment size. The backend works out the * true size of the first fragment by * subtracting the sizes of the other * fragments. */ tx->size = m->m_pkthdr.len; /* * The first fragment contains the checksum flags * and is optionally followed by extra data for * TSO etc. */ /** * CSUM_TSO requires checksum offloading. * Some versions of FreeBSD fail to * set CSUM_TCP in the CSUM_TSO case, * so we have to test for CSUM_TSO * explicitly. */ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_TSO)) { tx->flags |= (NETTXF_csum_blank | NETTXF_data_validated); } #if __FreeBSD_version >= 700000 if (m->m_pkthdr.csum_flags & CSUM_TSO) { struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_REQUEST(&sc->tx, ++sc->tx.req_prod_pvt); tx->flags |= NETTXF_extra_info; gso->u.gso.size = m->m_pkthdr.tso_segsz; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; } #endif } else { tx->size = m->m_len; } if (m->m_next) tx->flags |= NETTXF_more_data; sc->tx.req_prod_pvt++; } BPF_MTAP(ifp, m_head); sc->stats.tx_bytes += m_head->m_pkthdr.len; sc->stats.tx_packets++; return (0); } static void xn_start_locked(struct ifnet *ifp) { struct netfront_info *sc; struct mbuf *m_head; int notify; sc = ifp->if_softc; if (!netfront_carrier_ok(sc)) return; /* * While we have enough transmit slots available for at least one * maximum-sized packet, pull mbufs off the queue and put them on * the transmit ring. 
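 *
 * ("Enough" is the xn_tx_slot_available() check shown earlier: more
 * than MAX_TX_REQ_FRAGS + 2 free ring requests, i.e. space for a
 * maximally fragmented packet plus its option entries.  Illustrative
 * note, not part of the original comment.)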
*/ while (xn_tx_slot_available(sc)) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (xn_assemble_tx_request(sc, m_head) != 0) break; } RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); if (notify) xen_intr_signal(sc->xen_intr_handle); if (RING_FULL(&sc->tx)) { sc->tx_full = 1; #if 0 netif_stop_queue(dev); #endif } } static void xn_start(struct ifnet *ifp) { struct netfront_info *sc; sc = ifp->if_softc; XN_TX_LOCK(sc); xn_start_locked(ifp); XN_TX_UNLOCK(sc); } /* equivalent of network_open() in Linux */ static void xn_ifinit_locked(struct netfront_info *sc) { struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; xn_stop(sc); network_alloc_rx_buffers(sc); sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); } static void xn_ifinit(void *xsc) { struct netfront_info *sc = xsc; XN_LOCK(sc); xn_ifinit_locked(sc); XN_UNLOCK(sc); } static int xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct netfront_info *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif int mask, error = 0; switch(cmd) { case SIOCSIFADDR: #ifdef INET XN_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) xn_ifinit_locked(sc); arp_ifinit(ifp, ifa); XN_UNLOCK(sc); } else { XN_UNLOCK(sc); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFMTU: /* XXX can we alter the MTU on a VN ?*/ #ifdef notyet if (ifr->ifr_mtu > XN_JUMBO_MTU) error = EINVAL; else #endif { ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xn_ifinit(sc); } break; case SIOCSIFFLAGS: XN_LOCK(sc); if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. 
*/ #ifdef notyet /* No promiscuous mode with Xen */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->xn_if_flags & IFF_PROMISC)) { XN_SETBIT(sc, XN_RX_MODE, XN_RXMODE_RX_PROMISC); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->xn_if_flags & IFF_PROMISC) { XN_CLRBIT(sc, XN_RX_MODE, XN_RXMODE_RX_PROMISC); } else #endif xn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xn_stop(sc); } } sc->xn_if_flags = ifp->if_flags; XN_UNLOCK(sc); error = 0; break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); } } if (mask & IFCAP_RXCSUM) { ifp->if_capenable ^= IFCAP_RXCSUM; } #if __FreeBSD_version >= 700000 if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } else { IPRINTK("Xen requires tx checksum offload" " be enabled to use TSO\n"); error = EINVAL; } } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; } #endif error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet if (ifp->if_drv_flags & IFF_DRV_RUNNING) { XN_LOCK(sc); xn_setmulti(sc); XN_UNLOCK(sc); error = 0; } #endif /* FALLTHROUGH */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); } return (error); } static void xn_stop(struct netfront_info *sc) { struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; callout_stop(&sc->xn_stat_ch); xn_free_rx_ring(sc); xn_free_tx_ring(sc); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } /* START of Xenolinux helper functions adapted to FreeBSD */ int network_connect(struct netfront_info *np) { int i, requeue_idx, error; grant_ref_t ref; netif_rx_request_t *req; u_int feature_rx_copy, feature_rx_flip; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-copy", NULL, "%u", &feature_rx_copy); if (error) feature_rx_copy = 0; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-flip", NULL, "%u", &feature_rx_flip); if (error) feature_rx_flip = 1; /* * Copy packets on receive path if: * (a) This was requested by user, and the backend supports it; or * (b) Flipping was requested, but this is unsupported by the backend. */ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) || (MODPARM_rx_flip && !feature_rx_flip)); /* Recovery procedure: */ error = talk_to_backend(np->xbdev, np); if (error) return (error); /* Step 1: Reinitialise variables. */ xn_query_features(np); xn_configure_features(np); netif_release_tx_bufs(np); /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. 
*/ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { struct mbuf *m; u_long pfn; if (np->rx_mbufs[i] == NULL) continue; m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i); ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); req = RING_GET_REQUEST(&np->rx, requeue_idx); pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; if (!np->copying_receiver) { gnttab_grant_foreign_transfer_ref(ref, xenbus_get_otherend_id(np->xbdev), pfn); } else { gnttab_grant_foreign_access_ref(ref, xenbus_get_otherend_id(np->xbdev), - PFNTOMFN(pfn), 0); + pfn, 0); } req->gref = ref; req->id = requeue_idx; requeue_idx++; } np->rx.req_prod_pvt = requeue_idx; /* Step 3: All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. */ netfront_carrier_on(np); xen_intr_signal(np->xen_intr_handle); XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); network_alloc_rx_buffers(np); return (0); } static void xn_query_features(struct netfront_info *np) { int val; device_printf(np->xbdev, "backend features:"); if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-sg", NULL, "%d", &val) < 0) val = 0; np->maxfrags = 1; if (val) { np->maxfrags = MAX_TX_REQ_FRAGS; printf(" feature-sg"); } if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-gso-tcpv4", NULL, "%d", &val) < 0) val = 0; np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); if (val) { np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; printf(" feature-gso-tcp4"); } printf("\n"); } static int xn_configure_features(struct netfront_info *np) { int err; err = 0; #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) if ((np->xn_ifp->if_capenable & IFCAP_LRO) != 0) tcp_lro_free(&np->xn_lro); #endif np->xn_ifp->if_capenable = np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4); np->xn_ifp->if_hwassist &= ~CSUM_TSO; #if __FreeBSD_version >= 700000 && (defined(INET) || defined(INET6)) if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) != 0) { err = tcp_lro_init(&np->xn_lro); if (err) { device_printf(np->xbdev, "LRO initialization failed\n"); } else { np->xn_lro.ifp = np->xn_ifp; np->xn_ifp->if_capenable |= IFCAP_LRO; } } if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) != 0) { np->xn_ifp->if_capenable |= IFCAP_TSO4; np->xn_ifp->if_hwassist |= CSUM_TSO; } #endif return (err); } /** * Create a network device. * @param dev Newbus device representing this virtual NIC. */ int create_netdev(device_t dev) { int i; struct netfront_info *np; int err; struct ifnet *ifp; np = device_get_softc(dev); np->xbdev = dev; XN_LOCK_INIT(np, xennetif); ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); np->rx_target = RX_MIN_TARGET; np->rx_min_target = RX_MIN_TARGET; np->rx_max_target = RX_MAX_TARGET; /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
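 *
 * (Illustrative note, not part of the original comment: after the loop
 * below the tx chain reads tx_mbufs[0] = 1, tx_mbufs[1] = 2, ..., with
 * the last entry reset to 0 to terminate the free list, while every rx
 * chain entry is simply cleared to NULL.)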
*/ for (i = 0; i <= NET_TX_RING_SIZE; i++) { np->tx_mbufs[i] = (void *) ((u_long) i+1); np->grant_tx_ref[i] = GRANT_REF_INVALID; } np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0; for (i = 0; i <= NET_RX_RING_SIZE; i++) { np->rx_mbufs[i] = NULL; np->grant_rx_ref[i] = GRANT_REF_INVALID; } mbufq_init(&np->xn_rx_batch, INT_MAX); /* A grant for every tx ring slot */ if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, &np->gref_tx_head) != 0) { IPRINTK("#### netfront can't alloc tx grant refs\n"); err = ENOMEM; goto exit; } /* A grant for every rx ring slot */ if (gnttab_alloc_grant_references(RX_MAX_TARGET, &np->gref_rx_head) != 0) { WPRINTK("#### netfront can't alloc rx grant refs\n"); gnttab_free_grant_references(np->gref_tx_head); err = ENOMEM; goto exit; } err = xen_net_read_mac(dev, np->mac); if (err) goto out; /* Set up ifnet structure */ ifp = np->xn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = np; if_initname(ifp, "xn", device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xn_ioctl; ifp->if_output = ether_output; ifp->if_start = xn_start; #ifdef notyet ifp->if_watchdog = xn_watchdog; #endif ifp->if_init = xn_ifinit; ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; ifp->if_hwassist = XN_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = MAX_TX_REQ_FRAGS; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ether_ifattach(ifp, np->mac); callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); netfront_carrier_off(np); return (0); exit: gnttab_free_grant_references(np->gref_tx_head); out: return (err); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once is this done, we can switch to Closed in * acknowledgement. */ #if 0 static void netfront_closing(device_t dev) { #if 0 struct netfront_info *info = dev->dev_driver_data; DPRINTK("netfront_closing: %s removed\n", dev->nodename); close_netdev(info); #endif xenbus_switch_state(dev, XenbusStateClosed); } #endif static int netfront_detach(device_t dev) { struct netfront_info *info = device_get_softc(dev); DPRINTK("%s\n", xenbus_get_node(dev)); netif_free(info); return 0; } static void netif_free(struct netfront_info *info) { XN_LOCK(info); xn_stop(info); XN_UNLOCK(info); callout_drain(&info->xn_stat_ch); netif_disconnect_backend(info); if (info->xn_ifp != NULL) { ether_ifdetach(info->xn_ifp); if_free(info->xn_ifp); info->xn_ifp = NULL; } ifmedia_removeall(&info->sc_media); } static void netif_disconnect_backend(struct netfront_info *info) { XN_RX_LOCK(info); XN_TX_LOCK(info); netfront_carrier_off(info); XN_TX_UNLOCK(info); XN_RX_UNLOCK(info); free_ring(&info->tx_ring_ref, &info->tx.sring); free_ring(&info->rx_ring_ref, &info->rx.sring); xen_intr_unbind(&info->xen_intr_handle); } static void free_ring(int *ref, void *ring_ptr_ref) { void **ring_ptr_ptr = ring_ptr_ref; if (*ref != GRANT_REF_INVALID) { /* This API frees the associated storage. 
*/ gnttab_end_foreign_access(*ref, *ring_ptr_ptr); *ref = GRANT_REF_INVALID; } *ring_ptr_ptr = NULL; } static int xn_ifmedia_upd(struct ifnet *ifp) { return (0); } static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /* ** Driver registration ** */ static device_method_t netfront_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netfront_probe), DEVMETHOD(device_attach, netfront_attach), DEVMETHOD(device_detach, netfront_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, netfront_suspend), DEVMETHOD(device_resume, netfront_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), DEVMETHOD_END }; static driver_t netfront_driver = { "xn", netfront_methods, sizeof(struct netfront_info), }; devclass_t netfront_devclass; DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, NULL); Index: head/sys/i386/xen/pmap.c =================================================================== --- head/sys/i386/xen/pmap.c (revision 282273) +++ head/sys/i386/xen/pmap.c (nonexistent) @@ -1,4420 +0,0 @@ -/*- - * Copyright (c) 1991 Regents of the University of California. - * All rights reserved. - * Copyright (c) 1994 John S. Dyson - * All rights reserved. - * Copyright (c) 1994 David Greenman - * All rights reserved. - * Copyright (c) 2005 Alan L. Cox - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * the Systems Programming Group of the University of Utah Computer - * Science Department and William Jolitz of UUNET Technologies Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - */ -/*- - * Copyright (c) 2003 Networks Associates Technology, Inc. 
- * All rights reserved. - * - * This software was developed for the FreeBSD Project by Jake Burkholder, - * Safeport Network Services, and Network Associates Laboratories, the - * Security Research Division of Network Associates, Inc. under - * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA - * CHATS research program. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -/* - * Manages physical address maps. - * - * Since the information managed by this module is - * also stored by the logical address mapping module, - * this module may throw away valid virtual-to-physical - * mappings at almost any time. However, invalidations - * of virtual-to-physical mappings must be done as - * requested. - * - * In order to cope with hardware architectures which - * make virtual-to-physical map invalidates expensive, - * this module may delay invalidate or reduced protection - * operations until such time as they are actually - * necessary. This module is given full information as - * to which processors are currently using which maps, - * and to when physical maps must be made correct. 
- */ - -#include "opt_cpu.h" -#include "opt_pmap.h" -#include "opt_smp.h" -#include "opt_xbox.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef SMP -#include -#else -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#ifdef SMP -#include -#endif - -#ifdef XBOX -#include -#endif - -#include -#include -#include -#include -#include - -#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) -#define CPU_ENABLE_SSE -#endif - -#ifndef PMAP_SHPGPERPROC -#define PMAP_SHPGPERPROC 200 -#endif - -#define DIAGNOSTIC - -#if !defined(DIAGNOSTIC) -#ifdef __GNUC_GNU_INLINE__ -#define PMAP_INLINE __attribute__((__gnu_inline__)) inline -#else -#define PMAP_INLINE extern inline -#endif -#else -#define PMAP_INLINE -#endif - -#ifdef PV_STATS -#define PV_STAT(x) do { x ; } while (0) -#else -#define PV_STAT(x) do { } while (0) -#endif - -/* - * Get PDEs and PTEs for user/kernel address space - */ -#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) -#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) - -#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) -#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) -#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) -#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) -#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) - -#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) - -#define HAMFISTED_LOCKING -#ifdef HAMFISTED_LOCKING -static struct mtx createdelete_lock; -#endif - -struct pmap kernel_pmap_store; -LIST_HEAD(pmaplist, pmap); -static struct pmaplist allpmaps; -static struct mtx allpmaps_lock; - -vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ -vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ -int pgeflag = 0; /* PG_G or-in */ -int pseflag = 0; /* PG_PS or-in */ - -int nkpt; -vm_offset_t kernel_vm_end; -extern u_int32_t KERNend; - -#ifdef PAE -pt_entry_t pg_nx; -#endif - -static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); - -static int pat_works; /* Is page attribute table sane? */ - -/* - * This lock is defined as static in other pmap implementations. It cannot, - * however, be defined as static here, because it is (ab)used to serialize - * queued page table changes in other sources files. - */ -struct rwlock pvh_global_lock; - -/* - * Data for the pv entry allocation mechanism - */ -static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); -static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; -static int shpgperproc = PMAP_SHPGPERPROC; - -struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ -int pv_maxchunks; /* How many chunks we have KVA for */ -vm_offset_t pv_vafree; /* freelist stored in the PTE */ - -/* - * All those kernel PT submaps that BSD is so fond of - */ -struct sysmaps { - struct mtx lock; - pt_entry_t *CMAP1; - pt_entry_t *CMAP2; - caddr_t CADDR1; - caddr_t CADDR2; -}; -static struct sysmaps sysmaps_pcpu[MAXCPU]; -pt_entry_t *CMAP3; -caddr_t ptvmmap = 0; -caddr_t CADDR3; -struct msgbuf *msgbufp = 0; - -/* - * Crashdump maps. 
- */ -static caddr_t crashdumpmap; - -static pt_entry_t *PMAP1 = 0, *PMAP2; -static pt_entry_t *PADDR1 = 0, *PADDR2; -#ifdef SMP -static int PMAP1cpu; -static int PMAP1changedcpu; -SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, - &PMAP1changedcpu, 0, - "Number of times pmap_pte_quick changed CPU with same PMAP1"); -#endif -static int PMAP1changed; -SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, - &PMAP1changed, 0, - "Number of times pmap_pte_quick changed PMAP1"); -static int PMAP1unchanged; -SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, - &PMAP1unchanged, 0, - "Number of times pmap_pte_quick didn't change PMAP1"); -static struct mtx PMAP2mutex; - -static void free_pv_chunk(struct pv_chunk *pc); -static void free_pv_entry(pmap_t pmap, pv_entry_t pv); -static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); -static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); -static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, - vm_offset_t va); - -static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va, - vm_page_t m, vm_prot_t prot, vm_page_t mpte); -static void pmap_flush_page(vm_page_t m); -static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); -static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, - vm_page_t *free); -static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, - vm_page_t *free); -static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, - vm_offset_t va); -static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, - vm_page_t m); - -static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags); - -static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags); -static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free); -static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); -static void pmap_pte_release(pt_entry_t *pte); -static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *); -static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr); - -static __inline void pagezero(void *page); - -CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); -CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); - -/* - * If you get an error here, then you set KVA_PAGES wrong! See the - * description of KVA_PAGES in sys/i386/include/pmap.h. It must be - * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE. - */ -CTASSERT(KERNBASE % (1 << 24) == 0); - -void -pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type) -{ - vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]); - - switch (type) { - case SH_PD_SET_VA: -#if 0 - xen_queue_pt_update(shadow_pdir_ma, - xpmap_ptom(val & ~(PG_RW))); -#endif - xen_queue_pt_update(pdir_ma, - xpmap_ptom(val)); - break; - case SH_PD_SET_VA_MA: -#if 0 - xen_queue_pt_update(shadow_pdir_ma, - val & ~(PG_RW)); -#endif - xen_queue_pt_update(pdir_ma, val); - break; - case SH_PD_SET_VA_CLEAR: -#if 0 - xen_queue_pt_update(shadow_pdir_ma, 0); -#endif - xen_queue_pt_update(pdir_ma, 0); - break; - } -} - -/* - * Bootstrap the system enough to run with virtual memory. - * - * On the i386 this is called after mapping has already been enabled - * and just syncs the pmap module with what has already been done. 
- * [We can't call it easily with mapping off since the kernel is not - * mapped with PA == VA, hence we would have to relocate every address - * from the linked base (virtual) address "KERNBASE" to the actual - * (physical) address starting relative to 0] - */ -void -pmap_bootstrap(vm_paddr_t firstaddr) -{ - vm_offset_t va; - pt_entry_t *pte, *unused; - struct sysmaps *sysmaps; - int i; - - /* - * Initialize the first available kernel virtual address. However, - * using "firstaddr" may waste a few pages of the kernel virtual - * address space, because locore may not have mapped every physical - * page that it allocated. Preferably, locore would provide a first - * unused virtual address in addition to "firstaddr". - */ - virtual_avail = (vm_offset_t) KERNBASE + firstaddr; - - virtual_end = VM_MAX_KERNEL_ADDRESS; - - /* - * Initialize the kernel pmap (which is statically allocated). - */ - PMAP_LOCK_INIT(kernel_pmap); - kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); -#ifdef PAE - kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); -#endif - CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ - TAILQ_INIT(&kernel_pmap->pm_pvchunk); - - /* - * Initialize the global pv list lock. - */ - rw_init_flags(&pvh_global_lock, "pmap pv global", RW_RECURSE); - - LIST_INIT(&allpmaps); - mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); - mtx_lock_spin(&allpmaps_lock); - LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); - mtx_unlock_spin(&allpmaps_lock); - if (nkpt == 0) - nkpt = NKPT; - - /* - * Reserve some special page table entries/VA space for temporary - * mapping of pages. - */ -#define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); - - va = virtual_avail; - pte = vtopte(va); - - /* - * CMAP1/CMAP2 are used for zeroing and copying pages. - * CMAP3 is used for the idle process page zeroing. - */ - for (i = 0; i < MAXCPU; i++) { - sysmaps = &sysmaps_pcpu[i]; - mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); - SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1) - SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1) - PT_SET_MA(sysmaps->CADDR1, 0); - PT_SET_MA(sysmaps->CADDR2, 0); - } - SYSMAP(caddr_t, CMAP3, CADDR3, 1) - PT_SET_MA(CADDR3, 0); - - /* - * Crashdump maps. - */ - SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) - - /* - * ptvmmap is used for reading arbitrary physical pages via /dev/mem. - */ - SYSMAP(caddr_t, unused, ptvmmap, 1) - - /* - * msgbufp is used to map the system message buffer. - */ - SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize))) - - /* - * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(), - * respectively. - */ - SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1) - SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1) - - mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); - - virtual_avail = va; - - /* - * Leave in place an identity mapping (virt == phys) for the low 1 MB - * physical memory region that is used by the ACPI wakeup code. This - * mapping must not have PG_G set. - */ -#ifndef XEN - /* - * leave here deliberately to show that this is not supported - */ -#ifdef XBOX - /* FIXME: This is gross, but needed for the XBOX. Since we are in such - * an early stadium, we cannot yet neatly map video memory ... :-( - * Better fixes are very welcome! */ - if (!arch_i386_is_xbox) -#endif - for (i = 1; i < NKPT; i++) - PTD[i] = 0; - - /* Initialize the PAT MSR if present. 
*/ - pmap_init_pat(); - - /* Turn on PG_G on kernel page(s) */ - pmap_set_pg(); -#endif - -#ifdef HAMFISTED_LOCKING - mtx_init(&createdelete_lock, "pmap create/delete", NULL, MTX_DEF); -#endif -} - -/* - * Setup the PAT MSR. - */ -void -pmap_init_pat(void) -{ - uint64_t pat_msr; - - /* Bail if this CPU doesn't implement PAT. */ - if (!(cpu_feature & CPUID_PAT)) - return; - - if (cpu_vendor_id != CPU_VENDOR_INTEL || - (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) { - /* - * Leave the indices 0-3 at the default of WB, WT, UC, and UC-. - * Program 4 and 5 as WP and WC. - * Leave 6 and 7 as UC and UC-. - */ - pat_msr = rdmsr(MSR_PAT); - pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); - pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | - PAT_VALUE(5, PAT_WRITE_COMBINING); - pat_works = 1; - } else { - /* - * Due to some Intel errata, we can only safely use the lower 4 - * PAT entries. Thus, just replace PAT Index 2 with WC instead - * of UC-. - * - * Intel Pentium III Processor Specification Update - * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B - * or Mode C Paging) - * - * Intel Pentium IV Processor Specification Update - * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) - */ - pat_msr = rdmsr(MSR_PAT); - pat_msr &= ~PAT_MASK(2); - pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); - pat_works = 0; - } - wrmsr(MSR_PAT, pat_msr); -} - -/* - * Initialize a vm_page's machine-dependent fields. - */ -void -pmap_page_init(vm_page_t m) -{ - - TAILQ_INIT(&m->md.pv_list); - m->md.pat_mode = PAT_WRITE_BACK; -} - -/* - * ABuse the pte nodes for unmapped kva to thread a kva freelist through. - * Requirements: - * - Must deal with pages in order to ensure that none of the PG_* bits - * are ever set, PG_V in particular. - * - Assumes we can write to ptes without pte_store() atomic ops, even - * on PAE systems. This should be ok. - * - Assumes nothing will ever test these addresses for 0 to indicate - * no mapping instead of correctly checking PG_V. - * - Assumes a vm_offset_t will fit in a pte (true for i386). - * Because PG_V is never set, there can be no mappings to invalidate. - */ -static int ptelist_count = 0; -static vm_offset_t -pmap_ptelist_alloc(vm_offset_t *head) -{ - vm_offset_t va; - vm_offset_t *phead = (vm_offset_t *)*head; - - if (ptelist_count == 0) { - printf("out of memory!!!!!!\n"); - return (0); /* Out of memory */ - } - ptelist_count--; - va = phead[ptelist_count]; - return (va); -} - -static void -pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) -{ - vm_offset_t *phead = (vm_offset_t *)*head; - - phead[ptelist_count++] = va; -} - -static void -pmap_ptelist_init(vm_offset_t *head, void *base, int npages) -{ - int i, nstackpages; - vm_offset_t va; - vm_page_t m; - - nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1)/ (PAGE_SIZE/sizeof(vm_offset_t)); - for (i = 0; i < nstackpages; i++) { - va = (vm_offset_t)base + i * PAGE_SIZE; - m = vm_page_alloc(NULL, i, - VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - pmap_qenter(va, &m, 1); - } - - *head = (vm_offset_t)base; - for (i = npages - 1; i >= nstackpages; i--) { - va = (vm_offset_t)base + i * PAGE_SIZE; - pmap_ptelist_free(head, va); - } -} - - -/* - * Initialize the pmap module. - * Called by vm_init, to initialize any structures that the pmap - * system needs to map virtual memory. - */ -void -pmap_init(void) -{ - - /* - * Initialize the address space (zone) for the pv entries. 
Set a - * high water mark so that the system can recover from excessive - * numbers of pv entries. - */ - TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); - pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; - TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); - pv_entry_max = roundup(pv_entry_max, _NPCPV); - pv_entry_high_water = 9 * (pv_entry_max / 10); - - pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); - pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); - if (pv_chunkbase == NULL) - panic("pmap_init: not enough kvm for pv chunks"); - pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); -} - - -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, - "Max number of PV entries"); -SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, - "Page share factor per proc"); - -static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, - "2/4MB page mapping counters"); - -static u_long pmap_pde_mappings; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, - &pmap_pde_mappings, 0, "2/4MB page mappings"); - -/*************************************************** - * Low level helper routines..... - ***************************************************/ - -/* - * Determine the appropriate bits to set in a PTE or PDE for a specified - * caching mode. - */ -int -pmap_cache_bits(int mode, boolean_t is_pde) -{ - int pat_flag, pat_index, cache_bits; - - /* The PAT bit is different for PTE's and PDE's. */ - pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; - - /* If we don't support PAT, map extended modes to older ones. */ - if (!(cpu_feature & CPUID_PAT)) { - switch (mode) { - case PAT_UNCACHEABLE: - case PAT_WRITE_THROUGH: - case PAT_WRITE_BACK: - break; - case PAT_UNCACHED: - case PAT_WRITE_COMBINING: - case PAT_WRITE_PROTECTED: - mode = PAT_UNCACHEABLE; - break; - } - } - - /* Map the caching mode to a PAT index. */ - if (pat_works) { - switch (mode) { - case PAT_UNCACHEABLE: - pat_index = 3; - break; - case PAT_WRITE_THROUGH: - pat_index = 1; - break; - case PAT_WRITE_BACK: - pat_index = 0; - break; - case PAT_UNCACHED: - pat_index = 2; - break; - case PAT_WRITE_COMBINING: - pat_index = 5; - break; - case PAT_WRITE_PROTECTED: - pat_index = 4; - break; - default: - panic("Unknown caching mode %d\n", mode); - } - } else { - switch (mode) { - case PAT_UNCACHED: - case PAT_UNCACHEABLE: - case PAT_WRITE_PROTECTED: - pat_index = 3; - break; - case PAT_WRITE_THROUGH: - pat_index = 1; - break; - case PAT_WRITE_BACK: - pat_index = 0; - break; - case PAT_WRITE_COMBINING: - pat_index = 2; - break; - default: - panic("Unknown caching mode %d\n", mode); - } - } - - /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ - cache_bits = 0; - if (pat_index & 0x4) - cache_bits |= pat_flag; - if (pat_index & 0x2) - cache_bits |= PG_NC_PCD; - if (pat_index & 0x1) - cache_bits |= PG_NC_PWT; - return (cache_bits); -} -#ifdef SMP -/* - * For SMP, these functions have to use the IPI mechanism for coherence. - * - * N.B.: Before calling any of the following TLB invalidation functions, - * the calling processor must ensure that all stores updating a non- - * kernel page table are globally performed. Otherwise, another - * processor could cache an old, pre-update entry without being - * invalidated. This can happen one of two ways: (1) The pmap becomes - * active on another processor after its pm_active field is checked by - * one of the following functions but before a store updating the page - * table is globally performed. 
(2) The pmap becomes active on another
- * processor before its pm_active field is checked but due to
- * speculative loads one of the following functions still reads
- * the pmap as inactive on the other processor.
- *
- * The kernel page table is exempt because its pm_active field is
- * immutable.  The kernel page table is always active on every
- * processor.
- */
-void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
-{
-    cpuset_t other_cpus;
-    u_int cpuid;
-
-    CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
-        pmap, va);
-
-    sched_pin();
-    if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-        invlpg(va);
-        smp_invlpg(va);
-    } else {
-        cpuid = PCPU_GET(cpuid);
-        other_cpus = all_cpus;
-        CPU_CLR(cpuid, &other_cpus);
-        if (CPU_ISSET(cpuid, &pmap->pm_active))
-            invlpg(va);
-        CPU_AND(&other_cpus, &pmap->pm_active);
-        if (!CPU_EMPTY(&other_cpus))
-            smp_masked_invlpg(other_cpus, va);
-    }
-    sched_unpin();
-    PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
-{
-    cpuset_t other_cpus;
-    vm_offset_t addr;
-    u_int cpuid;
-
-    CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
-        pmap, sva, eva);
-
-    sched_pin();
-    if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-        for (addr = sva; addr < eva; addr += PAGE_SIZE)
-            invlpg(addr);
-        smp_invlpg_range(sva, eva);
-    } else {
-        cpuid = PCPU_GET(cpuid);
-        other_cpus = all_cpus;
-        CPU_CLR(cpuid, &other_cpus);
-        if (CPU_ISSET(cpuid, &pmap->pm_active))
-            for (addr = sva; addr < eva; addr += PAGE_SIZE)
-                invlpg(addr);
-        CPU_AND(&other_cpus, &pmap->pm_active);
-        if (!CPU_EMPTY(&other_cpus))
-            smp_masked_invlpg_range(other_cpus, sva, eva);
-    }
-    sched_unpin();
-    PT_UPDATES_FLUSH();
-}
-
-void
-pmap_invalidate_all(pmap_t pmap)
-{
-    cpuset_t other_cpus;
-    u_int cpuid;
-
-    CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
-
-    sched_pin();
-    if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
-        invltlb();
-        smp_invltlb();
-    } else {
-        cpuid = PCPU_GET(cpuid);
-        other_cpus = all_cpus;
-        CPU_CLR(cpuid, &other_cpus);
-        if (CPU_ISSET(cpuid, &pmap->pm_active))
-            invltlb();
-        CPU_AND(&other_cpus, &pmap->pm_active);
-        if (!CPU_EMPTY(&other_cpus))
-            smp_masked_invltlb(other_cpus);
-    }
-    sched_unpin();
-}
-
-void
-pmap_invalidate_cache(void)
-{
-
-    sched_pin();
-    wbinvd();
-    smp_cache_flush();
-    sched_unpin();
-}
-#else /* !SMP */
-/*
- * Normal, non-SMP, 486+ invalidation functions.
- * We inline these within pmap.c for speed.
- */ -PMAP_INLINE void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) -{ - CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", - pmap, va); - - if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) - invlpg(va); - PT_UPDATES_FLUSH(); -} - -PMAP_INLINE void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - vm_offset_t addr; - - if (eva - sva > PAGE_SIZE) - CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x", - pmap, sva, eva); - - if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) - for (addr = sva; addr < eva; addr += PAGE_SIZE) - invlpg(addr); - PT_UPDATES_FLUSH(); -} - -PMAP_INLINE void -pmap_invalidate_all(pmap_t pmap) -{ - - CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap); - - if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) - invltlb(); -} - -PMAP_INLINE void -pmap_invalidate_cache(void) -{ - - wbinvd(); -} -#endif /* !SMP */ - -#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) - -void -pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force) -{ - - if (force) { - sva &= ~(vm_offset_t)cpu_clflush_line_size; - } else { - KASSERT((sva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: sva not page-aligned")); - KASSERT((eva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: eva not page-aligned")); - } - - if ((cpu_feature & CPUID_SS) != 0 && !force) - ; /* If "Self Snoop" is supported, do nothing. */ - else if ((cpu_feature & CPUID_CLFSH) != 0 && - eva - sva < PMAP_CLFLUSH_THRESHOLD) { - - /* - * Otherwise, do per-cache line flush. Use the mfence - * instruction to insure that previous stores are - * included in the write-back. The processor - * propagates flush to other processors in the cache - * coherence domain. - */ - mfence(); - for (; sva < eva; sva += cpu_clflush_line_size) - clflush(sva); - mfence(); - } else { - - /* - * No targeted cache flush methods are supported by CPU, - * or the supplied range is bigger than 2MB. - * Globally invalidate cache. - */ - pmap_invalidate_cache(); - } -} - -void -pmap_invalidate_cache_pages(vm_page_t *pages, int count) -{ - int i; - - if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || - (cpu_feature & CPUID_CLFSH) == 0) { - pmap_invalidate_cache(); - } else { - for (i = 0; i < count; i++) - pmap_flush_page(pages[i]); - } -} - -/* - * Are we current address space or kernel? N.B. We return FALSE when - * a pmap's page table is in use because a kernel thread is borrowing - * it. The borrowed page table can change spontaneously, making any - * dependence on its continued use subject to a race condition. - */ -static __inline int -pmap_is_current(pmap_t pmap) -{ - - return (pmap == kernel_pmap || - (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && - (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); -} - -/* - * If the given pmap is not the current or kernel pmap, the returned pte must - * be released by passing it to pmap_pte_release(). - */ -pt_entry_t * -pmap_pte(pmap_t pmap, vm_offset_t va) -{ - pd_entry_t newpf; - pd_entry_t *pde; - - pde = pmap_pde(pmap, va); - if (*pde & PG_PS) - return (pde); - if (*pde != 0) { - /* are we current address space or kernel? 
*/ - if (pmap_is_current(pmap)) - return (vtopte(va)); - mtx_lock(&PMAP2mutex); - newpf = *pde & PG_FRAME; - if ((*PMAP2 & PG_FRAME) != newpf) { - PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M); - CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x", - pmap, va, (*PMAP2 & 0xffffffff)); - } - return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); - } - return (NULL); -} - -/* - * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte - * being NULL. - */ -static __inline void -pmap_pte_release(pt_entry_t *pte) -{ - - if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) { - CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx", - *PMAP2); - rw_wlock(&pvh_global_lock); - PT_SET_VA(PMAP2, 0, TRUE); - rw_wunlock(&pvh_global_lock); - mtx_unlock(&PMAP2mutex); - } -} - -static __inline void -invlcaddr(void *caddr) -{ - - invlpg((u_int)caddr); - PT_UPDATES_FLUSH(); -} - -/* - * Super fast pmap_pte routine best used when scanning - * the pv lists. This eliminates many coarse-grained - * invltlb calls. Note that many of the pv list - * scans are across different pmaps. It is very wasteful - * to do an entire invltlb for checking a single mapping. - * - * If the given pmap is not the current pmap, pvh_global_lock - * must be held and curthread pinned to a CPU. - */ -static pt_entry_t * -pmap_pte_quick(pmap_t pmap, vm_offset_t va) -{ - pd_entry_t newpf; - pd_entry_t *pde; - - pde = pmap_pde(pmap, va); - if (*pde & PG_PS) - return (pde); - if (*pde != 0) { - /* are we current address space or kernel? */ - if (pmap_is_current(pmap)) - return (vtopte(va)); - rw_assert(&pvh_global_lock, RA_WLOCKED); - KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); - newpf = *pde & PG_FRAME; - if ((*PMAP1 & PG_FRAME) != newpf) { - PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M); - CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x", - pmap, va, (u_long)*PMAP1); - -#ifdef SMP - PMAP1cpu = PCPU_GET(cpuid); -#endif - PMAP1changed++; - } else -#ifdef SMP - if (PMAP1cpu != PCPU_GET(cpuid)) { - PMAP1cpu = PCPU_GET(cpuid); - invlcaddr(PADDR1); - PMAP1changedcpu++; - } else -#endif - PMAP1unchanged++; - return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); - } - return (0); -} - -/* - * Routine: pmap_extract - * Function: - * Extract the physical page address associated - * with the given map/virtual_address pair. - */ -vm_paddr_t -pmap_extract(pmap_t pmap, vm_offset_t va) -{ - vm_paddr_t rtval; - pt_entry_t *pte; - pd_entry_t pde; - pt_entry_t pteval; - - rtval = 0; - PMAP_LOCK(pmap); - pde = pmap->pm_pdir[va >> PDRSHIFT]; - if (pde != 0) { - if ((pde & PG_PS) != 0) { - rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK); - PMAP_UNLOCK(pmap); - return rtval; - } - pte = pmap_pte(pmap, va); - pteval = *pte ? 
xpmap_mtop(*pte) : 0; - rtval = (pteval & PG_FRAME) | (va & PAGE_MASK); - pmap_pte_release(pte); - } - PMAP_UNLOCK(pmap); - return (rtval); -} - -/* - * Routine: pmap_extract_ma - * Function: - * Like pmap_extract, but returns machine address - */ -vm_paddr_t -pmap_extract_ma(pmap_t pmap, vm_offset_t va) -{ - vm_paddr_t rtval; - pt_entry_t *pte; - pd_entry_t pde; - - rtval = 0; - PMAP_LOCK(pmap); - pde = pmap->pm_pdir[va >> PDRSHIFT]; - if (pde != 0) { - if ((pde & PG_PS) != 0) { - rtval = (pde & ~PDRMASK) | (va & PDRMASK); - PMAP_UNLOCK(pmap); - return rtval; - } - pte = pmap_pte(pmap, va); - rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); - pmap_pte_release(pte); - } - PMAP_UNLOCK(pmap); - return (rtval); -} - -/* - * Routine: pmap_extract_and_hold - * Function: - * Atomically extract and hold the physical page - * with the given pmap and virtual address pair - * if that mapping permits the given protection. - */ -vm_page_t -pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) -{ - pd_entry_t pde; - pt_entry_t pte, *ptep; - vm_page_t m; - vm_paddr_t pa; - - pa = 0; - m = NULL; - PMAP_LOCK(pmap); -retry: - pde = PT_GET(pmap_pde(pmap, va)); - if (pde != 0) { - if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & - PG_PS_FRAME) | (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | - (va & PDRMASK)); - vm_page_hold(m); - } - } else { - ptep = pmap_pte(pmap, va); - pte = PT_GET(ptep); - pmap_pte_release(ptep); - if (pte != 0 && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, - &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pte & PG_FRAME); - vm_page_hold(m); - } - } - } - PA_UNLOCK_COND(pa); - PMAP_UNLOCK(pmap); - return (m); -} - -/*************************************************** - * Low level mapping routines..... - ***************************************************/ - -/* - * Add a wired page to the kva. - * Note: not SMP coherent. - * - * This function may be used before pmap_bootstrap() is called. - */ -void -pmap_kenter(vm_offset_t va, vm_paddr_t pa) -{ - - PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag); -} - -void -pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma) -{ - pt_entry_t *pte; - - pte = vtopte(va); - pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag); -} - -static __inline void -pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) -{ - - PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); -} - -/* - * Remove a page from the kernel pagetables. - * Note: not SMP coherent. - * - * This function may be used before pmap_bootstrap() is called. - */ -PMAP_INLINE void -pmap_kremove(vm_offset_t va) -{ - pt_entry_t *pte; - - pte = vtopte(va); - PT_CLEAR_VA(pte, FALSE); -} - -/* - * Used to map a range of physical addresses into kernel - * virtual address space. - * - * The value passed in '*virt' is a suggested virtual address for - * the mapping. Architectures which can support a direct-mapped - * physical to virtual region can return the appropriate address - * within that region, leaving '*virt' unchanged. Other - * architectures should map the pages starting at '*virt' and - * update '*virt' with the first usable address after the mapped - * region. 
- */ -vm_offset_t -pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) -{ - vm_offset_t va, sva; - - va = sva = *virt; - CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x", - va, start, end, prot); - while (start < end) { - pmap_kenter(va, start); - va += PAGE_SIZE; - start += PAGE_SIZE; - } - pmap_invalidate_range(kernel_pmap, sva, va); - *virt = va; - return (sva); -} - - -/* - * Add a list of wired pages to the kva - * this routine is only used for temporary - * kernel mappings that do not need to have - * page modification or references recorded. - * Note that old mappings are simply written - * over. The page *must* be wired. - * Note: SMP coherent. Uses a ranged shootdown IPI. - */ -void -pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) -{ - pt_entry_t *endpte, *pte; - vm_paddr_t pa; - vm_offset_t va = sva; - int mclcount = 0; - multicall_entry_t mcl[16]; - multicall_entry_t *mclp = mcl; - int error; - - CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count); - pte = vtopte(sva); - endpte = pte + count; - while (pte < endpte) { - pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A; - - mclp->op = __HYPERVISOR_update_va_mapping; - mclp->args[0] = va; - mclp->args[1] = (uint32_t)(pa & 0xffffffff); - mclp->args[2] = (uint32_t)(pa >> 32); - mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0; - - va += PAGE_SIZE; - pte++; - ma++; - mclp++; - mclcount++; - if (mclcount == 16) { - error = HYPERVISOR_multicall(mcl, mclcount); - mclp = mcl; - mclcount = 0; - KASSERT(error == 0, ("bad multicall %d", error)); - } - } - if (mclcount) { - error = HYPERVISOR_multicall(mcl, mclcount); - KASSERT(error == 0, ("bad multicall %d", error)); - } - -#ifdef INVARIANTS - for (pte = vtopte(sva), mclcount = 0; mclcount < count; mclcount++, pte++) - KASSERT(*pte, ("pte not set for va=0x%x", sva + mclcount*PAGE_SIZE)); -#endif -} - -/* - * This routine tears out page mappings from the - * kernel -- it is meant only for temporary mappings. - * Note: SMP coherent. Uses a ranged shootdown IPI. - */ -void -pmap_qremove(vm_offset_t sva, int count) -{ - vm_offset_t va; - - CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count); - va = sva; - rw_wlock(&pvh_global_lock); - critical_enter(); - while (count-- > 0) { - pmap_kremove(va); - va += PAGE_SIZE; - } - PT_UPDATES_FLUSH(); - pmap_invalidate_range(kernel_pmap, sva, va); - critical_exit(); - rw_wunlock(&pvh_global_lock); -} - -/*************************************************** - * Page table page management routines..... - ***************************************************/ -static __inline void -pmap_free_zero_pages(vm_page_t free) -{ - vm_page_t m; - - while (free != NULL) { - m = free; - free = (void *)m->object; - m->object = NULL; - vm_page_free_zero(m); - } -} - -/* - * Decrements a page table page's wire count, which is used to record the - * number of valid page table entries within the page. If the wire count - * drops to zero, then the page table page is unmapped. Returns TRUE if the - * page table page was unmapped and FALSE otherwise. 
- */ -static inline boolean_t -pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free) -{ - - --m->wire_count; - if (m->wire_count == 0) { - _pmap_unwire_ptp(pmap, m, free); - return (TRUE); - } else - return (FALSE); -} - -static void -_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free) -{ - vm_offset_t pteva; - - PT_UPDATES_FLUSH(); - /* - * unmap the page table page - */ - xen_pt_unpin(pmap->pm_pdir[m->pindex]); - /* - * page *might* contain residual mapping :-/ - */ - PD_CLEAR_VA(pmap, m->pindex, TRUE); - pmap_zero_page(m); - --pmap->pm_stats.resident_count; - - /* - * This is a release store so that the ordinary store unmapping - * the page table page is globally performed before TLB shoot- - * down is begun. - */ - atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); - - /* - * Do an invltlb to make the invalidated mapping - * take effect immediately. - */ - pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); - pmap_invalidate_page(pmap, pteva); - - /* - * Put page on a list so that it is released after - * *ALL* TLB shootdown is done - */ - m->object = (void *)*free; - *free = m; -} - -/* - * After removing a page table entry, this routine is used to - * conditionally free the page, and manage the hold/wire counts. - */ -static int -pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) -{ - pd_entry_t ptepde; - vm_page_t mpte; - - if (va >= VM_MAXUSER_ADDRESS) - return (0); - ptepde = PT_GET(pmap_pde(pmap, va)); - mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); - return (pmap_unwire_ptp(pmap, mpte, free)); -} - -/* - * Initialize the pmap for the swapper process. - */ -void -pmap_pinit0(pmap_t pmap) -{ - - PMAP_LOCK_INIT(pmap); - /* - * Since the page table directory is shared with the kernel pmap, - * which is already included in the list "allpmaps", this pmap does - * not need to be inserted into that list. - */ - pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); -#ifdef PAE - pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); -#endif - CPU_ZERO(&pmap->pm_active); - PCPU_SET(curpmap, pmap); - TAILQ_INIT(&pmap->pm_pvchunk); - bzero(&pmap->pm_stats, sizeof pmap->pm_stats); -} - -/* - * Initialize a preallocated and zeroed pmap structure, - * such as one in a vmspace structure. - */ -int -pmap_pinit(pmap_t pmap) -{ - vm_page_t m, ptdpg[NPGPTD + 1]; - int npgptd = NPGPTD + 1; - int i; - -#ifdef HAMFISTED_LOCKING - mtx_lock(&createdelete_lock); -#endif - - /* - * No need to allocate page table space yet but we do need a valid - * page directory table. - */ - if (pmap->pm_pdir == NULL) { - pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD); - if (pmap->pm_pdir == NULL) { -#ifdef HAMFISTED_LOCKING - mtx_unlock(&createdelete_lock); -#endif - return (0); - } -#ifdef PAE - pmap->pm_pdpt = (pd_entry_t *)kva_alloc(1); -#endif - } - - /* - * allocate the page directory page(s) - */ - for (i = 0; i < npgptd;) { - m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | - VM_ALLOC_WIRED | VM_ALLOC_ZERO); - if (m == NULL) - VM_WAIT; - else { - ptdpg[i++] = m; - } - } - - pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); - - for (i = 0; i < NPGPTD; i++) - if ((ptdpg[i]->flags & PG_ZERO) == 0) - pagezero(pmap->pm_pdir + (i * NPDEPG)); - - mtx_lock_spin(&allpmaps_lock); - LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); - /* Copy the kernel page table directory entries. 
*/ - bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); - mtx_unlock_spin(&allpmaps_lock); - -#ifdef PAE - pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1); - if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0) - bzero(pmap->pm_pdpt, PAGE_SIZE); - for (i = 0; i < NPGPTD; i++) { - vm_paddr_t ma; - - ma = VM_PAGE_TO_MACH(ptdpg[i]); - pmap->pm_pdpt[i] = ma | PG_V; - - } -#endif - for (i = 0; i < NPGPTD; i++) { - pt_entry_t *pd; - vm_paddr_t ma; - - ma = VM_PAGE_TO_MACH(ptdpg[i]); - pd = pmap->pm_pdir + (i * NPDEPG); - PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW)); -#if 0 - xen_pgd_pin(ma); -#endif - } - -#ifdef PAE - PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW); -#endif - rw_wlock(&pvh_global_lock); - xen_flush_queue(); - xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD])); - for (i = 0; i < NPGPTD; i++) { - vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]); - PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE); - } - xen_flush_queue(); - rw_wunlock(&pvh_global_lock); - CPU_ZERO(&pmap->pm_active); - TAILQ_INIT(&pmap->pm_pvchunk); - bzero(&pmap->pm_stats, sizeof pmap->pm_stats); - -#ifdef HAMFISTED_LOCKING - mtx_unlock(&createdelete_lock); -#endif - return (1); -} - -/* - * this routine is called if the page table page is not - * mapped correctly. - */ -static vm_page_t -_pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags) -{ - vm_paddr_t ptema; - vm_page_t m; - - /* - * Allocate a page table page. - */ - if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | - VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { - if ((flags & PMAP_ENTER_NOSLEEP) == 0) { - PMAP_UNLOCK(pmap); - rw_wunlock(&pvh_global_lock); - VM_WAIT; - rw_wlock(&pvh_global_lock); - PMAP_LOCK(pmap); - } - - /* - * Indicate the need to retry. While waiting, the page table - * page may have been allocated. - */ - return (NULL); - } - if ((m->flags & PG_ZERO) == 0) - pmap_zero_page(m); - - /* - * Map the pagetable page into the process address space, if - * it isn't already there. - */ - - pmap->pm_stats.resident_count++; - - ptema = VM_PAGE_TO_MACH(m); - xen_pt_pin(ptema); - PT_SET_VA_MA(&pmap->pm_pdir[ptepindex], - (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE); - - KASSERT(pmap->pm_pdir[ptepindex], - ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex)); - return (m); -} - -static vm_page_t -pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags) -{ - u_int ptepindex; - pd_entry_t ptema; - vm_page_t m; - - /* - * Calculate pagetable page index - */ - ptepindex = va >> PDRSHIFT; -retry: - /* - * Get the page directory entry - */ - ptema = pmap->pm_pdir[ptepindex]; - - /* - * This supports switching from a 4MB page to a - * normal 4K page. - */ - if (ptema & PG_PS) { - /* - * XXX - */ - pmap->pm_pdir[ptepindex] = 0; - ptema = 0; - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - pmap_invalidate_all(kernel_pmap); - } - - /* - * If the page table page is mapped, we just increment the - * hold count, and activate it. - */ - if (ptema & PG_V) { - m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME); - m->wire_count++; - } else { - /* - * Here if the pte page isn't mapped, or if it has - * been deallocated. 
- */ - CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x", - pmap, va, flags); - m = _pmap_allocpte(pmap, ptepindex, flags); - if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) - goto retry; - - KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex)); - } - return (m); -} - - -/*************************************************** -* Pmap allocation/deallocation routines. - ***************************************************/ - - -/* - * Release any resources held by the given physical map. - * Called when a pmap initialized by pmap_pinit is being released. - * Should only be called if the map contains no valid mappings. - */ -void -pmap_release(pmap_t pmap) -{ - vm_page_t m, ptdpg[2*NPGPTD+1]; - vm_paddr_t ma; - int i; -#ifdef PAE - int npgptd = NPGPTD + 1; -#else - int npgptd = NPGPTD; -#endif - - KASSERT(pmap->pm_stats.resident_count == 0, - ("pmap_release: pmap resident count %ld != 0", - pmap->pm_stats.resident_count)); - PT_UPDATES_FLUSH(); - -#ifdef HAMFISTED_LOCKING - mtx_lock(&createdelete_lock); -#endif - - KASSERT(CPU_EMPTY(&pmap->pm_active), - ("releasing active pmap %p", pmap)); - mtx_lock_spin(&allpmaps_lock); - LIST_REMOVE(pmap, pm_list); - mtx_unlock_spin(&allpmaps_lock); - - for (i = 0; i < NPGPTD; i++) - ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME); - pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); -#ifdef PAE - ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt)); -#endif - - for (i = 0; i < npgptd; i++) { - m = ptdpg[i]; - ma = VM_PAGE_TO_MACH(m); - /* unpinning L1 and L2 treated the same */ -#if 0 - xen_pgd_unpin(ma); -#else - if (i == NPGPTD) - xen_pgd_unpin(ma); -#endif -#ifdef PAE - if (i < NPGPTD) - KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME), - ("pmap_release: got wrong ptd page")); -#endif - m->wire_count--; - atomic_subtract_int(&vm_cnt.v_wire_count, 1); - vm_page_free(m); - } -#ifdef PAE - pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1); -#endif - -#ifdef HAMFISTED_LOCKING - mtx_unlock(&createdelete_lock); -#endif -} - -static int -kvm_size(SYSCTL_HANDLER_ARGS) -{ - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - - return (sysctl_handle_long(oidp, &ksize, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_size, "IU", "Size of KVM"); - -static int -kvm_free(SYSCTL_HANDLER_ARGS) -{ - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - - return (sysctl_handle_long(oidp, &kfree, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_free, "IU", "Amount of KVM free"); - -/* - * grow the number of kernel page table entries, if needed - */ -void -pmap_growkernel(vm_offset_t addr) -{ - struct pmap *pmap; - vm_paddr_t ptppaddr; - vm_page_t nkpg; - pd_entry_t newpdir; - - mtx_assert(&kernel_map->system_mtx, MA_OWNED); - if (kernel_vm_end == 0) { - kernel_vm_end = KERNBASE; - nkpt = 0; - while (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); - nkpt++; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; - break; - } - } - } - addr = roundup2(addr, NBPDR); - if (addr - 1 >= kernel_map->max_offset) - addr = kernel_map->max_offset; - while (kernel_vm_end < addr) { - if (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; - break; - } - continue; - } - - nkpg = 
vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, - VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - if (nkpg == NULL) - panic("pmap_growkernel: no memory to grow kernel"); - - nkpt++; - - if ((nkpg->flags & PG_ZERO) == 0) - pmap_zero_page(nkpg); - ptppaddr = VM_PAGE_TO_PHYS(nkpg); - newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); - rw_wlock(&pvh_global_lock); - PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE); - mtx_lock_spin(&allpmaps_lock); - LIST_FOREACH(pmap, &allpmaps, pm_list) - PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE); - - mtx_unlock_spin(&allpmaps_lock); - rw_wunlock(&pvh_global_lock); - - kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= kernel_map->max_offset) { - kernel_vm_end = kernel_map->max_offset; - break; - } - } -} - - -/*************************************************** - * page management routines. - ***************************************************/ - -CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); -CTASSERT(_NPCM == 11); -CTASSERT(_NPCPV == 336); - -static __inline struct pv_chunk * -pv_to_chunk(pv_entry_t pv) -{ - - return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); -} - -#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) - -#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ -#define PC_FREE10 0x0000fffful /* Free values for index 10 */ - -static const uint32_t pc_freemask[_NPCM] = { - PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, - PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, - PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, - PC_FREE0_9, PC_FREE10 -}; - -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, - "Current number of pv entries"); - -#ifdef PV_STATS -static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; - -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, - "Current number of pv entry chunks"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, - "Current number of pv entry chunks allocated"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, - "Current number of pv entry chunks frees"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, - "Number of times tried to get a chunk page but failed."); - -static long pv_entry_frees, pv_entry_allocs; -static int pv_entry_spare; - -SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, - "Current number of pv entry frees"); -SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, - "Current number of pv entry allocs"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, - "Current number of spare pv entries"); -#endif - -/* - * We are in a serious low memory condition. Resort to - * drastic measures to free some pages so we can allocate - * another pv entry chunk. 
- */ -static vm_page_t -pmap_pv_reclaim(pmap_t locked_pmap) -{ - struct pch newtail; - struct pv_chunk *pc; - pmap_t pmap; - pt_entry_t *pte, tpte; - pv_entry_t pv; - vm_offset_t va; - vm_page_t free, m, m_pc; - uint32_t inuse; - int bit, field, freed; - - PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); - pmap = NULL; - free = m_pc = NULL; - TAILQ_INIT(&newtail); - while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || - free == NULL)) { - TAILQ_REMOVE(&pv_chunks, pc, pc_lru); - if (pmap != pc->pc_pmap) { - if (pmap != NULL) { - pmap_invalidate_all(pmap); - if (pmap != locked_pmap) - PMAP_UNLOCK(pmap); - } - pmap = pc->pc_pmap; - /* Avoid deadlock and lock recursion. */ - if (pmap > locked_pmap) - PMAP_LOCK(pmap); - else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { - pmap = NULL; - TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); - continue; - } - } - - /* - * Destroy every non-wired, 4 KB page mapping in the chunk. - */ - freed = 0; - for (field = 0; field < _NPCM; field++) { - for (inuse = ~pc->pc_map[field] & pc_freemask[field]; - inuse != 0; inuse &= ~(1UL << bit)) { - bit = bsfl(inuse); - pv = &pc->pc_pventry[field * 32 + bit]; - va = pv->pv_va; - pte = pmap_pte(pmap, va); - tpte = *pte; - if ((tpte & PG_W) == 0) - tpte = pte_load_clear(pte); - pmap_pte_release(pte); - if ((tpte & PG_W) != 0) - continue; - KASSERT(tpte != 0, - ("pmap_pv_reclaim: pmap %p va %x zero pte", - pmap, va)); - if ((tpte & PG_G) != 0) - pmap_invalidate_page(pmap, va); - m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); - if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) - vm_page_dirty(m); - if ((tpte & PG_A) != 0) - vm_page_aflag_set(m, PGA_REFERENCED); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_aflag_clear(m, PGA_WRITEABLE); - pc->pc_map[field] |= 1UL << bit; - pmap_unuse_pt(pmap, va, &free); - freed++; - } - } - if (freed == 0) { - TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); - continue; - } - /* Every freed mapping is for a 4 KB page. */ - pmap->pm_stats.resident_count -= freed; - PV_STAT(pv_entry_frees += freed); - PV_STAT(pv_entry_spare += freed); - pv_entry_count -= freed; - TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); - for (field = 0; field < _NPCM; field++) - if (pc->pc_map[field] != pc_freemask[field]) { - TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, - pc_list); - TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); - - /* - * One freed pv entry in locked_pmap is - * sufficient. - */ - if (pmap == locked_pmap) - goto out; - break; - } - if (field == _NPCM) { - PV_STAT(pv_entry_spare -= _NPCPV); - PV_STAT(pc_chunk_count--); - PV_STAT(pc_chunk_frees++); - /* Entire chunk is free; return it. */ - m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); - pmap_qremove((vm_offset_t)pc, 1); - pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); - break; - } - } -out: - TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); - if (pmap != NULL) { - pmap_invalidate_all(pmap); - if (pmap != locked_pmap) - PMAP_UNLOCK(pmap); - } - if (m_pc == NULL && pv_vafree != 0 && free != NULL) { - m_pc = free; - free = (void *)m_pc->object; - /* Recycle a freed page table page. 
*/ - m_pc->wire_count = 1; - atomic_add_int(&vm_cnt.v_wire_count, 1); - } - pmap_free_zero_pages(free); - return (m_pc); -} - -/* - * free the pv_entry back to the free list - */ -static void -free_pv_entry(pmap_t pmap, pv_entry_t pv) -{ - struct pv_chunk *pc; - int idx, field, bit; - - rw_assert(&pvh_global_lock, RA_WLOCKED); - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - PV_STAT(pv_entry_frees++); - PV_STAT(pv_entry_spare++); - pv_entry_count--; - pc = pv_to_chunk(pv); - idx = pv - &pc->pc_pventry[0]; - field = idx / 32; - bit = idx % 32; - pc->pc_map[field] |= 1ul << bit; - for (idx = 0; idx < _NPCM; idx++) - if (pc->pc_map[idx] != pc_freemask[idx]) { - /* - * 98% of the time, pc is already at the head of the - * list. If it isn't already, move it to the head. - */ - if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != - pc)) { - TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); - TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, - pc_list); - } - return; - } - TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); - free_pv_chunk(pc); -} - -static void -free_pv_chunk(struct pv_chunk *pc) -{ - vm_page_t m; - - TAILQ_REMOVE(&pv_chunks, pc, pc_lru); - PV_STAT(pv_entry_spare -= _NPCPV); - PV_STAT(pc_chunk_count--); - PV_STAT(pc_chunk_frees++); - /* entire chunk is free, return it */ - m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); - pmap_qremove((vm_offset_t)pc, 1); - vm_page_unwire(m, PQ_INACTIVE); - vm_page_free(m); - pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); -} - -/* - * get a new pv_entry, allocating a block from the system - * when needed. - */ -static pv_entry_t -get_pv_entry(pmap_t pmap, boolean_t try) -{ - static const struct timeval printinterval = { 60, 0 }; - static struct timeval lastprint; - int bit, field; - pv_entry_t pv; - struct pv_chunk *pc; - vm_page_t m; - - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - rw_assert(&pvh_global_lock, RA_WLOCKED); - PV_STAT(pv_entry_allocs++); - pv_entry_count++; - if (pv_entry_count > pv_entry_high_water) - if (ratecheck(&lastprint, &printinterval)) - printf("Approaching the limit on PV entries, consider " - "increasing either the vm.pmap.shpgperproc or the " - "vm.pmap.pv_entry_max tunable.\n"); -retry: - pc = TAILQ_FIRST(&pmap->pm_pvchunk); - if (pc != NULL) { - for (field = 0; field < _NPCM; field++) { - if (pc->pc_map[field]) { - bit = bsfl(pc->pc_map[field]); - break; - } - } - if (field < _NPCM) { - pv = &pc->pc_pventry[field * 32 + bit]; - pc->pc_map[field] &= ~(1ul << bit); - /* If this was the last item, move it to tail */ - for (field = 0; field < _NPCM; field++) - if (pc->pc_map[field] != 0) { - PV_STAT(pv_entry_spare--); - return (pv); /* not full, return */ - } - TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); - TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); - PV_STAT(pv_entry_spare--); - return (pv); - } - } - /* - * Access to the ptelist "pv_vafree" is synchronized by the page - * queues lock. If "pv_vafree" is currently non-empty, it will - * remain non-empty until pmap_ptelist_alloc() completes. 
- */ - if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { - if (try) { - pv_entry_count--; - PV_STAT(pc_chunk_tryfail++); - return (NULL); - } - m = pmap_pv_reclaim(pmap); - if (m == NULL) - goto retry; - } - PV_STAT(pc_chunk_count++); - PV_STAT(pc_chunk_allocs++); - pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); - pmap_qenter((vm_offset_t)pc, &m, 1); - if ((m->flags & PG_ZERO) == 0) - pagezero(pc); - pc->pc_pmap = pmap; - pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ - for (field = 1; field < _NPCM; field++) - pc->pc_map[field] = pc_freemask[field]; - TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); - pv = &pc->pc_pventry[0]; - TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); - PV_STAT(pv_entry_spare += _NPCPV - 1); - return (pv); -} - -static __inline pv_entry_t -pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) -{ - pv_entry_t pv; - - rw_assert(&pvh_global_lock, RA_WLOCKED); - TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { - if (pmap == PV_PMAP(pv) && va == pv->pv_va) { - TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); - break; - } - } - return (pv); -} - -static void -pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) -{ - pv_entry_t pv; - - pv = pmap_pvh_remove(pvh, pmap, va); - KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); - free_pv_entry(pmap, pv); -} - -static void -pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) -{ - - rw_assert(&pvh_global_lock, RA_WLOCKED); - pmap_pvh_free(&m->md, pmap, va); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_aflag_clear(m, PGA_WRITEABLE); -} - -/* - * Conditionally create a pv entry. - */ -static boolean_t -pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) -{ - pv_entry_t pv; - - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - rw_assert(&pvh_global_lock, RA_WLOCKED); - if (pv_entry_count < pv_entry_high_water && - (pv = get_pv_entry(pmap, TRUE)) != NULL) { - pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); - return (TRUE); - } else - return (FALSE); -} - -/* - * pmap_remove_pte: do the things to unmap a page in a process - */ -static int -pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) -{ - pt_entry_t oldpte; - vm_page_t m; - - CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x", - pmap, (u_long)*ptq, va); - - rw_assert(&pvh_global_lock, RA_WLOCKED); - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - oldpte = *ptq; - PT_SET_VA_MA(ptq, 0, TRUE); - KASSERT(oldpte != 0, - ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va)); - if (oldpte & PG_W) - pmap->pm_stats.wired_count -= 1; - /* - * Machines that don't support invlpg, also don't support - * PG_G. 
- */ - if (oldpte & PG_G) - pmap_invalidate_page(kernel_pmap, va); - pmap->pm_stats.resident_count -= 1; - if (oldpte & PG_MANAGED) { - m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME); - if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) - vm_page_dirty(m); - if (oldpte & PG_A) - vm_page_aflag_set(m, PGA_REFERENCED); - pmap_remove_entry(pmap, m, va); - } - return (pmap_unuse_pt(pmap, va, free)); -} - -/* - * Remove a single page from a process address space - */ -static void -pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free) -{ - pt_entry_t *pte; - - CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x", - pmap, va); - - rw_assert(&pvh_global_lock, RA_WLOCKED); - KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0) - return; - pmap_remove_pte(pmap, pte, va, free); - pmap_invalidate_page(pmap, va); - if (*PMAP1) - PT_SET_MA(PADDR1, 0); - -} - -/* - * Remove the given range of addresses from the specified map. - * - * It is assumed that the start and end are properly - * rounded to the page size. - */ -void -pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - vm_offset_t pdnxt; - pd_entry_t ptpaddr; - pt_entry_t *pte; - vm_page_t free = NULL; - int anyvalid; - - CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x", - pmap, sva, eva); - - /* - * Perform an unsynchronized read. This is, however, safe. - */ - if (pmap->pm_stats.resident_count == 0) - return; - - anyvalid = 0; - - rw_wlock(&pvh_global_lock); - sched_pin(); - PMAP_LOCK(pmap); - - /* - * special handling of removing one page. a very - * common operation and easy to short circuit some - * code. - */ - if ((sva + PAGE_SIZE == eva) && - ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { - pmap_remove_page(pmap, sva, &free); - goto out; - } - - for (; sva < eva; sva = pdnxt) { - u_int pdirindex; - - /* - * Calculate index for next page table. - */ - pdnxt = (sva + NBPDR) & ~PDRMASK; - if (pdnxt < sva) - pdnxt = eva; - if (pmap->pm_stats.resident_count == 0) - break; - - pdirindex = sva >> PDRSHIFT; - ptpaddr = pmap->pm_pdir[pdirindex]; - - /* - * Weed out invalid mappings. Note: we assume that the page - * directory table is always allocated, and in kernel virtual. - */ - if (ptpaddr == 0) - continue; - - /* - * Check for large page. - */ - if ((ptpaddr & PG_PS) != 0) { - PD_CLEAR_VA(pmap, pdirindex, TRUE); - pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; - anyvalid = 1; - continue; - } - - /* - * Limit our scan to either the end of the va represented - * by the current page table page, or to the end of the - * range being removed. - */ - if (pdnxt > eva) - pdnxt = eva; - - for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, - sva += PAGE_SIZE) { - if ((*pte & PG_V) == 0) - continue; - - /* - * The TLB entry for a PG_G mapping is invalidated - * by pmap_remove_pte(). - */ - if ((*pte & PG_G) == 0) - anyvalid = 1; - if (pmap_remove_pte(pmap, pte, sva, &free)) - break; - } - } - PT_UPDATES_FLUSH(); - if (*PMAP1) - PT_SET_VA_MA(PMAP1, 0, TRUE); -out: - if (anyvalid) - pmap_invalidate_all(pmap); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); - pmap_free_zero_pages(free); -} - -/* - * Routine: pmap_remove_all - * Function: - * Removes this physical page from - * all physical maps in which it resides. - * Reflects back modify bits to the pager. 
- * - * Notes: - * Original versions of this routine were very - * inefficient because they iteratively called - * pmap_remove (slow...) - */ - -void -pmap_remove_all(vm_page_t m) -{ - pv_entry_t pv; - pmap_t pmap; - pt_entry_t *pte, tpte; - vm_page_t free; - - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("pmap_remove_all: page %p is not managed", m)); - free = NULL; - rw_wlock(&pvh_global_lock); - sched_pin(); - while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); - pmap->pm_stats.resident_count--; - pte = pmap_pte_quick(pmap, pv->pv_va); - tpte = *pte; - PT_SET_VA_MA(pte, 0, TRUE); - KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte", - pmap, pv->pv_va)); - if (tpte & PG_W) - pmap->pm_stats.wired_count--; - if (tpte & PG_A) - vm_page_aflag_set(m, PGA_REFERENCED); - - /* - * Update the vm_page_t clean and reference bits. - */ - if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) - vm_page_dirty(m); - pmap_unuse_pt(pmap, pv->pv_va, &free); - pmap_invalidate_page(pmap, pv->pv_va); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); - free_pv_entry(pmap, pv); - PMAP_UNLOCK(pmap); - } - vm_page_aflag_clear(m, PGA_WRITEABLE); - PT_UPDATES_FLUSH(); - if (*PMAP1) - PT_SET_MA(PADDR1, 0); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - pmap_free_zero_pages(free); -} - -/* - * Set the physical protection on the - * specified range of this map as requested. - */ -void -pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) -{ - vm_offset_t pdnxt; - pd_entry_t ptpaddr; - pt_entry_t *pte; - int anychanged; - - CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x", - pmap, sva, eva, prot); - - if ((prot & VM_PROT_READ) == VM_PROT_NONE) { - pmap_remove(pmap, sva, eva); - return; - } - -#ifdef PAE - if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == - (VM_PROT_WRITE|VM_PROT_EXECUTE)) - return; -#else - if (prot & VM_PROT_WRITE) - return; -#endif - - anychanged = 0; - - rw_wlock(&pvh_global_lock); - sched_pin(); - PMAP_LOCK(pmap); - for (; sva < eva; sva = pdnxt) { - pt_entry_t obits, pbits; - u_int pdirindex; - - pdnxt = (sva + NBPDR) & ~PDRMASK; - if (pdnxt < sva) - pdnxt = eva; - - pdirindex = sva >> PDRSHIFT; - ptpaddr = pmap->pm_pdir[pdirindex]; - - /* - * Weed out invalid mappings. Note: we assume that the page - * directory table is always allocated, and in kernel virtual. - */ - if (ptpaddr == 0) - continue; - - /* - * Check for large page. - */ - if ((ptpaddr & PG_PS) != 0) { - if ((prot & VM_PROT_WRITE) == 0) - pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); -#ifdef PAE - if ((prot & VM_PROT_EXECUTE) == 0) - pmap->pm_pdir[pdirindex] |= pg_nx; -#endif - anychanged = 1; - continue; - } - - if (pdnxt > eva) - pdnxt = eva; - - for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, - sva += PAGE_SIZE) { - vm_page_t m; - -retry: - /* - * Regardless of whether a pte is 32 or 64 bits in - * size, PG_RW, PG_A, and PG_M are among the least - * significant 32 bits. 
- */ - obits = pbits = *pte; - if ((pbits & PG_V) == 0) - continue; - - if ((prot & VM_PROT_WRITE) == 0) { - if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == - (PG_MANAGED | PG_M | PG_RW)) { - m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) & - PG_FRAME); - vm_page_dirty(m); - } - pbits &= ~(PG_RW | PG_M); - } -#ifdef PAE - if ((prot & VM_PROT_EXECUTE) == 0) - pbits |= pg_nx; -#endif - - if (pbits != obits) { - obits = *pte; - PT_SET_VA_MA(pte, pbits, TRUE); - if (*pte != pbits) - goto retry; - if (obits & PG_G) - pmap_invalidate_page(pmap, sva); - else - anychanged = 1; - } - } - } - PT_UPDATES_FLUSH(); - if (*PMAP1) - PT_SET_VA_MA(PMAP1, 0, TRUE); - if (anychanged) - pmap_invalidate_all(pmap); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); -} - -/* - * Insert the given physical page (p) at - * the specified virtual address (v) in the - * target physical map with the protection requested. - * - * If specified, the page will be wired down, meaning - * that the related pte can not be reclaimed. - * - * NB: This is the only routine which MAY NOT lazy-evaluate - * or lose information. That is, this routine must actually - * insert this page into the given map NOW. - */ -int -pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - u_int flags, int8_t psind __unused) -{ - pd_entry_t *pde; - pt_entry_t *pte; - pt_entry_t newpte, origpte; - pv_entry_t pv; - vm_paddr_t opa, pa; - vm_page_t mpte, om; - boolean_t invlva, wired; - - CTR5(KTR_PMAP, - "pmap_enter: pmap=%08p va=0x%08x ma=0x%08x prot=0x%x flags=0x%x", - pmap, va, VM_PAGE_TO_MACH(m), prot, flags); - va = trunc_page(va); - KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); - KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, - ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", - va)); - if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) - VM_OBJECT_ASSERT_LOCKED(m->object); - - mpte = NULL; - wired = (flags & PMAP_ENTER_WIRED) != 0; - - rw_wlock(&pvh_global_lock); - PMAP_LOCK(pmap); - sched_pin(); - - /* - * In the case that a page table page is not - * resident, we are creating it here. - */ - if (va < VM_MAXUSER_ADDRESS) { - mpte = pmap_allocpte(pmap, va, flags); - if (mpte == NULL) { - KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, - ("pmap_allocpte failed with sleep allowed")); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); - return (KERN_RESOURCE_SHORTAGE); - } - } - - pde = pmap_pde(pmap, va); - if ((*pde & PG_PS) != 0) - panic("pmap_enter: attempted pmap_enter on 4MB page"); - pte = pmap_pte_quick(pmap, va); - - /* - * Page Directory table entry not valid, we need a new PT page - */ - if (pte == NULL) { - panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", - (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va); - } - - pa = VM_PAGE_TO_PHYS(m); - om = NULL; - opa = origpte = 0; - -#if 0 - KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx", - pte, *pte)); -#endif - origpte = *pte; - if (origpte) - origpte = xpmap_mtop(origpte); - opa = origpte & PG_FRAME; - - /* - * Mapping has not changed, must be protection or wiring change. - */ - if (origpte && (opa == pa)) { - /* - * Wiring change, just update stats. We don't worry about - * wiring PT pages as they remain resident as long as there - * are valid mappings in them. Hence, if a user page is wired, - * the PT page will be also. 
- */ - if (wired && ((origpte & PG_W) == 0)) - pmap->pm_stats.wired_count++; - else if (!wired && (origpte & PG_W)) - pmap->pm_stats.wired_count--; - - /* - * Remove extra pte reference - */ - if (mpte) - mpte->wire_count--; - - if (origpte & PG_MANAGED) { - om = m; - pa |= PG_MANAGED; - } - goto validate; - } - - pv = NULL; - - /* - * Mapping has changed, invalidate old range and fall through to - * handle validating new mapping. - */ - if (opa) { - if (origpte & PG_W) - pmap->pm_stats.wired_count--; - if (origpte & PG_MANAGED) { - om = PHYS_TO_VM_PAGE(opa); - pv = pmap_pvh_remove(&om->md, pmap, va); - } else if (va < VM_MAXUSER_ADDRESS) - printf("va=0x%x is unmanaged :-( \n", va); - - if (mpte != NULL) { - mpte->wire_count--; - KASSERT(mpte->wire_count > 0, - ("pmap_enter: missing reference to page table page," - " va: 0x%x", va)); - } - } else - pmap->pm_stats.resident_count++; - - /* - * Enter on the PV list if part of our managed memory. - */ - if ((m->oflags & VPO_UNMANAGED) == 0) { - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, - ("pmap_enter: managed mapping within the clean submap")); - if (pv == NULL) - pv = get_pv_entry(pmap, FALSE); - pv->pv_va = va; - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); - pa |= PG_MANAGED; - } else if (pv != NULL) - free_pv_entry(pmap, pv); - - /* - * Increment counters - */ - if (wired) - pmap->pm_stats.wired_count++; - -validate: - /* - * Now validate mapping with desired protection/wiring. - */ - newpte = (pt_entry_t)(pa | PG_V); - if ((prot & VM_PROT_WRITE) != 0) { - newpte |= PG_RW; - if ((newpte & PG_MANAGED) != 0) - vm_page_aflag_set(m, PGA_WRITEABLE); - } -#ifdef PAE - if ((prot & VM_PROT_EXECUTE) == 0) - newpte |= pg_nx; -#endif - if (wired) - newpte |= PG_W; - if (va < VM_MAXUSER_ADDRESS) - newpte |= PG_U; - if (pmap == kernel_pmap) - newpte |= pgeflag; - - critical_enter(); - /* - * if the mapping or permission bits are different, we need - * to update the pte. - */ - if ((origpte & ~(PG_M|PG_A)) != newpte) { - if (origpte) { - invlva = FALSE; - origpte = *pte; - PT_SET_VA(pte, newpte | PG_A, FALSE); - if (origpte & PG_A) { - if (origpte & PG_MANAGED) - vm_page_aflag_set(om, PGA_REFERENCED); - if (opa != VM_PAGE_TO_PHYS(m)) - invlva = TRUE; -#ifdef PAE - if ((origpte & PG_NX) == 0 && - (newpte & PG_NX) != 0) - invlva = TRUE; -#endif - } - if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { - if ((origpte & PG_MANAGED) != 0) - vm_page_dirty(om); - if ((prot & VM_PROT_WRITE) == 0) - invlva = TRUE; - } - if ((origpte & PG_MANAGED) != 0 && - TAILQ_EMPTY(&om->md.pv_list)) - vm_page_aflag_clear(om, PGA_WRITEABLE); - if (invlva) - pmap_invalidate_page(pmap, va); - } else{ - PT_SET_VA(pte, newpte | PG_A, FALSE); - } - - } - PT_UPDATES_FLUSH(); - critical_exit(); - if (*PMAP1) - PT_SET_VA_MA(PMAP1, 0, TRUE); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); - return (KERN_SUCCESS); -} - -/* - * Maps a sequence of resident pages belonging to the same object. - * The sequence begins with the given page m_start. This page is - * mapped at the given virtual address start. Each subsequent page is - * mapped at a virtual address that is offset from start by the same - * amount as the page is offset from m_start within the object. The - * last page in the sequence is the page with the largest offset from - * m_start that can be mapped at a virtual address less than the given - * virtual address end. 
Not every virtual page between start and end - * is mapped; only those for which a resident page exists with the - * corresponding offset from m_start are mapped. - */ -void -pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, - vm_page_t m_start, vm_prot_t prot) -{ - vm_page_t m, mpte; - vm_pindex_t diff, psize; - multicall_entry_t mcl[16]; - multicall_entry_t *mclp = mcl; - int error, count = 0; - - VM_OBJECT_ASSERT_LOCKED(m_start->object); - - psize = atop(end - start); - mpte = NULL; - m = m_start; - rw_wlock(&pvh_global_lock); - PMAP_LOCK(pmap); - while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m, - prot, mpte); - m = TAILQ_NEXT(m, listq); - if (count == 16) { - error = HYPERVISOR_multicall(mcl, count); - KASSERT(error == 0, ("bad multicall %d", error)); - mclp = mcl; - count = 0; - } - } - if (count) { - error = HYPERVISOR_multicall(mcl, count); - KASSERT(error == 0, ("bad multicall %d", error)); - } - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); -} - -/* - * this code makes some *MAJOR* assumptions: - * 1. Current pmap & pmap exists. - * 2. Not wired. - * 3. Read access. - * 4. No page table pages. - * but is *MUCH* faster than pmap_enter... - */ - -void -pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) -{ - multicall_entry_t mcl, *mclp; - int count = 0; - mclp = &mcl; - - CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x", - pmap, va, m, prot); - - rw_wlock(&pvh_global_lock); - PMAP_LOCK(pmap); - (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL); - if (count) - HYPERVISOR_multicall(&mcl, count); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); -} - -#ifdef notyet -void -pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count) -{ - int i, error, index = 0; - multicall_entry_t mcl[16]; - multicall_entry_t *mclp = mcl; - - PMAP_LOCK(pmap); - for (i = 0; i < count; i++, addrs++, pages++, prots++) { - if (!pmap_is_prefaultable_locked(pmap, *addrs)) - continue; - - (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL); - if (index == 16) { - error = HYPERVISOR_multicall(mcl, index); - mclp = mcl; - index = 0; - KASSERT(error == 0, ("bad multicall %d", error)); - } - } - if (index) { - error = HYPERVISOR_multicall(mcl, index); - KASSERT(error == 0, ("bad multicall %d", error)); - } - - PMAP_UNLOCK(pmap); -} -#endif - -static vm_page_t -pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m, - vm_prot_t prot, vm_page_t mpte) -{ - pt_entry_t *pte; - vm_paddr_t pa; - vm_page_t free; - multicall_entry_t *mcl = *mclpp; - - KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || - (m->oflags & VPO_UNMANAGED) != 0, - ("pmap_enter_quick_locked: managed mapping within the clean submap")); - rw_assert(&pvh_global_lock, RA_WLOCKED); - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - - /* - * In the case that a page table page is not - * resident, we are creating it here. - */ - if (va < VM_MAXUSER_ADDRESS) { - u_int ptepindex; - pd_entry_t ptema; - - /* - * Calculate pagetable page index - */ - ptepindex = va >> PDRSHIFT; - if (mpte && (mpte->pindex == ptepindex)) { - mpte->wire_count++; - } else { - /* - * Get the page directory entry - */ - ptema = pmap->pm_pdir[ptepindex]; - - /* - * If the page table page is mapped, we just increment - * the hold count, and activate it. 
- */ - if (ptema & PG_V) { - if (ptema & PG_PS) - panic("pmap_enter_quick: unexpected mapping into 4MB page"); - mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME); - mpte->wire_count++; - } else { - mpte = _pmap_allocpte(pmap, ptepindex, - PMAP_ENTER_NOSLEEP); - if (mpte == NULL) - return (mpte); - } - } - } else { - mpte = NULL; - } - - /* - * This call to vtopte makes the assumption that we are - * entering the page into the current pmap. In order to support - * quick entry into any pmap, one would likely use pmap_pte_quick. - * But that isn't as quick as vtopte. - */ - KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap")); - pte = vtopte(va); - if (*pte & PG_V) { - if (mpte != NULL) { - mpte->wire_count--; - mpte = NULL; - } - return (mpte); - } - - /* - * Enter on the PV list if part of our managed memory. - */ - if ((m->oflags & VPO_UNMANAGED) == 0 && - !pmap_try_insert_pv_entry(pmap, va, m)) { - if (mpte != NULL) { - free = NULL; - if (pmap_unwire_ptp(pmap, mpte, &free)) { - pmap_invalidate_page(pmap, va); - pmap_free_zero_pages(free); - } - - mpte = NULL; - } - return (mpte); - } - - /* - * Increment counters - */ - pmap->pm_stats.resident_count++; - - pa = VM_PAGE_TO_PHYS(m); -#ifdef PAE - if ((prot & VM_PROT_EXECUTE) == 0) - pa |= pg_nx; -#endif - -#if 0 - /* - * Now validate mapping with RO protection - */ - if ((m->oflags & VPO_UNMANAGED) != 0) - pte_store(pte, pa | PG_V | PG_U); - else - pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); -#else - /* - * Now validate mapping with RO protection - */ - if ((m->oflags & VPO_UNMANAGED) != 0) - pa = xpmap_ptom(pa | PG_V | PG_U); - else - pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED); - - mcl->op = __HYPERVISOR_update_va_mapping; - mcl->args[0] = va; - mcl->args[1] = (uint32_t)(pa & 0xffffffff); - mcl->args[2] = (uint32_t)(pa >> 32); - mcl->args[3] = 0; - *mclpp = mcl + 1; - *count = *count + 1; -#endif - return (mpte); -} - -/* - * Make a temporary mapping for a physical address. This is only intended - * to be used for panic dumps. - */ -void * -pmap_kenter_temporary(vm_paddr_t pa, int i) -{ - vm_offset_t va; - vm_paddr_t ma = xpmap_ptom(pa); - - va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); - PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag); - invlpg(va); - return ((void *)crashdumpmap); -} - -/* - * This code maps large physical mmap regions into the - * processor address space. Note that some shortcuts - * are taken, but the code works. - */ -void -pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, - vm_pindex_t pindex, vm_size_t size) -{ - pd_entry_t *pde; - vm_paddr_t pa, ptepa; - vm_page_t p; - int pat_mode; - - VM_OBJECT_ASSERT_WLOCKED(object); - KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, - ("pmap_object_init_pt: non-device object")); - if (pseflag && - (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { - if (!vm_object_populate(object, pindex, pindex + atop(size))) - return; - p = vm_page_lookup(object, pindex); - KASSERT(p->valid == VM_PAGE_BITS_ALL, - ("pmap_object_init_pt: invalid page %p", p)); - pat_mode = p->md.pat_mode; - - /* - * Abort the mapping if the first page is not physically - * aligned to a 2/4MB page boundary. - */ - ptepa = VM_PAGE_TO_PHYS(p); - if (ptepa & (NBPDR - 1)) - return; - - /* - * Skip the first page. Abort the mapping if the rest of - * the pages are not physically contiguous or have differing - * memory attributes. 
- */ - p = TAILQ_NEXT(p, listq); - for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; - pa += PAGE_SIZE) { - KASSERT(p->valid == VM_PAGE_BITS_ALL, - ("pmap_object_init_pt: invalid page %p", p)); - if (pa != VM_PAGE_TO_PHYS(p) || - pat_mode != p->md.pat_mode) - return; - p = TAILQ_NEXT(p, listq); - } - - /* - * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and - * "size" is a multiple of 2/4M, adding the PAT setting to - * "pa" will not affect the termination of this loop. - */ - PMAP_LOCK(pmap); - for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + - size; pa += NBPDR) { - pde = pmap_pde(pmap, addr); - if (*pde == 0) { - pde_store(pde, pa | PG_PS | PG_M | PG_A | - PG_U | PG_RW | PG_V); - pmap->pm_stats.resident_count += NBPDR / - PAGE_SIZE; - pmap_pde_mappings++; - } - /* Else continue on if the PDE is already valid. */ - addr += NBPDR; - } - PMAP_UNLOCK(pmap); - } -} - -/* - * Clear the wired attribute from the mappings for the specified range of - * addresses in the given pmap. Every valid mapping within that range - * must have the wired attribute set. In contrast, invalid mappings - * cannot have the wired attribute set, so they are ignored. - * - * The wired attribute of the page table entry is not a hardware feature, - * so there is no need to invalidate any TLB entries. - */ -void -pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - vm_offset_t pdnxt; - pd_entry_t *pde; - pt_entry_t *pte; - - CTR3(KTR_PMAP, "pmap_unwire: pmap=%p sva=0x%x eva=0x%x", pmap, sva, - eva); - rw_wlock(&pvh_global_lock); - sched_pin(); - PMAP_LOCK(pmap); - for (; sva < eva; sva = pdnxt) { - pdnxt = (sva + NBPDR) & ~PDRMASK; - if (pdnxt < sva) - pdnxt = eva; - pde = pmap_pde(pmap, sva); - if ((*pde & PG_V) == 0) - continue; - if ((*pde & PG_PS) != 0) - panic("pmap_unwire: unexpected PG_PS in pde %#jx", - (uintmax_t)*pde); - if (pdnxt > eva) - pdnxt = eva; - for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, - sva += PAGE_SIZE) { - if ((*pte & PG_V) == 0) - continue; - if ((*pte & PG_W) == 0) - panic("pmap_unwire: pte %#jx is missing PG_W", - (uintmax_t)*pte); - PT_SET_VA_MA(pte, *pte & ~PG_W, FALSE); - pmap->pm_stats.wired_count--; - } - } - if (*PMAP1) - PT_CLEAR_VA(PMAP1, FALSE); - PT_UPDATES_FLUSH(); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); -} - - -/* - * Copy the range specified by src_addr/len - * from the source map to the range dst_addr/len - * in the destination map. - * - * This routine is only advisory and need not do anything. 
- */ - -void -pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, - vm_offset_t src_addr) -{ - vm_page_t free; - vm_offset_t addr; - vm_offset_t end_addr = src_addr + len; - vm_offset_t pdnxt; - - if (dst_addr != src_addr) - return; - - if (!pmap_is_current(src_pmap)) { - CTR2(KTR_PMAP, - "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx", - (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME)); - - return; - } - CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x", - dst_pmap, src_pmap, dst_addr, len, src_addr); - -#ifdef HAMFISTED_LOCKING - mtx_lock(&createdelete_lock); -#endif - - rw_wlock(&pvh_global_lock); - if (dst_pmap < src_pmap) { - PMAP_LOCK(dst_pmap); - PMAP_LOCK(src_pmap); - } else { - PMAP_LOCK(src_pmap); - PMAP_LOCK(dst_pmap); - } - sched_pin(); - for (addr = src_addr; addr < end_addr; addr = pdnxt) { - pt_entry_t *src_pte, *dst_pte; - vm_page_t dstmpte, srcmpte; - pd_entry_t srcptepaddr; - u_int ptepindex; - - KASSERT(addr < UPT_MIN_ADDRESS, - ("pmap_copy: invalid to pmap_copy page tables")); - - pdnxt = (addr + NBPDR) & ~PDRMASK; - if (pdnxt < addr) - pdnxt = end_addr; - ptepindex = addr >> PDRSHIFT; - - srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]); - if (srcptepaddr == 0) - continue; - - if (srcptepaddr & PG_PS) { - if (dst_pmap->pm_pdir[ptepindex] == 0) { - PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE); - dst_pmap->pm_stats.resident_count += - NBPDR / PAGE_SIZE; - } - continue; - } - - srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); - KASSERT(srcmpte->wire_count > 0, - ("pmap_copy: source page table page is unused")); - - if (pdnxt > end_addr) - pdnxt = end_addr; - - src_pte = vtopte(addr); - while (addr < pdnxt) { - pt_entry_t ptetemp; - ptetemp = *src_pte; - /* - * we only virtual copy managed pages - */ - if ((ptetemp & PG_MANAGED) != 0) { - dstmpte = pmap_allocpte(dst_pmap, addr, - PMAP_ENTER_NOSLEEP); - if (dstmpte == NULL) - goto out; - dst_pte = pmap_pte_quick(dst_pmap, addr); - if (*dst_pte == 0 && - pmap_try_insert_pv_entry(dst_pmap, addr, - PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) { - /* - * Clear the wired, modified, and - * accessed (referenced) bits - * during the copy. - */ - KASSERT(ptetemp != 0, ("src_pte not set")); - PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */); - KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)), - ("no pmap copy expected: 0x%jx saw: 0x%jx", - ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte)); - dst_pmap->pm_stats.resident_count++; - } else { - free = NULL; - if (pmap_unwire_ptp(dst_pmap, dstmpte, - &free)) { - pmap_invalidate_page(dst_pmap, - addr); - pmap_free_zero_pages(free); - } - goto out; - } - if (dstmpte->wire_count >= srcmpte->wire_count) - break; - } - addr += PAGE_SIZE; - src_pte++; - } - } -out: - PT_UPDATES_FLUSH(); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(src_pmap); - PMAP_UNLOCK(dst_pmap); - -#ifdef HAMFISTED_LOCKING - mtx_unlock(&createdelete_lock); -#endif -} - -static __inline void -pagezero(void *page) -{ -#if defined(I686_CPU) - if (cpu_class == CPUCLASS_686) { -#if defined(CPU_ENABLE_SSE) - if (cpu_feature & CPUID_SSE2) - sse2_pagezero(page); - else -#endif - i686_pagezero(page); - } else -#endif - bzero(page, PAGE_SIZE); -} - -/* - * pmap_zero_page zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. 
- */ -void -pmap_zero_page(vm_page_t m) -{ - struct sysmaps *sysmaps; - - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - if (*sysmaps->CMAP2) - panic("pmap_zero_page: CMAP2 busy"); - sched_pin(); - PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); - pagezero(sysmaps->CADDR2); - PT_SET_MA(sysmaps->CADDR2, 0); - sched_unpin(); - mtx_unlock(&sysmaps->lock); -} - -/* - * pmap_zero_page_area zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. - * - * off and size may not cover an area beyond a single hardware page. - */ -void -pmap_zero_page_area(vm_page_t m, int off, int size) -{ - struct sysmaps *sysmaps; - - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - if (*sysmaps->CMAP2) - panic("pmap_zero_page_area: CMAP2 busy"); - sched_pin(); - PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); - - if (off == 0 && size == PAGE_SIZE) - pagezero(sysmaps->CADDR2); - else - bzero((char *)sysmaps->CADDR2 + off, size); - PT_SET_MA(sysmaps->CADDR2, 0); - sched_unpin(); - mtx_unlock(&sysmaps->lock); -} - -/* - * pmap_zero_page_idle zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. This - * is intended to be called from the vm_pagezero process only and - * outside of Giant. - */ -void -pmap_zero_page_idle(vm_page_t m) -{ - - if (*CMAP3) - panic("pmap_zero_page_idle: CMAP3 busy"); - sched_pin(); - PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M); - pagezero(CADDR3); - PT_SET_MA(CADDR3, 0); - sched_unpin(); -} - -/* - * pmap_copy_page copies the specified (machine independent) - * page by mapping the page into virtual memory and using - * bcopy to copy the page, one machine dependent page at a - * time. 
- */ -void -pmap_copy_page(vm_page_t src, vm_page_t dst) -{ - struct sysmaps *sysmaps; - - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - if (*sysmaps->CMAP1) - panic("pmap_copy_page: CMAP1 busy"); - if (*sysmaps->CMAP2) - panic("pmap_copy_page: CMAP2 busy"); - sched_pin(); - PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A); - PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M); - bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); - PT_SET_MA(sysmaps->CADDR1, 0); - PT_SET_MA(sysmaps->CADDR2, 0); - sched_unpin(); - mtx_unlock(&sysmaps->lock); -} - -int unmapped_buf_allowed = 1; - -void -pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], - vm_offset_t b_offset, int xfersize) -{ - struct sysmaps *sysmaps; - vm_page_t a_pg, b_pg; - char *a_cp, *b_cp; - vm_offset_t a_pg_offset, b_pg_offset; - int cnt; - - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - if (*sysmaps->CMAP1 != 0) - panic("pmap_copy_pages: CMAP1 busy"); - if (*sysmaps->CMAP2 != 0) - panic("pmap_copy_pages: CMAP2 busy"); - sched_pin(); - while (xfersize > 0) { - a_pg = ma[a_offset >> PAGE_SHIFT]; - a_pg_offset = a_offset & PAGE_MASK; - cnt = min(xfersize, PAGE_SIZE - a_pg_offset); - b_pg = mb[b_offset >> PAGE_SHIFT]; - b_pg_offset = b_offset & PAGE_MASK; - cnt = min(cnt, PAGE_SIZE - b_pg_offset); - PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(a_pg) | PG_A); - PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | - VM_PAGE_TO_MACH(b_pg) | PG_A | PG_M); - a_cp = sysmaps->CADDR1 + a_pg_offset; - b_cp = sysmaps->CADDR2 + b_pg_offset; - bcopy(a_cp, b_cp, cnt); - a_offset += cnt; - b_offset += cnt; - xfersize -= cnt; - } - PT_SET_MA(sysmaps->CADDR1, 0); - PT_SET_MA(sysmaps->CADDR2, 0); - sched_unpin(); - mtx_unlock(&sysmaps->lock); -} - -/* - * Returns true if the pmap's pv is one of the first - * 16 pvs linked to from this page. This count may - * be changed upwards or downwards in the future; it - * is only necessary that true be returned for a small - * subset of pmaps for proper page aging. - */ -boolean_t -pmap_page_exists_quick(pmap_t pmap, vm_page_t m) -{ - pv_entry_t pv; - int loops = 0; - boolean_t rv; - - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("pmap_page_exists_quick: page %p is not managed", m)); - rv = FALSE; - rw_wlock(&pvh_global_lock); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { - if (PV_PMAP(pv) == pmap) { - rv = TRUE; - break; - } - loops++; - if (loops >= 16) - break; - } - rw_wunlock(&pvh_global_lock); - return (rv); -} - -/* - * pmap_page_wired_mappings: - * - * Return the number of managed mappings to the given physical page - * that are wired. - */ -int -pmap_page_wired_mappings(vm_page_t m) -{ - pv_entry_t pv; - pt_entry_t *pte; - pmap_t pmap; - int count; - - count = 0; - if ((m->oflags & VPO_UNMANAGED) != 0) - return (count); - rw_wlock(&pvh_global_lock); - sched_pin(); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { - pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); - pte = pmap_pte_quick(pmap, pv->pv_va); - if ((*pte & PG_W) != 0) - count++; - PMAP_UNLOCK(pmap); - } - sched_unpin(); - rw_wunlock(&pvh_global_lock); - return (count); -} - -/* - * Returns TRUE if the given page is mapped. Otherwise, returns FALSE. - */ -boolean_t -pmap_page_is_mapped(vm_page_t m) -{ - - if ((m->oflags & VPO_UNMANAGED) != 0) - return (FALSE); - return (!TAILQ_EMPTY(&m->md.pv_list)); -} - -/* - * Remove all pages from specified address space - * this aids process exit speeds. 
Also, this code - * is special cased for current process only, but - * can have the more generic (and slightly slower) - * mode enabled. This is much faster than pmap_remove - * in the case of running down an entire address space. - */ -void -pmap_remove_pages(pmap_t pmap) -{ - pt_entry_t *pte, tpte; - vm_page_t m, free = NULL; - pv_entry_t pv; - struct pv_chunk *pc, *npc; - int field, idx; - int32_t bit; - uint32_t inuse, bitmask; - int allfree; - - CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap); - - if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { - printf("warning: pmap_remove_pages called with non-current pmap\n"); - return; - } - rw_wlock(&pvh_global_lock); - KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap")); - PMAP_LOCK(pmap); - sched_pin(); - TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { - KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap, - pc->pc_pmap)); - allfree = 1; - for (field = 0; field < _NPCM; field++) { - inuse = ~pc->pc_map[field] & pc_freemask[field]; - while (inuse != 0) { - bit = bsfl(inuse); - bitmask = 1UL << bit; - idx = field * 32 + bit; - pv = &pc->pc_pventry[idx]; - inuse &= ~bitmask; - - pte = vtopte(pv->pv_va); - tpte = *pte ? xpmap_mtop(*pte) : 0; - - if (tpte == 0) { - printf( - "TPTE at %p IS ZERO @ VA %08x\n", - pte, pv->pv_va); - panic("bad pte"); - } - -/* - * We cannot remove wired pages from a process' mapping at this time - */ - if (tpte & PG_W) { - allfree = 0; - continue; - } - - m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); - KASSERT(m->phys_addr == (tpte & PG_FRAME), - ("vm_page_t %p phys_addr mismatch %016jx %016jx", - m, (uintmax_t)m->phys_addr, - (uintmax_t)tpte)); - - KASSERT(m < &vm_page_array[vm_page_array_size], - ("pmap_remove_pages: bad tpte %#jx", - (uintmax_t)tpte)); - - - PT_CLEAR_VA(pte, FALSE); - - /* - * Update the vm_page_t clean/reference bits. - */ - if (tpte & PG_M) - vm_page_dirty(m); - - TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_aflag_clear(m, PGA_WRITEABLE); - - pmap_unuse_pt(pmap, pv->pv_va, &free); - - /* Mark free */ - PV_STAT(pv_entry_frees++); - PV_STAT(pv_entry_spare++); - pv_entry_count--; - pc->pc_map[field] |= bitmask; - pmap->pm_stats.resident_count--; - } - } - PT_UPDATES_FLUSH(); - if (allfree) { - TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); - free_pv_chunk(pc); - } - } - PT_UPDATES_FLUSH(); - if (*PMAP1) - PT_SET_MA(PADDR1, 0); - - sched_unpin(); - pmap_invalidate_all(pmap); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); - pmap_free_zero_pages(free); -} - -/* - * pmap_is_modified: - * - * Return whether or not the specified physical page was modified - * in any physical maps. - */ -boolean_t -pmap_is_modified(vm_page_t m) -{ - pv_entry_t pv; - pt_entry_t *pte; - pmap_t pmap; - boolean_t rv; - - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("pmap_is_modified: page %p is not managed", m)); - rv = FALSE; - - /* - * If the page is not exclusive busied, then PGA_WRITEABLE cannot be - * concurrently set while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no PTEs can have PG_M set. 
- */ - VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) - return (rv); - rw_wlock(&pvh_global_lock); - sched_pin(); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { - pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); - pte = pmap_pte_quick(pmap, pv->pv_va); - rv = (*pte & PG_M) != 0; - PMAP_UNLOCK(pmap); - if (rv) - break; - } - if (*PMAP1) - PT_SET_MA(PADDR1, 0); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - return (rv); -} - -/* - * pmap_is_prefaultable: - * - * Return whether or not the specified virtual address is elgible - * for prefault. - */ -static boolean_t -pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr) -{ - pt_entry_t *pte; - boolean_t rv = FALSE; - - return (rv); - - if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) { - pte = vtopte(addr); - rv = (*pte == 0); - } - return (rv); -} - -boolean_t -pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) -{ - boolean_t rv; - - PMAP_LOCK(pmap); - rv = pmap_is_prefaultable_locked(pmap, addr); - PMAP_UNLOCK(pmap); - return (rv); -} - -boolean_t -pmap_is_referenced(vm_page_t m) -{ - pv_entry_t pv; - pt_entry_t *pte; - pmap_t pmap; - boolean_t rv; - - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("pmap_is_referenced: page %p is not managed", m)); - rv = FALSE; - rw_wlock(&pvh_global_lock); - sched_pin(); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { - pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); - pte = pmap_pte_quick(pmap, pv->pv_va); - rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); - PMAP_UNLOCK(pmap); - if (rv) - break; - } - if (*PMAP1) - PT_SET_MA(PADDR1, 0); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - return (rv); -} - -void -pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) -{ - int i, npages = round_page(len) >> PAGE_SHIFT; - for (i = 0; i < npages; i++) { - pt_entry_t *pte; - pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); - rw_wlock(&pvh_global_lock); - pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M))); - rw_wunlock(&pvh_global_lock); - PMAP_MARK_PRIV(xpmap_mtop(*pte)); - pmap_pte_release(pte); - } -} - -void -pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len) -{ - int i, npages = round_page(len) >> PAGE_SHIFT; - for (i = 0; i < npages; i++) { - pt_entry_t *pte; - pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); - PMAP_MARK_UNPRIV(xpmap_mtop(*pte)); - rw_wlock(&pvh_global_lock); - pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M)); - rw_wunlock(&pvh_global_lock); - pmap_pte_release(pte); - } -} - -/* - * Clear the write and modified bits in each of the given page's mappings. - */ -void -pmap_remove_write(vm_page_t m) -{ - pv_entry_t pv; - pmap_t pmap; - pt_entry_t oldpte, *pte; - - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("pmap_remove_write: page %p is not managed", m)); - - /* - * If the page is not exclusive busied, then PGA_WRITEABLE cannot be - * set by another thread while the object is locked. Thus, - * if PGA_WRITEABLE is clear, no page table entries need updating. - */ - VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) - return; - rw_wlock(&pvh_global_lock); - sched_pin(); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { - pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); - pte = pmap_pte_quick(pmap, pv->pv_va); -retry: - oldpte = *pte; - if ((oldpte & PG_RW) != 0) { - vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M); - - /* - * Regardless of whether a pte is 32 or 64 bits - * in size, PG_RW and PG_M are among the least - * significant 32 bits. 
- */ - PT_SET_VA_MA(pte, newpte, TRUE); - if (*pte != newpte) - goto retry; - - if ((oldpte & PG_M) != 0) - vm_page_dirty(m); - pmap_invalidate_page(pmap, pv->pv_va); - } - PMAP_UNLOCK(pmap); - } - vm_page_aflag_clear(m, PGA_WRITEABLE); - PT_UPDATES_FLUSH(); - if (*PMAP1) - PT_SET_MA(PADDR1, 0); - sched_unpin(); - rw_wunlock(&pvh_global_lock); -} - -/* - * pmap_ts_referenced: - * - * Return a count of reference bits for a page, clearing those bits. - * It is not necessary for every reference bit to be cleared, but it - * is necessary that 0 only be returned when there are truly no - * reference bits set. - * - * XXX: The exact number of bits to check and clear is a matter that - * should be tested and standardized at some point in the future for - * optimal aging of shared pages. - */ -int -pmap_ts_referenced(vm_page_t m) -{ - pv_entry_t pv, pvf, pvn; - pmap_t pmap; - pt_entry_t *pte; - int rtval = 0; - - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("pmap_ts_referenced: page %p is not managed", m)); - rw_wlock(&pvh_global_lock); - sched_pin(); - if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - pvf = pv; - do { - pvn = TAILQ_NEXT(pv, pv_next); - TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); - TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); - pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); - pte = pmap_pte_quick(pmap, pv->pv_va); - if ((*pte & PG_A) != 0) { - PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); - pmap_invalidate_page(pmap, pv->pv_va); - rtval++; - if (rtval > 4) - pvn = NULL; - } - PMAP_UNLOCK(pmap); - } while ((pv = pvn) != NULL && pv != pvf); - } - PT_UPDATES_FLUSH(); - if (*PMAP1) - PT_SET_MA(PADDR1, 0); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - return (rtval); -} - -/* - * Apply the given advice to the specified range of addresses within the - * given pmap. Depending on the advice, clear the referenced and/or - * modified flags in each mapping and set the mapped page's dirty field. - */ -void -pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) -{ - pd_entry_t oldpde; - pt_entry_t *pte; - vm_offset_t pdnxt; - vm_page_t m; - boolean_t anychanged; - - if (advice != MADV_DONTNEED && advice != MADV_FREE) - return; - anychanged = FALSE; - rw_wlock(&pvh_global_lock); - sched_pin(); - PMAP_LOCK(pmap); - for (; sva < eva; sva = pdnxt) { - pdnxt = (sva + NBPDR) & ~PDRMASK; - if (pdnxt < sva) - pdnxt = eva; - oldpde = pmap->pm_pdir[sva >> PDRSHIFT]; - if ((oldpde & (PG_PS | PG_V)) != PG_V) - continue; - if (pdnxt > eva) - pdnxt = eva; - for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, - sva += PAGE_SIZE) { - if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | - PG_V)) - continue; - else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { - if (advice == MADV_DONTNEED) { - /* - * Future calls to pmap_is_modified() - * can be avoided by making the page - * dirty now. - */ - m = PHYS_TO_VM_PAGE(xpmap_mtop(*pte) & - PG_FRAME); - vm_page_dirty(m); - } - PT_SET_VA_MA(pte, *pte & ~(PG_M | PG_A), TRUE); - } else if ((*pte & PG_A) != 0) - PT_SET_VA_MA(pte, *pte & ~PG_A, TRUE); - else - continue; - if ((*pte & PG_G) != 0) - pmap_invalidate_page(pmap, sva); - else - anychanged = TRUE; - } - } - PT_UPDATES_FLUSH(); - if (*PMAP1) - PT_SET_VA_MA(PMAP1, 0, TRUE); - if (anychanged) - pmap_invalidate_all(pmap); - sched_unpin(); - rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); -} - -/* - * Clear the modify bits on the specified physical page. 
- */ -void -pmap_clear_modify(vm_page_t m) -{ - pv_entry_t pv; - pmap_t pmap; - pt_entry_t *pte; - - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("pmap_clear_modify: page %p is not managed", m)); - VM_OBJECT_ASSERT_WLOCKED(m->object); - KASSERT(!vm_page_xbusied(m), - ("pmap_clear_modify: page %p is exclusive busied", m)); - - /* - * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. - * If the object containing the page is locked and the page is not - * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. - */ - if ((m->aflags & PGA_WRITEABLE) == 0) - return; - rw_wlock(&pvh_global_lock); - sched_pin(); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { - pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); - pte = pmap_pte_quick(pmap, pv->pv_va); - if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { - /* - * Regardless of whether a pte is 32 or 64 bits - * in size, PG_M is among the least significant - * 32 bits. - */ - PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE); - pmap_invalidate_page(pmap, pv->pv_va); - } - PMAP_UNLOCK(pmap); - } - sched_unpin(); - rw_wunlock(&pvh_global_lock); -} - -/* - * Miscellaneous support routines follow - */ - -/* - * Map a set of physical memory pages into the kernel virtual - * address space. Return a pointer to where it is mapped. This - * routine is intended to be used for mapping device memory, - * NOT real memory. - */ -void * -pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) -{ - vm_offset_t va, offset; - vm_size_t tmpsize; - - offset = pa & PAGE_MASK; - size = round_page(offset + size); - pa = pa & PG_FRAME; - - if (pa < KERNLOAD && pa + size <= KERNLOAD) - va = KERNBASE + pa; - else - va = kva_alloc(size); - if (!va) - panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); - - for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) - pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); - pmap_invalidate_range(kernel_pmap, va, va + tmpsize); - pmap_invalidate_cache_range(va, va + size, FALSE); - return ((void *)(va + offset)); -} - -void * -pmap_mapdev(vm_paddr_t pa, vm_size_t size) -{ - - return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); -} - -void * -pmap_mapbios(vm_paddr_t pa, vm_size_t size) -{ - - return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); -} - -void -pmap_unmapdev(vm_offset_t va, vm_size_t size) -{ - vm_offset_t base, offset; - - if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) - return; - base = trunc_page(va); - offset = va & PAGE_MASK; - size = round_page(offset + size); - kva_free(base, size); -} - -/* - * Sets the memory attribute for the specified page. - */ -void -pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) -{ - - m->md.pat_mode = ma; - if ((m->flags & PG_FICTITIOUS) != 0) - return; - - /* - * If "m" is a normal page, flush it from the cache. - * See pmap_invalidate_cache_range(). - * - * First, try to find an existing mapping of the page by sf - * buffer. sf_buf_invalidate_cache() modifies mapping and - * flushes the cache. - */ - if (sf_buf_invalidate_cache(m)) - return; - - /* - * If page is not mapped by sf buffer, but CPU does not - * support self snoop, map the page transient and do - * invalidation. In the worst case, whole cache is flushed by - * pmap_invalidate_cache_range(). 
- */ - if ((cpu_feature & CPUID_SS) == 0) - pmap_flush_page(m); -} - -static void -pmap_flush_page(vm_page_t m) -{ - struct sysmaps *sysmaps; - vm_offset_t sva, eva; - - if ((cpu_feature & CPUID_CLFSH) != 0) { - sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; - mtx_lock(&sysmaps->lock); - if (*sysmaps->CMAP2) - panic("pmap_flush_page: CMAP2 busy"); - sched_pin(); - PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | - VM_PAGE_TO_MACH(m) | PG_A | PG_M | - pmap_cache_bits(m->md.pat_mode, 0)); - invlcaddr(sysmaps->CADDR2); - sva = (vm_offset_t)sysmaps->CADDR2; - eva = sva + PAGE_SIZE; - - /* - * Use mfence despite the ordering implied by - * mtx_{un,}lock() because clflush is not guaranteed - * to be ordered by any other instruction. - */ - mfence(); - for (; sva < eva; sva += cpu_clflush_line_size) - clflush(sva); - mfence(); - PT_SET_MA(sysmaps->CADDR2, 0); - sched_unpin(); - mtx_unlock(&sysmaps->lock); - } else - pmap_invalidate_cache(); -} - -/* - * Changes the specified virtual address range's memory type to that given by - * the parameter "mode". The specified virtual address range must be - * completely contained within either the kernel map. - * - * Returns zero if the change completed successfully, and either EINVAL or - * ENOMEM if the change failed. Specifically, EINVAL is returned if some part - * of the virtual address range was not mapped, and ENOMEM is returned if - * there was insufficient memory available to complete the change. - */ -int -pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) -{ - vm_offset_t base, offset, tmpva; - pt_entry_t *pte; - u_int opte, npte; - pd_entry_t *pde; - boolean_t changed; - - base = trunc_page(va); - offset = va & PAGE_MASK; - size = round_page(offset + size); - - /* Only supported on kernel virtual addresses. */ - if (base <= VM_MAXUSER_ADDRESS) - return (EINVAL); - - /* 4MB pages and pages that aren't mapped aren't supported. */ - for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { - pde = pmap_pde(kernel_pmap, tmpva); - if (*pde & PG_PS) - return (EINVAL); - if ((*pde & PG_V) == 0) - return (EINVAL); - pte = vtopte(va); - if ((*pte & PG_V) == 0) - return (EINVAL); - } - - changed = FALSE; - - /* - * Ok, all the pages exist and are 4k, so run through them updating - * their cache mode. - */ - for (tmpva = base; size > 0; ) { - pte = vtopte(tmpva); - - /* - * The cache mode bits are all in the low 32-bits of the - * PTE, so we can just spin on updating the low 32-bits. - */ - do { - opte = *(u_int *)pte; - npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT); - npte |= pmap_cache_bits(mode, 0); - PT_SET_VA_MA(pte, npte, TRUE); - } while (npte != opte && (*pte != npte)); - if (npte != opte) - changed = TRUE; - tmpva += PAGE_SIZE; - size -= PAGE_SIZE; - } - - /* - * Flush CPU caches to make sure any data isn't cached that - * shouldn't be, etc. - */ - if (changed) { - pmap_invalidate_range(kernel_pmap, base, tmpva); - pmap_invalidate_cache_range(base, tmpva, FALSE); - } - return (0); -} - -/* - * perform the pmap work for mincore - */ -int -pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) -{ - pt_entry_t *ptep, pte; - vm_paddr_t pa; - int val; - - PMAP_LOCK(pmap); -retry: - ptep = pmap_pte(pmap, addr); - pte = (ptep != NULL) ? 
PT_GET(ptep) : 0; - pmap_pte_release(ptep); - val = 0; - if ((pte & PG_V) != 0) { - val |= MINCORE_INCORE; - if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) - val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; - if ((pte & PG_A) != 0) - val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; - } - if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != - (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && - (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { - pa = pte & PG_FRAME; - /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ - if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) - goto retry; - } else - PA_UNLOCK_COND(*locked_pa); - PMAP_UNLOCK(pmap); - return (val); -} - -void -pmap_activate(struct thread *td) -{ - pmap_t pmap, oldpmap; - u_int cpuid; - u_int32_t cr3; - - critical_enter(); - pmap = vmspace_pmap(td->td_proc->p_vmspace); - oldpmap = PCPU_GET(curpmap); - cpuid = PCPU_GET(cpuid); -#if defined(SMP) - CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); - CPU_SET_ATOMIC(cpuid, &pmap->pm_active); -#else - CPU_CLR(cpuid, &oldpmap->pm_active); - CPU_SET(cpuid, &pmap->pm_active); -#endif -#ifdef PAE - cr3 = vtophys(pmap->pm_pdpt); -#else - cr3 = vtophys(pmap->pm_pdir); -#endif - /* - * pmap_activate is for the current thread on the current cpu - */ - td->td_pcb->pcb_cr3 = cr3; - PT_UPDATES_FLUSH(); - load_cr3(cr3); - PCPU_SET(curpmap, pmap); - critical_exit(); -} - -void -pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) -{ -} - -/* - * Increase the starting virtual address of the given mapping if a - * different alignment might result in more superpage mappings. - */ -void -pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, - vm_offset_t *addr, vm_size_t size) -{ - vm_offset_t superpage_offset; - - if (size < NBPDR) - return; - if (object != NULL && (object->flags & OBJ_COLORED) != 0) - offset += ptoa(object->pg_color); - superpage_offset = offset & PDRMASK; - if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || - (*addr & PDRMASK) == superpage_offset) - return; - if ((*addr & PDRMASK) < superpage_offset) - *addr = (*addr & ~PDRMASK) + superpage_offset; - else - *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; -} - -void -pmap_suspend() -{ - pmap_t pmap; - int i, pdir, offset; - vm_paddr_t pdirma; - mmu_update_t mu[4]; - - /* - * We need to remove the recursive mapping structure from all - * our pmaps so that Xen doesn't get confused when it restores - * the page tables. The recursive map lives at page directory - * index PTDPTDI. We assume that the suspend code has stopped - * the other vcpus (if any). - */ - LIST_FOREACH(pmap, &allpmaps, pm_list) { - for (i = 0; i < 4; i++) { - /* - * Figure out which page directory (L2) page - * contains this bit of the recursive map and - * the offset within that page of the map - * entry - */ - pdir = (PTDPTDI + i) / NPDEPG; - offset = (PTDPTDI + i) % NPDEPG; - pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; - mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); - mu[i].val = 0; - } - HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); - } -} - -void -pmap_resume() -{ - pmap_t pmap; - int i, pdir, offset; - vm_paddr_t pdirma; - mmu_update_t mu[4]; - - /* - * Restore the recursive map that we removed on suspend. 
- */ - LIST_FOREACH(pmap, &allpmaps, pm_list) { - for (i = 0; i < 4; i++) { - /* - * Figure out which page directory (L2) page - * contains this bit of the recursive map and - * the offset within that page of the map - * entry - */ - pdir = (PTDPTDI + i) / NPDEPG; - offset = (PTDPTDI + i) % NPDEPG; - pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; - mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); - mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V; - } - HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); - } -} - -#if defined(PMAP_DEBUG) -pmap_pid_dump(int pid) -{ - pmap_t pmap; - struct proc *p; - int npte = 0; - int index; - - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - if (p->p_pid != pid) - continue; - - if (p->p_vmspace) { - int i,j; - index = 0; - pmap = vmspace_pmap(p->p_vmspace); - for (i = 0; i < NPDEPTD; i++) { - pd_entry_t *pde; - pt_entry_t *pte; - vm_offset_t base = i << PDRSHIFT; - - pde = &pmap->pm_pdir[i]; - if (pde && pmap_pde_v(pde)) { - for (j = 0; j < NPTEPG; j++) { - vm_offset_t va = base + (j << PAGE_SHIFT); - if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { - if (index) { - index = 0; - printf("\n"); - } - sx_sunlock(&allproc_lock); - return (npte); - } - pte = pmap_pte(pmap, va); - if (pte && pmap_pte_v(pte)) { - pt_entry_t pa; - vm_page_t m; - pa = PT_GET(pte); - m = PHYS_TO_VM_PAGE(pa & PG_FRAME); - printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", - va, pa, m->hold_count, m->wire_count, m->flags); - npte++; - index++; - if (index >= 2) { - index = 0; - printf("\n"); - } else { - printf(" "); - } - } - } - } - } - } - } - sx_sunlock(&allproc_lock); - return (npte); -} -#endif - -#if defined(DEBUG) - -static void pads(pmap_t pm); -void pmap_pvdump(vm_paddr_t pa); - -/* print address space of pmap*/ -static void -pads(pmap_t pm) -{ - int i, j; - vm_paddr_t va; - pt_entry_t *ptep; - - if (pm == kernel_pmap) - return; - for (i = 0; i < NPDEPTD; i++) - if (pm->pm_pdir[i]) - for (j = 0; j < NPTEPG; j++) { - va = (i << PDRSHIFT) + (j << PAGE_SHIFT); - if (pm == kernel_pmap && va < KERNBASE) - continue; - if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) - continue; - ptep = pmap_pte(pm, va); - if (pmap_pte_v(ptep)) - printf("%x:%x ", va, *ptep); - }; - -} - -void -pmap_pvdump(vm_paddr_t pa) -{ - pv_entry_t pv; - pmap_t pmap; - vm_page_t m; - - printf("pa %x", pa); - m = PHYS_TO_VM_PAGE(pa); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { - pmap = PV_PMAP(pv); - printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); - pads(pmap); - } - printf(" "); -} -#endif Property changes on: head/sys/i386/xen/pmap.c ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/xen/clock.c =================================================================== --- head/sys/i386/xen/clock.c (revision 282273) +++ head/sys/i386/xen/clock.c (nonexistent) @@ -1,570 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz and Don Ahn. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 - */ - -#include -__FBSDID("$FreeBSD$"); - -/* #define DELAYDEBUG */ -/* - * Routines to handle clock hardware. - */ - -#include "opt_ddb.h" -#include "opt_clock.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#if defined(SMP) -#include -#endif -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we - * can use a simple formula for leap years. - */ -#define LEAPYEAR(y) (!((y) % 4)) -#define DAYSPERYEAR (28+30*4+31*7) - -#ifndef TIMER_FREQ -#define TIMER_FREQ 1193182 -#endif - -#ifdef CYC2NS_SCALE_FACTOR -#undef CYC2NS_SCALE_FACTOR -#endif -#define CYC2NS_SCALE_FACTOR 10 - -/* Values for timerX_state: */ -#define RELEASED 0 -#define RELEASE_PENDING 1 -#define ACQUIRED 2 -#define ACQUIRE_PENDING 3 - -struct mtx clock_lock; -#define RTC_LOCK_INIT \ - mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_NOPROFILE) -#define RTC_LOCK mtx_lock_spin(&clock_lock) -#define RTC_UNLOCK mtx_unlock_spin(&clock_lock) -#define NS_PER_TICK (1000000000ULL/hz) - -int adjkerntz; /* local offset from UTC in seconds */ -int clkintr_pending; -int pscnt = 1; -int psdiv = 1; -int wall_cmos_clock; -u_int timer_freq = TIMER_FREQ; -static u_long cyc2ns_scale; -static uint64_t processed_system_time; /* stime (ns) at last processing. 
*/ - -#define do_div(n,base) ({ \ - unsigned long __upper, __low, __high, __mod, __base; \ - __base = (base); \ - __asm("":"=a" (__low), "=d" (__high):"A" (n)); \ - __upper = __high; \ - if (__high) { \ - __upper = __high % (__base); \ - __high = __high / (__base); \ - } \ - __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \ - __asm("":"=A" (n):"a" (__low),"d" (__high)); \ - __mod; \ -}) - - -/* convert from cycles(64bits) => nanoseconds (64bits) - * basic equation: - * ns = cycles / (freq / ns_per_sec) - * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) - * - * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC - * ns = cycles * cyc2ns_scale / SC - * - * And since SC is a constant power of two, we can convert the div - * into a shift. - * -johnstul@us.ibm.com "math is hard, lets go shopping!" - */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) -{ - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; -} - -static inline unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return ((cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR); -} - -static uint32_t -getit(void) -{ - return (pvclock_get_last_cycles()); -} - - -/* - * XXX: timer needs more SMP work. - */ -void -i8254_init(void) -{ - - RTC_LOCK_INIT; -} - -/* - * Wait "n" microseconds. - * Relies on timer 1 counting down from (timer_freq / hz) - * Note: timer had better have been programmed before this is first used! - */ -void -i8254_delay(int n) -{ - int delta, ticks_left; - uint32_t tick, prev_tick; -#ifdef DELAYDEBUG - int getit_calls = 1; - int n1; - static int state = 0; - - if (state == 0) { - state = 1; - for (n1 = 1; n1 <= 10000000; n1 *= 10) - DELAY(n1); - state = 2; - } - if (state == 1) - printf("DELAY(%d)...", n); -#endif - /* - * Read the counter first, so that the rest of the setup overhead is - * counted. Guess the initial overhead is 20 usec (on most systems it - * takes about 1.5 usec for each of the i/o's in getit(). The loop - * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The - * multiplications and divisions to scale the count take a while). - * - * However, if ddb is active then use a fake counter since reading - * the i8254 counter involves acquiring a lock. ddb must not go - * locking for many reasons, but it calls here for at least atkbd - * input. - */ - prev_tick = getit(); - - n -= 0; /* XXX actually guess no initial overhead */ - /* - * Calculate (n * (timer_freq / 1e6)) without using floating point - * and without any avoidable overflows. - */ - if (n <= 0) - ticks_left = 0; - else if (n < 256) - /* - * Use fixed point to avoid a slow division by 1000000. - * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. - * 2^15 is the first power of 2 that gives exact results - * for n between 0 and 256. - */ - ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; - else - /* - * Don't bother using fixed point, although gcc-2.7.2 - * generates particularly poor code for the long long - * division, since even the slow way will complete long - * before the delay is up (unless we're interrupted). - */ - ticks_left = ((u_int)n * (long long)timer_freq + 999999) - / 1000000; - - while (ticks_left > 0) { - tick = getit(); -#ifdef DELAYDEBUG - ++getit_calls; -#endif - delta = tick - prev_tick; - prev_tick = tick; - if (delta < 0) { - /* - * Guard against timer0_max_count being wrong. 
- * This shouldn't happen in normal operation, - * but it may happen if set_timer_freq() is - * traced. - */ - /* delta += timer0_max_count; ??? */ - if (delta < 0) - delta = 0; - } - ticks_left -= delta; - } -#ifdef DELAYDEBUG - if (state == 1) - printf(" %d calls to getit() at %d usec each\n", - getit_calls, (n + 5) / getit_calls); -#endif -} - -void -startrtclock() -{ - uint64_t __cpu_khz; - uint32_t cpu_khz; - struct vcpu_time_info *info; - - __cpu_khz = 1000000ULL << 32; - info = &HYPERVISOR_shared_info->vcpu_info[0].time; - - (void)do_div(__cpu_khz, info->tsc_to_system_mul); - if ( info->tsc_shift < 0 ) - cpu_khz = __cpu_khz << -info->tsc_shift; - else - cpu_khz = __cpu_khz >> info->tsc_shift; - - printf("Xen reported: %u.%03u MHz processor.\n", - cpu_khz / 1000, cpu_khz % 1000); - - /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = - (2^32 * 1 / (clocks/us)) */ - - set_cyc2ns_scale(cpu_khz/1000); - tsc_freq = cpu_khz * 1000; -} - -/* - * RTC support routines - */ - - -static __inline int -readrtc(int port) -{ - return(bcd2bin(rtcin(port))); -} - - -#ifdef XEN_PRIVILEGED_GUEST - -/* - * Initialize the time of day register, based on the time base which is, e.g. - * from a filesystem. - */ -static void -domu_inittodr(time_t base) -{ - unsigned long sec; - int s, y; - struct timespec ts; - - update_wallclock(); - add_uptime_to_wallclock(); - - RTC_LOCK; - - if (base) { - ts.tv_sec = base; - ts.tv_nsec = 0; - tc_setclock(&ts); - } - - sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); - - y = time_second - shadow_tv.tv_sec; - if (y <= -2 || y >= 2) { - /* badly off, adjust it */ - tc_setclock(&shadow_tv); - } - RTC_UNLOCK; -} - -/* - * Write system time back to RTC. - */ -static void -domu_resettodr(void) -{ - unsigned long tm; - int s; - dom0_op_t op; - struct shadow_time_info *shadow; - struct pcpu *pc; - - pc = pcpu_find(smp_processor_id()); - shadow = &pc->pc_shadow_time; - if (xen_disable_rtc_set) - return; - - s = splclock(); - tm = time_second; - splx(s); - - tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); - - if ((xen_start_info->flags & SIF_INITDOMAIN) && - !independent_wallclock) - { - op.cmd = DOM0_SETTIME; - op.u.settime.secs = tm; - op.u.settime.nsecs = 0; - op.u.settime.system_time = shadow->system_timestamp; - HYPERVISOR_dom0_op(&op); - update_wallclock(); - add_uptime_to_wallclock(); - } else if (independent_wallclock) { - /* notyet */ - ; - } -} - -/* - * Initialize the time of day register, based on the time base which is, e.g. - * from a filesystem. 
- */ -void -inittodr(time_t base) -{ - unsigned long sec, days; - int year, month; - int y, m, s; - struct timespec ts; - - if (!(xen_start_info->flags & SIF_INITDOMAIN)) { - domu_inittodr(base); - return; - } - - if (base) { - s = splclock(); - ts.tv_sec = base; - ts.tv_nsec = 0; - tc_setclock(&ts); - splx(s); - } - - /* Look if we have a RTC present and the time is valid */ - if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) - goto wrong_time; - - /* wait for time update to complete */ - /* If RTCSA_TUP is zero, we have at least 244us before next update */ - s = splhigh(); - while (rtcin(RTC_STATUSA) & RTCSA_TUP) { - splx(s); - s = splhigh(); - } - - days = 0; -#ifdef USE_RTC_CENTURY - year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; -#else - year = readrtc(RTC_YEAR) + 1900; - if (year < 1970) - year += 100; -#endif - if (year < 1970) { - splx(s); - goto wrong_time; - } - month = readrtc(RTC_MONTH); - for (m = 1; m < month; m++) - days += daysinmonth[m-1]; - if ((month > 2) && LEAPYEAR(year)) - days ++; - days += readrtc(RTC_DAY) - 1; - for (y = 1970; y < year; y++) - days += DAYSPERYEAR + LEAPYEAR(y); - sec = ((( days * 24 + - readrtc(RTC_HRS)) * 60 + - readrtc(RTC_MIN)) * 60 + - readrtc(RTC_SEC)); - /* sec now contains the number of seconds, since Jan 1 1970, - in the local time zone */ - - sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); - - y = time_second - sec; - if (y <= -2 || y >= 2) { - /* badly off, adjust it */ - ts.tv_sec = sec; - ts.tv_nsec = 0; - tc_setclock(&ts); - } - splx(s); - return; - - wrong_time: - printf("Invalid time in real time clock.\n"); - printf("Check and reset the date immediately!\n"); -} - - -/* - * Write system time back to RTC - */ -void -resettodr() -{ - unsigned long tm; - int y, m, s; - - if (!(xen_start_info->flags & SIF_INITDOMAIN)) { - domu_resettodr(); - return; - } - - if (xen_disable_rtc_set) - return; - - s = splclock(); - tm = time_second; - splx(s); - - /* Disable RTC updates and interrupts. */ - writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); - - /* Calculate local time to put in RTC */ - - tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); - - writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ - writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ - writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ - - /* We have now the days since 01-01-1970 in tm */ - writertc(RTC_WDAY, (tm + 4) % 7 + 1); /* Write back Weekday */ - for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); - tm >= m; - y++, m = DAYSPERYEAR + LEAPYEAR(y)) - tm -= m; - - /* Now we have the years in y and the day-of-the-year in tm */ - writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ -#ifdef USE_RTC_CENTURY - writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ -#endif - for (m = 0; ; m++) { - int ml; - - ml = daysinmonth[m]; - if (m == 1 && LEAPYEAR(y)) - ml++; - if (tm < ml) - break; - tm -= ml; - } - - writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ - writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ - - /* Reenable RTC updates and interrupts. */ - writertc(RTC_STATUSB, RTCSB_24HR); - rtcin(RTC_INTR); -} -#endif - -/* - * Start clocks running. 
- */ -void -cpu_initclocks(void) -{ - cpu_initclocks_bsp(); -} - -/* Return system time offset by ticks */ -uint64_t -get_system_time(int ticks) -{ - return (processed_system_time + (ticks * NS_PER_TICK)); -} - -int -timer_spkr_acquire(void) -{ - - return (0); -} - -int -timer_spkr_release(void) -{ - - return (0); -} - -void -timer_spkr_setfreq(int freq) -{ - -} - Property changes on: head/sys/i386/xen/clock.c ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/xen/locore.s =================================================================== --- head/sys/i386/xen/locore.s (revision 282273) +++ head/sys/i386/xen/locore.s (nonexistent) @@ -1,360 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $FreeBSD$ - * - * originally from: locore.s, by William F. Jolitz - * - * Substantially rewritten by David Greenman, Rod Grimes, - * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp - * and many others. 
- */ - -#include "opt_bootp.h" -#include "opt_compat.h" -#include "opt_nfsroot.h" -#include "opt_pmap.h" - -#include -#include - -#include -#include -#include -#include -#include - -#define __ASSEMBLY__ -#include - -/* The defines below have been lifted out of */ -#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ -#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ -#define KERNEL_CS FLAT_RING1_CS -#define KERNEL_DS FLAT_RING1_DS - -#include "assym.s" - -.section __xen_guest - .ascii "LOADER=generic,GUEST_OS=freebsd,GUEST_VER=7.0,XEN_VER=xen-3.0,BSD_SYMTAB,VIRT_BASE=0xc0000000" - .byte 0 - - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "FreeBSD") - ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "HEAD") - ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") - ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, KERNBASE) - ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, KERNBASE) - ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, btext) - ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) - ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, XEN_HYPERVISOR_VIRT_START) -#if 0 - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") -#endif - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|supervisor_mode_kernel|writable_descriptor_tables") - -#ifdef PAE - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V) -#else - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no") - ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V) -#endif - ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") - ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1) - - - -/* - * XXX - * - * Note: This version greatly munged to avoid various assembler errors - * that may be fixed in newer versions of gas. Perhaps newer versions - * will have more pleasant appearance. - */ - -/* - * PTmap is recursive pagemap at top of virtual address space. - * Within PTmap, the page directory can be found (third indirection). 
- */ - .globl PTmap,PTD,PTDpde - .set PTmap,(PTDPTDI << PDRSHIFT) - .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) - .set PTDpde,PTD + (PTDPTDI * PDESIZE) - -/* - * Compiled KERNBASE location and the kernel load address - */ - .globl kernbase - .set kernbase,KERNBASE - .globl kernload - .set kernload,KERNLOAD - -/* - * Globals - */ - .data - ALIGN_DATA /* just to be sure */ - - .space 0x2000 /* space for tmpstk - temporary stack */ -tmpstk: - - .globl bootinfo -bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ - - .globl KERNend -KERNend: .long 0 /* phys addr end of kernel (just after bss) */ - .globl physfree -physfree: .long 0 /* phys addr of next free page */ - - .globl IdlePTD -IdlePTD: .long 0 /* phys addr of kernel PTD */ - -#ifdef PAE - .globl IdlePDPT -IdlePDPT: .long 0 /* phys addr of kernel PDPT */ -#endif - -#ifdef SMP - .globl KPTphys -#endif -KPTphys: .long 0 /* phys addr of kernel page tables */ - .globl gdtset -gdtset: .long 0 /* GDT is valid */ - - .globl proc0kstack -proc0kstack: .long 0 /* address of proc 0 kstack space */ -p0kpa: .long 0 /* phys addr of proc0's STACK */ - -vm86phystk: .long 0 /* PA of vm86/bios stack */ - - .globl vm86paddr, vm86pa -vm86paddr: .long 0 /* address of vm86 region */ -vm86pa: .long 0 /* phys addr of vm86 region */ - -#ifdef PC98 - .globl pc98_system_parameter -pc98_system_parameter: - .space 0x240 -#endif - - .globl avail_space -avail_space: .long 0 - -/********************************************************************** - * - * Some handy macros - * - */ - -/* - * We're already in protected mode, so no remapping is needed. - */ -#define R(foo) (foo) - -#define ALLOCPAGES(foo) \ - movl R(physfree), %esi ; \ - movl $((foo)*PAGE_SIZE), %eax ; \ - addl %esi, %eax ; \ - movl %eax, R(physfree) ; \ - movl %esi, %edi ; \ - movl $((foo)*PAGE_SIZE),%ecx ; \ - xorl %eax,%eax ; \ - cld ; \ - rep ; \ - stosb - -/* - * fillkpt - * eax = page frame address - * ebx = index into page table - * ecx = how many pages to map - * base = base address of page dir/table - * prot = protection bits - */ -#define fillkpt(base, prot) \ - shll $PTESHIFT,%ebx ; \ - addl base,%ebx ; \ - orl $PG_V,%eax ; \ - orl prot,%eax ; \ -1: movl %eax,(%ebx) ; \ - addl $PAGE_SIZE,%eax ; /* increment physical address */ \ - addl $PTESIZE,%ebx ; /* next pte */ \ - loop 1b - -/* - * fillkptphys(prot) - * eax = physical address - * ecx = how many pages to map - * prot = protection bits - */ -#define fillkptphys(prot) \ - movl %eax, %ebx ; \ - shrl $PAGE_SHIFT, %ebx ; \ - fillkpt(R(KPTphys), prot) - -/* Temporary stack */ -.space 8192 -tmpstack: - .long tmpstack, KERNEL_DS - - .text - -.p2align 12, 0x90 - -#define HYPERCALL_PAGE_OFFSET 0x1000 -.org HYPERCALL_PAGE_OFFSET -ENTRY(hypercall_page) - .cfi_startproc - .skip 0x1000 - .cfi_endproc - -/********************************************************************** - * - * This is where the bootblocks start us, set the ball rolling... 
- * - */ -NON_GPROF_ENTRY(btext) - /* At the end of our stack, we shall have free space - so store it */ - movl %esp,%ebx - movl %ebx,R(avail_space) - - lss tmpstack,%esp - - pushl %esi - call initvalues - popl %esi - - /* Store the CPUID information */ - xorl %eax,%eax - cpuid # cpuid 0 - movl %eax,R(cpu_high) # highest capability - movl %ebx,R(cpu_vendor) # store vendor string - movl %edx,R(cpu_vendor+4) - movl %ecx,R(cpu_vendor+8) - movb $0,R(cpu_vendor+12) - - movl $1,%eax - cpuid # cpuid 1 - movl %eax,R(cpu_id) # store cpu_id - movl %ebx,R(cpu_procinfo) # store cpu_procinfo - movl %edx,R(cpu_feature) # store cpu_feature - movl %ecx,R(cpu_feature2) # store cpu_feature2 - rorl $8,%eax # extract family type - andl $15,%eax - cmpl $5,%eax - movl $CPU_686,R(cpu) - - movl proc0kstack,%eax - leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp - xorl %ebp,%ebp /* mark end of frames */ -#ifdef PAE - movl IdlePDPT,%esi -#else - movl IdlePTD,%esi -#endif - movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) - pushl physfree - call init386 - addl $4, %esp - call mi_startup - /* NOTREACHED */ - int $3 - -/* - * Signal trampoline, copied to top of user stack - */ -NON_GPROF_ENTRY(sigcode) - calll *SIGF_HANDLER(%esp) - leal SIGF_UC(%esp),%eax /* get ucontext */ - pushl %eax - testl $PSL_VM,UC_EFLAGS(%eax) - jne 1f - mov UC_GS(%eax), %gs /* restore %gs */ -1: - movl $SYS_sigreturn,%eax - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ - /* on stack */ -1: - jmp 1b - -#ifdef COMPAT_FREEBSD4 - ALIGN_TEXT -freebsd4_sigcode: - calll *SIGF_HANDLER(%esp) - leal SIGF_UC4(%esp),%eax /* get ucontext */ - pushl %eax - testl $PSL_VM,UC4_EFLAGS(%eax) - jne 1f - mov UC4_GS(%eax),%gs /* restore %gs */ -1: - movl $344,%eax /* 4.x SYS_sigreturn */ - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ - /* on stack */ -1: - jmp 1b -#endif - -#ifdef COMPAT_43 - ALIGN_TEXT -osigcode: - call *SIGF_HANDLER(%esp) /* call signal handler */ - lea SIGF_SC(%esp),%eax /* get sigcontext */ - pushl %eax - testl $PSL_VM,SC_PS(%eax) - jne 9f - movl SC_GS(%eax),%gs /* restore %gs */ -9: - movl $103,%eax /* 3.x SYS_sigreturn */ - pushl %eax /* junk to fake return addr. */ - int $0x80 /* enter kernel with args */ -0: jmp 0b -#endif /* COMPAT_43 */ - - ALIGN_TEXT -esigcode: - - .data - .globl szsigcode -szsigcode: - .long esigcode-sigcode -#ifdef COMPAT_FREEBSD4 - .globl szfreebsd4_sigcode -szfreebsd4_sigcode: - .long esigcode-freebsd4_sigcode -#endif -#ifdef COMPAT_43 - .globl szosigcode -szosigcode: - .long esigcode-osigcode -#endif Property changes on: head/sys/i386/xen/locore.s ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/xen/exception.s =================================================================== --- head/sys/i386/xen/exception.s (revision 282273) +++ head/sys/i386/xen/exception.s (nonexistent) @@ -1,494 +0,0 @@ -/*- - * Copyright (c) 1989, 1990 William F. Jolitz. - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include "opt_apic.h" -#include "opt_npx.h" - -#include -#include -#include - -#include "assym.s" - -#define SEL_RPL_MASK 0x0002 -#define __HYPERVISOR_iret 23 - -/* Offsets into shared_info_t. */ - -#define evtchn_upcall_pending /* 0 */ -#define evtchn_upcall_mask 1 - -#define sizeof_vcpu_shift 6 - - -#ifdef SMP -#define GET_VCPU_INFO(reg) movl PCPU(CPUID),reg ; \ - shl $sizeof_vcpu_shift,reg ; \ - addl HYPERVISOR_shared_info,reg -#else -#define GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg -#endif - -#define __DISABLE_INTERRUPTS(reg) movb $1,evtchn_upcall_mask(reg) -#define __ENABLE_INTERRUPTS(reg) movb $0,evtchn_upcall_mask(reg) -#define DISABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \ - __DISABLE_INTERRUPTS(reg) -#define ENABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \ - __ENABLE_INTERRUPTS(reg) -#define __TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) - -#define POPA \ - popl %edi; \ - popl %esi; \ - popl %ebp; \ - popl %ebx; \ - popl %ebx; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - - .text - -/*****************************************************************************/ -/* Trap handling */ -/*****************************************************************************/ -/* - * Trap and fault vector routines. - * - * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on - * the stack that mostly looks like an interrupt, but does not disable - * interrupts. A few of the traps we are use are interrupt gates, - * SDT_SYS386IGT, which are nearly the same thing except interrupts are - * disabled on entry. - * - * The cpu will push a certain amount of state onto the kernel stack for - * the current process. The amount of state depends on the type of trap - * and whether the trap crossed rings or not. See i386/include/frame.h. - * At the very least the current EFLAGS (status register, which includes - * the interrupt disable state prior to the trap), the code segment register, - * and the return instruction pointer are pushed by the cpu. The cpu - * will also push an 'error' code for certain traps. We push a dummy - * error code for those traps where the cpu doesn't in order to maintain - * a consistent frame. 
We also push a contrived 'trap number'. - * - * The cpu does not push the general registers, we must do that, and we - * must restore them prior to calling 'iret'. The cpu adjusts the %cs and - * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we - * must load them with appropriate values for supervisor mode operation. - */ - -MCOUNT_LABEL(user) -MCOUNT_LABEL(btrap) - -#define TRAP(a) pushl $(a) ; jmp alltraps - -IDTVEC(div) - pushl $0; TRAP(T_DIVIDE) -IDTVEC(dbg) - pushl $0; TRAP(T_TRCTRAP) -IDTVEC(nmi) - pushl $0; TRAP(T_NMI) -IDTVEC(bpt) - pushl $0; TRAP(T_BPTFLT) -IDTVEC(ofl) - pushl $0; TRAP(T_OFLOW) -IDTVEC(bnd) - pushl $0; TRAP(T_BOUND) -IDTVEC(ill) - pushl $0; TRAP(T_PRIVINFLT) -IDTVEC(dna) - pushl $0; TRAP(T_DNA) -IDTVEC(fpusegm) - pushl $0; TRAP(T_FPOPFLT) -IDTVEC(tss) - TRAP(T_TSSFLT) -IDTVEC(missing) - TRAP(T_SEGNPFLT) -IDTVEC(stk) - TRAP(T_STKFLT) -IDTVEC(prot) - TRAP(T_PROTFLT) -IDTVEC(page) - TRAP(T_PAGEFLT) -IDTVEC(mchk) - pushl $0; TRAP(T_MCHK) -IDTVEC(rsvd) - pushl $0; TRAP(T_RESERVED) -IDTVEC(fpu) - pushl $0; TRAP(T_ARITHTRAP) -IDTVEC(align) - TRAP(T_ALIGNFLT) -IDTVEC(xmm) - pushl $0; TRAP(T_XMMFLT) - -IDTVEC(hypervisor_callback) - pushl $0; - pushl $0; - pushal - pushl %ds - pushl %es - pushl %fs -upcall_with_regs_pushed: - SET_KERNEL_SREGS - FAKE_MCOUNT(TF_EIP(%esp)) -call_evtchn_upcall: - movl TF_EIP(%esp),%eax - cmpl $scrit,%eax - jb 10f - cmpl $ecrit,%eax - jb critical_region_fixup - -10: pushl %esp - call xen_intr_handle_upcall - addl $4,%esp - - /* - * Return via doreti to handle ASTs. - */ - MEXITCOUNT - jmp doreti - - -hypervisor_callback_pending: - DISABLE_INTERRUPTS(%esi) /* cli */ - jmp 10b - /* - * alltraps entry point. Interrupts are enabled if this was a trap - * gate (TGT), else disabled if this was an interrupt gate (IGT). - * Note that int0x80_syscall is a trap gate. Only page faults - * use an interrupt gate. - */ - SUPERALIGN_TEXT - .globl alltraps - .type alltraps,@function -alltraps: - pushal - pushl %ds - pushl %es - pushl %fs - -alltraps_with_regs_pushed: - SET_KERNEL_SREGS - FAKE_MCOUNT(TF_EIP(%esp)) - -calltrap: - push %esp - call trap - add $4, %esp - - /* - * Return via doreti to handle ASTs. - */ - MEXITCOUNT - jmp doreti - -/* - * SYSCALL CALL GATE (old entry point for a.out binaries) - * - * The intersegment call has been set up to specify one dummy parameter. - * - * This leaves a place to put eflags so that the call frame can be - * converted to a trap frame. Note that the eflags is (semi-)bogusly - * pushed into (what will be) tf_err and then copied later into the - * final spot. It has to be done this way because esp can't be just - * temporarily altered for the pushfl - an interrupt might come in - * and clobber the saved cs/eip. - */ - SUPERALIGN_TEXT -IDTVEC(lcall_syscall) - pushfl /* save eflags */ - popl 8(%esp) /* shuffle into tf_eflags */ - pushl $7 /* sizeof "lcall 7,0" */ - subl $4,%esp /* skip over tf_trapno */ - pushal - pushl %ds - pushl %es - pushl %fs - SET_KERNEL_SREGS - FAKE_MCOUNT(TF_EIP(%esp)) - pushl %esp - call syscall - add $4, %esp - MEXITCOUNT - jmp doreti - -/* - * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) - * - * Even though the name says 'int0x80', this is actually a TGT (trap gate) - * rather then an IGT (interrupt gate). Thus interrupts are enabled on - * entry just as they are for a normal syscall. 
- */ - SUPERALIGN_TEXT -IDTVEC(int0x80_syscall) - pushl $2 /* sizeof "int 0x80" */ - pushl $0xBEEF /* for debug */ - pushal - pushl %ds - pushl %es - pushl %fs - SET_KERNEL_SREGS - FAKE_MCOUNT(TF_EIP(%esp)) - pushl %esp - call syscall - add $4, %esp - MEXITCOUNT - jmp doreti - -ENTRY(fork_trampoline) - pushl %esp /* trapframe pointer */ - pushl %ebx /* arg1 */ - pushl %esi /* function */ - call fork_exit - addl $12,%esp - /* cut from syscall */ - - /* - * Return via doreti to handle ASTs. - */ - MEXITCOUNT - jmp doreti - - -/* - * To efficiently implement classification of trap and interrupt handlers - * for profiling, there must be only trap handlers between the labels btrap - * and bintr, and only interrupt handlers between the labels bintr and - * eintr. This is implemented (partly) by including files that contain - * some of the handlers. Before including the files, set up a normal asm - * environment so that the included files doen't need to know that they are - * included. - */ - - .data - .p2align 4 - .text - SUPERALIGN_TEXT -MCOUNT_LABEL(bintr) - -#ifdef DEV_APIC - .data - .p2align 4 - .text - SUPERALIGN_TEXT - -#include -#endif - - .data - .p2align 4 - .text - SUPERALIGN_TEXT -#include - - .text -MCOUNT_LABEL(eintr) - -/* - * void doreti(struct trapframe) - * - * Handle return from interrupts, traps and syscalls. - */ - .text - SUPERALIGN_TEXT - .type doreti,@function -doreti: - FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */ -doreti_next: -#ifdef notyet - /* - * Check if ASTs can be handled now. PSL_VM must be checked first - * since segment registers only have an RPL in non-VM86 mode. - */ - testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */ - jz doreti_notvm86 - movl PCPU(CURPCB),%ecx - testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */ - jz doreti_ast /* can handle ASTS now if not */ - jmp doreti_exit - -doreti_notvm86: -#endif - testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ - jz doreti_exit /* can't handle ASTs now if not */ - -doreti_ast: - /* - * Check for ASTs atomically with returning. Disabling CPU - * interrupts provides sufficient locking even in the SMP case, - * since we will be informed of any new ASTs by an IPI. - */ - DISABLE_INTERRUPTS(%esi) /* cli */ - movl PCPU(CURTHREAD),%eax - testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) - je doreti_exit - ENABLE_INTERRUPTS(%esi) /* sti */ - pushl %esp /* pass a pointer to the trapframe */ - call ast - add $4,%esp - jmp doreti_ast - - /* - * doreti_exit: pop registers, iret. - * - * The segment register pop is a special case, since it may - * fault if (for example) a sigreturn specifies bad segment - * registers. The fault is handled in trap.c. - */ -doreti_exit: - ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti) - - .globl scrit -scrit: - __TEST_PENDING(%esi) - jnz hypervisor_callback_pending /* More to go */ - - MEXITCOUNT - - .globl doreti_popl_fs -doreti_popl_fs: - popl %fs - .globl doreti_popl_es -doreti_popl_es: - popl %es - .globl doreti_popl_ds -doreti_popl_ds: - popl %ds - - /* - * This is important: as nothing is atomic over here (we can get - * interrupted any time), we use the critical_region_fixup() in - * order to figure out where out stack is. Therefore, do NOT use - * 'popal' here without fixing up the table! - */ - POPA - addl $8,%esp - .globl doreti_iret -doreti_iret: - jmp hypercall_page + (__HYPERVISOR_iret * 32) - .globl ecrit -ecrit: - /* - * doreti_iret_fault and friends. 
Alternative return code for - * the case where we get a fault in the doreti_exit code - * above. trap() (i386/i386/trap.c) catches this specific - * case, sends the process a signal and continues in the - * corresponding place in the code below. - */ - ALIGN_TEXT - .globl doreti_iret_fault -doreti_iret_fault: - subl $8,%esp - pushal - pushl %ds - .globl doreti_popl_ds_fault -doreti_popl_ds_fault: - pushl %es - .globl doreti_popl_es_fault -doreti_popl_es_fault: - pushl %fs - .globl doreti_popl_fs_fault -doreti_popl_fs_fault: - movl $0,TF_ERR(%esp) /* XXX should be the error code */ - movl $T_PROTFLT,TF_TRAPNO(%esp) - jmp alltraps_with_regs_pushed - - /* -# [How we do the fixup]. We want to merge the current stack frame with the -# just-interrupted frame. How we do this depends on where in the critical -# region the interrupted handler was executing, and so how many saved -# registers are in each frame. We do this quickly using the lookup table -# 'critical_fixup_table'. For each byte offset in the critical region, it -# provides the number of bytes which have already been popped from the -# interrupted stack frame. -*/ - -.globl critical_region_fixup -critical_region_fixup: - addl $critical_fixup_table-scrit,%eax - movzbl (%eax),%eax # %eax contains num bytes popped - movl %esp,%esi - add %eax,%esi # %esi points at end of src region - movl %esp,%edi - add $0x40,%edi # %edi points at end of dst region - movl %eax,%ecx - shr $2,%ecx # convert bytes to words - je 16f # skip loop if nothing to copy -15: subl $4,%esi # pre-decrementing copy loop - subl $4,%edi - movl (%esi),%eax - movl %eax,(%edi) - loop 15b -16: movl %edi,%esp # final %edi is top of merged stack - jmp hypervisor_callback_pending - - -critical_fixup_table: -.byte 0x0,0x0,0x0 #testb $0x1,(%esi) -.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea -.byte 0x0,0x0 #pop %fs -.byte 0x04 #pop %es -.byte 0x08 #pop %ds -.byte 0x0c #pop %edi -.byte 0x10 #pop %esi -.byte 0x14 #pop %ebp -.byte 0x18 #pop %ebx -.byte 0x1c #pop %ebx -.byte 0x20 #pop %edx -.byte 0x24 #pop %ecx -.byte 0x28 #pop %eax -.byte 0x2c,0x2c,0x2c #add $0x8,%esp -#if 0 - .byte 0x34 #iret -#endif -.byte 0x34,0x34,0x34,0x34,0x34 #HYPERVISOR_iret - - -/* # Hypervisor uses this for application faults while it executes.*/ -ENTRY(failsafe_callback) - pushal - call xen_failsafe_handler -/*# call install_safe_pf_handler */ - movl 28(%esp),%ebx -1: movl %ebx,%ds - movl 32(%esp),%ebx -2: movl %ebx,%es - movl 36(%esp),%ebx -3: movl %ebx,%fs - movl 40(%esp),%ebx -4: movl %ebx,%gs -/*# call install_normal_pf_handler */ - popal - addl $12,%esp - iret - - Property changes on: head/sys/i386/xen/exception.s ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/xen/xen_machdep.c =================================================================== --- head/sys/i386/xen/xen_machdep.c (revision 282273) +++ head/sys/i386/xen/xen_machdep.c (nonexistent) @@ -1,1236 +0,0 @@ -/* - * - * Copyright (c) 2004 Christian Limpach. - * Copyright (c) 2004-2006,2008 Kip Macy - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - - -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef SMP -#include -#endif - - -#include - - -#define IDTVEC(name) __CONCAT(X,name) - -extern inthand_t -IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), - IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), - IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), - IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), - IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); - - -int xendebug_flags; -start_info_t *xen_start_info; -start_info_t *HYPERVISOR_start_info; -shared_info_t *HYPERVISOR_shared_info; -xen_pfn_t *xen_machine_phys = machine_to_phys_mapping; -xen_pfn_t *xen_phys_machine; -xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; -xen_pfn_t *xen_pfn_to_mfn_frame_list_list; -int preemptable, init_first; -extern unsigned int avail_space; -int xen_vector_callback_enabled = 0; -enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN; - -void ni_cli(void); -void ni_sti(void); - - -void -ni_cli(void) -{ - CTR0(KTR_SPARE2, "ni_cli disabling interrupts"); - __asm__("pushl %edx;" - "pushl %eax;" - ); - __cli(); - __asm__("popl %eax;" - "popl %edx;" - ); -} - - -void -ni_sti(void) -{ - __asm__("pushl %edx;" - "pushl %esi;" - "pushl %eax;" - ); - __sti(); - __asm__("popl %eax;" - "popl %esi;" - "popl %edx;" - ); -} - -void -force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} - -/* - * Modify the cmd_line by converting ',' to NULLs so that it is in a format - * suitable for the static env vars. 
- */ -char * -xen_setbootenv(char *cmd_line) -{ - char *cmd_line_next; - - /* Skip leading spaces */ - for (; *cmd_line == ' '; cmd_line++); - - xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line); - - for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;); - return cmd_line; -} - -int -xen_boothowto(char *envp) -{ - int i, howto = 0; - - /* get equivalents from the environment */ - for (i = 0; howto_names[i].ev != NULL; i++) - if (kern_getenv(howto_names[i].ev) != NULL) - howto |= howto_names[i].mask; - return howto; -} - - -#define XPQUEUE_SIZE 128 - -struct mmu_log { - char *file; - int line; -}; - -#ifdef SMP -/* per-cpu queues and indices */ -#ifdef INVARIANTS -static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE]; -#endif - -static int xpq_idx[XEN_LEGACY_MAX_VCPUS]; -static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE]; - -#define XPQ_QUEUE_LOG xpq_queue_log[vcpu] -#define XPQ_QUEUE xpq_queue[vcpu] -#define XPQ_IDX xpq_idx[vcpu] -#define SET_VCPU() int vcpu = smp_processor_id() -#else - -static mmu_update_t xpq_queue[XPQUEUE_SIZE]; -#ifdef INVARIANTS -static struct mmu_log xpq_queue_log[XPQUEUE_SIZE]; -#endif -static int xpq_idx = 0; - -#define XPQ_QUEUE_LOG xpq_queue_log -#define XPQ_QUEUE xpq_queue -#define XPQ_IDX xpq_idx -#define SET_VCPU() -#endif /* !SMP */ - -#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1); - -#if 0 -static void -xen_dump_queue(void) -{ - int _xpq_idx = XPQ_IDX; - int i; - - if (_xpq_idx <= 1) - return; - - xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx); - for (i = 0; i < _xpq_idx; i++) { - xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, - XPQ_QUEUE[i].ptr); - } -} -#endif - - -static __inline void -_xen_flush_queue(void) -{ - SET_VCPU(); - int _xpq_idx = XPQ_IDX; - int error, i; - -#ifdef INVARIANTS - if (__predict_true(gdtset)) - CRITICAL_ASSERT(curthread); -#endif - - XPQ_IDX = 0; - /* Make sure index is cleared first to avoid double updates. 
*/ - error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE, - _xpq_idx, NULL, DOMID_SELF); - -#if 0 - if (__predict_true(gdtset)) - for (i = _xpq_idx; i > 0;) { - if (i >= 3) { - CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx " - "ptr: %lx val: %lx ptr: %lx", - (XPQ_QUEUE[i-1].val & 0xffffffff), - (XPQ_QUEUE[i-1].ptr & 0xffffffff), - (XPQ_QUEUE[i-2].val & 0xffffffff), - (XPQ_QUEUE[i-2].ptr & 0xffffffff), - (XPQ_QUEUE[i-3].val & 0xffffffff), - (XPQ_QUEUE[i-3].ptr & 0xffffffff)); - i -= 3; - } else if (i == 2) { - CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx", - (XPQ_QUEUE[i-1].val & 0xffffffff), - (XPQ_QUEUE[i-1].ptr & 0xffffffff), - (XPQ_QUEUE[i-2].val & 0xffffffff), - (XPQ_QUEUE[i-2].ptr & 0xffffffff)); - i = 0; - } else { - CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", - (XPQ_QUEUE[i-1].val & 0xffffffff), - (XPQ_QUEUE[i-1].ptr & 0xffffffff)); - i = 0; - } - } -#endif - if (__predict_false(error < 0)) { - for (i = 0; i < _xpq_idx; i++) - printf("val: %llx ptr: %llx\n", - XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); - panic("Failed to execute MMU updates: %d", error); - } - -} - -void -xen_flush_queue(void) -{ - SET_VCPU(); - - if (__predict_true(gdtset)) - critical_enter(); - if (XPQ_IDX != 0) _xen_flush_queue(); - if (__predict_true(gdtset)) - critical_exit(); -} - -static __inline void -xen_increment_idx(void) -{ - SET_VCPU(); - - XPQ_IDX++; - if (__predict_false(XPQ_IDX == XPQUEUE_SIZE)) - xen_flush_queue(); -} - -void -xen_check_queue(void) -{ -#ifdef INVARIANTS - SET_VCPU(); - - KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX)); -#endif -} - -void -xen_invlpg(vm_offset_t va) -{ - struct mmuext_op op; - op.cmd = MMUEXT_INVLPG_ALL; - op.arg1.linear_addr = va & ~PAGE_MASK; - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_load_cr3(u_int val) -{ - struct mmuext_op op; -#ifdef INVARIANTS - SET_VCPU(); - - KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX)); -#endif - op.cmd = MMUEXT_NEW_BASEPTR; - op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT; - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -#ifdef KTR -static __inline u_int -rebp(void) -{ - u_int data; - - __asm __volatile("movl 4(%%ebp),%0" : "=r" (data)); - return (data); -} -#endif - -u_int -read_eflags(void) -{ - vcpu_info_t *_vcpu; - u_int eflags; - - eflags = _read_eflags(); - _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; - if (_vcpu->evtchn_upcall_mask) - eflags &= ~PSL_I; - - return (eflags); -} - -void -write_eflags(u_int eflags) -{ - u_int intr; - - CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags); - intr = ((eflags & PSL_I) == 0); - __restore_flags(intr); - _write_eflags(eflags); -} - -void -xen_cli(void) -{ - CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp()); - __cli(); -} - -void -xen_sti(void) -{ - CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp()); - __sti(); -} - -u_int -xen_rcr2(void) -{ - - return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2); -} - -void -_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line) -{ - SET_VCPU(); - - if (__predict_true(gdtset)) - critical_enter(); - XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; - XPQ_QUEUE[XPQ_IDX].val = pfn; -#ifdef INVARIANTS - XPQ_QUEUE_LOG[XPQ_IDX].file = file; - XPQ_QUEUE_LOG[XPQ_IDX].line = line; -#endif - xen_increment_idx(); - if (__predict_true(gdtset)) - critical_exit(); -} - -extern struct rwlock pvh_global_lock; - -void -_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char 
*file, int line) -{ - SET_VCPU(); - - if (__predict_true(gdtset)) - rw_assert(&pvh_global_lock, RA_WLOCKED); - - KASSERT((ptr & 7) == 0, ("misaligned update")); - - if (__predict_true(gdtset)) - critical_enter(); - - XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE; - XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val; -#ifdef INVARIANTS - XPQ_QUEUE_LOG[XPQ_IDX].file = file; - XPQ_QUEUE_LOG[XPQ_IDX].line = line; -#endif - xen_increment_idx(); - if (__predict_true(gdtset)) - critical_exit(); -} - -void -xen_pgdpt_pin(vm_paddr_t ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_PIN_L3_TABLE; - op.arg1.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_pgd_pin(vm_paddr_t ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_PIN_L2_TABLE; - op.arg1.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_pgd_unpin(vm_paddr_t ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_UNPIN_TABLE; - op.arg1.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_pt_pin(vm_paddr_t ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_PIN_L1_TABLE; - op.arg1.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_pt_unpin(vm_paddr_t ma) -{ - struct mmuext_op op; - op.cmd = MMUEXT_UNPIN_TABLE; - op.arg1.mfn = ma >> PAGE_SHIFT; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_set_ldt(vm_paddr_t ptr, unsigned long len) -{ - struct mmuext_op op; - op.cmd = MMUEXT_SET_LDT; - op.arg1.linear_addr = ptr; - op.arg2.nr_ents = len; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void xen_tlb_flush(void) -{ - struct mmuext_op op; - op.cmd = MMUEXT_TLB_FLUSH_LOCAL; - xen_flush_queue(); - PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); -} - -void -xen_update_descriptor(union descriptor *table, union descriptor *entry) -{ - vm_paddr_t pa; - pt_entry_t *ptp; - - ptp = vtopte((vm_offset_t)table); - pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK); - if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry)) - panic("HYPERVISOR_update_descriptor failed\n"); -} - - -#if 0 -/* - * Bitmap is indexed by page number. If bit is set, the page is part of a - * xen_create_contiguous_region() area of memory. - */ -unsigned long *contiguous_bitmap; - -static void -contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages) -{ - unsigned long start_off, end_off, curr_idx, end_idx; - - curr_idx = first_page / BITS_PER_LONG; - start_off = first_page & (BITS_PER_LONG-1); - end_idx = (first_page + nr_pages) / BITS_PER_LONG; - end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); - - if (curr_idx == end_idx) { - contiguous_bitmap[curr_idx] |= - ((1UL<> PAGE_SHIFT; - mfn = PFNTOMFN(pfn); - PFNTOMFN(pfn) = INVALID_P2M_ENTRY; - PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1); - } - - - /* 2. Get a new contiguous memory extent. */ - reservation.extent_order = order; - /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not - * running with a broxen driver XXXEN - */ - reservation.address_bits = 31; - if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1) - goto fail; - - /* 3. Map the new extent in place of old pages. 
*/ - for (i = 0; i < (1 << order); i++) { - int pfn; - pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; - xen_machphys_update(mfn+i, pfn); - PFNTOMFN(pfn) = mfn+i; - } - - xen_tlb_flush(); - -#if 0 - contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order); -#endif - - balloon_unlock(flags); - - return 0; - - fail: - reservation.extent_order = 0; - reservation.address_bits = 0; - - for (i = 0; i < (1 << order); i++) { - int pfn; - pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; - PANIC_IF(HYPERVISOR_memory_op( - XENMEM_increase_reservation, &reservation) != 1); - xen_machphys_update(mfn, pfn); - PFNTOMFN(pfn) = mfn; - } - - xen_tlb_flush(); - - balloon_unlock(flags); - - return ENOMEM; -} - -void -xen_destroy_contiguous_region(void *addr, int npages) -{ - unsigned long mfn, i, flags, order, pfn0; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .extent_order = 0, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &mfn); - - pfn0 = vtophys(addr) >> PAGE_SHIFT; -#if 0 - scrub_pages(vstart, 1 << order); -#endif - /* can currently only handle power of two allocation */ - PANIC_IF(ffs(npages) != fls(npages)); - - /* 0. determine order */ - order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages); - - balloon_lock(flags); - -#if 0 - contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order); -#endif - - /* 1. Zap current PTEs, giving away the underlying pages. */ - for (i = 0; i < (1 << order); i++) { - int pfn; - uint64_t new_val = 0; - pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT; - - PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0)); - PFNTOMFN(pfn) = INVALID_P2M_ENTRY; - PANIC_IF(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, &reservation) != 1); - } - - /* 2. Map new pages in place of old pages. 
*/ - for (i = 0; i < (1 << order); i++) { - int pfn; - uint64_t new_val; - pfn = pfn0 + i; - PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1); - - new_val = mfn << PAGE_SHIFT; - PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE), - new_val, PG_KERNEL)); - xen_machphys_update(mfn, pfn); - PFNTOMFN(pfn) = mfn; - } - - xen_tlb_flush(); - - balloon_unlock(flags); -} - -extern vm_offset_t proc0kstack; -extern int vm86paddr, vm86phystk; -char *bootmem_start, *bootmem_current, *bootmem_end; - -pteinfo_t *pteinfo_list; -void initvalues(start_info_t *startinfo); - -void * -bootmem_alloc(unsigned int size) -{ - char *retptr; - - retptr = bootmem_current; - PANIC_IF(retptr + size > bootmem_end); - bootmem_current += size; - - return retptr; -} - -void -bootmem_free(void *ptr, unsigned int size) -{ - char *tptr; - - tptr = ptr; - PANIC_IF(tptr != bootmem_current - size || - bootmem_current - size < bootmem_start); - - bootmem_current -= size; -} - -#if 0 -static vm_paddr_t -xpmap_mtop2(vm_paddr_t mpa) -{ - return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT) - ) | (mpa & ~PG_FRAME); -} - -static pd_entry_t -xpmap_get_bootpde(vm_paddr_t va) -{ - - return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22]; -} - -static pd_entry_t -xpmap_get_vbootpde(vm_paddr_t va) -{ - pd_entry_t pde; - - pde = xpmap_get_bootpde(va); - if ((pde & PG_V) == 0) - return (pde & ~PG_FRAME); - return (pde & ~PG_FRAME) | - (xpmap_mtop2(pde & PG_FRAME) + KERNBASE); -} - -static pt_entry_t 8* -xpmap_get_bootptep(vm_paddr_t va) -{ - pd_entry_t pde; - - pde = xpmap_get_vbootpde(va); - if ((pde & PG_V) == 0) - return (void *)-1; -#define PT_MASK 0x003ff000 /* page table address bits */ - return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]); -} - -static pt_entry_t -xpmap_get_bootpte(vm_paddr_t va) -{ - - return xpmap_get_bootptep(va)[0]; -} -#endif - - -#ifdef ADD_ISA_HOLE -static void -shift_phys_machine(unsigned long *phys_machine, int nr_pages) -{ - - unsigned long *tmp_page, *current_page, *next_page; - int i; - - tmp_page = bootmem_alloc(PAGE_SIZE); - current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long)); - next_page = current_page - (PAGE_SIZE/sizeof(unsigned long)); - bcopy(phys_machine, tmp_page, PAGE_SIZE); - - while (current_page > phys_machine) { - /* save next page */ - bcopy(next_page, tmp_page, PAGE_SIZE); - /* shift down page */ - bcopy(current_page, next_page, PAGE_SIZE); - /* finish swap */ - bcopy(tmp_page, current_page, PAGE_SIZE); - - current_page -= (PAGE_SIZE/sizeof(unsigned long)); - next_page -= (PAGE_SIZE/sizeof(unsigned long)); - } - bootmem_free(tmp_page, PAGE_SIZE); - - for (i = 0; i < nr_pages; i++) { - xen_machphys_update(phys_machine[i], i); - } - memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE); - -} -#endif /* ADD_ISA_HOLE */ - -/* - * Build a directory of the pages that make up our Physical to Machine - * mapping table. The Xen suspend/restore code uses this to find our - * mapping table. 
- */ -static void -init_frame_list_list(void *arg) -{ - unsigned long nr_pages = xen_start_info->nr_pages; -#define FPP (PAGE_SIZE/sizeof(xen_pfn_t)) - int i, j, k; - - xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); - for (i = 0, j = 0, k = -1; i < nr_pages; - i += FPP, j++) { - if ((j & (FPP - 1)) == 0) { - k++; - xen_pfn_to_mfn_frame_list[k] = - malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); - xen_pfn_to_mfn_frame_list_list[k] = - VTOMFN(xen_pfn_to_mfn_frame_list[k]); - j = 0; - } - xen_pfn_to_mfn_frame_list[k][j] = - VTOMFN(&xen_phys_machine[i]); - } - - HYPERVISOR_shared_info->arch.max_pfn = nr_pages; - HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list - = VTOMFN(xen_pfn_to_mfn_frame_list_list); -} -SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL); - -extern unsigned long physfree; - -int pdir, curoffset; -extern int nkpt; - -extern uint32_t kernbase; - -void -initvalues(start_info_t *startinfo) -{ - vm_offset_t cur_space, cur_space_pt; - struct physdev_set_iopl set_iopl; - - int l3_pages, l2_pages, l1_pages, offset; - vm_paddr_t console_page_ma, xen_store_ma; - vm_offset_t tmpva; - vm_paddr_t shinfo; -#ifdef PAE - vm_paddr_t IdlePDPTma, IdlePDPTnewma; - vm_paddr_t IdlePTDnewma[4]; - pd_entry_t *IdlePDPTnew, *IdlePTDnew; - vm_paddr_t IdlePTDma[4]; -#else - vm_paddr_t IdlePTDma[1]; -#endif - unsigned long i; - int ncpus = MAXCPU; - - nkpt = min( - min( - max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt), - NPGPTD*NPDEPG - KPTDI), - (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT); - - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); -#ifdef notyet - /* - * need to install handler - */ - HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify); -#endif - xen_start_info = startinfo; - HYPERVISOR_start_info = startinfo; - xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list; - - IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE); - l1_pages = 0; - -#ifdef PAE - l3_pages = 1; - l2_pages = 0; - IdlePDPT = (pd_entry_t *)startinfo->pt_base; - IdlePDPTma = VTOM(startinfo->pt_base); - for (i = (KERNBASE >> 30); - (i < 4) && (IdlePDPT[i] != 0); i++) - l2_pages++; - /* - * Note that only one page directory has been allocated at this point. - * Thus, if KERNBASE - */ - for (i = 0; i < l2_pages; i++) - IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE); - - l2_pages = (l2_pages == 0) ? 
1 : l2_pages; -#else - l3_pages = 0; - l2_pages = 1; -#endif - for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT); - (i>PDRSHIFT)); i++) { - - if (IdlePTD[i] == 0) - break; - l1_pages++; - } - - /* number of pages allocated after the pts + 1*/; - cur_space = xen_start_info->pt_base + - (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE; - - xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space, - cur_space); - - xc_printf("KERNBASE=%x,pt_base=%lx, VTOPFN(base)=%x, nr_pt_frames=%lx\n", - KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base), - xen_start_info->nr_pt_frames); - xendebug_flags = 0; /* 0xffffffff; */ - -#ifdef ADD_ISA_HOLE - shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages); -#endif - XENPRINTF("IdlePTD %p\n", IdlePTD); - XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%x pt_base: 0x%lx " - "mod_start: 0x%lx mod_len: 0x%lx\n", - xen_start_info->nr_pages, xen_start_info->shared_info, - xen_start_info->flags, xen_start_info->pt_base, - xen_start_info->mod_start, xen_start_info->mod_len); - -#ifdef PAE - IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE; - bzero(IdlePDPTnew, PAGE_SIZE); - - IdlePDPTnewma = VTOM(IdlePDPTnew); - IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE; - bzero(IdlePTDnew, 4*PAGE_SIZE); - - for (i = 0; i < 4; i++) - IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE); - /* - * L3 - * - * Copy the 4 machine addresses of the new PTDs in to the PDPT - * - */ - for (i = 0; i < 4; i++) - IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V; - - __asm__("nop;"); - /* - * - * re-map the new PDPT read-only - */ - PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V); - /* - * - * Unpin the current PDPT - */ - xen_pt_unpin(IdlePDPTma); - -#endif /* PAE */ - - /* Map proc0's KSTACK */ - proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE); - xc_printf("proc0kstack=%u\n", proc0kstack); - - /* vm86/bios stack */ - cur_space += PAGE_SIZE; - - /* Map space for the vm86 region */ - vm86paddr = (vm_offset_t)cur_space; - cur_space += (PAGE_SIZE * 3); - - /* allocate 4 pages for bootmem allocator */ - bootmem_start = bootmem_current = (char *)cur_space; - cur_space += (4 * PAGE_SIZE); - bootmem_end = (char *)cur_space; - - /* allocate pages for gdt */ - gdt = (union descriptor *)cur_space; - cur_space += PAGE_SIZE*ncpus; - - /* allocate page for ldt */ - ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE; - cur_space += PAGE_SIZE; - - /* unmap remaining pages from initial chunk - * - */ - for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<> 18)), - ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK), - l1_pages*sizeof(pt_entry_t)); - - for (i = 0; i < 4; i++) { - PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE, - IdlePTDnewma[i] | PG_V); - } - xen_load_cr3(VTOP(IdlePDPTnew)); - xen_pgdpt_pin(VTOM(IdlePDPTnew)); - - /* allocate remainder of nkpt pages */ - cur_space_pt = cur_space; - for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt; - i++, cur_space += PAGE_SIZE) { - pdir = (offset + i) / NPDEPG; - curoffset = ((offset + i) % NPDEPG); - if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS) - break; - - /* - * make sure that all the initial page table pages - * have been zeroed - */ - PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW); - bzero((char *)cur_space, PAGE_SIZE); - PT_SET_MA(cur_space, (vm_paddr_t)0); - xen_pt_pin(VTOM(cur_space)); - xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + - curoffset*sizeof(vm_paddr_t)), - VTOM(cur_space) | PG_KERNEL); - 
PT_UPDATES_FLUSH(); - } - - for (i = 0; i < 4; i++) { - pdir = (PTDPTDI + i) / NPDEPG; - curoffset = (PTDPTDI + i) % NPDEPG; - - xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + - curoffset*sizeof(vm_paddr_t)), - IdlePTDnewma[i] | PG_V); - } - - PT_UPDATES_FLUSH(); - - IdlePTD = IdlePTDnew; - IdlePDPT = IdlePDPTnew; - IdlePDPTma = IdlePDPTnewma; - - HYPERVISOR_shared_info = (shared_info_t *)cur_space; - cur_space += PAGE_SIZE; - - xen_store = (struct xenstore_domain_interface *)cur_space; - cur_space += PAGE_SIZE; - - console_page = (char *)cur_space; - cur_space += PAGE_SIZE; - - /* - * shared_info is an unsigned long so this will randomly break if - * it is allocated above 4GB - I guess people are used to that - * sort of thing with Xen ... sigh - */ - shinfo = xen_start_info->shared_info; - PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL); - - xc_printf("#4\n"); - - xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT); - PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL); - console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT); - PT_SET_MA(console_page, console_page_ma | PG_KERNEL); - - xc_printf("#5\n"); - - set_iopl.iopl = 1; - PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl)); - xc_printf("#6\n"); -#if 0 - /* add page table for KERNBASE */ - xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), - VTOM(cur_space) | PG_KERNEL); - xen_flush_queue(); -#ifdef PAE - xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), - VTOM(cur_space) | PG_V | PG_A); -#else - xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), - VTOM(cur_space) | PG_V | PG_A); -#endif - xen_flush_queue(); - cur_space += PAGE_SIZE; - xc_printf("#6\n"); -#endif /* 0 */ -#ifdef notyet - if (xen_start_info->flags & SIF_INITDOMAIN) { - /* Map first megabyte */ - for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) - PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD); - xen_flush_queue(); - } -#endif - /* - * re-map kernel text read-only - * - */ - for (i = (((vm_offset_t)&btext) & ~PAGE_MASK); - i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE) - PT_SET_MA(i, VTOM(i) | PG_V | PG_A); - - xc_printf("#7\n"); - physfree = VTOP(cur_space); - init_first = physfree >> PAGE_SHIFT; - IdlePTD = (pd_entry_t *)VTOP(IdlePTD); - IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT); - setup_xen_features(); - xc_printf("#8, proc0kstack=%u\n", proc0kstack); -} - - -trap_info_t trap_table[] = { - { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, - { 1, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, - { 3, 3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, - { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, - /* This is UPL on Linux and KPL on BSD */ - { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, - { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, - { 7, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, - /* - * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, - * no handler for double fault - */ - { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, - {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, - {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, - {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, - {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, - {14, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, - {15, 0, GSEL(GCODE_SEL, SEL_KPL), 
(unsigned long) &IDTVEC(rsvd)}, - {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, - {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, - {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, - {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, - {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, - { 0, 0, 0, 0 } -}; - -/* Perform a multicall and check that individual calls succeeded. */ -int -HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls) -{ - int ret = 0; - int i; - - /* Perform the multicall. */ - PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls)); - - /* Check the results of individual hypercalls. */ - for (i = 0; i < nr_calls; i++) - if (__predict_false(call_list[i].result < 0)) - ret++; - if (__predict_false(ret > 0)) - panic("%d multicall(s) failed: cpu %d\n", - ret, smp_processor_id()); - - /* If we didn't panic already, everything succeeded. */ - return (0); -} - -/********** CODE WORTH KEEPING ABOVE HERE *****************/ - -void xen_failsafe_handler(void); - -void -xen_failsafe_handler(void) -{ - - panic("xen_failsafe_handler called!\n"); -} - -void xen_handle_thread_switch(struct pcb *pcb); - -/* This is called by cpu_switch() when switching threads. */ -/* The pcb arg refers to the process control block of the */ -/* next thread which is to run */ -void -xen_handle_thread_switch(struct pcb *pcb) -{ - uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0]; - uint32_t *b = (uint32_t *)&pcb->pcb_fsd; - multicall_entry_t mcl[3]; - int i = 0; - - /* Notify Xen of task switch */ - mcl[i].op = __HYPERVISOR_stack_switch; - mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL); - mcl[i++].args[1] = (unsigned long)pcb; - - /* Check for update of fsd */ - if (*a != *b || *(a+1) != *(b+1)) { - mcl[i].op = __HYPERVISOR_update_descriptor; - *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); - *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; - } - - a += 2; - b += 2; - - /* Check for update of gsd */ - if (*a != *b || *(a+1) != *(b+1)) { - mcl[i].op = __HYPERVISOR_update_descriptor; - *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); - *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; - } - - (void)HYPERVISOR_multicall(mcl, i); -} Property changes on: head/sys/i386/xen/xen_machdep.c ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/xen/mp_machdep.c =================================================================== --- head/sys/i386/xen/mp_machdep.c (revision 282273) +++ head/sys/i386/xen/mp_machdep.c (nonexistent) @@ -1,1292 +0,0 @@ -/*- - * Copyright (c) 1996, by Steve Passe - * Copyright (c) 2008, by Kip Macy - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the developer may NOT be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include "opt_apic.h" -#include "opt_cpu.h" -#include "opt_kstack_pages.h" -#include "opt_mp_watchdog.h" -#include "opt_pmap.h" -#include "opt_sched.h" -#include "opt_smp.h" - -#if !defined(lint) -#if !defined(SMP) -#error How did you get here? -#endif - -#ifndef DEV_APIC -#error The apic device is required for SMP, add "device apic" to your config file. -#endif -#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) -#error SMP not supported with CPU_DISABLE_CMPXCHG -#endif -#endif /* not lint */ - -#include -#include -#include -#include /* cngetc() */ -#include -#ifdef GPROF -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -/*---------------------------- Extern Declarations ---------------------------*/ -extern struct pcpu __pcpu[]; - -extern void Xhypervisor_callback(void); -extern void failsafe_callback(void); - -/*--------------------------- Forward Declarations ---------------------------*/ -static driver_filter_t smp_reschedule_interrupt; -static driver_filter_t smp_call_function_interrupt; -static int start_all_aps(void); -static int start_ap(int apic_id); -static void release_aps(void *dummy); - -/*---------------------------------- Macros ----------------------------------*/ -#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) - -/*-------------------------------- Local Types -------------------------------*/ -typedef void call_data_func_t(uintptr_t , uintptr_t); - -struct xen_ipi_handler -{ - driver_filter_t *filter; - const char *description; -}; - -enum { - RESCHEDULE_VECTOR, - CALL_FUNCTION_VECTOR, -}; - -/*-------------------------------- Global Data -------------------------------*/ -static u_int hyperthreading_cpus; -static cpuset_t hyperthreading_cpus_mask; - -int mp_naps; /* # of Applications processors */ -int boot_cpu_id = -1; /* designated BSP */ - -int bootAP; -static union descriptor *bootAPgdt; - -/* Free these after use */ -void *bootstacks[MAXCPU]; - -struct pcb stoppcbs[MAXCPU]; - -/* Variables needed for SMP tlb shootdown. */ -vm_offset_t smp_tlb_addr1; -vm_offset_t smp_tlb_addr2; -volatile int smp_tlb_wait; - -static u_int logical_cpus; -static volatile cpuset_t ipi_nmi_pending; - -/* used to hold the AP's until we are ready to release them */ -struct mtx ap_boot_mtx; - -/* Set to 1 once we're ready to let the APs out of the pen. */ -volatile int aps_ready = 0; - -/* - * Store data from cpu_add() until later in the boot when we actually setup - * the APs. 
- */ -struct cpu_info cpu_info[MAX_APIC_ID + 1]; -int cpu_apic_ids[MAXCPU]; -int apic_cpuids[MAX_APIC_ID + 1]; - -/* Holds pending bitmap based IPIs per CPU */ -volatile u_int cpu_ipi_pending[MAXCPU]; - -int cpu_logical; -int cpu_cores; - -static const struct xen_ipi_handler xen_ipis[] = -{ - [RESCHEDULE_VECTOR] = { smp_reschedule_interrupt, "resched" }, - [CALL_FUNCTION_VECTOR] = { smp_call_function_interrupt,"callfunc" } -}; - -/*------------------------------- Per-CPU Data -------------------------------*/ -DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); -DPCPU_DEFINE(struct vcpu_info *, vcpu_info); - -/*------------------------------ Implementation ------------------------------*/ -struct cpu_group * -cpu_topo(void) -{ - if (cpu_cores == 0) - cpu_cores = 1; - if (cpu_logical == 0) - cpu_logical = 1; - if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { - printf("WARNING: Non-uniform processors.\n"); - printf("WARNING: Using suboptimal topology.\n"); - return (smp_topo_none()); - } - /* - * No multi-core or hyper-threaded. - */ - if (cpu_logical * cpu_cores == 1) - return (smp_topo_none()); - /* - * Only HTT no multi-core. - */ - if (cpu_logical > 1 && cpu_cores == 1) - return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); - /* - * Only multi-core no HTT. - */ - if (cpu_cores > 1 && cpu_logical == 1) - return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); - /* - * Both HTT and multi-core. - */ - return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, - CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); -} - -/* - * Calculate usable address in base memory for AP trampoline code. - */ -u_int -mp_bootaddress(u_int basemem) -{ - - return (basemem); -} - -void -cpu_add(u_int apic_id, char boot_cpu) -{ - - if (apic_id > MAX_APIC_ID) { - panic("SMP: APIC ID %d too high", apic_id); - return; - } - KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", - apic_id)); - cpu_info[apic_id].cpu_present = 1; - if (boot_cpu) { - KASSERT(boot_cpu_id == -1, - ("CPU %d claims to be BSP, but CPU %d already is", apic_id, - boot_cpu_id)); - boot_cpu_id = apic_id; - cpu_info[apic_id].cpu_bsp = 1; - } - if (mp_ncpus < MAXCPU) - mp_ncpus++; - if (bootverbose) - printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : - "AP"); -} - -void -cpu_mp_setmaxid(void) -{ - - mp_maxid = MAXCPU - 1; -} - -int -cpu_mp_probe(void) -{ - - /* - * Always record BSP in CPU map so that the mbuf init code works - * correctly. - */ - CPU_SETOF(0, &all_cpus); - if (mp_ncpus == 0) { - /* - * No CPUs were found, so this must be a UP system. Setup - * the variables to represent a system with a single CPU - * with an id of 0. - */ - mp_ncpus = 1; - return (0); - } - - /* At least one CPU was found. */ - if (mp_ncpus == 1) { - /* - * One CPU was found, so this must be a UP system with - * an I/O APIC. - */ - return (0); - } - - /* At least two CPUs were found. */ - return (1); -} - -/* - * Initialize the IPI handlers and start up the AP's. - */ -void -cpu_mp_start(void) -{ - int i; - - /* Initialize the logical ID to APIC ID table. */ - for (i = 0; i < MAXCPU; i++) { - cpu_apic_ids[i] = -1; - cpu_ipi_pending[i] = 0; - } - - /* Set boot_cpu_id if needed. 
*/ - if (boot_cpu_id == -1) { - boot_cpu_id = PCPU_GET(apic_id); - cpu_info[boot_cpu_id].cpu_bsp = 1; - } else - KASSERT(boot_cpu_id == PCPU_GET(apic_id), - ("BSP's APIC ID doesn't match boot_cpu_id")); - cpu_apic_ids[0] = boot_cpu_id; - apic_cpuids[boot_cpu_id] = 0; - - assign_cpu_ids(); - - /* Start each Application Processor */ - start_all_aps(); - - /* Setup the initial logical CPUs info. */ - logical_cpus = 0; - CPU_ZERO(&logical_cpus_mask); - if (cpu_feature & CPUID_HTT) - logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - - set_interrupt_apic_ids(); -} - - -static void -iv_rendezvous(uintptr_t a, uintptr_t b) -{ - smp_rendezvous_action(); -} - -static void -iv_invltlb(uintptr_t a, uintptr_t b) -{ - xen_tlb_flush(); -} - -static void -iv_invlpg(uintptr_t a, uintptr_t b) -{ - xen_invlpg(a); -} - -static void -iv_invlrng(uintptr_t a, uintptr_t b) -{ - vm_offset_t start = (vm_offset_t)a; - vm_offset_t end = (vm_offset_t)b; - - while (start < end) { - xen_invlpg(start); - start += PAGE_SIZE; - } -} - - -static void -iv_invlcache(uintptr_t a, uintptr_t b) -{ - - wbinvd(); - atomic_add_int(&smp_tlb_wait, 1); -} - -/* - * These start from "IPI offset" APIC_IPI_INTS - */ -static call_data_func_t *ipi_vectors[5] = -{ - iv_rendezvous, - iv_invltlb, - iv_invlpg, - iv_invlrng, - iv_invlcache, -}; - -/* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. - */ -static int -smp_reschedule_interrupt(void *unused) -{ - int cpu = PCPU_GET(cpuid); - u_int ipi_bitmap; - - ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); - - if (ipi_bitmap & (1 << IPI_PREEMPT)) { -#ifdef COUNT_IPIS - (*ipi_preempt_counts[cpu])++; -#endif - sched_preempt(curthread); - } - - if (ipi_bitmap & (1 << IPI_AST)) { -#ifdef COUNT_IPIS - (*ipi_ast_counts[cpu])++; -#endif - /* Nothing to do for AST */ - } - return (FILTER_HANDLED); -} - -struct _call_data { - uint16_t func_id; - uint16_t wait; - uintptr_t arg1; - uintptr_t arg2; - atomic_t started; - atomic_t finished; -}; - -static struct _call_data *call_data; - -static int -smp_call_function_interrupt(void *unused) -{ - call_data_func_t *func; - uintptr_t arg1 = call_data->arg1; - uintptr_t arg2 = call_data->arg2; - int wait = call_data->wait; - atomic_t *started = &call_data->started; - atomic_t *finished = &call_data->finished; - - /* We only handle function IPIs, not bitmap IPIs */ - if (call_data->func_id < APIC_IPI_INTS || - call_data->func_id > IPI_BITMAP_VECTOR) - panic("invalid function id %u", call_data->func_id); - - func = ipi_vectors[IPI_TO_IDX(call_data->func_id)]; - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - atomic_inc(started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - (*func)(arg1, arg2); - - if (wait) { - mb(); - atomic_inc(finished); - } - atomic_add_int(&smp_tlb_wait, 1); - return (FILTER_HANDLED); -} - -/* - * Print various information about the SMP system hardware and setup. 
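smp_call_function_interrupt() above is the receiving half of a simple cross-call protocol: the initiating CPU publishes a struct _call_data, sends the IPI, and spins until every target has bumped a completion counter; the sending half appears further down in this diff as smp_tlb_shootdown(). A condensed sketch of that initiator side, reusing the globals from the removed file, may be easier to follow than the raw hunks (the name cross_call_sketch is not FreeBSD's):

/*
 * Condensed sketch of the removed smp_tlb_shootdown(): one cross-call at
 * a time, serialized by smp_ipi_mtx, with smp_tlb_wait as the completion
 * counter that every target increments from its interrupt handler.
 */
static void
cross_call_sketch(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
	struct _call_data data;
	u_int ncpu = mp_ncpus - 1;	/* every CPU except ourselves */

	if (ncpu < 1)
		return;			/* uniprocessor: nothing to do */
	mtx_lock_spin(&smp_ipi_mtx);
	call_data = &data;		/* publish the request */
	call_data->func_id = vector;
	call_data->arg1 = addr1;
	call_data->arg2 = addr2;
	atomic_store_rel_int(&smp_tlb_wait, 0);
	ipi_all_but_self(vector);	/* targets run the handler above */
	while (smp_tlb_wait < ncpu)	/* each target adds exactly one */
		ia32_pause();
	call_data = NULL;
	mtx_unlock_spin(&smp_ipi_mtx);
}

Keeping data on the initiator's stack is safe only because the initiator does not return until every target has finished with it.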
- */ -void -cpu_mp_announce(void) -{ - int i, x; - - /* List CPUs */ - printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); - for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { - if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) - continue; - if (cpu_info[x].cpu_disabled) - printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); - else { - KASSERT(i < mp_ncpus, - ("mp_ncpus and actual cpus are out of whack")); - printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); - } - } -} - -static int -xen_smp_cpu_init(unsigned int cpu) -{ - xen_intr_handle_t *ipi_handle; - const struct xen_ipi_handler *ipi; - int idx, rc; - - ipi_handle = DPCPU_ID_GET(cpu, ipi_handle); - for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) { - - /* - * The PCPU variable pc_device is not initialized on i386 PV, - * so we have to use the root_bus device in order to setup - * the IPIs. - */ - rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu, - ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]); - if (rc != 0) { - printf("Unable to allocate a XEN IPI port. " - "Error %d\n", rc); - break; - } - xen_intr_describe(ipi_handle[idx], "%s", ipi->description); - } - - for (;idx < nitems(xen_ipis); idx++) - ipi_handle[idx] = NULL; - - if (rc == 0) - return (0); - - /* Either all are successfully mapped, or none at all. */ - for (idx = 0; idx < nitems(xen_ipis); idx++) { - if (ipi_handle[idx] == NULL) - continue; - - xen_intr_unbind(ipi_handle[idx]); - ipi_handle[idx] = NULL; - } - - return (rc); -} - -static void -xen_smp_intr_init_cpus(void *unused) -{ - int i; - - for (i = 0; i < mp_ncpus; i++) - xen_smp_cpu_init(i); -} - -static void -xen_smp_intr_setup_cpus(void *unused) -{ - int i; - - for (i = 0; i < mp_ncpus; i++) - DPCPU_ID_SET(i, vcpu_info, - &HYPERVISOR_shared_info->vcpu_info[i]); -} - -#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) - -/* - * AP CPU's call this to initialize themselves. - */ -void -init_secondary(void) -{ - vm_offset_t addr; - u_int cpuid; - int gsel_tss; - - - /* bootAP is set in start_ap() to our ID. */ - PCPU_SET(currentldt, _default_ldt); - gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); -#if 0 - gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; -#endif - PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ - PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); - PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); -#if 0 - PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); - - PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); -#endif - PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); - - /* - * Set to a known state: - * Set by mpboot.s: CR0_PG, CR0_PE - * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM - */ - /* - * signal our startup to the BSP. - */ - mp_naps++; - - /* Spin until the BSP releases the AP's. */ - while (!aps_ready) - ia32_pause(); - - /* BSP may have changed PTD while we were waiting */ - invltlb(); - for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) - invlpg(addr); - -#if 0 - /* set up SSE/NX */ - initializecpu(); -#endif - - /* set up FPU state on the AP */ - npxinit(false); -#if 0 - /* A quick check from sanity claus */ - if (PCPU_GET(apic_id) != lapic_id()) { - printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); - printf("SMP: actual apic_id = %d\n", lapic_id()); - printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); - panic("cpuid mismatch! boom!!"); - } -#endif - - /* Initialize curthread. 
*/ - KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); - PCPU_SET(curthread, PCPU_GET(idlethread)); - - mtx_lock_spin(&ap_boot_mtx); -#if 0 - - /* Init local apic for irq's */ - lapic_setup(1); -#endif - smp_cpus++; - - cpuid = PCPU_GET(cpuid); - CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); - printf("SMP: AP CPU #%d Launched!\n", cpuid); - - /* Determine if we are a logical CPU. */ - if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) - CPU_SET(cpuid, &logical_cpus_mask); - - /* Determine if we are a hyperthread. */ - if (hyperthreading_cpus > 1 && - PCPU_GET(apic_id) % hyperthreading_cpus != 0) - CPU_SET(cpuid, &hyperthreading_cpus_mask); -#if 0 - if (bootverbose) - lapic_dump("AP"); -#endif - if (smp_cpus == mp_ncpus) { - /* enable IPI's, tlb shootdown, freezes etc */ - atomic_store_rel_int(&smp_started, 1); - } - - mtx_unlock_spin(&ap_boot_mtx); - - /* wait until all the AP's are up */ - while (smp_started == 0) - ia32_pause(); - - PCPU_SET(curthread, PCPU_GET(idlethread)); - - /* Start per-CPU event timers. */ - cpu_initclocks_ap(); - - /* enter the scheduler */ - sched_throw(NULL); - - panic("scheduler returned us to %s", __func__); - /* NOTREACHED */ -} - -/******************************************************************* - * local functions and data - */ - -/* - * We tell the I/O APIC code about all the CPUs we want to receive - * interrupts. If we don't want certain CPUs to receive IRQs we - * can simply not tell the I/O APIC code about them in this function. - * We also do not tell it about the BSP since it tells itself about - * the BSP internally to work with UP kernels and on UP machines. - */ -void -set_interrupt_apic_ids(void) -{ - u_int i, apic_id; - - for (i = 0; i < MAXCPU; i++) { - apic_id = cpu_apic_ids[i]; - if (apic_id == -1) - continue; - if (cpu_info[apic_id].cpu_bsp) - continue; - if (cpu_info[apic_id].cpu_disabled) - continue; - - /* Don't let hyperthreads service interrupts. */ - if (hyperthreading_cpus > 1 && - apic_id % hyperthreading_cpus != 0) - continue; - - intr_add_cpu(i); - } -} - -/* - * Assign logical CPU IDs to local APICs. - */ -void -assign_cpu_ids(void) -{ - u_int i; - - /* Check for explicitly disabled CPUs. */ - for (i = 0; i <= MAX_APIC_ID; i++) { - if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) - continue; - - /* Don't use this CPU if it has been disabled by a tunable. */ - if (resource_disabled("lapic", i)) { - cpu_info[i].cpu_disabled = 1; - continue; - } - } - - /* - * Assign CPU IDs to local APIC IDs and disable any CPUs - * beyond MAXCPU. CPU 0 has already been assigned to the BSP, - * so we only have to assign IDs for APs. 
- */ - mp_ncpus = 1; - for (i = 0; i <= MAX_APIC_ID; i++) { - if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || - cpu_info[i].cpu_disabled) - continue; - - if (mp_ncpus < MAXCPU) { - cpu_apic_ids[mp_ncpus] = i; - apic_cpuids[i] = mp_ncpus; - mp_ncpus++; - } else - cpu_info[i].cpu_disabled = 1; - } - KASSERT(mp_maxid >= mp_ncpus - 1, - ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, - mp_ncpus)); -} - -/* - * start each AP in our list - */ -/* Lowest 1MB is already mapped: don't touch*/ -#define TMPMAP_START 1 -int -start_all_aps(void) -{ - int x,apic_id, cpu; - struct pcpu *pc; - - mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); - - /* set up temporary P==V mapping for AP boot */ - /* XXX this is a hack, we should boot the AP on its own stack/PTD */ - - /* start each AP */ - for (cpu = 1; cpu < mp_ncpus; cpu++) { - apic_id = cpu_apic_ids[cpu]; - - - bootAP = cpu; - bootAPgdt = gdt + (512*cpu); - - /* Get per-cpu data */ - pc = &__pcpu[bootAP]; - pcpu_init(pc, bootAP, sizeof(struct pcpu)); - dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE, - M_WAITOK | M_ZERO), bootAP); - pc->pc_apic_id = cpu_apic_ids[bootAP]; - pc->pc_vcpu_id = cpu_apic_ids[bootAP]; - pc->pc_prvspace = pc; - pc->pc_curthread = 0; - - gdt_segs[GPRIV_SEL].ssd_base = (int) pc; - gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; - - PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW); - bzero(bootAPgdt, PAGE_SIZE); - for (x = 0; x < NGDT; x++) - ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); - PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); -#ifdef notyet - - if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { - apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); - acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); -#ifdef CONFIG_ACPI - if (acpiid != 0xff) - x86_acpiid_to_apicid[acpiid] = apicid; -#endif - } -#endif - - /* attempt to start the Application Processor */ - if (!start_ap(cpu)) { - printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); - /* better panic as the AP may be running loose */ - printf("panic y/n? [y] "); - if (cngetc() != 'n') - panic("bye-bye"); - } - - CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ - } - - - pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); - - /* number of APs actually started */ - return (mp_naps); -} - -extern uint8_t *pcpu_boot_stack; -extern trap_info_t trap_table[]; - -static void -smp_trap_init(trap_info_t *trap_ctxt) -{ - const trap_info_t *t = trap_table; - - for (t = trap_table; t->address; t++) { - trap_ctxt[t->vector].flags = t->flags; - trap_ctxt[t->vector].cs = t->cs; - trap_ctxt[t->vector].address = t->address; - } -} - -extern struct rwlock pvh_global_lock; -extern int nkpt; -static void -cpu_initialize_context(unsigned int cpu) -{ - /* vcpu_guest_context_t is too large to allocate on the stack. 
- * Hence we allocate statically and protect it with a lock */ - vm_page_t m[NPGPTD + 2]; - static vcpu_guest_context_t ctxt; - vm_offset_t boot_stack; - vm_offset_t newPTD; - vm_paddr_t ma[NPGPTD]; - int i; - - /* - * Page 0,[0-3] PTD - * Page 1, [4] boot stack - * Page [5] PDPT - * - */ - for (i = 0; i < NPGPTD + 2; i++) { - m[i] = vm_page_alloc(NULL, 0, - VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - - pmap_zero_page(m[i]); - - } - boot_stack = kva_alloc(PAGE_SIZE); - newPTD = kva_alloc(NPGPTD * PAGE_SIZE); - ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V; - -#ifdef PAE - pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); - for (i = 0; i < NPGPTD; i++) { - ((vm_paddr_t *)boot_stack)[i] = - ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V; - } -#endif - - /* - * Copy cpu0 IdlePTD to new IdlePTD - copying only - * kernel mappings - */ - pmap_qenter(newPTD, m, 4); - - memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), - (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), - nkpt*sizeof(vm_paddr_t)); - - pmap_qremove(newPTD, 4); - kva_free(newPTD, 4 * PAGE_SIZE); - /* - * map actual idle stack to boot_stack - */ - pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); - - - xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1])); - rw_wlock(&pvh_global_lock); - for (i = 0; i < 4; i++) { - int pdir = (PTDPTDI + i) / NPDEPG; - int curoffset = (PTDPTDI + i) % NPDEPG; - - xen_queue_pt_update((vm_paddr_t) - ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), - ma[i]); - } - PT_UPDATES_FLUSH(); - rw_wunlock(&pvh_global_lock); - - memset(&ctxt, 0, sizeof(ctxt)); - ctxt.flags = VGCF_IN_KERNEL; - ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); - ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); - ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); - ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); - ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); - ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); - ctxt.user_regs.eip = (unsigned long)init_secondary; - ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ - - memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); - - smp_trap_init(ctxt.trap_ctxt); - - ctxt.ldt_ents = 0; - ctxt.gdt_frames[0] = - (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); - ctxt.gdt_ents = 512; - -#ifdef __i386__ - ctxt.user_regs.esp = boot_stack + PAGE_SIZE; - - ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); - ctxt.kernel_sp = boot_stack + PAGE_SIZE; - - ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); - ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; - ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - - ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]); -#else /* __x86_64__ */ - ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); - ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); - ctxt.kernel_sp = idle->thread.rsp0; - - ctxt.event_callback_eip = (unsigned long)hypervisor_callback; - ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; - ctxt.syscall_callback_eip = (unsigned long)system_call; - - ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); - - ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); -#endif - - printf("gdtpfn=%lx pdptpfn=%lx\n", - ctxt.gdt_frames[0], - ctxt.ctrlreg[3] >> PAGE_SHIFT); - - PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); - DELAY(3000); - PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); -} - -/* - * This function starts the AP (application processor) identified - * by the APIC ID 'physicalCpu'. 
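cpu_initialize_context() above shows how a PV guest starts a secondary processor: it builds a vcpu_guest_context_t describing the initial register state, hands it to Xen with VCPUOP_initialise, and then makes the vCPU runnable with VCPUOP_up. Below is a heavily condensed sketch of just that two-step bring-up; the function name and parameters are illustrative only, and the GDT, page-table copying and event-callback setup that the real code performs are deliberately omitted.

/*
 * Sketch, not the removed function: register an initial CPU state for
 * vCPU 'cpu' and ask the hypervisor to run it.  'entry' corresponds to
 * init_secondary and 'cr3_ma' to the machine address of the new page
 * directory in the deleted code.
 */
static void
bring_up_vcpu(unsigned int cpu, unsigned long entry, unsigned long stack_top,
    unsigned long cr3_ma)
{
	static vcpu_guest_context_t ctxt;	/* too large for the stack */

	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.flags = VGCF_IN_KERNEL;
	ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.eip = entry;		/* where the vCPU starts */
	ctxt.user_regs.esp = stack_top;
	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = stack_top;
	ctxt.ctrlreg[3] = cr3_ma;		/* initial page directory */

	/* Step 1: lodge the register image with the hypervisor. */
	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
	/* Step 2: mark the vCPU runnable; it begins executing at 'entry'. */
	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
}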
It does quite a "song and dance" - * to accomplish this. This is necessary because of the nuances - * of the different hardware we might encounter. It isn't pretty, - * but it seems to work. - */ - -int cpus; -static int -start_ap(int apic_id) -{ - int ms; - - /* used as a watchpoint to signal AP startup */ - cpus = mp_naps; - - cpu_initialize_context(apic_id); - - /* Wait up to 5 seconds for it to start. */ - for (ms = 0; ms < 5000; ms++) { - if (mp_naps > cpus) - return (1); /* return SUCCESS */ - DELAY(1000); - } - return (0); /* return FAILURE */ -} - -static void -ipi_pcpu(int cpu, u_int ipi) -{ - KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI")); - xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi])); -} - -/* - * send an IPI to a specific CPU. - */ -void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (!old_pending) - ipi_pcpu(cpu, RESCHEDULE_VECTOR); - } else { - KASSERT(call_data != NULL, ("call_data not set")); - ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); - } -} - -/* - * Flush the TLB on all other CPU's - */ -static void -smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) -{ - u_int ncpu; - struct _call_data data; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - if (!(read_eflags() & PSL_I)) - panic("%s: interrupts disabled", __func__); - mtx_lock_spin(&smp_ipi_mtx); - KASSERT(call_data == NULL, ("call_data isn't null?!")); - call_data = &data; - call_data->func_id = vector; - call_data->arg1 = addr1; - call_data->arg2 = addr2; - atomic_store_rel_int(&smp_tlb_wait, 0); - ipi_all_but_self(vector); - while (smp_tlb_wait < ncpu) - ia32_pause(); - call_data = NULL; - mtx_unlock_spin(&smp_ipi_mtx); -} - -static void -smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, - vm_offset_t addr2) -{ - int cpu, ncpu, othercpus; - struct _call_data data; - - othercpus = mp_ncpus - 1; - if (CPU_ISFULLSET(&mask)) { - if (othercpus < 1) - return; - } else { - CPU_CLR(PCPU_GET(cpuid), &mask); - if (CPU_EMPTY(&mask)) - return; - } - if (!(read_eflags() & PSL_I)) - panic("%s: interrupts disabled", __func__); - mtx_lock_spin(&smp_ipi_mtx); - KASSERT(call_data == NULL, ("call_data isn't null?!")); - call_data = &data; - call_data->func_id = vector; - call_data->arg1 = addr1; - call_data->arg2 = addr2; - atomic_store_rel_int(&smp_tlb_wait, 0); - if (CPU_ISFULLSET(&mask)) { - ncpu = othercpus; - ipi_all_but_self(vector); - } else { - ncpu = 0; - while ((cpu = CPU_FFS(&mask)) != 0) { - cpu--; - CPU_CLR(cpu, &mask); - CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, - vector); - ipi_send_cpu(cpu, vector); - ncpu++; - } - } - while (smp_tlb_wait < ncpu) - ia32_pause(); - call_data = NULL; - mtx_unlock_spin(&smp_ipi_mtx); -} - -void -smp_cache_flush(void) -{ - - if (smp_started) - smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); -} - -void -smp_invltlb(void) -{ - - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, 0, 0); - } -} - -void -smp_invlpg(vm_offset_t addr) -{ - - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, addr, 0); - } -} - -void -smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) -{ - - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); - } -} - -void -smp_masked_invltlb(cpuset_t mask) -{ - - if 
(smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); - } -} - -void -smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) -{ - - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); - } -} - -void -smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) -{ - - if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); - } -} - -/* - * send an IPI to a set of cpus. - */ -void -ipi_selected(cpuset_t cpus, u_int ipi) -{ - int cpu; - - /* - * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit - * of help in order to understand what is the source. - * Set the mask of receiving CPUs for this purpose. - */ - if (ipi == IPI_STOP_HARD) - CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - - while ((cpu = CPU_FFS(&cpus)) != 0) { - cpu--; - CPU_CLR(cpu, &cpus); - CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); - ipi_send_cpu(cpu, ipi); - } -} - -/* - * send an IPI to a specific CPU. - */ -void -ipi_cpu(int cpu, u_int ipi) -{ - - /* - * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit - * of help in order to understand what is the source. - * Set the mask of receiving CPUs for this purpose. - */ - if (ipi == IPI_STOP_HARD) - CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); - - CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); - ipi_send_cpu(cpu, ipi); -} - -/* - * send an IPI to all CPUs EXCEPT myself - */ -void -ipi_all_but_self(u_int ipi) -{ - cpuset_t other_cpus; - - /* - * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit - * of help in order to understand what is the source. - * Set the mask of receiving CPUs for this purpose. - */ - other_cpus = all_cpus; - CPU_CLR(PCPU_GET(cpuid), &other_cpus); - if (ipi == IPI_STOP_HARD) - CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); - - CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - ipi_selected(other_cpus, ipi); -} - -int -ipi_nmi_handler() -{ - u_int cpuid; - - /* - * As long as there is not a simple way to know about a NMI's - * source, if the bitmask for the current CPU is present in - * the global pending bitword an IPI_STOP_HARD has been issued - * and should be handled. - */ - cpuid = PCPU_GET(cpuid); - if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) - return (1); - - CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); - cpustop_handler(); - return (0); -} - -/* - * Handle an IPI_STOP by saving our current context and spinning until we - * are resumed. - */ -void -cpustop_handler(void) -{ - int cpu; - - cpu = PCPU_GET(cpuid); - - savectx(&stoppcbs[cpu]); - - /* Indicate that we are stopped */ - CPU_SET_ATOMIC(cpu, &stopped_cpus); - - /* Wait for restart */ - while (!CPU_ISSET(cpu, &started_cpus)) - ia32_pause(); - - CPU_CLR_ATOMIC(cpu, &started_cpus); - CPU_CLR_ATOMIC(cpu, &stopped_cpus); - - if (cpu == 0 && cpustop_restartfunc != NULL) { - cpustop_restartfunc(); - cpustop_restartfunc = NULL; - } -} - -/* - * Handlers for TLB related IPIs - * - * On i386 Xen PV this are no-ops since this port doesn't support SMP. - */ -void -invltlb_handler(void) -{ -} - -void -invlpg_handler(void) -{ -} - -void -invlrng_handler(void) -{ -} - -void -invlcache_handler(void) -{ -} - -/* - * This is called once the rest of the system is up and running and we're - * ready to let the AP's out of the pen. 
- */ -static void -release_aps(void *dummy __unused) -{ - - if (mp_ncpus == 1) - return; - atomic_store_rel_int(&aps_ready, 1); - while (smp_started == 0) - ia32_pause(); -} -SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); -SYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL); -SYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL); Property changes on: head/sys/i386/xen/mp_machdep.c ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/xen/mptable.c =================================================================== --- head/sys/i386/xen/mptable.c (revision 282273) +++ head/sys/i386/xen/mptable.c (nonexistent) @@ -1,109 +0,0 @@ -/*- - * Copyright (c) 2003 John Baldwin - * Copyright (c) 1996, by Steve Passe - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. The name of the developer may NOT be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - - -static int mptable_probe(void); -static int mptable_probe_cpus(void); -static void mptable_register(void *dummy); -static int mptable_setup_local(void); -static int mptable_setup_io(void); - -static struct apic_enumerator mptable_enumerator = { - "MPTable", - mptable_probe, - mptable_probe_cpus, - mptable_setup_local, - mptable_setup_io -}; - -static int -mptable_probe(void) -{ - - return (-100); -} - -static int -mptable_probe_cpus(void) -{ - int i, rc; - - for (i = 0; i < MAXCPU; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) - cpu_add(i, (i == 0)); - } - - return (0); -} - -/* - * Initialize the local APIC on the BSP. 
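mptable_probe_cpus() above is how the removed PV port discovered its processors: a para-virtualized guest has no MP table or ACPI MADT to walk, so it simply asks the hypervisor which vCPU IDs exist. A restatement with the reasoning spelled out in comments (the name probe_vcpus_sketch is illustrative; the logic mirrors the deleted function):

static int
probe_vcpus_sketch(void)
{
	int i, rc;

	for (i = 0; i < MAXCPU; i++) {
		/*
		 * VCPUOP_is_up returns a non-negative value whenever the
		 * vCPU ID is known to Xen, whether or not it is currently
		 * running, and an error for IDs that were never created.
		 */
		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
		if (rc >= 0)
			cpu_add(i, i == 0);	/* vCPU 0 is the BSP */
	}
	return (0);
}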
- */ -static int -mptable_setup_local(void) -{ - - PCPU_SET(apic_id, 0); - PCPU_SET(vcpu_id, 0); - return (0); -} - -static int -mptable_setup_io(void) -{ - - return (0); -} - -static void -mptable_register(void *dummy __unused) -{ - - apic_register_enumerator(&mptable_enumerator); -} -SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register, - NULL); Property changes on: head/sys/i386/xen/mptable.c ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/conf/XEN =================================================================== --- head/sys/i386/conf/XEN (revision 282273) +++ head/sys/i386/conf/XEN (nonexistent) @@ -1,96 +0,0 @@ -# -# XEN -- Kernel configuration for i386 XEN DomU -# -# $FreeBSD$ - -cpu I686_CPU -ident XEN - -makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols - -# The following drivers don't build with PAE or XEN enabled. -makeoptions WITHOUT_MODULES="ctl dpt drm drm2 hptmv ida" - -# The following drivers don't work with PAE enabled. -makeoptions WITHOUT_MODULES+="ncr pst" - -options SCHED_ULE # ULE scheduler -options PREEMPTION # Enable kernel thread preemption - -options INET # InterNETworking -options INET6 # IPv6 communications protocols -options SCTP # Stream Control Transmission Protocol -options FFS # Berkeley Fast Filesystem -options SOFTUPDATES # Enable FFS soft updates support -options UFS_ACL # Support for access control lists -options UFS_DIRHASH # Improve performance on big directories -options UFS_GJOURNAL # Enable gjournal-based UFS journaling -options NFSCL # Network Filesystem Client -options NFSD # Network Filesystem Server -options NFSLOCKD # Network Lock Manager -options NFS_ROOT # NFS usable as /, requires NFSCL -options MSDOSFS # MSDOS Filesystem -options CD9660 # ISO 9660 Filesystem -options PROCFS # Process filesystem (requires PSEUDOFS) -options PSEUDOFS # Pseudo-filesystem framework -options GEOM_PART_GPT # GUID Partition Tables. -options GEOM_LABEL # Provides labelization -options COMPAT_FREEBSD4 # Compatible with FreeBSD4 -options COMPAT_FREEBSD5 # Compatible with FreeBSD5 -options COMPAT_FREEBSD6 # Compatible with FreeBSD6 -options COMPAT_FREEBSD7 # Compatible with FreeBSD7 -options COMPAT_FREEBSD9 # Compatible with FreeBSD9 -options COMPAT_FREEBSD10 # Compatible with FreeBSD10 -options KTRACE # ktrace(1) support -options STACK # stack(9) support -options SYSVSHM # SYSV-style shared memory -options SYSVMSG # SYSV-style message queues -options SYSVSEM # SYSV-style semaphores -options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions -options KBD_INSTALL_CDEV # install a CDEV entry in /dev -options AUDIT # Security event auditing - -# Debugging for use in -current -options KDB # Enable kernel debugger support. -options DDB # Support DDB. -options GDB # Support remote GDB. 
-options DEADLKRES # Enable the deadlock resolver -options INVARIANTS # Enable calls of extra sanity checking -options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS -options WITNESS # Enable checks to detect deadlocks and cycles -options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed - -options PAE -nooption NATIVE -option XEN -nodevice atpic -nodevice isa -options MCLSHIFT=12 - -# To make an SMP kernel, the next two lines are needed -options SMP # Symmetric MultiProcessor Kernel -device apic # I/O APIC - -#device atkbdc # AT keyboard controller -#device atkbd # AT keyboard -device psm # PS/2 mouse -device pci - -#device kbdmux # keyboard multiplexer - -# Pseudo devices. -device loop # Network loopback -device random # Entropy device -device ether # Ethernet support -device tun # Packet tunnel. -device md # Memory "disks" -device gif # IPv6 and IPv4 tunneling - -# Wireless cards -options IEEE80211_SUPPORT_MESH -options AH_SUPPORT_AR5416 - -# The `bpf' device enables the Berkeley Packet Filter. -# Be aware of the administrative consequences of enabling this! -# Note that 'bpf' is required for DHCP. -device bpf # Berkeley packet filter Property changes on: head/sys/i386/conf/XEN ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/i386/conf/DEFAULTS =================================================================== --- head/sys/i386/conf/DEFAULTS (revision 282273) +++ head/sys/i386/conf/DEFAULTS (revision 282274) @@ -1,32 +1,31 @@ # # DEFAULTS -- Default kernel configuration file for FreeBSD/i386 # # $FreeBSD$ machine i386 # Bus support. device isa options ISAPNP # Floating point support. device npx # Pseudo devices. device mem # Memory and kernel memory devices device io # I/O device # UART chips on this platform device uart_ns8250 # Default partitioning schemes options GEOM_PART_BSD options GEOM_PART_EBR options GEOM_PART_EBR_COMPAT options GEOM_PART_MBR # enable support for native hardware -options NATIVE device atpic options NEW_PCIB Index: head/sys/i386/i386/apic_vector.s =================================================================== --- head/sys/i386/i386/apic_vector.s (revision 282273) +++ head/sys/i386/i386/apic_vector.s (revision 282274) @@ -1,342 +1,339 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD$ */ /* * Interrupt entry points for external interrupts triggered by I/O APICs * as well as IPI handlers. */ #include "opt_smp.h" #include #include #include #include "assym.s" .text SUPERALIGN_TEXT /* End Of Interrupt to APIC */ as_lapic_eoi: cmpl $0,x2apic_mode jne 1f movl lapic_map,%eax movl $0,LA_EOI(%eax) ret 1: movl $MSR_APIC_EOI,%ecx xorl %eax,%eax xorl %edx,%edx wrmsr ret /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word * in the ISR. The handler determines the highest bit set in the ISR, * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. */ #define ISR_VEC(index, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ SET_KERNEL_SREGS ; \ cld ; \ FAKE_MCOUNT(TF_EIP(%esp)) ; \ cmpl $0,x2apic_mode ; \ je 1f ; \ movl $(MSR_APIC_ISR0 + index),%ecx ; \ rdmsr ; \ jmp 2f ; \ 1: ; \ movl lapic_map, %edx ;/* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ 2: ; \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ jz 3f ; \ addl $(32 * index),%eax ; \ pushl %esp ; \ pushl %eax ; /* pass the IRQ */ \ call lapic_handle_intr ; \ addl $8, %esp ; /* discard parameter */ \ 3: ; \ MEXITCOUNT ; \ jmp doreti /* * Handle "spurious INTerrupts". * Notes: * This is different than the "spurious INTerrupt" generated by an * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ .text SUPERALIGN_TEXT IDTVEC(spuriousint) /* No EOI cycle used here */ iret ISR_VEC(1, apic_isr1) ISR_VEC(2, apic_isr2) ISR_VEC(3, apic_isr3) ISR_VEC(4, apic_isr4) ISR_VEC(5, apic_isr5) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) /* * Local APIC periodic timer handler. */ .text SUPERALIGN_TEXT IDTVEC(timerint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call lapic_handle_timer add $4, %esp MEXITCOUNT jmp doreti /* * Local APIC CMCI handler. */ .text SUPERALIGN_TEXT IDTVEC(cmcint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_cmc MEXITCOUNT jmp doreti /* * Local APIC error interrupt handler. */ .text SUPERALIGN_TEXT IDTVEC(errorint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_error MEXITCOUNT jmp doreti #ifdef XENHVM /* * Xen event channel upcall interrupt handler. * Only used when the hypervisor supports direct vector callbacks. */ .text SUPERALIGN_TEXT IDTVEC(xen_intr_upcall) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call xen_intr_handle_upcall add $4, %esp MEXITCOUNT jmp doreti #endif #ifdef HYPERV /* * This is the Hyper-V vmbus channel direct callback interrupt. * Only used when it is running on Hyper-V. 
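as_lapic_eoi near the top of the apic_vector.s hunk above chooses between two ways of acknowledging an interrupt: an MSR write when x2apic_mode is set, or a store into the memory-mapped local APIC page otherwise. The same decision in C, purely for readability (the function name and the eoi_reg parameter are not FreeBSD's; eoi_reg stands in for lapic_map plus the LA_EOI offset the assembly uses):

static void
lapic_eoi_sketch(volatile uint32_t *eoi_reg)
{

	if (x2apic_mode != 0)
		wrmsr(MSR_APIC_EOI, 0);	/* x2APIC: EOI is an MSR write */
	else
		*eoi_reg = 0;		/* xAPIC: EOI is an MMIO store */
}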
*/ .text SUPERALIGN_TEXT IDTVEC(hv_vmbus_callback) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call hv_vector_handler add $4, %esp MEXITCOUNT jmp doreti #endif #ifdef SMP /* * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT invltlb_ret: call as_lapic_eoi POP_FRAME iret SUPERALIGN_TEXT IDTVEC(invltlb) PUSH_FRAME SET_KERNEL_SREGS cld call invltlb_handler jmp invltlb_ret /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT IDTVEC(invlpg) PUSH_FRAME SET_KERNEL_SREGS cld call invlpg_handler jmp invltlb_ret /* * Page range TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invlrng) PUSH_FRAME SET_KERNEL_SREGS cld call invlrng_handler jmp invltlb_ret /* * Invalidate cache. */ .text SUPERALIGN_TEXT IDTVEC(invlcache) PUSH_FRAME SET_KERNEL_SREGS cld call invlcache_handler jmp invltlb_ret /* * Handler for IPIs sent via the per-cpu IPI bitmap. */ -#ifndef XEN .text SUPERALIGN_TEXT IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi FAKE_MCOUNT(TF_EIP(%esp)) call ipi_bitmap_handler MEXITCOUNT jmp doreti -#endif + /* * Executed by a CPU when it receives an IPI_STOP from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpustop) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi call cpustop_handler POP_FRAME iret /* * Executed by a CPU when it receives an IPI_SUSPEND from another CPU. */ -#ifndef XEN .text SUPERALIGN_TEXT IDTVEC(cpususpend) PUSH_FRAME SET_KERNEL_SREGS cld call as_lapic_eoi call cpususpend_handler POP_FRAME jmp doreti_iret -#endif /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. * * - Calls the generic rendezvous action function. */ .text SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME SET_KERNEL_SREGS cld #ifdef COUNT_IPIS movl PCPU(CPUID), %eax movl ipi_rendezvous_counts(,%eax,4), %eax incl (%eax) #endif call smp_rendezvous_action call as_lapic_eoi POP_FRAME iret #endif /* SMP */ Index: head/sys/i386/i386/genassym.c =================================================================== --- head/sys/i386/i386/genassym.c (revision 282273) +++ head/sys/i386/i386/genassym.c (revision 282274) @@ -1,248 +1,243 @@ /*- * Copyright (c) 1982, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_compat.h" #include "opt_hwpmc_hooks.h" #include "opt_kstack_pages.h" #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include #endif #include #include #include #include #include ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_TID, offsetof(struct thread, td_tid)); ASSYM(TDP_CALLCHAIN, TDP_CALLCHAIN); ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(V_TRAP, offsetof(struct vmmeter, v_trap)); ASSYM(V_SYSCALL, offsetof(struct vmmeter, v_syscall)); ASSYM(V_INTR, offsetof(struct vmmeter, v_intr)); /* ASSYM(UPAGES, UPAGES);*/ ASSYM(KSTACK_PAGES, KSTACK_PAGES); ASSYM(PAGE_SIZE, PAGE_SIZE); ASSYM(NPTEPG, NPTEPG); ASSYM(NPDEPG, NPDEPG); ASSYM(NPDEPTD, NPDEPTD); ASSYM(NPGPTD, NPGPTD); ASSYM(PDESIZE, sizeof(pd_entry_t)); ASSYM(PTESIZE, sizeof(pt_entry_t)); ASSYM(PDESHIFT, PDESHIFT); ASSYM(PTESHIFT, PTESHIFT); ASSYM(PAGE_SHIFT, PAGE_SHIFT); ASSYM(PAGE_MASK, PAGE_MASK); ASSYM(PDRSHIFT, PDRSHIFT); ASSYM(PDRMASK, PDRMASK); ASSYM(USRSTACK, USRSTACK); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(KERNBASE, KERNBASE); ASSYM(KERNLOAD, KERNLOAD); ASSYM(MCLBYTES, MCLBYTES); ASSYM(PCB_CR0, offsetof(struct pcb, pcb_cr0)); ASSYM(PCB_CR2, offsetof(struct pcb, pcb_cr2)); ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3)); ASSYM(PCB_CR4, offsetof(struct pcb, pcb_cr4)); ASSYM(PCB_EDI, offsetof(struct pcb, pcb_edi)); ASSYM(PCB_ESI, offsetof(struct pcb, pcb_esi)); ASSYM(PCB_EBP, offsetof(struct pcb, pcb_ebp)); ASSYM(PCB_ESP, offsetof(struct pcb, pcb_esp)); ASSYM(PCB_EBX, offsetof(struct pcb, pcb_ebx)); ASSYM(PCB_EIP, offsetof(struct pcb, pcb_eip)); ASSYM(TSS_ESP0, offsetof(struct i386tss, tss_esp0)); ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds)); ASSYM(PCB_ES, offsetof(struct pcb, pcb_es)); ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs)); ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); ASSYM(PCB_SS, offsetof(struct pcb, pcb_ss)); ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); ASSYM(PCB_PSL, 
offsetof(struct pcb, pcb_psl)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); ASSYM(PCB_FSD, offsetof(struct pcb, pcb_fsd)); ASSYM(PCB_GSD, offsetof(struct pcb, pcb_gsd)); ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_VM86CALL, PCB_VM86CALL); ASSYM(PCB_GDT, offsetof(struct pcb, pcb_gdt)); ASSYM(PCB_IDT, offsetof(struct pcb, pcb_idt)); ASSYM(PCB_LDT, offsetof(struct pcb, pcb_ldt)); ASSYM(PCB_TR, offsetof(struct pcb, pcb_tr)); ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); ASSYM(TF_EIP, offsetof(struct trapframe, tf_eip)); ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags)); ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); #ifdef COMPAT_43 ASSYM(SIGF_SC, offsetof(struct osigframe, sf_siginfo.si_sc)); #endif ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); #ifdef COMPAT_FREEBSD4 ASSYM(SIGF_UC4, offsetof(struct sigframe4, sf_uc)); #endif #ifdef COMPAT_43 ASSYM(SC_PS, offsetof(struct osigcontext, sc_ps)); ASSYM(SC_FS, offsetof(struct osigcontext, sc_fs)); ASSYM(SC_GS, offsetof(struct osigcontext, sc_gs)); ASSYM(SC_TRAPNO, offsetof(struct osigcontext, sc_trapno)); #endif #ifdef COMPAT_FREEBSD4 ASSYM(UC4_EFLAGS, offsetof(struct ucontext4, uc_mcontext.mc_eflags)); ASSYM(UC4_GS, offsetof(struct ucontext4, uc_mcontext.mc_gs)); #endif ASSYM(UC_EFLAGS, offsetof(ucontext_t, uc_mcontext.mc_eflags)); ASSYM(UC_GS, offsetof(ucontext_t, uc_mcontext.mc_gs)); ASSYM(ENOENT, ENOENT); ASSYM(EFAULT, EFAULT); ASSYM(ENAMETOOLONG, ENAMETOOLONG); ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(MAXPATHLEN, MAXPATHLEN); ASSYM(BOOTINFO_SIZE, sizeof(struct bootinfo)); ASSYM(BI_VERSION, offsetof(struct bootinfo, bi_version)); ASSYM(BI_KERNELNAME, offsetof(struct bootinfo, bi_kernelname)); ASSYM(BI_NFS_DISKLESS, offsetof(struct bootinfo, bi_nfs_diskless)); ASSYM(BI_ENDCOMMON, offsetof(struct bootinfo, bi_endcommon)); ASSYM(NFSDISKLESS_SIZE, sizeof(struct nfs_diskless)); ASSYM(BI_SIZE, offsetof(struct bootinfo, bi_size)); ASSYM(BI_SYMTAB, offsetof(struct bootinfo, bi_symtab)); ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab)); ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_COMMON_TSS, offsetof(struct pcpu, pc_common_tss)); ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd)); ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt)); ASSYM(PC_FSGS_GDT, offsetof(struct pcpu, pc_fsgs_gdt)); ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt)); ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss)); #ifdef DEV_APIC ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL); ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL); #endif ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL)); ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL)); ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL)); ASSYM(GPROC0_SEL, GPROC0_SEL); 
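The genassym.c hunk above is where constants such as PC_CPUID and LA_EOI come from: each ASSYM() line exports a C-side offsetof() or constant so that the assembly in apic_vector.s can index C structures without hard-coded numbers. A toy, user-space illustration of the idea follows; it is not the FreeBSD build machinery, and it only assumes that the generated header ends up as #define lines the .s files can include.

#include <stddef.h>
#include <stdio.h>

/* Stand-in for a kernel structure whose layout assembly must follow. */
struct pcpu_toy {
	void	*pc_curthread;
	int	 pc_cpuid;
};

int
main(void)
{
	/*
	 * genassym emits roughly this into assym.s at build time; the
	 * assembly then uses PC_CPUID the way apic_vector.s uses
	 * PCPU(CPUID), so reordering the struct cannot silently break
	 * the assembly code.
	 */
	printf("#define PC_CURTHREAD %zu\n",
	    offsetof(struct pcpu_toy, pc_curthread));
	printf("#define PC_CPUID %zu\n",
	    offsetof(struct pcpu_toy, pc_cpuid));
	return (0);
}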
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame)); #ifdef PC98 #include ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base)); ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat)); #endif -#ifdef XEN -ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3)); -ASSYM(XEN_HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_START); -#endif - #ifdef HWPMC_HOOKS ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN); #endif Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 282273) +++ head/sys/i386/i386/machdep.c (revision 282274) @@ -1,3765 +1,3451 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #ifdef PC98 #include #else #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif -#ifdef XEN -/* XEN includes */ -#include -#include -#include -#include -#include - -void Xhypervisor_callback(void); -void failsafe_callback(void); - -extern trap_info_t trap_table[]; -struct proc_ldt default_proc_ldt; -extern int init_first; -int running_xen = 1; -extern unsigned long physfree; -#endif /* XEN */ - /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern register_t init386(int first); extern void dblfault_handler(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ static int ispc98 = 1; SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); #endif int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; /* Default init_ops implementation. */ struct init_ops init_ops = { .early_clock_source_init = i8254_init, .early_delay = i8254_delay, #ifdef DEV_APIC .msi_init = msi_init, #endif }; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; #ifndef PC98 /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. 
* We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } #endif /* !PC98 */ /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); -#ifndef XEN cpu_setregs(); -#endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. 
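osendsig() here (like sendsig() and freebsd4_sendsig() below) chooses between two user-visible calling conventions: a handler installed with SA_SIGINFO receives a pointer to a filled-in siginfo structure, while an old-style handler only receives the signal code. A hedged userland illustration of the two prototypes these kernel-built frames correspond to; the handler bodies are placeholders and printf() is used purely for illustration (it is not async-signal-safe):

#include <signal.h>
#include <stdio.h>

/* New-style handler: the kernel passes a filled-in siginfo_t. */
static void
info_handler(int sig, siginfo_t *si, void *ucp)
{
	(void)ucp;
	printf("sig %d, code %d, addr %p\n", sig, si->si_code, si->si_addr);
}

/* Old-style handler: only the signal number is guaranteed. */
static void
plain_handler(int sig)
{
	printf("sig %d\n", sig);
}

int
main(void)
{
	struct sigaction sa;

	sa.sa_flags = SA_SIGINFO;	/* request the three-argument form */
	sa.sa_sigaction = info_handler;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);

	sa.sa_flags = 0;		/* fall back to the one-argument form */
	sa.sa_handler = plain_handler;
	sigaction(SIGUSR2, &sa, NULL);

	raise(SIGUSR1);
	raise(SIGUSR2);
	return (0);
}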
*/ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 
1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. 
*/ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef CPU_ENABLE_SSE if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { #else { #endif xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned int)sp & ~0x3F); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. 
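sendsig() above carves the signal frame out of the user stack: it skips 128 bytes below the interrupted %esp, reserves the extended FPU save area rounded down to a 64-byte boundary, and finally aligns the sigframe itself down to 16 bytes. A small sketch of the same arithmetic on plain integers; the sizes used are examples, not the real structure sizes:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Round an address down to the given power-of-two alignment. */
static uintptr_t
align_down(uintptr_t addr, uintptr_t align)
{
	return (addr & ~(align - 1));
}

int
main(void)
{
	uintptr_t sp = 0xbfbfe7f3;	/* interrupted user %esp (example) */
	size_t xfpusave_len = 576;	/* extended FPU state size (example) */
	size_t framesz = 0x220;		/* sizeof(struct sigframe) (example) */
	uintptr_t sfp;

	sp -= 128;				/* leave a gap below the old stack */
	sp = align_down(sp - xfpusave_len, 64);	/* 64-byte aligned xsave area */
	sfp = align_down(sp - framesz, 16);	/* 16-byte aligned sigframe */

	assert((sp & 0x3f) == 0);
	assert((sfp & 0xf) == 0);
	return (0);
}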
*/ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; if (regs->tf_eip == 0) regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. 
*/ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. 
*/ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
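The checks referred to here are the CS_SECURE() and EFL_SECURE() macros defined earlier in this file: a returning context may only change the user-modifiable eflags bits, and may only load a %cs whose requested privilege level is user. A self-contained sketch of the same predicates; PSL_USERCHANGE is narrowed to just a few arithmetic flags for illustration (the real mask is larger), and the selector values are examples rather than the actual FreeBSD selectors:

#include <stdio.h>

#define	SEL_UPL		3		/* user privilege level (ring 3) */
#define	ISPL(s)		((s) & 3)	/* requested privilege level of a selector */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)

/* Illustrative subset of the user-changeable eflags bits. */
#define	PSL_C		0x00000001
#define	PSL_PF		0x00000004
#define	PSL_ZF		0x00000040
#define	PSL_SF		0x00000080
#define	PSL_USERCHANGE	(PSL_C | PSL_PF | PSL_ZF | PSL_SF)

#define	EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)

int
main(void)
{
	unsigned int cur = 0x00000202;	/* example eflags of the old frame */

	/* Changing only condition codes is allowed. */
	printf("%d\n", EFL_SECURE(cur | PSL_C | PSL_ZF, cur));
	/* Toggling a privileged bit (IOPL, 0x3000) is rejected. */
	printf("%d\n", EFL_SECURE(cur | 0x3000, cur));
	/* A ring-0 selector is rejected; any selector with RPL 3 passes. */
	printf("%d %d\n", CS_SECURE(0x08), CS_SECURE(0x33));
	return (0);
}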
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) { uprintf( "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = imgp->ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. 
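The comment above explains why CR0_MP and CR0_TS (rather than CR0_EM) are used to make both ESC and WAIT instructions trap for lazy FPU context switching. For reference, a hedged sketch of the bit positions involved (values as documented in the IA-32 manuals; treat them as an approximation of the kernel's cpufunc definitions) and the read-modify-write that the cpu_setregs() code which follows performs:

#include <stdio.h>

/* CR0 control bits involved in FPU trap handling. */
#define	CR0_MP	0x00000002	/* monitor coprocessor: make WAIT honor TS */
#define	CR0_EM	0x00000004	/* emulate FPU: traps ESC, but not WAIT */
#define	CR0_TS	0x00000008	/* task switched: next FPU use traps (#NM) */
#define	CR0_NE	0x00000020	/* native FPU error reporting (#MF, not IRQ13) */
#define	CR0_WP	0x00010000	/* write protect in supervisor mode */
#define	CR0_AM	0x00040000	/* alignment mask */

int
main(void)
{
	unsigned int cr0 = 0x00000011;	/* PE|ET, an illustrative initial value */

	cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM;
	printf("cr0 = 0x%08x\n", cr0);
	return (0);
}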
* * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); static char bootmethod[16] = "BIOS"; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; -#ifdef XEN -union descriptor *gdt; -union descriptor *ldt; -#else union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ -#endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, -#ifndef XEN /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) 
ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, -#endif /* !XEN */ }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), 
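setidt() above packs the 32-bit handler address into the two 16-bit halves of a gate descriptor (gd_looffset and gd_hioffset); the DDB "show idt" command further down reassembles the address the same way. A tiny sketch of that split and reassembly, independent of the real descriptor layout:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint32_t func = 0xc0a1b2c3;		/* example handler address */
	uint16_t lo = (uint16_t)func;		/* gd_looffset */
	uint16_t hi = (uint16_t)(func >> 16);	/* gd_hioffset */

	/* db_show_idt() rebuilds the address as (hioffset << 16 | looffset). */
	assert(((uint32_t)hi << 16 | lo) == func);
	return (0);
}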
IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } -#if !defined(PC98) && !defined(XEN) +#if !defined(PC98) static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); #ifndef PAE if (base > 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(length / 1024)); return (1); } #endif /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. 
*/ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); return (add_physmap_entry(smap->base, smap->length, physmap, physmap_idxp)); } static void add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap, int *physmap_idxp) { struct bios_smap *smap, *smapend; u_int32_t smapsize; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, physmap_idxp)) break; } -#endif /* !PC98 && !XEN */ +#endif /* !PC98 */ -#ifndef XEN static void basemem_setup(void) { vm_paddr_t pa; pt_entry_t *pte; int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } -#endif /* !XEN */ /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. 
*/ #ifdef PC98 static void getmemsize(int first) { int off, physmap_idx, pa_indx, da_indx; u_long physmem_tunable, memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size; int i; int pg_n; u_int extmem; u_int under16; vm_paddr_t pa; bzero(physmap, sizeof(physmap)); /* XXX - some of EPSON machines can't use PG_N */ pg_n = PG_N; if (pc98_machine_type & M_EPSON_PC98) { switch (epson_machine_id) { #ifdef WB_CACHE default: #endif case EPSON_PC486_HX: case EPSON_PC486_HG: case EPSON_PC486_HA: pg_n = 0; break; } } under16 = pc98_getmemsize(&basemem, &extmem); basemem_setup(); physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * By default keep the memtest enabled. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* * We need to divide chunk if Maxmem is larger than 16MB and * under 16MB area is not full of memory. * (1) system area (15-16MB region) is cut off * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY") */ if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) { /* 15M - 16M region is cut off, so need to divide chunk */ physmap[physmap_idx + 1] = under16 * 1024; physmap_idx += 2; physmap[physmap_idx] = 0x1000000; physmap[physmap_idx + 1] = physmap[2] + extmem * 1024; } /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR3; full = FALSE; /* * block out kernel memory as not available. 
*/ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | pg_n; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); - - PT_UPDATES_FLUSH(); } #else /* PC98 */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size, physmem_tunable; -#ifndef XEN int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase; caddr_t kmdp; -#endif has_smap = 0; -#if defined(XEN) - Maxmem = xen_start_info->nr_pages - init_first; - physmem = Maxmem; - basemem = 0; - physmap[0] = init_first << PAGE_SHIFT; - physmap[1] = ptoa(Maxmem) - round_page(msgbufsize); - physmap_idx = 0; -#else #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this. 
*/ physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. */ physmap_idx = 0; smapbase = NULL; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); if (kmdp != NULL) smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { add_smap_entries(smapbase, physmap, &physmap_idx); has_smap = 1; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. */ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. 
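The loop above queries the BIOS E820 memory map one entry per call: %eax holds 0xE820, %edx the 'SMAP' signature, %ecx the buffer size, and %ebx a continuation cookie that becomes zero after the last entry. For reference, a sketch of the 20-byte record that struct bios_smap describes; the struct name and packing attribute here are illustrative, not the authoritative header definition:

#include <assert.h>
#include <stdint.h>

#define	SMAP_TYPE_MEMORY	1	/* usable RAM */

struct bios_smap_sketch {
	uint64_t	base;	/* first physical address of the range */
	uint64_t	length;	/* length of the range in bytes */
	uint32_t	type;	/* SMAP_TYPE_* classification */
} __attribute__((packed));

int
main(void)
{
	/* Each E820 call fills one such record; usable RAM has type 1. */
	assert(sizeof(struct bios_smap_sketch) == 20);
	return (0);
}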
*/ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: -#endif /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; -#ifndef XEN /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR3; full = FALSE; /* * block out kernel memory as not available. 
*/ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); -#else - phys_avail[0] = physfree; - phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; - dump_avail[0] = 0; - dump_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; -#endif - /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. 
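Each candidate page in the loop above is mapped non-cacheable and probed with four patterns (alternating bits both ways, all ones, all zeroes), restoring the original word afterwards. A standalone version of that probe, applied to an ordinary variable rather than a freshly mapped physical page:

#include <stdio.h>

/* Write/read-back test mirroring the patterns used in getmemsize(). */
static int
word_is_bad(volatile int *ptr)
{
	static const int patterns[] = { 0xaaaaaaaa, 0x55555555, 0xffffffff, 0x0 };
	int saved, bad, i;

	saved = *ptr;
	bad = 0;
	for (i = 0; i < 4; i++) {
		*ptr = patterns[i];
		if (*ptr != patterns[i])
			bad = 1;
	}
	*ptr = saved;		/* restore the original value */
	return (bad);
}

int
main(void)
{
	int word = 42;

	printf("bad = %d, word = %d\n", word_is_bad(&word), word);
	return (0);
}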
*/ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); - - PT_UPDATES_FLUSH(); } #endif /* PC98 */ -#ifdef XEN -#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) - register_t init386(first) int first; { - unsigned long gdtmachpfn; - int error, gsel_tss, metadata_missing, x, pa; - struct pcpu *pc; -#ifdef CPU_ENABLE_SSE - struct xstate_hdr *xhdr; -#endif - struct callback_register event = { - .type = CALLBACKTYPE_event, - .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback }, - }; - struct callback_register failsafe = { - .type = CALLBACKTYPE_failsafe, - .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback }, - }; - - thread0.td_kstack = proc0kstack; - thread0.td_kstack_pages = KSTACK_PAGES; - - /* - * This may be done better later if it gets more high level - * components in it. If so just link td->td_proc here. - */ - proc_linkup0(&proc0, &thread0); - - metadata_missing = 0; - if (xen_start_info->mod_start) { - preload_metadata = (caddr_t)xen_start_info->mod_start; - preload_bootstrap_relocate(KERNBASE); - } else { - metadata_missing = 1; - } - if (envmode == 1) - kern_envp = static_env; - else if ((caddr_t)xen_start_info->cmd_line) - kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); - - boothowto |= xen_boothowto(kern_envp); - - /* Init basic tunables, hz etc */ - init_param1(); - - /* - * XEN occupies a portion of the upper virtual address space - * At its base it manages an array mapping machine page frames - * to physical page frames - hence we need to be able to - * access 4GB - (64MB - 4MB + 64k) - */ - gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); - gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); - gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); - gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); - gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); - gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); - gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); - gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); - - pc = &__pcpu[0]; - gdt_segs[GPRIV_SEL].ssd_base = (int) pc; - gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; - - PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW); - bzero(gdt, PAGE_SIZE); - for (x = 0; x < NGDT; x++) - ssdtosd(&gdt_segs[x], &gdt[x].sd); - - mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); - - gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; - PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V); - PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); - lgdt(&r_gdt); - gdtset = 1; - - if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { - panic("set_trap_table failed - error %d\n", error); - } - - error = HYPERVISOR_callback_op(CALLBACKOP_register, &event); - if (error == 0) - error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); -#if CONFIG_XEN_COMPAT <= 0x030002 - if (error == -ENOXENSYS) - HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), - (unsigned long)Xhypervisor_callback, - GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); -#endif - pcpu_init(pc, 0, sizeof(struct pcpu)); - for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) - pmap_kenter(pa + KERNBASE, pa); - dpcpu_init((void *)(first + KERNBASE), 0); - first += DPCPU_SIZE; - physfree += DPCPU_SIZE; - init_first += DPCPU_SIZE / PAGE_SIZE; - - PCPU_SET(prvspace, 
pc); - PCPU_SET(curthread, &thread0); - - /* - * Initialize mutexes. - * - * icu_lock: in order to allow an interrupt to occur in a critical - * section, to set pcpu->ipending (etc...) properly, we - * must be able to get the icu lock, so it can't be - * under witness. - */ - mutex_init(); - mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); - - /* make ldt memory segments */ - PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW); - bzero(ldt, PAGE_SIZE); - ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); - ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); - for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) - ssdtosd(&ldt_segs[x], &ldt[x].sd); - - default_proc_ldt.ldt_base = (caddr_t)ldt; - default_proc_ldt.ldt_len = 6; - _default_ldt = (int)&default_proc_ldt; - PCPU_SET(currentldt, _default_ldt); - PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); - xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); - -#if defined(XEN_PRIVILEGED) - /* - * Initialize the i8254 before the console so that console - * initialization can use DELAY(). - */ - i8254_init(); -#endif - - /* - * Initialize the console before we print anything out. - */ - cninit(); - - if (metadata_missing) - printf("WARNING: loader(8) metadata is missing!\n"); - -#ifdef DEV_ISA -#ifdef DEV_ATPIC - elcr_probe(); - atpic_startup(); -#else - /* Reset and mask the atpics and leave them shut down. */ - atpic_reset(); - - /* - * Point the ICU spurious interrupt vectors at the APIC spurious - * interrupt handler. - */ - setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); -#endif -#endif - -#ifdef DDB - db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab); -#endif - - kdb_init(); - -#ifdef KDB - if (boothowto & RB_KDB) - kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); -#endif - - finishidentcpu(); /* Final stage of CPU initialization */ - setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, - GSEL(GCODE_SEL, SEL_KPL)); - initializecpu(); /* Initialize CPU registers */ - initializecpucache(); - - /* pointer to selector slot for %fs/%gs */ - PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); - - dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = - dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; - dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = - dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); -#if defined(PAE) || defined(PAE_TABLES) - dblfault_tss.tss_cr3 = (int)IdlePDPT; -#else - dblfault_tss.tss_cr3 = (int)IdlePTD; -#endif - dblfault_tss.tss_eip = (int)dblfault_handler; - dblfault_tss.tss_eflags = PSL_KERNEL; - dblfault_tss.tss_ds = dblfault_tss.tss_es = - dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); - dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); - dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); - dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); - - vm86_initialize(); - getmemsize(first); - init_param2(physmem); - - /* now running on new page tables, configured,and u/iom is accessible */ - - msgbufinit(msgbufp, msgbufsize); -#ifdef DEV_NPX - npxinit(true); -#endif - /* - * Set up thread0 pcb after npxinit calculated pcb + fpu save - * area size. Zero out the extended state header in fpu save - * area. 
- */ - thread0.td_pcb = get_pcb_td(&thread0); - bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); -#ifdef CPU_ENABLE_SSE - if (use_xsave) { - xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + - 1); - xhdr->xstate_bv = xsave_mask; - } -#endif - PCPU_SET(curpcb, thread0.td_pcb); - /* make an initial tss so cpu can get interrupt stack on syscall! */ - /* Note: -16 is so we can grow the trapframe if we came from vm86 */ - PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16); - PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); - gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); - HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), - PCPU_GET(common_tss.tss_esp0)); - - /* transfer to user mode */ - - _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); - _udatasel = GSEL(GUDATA_SEL, SEL_UPL); - - /* setup proc 0's pcb */ - thread0.td_pcb->pcb_flags = 0; -#if defined(PAE) || defined(PAE_TABLES) - thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; -#else - thread0.td_pcb->pcb_cr3 = (int)IdlePTD; -#endif - thread0.td_pcb->pcb_ext = 0; - thread0.td_frame = &proc0_tf; - thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0]; - thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; - - cpu_probe_amdc1e(); - - /* Location of kernel stack for locore */ - return ((register_t)thread0.td_pcb); -} - -#else -register_t -init386(first) - int first; -{ struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; #ifdef CPU_ENABLE_SSE struct xstate_hdr *xhdr; #endif thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = KSTACK_PAGES; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); #ifdef PC98 /* * Initialize DMAC */ pc98_init_dmac(); #endif metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. 
*/ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. */ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA #ifdef DEV_ATPIC #ifndef PC98 elcr_probe(); #endif atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. 
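The XBOX probe above reads PCI configuration space through I/O ports 0xCF8/0xCFC ("configuration mechanism #1"): a 32-bit address containing the enable bit, bus, device, function and register offset is written to 0xCF8, and the selected dword is then read back from 0xCFC. A minimal C sketch of that addressing, assuming the kernel's outl()/inl() port-I/O primitives; the helper name pci_cfgread32() and its prototypes are illustrative, not an existing FreeBSD interface:

#include <stdint.h>

/* Port-I/O primitives, assumed available as in machdep.c (illustrative prototypes). */
extern void outl(uint16_t port, uint32_t data);
extern uint32_t inl(uint16_t port);

#define PCI_CFG_ADDR    0xcf8
#define PCI_CFG_DATA    0xcfc

/* Illustrative helper: read a 32-bit config register from bus:dev:func. */
static uint32_t
pci_cfgread32(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg)
{
        uint32_t addr;

        addr = 0x80000000u |                    /* enable bit */
            ((uint32_t)bus << 16) |
            ((uint32_t)(dev & 0x1f) << 11) |
            ((uint32_t)(func & 0x7) << 8) |
            (reg & 0xfc);                       /* dword-aligned register */
        outl(PCI_CFG_ADDR, addr);
        return (inl(PCI_CFG_DATA));
}

/*
 * Writing 0x80000000 selects 0:0:0 register 0 (device/vendor ID), which is
 * what the XBOX check compares against 0x02a510de: device 0x02a5, vendor
 * 0x10de (NVIDIA).
 */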
*/ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif #ifdef DDB db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab); #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #if defined(PAE) || defined(PAE_TABLES) dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, msgbufsize); #ifdef DEV_NPX npxinit(true); #endif /* * Set up thread0 pcb after npxinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); #ifdef CPU_ENABLE_SSE if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } #endif PCPU_SET(curpcb, thread0.td_pcb); /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! 
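The setidt() calls and the syscall call gate built above both pack a 32-bit handler address into a descriptor whose offset is split across 16-bit low and high halves (the gd_looffset/gd_hioffset assignments). A small illustrative C example of that packing; the struct layout and ex_* names are assumptions for demonstration, not the kernel's segments.h definitions:

#include <stdint.h>
#include <assert.h>

/* Illustrative gate layout: the handler offset lives in two 16-bit halves. */
struct ex_gate {
        uint16_t off_lo;
        uint16_t selector;
        uint8_t  reserved;
        uint8_t  type_attr;     /* type, dpl, present */
        uint16_t off_hi;
};

static void
ex_set_gate(struct ex_gate *g, uint32_t handler, uint16_t sel, uint8_t attr)
{
        g->off_lo = handler & 0xffff;
        g->selector = sel;
        g->reserved = 0;
        g->type_attr = attr;
        g->off_hi = handler >> 16;
}

int
main(void)
{
        struct ex_gate g;

        /* 0x8e: present, dpl 0, 32-bit interrupt gate (standard encoding). */
        ex_set_gate(&g, 0xc0123456, 0x08, 0x8e);
        assert(g.off_lo == 0x3456 && g.off_hi == 0xc012);
        return (0);
}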
*/ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #if defined(PAE) || defined(PAE_TABLES) thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((register_t)thread0.td_pcb); } -#endif void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } #ifndef PC98 static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); if (kmdp == NULL) return (0); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); #endif /* !PC98 */ void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); #ifdef DEV_NPX npxgetregs(td); #else bzero(fpregs, sizeof(*fpregs)); #endif #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm, (struct save87 *)fpregs); else #endif /* CPU_ENABLE_SSE */ 
bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) set_fpregs_xmm((struct save87 *)fpregs, &get_pcb_user_save_td(td)->sv_xmm); else #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87, sizeof(*fpregs)); #ifdef DEV_NPX npxuserinited(td); #endif return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; char *xfpustate; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { #ifdef CPU_ENABLE_SSE size_t max_len, len; #endif #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); #else mcp->mc_ownedfp = npxgetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); #ifdef CPU_ENABLE_SSE if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(union savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; 
bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); #endif #endif } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { union savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { #ifdef DEV_NPX fpstate = (union savefpu *)&mcp->mc_fpstate; #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; #endif error = npxsetregs(td, fpstate, xfpustate, xfpustate_len); #else error = EINVAL; #endif } else return (EINVAL); return (error); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? 
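set_dbregs() above refuses DR7 encodings that the CPU treats as undefined by checking the per-breakpoint access-type and length fields packed into DR7 bits 16-31. A hedged C sketch of that field extraction and check, assuming the standard DR7 layout; the DR7_* macros mirror but are not the kernel's DBREG_DR7_ACCESS()/DBREG_DR7_LEN():

#include <stdint.h>

/*
 * Assumed DR7 layout: for breakpoint i, the R/W field occupies bits
 * 16+4*i..17+4*i and the LEN field bits 18+4*i..19+4*i.
 */
#define DR7_ACCESS(d, i)        (((d) >> (16 + (i) * 4)) & 0x3)
#define DR7_LEN(d, i)           (((d) >> (18 + (i) * 4)) & 0x3)

/*
 * Mirror of the loop in set_dbregs(): access type 0x02 and length 0x02
 * (8 bytes) are the undefined encodings rejected on i386.
 */
static int
dr7_is_valid(uint32_t dr7)
{
        int i;

        for (i = 0; i < 4; i++) {
                if (DR7_ACCESS(dr7, i) == 0x02)
                        return (0);
                if (DR7_LEN(dr7, i) == 0x02)
                        return (0);
        }
        return (1);
}

int
main(void)
{
        /* LEN0 == 0b10 (the undefined 8-byte length) must be rejected. */
        return (dr7_is_valid(0x00080001) ? 1 : 0);
}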
*/ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ Index: head/sys/i386/i386/minidump_machdep.c =================================================================== --- head/sys/i386/i386/minidump_machdep.c (revision 282273) +++ head/sys/i386/i386/minidump_machdep.c (revision 282274) @@ -1,416 +1,396 @@ /*- * Copyright (c) 2006 Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct kerneldumpheader) == 512); /* * Don't touch the first SIZEOF_METADATA bytes on the dump device. This * is to protect us from metadata and to protect metadata from us. */ #define SIZEOF_METADATA (64*1024) #define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) #define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1)) uint32_t *vm_page_dump; int vm_page_dump_size; static struct kerneldumpheader kdh; static off_t dumplo; /* Handle chunked writes. */ static size_t fragsz; static void *dump_va; static uint64_t counter, progress; CTASSERT(sizeof(*vm_page_dump) == 4); -#ifndef XEN -#define xpmap_mtop(x) (x) -#define xpmap_ptom(x) (x) -#endif static int is_dumpable(vm_paddr_t pa) { int i; for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) return (1); } return (0); } #define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) static int blk_flush(struct dumperinfo *di) { int error; if (fragsz == 0) return (0); error = dump_write(di, dump_va, 0, dumplo, fragsz); dumplo += fragsz; fragsz = 0; return (error); } static int blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) { size_t len; int error, i, c; u_int maxdumpsz; maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); if (maxdumpsz == 0) /* seatbelt */ maxdumpsz = PAGE_SIZE; error = 0; if ((sz % PAGE_SIZE) != 0) { printf("size not page aligned\n"); return (EINVAL); } if (ptr != NULL && pa != 0) { printf("cant have both va and pa!\n"); return (EINVAL); } if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { printf("address not page aligned\n"); return (EINVAL); } if (ptr != NULL) { /* If we're doing a virtual dump, flush any pre-existing pa pages */ error = blk_flush(di); if (error) return (error); } while (sz) { len = maxdumpsz - fragsz; if (len > sz) len = sz; counter += len; progress -= len; if (counter >> 24) { printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); counter &= (1<<24) - 1; } wdog_kern_pat(WD_LASTVAL); if (ptr) { error = dump_write(di, ptr, 0, dumplo, len); if (error) return (error); dumplo += len; ptr += len; sz -= len; } else { for (i = 0; i < len; i += PAGE_SIZE) dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); fragsz += len; pa += len; sz -= len; if (fragsz == maxdumpsz) { error = blk_flush(di); if (error) return (error); } } /* Check for user abort. 
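blk_write() and blk_flush() above accumulate dump data into a fragment of at most maxdumpsz bytes and only issue the device write when the fragment fills or a flush is forced. A simplified user-space sketch of that accumulate-then-flush pattern; it copies into a staging buffer where the kernel instead maps physical pages into a temporary VA window, and write_cb() is a stand-in for dump_write():

#include <stddef.h>
#include <string.h>

#define FRAG_MAX 4096                   /* illustrative fragment size */

static char   frag[FRAG_MAX];
static size_t fragsz;

/* Stand-in for dump_write(); pretend the device write always succeeds. */
static int
write_cb(const void *buf, size_t len)
{
        (void)buf;
        (void)len;
        return (0);
}

static int
blk_flush_example(void)
{
        int error;

        if (fragsz == 0)
                return (0);
        error = write_cb(frag, fragsz);
        fragsz = 0;
        return (error);
}

static int
blk_write_example(const char *ptr, size_t sz)
{
        size_t len;
        int error;

        while (sz > 0) {
                len = FRAG_MAX - fragsz;
                if (len > sz)
                        len = sz;
                memcpy(frag + fragsz, ptr, len);
                fragsz += len;
                ptr += len;
                sz -= len;
                if (fragsz == FRAG_MAX) {       /* fragment full: flush it */
                        error = blk_flush_example();
                        if (error != 0)
                                return (error);
                }
        }
        return (0);
}

int
main(void)
{
        static char data[10000];
        int error;

        error = blk_write_example(data, sizeof(data));
        if (error == 0)
                error = blk_flush_example();    /* push out the tail fragment */
        return (error);
}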
*/ c = cncheckc(); if (c == 0x03) return (ECANCELED); if (c != -1) printf(" (CTRL-C to abort) "); } return (0); } /* A fake page table page, to avoid having to handle both 4K and 2M pages */ static pt_entry_t fakept[NPTEPG]; int minidumpsys(struct dumperinfo *di) { uint64_t dumpsize; uint32_t ptesize; vm_offset_t va; int error; uint32_t bits; uint64_t pa; pd_entry_t *pd; pt_entry_t *pt; int i, j, k, bit; struct minidumphdr mdhdr; counter = 0; /* Walk page table pages, set bits in vm_page_dump */ ptesize = 0; for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { /* * We always write a page, even if it is zero. Each * page written corresponds to 2MB of space */ ptesize += PAGE_SIZE; pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */ j = va >> PDRSHIFT; if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { /* This is an entire 2M page. */ - pa = xpmap_mtop(pd[j] & PG_PS_FRAME); + pa = pd[j] & PG_PS_FRAME; for (k = 0; k < NPTEPG; k++) { if (is_dumpable(pa)) dump_add_page(pa); pa += PAGE_SIZE; } continue; } if ((pd[j] & PG_V) == PG_V) { /* set bit for each valid page in this 2MB block */ - pt = pmap_kenter_temporary(xpmap_mtop(pd[j] & PG_FRAME), 0); + pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0); for (k = 0; k < NPTEPG; k++) { if ((pt[k] & PG_V) == PG_V) { - pa = xpmap_mtop(pt[k] & PG_FRAME); + pa = pt[k] & PG_FRAME; if (is_dumpable(pa)) dump_add_page(pa); } } } else { /* nothing, we're going to dump a null page */ } } /* Calculate dump size. */ dumpsize = ptesize; dumpsize += round_page(msgbufp->msg_size); dumpsize += round_page(vm_page_dump_size); for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { bits = vm_page_dump[i]; while (bits) { bit = bsfl(bits); pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; /* Clear out undumpable pages now if needed */ if (is_dumpable(pa)) { dumpsize += PAGE_SIZE; } else { dump_drop_page(pa); } bits &= ~(1ul << bit); } } dumpsize += PAGE_SIZE; /* Determine dump offset on device. */ if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) { error = ENOSPC; goto fail; } dumplo = di->mediaoffset + di->mediasize - dumpsize; dumplo -= sizeof(kdh) * 2; progress = dumpsize; /* Initialize mdhdr */ bzero(&mdhdr, sizeof(mdhdr)); strcpy(mdhdr.magic, MINIDUMP_MAGIC); mdhdr.version = MINIDUMP_VERSION; mdhdr.msgbufsize = msgbufp->msg_size; mdhdr.bitmapsize = vm_page_dump_size; mdhdr.ptesize = ptesize; mdhdr.kernbase = KERNBASE; #if defined(PAE) || defined(PAE_TABLES) mdhdr.paemode = 1; #endif mkdumpheader(&kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, dumpsize, di->blocksize); printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); printf("Dumping %llu MB:", (long long)dumpsize >> 20); /* Dump leader */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Dump my header */ bzero(&fakept, sizeof(fakept)); bcopy(&mdhdr, &fakept, sizeof(mdhdr)); error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); if (error) goto fail; /* Dump msgbuf up front */ error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); if (error) goto fail; /* Dump bitmap */ error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); if (error) goto fail; /* Dump kernel page table pages */ for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { /* We always write a page, even if it is zero */ pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! 
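The dump-size pass above walks vm_page_dump one 32-bit word at a time, using bsfl to find each set bit and turning word index plus bit number back into a physical address. A small standalone C rendering of the same walk, with __builtin_ctz() standing in for bsfl() and illustrative EX_* constants:

#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE    4096u           /* illustrative; i386 PAGE_SIZE */
#define EX_NBBY         8

static uint32_t page_bitmap[4] = { 0x00000005, 0, 0x80000000, 0 };

int
main(void)
{
        uint64_t pa;
        uint32_t bits;
        size_t i, nwords;
        int bit;

        nwords = sizeof(page_bitmap) / sizeof(page_bitmap[0]);
        for (i = 0; i < nwords; i++) {
                bits = page_bitmap[i];
                while (bits != 0) {
                        bit = __builtin_ctz(bits);      /* like bsfl */
                        pa = ((uint64_t)i * sizeof(page_bitmap[0]) * EX_NBBY +
                            bit) * EX_PAGE_SIZE;
                        printf("page at pa 0x%llx is marked for dumping\n",
                            (unsigned long long)pa);
                        bits &= ~(1u << bit);
                }
        }
        return (0);
}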
*/ j = va >> PDRSHIFT; if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { /* This is a single 2M block. Generate a fake PTP */ pa = pd[j] & PG_PS_FRAME; for (k = 0; k < NPTEPG; k++) { fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; } error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse fakept in the same block */ error = blk_flush(di); if (error) goto fail; continue; } if ((pd[j] & PG_V) == PG_V) { - pa = xpmap_mtop(pd[j] & PG_FRAME); -#ifndef XEN + pa = pd[j] & PG_FRAME; error = blk_write(di, 0, pa, PAGE_SIZE); -#else - pt = pmap_kenter_temporary(pa, 0); - memcpy(fakept, pt, PAGE_SIZE); - for (i = 0; i < NPTEPG; i++) - fakept[i] = xpmap_mtop(fakept[i]); - error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); - if (error) - goto fail; - /* flush, in case we reuse fakept in the same block */ - error = blk_flush(di); - if (error) - goto fail; - bzero(fakept, sizeof(fakept)); -#endif - if (error) goto fail; } else { bzero(fakept, sizeof(fakept)); error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); if (error) goto fail; /* flush, in case we reuse fakept in the same block */ error = blk_flush(di); if (error) goto fail; } } /* Dump memory chunks */ /* XXX cluster it up and use blk_dump() */ for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { bits = vm_page_dump[i]; while (bits) { bit = bsfl(bits); pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; error = blk_write(di, 0, pa, PAGE_SIZE); if (error) goto fail; bits &= ~(1ul << bit); } } error = blk_flush(di); if (error) goto fail; /* Dump trailer */ error = dump_write(di, &kdh, 0, dumplo, sizeof(kdh)); if (error) goto fail; dumplo += sizeof(kdh); /* Signal completion, signoff and exit stage left. */ dump_write(di, NULL, 0, 0, 0); printf("\nDump complete\n"); return (0); fail: if (error < 0) error = -error; if (error == ECANCELED) printf("\nDump aborted\n"); else if (error == ENOSPC) printf("\nDump failed. Partition too small.\n"); else printf("\n** DUMP FAILED (ERROR %d) **\n", error); return (error); } void dump_add_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 5; /* 2^5 = 32 */ bit = pa & 31; atomic_set_int(&vm_page_dump[idx], 1ul << bit); } void dump_drop_page(vm_paddr_t pa) { int idx, bit; pa >>= PAGE_SHIFT; idx = pa >> 5; /* 2^5 = 32 */ bit = pa & 31; atomic_clear_int(&vm_page_dump[idx], 1ul << bit); } Index: head/sys/i386/i386/support.s =================================================================== --- head/sys/i386/i386/support.s (revision 282273) +++ head/sys/i386/i386/support.s (revision 282274) @@ -1,837 +1,835 @@ /*- * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
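dump_add_page() and dump_drop_page() above perform the inverse mapping: the physical address is shifted down to a page frame number, then split into a 32-bit bitmap word index (pfn >> 5) and a bit number (pfn & 31). A tiny C check of that arithmetic, consistent with the bitmap walk sketched earlier:

#include <stdint.h>
#include <assert.h>

#define EX_PAGE_SHIFT 12                /* illustrative; i386 PAGE_SHIFT */

/* Map a physical address to its bitmap word index and bit number. */
static void
pa_to_bitpos(uint64_t pa, int *idx, int *bit)
{
        uint64_t pfn = pa >> EX_PAGE_SHIFT;

        *idx = (int)(pfn >> 5);         /* 32 bits per bitmap word */
        *bit = (int)(pfn & 31);
}

int
main(void)
{
        int idx, bit;

        pa_to_bitpos(0x5f000, &idx, &bit);
        assert(idx == 2 && bit == 31);
        return (0);
}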
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_npx.h" #include #include #include #include #include #include "assym.s" #define IDXSHIFT 10 .text /* * bcopy family * void bzero(void *buf, u_int len) */ ENTRY(bzero) pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx cld rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret END(bzero) ENTRY(sse2_pagezero) pushl %ebx movl 8(%esp),%ecx movl %ecx,%eax addl $4096,%eax xor %ebx,%ebx 1: movnti %ebx,(%ecx) addl $4,%ecx cmpl %ecx,%eax jne 1b sfence popl %ebx ret END(sse2_pagezero) ENTRY(i686_pagezero) pushl %edi pushl %ebx movl 12(%esp),%edi movl $1024,%ecx cld ALIGN_TEXT 1: xorl %eax,%eax repe scasl jnz 2f popl %ebx popl %edi ret ALIGN_TEXT 2: incl %ecx subl $4,%edi movl %ecx,%edx cmpl $16,%ecx jge 3f movl %edi,%ebx andl $0x3f,%ebx shrl %ebx shrl %ebx movl $16,%ecx subl %ebx,%ecx 3: subl %ecx,%edx rep stosl movl %edx,%ecx testl %edx,%edx jnz 1b popl %ebx popl %edi ret END(i686_pagezero) /* fillw(pat, base, cnt) */ ENTRY(fillw) pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx cld rep stosw popl %edi ret END(fillw) ENTRY(bcopyb) pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f cld /* nope, copy forwards */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards. */ addl %ecx,%esi decl %edi decl %esi std rep movsb popl %edi popl %esi cld ret END(bcopyb) /* * bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) pushl %ebp movl %esp,%ebp pushl %esi pushl %edi movl 8(%ebp),%esi movl 12(%ebp),%edi movl 16(%ebp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep movsl movl 16(%ebp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %edi popl %esi popl %ebp ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi decl %edi decl %esi andl $3,%ecx /* any fractional bytes? */ std rep movsb movl 16(%ebp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld popl %ebp ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%ecx movl %edi,%eax shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? 
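The bcopy() above picks its copy direction with a single unsigned comparison: it computes dst - src and copies backwards only when that difference is smaller than the length, i.e. when the destination overlaps the source from above. A C rendering of the same test (memmove(3) semantics); bcopy_example() is illustrative only and copies bytewise where the assembly moves 32-bit words:

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <assert.h>

static void
bcopy_example(const void *src, void *dst, size_t len)
{
        const char *s = src;
        char *d = dst;

        /* (dst - src) < len, unsigned, means dst lies inside [src, src+len). */
        if ((uintptr_t)(d - s) >= (uintptr_t)len) {
                while (len--)           /* forwards, like cld; rep movs */
                        *d++ = *s++;
        } else {
                d += len;               /* backwards, like std; rep movs */
                s += len;
                while (len--)
                        *--d = *--s;
        }
}

int
main(void)
{
        char buf[8] = "abcdef";

        bcopy_example(buf, buf + 2, 4); /* overlapping, dst above src */
        assert(memcmp(buf, "ababcd", 6) == 0);
        return (0);
}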
*/ rep movsb popl %esi popl %edi ret END(memcpy) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines and possibly * the math- and DOS emulators should be the only places that do this. * * We have to access the memory with user's permissions, so use a segment * selector with RPL 3. For writes to user space we have to additionally * check the PTE for write permission, because the 386 does not check * write permissions when we are executing with EPL 0. The 486 does check * this if the WP bit is set in CR0, so we can use a simpler version here. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE */ ENTRY(copyout) movl PCPU(CURPCB),%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi pushl %ebx movl 16(%esp),%esi movl 20(%esp),%edi movl 24(%esp),%ebx testl %ebx,%ebx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movl %edi,%eax addl %ebx,%eax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ cmpl $VM_MAXUSER_ADDRESS,%eax ja copyout_fault /* bcopy(%esi, %edi, %ebx) */ movl %ebx,%ecx shrl $2,%ecx cld rep movsl movb %bl,%cl andb $3,%cl rep movsb done_copyout: popl %ebx popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyout) ALIGN_TEXT copyout_fault: popl %ebx popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * copyin(from_user, to_kernel, len) - MP SAFE */ ENTRY(copyin) movl PCPU(CURPCB),%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi movl 12(%esp),%esi /* caddr_t from */ movl 16(%esp),%edi /* caddr_t to */ movl 20(%esp),%ecx /* size_t len */ /* * make sure address is valid */ movl %esi,%edx addl %ecx,%edx jc copyin_fault cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld rep movsl movb %al,%cl andb $3,%cl /* copy remaining bytes */ rep movsb popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyin) ALIGN_TEXT copyin_fault: popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * casueword. Compare and set user word. Returns -1 on fault, * 0 on non-faulting access. The current value is in *oldp. */ ALTENTRY(casueword32) ENTRY(casueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* dst */ movl 8(%esp),%eax /* old */ movl 16(%esp),%ecx /* new */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault #ifdef SMP lock #endif cmpxchgl %ecx,(%edx) /* Compare and set. */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. 
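copyout() above guards the user buffer before touching it: destination plus length must not wrap around the 32-bit address space (the addl/jc pair) and the resulting end address must not pass VM_MAXUSER_ADDRESS. A hedged C sketch of that validation; EX_MAXUSER is an illustrative bound, and the real routines additionally arm pcb_onfault so a page fault unwinds to an EFAULT return:

#include <stdint.h>

#define EX_MAXUSER 0xbfc00000u          /* illustrative user VA limit */

/*
 * Nonzero when [uaddr, uaddr + len) is a plausible user range: no 32-bit
 * wraparound and the end address does not pass the user limit. Mirrors the
 * addl/jc and cmpl $VM_MAXUSER_ADDRESS / ja checks in copyout().
 */
static int
user_range_ok(uint32_t uaddr, uint32_t len)
{
        uint32_t end = uaddr + len;

        if (end < uaddr)                /* wrapped: the asm's jc fault path */
                return (0);
        if (end > EX_MAXUSER)           /* past the user VA limit: ja fault */
                return (0);
        return (1);
}

int
main(void)
{
        /* 0xffffff00 + 0x200 wraps, so it must be rejected. */
        return (user_range_ok(0xffffff00u, 0x200) ? 1 : 0);
}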
*/ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 12(%esp),%edx /* oldp */ movl %eax,(%edx) xorl %eax,%eax ret END(casueword32) END(casueword) /* * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user * memory. */ ALTENTRY(fueword32) ENTRY(fueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* from */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) movl 8(%esp),%edx movl %eax,(%edx) xorl %eax,%eax ret END(fueword32) END(fueword) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. */ ALTENTRY(suswintr) ENTRY(fuswintr) movl $-1,%eax ret END(suswintr) END(fuswintr) ENTRY(fuword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja fusufault movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fuword16) ENTRY(fubyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja fusufault movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fubyte) ALIGN_TEXT fusufault: movl PCPU(CURPCB),%ecx xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) decl %eax ret /* * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory. * All these functions are MPSAFE. */ ALTENTRY(suword32) ENTRY(suword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ ja fusufault movl 8(%esp),%eax movl %eax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret END(suword32) END(suword) ENTRY(suword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ ja fusufault movw 8(%esp),%ax movw %ax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(suword16) ENTRY(subyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ ja fusufault movb 8(%esp),%al movb %al,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. 
*/ ENTRY(copyinstr) pushl %esi pushl %edi movl PCPU(CURPCB),%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ movl $VM_MAXUSER_ADDRESS,%eax /* make sure 'from' is within bounds */ subl %esi,%eax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpl %edx,%eax jae 1f movl %eax,%edx movl %eax,20(%esp) 1: incl %edx cld 2: decl %edx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp cpystrflt_x 3: /* edx is zero - return ENAMETOOLONG or EFAULT */ cmpl $VM_MAXUSER_ADDRESS,%esi jae cpystrflt 4: movl $ENAMETOOLONG,%eax jmp cpystrflt_x cpystrflt: movl $EFAULT,%eax cpystrflt_x: /* set *lencopied and return %eax */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 1f movl %ecx,(%edx) 1: popl %edi popl %esi ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx cld 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret END(copystr) ENTRY(bcmp) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%edx movl %edx,%ecx shrl $2,%ecx cld /* compare forwards */ repe cmpsl jne 1f movl %edx,%ecx andl $3,%ecx repe cmpsb 1: setne %al movsbl %al,%eax popl %esi popl %edi ret END(bcmp) /* * Handling of special 386 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) -#ifndef XEN /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) -#endif /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%gs movl %eax,%ss movl $KPSEL,%eax movl %eax,%fs /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax movl $KCSEL,4(%esp) MEXITCOUNT lret END(lgdt) /* ssdtosd(*ssdp,*sdp) */ ENTRY(ssdtosd) pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll $16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret END(ssdtosd) /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax movl %eax,%dr7 /* disable all breakpoints first */ movl %eax,%dr0 movl %eax,%dr1 movl %eax,%dr2 movl %eax,%dr3 movl %eax,%dr6 ret END(reset_dbregs) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* 
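The kernel setjmp()/longjmp() above save and restore only the callee-preserved registers (%ebx, %esp, %ebp, %esi, %edi) plus the return address, and longjmp() always makes the saved setjmp() call appear to return 1. A small standalone example of that control-flow contract, using the C library's setjmp(3) rather than the kernel routines:

#include <setjmp.h>
#include <stdio.h>

static jmp_buf env;

static void
fail_deep_in_call_chain(void)
{
        /* Unwind straight back to the setjmp() call site. */
        longjmp(env, 1);
}

int
main(void)
{
        if (setjmp(env) == 0) {
                printf("direct return from setjmp\n");
                fail_deep_in_call_chain();
                printf("never reached\n");
        } else {
                printf("returned again via longjmp\n");
        }
        return (0);
}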
restore edi */ movl 20(%eax),%edx /* get rta */ movl %edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx rdmsr movl 8(%esp),%ecx movl %eax,(%ecx) movl %edx,4(%ecx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * Support for writing MSRs in the safe manner. */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx movl 8(%esp),%eax movl 12(%esp),%edx wrmsr xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl $EFAULT,%eax ret Index: head/sys/i386/i386/swtch.s =================================================================== --- head/sys/i386/i386/swtch.s (revision 282273) +++ head/sys/i386/i386/swtch.s (revision 282274) @@ -1,489 +1,477 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_npx.h" #include "opt_sched.h" #include #include "assym.s" #if defined(SMP) && defined(SCHED_ULE) #define SETOP xchgl #define BLOCK_SPIN(reg) \ movl $blocked_lock,%eax ; \ 100: ; \ lock ; \ cmpxchgl %eax,TD_LOCK(reg) ; \ jne 101f ; \ pause ; \ jmp 100b ; \ 101: #else #define SETOP movl #define BLOCK_SPIN(reg) #endif /*****************************************************************************/ /* Scheduling */ /*****************************************************************************/ .text /* * cpu_throw() * * This is the second half of cpu_switch(). It is used when the current * thread is either a dummy or slated to die, and we no longer care * about its state. This is only a slight optimization and is probably * not worth it anymore. 
Note that we need to clear the pm_active bits so * we do need the old proc if it still exists. * 0(%esp) = ret * 4(%esp) = oldtd * 8(%esp) = newtd */ ENTRY(cpu_throw) movl PCPU(CPUID), %esi movl 4(%esp),%ecx /* Old thread */ testl %ecx,%ecx /* no thread? */ jz 1f /* release bit from old pm_active */ movl PCPU(CURPMAP), %ebx #ifdef SMP lock #endif btrl %esi, PM_ACTIVE(%ebx) /* clear old */ 1: movl 8(%esp),%ecx /* New thread */ movl TD_PCB(%ecx),%edx movl PCB_CR3(%edx),%eax - LOAD_CR3(%eax) + movl %eax,%cr3 /* set bit in new pm_active */ movl TD_PROC(%ecx),%eax movl P_VMSPACE(%eax), %ebx addl $VM_PMAP, %ebx movl %ebx, PCPU(CURPMAP) #ifdef SMP lock #endif btsl %esi, PM_ACTIVE(%ebx) /* set new */ jmp sw1 END(cpu_throw) /* * cpu_switch(old, new) * * Save the current thread state, then select the next thread to run * and load its state. * 0(%esp) = ret * 4(%esp) = oldtd * 8(%esp) = newtd * 12(%esp) = newlock */ ENTRY(cpu_switch) /* Switch to new thread. First, save context. */ movl 4(%esp),%ecx #ifdef INVARIANTS testl %ecx,%ecx /* no thread? */ jz badsw2 /* no, panic */ #endif movl TD_PCB(%ecx),%edx movl (%esp),%eax /* Hardware registers */ movl %eax,PCB_EIP(%edx) movl %ebx,PCB_EBX(%edx) movl %esp,PCB_ESP(%edx) movl %ebp,PCB_EBP(%edx) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) mov %gs,PCB_GS(%edx) pushfl /* PSL */ popl PCB_PSL(%edx) /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) jz 1f /* no, skip over */ movl %dr7,%eax /* yes, do the save */ movl %eax,PCB_DR7(%edx) andl $0x0000fc00, %eax /* disable all watchpoints */ movl %eax,%dr7 movl %dr6,%eax movl %eax,PCB_DR6(%edx) movl %dr3,%eax movl %eax,PCB_DR3(%edx) movl %dr2,%eax movl %eax,PCB_DR2(%edx) movl %dr1,%eax movl %eax,PCB_DR1(%edx) movl %dr0,%eax movl %eax,PCB_DR0(%edx) 1: #ifdef DEV_NPX /* have we used fp, and need a save? */ cmpl %ecx,PCPU(FPCURTHREAD) jne 1f pushl PCB_SAVEFPU(%edx) /* h/w bugs make saving complicated */ call npxsave /* do it in a big C function */ popl %eax 1: #endif /* Save is done. Now fire up new thread. Leave old vmspace. */ movl 4(%esp),%edi movl 8(%esp),%ecx /* New thread */ movl 12(%esp),%esi /* New lock */ #ifdef INVARIANTS testl %ecx,%ecx /* no thread? */ jz badsw3 /* no, panic */ #endif movl TD_PCB(%ecx),%edx /* switch address space */ movl PCB_CR3(%edx),%eax - READ_CR3(%ebx) /* The same address space? */ + movl %cr3,%ebx /* The same address space? */ cmpl %ebx,%eax je sw0 - LOAD_CR3(%eax) /* new address space */ + movl %eax,%cr3 /* new address space */ movl %esi,%eax movl PCPU(CPUID),%esi SETOP %eax,TD_LOCK(%edi) /* Switchout td_lock */ /* Release bit from old pmap->pm_active */ movl PCPU(CURPMAP), %ebx #ifdef SMP lock #endif btrl %esi, PM_ACTIVE(%ebx) /* clear old */ /* Set bit in new pmap->pm_active */ movl TD_PROC(%ecx),%eax /* newproc */ movl P_VMSPACE(%eax), %ebx addl $VM_PMAP, %ebx movl %ebx, PCPU(CURPMAP) #ifdef SMP lock #endif btsl %esi, PM_ACTIVE(%ebx) /* set new */ jmp sw1 sw0: SETOP %esi,TD_LOCK(%edi) /* Switchout td_lock */ sw1: BLOCK_SPIN(%ecx) -#ifdef XEN - pushl %eax - pushl %ecx - pushl %edx - call xen_handle_thread_switch - popl %edx - popl %ecx - popl %eax /* - * XXX set IOPL - */ -#else - /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. */ cmpl $0, PCB_EXT(%edx) /* has pcb extension? 
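The lock btrl/btsl pairs above keep pmap->pm_active in sync with the set of CPUs that currently have the pmap loaded in %cr3. A hedged C-level equivalent, assuming the cpuset_t pm_active field and the <sys/cpuset.h> atomic macros used elsewhere in the pmap code:

/*
 * Sketch only: the C spelling of the btrl/btsl bookkeeping done in
 * cpu_throw()/cpu_switch() above.
 */
static void
switch_pm_active(pmap_t oldpmap, pmap_t newpmap, u_int cpuid)
{

        if (oldpmap != NULL)
                CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
        CPU_SET_ATOMIC(cpuid, &newpmap->pm_active);
}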
*/ je 1f /* If not, use the default */ movl $1, PCPU(PRIVATE_TSS) /* mark use of private tss */ movl PCB_EXT(%edx), %edi /* new tss descriptor */ jmp 2f /* Load it up */ 1: /* * Use the common default TSS instead of our own. * Set our stack pointer into the TSS, it's set to just * below the PCB. In C, common_tss.tss_esp0 = &pcb - 16; */ leal -16(%edx), %ebx /* leave space for vm86 */ movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0 /* * Test this CPU's bit in the bitmap to see if this * CPU was using a private TSS. */ cmpl $0, PCPU(PRIVATE_TSS) /* Already using the common? */ je 3f /* if so, skip reloading */ movl $0, PCPU(PRIVATE_TSS) PCPU_ADDR(COMMON_TSSD, %edi) 2: /* Move correct tss descriptor into GDT slot, then reload tr. */ movl PCPU(TSS_GDT), %ebx /* entry in GDT */ movl 0(%edi), %eax movl 4(%edi), %esi movl %eax, 0(%ebx) movl %esi, 4(%ebx) movl $GPROC0_SEL*8, %esi /* GSEL(GPROC0_SEL, SEL_KPL) */ ltr %si 3: /* Copy the %fs and %gs selectors into this pcpu gdt */ leal PCB_FSD(%edx), %esi movl PCPU(FSGS_GDT), %edi movl 0(%esi), %eax /* %fs selector */ movl 4(%esi), %ebx movl %eax, 0(%edi) movl %ebx, 4(%edi) movl 8(%esi), %eax /* %gs selector, comes straight after */ movl 12(%esi), %ebx movl %eax, 8(%edi) movl %ebx, 12(%edi) -#endif + /* Restore context. */ movl PCB_EBX(%edx),%ebx movl PCB_ESP(%edx),%esp movl PCB_EBP(%edx),%ebp movl PCB_ESI(%edx),%esi movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) pushl PCB_PSL(%edx) popfl movl %edx, PCPU(CURPCB) movl TD_TID(%ecx),%eax movl %ecx, PCPU(CURTHREAD) /* into next thread */ /* * Determine the LDT to use and load it if is the default one and * that is not the current one. */ movl TD_PROC(%ecx),%eax cmpl $0,P_MD+MD_LDT(%eax) jnz 1f movl _default_ldt,%eax cmpl PCPU(CURRENTLDT),%eax je 2f - LLDT(_default_ldt) + lldt _default_ldt movl %eax,PCPU(CURRENTLDT) jmp 2f 1: /* Load the LDT when it is not the default one. */ pushl %edx /* Preserve pointer to pcb. */ addl $P_MD,%eax /* Pointer to mdproc is arg. */ pushl %eax call set_user_ldt addl $4,%esp popl %edx 2: /* This must be done after loading the user LDT. */ .globl cpu_switch_load_gs cpu_switch_load_gs: mov PCB_GS(%edx),%gs /* Test if debug registers should be restored. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) jz 1f /* * Restore debug registers. The special code for dr7 is to * preserve the current values of its reserved bits. */ movl PCB_DR6(%edx),%eax movl %eax,%dr6 movl PCB_DR3(%edx),%eax movl %eax,%dr3 movl PCB_DR2(%edx),%eax movl %eax,%dr2 movl PCB_DR1(%edx),%eax movl %eax,%dr1 movl PCB_DR0(%edx),%eax movl %eax,%dr0 movl %dr7,%eax andl $0x0000fc00,%eax movl PCB_DR7(%edx),%ecx andl $~0x0000fc00,%ecx orl %ecx,%eax movl %eax,%dr7 1: ret #ifdef INVARIANTS badsw1: pushal pushl $sw0_1 call panic sw0_1: .asciz "cpu_throw: no newthread supplied" badsw2: pushal pushl $sw0_2 call panic sw0_2: .asciz "cpu_switch: no curthread supplied" badsw3: pushal pushl $sw0_3 call panic sw0_3: .asciz "cpu_switch: no newthread supplied" #endif END(cpu_switch) /* * savectx(pcb) * Update pcb, saving current processor state. */ ENTRY(savectx) /* Fetch PCB. */ movl 4(%esp),%ecx /* Save caller's return address. Child won't execute this routine. 
*/ movl (%esp),%eax movl %eax,PCB_EIP(%ecx) movl %cr3,%eax movl %eax,PCB_CR3(%ecx) movl %ebx,PCB_EBX(%ecx) movl %esp,PCB_ESP(%ecx) movl %ebp,PCB_EBP(%ecx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) mov %gs,PCB_GS(%ecx) pushfl popl PCB_PSL(%ecx) movl %cr0,%eax movl %eax,PCB_CR0(%ecx) movl %cr2,%eax movl %eax,PCB_CR2(%ecx) movl %cr4,%eax movl %eax,PCB_CR4(%ecx) movl %dr0,%eax movl %eax,PCB_DR0(%ecx) movl %dr1,%eax movl %eax,PCB_DR1(%ecx) movl %dr2,%eax movl %eax,PCB_DR2(%ecx) movl %dr3,%eax movl %eax,PCB_DR3(%ecx) movl %dr6,%eax movl %eax,PCB_DR6(%ecx) movl %dr7,%eax movl %eax,PCB_DR7(%ecx) mov %ds,PCB_DS(%ecx) mov %es,PCB_ES(%ecx) mov %fs,PCB_FS(%ecx) mov %ss,PCB_SS(%ecx) sgdt PCB_GDT(%ecx) sidt PCB_IDT(%ecx) sldt PCB_LDT(%ecx) str PCB_TR(%ecx) movl $1,%eax ret END(savectx) /* * resumectx(pcb) __fastcall * Resuming processor state from pcb. */ ENTRY(resumectx) /* Restore GDT. */ lgdt PCB_GDT(%ecx) /* Restore segment registers */ movzwl PCB_DS(%ecx),%eax mov %ax,%ds movzwl PCB_ES(%ecx),%eax mov %ax,%es movzwl PCB_FS(%ecx),%eax mov %ax,%fs movzwl PCB_GS(%ecx),%eax movw %ax,%gs movzwl PCB_SS(%ecx),%eax mov %ax,%ss /* Restore CR2, CR4, CR3 and CR0 */ movl PCB_CR2(%ecx),%eax movl %eax,%cr2 movl PCB_CR4(%ecx),%eax movl %eax,%cr4 movl PCB_CR3(%ecx),%eax movl %eax,%cr3 movl PCB_CR0(%ecx),%eax movl %eax,%cr0 jmp 1f 1: /* Restore descriptor tables */ lidt PCB_IDT(%ecx) lldt PCB_LDT(%ecx) #define SDT_SYS386TSS 9 #define SDT_SYS386BSY 11 /* Clear "task busy" bit and reload TR */ movl PCPU(TSS_GDT),%eax andb $(~SDT_SYS386BSY | SDT_SYS386TSS),5(%eax) movzwl PCB_TR(%ecx),%eax ltr %ax #undef SDT_SYS386TSS #undef SDT_SYS386BSY /* Restore debug registers */ movl PCB_DR0(%ecx),%eax movl %eax,%dr0 movl PCB_DR1(%ecx),%eax movl %eax,%dr1 movl PCB_DR2(%ecx),%eax movl %eax,%dr2 movl PCB_DR3(%ecx),%eax movl %eax,%dr3 movl PCB_DR6(%ecx),%eax movl %eax,%dr6 movl PCB_DR7(%ecx),%eax movl %eax,%dr7 /* Restore other registers */ movl PCB_EDI(%ecx),%edi movl PCB_ESI(%ecx),%esi movl PCB_EBP(%ecx),%ebp movl PCB_ESP(%ecx),%esp movl PCB_EBX(%ecx),%ebx /* reload code selector by turning return into intersegmental return */ pushl PCB_EIP(%ecx) movl $KCSEL,4(%esp) xorl %eax,%eax lret END(resumectx) Index: head/sys/i386/i386/sys_machdep.c =================================================================== --- head/sys/i386/i386/sys_machdep.c (revision 282273) +++ head/sys/i386/i386/sys_machdep.c (revision 282274) @@ -1,896 +1,793 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)sys_machdep.c 5.5 (Berkeley) 1/19/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef XEN -#include - -void i386_reset_ldt(struct proc_ldt *pldt); - -void -i386_reset_ldt(struct proc_ldt *pldt) -{ - xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len); -} -#else -#define i386_reset_ldt(x) -#endif - #include /* for kernel_map */ #define MAX_LD 8192 #define LD_PER_PAGE 512 #define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1)) #define SIZE_FROM_LARGEST_LD(num) (NEW_MAX_LD(num) << 3) #define NULL_LDT_BASE ((caddr_t)NULL) #ifdef SMP static void set_user_ldt_rv(struct vmspace *vmsp); #endif static int i386_set_ldt_data(struct thread *, int start, int num, union descriptor *descs); static int i386_ldt_grow(struct thread *td, int len); #ifndef _SYS_SYSPROTO_H_ struct sysarch_args { int op; char *parms; }; #endif int sysarch(td, uap) struct thread *td; register struct sysarch_args *uap; { int error; union descriptor *lp; union { struct i386_ldt_args largs; struct i386_ioperm_args iargs; struct i386_get_xfpustate xfpu; } kargs; uint32_t base; struct segment_descriptor sd, *sdp; AUDIT_ARG_CMD(uap->op); #ifdef CAPABILITY_MODE /* * When adding new operations, add a new case statement here to * explicitly indicate whether or not the operation is safe to * perform in capability mode. 
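The switch below is the Capsicum whitelist: most sysarch(2) sub-operations stay usable in capability mode, while I386_SET_IOPERM (and anything unlisted) is rejected with ECAPMODE. A hypothetical userland illustration, assuming <sys/capsicum.h>, <machine/sysarch.h> and the struct i386_ioperm_args field names used in this file:

/*
 * Illustrative only: after cap_enter(), I386_GET_FSBASE still succeeds
 * but I386_SET_IOPERM is refused with ECAPMODE.
 */
#include <sys/capsicum.h>
#include <machine/sysarch.h>
#include <errno.h>
#include <stdint.h>

static int
probe_capmode_sysarch(void)
{
        struct i386_ioperm_args io = { .start = 0x80, .length = 1, .enable = 1 };
        uint32_t base;

        if (cap_enter() == -1)
                return (-1);
        if (sysarch(I386_GET_FSBASE, &base) == -1)      /* still allowed */
                return (-1);
        if (sysarch(I386_SET_IOPERM, &io) == -1 && errno == ECAPMODE)
                return (0);                             /* expected refusal */
        return (-1);
}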
*/ if (IN_CAPABILITY_MODE(td)) { switch (uap->op) { case I386_GET_LDT: case I386_SET_LDT: case I386_GET_IOPERM: case I386_GET_FSBASE: case I386_SET_FSBASE: case I386_GET_GSBASE: case I386_SET_GSBASE: case I386_GET_XFPUSTATE: break; case I386_SET_IOPERM: default: #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_SYSCALL, NULL, NULL); #endif return (ECAPMODE); } } #endif switch (uap->op) { case I386_GET_IOPERM: case I386_SET_IOPERM: if ((error = copyin(uap->parms, &kargs.iargs, sizeof(struct i386_ioperm_args))) != 0) return (error); break; case I386_GET_LDT: case I386_SET_LDT: if ((error = copyin(uap->parms, &kargs.largs, sizeof(struct i386_ldt_args))) != 0) return (error); if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0) return (EINVAL); break; case I386_GET_XFPUSTATE: if ((error = copyin(uap->parms, &kargs.xfpu, sizeof(struct i386_get_xfpustate))) != 0) return (error); break; default: break; } switch(uap->op) { case I386_GET_LDT: error = i386_get_ldt(td, &kargs.largs); break; case I386_SET_LDT: if (kargs.largs.descs != NULL) { lp = (union descriptor *)malloc( kargs.largs.num * sizeof(union descriptor), M_TEMP, M_WAITOK); error = copyin(kargs.largs.descs, lp, kargs.largs.num * sizeof(union descriptor)); if (error == 0) error = i386_set_ldt(td, &kargs.largs, lp); free(lp, M_TEMP); } else { error = i386_set_ldt(td, &kargs.largs, NULL); } break; case I386_GET_IOPERM: error = i386_get_ioperm(td, &kargs.iargs); if (error == 0) error = copyout(&kargs.iargs, uap->parms, sizeof(struct i386_ioperm_args)); break; case I386_SET_IOPERM: error = i386_set_ioperm(td, &kargs.iargs); break; case I386_VM86: error = vm86_sysarch(td, uap->parms); break; case I386_GET_FSBASE: sdp = &td->td_pcb->pcb_fsd; base = sdp->sd_hibase << 24 | sdp->sd_lobase; error = copyout(&base, uap->parms, sizeof(base)); break; case I386_SET_FSBASE: error = copyin(uap->parms, &base, sizeof(base)); if (!error) { /* * Construct a descriptor and store it in the pcb for * the next context switch. Also store it in the gdt * so that the load of tf_fs into %fs will activate it * at return to userland. */ sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; -#ifdef XEN - /* need to do nosegneg like Linux */ - sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff; -#else sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ -#endif sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; sd.sd_p = 1; sd.sd_xx = 0; sd.sd_def32 = 1; sd.sd_gran = 1; critical_enter(); td->td_pcb->pcb_fsd = sd; -#ifdef XEN - HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[0]), - *(uint64_t *)&sd); -#else PCPU_GET(fsgs_gdt)[0] = sd; -#endif critical_exit(); td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL); } break; case I386_GET_GSBASE: sdp = &td->td_pcb->pcb_gsd; base = sdp->sd_hibase << 24 | sdp->sd_lobase; error = copyout(&base, uap->parms, sizeof(base)); break; case I386_SET_GSBASE: error = copyin(uap->parms, &base, sizeof(base)); if (!error) { /* * Construct a descriptor and store it in the pcb for * the next context switch. Also store it in the gdt * because we have to do a load_gs() right now. 
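I386_SET_FSBASE/I386_SET_GSBASE are the primitives behind %fs/%gs-based TLS on i386: userland supplies a 32-bit base and the kernel rebuilds the flat 4GB descriptor around it, as the code here shows. A hypothetical userland sketch using the libc wrappers (i386_get_gsbase()/i386_set_gsbase() assumed from <machine/sysarch.h>):

/*
 * Illustrative only: point the %gs segment base at a toy thread control
 * block.  Real TLS setup is done by the threading library.
 */
#include <machine/sysarch.h>

static char tcb[128];                   /* toy thread control block */

static int
install_tls_base(void)
{
        void *old;

        if (i386_get_gsbase(&old) == -1)
                return (-1);
        if (i386_set_gsbase(tcb) == -1)
                return (-1);
        /* %gs-relative loads and stores now resolve inside tcb[]. */
        return (0);
}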
*/ sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; -#ifdef XEN - /* need to do nosegneg like Linux */ - sd.sd_lolimit = (HYPERVISOR_VIRT_START >> 12) & 0xffff; -#else sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ -#endif sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; sd.sd_p = 1; sd.sd_xx = 0; sd.sd_def32 = 1; sd.sd_gran = 1; critical_enter(); td->td_pcb->pcb_gsd = sd; -#ifdef XEN - HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[1]), - *(uint64_t *)&sd); -#else PCPU_GET(fsgs_gdt)[1] = sd; -#endif critical_exit(); load_gs(GSEL(GUGS_SEL, SEL_UPL)); } break; case I386_GET_XFPUSTATE: if (kargs.xfpu.len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); npxgetregs(td); error = copyout((char *)(get_pcb_user_save_td(td) + 1), kargs.xfpu.addr, kargs.xfpu.len); break; default: error = EINVAL; break; } return (error); } int i386_extend_pcb(struct thread *td) { int i, offset; u_long *addr; struct pcb_ext *ext; struct soft_segment_descriptor ssd = { 0, /* segment base address (overwritten) */ ctob(IOPAGES + 1) - 1, /* length */ SDT_SYS386TSS, /* segment type */ 0, /* priority level */ 1, /* descriptor present */ 0, 0, 0, /* default 32 size */ 0 /* granularity */ }; ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1), M_WAITOK | M_ZERO); /* -16 is so we can convert a trapframe into vm86trapframe inplace */ ext->ext_tss.tss_esp0 = td->td_kstack + ctob(KSTACK_PAGES) - sizeof(struct pcb) - 16; ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); /* * The last byte of the i/o map must be followed by an 0xff byte. * We arbitrarily allocate 16 bytes here, to keep the starting * address on a doubleword boundary. */ offset = PAGE_SIZE - 16; ext->ext_tss.tss_ioopt = (offset - ((unsigned)&ext->ext_tss - (unsigned)ext)) << 16; ext->ext_iomap = (caddr_t)ext + offset; ext->ext_vm86.vm86_intmap = (caddr_t)ext + offset - 32; addr = (u_long *)ext->ext_vm86.vm86_intmap; for (i = 0; i < (ctob(IOPAGES) + 32 + 16) / sizeof(u_long); i++) *addr++ = ~0; ssd.ssd_base = (unsigned)&ext->ext_tss; ssd.ssd_limit -= ((unsigned)&ext->ext_tss - (unsigned)ext); ssdtosd(&ssd, &ext->ext_tssd); KASSERT(td == curthread, ("giving TSS to !curthread")); KASSERT(td->td_pcb->pcb_ext == 0, ("already have a TSS!")); /* Switch to the new TSS. */ critical_enter(); td->td_pcb->pcb_ext = ext; PCPU_SET(private_tss, 1); *PCPU_GET(tss_gdt) = ext->ext_tssd; ltr(GSEL(GPROC0_SEL, SEL_KPL)); critical_exit(); return 0; } int i386_set_ioperm(td, uap) struct thread *td; struct i386_ioperm_args *uap; { int i, error; char *iomap; if ((error = priv_check(td, PRIV_IO)) != 0) return (error); if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); /* * XXX * While this is restricted to root, we should probably figure out * whether any other driver is using this i/o address, as so not to * cause confusion. This probably requires a global 'usage registry'. 
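i386_set_ioperm() edits the per-process TSS I/O permission bitmap allocated by i386_extend_pcb(); a cleared bit permits userland access to that port. A hypothetical userland sketch (i386_set_ioperm() is the libc wrapper around sysarch(I386_SET_IOPERM); it requires root and a securelevel of 0 or lower):

/*
 * Illustrative only: grant the calling process direct access to the RTC
 * index/data ports 0x70-0x71.  enable == 1 clears the corresponding
 * bitmap bits, i.e. permits access.
 */
#include <machine/sysarch.h>

static int
grant_rtc_ports(void)
{

        if (i386_set_ioperm(0x70, 2, 1) == -1)
                return (-1);
        /* inb()/outb() on ports 0x70 and 0x71 will no longer fault. */
        return (0);
}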
*/ if (td->td_pcb->pcb_ext == 0) if ((error = i386_extend_pcb(td)) != 0) return (error); iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY) return (EINVAL); for (i = uap->start; i < uap->start + uap->length; i++) { if (uap->enable) iomap[i >> 3] &= ~(1 << (i & 7)); else iomap[i >> 3] |= (1 << (i & 7)); } return (error); } int i386_get_ioperm(td, uap) struct thread *td; struct i386_ioperm_args *uap; { int i, state; char *iomap; if (uap->start >= IOPAGES * PAGE_SIZE * NBBY) return (EINVAL); if (td->td_pcb->pcb_ext == 0) { uap->length = 0; goto done; } iomap = (char *)td->td_pcb->pcb_ext->ext_iomap; i = uap->start; state = (iomap[i >> 3] >> (i & 7)) & 1; uap->enable = !state; uap->length = 1; for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) break; uap->length++; } done: return (0); } /* * Update the GDT entry pointing to the LDT to point to the LDT of the * current process. Manage dt_lock holding/unholding autonomously. */ void set_user_ldt(struct mdproc *mdp) { struct proc_ldt *pldt; int dtlocked; dtlocked = 0; if (!mtx_owned(&dt_lock)) { mtx_lock_spin(&dt_lock); dtlocked = 1; } pldt = mdp->md_ldt; -#ifdef XEN - i386_reset_ldt(pldt); - PCPU_SET(currentldt, (int)pldt); -#else #ifdef SMP gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd; #else gdt[GUSERLDT_SEL].sd = pldt->ldt_sd; #endif lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL)); -#endif /* XEN */ if (dtlocked) mtx_unlock_spin(&dt_lock); } #ifdef SMP static void set_user_ldt_rv(struct vmspace *vmsp) { struct thread *td; td = curthread; if (vmsp != td->td_proc->p_vmspace) return; set_user_ldt(&td->td_proc->p_md); } #endif -#ifdef XEN - -/* - * dt_lock must be held. Returns with dt_lock held. - */ -struct proc_ldt * -user_ldt_alloc(struct mdproc *mdp, int len) -{ - struct proc_ldt *pldt, *new_ldt; - - mtx_assert(&dt_lock, MA_OWNED); - mtx_unlock_spin(&dt_lock); - new_ldt = malloc(sizeof(struct proc_ldt), - M_SUBPROC, M_WAITOK); - - new_ldt->ldt_len = len = NEW_MAX_LD(len); - new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena, - round_page(len * sizeof(union descriptor)), M_WAITOK); - new_ldt->ldt_refcnt = 1; - new_ldt->ldt_active = 0; - - mtx_lock_spin(&dt_lock); - if ((pldt = mdp->md_ldt)) { - if (len > pldt->ldt_len) - len = pldt->ldt_len; - bcopy(pldt->ldt_base, new_ldt->ldt_base, - len * sizeof(union descriptor)); - } else { - bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE); - } - mtx_unlock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */ - pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base, - new_ldt->ldt_len*sizeof(union descriptor)); - mtx_lock_spin(&dt_lock); /* XXX kill once pmap locking fixed. */ - return (new_ldt); -} -#else /* * dt_lock must be held. Returns with dt_lock held. 
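user_ldt_alloc() below sizes every LDT with the NEW_MAX_LD() macro defined earlier in this file, which rounds the requested descriptor count up to a whole page of 8-byte entries (and bumps an exact multiple to the next page, since it adds LD_PER_PAGE rather than LD_PER_PAGE - 1). A worked illustration of the sizing macros:

/*
 * NEW_MAX_LD(num) == (num + 512) & ~511, with LD_PER_PAGE == 512:
 *
 *   NEW_MAX_LD(1)   == 512        one page of descriptors
 *   NEW_MAX_LD(511) == 512
 *   NEW_MAX_LD(512) == 1024       exact multiples still grow by a page
 *   NEW_MAX_LD(513) == 1024
 *
 *   SIZE_FROM_LARGEST_LD(513) == 1024 << 3 == 8192 bytes (two pages,
 *   eight bytes per union descriptor).
 */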
*/ struct proc_ldt * user_ldt_alloc(struct mdproc *mdp, int len) { struct proc_ldt *pldt, *new_ldt; mtx_assert(&dt_lock, MA_OWNED); mtx_unlock_spin(&dt_lock); new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK); new_ldt->ldt_len = len = NEW_MAX_LD(len); new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena, len * sizeof(union descriptor), M_WAITOK); new_ldt->ldt_refcnt = 1; new_ldt->ldt_active = 0; mtx_lock_spin(&dt_lock); gdt_segs[GUSERLDT_SEL].ssd_base = (unsigned)new_ldt->ldt_base; gdt_segs[GUSERLDT_SEL].ssd_limit = len * sizeof(union descriptor) - 1; ssdtosd(&gdt_segs[GUSERLDT_SEL], &new_ldt->ldt_sd); if ((pldt = mdp->md_ldt) != NULL) { if (len > pldt->ldt_len) len = pldt->ldt_len; bcopy(pldt->ldt_base, new_ldt->ldt_base, len * sizeof(union descriptor)); } else bcopy(ldt, new_ldt->ldt_base, sizeof(ldt)); return (new_ldt); } -#endif /* !XEN */ /* * Must be called with dt_lock held. Returns with dt_lock unheld. */ void user_ldt_free(struct thread *td) { struct mdproc *mdp = &td->td_proc->p_md; struct proc_ldt *pldt; mtx_assert(&dt_lock, MA_OWNED); if ((pldt = mdp->md_ldt) == NULL) { mtx_unlock_spin(&dt_lock); return; } if (td == curthread) { -#ifdef XEN - i386_reset_ldt(&default_proc_ldt); - PCPU_SET(currentldt, (int)&default_proc_ldt); -#else lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); -#endif } mdp->md_ldt = NULL; user_ldt_deref(pldt); } void user_ldt_deref(struct proc_ldt *pldt) { mtx_assert(&dt_lock, MA_OWNED); if (--pldt->ldt_refcnt == 0) { mtx_unlock_spin(&dt_lock); kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base, pldt->ldt_len * sizeof(union descriptor)); free(pldt, M_SUBPROC); } else mtx_unlock_spin(&dt_lock); } /* * Note for the authors of compat layers (linux, etc): copyout() in * the function below is not a problem since it presents data in * arch-specific format (i.e. i386-specific in this case), not in * the OS-specific one. */ int i386_get_ldt(td, uap) struct thread *td; struct i386_ldt_args *uap; { int error = 0; struct proc_ldt *pldt; int nldt, num; union descriptor *lp; #ifdef DEBUG printf("i386_get_ldt: start=%d num=%d descs=%p\n", uap->start, uap->num, (void *)uap->descs); #endif mtx_lock_spin(&dt_lock); if ((pldt = td->td_proc->p_md.md_ldt) != NULL) { nldt = pldt->ldt_len; lp = &((union descriptor *)(pldt->ldt_base))[uap->start]; mtx_unlock_spin(&dt_lock); num = min(uap->num, nldt); } else { mtx_unlock_spin(&dt_lock); nldt = sizeof(ldt)/sizeof(ldt[0]); num = min(uap->num, nldt); lp = &ldt[uap->start]; } if ((uap->start > (unsigned int)nldt) || ((unsigned int)num > (unsigned int)nldt) || ((unsigned int)(uap->start + num) > (unsigned int)nldt)) return(EINVAL); error = copyout(lp, uap->descs, num * sizeof(union descriptor)); if (!error) td->td_retval[0] = num; return(error); } int i386_set_ldt(td, uap, descs) struct thread *td; struct i386_ldt_args *uap; union descriptor *descs; { int error = 0, i; int largest_ld; struct mdproc *mdp = &td->td_proc->p_md; struct proc_ldt *pldt; union descriptor *dp; #ifdef DEBUG printf("i386_set_ldt: start=%d num=%d descs=%p\n", uap->start, uap->num, (void *)uap->descs); #endif if (descs == NULL) { /* Free descriptors */ if (uap->start == 0 && uap->num == 0) { /* * Treat this as a special case, so userland needn't * know magic number NLDT. 
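From userland the usual entry point to this code is the i386_set_ldt(3) libc wrapper; passing descs == NULL with start and num both zero takes the special case handled below and clears every application slot without the caller having to know NLDT, while LDT_AUTO_ALLOC asks the kernel to pick a free slot. A hypothetical sketch (wrapper and constants assumed from <machine/sysarch.h> and <machine/segments.h>):

/*
 * Illustrative only: allocate one LDT slot, then release all
 * application descriptors again.
 */
#include <machine/segments.h>
#include <machine/sysarch.h>

static int
alloc_and_release_ldt_slot(union descriptor *desc)
{
        int slot;

        slot = i386_set_ldt(LDT_AUTO_ALLOC, desc, 1);
        if (slot == -1)
                return (-1);
        /* ... build a selector with LSEL(slot, SEL_UPL) and use it ... */
        return (i386_set_ldt(0, NULL, 0));      /* free every slot */
}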
*/ uap->start = NLDT; uap->num = MAX_LD - NLDT; } if (uap->num == 0) return (EINVAL); mtx_lock_spin(&dt_lock); if ((pldt = mdp->md_ldt) == NULL || uap->start >= pldt->ldt_len) { mtx_unlock_spin(&dt_lock); return (0); } largest_ld = uap->start + uap->num; if (largest_ld > pldt->ldt_len) largest_ld = pldt->ldt_len; i = largest_ld - uap->start; bzero(&((union descriptor *)(pldt->ldt_base))[uap->start], sizeof(union descriptor) * i); mtx_unlock_spin(&dt_lock); return (0); } if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { /* verify range of descriptors to modify */ largest_ld = uap->start + uap->num; if (uap->start >= MAX_LD || largest_ld > MAX_LD) { return (EINVAL); } } /* Check descriptors for access violations */ for (i = 0; i < uap->num; i++) { dp = &descs[i]; switch (dp->sd.sd_type) { case SDT_SYSNULL: /* system null */ dp->sd.sd_p = 0; break; case SDT_SYS286TSS: /* system 286 TSS available */ case SDT_SYSLDT: /* system local descriptor table */ case SDT_SYS286BSY: /* system 286 TSS busy */ case SDT_SYSTASKGT: /* system task gate */ case SDT_SYS286IGT: /* system 286 interrupt gate */ case SDT_SYS286TGT: /* system 286 trap gate */ case SDT_SYSNULL2: /* undefined by Intel */ case SDT_SYS386TSS: /* system 386 TSS available */ case SDT_SYSNULL3: /* undefined by Intel */ case SDT_SYS386BSY: /* system 386 TSS busy */ case SDT_SYSNULL4: /* undefined by Intel */ case SDT_SYS386IGT: /* system 386 interrupt gate */ case SDT_SYS386TGT: /* system 386 trap gate */ case SDT_SYS286CGT: /* system 286 call gate */ case SDT_SYS386CGT: /* system 386 call gate */ /* I can't think of any reason to allow a user proc * to create a segment of these types. They are * for OS use only. */ return (EACCES); /*NOTREACHED*/ /* memory segment types */ case SDT_MEMEC: /* memory execute only conforming */ case SDT_MEMEAC: /* memory execute only accessed conforming */ case SDT_MEMERC: /* memory execute read conforming */ case SDT_MEMERAC: /* memory execute read accessed conforming */ /* Must be "present" if executable and conforming. */ if (dp->sd.sd_p == 0) return (EACCES); break; case SDT_MEMRO: /* memory read only */ case SDT_MEMROA: /* memory read only accessed */ case SDT_MEMRW: /* memory read write */ case SDT_MEMRWA: /* memory read write accessed */ case SDT_MEMROD: /* memory read only expand dwn limit */ case SDT_MEMRODA: /* memory read only expand dwn lim accessed */ case SDT_MEMRWD: /* memory read write expand dwn limit */ case SDT_MEMRWDA: /* memory read write expand dwn lim acessed */ case SDT_MEME: /* memory execute only */ case SDT_MEMEA: /* memory execute only accessed */ case SDT_MEMER: /* memory execute read */ case SDT_MEMERA: /* memory execute read accessed */ break; default: return(EINVAL); /*NOTREACHED*/ } /* Only user (ring-3) descriptors may be present. */ if ((dp->sd.sd_p != 0) && (dp->sd.sd_dpl != SEL_UPL)) return (EACCES); } if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) { /* Allocate a free slot */ mtx_lock_spin(&dt_lock); if ((pldt = mdp->md_ldt) == NULL) { if ((error = i386_ldt_grow(td, NLDT + 1))) { mtx_unlock_spin(&dt_lock); return (error); } pldt = mdp->md_ldt; } again: /* * start scanning a bit up to leave room for NVidia and * Wine, which still user the "Blat" method of allocation. 
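The type switch above only admits present, ring-3 memory descriptors; anything a user process may install looks roughly like the flat data segment built for TLS elsewhere in this file. A sketch of a descriptor that passes those checks (field names as used throughout this file; headers such as <machine/segments.h> assumed):

/*
 * Sketch only: a present, DPL-3, read/write data segment covering the
 * full 4GB address space (page-granular limit 0xfffff).
 */
static void
fill_flat_user_data_desc(union descriptor *dp, uint32_t base)
{
        struct segment_descriptor *sd = &dp->sd;

        sd->sd_lobase = base & 0xffffff;
        sd->sd_hibase = (base >> 24) & 0xff;
        sd->sd_lolimit = 0xffff;
        sd->sd_hilimit = 0xf;
        sd->sd_type = SDT_MEMRWA;       /* read/write, accessed */
        sd->sd_dpl = SEL_UPL;           /* must be ring 3 to pass the check */
        sd->sd_p = 1;                   /* present */
        sd->sd_xx = 0;
        sd->sd_def32 = 1;               /* 32-bit segment */
        sd->sd_gran = 1;                /* limit counted in 4K pages */
}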
*/ dp = &((union descriptor *)(pldt->ldt_base))[NLDT]; for (i = NLDT; i < pldt->ldt_len; ++i) { if (dp->sd.sd_type == SDT_SYSNULL) break; dp++; } if (i >= pldt->ldt_len) { if ((error = i386_ldt_grow(td, pldt->ldt_len+1))) { mtx_unlock_spin(&dt_lock); return (error); } goto again; } uap->start = i; error = i386_set_ldt_data(td, i, 1, descs); mtx_unlock_spin(&dt_lock); } else { largest_ld = uap->start + uap->num; mtx_lock_spin(&dt_lock); if (!(error = i386_ldt_grow(td, largest_ld))) { error = i386_set_ldt_data(td, uap->start, uap->num, descs); } mtx_unlock_spin(&dt_lock); } if (error == 0) td->td_retval[0] = uap->start; return (error); } -#ifdef XEN -static int -i386_set_ldt_data(struct thread *td, int start, int num, - union descriptor *descs) -{ - struct mdproc *mdp = &td->td_proc->p_md; - struct proc_ldt *pldt = mdp->md_ldt; - mtx_assert(&dt_lock, MA_OWNED); - - while (num) { - xen_update_descriptor( - &((union descriptor *)(pldt->ldt_base))[start], - descs); - num--; - start++; - descs++; - } - return (0); -} -#else static int i386_set_ldt_data(struct thread *td, int start, int num, union descriptor *descs) { struct mdproc *mdp = &td->td_proc->p_md; struct proc_ldt *pldt = mdp->md_ldt; mtx_assert(&dt_lock, MA_OWNED); /* Fill in range */ bcopy(descs, &((union descriptor *)(pldt->ldt_base))[start], num * sizeof(union descriptor)); return (0); } -#endif /* !XEN */ static int i386_ldt_grow(struct thread *td, int len) { struct mdproc *mdp = &td->td_proc->p_md; struct proc_ldt *new_ldt, *pldt; caddr_t old_ldt_base = NULL_LDT_BASE; int old_ldt_len = 0; mtx_assert(&dt_lock, MA_OWNED); if (len > MAX_LD) return (ENOMEM); if (len < NLDT + 1) len = NLDT + 1; /* Allocate a user ldt. */ if ((pldt = mdp->md_ldt) == NULL || len > pldt->ldt_len) { new_ldt = user_ldt_alloc(mdp, len); if (new_ldt == NULL) return (ENOMEM); pldt = mdp->md_ldt; if (pldt != NULL) { if (new_ldt->ldt_len <= pldt->ldt_len) { /* * We just lost the race for allocation, so * free the new object and return. */ mtx_unlock_spin(&dt_lock); kmem_free(kernel_arena, (vm_offset_t)new_ldt->ldt_base, new_ldt->ldt_len * sizeof(union descriptor)); free(new_ldt, M_SUBPROC); mtx_lock_spin(&dt_lock); return (0); } /* * We have to substitute the current LDT entry for * curproc with the new one since its size grew. */ old_ldt_base = pldt->ldt_base; old_ldt_len = pldt->ldt_len; pldt->ldt_sd = new_ldt->ldt_sd; pldt->ldt_base = new_ldt->ldt_base; pldt->ldt_len = new_ldt->ldt_len; } else mdp->md_ldt = pldt = new_ldt; #ifdef SMP /* * Signal other cpus to reload ldt. We need to unlock dt_lock * here because other CPU will contest on it since their * curthreads won't hold the lock and will block when trying * to acquire it. */ mtx_unlock_spin(&dt_lock); smp_rendezvous(NULL, (void (*)(void *))set_user_ldt_rv, NULL, td->td_proc->p_vmspace); #else set_user_ldt(&td->td_proc->p_md); mtx_unlock_spin(&dt_lock); #endif if (old_ldt_base != NULL_LDT_BASE) { kmem_free(kernel_arena, (vm_offset_t)old_ldt_base, old_ldt_len * sizeof(union descriptor)); free(new_ldt, M_SUBPROC); } mtx_lock_spin(&dt_lock); } return (0); } Index: head/sys/i386/i386/vm_machdep.c =================================================================== --- head/sys/i386/i386/vm_machdep.c (revision 282273) +++ head/sys/i386/i386/vm_machdep.c (revision 282274) @@ -1,927 +1,904 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. 
* * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ */ #include __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_npx.h" #include "opt_reset.h" #include "opt_cpu.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef CPU_ELAN #include #endif #include #include #include #include #include #include -#ifdef XEN -#include -#endif #ifdef PC98 #include #else #include #endif #ifdef XBOX #include #endif #ifndef NSFBUFS #define NSFBUFS (512 + maxusers * 16) #endif #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif _Static_assert(OFFSETOF_CURTHREAD == offsetof(struct pcpu, pc_curthread), "OFFSETOF_CURTHREAD does not correspond with offset of pc_curthread."); _Static_assert(OFFSETOF_CURPCB == offsetof(struct pcpu, pc_curpcb), "OFFSETOF_CURPCB does not correspond with offset of pc_curpcb."); static void cpu_reset_real(void); #ifdef SMP static void cpu_reset_proxy(void); static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif union savefpu * get_pcb_user_save_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - cpu_max_ext_state_size; KASSERT((p % 64) == 0, ("Unaligned pcb_user_save area")); return ((union savefpu *)p); } union savefpu * get_pcb_user_save_pcb(struct pcb *pcb) { vm_offset_t p; p = (vm_offset_t)(pcb + 1); return ((union savefpu *)p); } struct pcb * get_pcb_td(struct thread *td) { vm_offset_t p; p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE - cpu_max_ext_state_size - sizeof(struct pcb); return ((struct pcb *)p); } void * alloc_fpusave(int flags) { void *res; #ifdef CPU_ENABLE_SSE struct savefpu_ymm *sf; #endif res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags); #ifdef CPU_ENABLE_SSE if (use_xsave) { sf = (struct savefpu_ymm *)res; bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd)); sf->sv_xstate.sx_hd.xstate_bv = xsave_mask; } #endif return (res); } /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(td1, p2, td2, flags) register struct thread *td1; register struct proc *p2; struct thread *td2; int flags; { register struct proc *p1; struct pcb *pcb2; struct mdproc *mdp2; p1 = td1->td_proc; if ((flags & RFPROC) == 0) { if ((flags & RFMEM) == 0) { /* unshare user LDT */ struct mdproc *mdp1 = &p1->p_md; struct proc_ldt *pldt, *pldt1; mtx_lock_spin(&dt_lock); if ((pldt1 = mdp1->md_ldt) != NULL && pldt1->ldt_refcnt > 1) { pldt = user_ldt_alloc(mdp1, pldt1->ldt_len); if (pldt == NULL) panic("could not copy LDT"); mdp1->md_ldt = pldt; set_user_ldt(mdp1); user_ldt_deref(pldt1); } else mtx_unlock_spin(&dt_lock); } return; } /* Ensure that td1's pcb is up to date. 
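get_pcb_td() and get_pcb_user_save_td() above carve the pcb and the extended FPU save area out of the top of the kernel stack, and cpu_thread_alloc()/cpu_fork() place the trap frame 16 bytes below the pcb so it can later be expanded into a vm86 trap frame in place. A worked layout with hypothetical numbers (two-page kernel stack and 512-byte cpu_max_ext_state_size, both for illustration only):

/*
 *   stack top          = td_kstack + 2 * PAGE_SIZE            (+8192)
 *   FPU user save area = stack top - 512                      (64-byte aligned)
 *   struct pcb         = save area - sizeof(struct pcb)
 *   struct trapframe   = (char *)pcb - 16 - sizeof(struct trapframe)
 */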
*/ if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); #ifdef DEV_NPX critical_enter(); if (PCPU_GET(fpcurthread) == td1) npxsave(td1->td_pcb->pcb_save); critical_exit(); #endif /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); td2->td_pcb = pcb2; /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Properly initialize pcb_save */ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2), cpu_max_ext_state_size); /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a * syscall. This copies most of the user mode register values. * The -16 is so we can expand the trapframe if we go to vm86. */ td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_eax = 0; /* Child returns zero */ td2->td_frame->tf_eflags &= ~PSL_C; /* success */ td2->td_frame->tf_edx = 1; /* * If the parent process has the trap bit set (i.e. a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame unless the debugger had set PF_FORK * on the parent. Otherwise, the child will receive a (likely * unexpected) SIGTRAP when it executes the first instruction after * returning to userland. */ if ((p1->p_pfsflags & PF_FORK) == 0) td2->td_frame->tf_eflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ #if defined(PAE) || defined(PAE_TABLES) pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); #else pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); #endif pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl = PSL_KERNEL; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ /* * XXX don't copy the i/o pages. this should probably be fixed. */ pcb2->pcb_ext = 0; /* Copy the LDT, if necessary. */ mtx_lock_spin(&dt_lock); if (mdp2->md_ldt != NULL) { if (flags & RFMEM) { mdp2->md_ldt->ldt_refcnt++; } else { mdp2->md_ldt = user_ldt_alloc(mdp2, mdp2->md_ldt->ldt_len); if (mdp2->md_ldt == NULL) panic("could not copy LDT"); } } mtx_unlock_spin(&dt_lock); /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; - /* - * XXX XEN need to check on PSL_USER is handled - */ td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; + /* * Now, cpu_switch() can schedule the new process. * pcb_esp is loaded pointing to the cpu_switch() stack frame * containing the return address when exiting cpu_switch. * This will normally be to fork_trampoline(), which will have * %ebx loaded with the new proc's pointer. fork_trampoline() * will set up a stack to call fork_return(p, frame); to complete * the return to user-mode. */ } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. 
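cpu_set_fork_handler() below simply overwrites the two pcb slots that cpu_fork() loaded with fork_return and the new thread pointer, so fork_trampoline() ends up calling the supplied function as func(arg, frame) and the thread never returns to user mode; this is how kernel-only threads are started. A hedged sketch of a caller (struct my_softc and the function names are hypothetical; the kproc/kthread machinery is the real consumer):

struct my_softc;                        /* hypothetical driver state */

static void
my_kproc_main(void *arg)
{

        for (;;) {
                /* ... service requests for the softc passed in arg ... */
        }
}

static void
start_worker(struct thread *newtd, struct my_softc *sc)
{

        cpu_set_fork_handler(newtd, my_kproc_main, sc);
}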
*/ void cpu_set_fork_handler(td, func, arg) struct thread *td; void (*func)(void *); void *arg; { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_esi = (int) func; /* function */ td->td_pcb->pcb_ebx = (int) arg; /* first arg */ } void cpu_exit(struct thread *td) { /* * If this process has a custom LDT, release it. Reset pc->pcb_gs * and %gs before we free it in case they refer to an LDT entry. */ mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) { td->td_pcb->pcb_gs = _udatasel; load_gs(_udatasel); user_ldt_free(td); } else mtx_unlock_spin(&dt_lock); } void cpu_thread_exit(struct thread *td) { #ifdef DEV_NPX critical_enter(); if (td == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); #endif /* Disable any hardware breakpoints. */ if (td->td_pcb->pcb_flags & PCB_DBREGS) { reset_dbregs(); td->td_pcb->pcb_flags &= ~PCB_DBREGS; } } void cpu_thread_clean(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (pcb->pcb_ext != NULL) { /* if (pcb->pcb_ext->ext_refcount-- == 1) ?? */ /* * XXX do we need to move the TSS off the allocated pages * before freeing them? (not done here) */ kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_ext, ctob(IOPAGES + 1)); pcb->pcb_ext = NULL; } } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; #ifdef CPU_ENABLE_SSE struct xstate_hdr *xhdr; #endif td->td_pcb = pcb = get_pcb_td(td); td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1; pcb->pcb_ext = NULL; pcb->pcb_save = get_pcb_user_save_pcb(pcb); #ifdef CPU_ENABLE_SSE if (use_xsave) { xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1); bzero(xhdr, sizeof(*xhdr)); xhdr->xstate_bv = xsave_mask; } #endif } void cpu_thread_free(struct thread *td) { cpu_thread_clean(td); } void cpu_set_syscall_retval(struct thread *td, int error) { switch (error) { case 0: td->td_frame->tf_eax = td->td_retval[0]; td->td_frame->tf_edx = td->td_retval[1]; td->td_frame->tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, int * 0x80 is 2 bytes. We saved this in tf_err. */ td->td_frame->tf_eip -= td->td_frame->tf_err; break; case EJUSTRETURN: break; default: if (td->td_proc->p_sysent->sv_errsize) { if (error >= td->td_proc->p_sysent->sv_errsize) error = -1; /* XXX */ else error = td->td_proc->p_sysent->sv_errtbl[error]; } td->td_frame->tf_eax = error; td->td_frame->tf_eflags |= PSL_C; break; } } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Put enough state in the new thread's PCB to get it to go back * userret(), where we can intercept it again to set the return (upcall) * Address and stack, along with those from upcals that are from other sources * such as those generated in thread_userret() itself. */ void cpu_set_upcall(struct thread *td, struct thread *td0) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE | PCB_KERNNPX); pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); /* * Create a new fresh stack for the new thread. */ bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); /* If the current thread has the trap bit set (i.e. 
a debugger had * single stepped the process to the system call), we need to clear * the trap flag from the new frame. Otherwise, the new thread will * receive a (likely unexpected) SIGTRAP when it executes the first * instruction after returning to userland. */ td->td_frame->tf_eflags &= ~PSL_T; /* * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. */ pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* trampoline arg */ pcb2->pcb_ebp = 0; pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */ pcb2->pcb_ebx = (int)td; /* trampoline arg */ pcb2->pcb_eip = (int)fork_trampoline; pcb2->pcb_psl &= ~(PSL_I); /* interrupts must be disabled */ pcb2->pcb_gs = rgs(); /* * If we didn't copy the pcb, we'd need to do the following registers: * pcb2->pcb_cr3: cloned above. * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. * pcb2->pcb_flags: cloned above. * pcb2->pcb_onfault: cloned above (always NULL here?). * pcb2->pcb_gs: cloned above. * pcb2->pcb_ext: cleared below. */ pcb2->pcb_ext = NULL; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = PSL_KERNEL | PSL_I; } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { /* * Do any extra cleaning that needs to be done. * The thread may have optional components * that are not present in a fresh thread. * This may be a recycled thread so make it look * as though it's newly allocated. */ cpu_thread_clean(td); /* * Set the trap frame to point at the beginning of the uts * function. */ td->td_frame->tf_ebp = 0; td->td_frame->tf_esp = (((int)stack->ss_sp + stack->ss_size - 4) & ~0x0f) - 4; td->td_frame->tf_eip = (int)entry; /* * Pass the address of the mailbox for this kse to the uts * function as a parameter on the stack. */ suword((void *)(td->td_frame->tf_esp + sizeof(void *)), (int)arg); } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct segment_descriptor sd; uint32_t base; /* * Construct a descriptor and store it in the pcb for * the next context switch. Also store it in the gdt * so that the load of tf_fs into %fs will activate it * at return to userland. 
*/ base = (uint32_t)tls_base; sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; sd.sd_p = 1; sd.sd_xx = 0; sd.sd_def32 = 1; sd.sd_gran = 1; critical_enter(); /* set %gs */ td->td_pcb->pcb_gsd = sd; if (td == curthread) { PCPU_GET(fsgs_gdt)[1] = sd; load_gs(GSEL(GUGS_SEL, SEL_UPL)); } critical_exit(); return (0); } /* * Convert kernel VA to physical address */ vm_paddr_t kvtop(void *addr) { vm_paddr_t pa; pa = pmap_kextract((vm_offset_t)addr); if (pa == 0) panic("kvtop: zero page frame"); return (pa); } #ifdef SMP static void cpu_reset_proxy() { cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ CPU_SETOF(cpu_reset_proxyid, &tcrp); stop_cpus(tcrp); printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); } #endif void cpu_reset() { #ifdef XBOX if (arch_i386_is_xbox) { /* Kick the PIC16L, it can reboot the box */ pic16l_reboot(); for (;;); } #endif #ifdef SMP cpuset_t map; u_int cnt; if (smp_started) { map = all_cpus; CPU_CLR(PCPU_GET(cpuid), &map); CPU_NAND(&map, &stopped_cpus); if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ /* XXX: restart_cpus(1 << 0); */ CPU_SETOF(0, &started_cpus); wmb(); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) cnt++; /* Wait for BSP to announce restart */ if (cpu_reset_proxy_active == 0) printf("cpu_reset: Failed to restart BSP\n"); enable_intr(); cpu_reset_proxy_active = 2; while (1); /* NOTREACHED */ } DELAY(1000000); } #endif cpu_reset_real(); /* NOTREACHED */ } static void cpu_reset_real() { struct region_descriptor null_idt; #ifndef PC98 int b; #endif disable_intr(); -#ifdef XEN - if (smp_processor_id() == 0) - HYPERVISOR_shutdown(SHUTDOWN_reboot); - else - HYPERVISOR_shutdown(SHUTDOWN_poweroff); -#endif #ifdef CPU_ELAN if (elan_mmcr != NULL) elan_mmcr->RESCFG = 1; #endif if (cpu == CPU_GEODE1100) { /* Attempt Geode's own reset */ outl(0xcf8, 0x80009044ul); outl(0xcfc, 0xf); } #ifdef PC98 /* * Attempt to do a CPU reset via CPU reset port. */ if ((inb(0x35) & 0xa0) != 0xa0) { outb(0x37, 0x0f); /* SHUT0 = 0. */ outb(0x37, 0x0b); /* SHUT1 = 0. */ } outb(0xf0, 0x00); /* Reset. */ #else #if !defined(BROKEN_KEYBOARD_RESET) /* * Attempt to do a CPU reset via the keyboard controller, * do not turn off GateA20, as any machine that fails * to do the reset here would then end up in no man's land. */ outb(IO_KBD + 4, 0xFE); DELAY(500000); /* wait 0.5 sec to see if that did it */ #endif /* * Attempt to force a reset via the Reset Control register at * I/O port 0xcf9. Bit 2 forces a system reset when it * transitions from 0 to 1. Bit 1 selects the type of reset * to attempt: 0 selects a "soft" reset, and 1 selects a * "hard" reset. We try a "hard" reset. The first write sets * bit 1 to select a "hard" reset and clears bit 2. The * second write forces a 0 -> 1 transition in bit 2 to trigger * a reset. */ outb(0xcf9, 0x2); outb(0xcf9, 0x6); DELAY(500000); /* wait 0.5 sec to see if that did it */ /* * Attempt to force a reset via the Fast A20 and Init register * at I/O port 0x92. Bit 1 serves as an alternate A20 gate. * Bit 0 asserts INIT# when set to 1. 
We are careful to only * preserve bit 1 while setting bit 0. We also must clear bit * 0 before setting it if it isn't already clear. */ b = inb(0x92); if (b != 0xff) { if ((b & 0x1) != 0) outb(0x92, b & 0xfe); outb(0x92, b | 0x1); DELAY(500000); /* wait 0.5 sec to see if that did it */ } #endif /* PC98 */ printf("No known reset method worked, attempting CPU shutdown\n"); DELAY(1000000); /* wait 1 sec for printf to complete */ /* Wipe the IDT. */ null_idt.rd_limit = 0; null_idt.rd_base = 0; lidt(&null_idt); /* "good night, sweet prince .... " */ breakpoint(); /* NOTREACHED */ while(1); } /* * Get an sf_buf from the freelist. May block if none are available. */ void sf_buf_map(struct sf_buf *sf, int flags) { pt_entry_t opte, *ptep; /* * Update the sf_buf's virtual-to-physical mapping, flushing the * virtual address from the TLB. Since the reference count for * the sf_buf's old mapping was zero, that mapping is not * currently in use. Consequently, there is no need to exchange * the old and new PTEs atomically, even under PAE. */ ptep = vtopte(sf->kva); opte = *ptep; -#ifdef XEN - PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag - | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0)); -#else *ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0); -#endif /* * Avoid unnecessary TLB invalidations: If the sf_buf's old * virtual-to-physical mapping was not used, then any processor * that has invalidated the sf_buf's virtual address from its TLB * since the last used mapping need not invalidate again. */ #ifdef SMP if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) CPU_ZERO(&sf->cpumask); sf_buf_shootdown(sf, flags); #else if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) pmap_invalidate_page(kernel_pmap, sf->kva); #endif } #ifdef SMP void sf_buf_shootdown(struct sf_buf *sf, int flags) { cpuset_t other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sf->cpumask)) { CPU_SET(cpuid, &sf->cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); CPU_NAND(&other_cpus, &sf->cpumask); if (!CPU_EMPTY(&other_cpus)) { CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva); } } sched_unpin(); } #endif /* * MD part of sf_buf_free(). */ int sf_buf_unmap(struct sf_buf *sf) { -#ifdef XEN - /* - * Xen doesn't like having dangling R/W mappings - */ - pmap_qremove(sf->kva, 1); - return (1); -#else + return (0); -#endif } static void sf_buf_invalidate(struct sf_buf *sf) { vm_page_t m = sf->m; /* * Use pmap_qenter to update the pte for * existing mapping, in particular, the PAT * settings are recalculated. */ pmap_qenter(sf->kva, &m, 1); pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE, FALSE); } /* * Invalidate the cache lines that may belong to the page, if * (possibly old) mapping of the page by sf buffer exists. Returns * TRUE when mapping was found and cache invalidated. */ boolean_t sf_buf_invalidate_cache(vm_page_t m) { return (sf_buf_process_page(m, sf_buf_invalidate)); } /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending != 0) busdma_swi(); } /* * Tell whether this address is in some physical memory region. * Currently used by the kernel coredump code in order to avoid * dumping the ``ISA memory hole'' which could cause indefinite hangs, * or other unpredictable behaviour. */ int is_physical_memory(vm_paddr_t addr) { #ifdef DEV_ISA /* The ISA ``memory hole''. 
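sf_buf_map()/sf_buf_unmap() above are only the machine-dependent half of the sf_buf allocator; consumers deal with sf_buf_alloc(), sf_buf_kva() and sf_buf_free(). A hedged consumer-side sketch (interface and SFB_* flags assumed from <sys/sf_buf.h>; surrounding kernel headers omitted):

/*
 * Sketch only: map a vm_page_t into KVA long enough to copy data out of
 * it.  With flags == 0, sf_buf_alloc() may sleep until a buffer is free.
 */
static int
copy_from_page(vm_page_t m, void *dst, size_t len)
{
        struct sf_buf *sf;

        sf = sf_buf_alloc(m, 0);
        if (sf == NULL)
                return (ENOMEM);
        bcopy((void *)sf_buf_kva(sf), dst, len);
        sf_buf_free(sf);
        return (0);
}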
*/ if (addr >= 0xa0000 && addr < 0x100000) return 0; #endif /* * stuff other tests for known memory-mapped devices (PCI?) * here */ return 1; } Index: head/sys/i386/include/asmacros.h =================================================================== --- head/sys/i386/include/asmacros.h (revision 282273) +++ head/sys/i386/include/asmacros.h (revision 282274) @@ -1,238 +1,207 @@ /*- * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_ASMACROS_H_ #define _MACHINE_ASMACROS_H_ #include /* XXX too much duplication in various asm*.h's. */ /* * CNAME is used to manage the relationship between symbol names in C * and the equivalent assembly language names. CNAME is given a name as * it would be used in a C program. It expands to the equivalent assembly * language name. */ #define CNAME(csym) csym #define ALIGN_DATA .p2align 2 /* 4 byte alignment, zero filled */ #ifdef GPROF #define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ #else #define ALIGN_TEXT .p2align 2,0x90 /* 4-byte alignment, nop filled */ #endif #define SUPERALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */ #define GEN_ENTRY(name) ALIGN_TEXT; .globl CNAME(name); \ .type CNAME(name),@function; CNAME(name): #define NON_GPROF_ENTRY(name) GEN_ENTRY(name) #define NON_GPROF_RET .byte 0xc3 /* opcode for `ret' */ #define END(name) .size name, . - name #ifdef GPROF /* * __mcount is like [.]mcount except that doesn't require its caller to set * up a frame pointer. It must be called before pushing anything onto the * stack. gcc should eventually generate code to call __mcount in most * cases. This would make -pg in combination with -fomit-frame-pointer * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to * allow profiling before setting up the frame pointer, but this is * inadequate for good handling of special cases, e.g., -fpic works best * with profiling after the prologue. * * [.]mexitcount is a new function to support non-statistical profiling if an * accurate clock is available. 
For C sources, calls to it are generated * by the FreeBSD extension `-mprofiler-epilogue' to gcc. It is best to * call [.]mexitcount at the end of a function like the MEXITCOUNT macro does, * but gcc currently generates calls to it at the start of the epilogue to * avoid problems with -fpic. * * [.]mcount and __mcount may clobber the call-used registers and %ef. * [.]mexitcount may clobber %ecx and %ef. * * Cross-jumping makes non-statistical profiling timing more complicated. * It is handled in many cases by calling [.]mexitcount before jumping. It * is handled for conditional jumps using CROSSJUMP() and CROSSJUMP_LABEL(). * It is handled for some fault-handling jumps by not sharing the exit * routine. * * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to * the main entry point. Note that alt entries are counted twice. They * have to be counted as ordinary entries for gprof to get the call times * right for the ordinary entries. * * High local labels are used in macros to avoid clashes with local labels * in functions. * * Ordinary `ret' is used instead of a macro `RET' because there are a lot * of `ret's. 0xc3 is the opcode for `ret' (`#define ret ... ret' can't * be used because this file is sometimes preprocessed in traditional mode). * `ret' clobbers eflags but this doesn't matter. */ #define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f #define CROSSJUMP(jtrue, label, jfalse) \ jfalse 8f; MEXITCOUNT; jmp __CONCAT(to,label); 8: #define CROSSJUMPTARGET(label) \ ALIGN_TEXT; __CONCAT(to,label): ; MCOUNT; jmp label #define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT #define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx #define MCOUNT call __mcount #define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT #ifdef GUPROF #define MEXITCOUNT call .mexitcount #define ret MEXITCOUNT ; NON_GPROF_RET #else #define MEXITCOUNT #endif #else /* !GPROF */ /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align to because there may be no ALTENTRY() before it. * If there is a previous ALTENTRY() then the alignment code for ENTRY() * is empty. */ #define ALTENTRY(name) GEN_ENTRY(name) #define CROSSJUMP(jtrue, label, jfalse) jtrue label #define CROSSJUMPTARGET(label) #define ENTRY(name) GEN_ENTRY(name) #define FAKE_MCOUNT(caller) #define MCOUNT #define MCOUNT_LABEL(name) #define MEXITCOUNT #endif /* GPROF */ #ifdef LOCORE /* * Convenience macro for declaring interrupt entry points. */ #define IDTVEC(name) ALIGN_TEXT; .globl __CONCAT(X,name); \ .type __CONCAT(X,name),@function; __CONCAT(X,name): /* * Macros to create and destroy a trap frame. */ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ pushal ; /* 8 ints */ \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ pushl %fs #define POP_FRAME \ popl %fs ; \ popl %es ; \ popl %ds ; \ popal ; \ addl $4+4,%esp /* * Access per-CPU data. */ #define PCPU(member) %fs:PC_ ## member #define PCPU_ADDR(member, reg) \ movl %fs:PC_PRVSPACE, reg ; \ addl $PC_ ## member, reg /* * Setup the kernel segment registers. 
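The PCPU() and PCPU_ADDR() macros above are the assembly counterparts of the C-level PCPU_GET()/PCPU_SET() accessors: both resolve per-CPU fields through the %fs segment loaded by SET_KERNEL_SREGS. A small sketch of the C side (kernel context assumed):

/*
 * Sketch only: the C spelling of PCPU(CPUID) as used in the assembly
 * above.  Pin the thread or enter a critical section if the value must
 * still name the running CPU after the read.
 */
static u_int
current_cpu_id(void)
{
        u_int id;

        critical_enter();
        id = PCPU_GET(cpuid);
        critical_exit();
        return (id);
}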
*/ #define SET_KERNEL_SREGS \ movl $KDSEL, %eax ; /* reload with kernel's data segment */ \ movl %eax, %ds ; \ movl %eax, %es ; \ movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \ movl %eax, %fs -#ifdef XEN -#define LOAD_CR3(reg) \ - movl reg,PCPU(CR3); \ - pushl %ecx ; \ - pushl %edx ; \ - pushl %esi ; \ - pushl reg ; \ - call xen_load_cr3 ; \ - addl $4,%esp ; \ - popl %esi ; \ - popl %edx ; \ - popl %ecx ; \ - -#define READ_CR3(reg) movl PCPU(CR3),reg; -#define LLDT(arg) \ - pushl %edx ; \ - pushl %eax ; \ - xorl %eax,%eax ; \ - movl %eax,%gs ; \ - call i386_reset_ldt ; \ - popl %eax ; \ - popl %edx -#define CLI call ni_cli -#else -#define LOAD_CR3(reg) movl reg,%cr3; -#define READ_CR3(reg) movl %cr3,reg; -#define LLDT(arg) lldt arg; -#define CLI cli -#endif /* !XEN */ - - #endif /* LOCORE */ #ifdef __STDC__ #define ELFNOTE(name, type, desctype, descdata...) \ .pushsection .note.name ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ .long type ; \ 1:.asciz #name ; \ 2:.align 4 ; \ 3:desctype descdata ; \ 4:.align 4 ; \ .popsection #else /* !__STDC__, i.e. -traditional */ #define ELFNOTE(name, type, desctype, descdata) \ .pushsection .note.name ; \ .align 4 ; \ .long 2f - 1f /* namesz */ ; \ .long 4f - 3f /* descsz */ ; \ .long type ; \ 1:.asciz "name" ; \ 2:.align 4 ; \ 3:desctype descdata ; \ 4:.align 4 ; \ .popsection #endif /* __STDC__ */ #endif /* !_MACHINE_ASMACROS_H_ */ Index: head/sys/i386/include/cpufunc.h =================================================================== --- head/sys/i386/include/cpufunc.h (revision 282273) +++ head/sys/i386/include/cpufunc.h (revision 282274) @@ -1,827 +1,789 @@ /*- * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Functions to provide access to special i386 instructions. * This in included in sys/systm.h, and that file should be * used in preference to this. 
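/*
 * Illustrative sketch (hypothetical helper, not taken from this header):
 * a typical short critical section built from the wrappers defined
 * further down in this file.  With the PV paths removed,
 * disable_intr()/enable_intr() compile to bare cli/sti on native and
 * HVM kernels alike, and intr_disable()/intr_restore() simply save and
 * reload %eflags around them.
 */
static __inline void
example_critical_section(volatile u_int *counter)
{
        register_t saved;

        saved = intr_disable();         /* read_eflags() followed by cli */
        (*counter)++;                   /* work that must not be interrupted */
        intr_restore(saved);            /* write_eflags(), restoring IF */
}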
*/ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif -#ifdef XEN -extern void xen_cli(void); -extern void xen_sti(void); -extern u_int xen_rcr2(void); -extern void xen_load_cr3(u_int data); -extern void xen_tlb_flush(void); -extern void xen_invlpg(u_int addr); -extern void write_eflags(u_int eflags); -extern u_int read_eflags(void); -#endif - struct region_descriptor; #define readb(va) (*(volatile uint8_t *) (va)) #define readw(va) (*(volatile uint16_t *) (va)) #define readl(va) (*(volatile uint32_t *) (va)) #define writeb(va, d) (*(volatile uint8_t *) (va) = (d)) #define writew(va, d) (*(volatile uint16_t *) (va) = (d)) #define writel(va, d) (*(volatile uint32_t *) (va) = (d)) #if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE) static __inline void breakpoint(void) { __asm __volatile("int $3"); } static __inline u_int bsfl(u_int mask) { u_int result; __asm("bsfl %1,%0" : "=r" (result) : "rm" (mask) : "cc"); return (result); } static __inline u_int bsrl(u_int mask) { u_int result; __asm("bsrl %1,%0" : "=r" (result) : "rm" (mask) : "cc"); return (result); } static __inline void clflush(u_long addr) { __asm __volatile("clflush %0" : : "m" (*(char *)addr)); } static __inline void clts(void) { __asm __volatile("clts"); } static __inline void disable_intr(void) { -#ifdef XEN - xen_cli(); -#else + __asm __volatile("cli" : : : "memory"); -#endif } static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } static __inline void enable_intr(void) { -#ifdef XEN - xen_sti(); -#else + __asm __volatile("sti"); -#endif } static __inline void cpu_monitor(const void *addr, u_long extensions, u_int hints) { __asm __volatile("monitor" : : "a" (addr), "c" (extensions), "d" (hints)); } static __inline void cpu_mwait(u_long extensions, u_int hints) { __asm __volatile("mwait" : : "a" (hints), "c" (extensions)); } static __inline void lfence(void) { __asm __volatile("lfence" : : : "memory"); } static __inline void mfence(void) { __asm __volatile("mfence" : : : "memory"); } #ifdef _KERNEL #define HAVE_INLINE_FFS static __inline int ffs(int mask) { /* * Note that gcc-2's builtin ffs would be used if we didn't declare * this inline or turn off the builtin. The builtin is faster but * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later * versions. */ return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1); } #define HAVE_INLINE_FFSL static __inline int ffsl(long mask) { return (ffs((int)mask)); } #define HAVE_INLINE_FLS static __inline int fls(int mask) { return (mask == 0 ? 
mask : (int)bsrl((u_int)mask) + 1); } #define HAVE_INLINE_FLSL static __inline int flsl(long mask) { return (fls((int)mask)); } #endif /* _KERNEL */ static __inline void halt(void) { __asm __volatile("hlt"); } static __inline u_char inb(u_int port) { u_char data; __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline u_int inl(u_int port) { u_int data; __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void insb(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insb" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insw(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insw" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insl(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insl" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void invd(void) { __asm __volatile("invd"); } static __inline u_short inw(u_int port) { u_short data; __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void outb(u_int port, u_char data) { __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outl(u_int port, u_int data) { __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outsb(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsb" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsw(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsw" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsl(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsl" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outw(u_int port, u_short data) { __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void ia32_pause(void) { __asm __volatile("pause"); } static __inline u_int -#ifdef XEN -_read_eflags(void) -#else read_eflags(void) -#endif { u_int ef; __asm __volatile("pushfl; popl %0" : "=r" (ef)); return (ef); } static __inline uint64_t rdmsr(u_int msr) { uint64_t rv; __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr)); return (rv); } static __inline uint32_t rdmsr32(u_int msr) { uint32_t low; __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "edx"); return (low); } static __inline uint64_t rdpmc(u_int pmc) { uint64_t rv; __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc)); return (rv); } static __inline uint64_t rdtsc(void) { uint64_t rv; __asm __volatile("rdtsc" : "=A" (rv)); return (rv); } static __inline uint32_t rdtsc32(void) { uint32_t rv; __asm __volatile("rdtsc" : "=a" (rv) : : "edx"); return (rv); } static __inline void wbinvd(void) { __asm __volatile("wbinvd"); } static __inline void -#ifdef XEN -_write_eflags(u_int ef) -#else write_eflags(u_int ef) -#endif { __asm __volatile("pushl %0; popfl" : : "r" (ef)); } static __inline void wrmsr(u_int msr, uint64_t newval) { __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); } static __inline void load_cr0(u_int data) { __asm __volatile("movl %0,%%cr0" : : "r" (data)); } static __inline u_int rcr0(void) { u_int data; __asm __volatile("movl %%cr0,%0" : "=r" (data)); return (data); } static __inline u_int rcr2(void) { u_int data; -#ifdef XEN - return (xen_rcr2()); -#endif __asm __volatile("movl %%cr2,%0" : "=r" (data)); return (data); } static __inline void load_cr3(u_int data) { -#ifdef XEN - 
xen_load_cr3(data); -#else + __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); -#endif } static __inline u_int rcr3(void) { u_int data; __asm __volatile("movl %%cr3,%0" : "=r" (data)); return (data); } static __inline void load_cr4(u_int data) { __asm __volatile("movl %0,%%cr4" : : "r" (data)); } static __inline u_int rcr4(void) { u_int data; __asm __volatile("movl %%cr4,%0" : "=r" (data)); return (data); } static __inline uint64_t rxcr(u_int reg) { u_int low, high; __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); return (low | ((uint64_t)high << 32)); } static __inline void load_xcr(u_int reg, uint64_t val) { u_int low, high; low = val; high = val >> 32; __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); } /* * Global TLB flush (except for thise for pages marked PG_G) */ static __inline void invltlb(void) { -#ifdef XEN - xen_tlb_flush(); -#else + load_cr3(rcr3()); -#endif } /* * TLB flush for an individual page (even if it has PG_G). * Only works on 486+ CPUs (i386 does not have PG_G). */ static __inline void invlpg(u_int addr) { -#ifdef XEN - xen_invlpg(addr); -#else __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); -#endif } static __inline u_short rfs(void) { u_short sel; __asm __volatile("movw %%fs,%0" : "=rm" (sel)); return (sel); } static __inline uint64_t rgdt(void) { uint64_t gdtr; __asm __volatile("sgdt %0" : "=m" (gdtr)); return (gdtr); } static __inline u_short rgs(void) { u_short sel; __asm __volatile("movw %%gs,%0" : "=rm" (sel)); return (sel); } static __inline uint64_t ridt(void) { uint64_t idtr; __asm __volatile("sidt %0" : "=m" (idtr)); return (idtr); } static __inline u_short rldt(void) { u_short ldtr; __asm __volatile("sldt %0" : "=g" (ldtr)); return (ldtr); } static __inline u_short rss(void) { u_short sel; __asm __volatile("movw %%ss,%0" : "=rm" (sel)); return (sel); } static __inline u_short rtr(void) { u_short tr; __asm __volatile("str %0" : "=g" (tr)); return (tr); } static __inline void load_fs(u_short sel) { __asm __volatile("movw %0,%%fs" : : "rm" (sel)); } static __inline void load_gs(u_short sel) { __asm __volatile("movw %0,%%gs" : : "rm" (sel)); } static __inline void lidt(struct region_descriptor *addr) { __asm __volatile("lidt (%0)" : : "r" (addr)); } static __inline void lldt(u_short sel) { __asm __volatile("lldt %0" : : "r" (sel)); } static __inline void ltr(u_short sel) { __asm __volatile("ltr %0" : : "r" (sel)); } static __inline u_int rdr0(void) { u_int data; __asm __volatile("movl %%dr0,%0" : "=r" (data)); return (data); } static __inline void load_dr0(u_int dr0) { __asm __volatile("movl %0,%%dr0" : : "r" (dr0)); } static __inline u_int rdr1(void) { u_int data; __asm __volatile("movl %%dr1,%0" : "=r" (data)); return (data); } static __inline void load_dr1(u_int dr1) { __asm __volatile("movl %0,%%dr1" : : "r" (dr1)); } static __inline u_int rdr2(void) { u_int data; __asm __volatile("movl %%dr2,%0" : "=r" (data)); return (data); } static __inline void load_dr2(u_int dr2) { __asm __volatile("movl %0,%%dr2" : : "r" (dr2)); } static __inline u_int rdr3(void) { u_int data; __asm __volatile("movl %%dr3,%0" : "=r" (data)); return (data); } static __inline void load_dr3(u_int dr3) { __asm __volatile("movl %0,%%dr3" : : "r" (dr3)); } static __inline u_int rdr4(void) { u_int data; __asm __volatile("movl %%dr4,%0" : "=r" (data)); return (data); } static __inline void load_dr4(u_int dr4) { __asm __volatile("movl %0,%%dr4" : : "r" (dr4)); } static __inline u_int rdr5(void) { u_int data; __asm 
__volatile("movl %%dr5,%0" : "=r" (data)); return (data); } static __inline void load_dr5(u_int dr5) { __asm __volatile("movl %0,%%dr5" : : "r" (dr5)); } static __inline u_int rdr6(void) { u_int data; __asm __volatile("movl %%dr6,%0" : "=r" (data)); return (data); } static __inline void load_dr6(u_int dr6) { __asm __volatile("movl %0,%%dr6" : : "r" (dr6)); } static __inline u_int rdr7(void) { u_int data; __asm __volatile("movl %%dr7,%0" : "=r" (data)); return (data); } static __inline void load_dr7(u_int dr7) { __asm __volatile("movl %0,%%dr7" : : "r" (dr7)); } static __inline u_char read_cyrix_reg(u_char reg) { outb(0x22, reg); return inb(0x23); } static __inline void write_cyrix_reg(u_char reg, u_char data) { outb(0x22, reg); outb(0x23, data); } static __inline register_t intr_disable(void) { register_t eflags; eflags = read_eflags(); disable_intr(); return (eflags); } static __inline void intr_restore(register_t eflags) { write_eflags(eflags); } #else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */ int breakpoint(void); u_int bsfl(u_int mask); u_int bsrl(u_int mask); void clflush(u_long addr); void clts(void); void cpuid_count(u_int ax, u_int cx, u_int *p); void disable_intr(void); void do_cpuid(u_int ax, u_int *p); void enable_intr(void); void halt(void); void ia32_pause(void); u_char inb(u_int port); u_int inl(u_int port); void insb(u_int port, void *addr, size_t count); void insl(u_int port, void *addr, size_t count); void insw(u_int port, void *addr, size_t count); register_t intr_disable(void); void intr_restore(register_t ef); void invd(void); void invlpg(u_int addr); void invltlb(void); u_short inw(u_int port); void lidt(struct region_descriptor *addr); void lldt(u_short sel); void load_cr0(u_int cr0); void load_cr3(u_int cr3); void load_cr4(u_int cr4); void load_dr0(u_int dr0); void load_dr1(u_int dr1); void load_dr2(u_int dr2); void load_dr3(u_int dr3); void load_dr4(u_int dr4); void load_dr5(u_int dr5); void load_dr6(u_int dr6); void load_dr7(u_int dr7); void load_fs(u_short sel); void load_gs(u_short sel); void ltr(u_short sel); void outb(u_int port, u_char data); void outl(u_int port, u_int data); void outsb(u_int port, const void *addr, size_t count); void outsl(u_int port, const void *addr, size_t count); void outsw(u_int port, const void *addr, size_t count); void outw(u_int port, u_short data); u_int rcr0(void); u_int rcr2(void); u_int rcr3(void); u_int rcr4(void); uint64_t rdmsr(u_int msr); uint64_t rdpmc(u_int pmc); u_int rdr0(void); u_int rdr1(void); u_int rdr2(void); u_int rdr3(void); u_int rdr4(void); u_int rdr5(void); u_int rdr6(void); u_int rdr7(void); uint64_t rdtsc(void); u_char read_cyrix_reg(u_char reg); u_int read_eflags(void); u_int rfs(void); uint64_t rgdt(void); u_int rgs(void); uint64_t ridt(void); u_short rldt(void); u_short rtr(void); void wbinvd(void); void write_cyrix_reg(u_char reg, u_char data); void write_eflags(u_int ef); void wrmsr(u_int msr, uint64_t newval); #endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */ void reset_dbregs(void); #ifdef _KERNEL int rdmsr_safe(u_int msr, uint64_t *val); int wrmsr_safe(u_int msr, uint64_t newval); #endif #endif /* !_MACHINE_CPUFUNC_H_ */ Index: head/sys/i386/include/intr_machdep.h =================================================================== --- head/sys/i386/include/intr_machdep.h (revision 282273) +++ head/sys/i386/include/intr_machdep.h (revision 282274) @@ -1,184 +1,178 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. * IRQ values from 256 to 767 are used by MSI. When running under the Xen * Hypervisor, IRQ values from 768 to 4863 are available for binding to * event channel events. We leave 255 unused to avoid confusion since 255 is * used in PCI to indicate an invalid IRQ. */ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 #ifdef XENHVM #include #define NUM_EVTCHN_INTS NR_EVENT_CHANNELS #define FIRST_EVTCHN_INT \ (FIRST_MSI_INT + NUM_MSI_INTS) #define LAST_EVTCHN_INT \ (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) -#elif defined(XEN) -#include -#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS -#define FIRST_EVTCHN_INT 0 -#define LAST_EVTCHN_INT \ - (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) -#else /* !XEN && !XENHVM */ +#else /* !XENHVM */ #define NUM_EVTCHN_INTS 0 #endif #define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 9 counters for each CPU for IPI counters for SMP. 
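/*
 * Illustrative check (not part of this header): with the constants above,
 * the ranges described in the comment work out to 0-254 for ISA/PCI
 * IRQs, 256-767 for MSI, and 768-4863 for event channels when XENHVM is
 * configured; the last range assumes NR_EVENT_CHANNELS is 4096, as the
 * comment implies.  CTASSERT is assumed to be visible from sys/systm.h.
 */
#ifdef XENHVM
CTASSERT(FIRST_EVTCHN_INT == 768);
CTASSERT(LAST_EVTCHN_INT == FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1);
#endif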
*/ #ifdef SMP #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 9) * MAXCPU) #else #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif #ifndef LOCORE typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *, bool suspend_cancelled); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); void (*pic_reprogram_pin)(struct intsrc *); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; }; struct trapframe; extern struct mtx icu_lock; extern int elcr_found; #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. */ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(void); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(bool suspend_cancelled); void intr_suspend(void); void intr_reprogram(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int* irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__MACHINE_INTR_MACHDEP_H__ */ Index: head/sys/i386/include/pcpu.h =================================================================== --- head/sys/i386/include/pcpu.h (revision 282273) +++ head/sys/i386/include/pcpu.h (revision 282274) @@ -1,261 +1,233 @@ /*- * Copyright (c) Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #ifndef _SYS_CDEFS_H_ #error "sys/cdefs.h is a prerequisite for this file" #endif #include #include /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. * The reason for doing it via a struct is so that an array of pointers * to each CPU's data can be set up for things like "check curproc on all * other processors" */ -#if defined(XEN) - -/* These are peridically updated in shared_info, and then copied here. */ -struct shadow_time_info { - uint64_t tsc_timestamp; /* TSC at last update of time vals. */ - uint64_t system_timestamp; /* Time, in nanosecs, since boot. */ - uint32_t tsc_to_nsec_mul; - uint32_t tsc_to_usec_mul; - int tsc_shift; - uint32_t version; -}; - -#define PCPU_XEN_FIELDS \ - ; \ - u_int pc_cr3; /* track cr3 for R1/R3*/ \ - vm_paddr_t *pc_pdir_shadow; \ - uint64_t pc_processed_system_time; \ - struct shadow_time_info pc_shadow_time; \ - char __pad[185] - -#else /* !XEN */ - -#define PCPU_XEN_FIELDS \ - ; \ - char __pad[233] - -#endif - #define PCPU_MD_FIELDS \ char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct i386tss pc_common_tss; \ struct segment_descriptor pc_common_tssd; \ struct segment_descriptor *pc_tss_gdt; \ struct segment_descriptor *pc_fsgs_gdt; \ int pc_currentldt; \ u_int pc_acpi_id; /* ACPI CPU id */ \ u_int pc_apic_id; \ int pc_private_tss; /* Flag indicating private tss*/\ u_int pc_cmci_mask; /* MCx banks for CMCI */ \ - u_int pc_vcpu_id /* Xen vCPU ID */ \ - PCPU_XEN_FIELDS + u_int pc_vcpu_id; /* Xen vCPU ID */ \ + char __pad[233] #ifdef _KERNEL #ifdef lint extern struct pcpu *pcpup; #define PCPU_GET(member) (pcpup->pc_ ## member) #define PCPU_ADD(member, val) (pcpup->pc_ ## member += (val)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&pcpup->pc_ ## member) #define PCPU_SET(member, val) (pcpup->pc_ ## member = (val)) #elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) /* * Evaluates to the byte offset of the per-cpu variable name. */ #define __pcpu_offset(name) \ __offsetof(struct pcpu, name) /* * Evaluates to the type of the per-cpu variable name. */ #define __pcpu_type(name) \ __typeof(((struct pcpu *)0)->name) /* * Evaluates to the address of the per-cpu variable name. 
*/ #define __PCPU_PTR(name) __extension__ ({ \ __pcpu_type(name) *__p; \ \ __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \ : "=r" (__p) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ "i" (__pcpu_offset(name))); \ \ __p; \ }) /* * Evaluates to the value of the per-cpu variable name. */ #define __PCPU_GET(name) __extension__ ({ \ __pcpu_type(name) __res; \ struct __s { \ u_char __b[MIN(sizeof(__res), 4)]; \ } __s; \ \ if (sizeof(__res) == 1 || sizeof(__res) == 2 || \ sizeof(__res) == 4) { \ __asm __volatile("mov %%fs:%1,%0" \ : "=r" (__s) \ : "m" (*(struct __s *)(__pcpu_offset(name)))); \ *(struct __s *)(void *)&__res = __s; \ } else { \ __res = *__PCPU_PTR(name); \ } \ __res; \ }) /* * Adds a value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. */ #define __PCPU_ADD(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__val), 4)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("add %1,%%fs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else \ *__PCPU_PTR(name) += __val; \ } while (0) /* * Increments the value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. */ #define __PCPU_INC(name) do { \ CTASSERT(sizeof(__pcpu_type(name)) == 1 || \ sizeof(__pcpu_type(name)) == 2 || \ sizeof(__pcpu_type(name)) == 4); \ if (sizeof(__pcpu_type(name)) == 1) { \ __asm __volatile("incb %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 2) { \ __asm __volatile("incw %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 4) { \ __asm __volatile("incl %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } \ } while (0) /* * Sets the value of the per-cpu variable name to value val. 
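/*
 * Illustrative sketch (hypothetical helper): callers normally go through
 * the PCPU_*() wrappers defined a little further down, which expand to
 * the __PCPU_*() macros above.  For fields of 1, 2 or 4 bytes the access
 * is a single %fs-relative instruction, so it is atomic with respect to
 * interrupts on the local CPU.
 */
static __inline u_int
example_local_apic_id(void)
{
        /* One mov from %fs:offsetof(struct pcpu, pc_apic_id). */
        return (PCPU_GET(apic_id));
}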
*/ #define __PCPU_SET(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__val), 4)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("mov %1,%%fs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else { \ *__PCPU_PTR(name) = __val; \ } \ } while (0) #define PCPU_GET(member) __PCPU_GET(pc_ ## member) #define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) #define PCPU_INC(member) __PCPU_INC(pc_ ## member) #define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) #define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) #define OFFSETOF_CURTHREAD 0 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnull-dereference" #endif static __inline __pure2 struct thread * __curthread(void) { struct thread *td; __asm("movl %%fs:%1,%0" : "=r" (td) : "m" (*(char *)OFFSETOF_CURTHREAD)); return (td); } #ifdef __clang__ #pragma clang diagnostic pop #endif #define curthread (__curthread()) #define OFFSETOF_CURPCB 16 static __inline __pure2 struct pcb * __curpcb(void) { struct pcb *pcb; __asm("movl %%fs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB)); return (pcb); } #define curpcb (__curpcb()) #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */ #error "this file needs to be ported to your compiler" #endif /* lint, etc. */ #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */ Index: head/sys/i386/include/pmap.h =================================================================== --- head/sys/i386/include/pmap.h (revision 282273) +++ head/sys/i386/include/pmap.h (revision 282274) @@ -1,476 +1,401 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ /* * Page-directory and page-table entries follow this format, with a few * of the fields not present here and there, depending on a lot of things. */ /* ---- Intel Nomenclature ---- */ #define PG_V 0x001 /* P Valid */ #define PG_RW 0x002 /* R/W Read/Write */ #define PG_U 0x004 /* U/S User/Supervisor */ #define PG_NC_PWT 0x008 /* PWT Write through */ #define PG_NC_PCD 0x010 /* PCD Cache disable */ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ #define PG_PTE_PAT 0x080 /* PAT PAT index */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ #define PG_PDE_PAT 0x1000 /* PAT PAT index */ #if defined(PAE) || defined(PAE_TABLES) #define PG_NX (1ull<<63) /* No-execute */ #endif /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ #define PG_MANAGED PG_AVAIL2 #if defined(PAE) || defined(PAE_TABLES) #define PG_FRAME (0x000ffffffffff000ull) #define PG_PS_FRAME (0x000fffffffe00000ull) #else #define PG_FRAME (~PAGE_MASK) #define PG_PS_FRAME (0xffc00000) #endif #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* Page level cache control fields used to determine the PAT type */ #define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD) #define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD) /* * Promotion to a 2 or 4MB (PDE) page mapping requires that the corresponding * 4KB (PTE) page mappings have identical settings for the following fields: */ #define PG_PTE_PROMOTE (PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \ PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V) /* * Page Protection Exception bits */ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ #define PGEX_RSV 0x08 /* reserved PTE field is non-zero */ #define PGEX_I 0x10 /* during an instruction fetch */ /* * Size of Kernel address space. This is the number of page table pages * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). * For PAE, the page table page unit size is 2MB. This means that 512 pages * is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE. */ #ifndef KVA_PAGES #if defined(PAE) || defined(PAE_TABLES) #define KVA_PAGES 512 #else #define KVA_PAGES 256 #endif #endif /* * Pte related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< #include #include #include #include #if defined(PAE) || defined(PAE_TABLES) typedef uint64_t pdpt_entry_t; typedef uint64_t pd_entry_t; typedef uint64_t pt_entry_t; #define PTESHIFT (3) #define PDESHIFT (3) #else typedef uint32_t pd_entry_t; typedef uint32_t pt_entry_t; #define PTESHIFT (2) #define PDESHIFT (2) #endif /* * Address of current address space page table maps and directories. 
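/*
 * Illustrative sketch (hypothetical helper, not taken from this header):
 * entering a wired, writable kernel mapping combines the bits defined
 * above with the vtopte() and pte_store() helpers that appear further
 * down in this file, plus invlpg() from cpufunc.h.  PG_A and PG_M are
 * left clear because the MMU sets them on first access and first write;
 * pgeflag (declared later in this file) adds PG_G only on CPUs with
 * global-page support.
 */
static __inline void
example_kenter(vm_offset_t va, vm_paddr_t pa)
{
        pte_store(vtopte(va), pa | PG_V | PG_RW | pgeflag);
        invlpg(va);             /* discard any stale TLB entry for va */
}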
*/ #ifdef _KERNEL extern pt_entry_t PTmap[]; extern pd_entry_t PTD[]; extern pd_entry_t PTDpde[]; #if defined(PAE) || defined(PAE_TABLES) extern pdpt_entry_t *IdlePDPT; #endif extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ /* * Translate a virtual address to the kernel virtual address of its page table * entry (PTE). This can be used recursively. If the address of a PTE as * previously returned by this macro is itself given as the argument, then the * address of the page directory entry (PDE) that maps the PTE will be * returned. * * This macro may be used before pmap_bootstrap() is called. */ #define vtopte(va) (PTmap + i386_btop(va)) /* * Translate a virtual address to its physical address. * * This macro may be used before pmap_bootstrap() is called. */ #define vtophys(va) pmap_kextract((vm_offset_t)(va)) -#if defined(XEN) -#include - -#include - -#include -#include - -extern pt_entry_t pg_nx; - -#define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M) - -#define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma))) -#define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m))) - -#define VTOM(va) xpmap_ptom(VTOP(va)) - -static __inline vm_paddr_t -pmap_kextract_ma(vm_offset_t va) -{ - vm_paddr_t ma; - if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) { - ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1)); - } else { - ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK); - } - return ma; -} - -static __inline vm_paddr_t -pmap_kextract(vm_offset_t va) -{ - return xpmap_mtop(pmap_kextract_ma(va)); -} -#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va))) - -vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va); - -void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa); -void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len); -void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len); - -static __inline pt_entry_t -pte_load_store(pt_entry_t *ptep, pt_entry_t v) -{ - pt_entry_t r; - - r = *ptep; - PT_SET_VA(ptep, v, TRUE); - return (r); -} - -static __inline pt_entry_t -pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v) -{ - pt_entry_t r; - - r = *ptep; - PT_SET_VA_MA(ptep, v, TRUE); - return (r); -} - -#define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL) - -#define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte) -#define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) -#define pde_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) - -#elif !defined(XEN) - /* * KPTmap is a linear mapping of the kernel page table. It differs from the * recursive mapping in two ways: (1) it only provides access to kernel page * table pages, and not user page table pages, and (2) it provides access to * a kernel page table page after the corresponding virtual addresses have * been promoted to a 2/4MB page mapping. * * KPTmap is first initialized by locore to support just NPKT page table * pages. Later, it is reinitialized by pmap_bootstrap() to allow for * expansion of the kernel page table. */ extern pt_entry_t *KPTmap; /* * Extract from the kernel page table the physical address that is mapped by * the given virtual address "va". * * This function may be used before pmap_bootstrap() is called. */ static __inline vm_paddr_t pmap_kextract(vm_offset_t va) { vm_paddr_t pa; if ((pa = PTD[va >> PDRSHIFT]) & PG_PS) { pa = (pa & PG_PS_FRAME) | (va & PDRMASK); } else { /* * Beware of a concurrent promotion that changes the PDE at * this point! 
For example, vtopte() must not be used to * access the PTE because it would use the new PDE. It is, * however, safe to use the old PDE because the page table * page is preserved by the promotion. */ pa = KPTmap[i386_btop(va)]; pa = (pa & PG_FRAME) | (va & PAGE_MASK); } return (pa); } -#endif -#if !defined(XEN) -#define PT_UPDATES_FLUSH() -#endif +#if (defined(PAE) || defined(PAE_TABLES)) -#if (defined(PAE) || defined(PAE_TABLES)) && !defined(XEN) - #define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new) #define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte) #define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0) #define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte) extern pt_entry_t pg_nx; -#elif !defined(PAE) && !defined(PAE_TABLES) && !defined(XEN) +#else /* !(PAE || PAE_TABLES) */ #define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new) #define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte) #define pte_load_clear(ptep) atomic_swap_int(ptep, 0) #define pte_store(ptep, pte) do { \ *(u_int *)(ptep) = (u_int)(pte); \ } while (0) -#endif /* PAE */ +#endif /* !(PAE || PAE_TABLES) */ #define pte_clear(ptep) pte_store(ptep, 0) #define pde_store(pdep, pde) pte_store(pdep, pde) #endif /* _KERNEL */ /* * Pmap stuff */ struct pv_entry; struct pv_chunk; struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pat_mode; }; struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ #if defined(PAE) || defined(PAE_TABLES) pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer table */ #endif struct vm_radix pm_root; /* spare page table pages */ }; typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) #define PMAP_LOCK_ASSERT(pmap, type) \ mtx_assert(&(pmap)->pm_mtx, (type)) #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) #define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ NULL, MTX_DEF | MTX_DUPOK) #define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx) #define PMAP_MTX(pmap) (&(pmap)->pm_mtx) #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) #endif /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_list. */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* * pv_entries are allocated in chunks per-process. This avoids the * need to track per-pmap assignments. */ #define _NPCM 11 #define _NPCPV 336 struct pv_chunk { pmap_t pc_pmap; TAILQ_ENTRY(pv_chunk) pc_list; uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */ TAILQ_ENTRY(pv_chunk) pc_lru; struct pv_entry pc_pventry[_NPCPV]; }; #ifdef _KERNEL extern caddr_t CADDR3; extern pt_entry_t *CMAP3; extern vm_paddr_t phys_avail[]; extern vm_paddr_t dump_avail[]; extern int pseflag; extern int pgeflag; extern char *ptvmmap; /* poor name! 
*/ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; #define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) #define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz)) /* * Only the following functions or macros may be used before pmap_bootstrap() * is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and * vtopte(). */ void pmap_bootstrap(vm_paddr_t); int pmap_cache_bits(int mode, boolean_t is_pde); int pmap_change_attr(vm_offset_t, vm_size_t, int); void pmap_init_pat(void); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void *pmap_kenter_temporary(vm_paddr_t pa, int i); void pmap_kremove(vm_offset_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); void *pmap_mapdev(vm_paddr_t, vm_size_t); void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); boolean_t pmap_page_is_mapped(vm_page_t m); void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); void pmap_unmapdev(vm_offset_t, vm_size_t); pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2; void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); void pmap_invalidate_cache(void); void pmap_invalidate_cache_pages(vm_page_t *pages, int count); void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force); #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/i386/include/segments.h =================================================================== --- head/sys/i386/include/segments.h (revision 282273) +++ head/sys/i386/include/segments.h (revision 282274) @@ -1,104 +1,98 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 * $FreeBSD$ */ #ifndef _MACHINE_SEGMENTS_H_ #define _MACHINE_SEGMENTS_H_ /* * 386 Segmentation Data Structures and definitions * William F. Jolitz (william@ernie.berkeley.edu) 6/20/1989 */ #include /* * Software definitions are in this convenient format, * which are translated into inconvenient segment descriptors * when needed to be used by the 386 hardware */ struct soft_segment_descriptor { unsigned ssd_base ; /* segment base address */ unsigned ssd_limit ; /* segment extent */ unsigned ssd_type:5 ; /* segment type */ unsigned ssd_dpl:2 ; /* segment descriptor priority level */ unsigned ssd_p:1 ; /* segment descriptor present */ unsigned ssd_xx:4 ; /* unused */ unsigned ssd_xx1:2 ; /* unused */ unsigned ssd_def32:1 ; /* default 32 vs 16 bit size */ unsigned ssd_gran:1 ; /* limit granularity (byte/page units)*/ }; /* * region descriptors, used to load gdt/idt tables before segments yet exist. */ struct region_descriptor { unsigned rd_limit:16; /* segment extent */ unsigned rd_base:32 __packed; /* base address */ }; /* * Segment Protection Exception code bits */ #define SEGEX_EXT 0x01 /* recursive or externally induced */ #define SEGEX_IDT 0x02 /* interrupt descriptor table */ #define SEGEX_TI 0x04 /* local descriptor table */ /* other bits are affected descriptor index */ #define SEGEX_IDX(s) (((s)>>3)&0x1fff) #ifdef _KERNEL extern int _default_ldt; -#ifdef XEN -extern struct proc_ldt default_proc_ldt; -extern union descriptor *gdt; -extern union descriptor *ldt; -#else extern union descriptor gdt[]; extern union descriptor ldt[NLDT]; -#endif extern struct soft_segment_descriptor gdt_segs[]; extern struct gate_descriptor *idt; extern struct region_descriptor r_gdt, r_idt; void lgdt(struct region_descriptor *rdp); void sdtossd(struct segment_descriptor *sdp, struct soft_segment_descriptor *ssdp); void ssdtosd(struct soft_segment_descriptor *ssdp, struct segment_descriptor *sdp); #endif /* _KERNEL */ #endif /* !_MACHINE_SEGMENTS_H_ */ Index: head/sys/i386/include/smp.h =================================================================== --- head/sys/i386/include/smp.h (revision 282273) +++ head/sys/i386/include/smp.h (revision 282274) @@ -1,131 +1,124 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. 
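/*
 * Illustrative sketch (hypothetical helper and arguments): the
 * region_descriptor structure and the lgdt() prototype in segments.h
 * above are used together when (re)loading a descriptor table.  The
 * limit is the table size in bytes minus one; the base is the table's
 * linear address.
 */
static __inline void
example_load_gdt(union descriptor *table, int nents)
{
        struct region_descriptor rd;

        rd.rd_limit = nents * sizeof(union descriptor) - 1;
        rd.rd_base = (unsigned)table;   /* linear base address of the table */
        lgdt(&rd);
}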
Poul-Henning Kamp * ---------------------------------------------------------------------------- * * $FreeBSD$ * */ #ifndef _MACHINE_SMP_H_ #define _MACHINE_SMP_H_ #ifdef _KERNEL #ifdef SMP #ifndef LOCORE #include #include #include #include #include /* global data in mpboot.s */ extern int bootMP_size; /* functions in mpboot.s */ void bootMP(void); /* global data in mp_machdep.c */ extern int mp_naps; extern int boot_cpu_id; extern struct pcb stoppcbs[]; extern int cpu_apic_ids[]; extern int bootAP; extern void *dpcpu; extern char *bootSTK; extern int bootAP; extern void *bootstacks[]; extern volatile u_int cpu_ipi_pending[]; extern volatile int aps_ready; extern struct mtx ap_boot_mtx; extern int cpu_logical; extern int cpu_cores; extern volatile int smp_tlb_wait; extern u_int xhits_gbl[]; extern u_int xhits_pg[]; extern u_int xhits_rng[]; extern u_int ipi_global; extern u_int ipi_page; extern u_int ipi_range; extern u_int ipi_range_size; extern u_int ipi_masked_global; extern u_int ipi_masked_page; extern u_int ipi_masked_range; extern u_int ipi_masked_range_size; struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; int cpu_hyperthread:1; }; extern struct cpu_info cpu_info[]; #ifdef COUNT_IPIS extern u_long *ipi_invltlb_counts[MAXCPU]; extern u_long *ipi_invlrng_counts[MAXCPU]; extern u_long *ipi_invlpg_counts[MAXCPU]; extern u_long *ipi_invlcache_counts[MAXCPU]; extern u_long *ipi_rendezvous_counts[MAXCPU]; #endif /* IPI handlers */ inthand_t IDTVEC(invltlb), /* TLB shootdowns - global */ IDTVEC(invlpg), /* TLB shootdowns - 1 page */ IDTVEC(invlrng), /* TLB shootdowns - page range */ IDTVEC(invlcache), /* Write back and invalidate cache */ IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ IDTVEC(cpustop), /* CPU stops & waits to be restarted */ IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */ IDTVEC(rendezvous); /* handle CPU rendezvous */ /* functions in mp_machdep.c */ void assign_cpu_ids(void); void cpu_add(u_int apic_id, char boot_cpu); void cpustop_handler(void); -#ifndef XEN void cpususpend_handler(void); -#endif void init_secondary_tail(void); void invltlb_handler(void); void invlpg_handler(void); void invlrng_handler(void); void invlcache_handler(void); void init_secondary(void); void ipi_startup(int apic_id, int vector); void ipi_all_but_self(u_int ipi); -#ifndef XEN void ipi_bitmap_handler(struct trapframe frame); -#endif void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); void set_interrupt_apic_ids(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); void smp_masked_invltlb(cpuset_t mask); void mem_range_AP_init(void); void topo_probe(void); void ipi_send_cpu(int cpu, u_int ipi); -#ifdef XEN -void ipi_to_irq_init(void); -#endif #endif /* !LOCORE */ #endif /* SMP */ #endif /* _KERNEL */ #endif /* _MACHINE_SMP_H_ */ Index: head/sys/i386/include/vmparam.h =================================================================== --- head/sys/i386/include/vmparam.h (revision 282273) +++ head/sys/i386/include/vmparam.h (revision 282274) @@ -1,212 +1,204 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. 
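/*
 * Illustrative sketch (hypothetical helper): the pmap code pairs the
 * local TLB operations from cpufunc.h with the smp_* shootdown helpers
 * declared in smp.h above, roughly as below for a global flush; the
 * masked and ranged variants follow the same pattern for subsets of CPUs
 * or address ranges.
 */
static __inline void
example_global_shootdown(void)
{
        invltlb();      /* flush the local, non-global TLB entries */
        smp_invltlb();  /* IPI the other CPUs so they do the same */
}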
* * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ 1 /* * Machine dependent constants for 386. */ /* * Virtual memory related constants, all in bytes */ #define MAXTSIZ (128UL*1024*1024) /* max text size */ #ifndef DFLDSIZ #define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (512UL*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (64UL*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ #endif /* * Choose between DENSE and SPARSE based on whether lower execution time or * lower kernel address space consumption is desired. Under PAE, kernel * address space is often in short supply. */ #ifdef PAE #define VM_PHYSSEG_SPARSE #else #define VM_PHYSSEG_DENSE #endif /* * The number of PHYSSEG entries must be one greater than the number * of phys_avail entries because the phys_avail entry that spans the * largest physical address that is accessible by ISA DMA is split * into two PHYSSEG entries. */ #define VM_PHYSSEG_MAX 17 /* * Create two free page pools. Since the i386 kernel virtual address * space does not include a mapping onto the machine's entire physical * memory, VM_FREEPOOL_DIRECT is defined as an alias for the default * pool, VM_FREEPOOL_DEFAULT. */ #define VM_NFREEPOOL 2 #define VM_FREEPOOL_CACHE 1 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 0 /* * Create two free page lists: VM_FREELIST_DEFAULT is for physical * pages that are above the largest physical address that is * accessible by ISA DMA and VM_FREELIST_ISADMA is for physical pages * that are below that address. */ #define VM_NFREELIST 2 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_ISADMA 1 /* * The largest allocation size is 2MB under PAE and 4MB otherwise. 
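/*
 * Illustrative check (not part of this header): the VM_NFREEORDER values
 * chosen just below give a largest buddy-queue block of
 * PAGE_SIZE << (VM_NFREEORDER - 1), i.e. 2MB with PAE page tables and
 * 4MB otherwise, matching the superpage size in each configuration.
 * The 4096 below is the i386 page size, written out to keep the sketch
 * self-contained; CTASSERT is assumed to come from sys/systm.h.
 */
#ifdef PAE
CTASSERT((4096u << (10 - 1)) == 2 * 1024 * 1024);
#else
CTASSERT((4096u << (11 - 1)) == 4 * 1024 * 1024);
#endif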
*/ #ifdef PAE #define VM_NFREEORDER 10 #else #define VM_NFREEORDER 11 #endif /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 512 pages when PAE pagetables are * used, and 1024 pages otherwise. */ #ifndef VM_LEVEL_0_ORDER #if defined(PAE) || defined(PAE_TABLES) #define VM_LEVEL_0_ORDER 9 #else #define VM_LEVEL_0_ORDER 10 #endif #endif /* * Kernel physical load address. */ #ifndef KERNLOAD -#if defined(XEN) && !defined(XEN_PRIVILEGED_GUEST) -#define KERNLOAD 0 -#else #define KERNLOAD (1 << PDRSHIFT) -#endif #endif /* !defined(KERNLOAD) */ /* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. * Because of the page that is both a PD and PT, it looks a little * messy at times, but hey, we'll do anything to save a page :-) */ -#ifdef XEN -#define VM_MAX_KERNEL_ADDRESS HYPERVISOR_VIRT_START -#else #define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1) -#endif #define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI) #define KERNBASE VADDR(KPTDI, 0) #define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) #define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0) #define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0) #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE #define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) #define VM_MIN_ADDRESS ((vm_offset_t)0) /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (3) #endif /* * Optional floor (in bytes) on the size of the kmem arena. */ #ifndef VM_KMEM_SIZE_MIN #define VM_KMEM_SIZE_MIN (12 * 1024 * 1024) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 40% of the * kernel map rounded to the nearest multiple of the superpage size. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX (((((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS) >> (PDRSHIFT - 2)) + 5) / 10) << PDRSHIFT) #endif /* initial pagein size of beginning of executable file */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #ifndef VM_MAX_AUTOTUNE_MAXUSERS #define VM_MAX_AUTOTUNE_MAXUSERS 384 #endif #define SFBUF #define SFBUF_MAP #define SFBUF_CPUSET #define SFBUF_PROCESS_PAGE #endif /* _MACHINE_VMPARAM_H_ */ Index: head/sys/i386/include/xen/features.h =================================================================== --- head/sys/i386/include/xen/features.h (revision 282273) +++ head/sys/i386/include/xen/features.h (nonexistent) @@ -1,22 +0,0 @@ -/****************************************************************************** - * features.h - * - * Query the features reported by Xen. 
- * - * Copyright (c) 2006, Ian Campbell - * - * $FreeBSD$ - */ - -#ifndef __ASM_XEN_FEATURES_H__ -#define __ASM_XEN_FEATURES_H__ - -#include - -extern void setup_xen_features(void); - -extern uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32]; - -#define xen_feature(flag) (xen_features[flag]) - -#endif /* __ASM_XEN_FEATURES_H__ */ Property changes on: head/sys/i386/include/xen/features.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/include/xen/xenstored.h =================================================================== --- head/sys/i386/include/xen/xenstored.h (revision 282273) +++ head/sys/i386/include/xen/xenstored.h (nonexistent) @@ -1,89 +0,0 @@ -/* - * Simple prototyle Xen Store Daemon providing simple tree-like database. - * Copyright (C) 2005 Rusty Russell IBM Corporation - * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef _XENSTORED_H -#define _XENSTORED_H - -enum xsd_sockmsg_type -{ - XS_DEBUG, - XS_SHUTDOWN, - XS_DIRECTORY, - XS_READ, - XS_GET_PERMS, - XS_WATCH, - XS_WATCH_ACK, - XS_UNWATCH, - XS_TRANSACTION_START, - XS_TRANSACTION_END, - XS_OP_READ_ONLY = XS_TRANSACTION_END, - XS_INTRODUCE, - XS_RELEASE, - XS_GETDOMAINPATH, - XS_WRITE, - XS_MKDIR, - XS_RM, - XS_SET_PERMS, - XS_WATCH_EVENT, - XS_ERROR, -}; - -#define XS_WRITE_NONE "NONE" -#define XS_WRITE_CREATE "CREATE" -#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" - -/* We hand errors as strings, for portability. */ -struct xsd_errors -{ - int errnum; - const char *errstring; -}; -#define XSD_ERROR(x) { x, #x } -static struct xsd_errors xsd_errors[] __attribute__((unused)) = { - XSD_ERROR(EINVAL), - XSD_ERROR(EACCES), - XSD_ERROR(EEXIST), - XSD_ERROR(EISDIR), - XSD_ERROR(ENOENT), - XSD_ERROR(ENOMEM), - XSD_ERROR(ENOSPC), - XSD_ERROR(EIO), - XSD_ERROR(ENOTEMPTY), - XSD_ERROR(ENOSYS), - XSD_ERROR(EROFS), - XSD_ERROR(EBUSY), - XSD_ERROR(ETIMEDOUT), - XSD_ERROR(EISCONN), -}; -struct xsd_sockmsg -{ - uint32_t type; - uint32_t len; /* Length of data following this. */ - - /* Generally followed by nul-terminated string(s). 
*/ -}; - -#endif /* _XENSTORED_H */ Property changes on: head/sys/i386/include/xen/xenstored.h ___________________________________________________________________ Deleted: fbsd:nokeywords ## -1 +0,0 ## -true \ No newline at end of property Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/include/xen/xenpmap.h =================================================================== --- head/sys/i386/include/xen/xenpmap.h (revision 282273) +++ head/sys/i386/include/xen/xenpmap.h (nonexistent) @@ -1,237 +0,0 @@ -/* - * - * Copyright (c) 2004 Christian Limpach. - * Copyright (c) 2004,2005 Kip Macy - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Christian Limpach. - * 4. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * - * $FreeBSD$ - */ - -#ifndef _XEN_XENPMAP_H_ -#define _XEN_XENPMAP_H_ - -#if defined(XEN) -void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int); -void xen_pt_switch(vm_paddr_t); -void xen_set_ldt(vm_paddr_t, unsigned long); -void xen_pgdpt_pin(vm_paddr_t); -void xen_pgd_pin(vm_paddr_t); -void xen_pgd_unpin(vm_paddr_t); -void xen_pt_pin(vm_paddr_t); -void xen_pt_unpin(vm_paddr_t); -void xen_flush_queue(void); -void pmap_ref(pt_entry_t *pte, vm_paddr_t ma); -void pmap_suspend(void); -void pmap_resume(void); -void xen_check_queue(void); - -#ifdef INVARIANTS -#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__) -#else -#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0) -#endif - - -#include -#include - -#ifdef PMAP_DEBUG -#define PMAP_REF pmap_ref -#define PMAP_DEC_REF_PAGE pmap_dec_ref_page -#define PMAP_MARK_PRIV pmap_mark_privileged -#define PMAP_MARK_UNPRIV pmap_mark_unprivileged -#else -#define PMAP_MARK_PRIV(a) -#define PMAP_MARK_UNPRIV(a) -#define PMAP_REF(a, b) -#define PMAP_DEC_REF_PAGE(a) -#endif - -#define ALWAYS_SYNC 0 - -#ifdef PT_DEBUG -#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__) -#else -#define PT_LOG() -#endif - -#define INVALID_P2M_ENTRY (~0UL) - -#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ - -#define SH_PD_SET_VA 1 -#define SH_PD_SET_VA_MA 2 -#define SH_PD_SET_VA_CLEAR 3 - -struct pmap; -void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type); -#ifdef notyet -static vm_paddr_t -vptetomachpte(vm_paddr_t *pte) -{ - vm_offset_t offset, ppte; - vm_paddr_t pgoffset, retval, *pdir_shadow_ptr; - int pgindex; - - ppte = (vm_offset_t)pte; - pgoffset = (ppte & PAGE_MASK); - offset = ppte - (vm_offset_t)PTmap; - pgindex = ppte >> PDRSHIFT; - - pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow); - retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset; - return (retval); -} -#endif -#define PT_GET(_ptp) \ - (pmap_valid_entry(*(_ptp)) ? 
xpmap_mtop(*(_ptp)) : (0)) - -#ifdef WRITABLE_PAGETABLES - -#define PT_SET_VA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - PT_LOG(); \ - *(_ptp) = xpmap_ptom((_npte)); \ -} while (/*CONSTCOND*/0) -#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - PT_LOG(); \ - *(_ptp) = (_npte); \ -} while (/*CONSTCOND*/0) -#define PT_CLEAR_VA(_ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - PT_LOG(); \ - *(_ptp) = 0; \ -} while (/*CONSTCOND*/0) - -#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#else /* !WRITABLE_PAGETABLES */ - -#define PT_SET_VA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - xen_queue_pt_update(vtomach(_ptp), \ - xpmap_ptom(_npte)); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - xen_queue_pt_update(vtomach(_ptp), _npte); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PT_CLEAR_VA(_ptp, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - xen_queue_pt_update(vtomach(_ptp), 0); \ - if (sync || ALWAYS_SYNC) \ - xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \ - PMAP_REF((_ptp), xpmap_ptom(_npte)); \ - pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \ - PMAP_REF((_ptp), (_npte)); \ - pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) -#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \ - PMAP_REF((pt_entry_t *)(_ptp), 0); \ - pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \ - if (sync || ALWAYS_SYNC) xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -#endif - -#define PT_SET_MA(_va, _ma) \ -do { \ - PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\ - (_ma), \ - UVMF_INVLPG| UVMF_ALL) < 0); \ -} while (/*CONSTCOND*/0) - -#define PT_UPDATES_FLUSH() do { \ - xen_flush_queue(); \ -} while (/*CONSTCOND*/0) - -static __inline vm_paddr_t -xpmap_mtop(vm_paddr_t mpa) -{ - vm_paddr_t tmp = (mpa & PG_FRAME); - - return machtophys(tmp) | (mpa & ~PG_FRAME); -} - -static __inline vm_paddr_t -xpmap_ptom(vm_paddr_t ppa) -{ - vm_paddr_t tmp = (ppa & PG_FRAME); - - return phystomach(tmp) | (ppa & ~PG_FRAME); -} - -static __inline void -set_phys_to_machine(unsigned long pfn, unsigned long mfn) -{ -#ifdef notyet - PANIC_IF(max_mapnr && pfn >= max_mapnr); -#endif - if (xen_feature(XENFEAT_auto_translated_physmap)) { -#ifdef notyet - PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY)); -#endif - return; - } - xen_phys_machine[pfn] = mfn; -} - -static __inline int -phys_to_machine_mapping_valid(unsigned long pfn) -{ - return xen_phys_machine[pfn] != INVALID_P2M_ENTRY; -} - -#endif /* !XEN */ - -#endif /* _XEN_XENPMAP_H_ */ 
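The PT_SET_VA family above hides the key PV constraint: the guest may not write its own page tables directly, so each update is queued with xen_queue_pt_update() and later pushed to the hypervisor. As a rough sketch of what one unbatched update amounts to (illustrative only; example_set_pte is a hypothetical helper, while mmu_update_t, HYPERVISOR_mmu_update() and DOMID_SELF are the real interfaces visible elsewhere in this change):

static void
example_set_pte(pt_entry_t *ptep, pt_entry_t npte)
{
	mmu_update_t u;

	/* Machine address of the PTE slot to be written. */
	u.ptr = vtomach(ptep);
	/* New value, translated from a physical to a machine frame. */
	u.val = xpmap_ptom(npte);
	if (HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0)
		panic("example_set_pte: mmu_update failed");
}

The removed macros batch such requests and only flush when ordering matters, which is why most of them end with the conditional xen_flush_queue().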
Property changes on: head/sys/i386/include/xen/xenpmap.h ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/i386/include/xen/hypercall.h =================================================================== --- head/sys/i386/include/xen/hypercall.h (revision 282273) +++ head/sys/i386/include/xen/hypercall.h (revision 282274) @@ -1,423 +1,417 @@ /****************************************************************************** * hypercall.h * * Linux-specific hypervisor handling. * * Copyright (c) 2002-2004, K A Fraser * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
*/ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ #include #include #include extern char *hypercall_page; #define __STR(x) #x #define STR(x) __STR(x) #define ENOXENSYS 38 #define CONFIG_XEN_COMPAT 0x030002 #define HYPERCALL_STR(name) \ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)" #define _hypercall0(type, name) \ ({ \ long __res; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res) \ : \ : "memory" ); \ (type)__res; \ }) #define _hypercall1(type, name, a1) \ ({ \ long __res, __ign1; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1) \ : "1" ((long)(a1)) \ : "memory" ); \ (type)__res; \ }) #define _hypercall2(type, name, a1, a2) \ ({ \ long __res, __ign1, __ign2; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ : "1" ((long)(a1)), "2" ((long)(a2)) \ : "memory" ); \ (type)__res; \ }) #define _hypercall3(type, name, a1, a2, a3) \ ({ \ long __res, __ign1, __ign2, __ign3; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ "=d" (__ign3) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)) \ : "memory" ); \ (type)__res; \ }) #define _hypercall4(type, name, a1, a2, a3, a4) \ ({ \ long __res, __ign1, __ign2, __ign3, __ign4; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ "=d" (__ign3), "=S" (__ign4) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)), "4" ((long)(a4)) \ : "memory" ); \ (type)__res; \ }) #define _hypercall5(type, name, a1, a2, a3, a4, a5) \ ({ \ long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)), "4" ((long)(a4)), \ "5" ((long)(a5)) \ : "memory" ); \ (type)__res; \ }) static inline long privcmd_hypercall(long op, long a1, long a2, long a3, long a4, long a5) { long __res, __ign1, __ign2, __ign3, __ign4, __ign5, __call; __call = (long)&hypercall_page + (op * 32); __asm__ volatile ( "call *%[call]" : "=a" (__res), "=b" (__ign1), "=c" (__ign2), "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) : "1" ((long)(a1)), "2" ((long)(a2)), "3" ((long)(a3)), "4" ((long)(a4)), "5" ((long)(a5)), [call] "a" (__call) : "memory" ); return __res; } static inline int HYPERVISOR_set_trap_table( trap_info_t *table) { return _hypercall1(int, set_trap_table, table); } static inline int HYPERVISOR_mmu_update( mmu_update_t *req, int count, int *success_count, domid_t domid) { return _hypercall4(int, mmu_update, req, count, success_count, domid); } static inline int HYPERVISOR_mmuext_op( mmuext_op_t *op, int count, int *success_count, domid_t domid) { return _hypercall4(int, mmuext_op, op, count, success_count, domid); } static inline int HYPERVISOR_set_gdt( unsigned long *frame_list, int entries) { return _hypercall2(int, set_gdt, frame_list, entries); } static inline int HYPERVISOR_stack_switch( unsigned long ss, unsigned long esp) { return _hypercall2(int, stack_switch, ss, esp); } static inline int HYPERVISOR_set_callbacks( unsigned long event_selector, unsigned long event_address, unsigned long failsafe_selector, unsigned long failsafe_address) { return _hypercall4(int, set_callbacks, event_selector, event_address, failsafe_selector, failsafe_address); } static inline int HYPERVISOR_fpu_taskswitch( int set) { return _hypercall1(int, fpu_taskswitch, set); } static inline int HYPERVISOR_sched_op_compat( int cmd, unsigned long arg) { 
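	/*
	 * Compatibility form of the scheduler hypercall: the argument is
	 * passed directly as an unsigned long, whereas the newer
	 * HYPERVISOR_sched_op() below takes a pointer to a per-command
	 * argument structure.
	 */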
return _hypercall2(int, sched_op_compat, cmd, arg); } static inline int HYPERVISOR_sched_op( int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); } static inline long HYPERVISOR_set_timer_op( uint64_t timeout) { unsigned long timeout_hi = (unsigned long)(timeout>>32); unsigned long timeout_lo = (unsigned long)timeout; return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); } #if 0 static inline int HYPERVISOR_platform_op( struct xen_platform_op *platform_op) { platform_op->interface_version = XENPF_INTERFACE_VERSION; return _hypercall1(int, platform_op, platform_op); } #endif static inline int HYPERVISOR_set_debugreg( int reg, unsigned long value) { return _hypercall2(int, set_debugreg, reg, value); } static inline unsigned long HYPERVISOR_get_debugreg( int reg) { return _hypercall1(unsigned long, get_debugreg, reg); } static inline int HYPERVISOR_update_descriptor( uint64_t ma, uint64_t desc) { return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); } static inline int HYPERVISOR_memory_op( unsigned int cmd, void *arg) { return _hypercall2(int, memory_op, cmd, arg); } -#if defined(XEN) -int HYPERVISOR_multicall(multicall_entry_t *, int); static inline int -_HYPERVISOR_multicall( -#else /* XENHVM */ -static inline int HYPERVISOR_multicall( -#endif void *call_list, int nr_calls) { return _hypercall2(int, multicall, call_list, nr_calls); } static inline int HYPERVISOR_update_va_mapping( unsigned long va, uint64_t new_val, unsigned long flags) { uint32_t hi, lo; lo = (uint32_t)(new_val & 0xffffffff); hi = (uint32_t)(new_val >> 32); return _hypercall4(int, update_va_mapping, va, lo, hi, flags); } static inline int HYPERVISOR_event_channel_op( int cmd, void *arg) { int rc = _hypercall2(int, event_channel_op, cmd, arg); #if CONFIG_XEN_COMPAT <= 0x030002 if (__predict_false(rc == -ENOXENSYS)) { struct evtchn_op op; op.cmd = cmd; memcpy(&op.u, arg, sizeof(op.u)); rc = _hypercall1(int, event_channel_op_compat, &op); memcpy(arg, &op.u, sizeof(op.u)); } #endif return (rc); } static inline int HYPERVISOR_xen_version( int cmd, void *arg) { return _hypercall2(int, xen_version, cmd, arg); } static inline int HYPERVISOR_console_io( int cmd, int count, char *str) { return _hypercall3(int, console_io, cmd, count, str); } static inline int HYPERVISOR_physdev_op( int cmd, void *arg) { int rc = _hypercall2(int, physdev_op, cmd, arg); #if CONFIG_XEN_COMPAT <= 0x030002 if (__predict_false(rc == -ENOXENSYS)) { struct physdev_op op; op.cmd = cmd; memcpy(&op.u, arg, sizeof(op.u)); rc = _hypercall1(int, physdev_op_compat, &op); memcpy(arg, &op.u, sizeof(op.u)); } #endif return (rc); } static inline int HYPERVISOR_grant_table_op( unsigned int cmd, void *uop, unsigned int count) { return _hypercall3(int, grant_table_op, cmd, uop, count); } static inline int HYPERVISOR_update_va_mapping_otherdomain( unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid) { uint32_t hi, lo; lo = (uint32_t)(new_val & 0xffffffff); hi = (uint32_t)(new_val >> 32); return _hypercall5(int, update_va_mapping_otherdomain, va, lo, hi, flags, domid); } static inline int HYPERVISOR_vm_assist( unsigned int cmd, unsigned int type) { return _hypercall2(int, vm_assist, cmd, type); } static inline int HYPERVISOR_vcpu_op( int cmd, int vcpuid, void *extra_args) { return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } static inline int HYPERVISOR_suspend( unsigned long srec) { struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_suspend }; int rc = _hypercall3(int, sched_op, 
SCHEDOP_shutdown, &sched_shutdown, srec); #if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOXENSYS) rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, SHUTDOWN_suspend, srec); #endif return (rc); } #if CONFIG_XEN_COMPAT <= 0x030002 static inline int HYPERVISOR_nmi_op( unsigned long op, void *arg) { return _hypercall2(int, nmi_op, op, arg); } #endif static inline int HYPERVISOR_callback_op( int cmd, void *arg) { return _hypercall2(int, callback_op, cmd, arg); } #ifndef CONFIG_XEN static inline unsigned long HYPERVISOR_hvm_op( int op, void *arg) { return _hypercall2(unsigned long, hvm_op, op, arg); } #endif static inline int HYPERVISOR_xenoprof_op( int op, void *arg) { return _hypercall2(int, xenoprof_op, op, arg); } static inline int HYPERVISOR_kexec_op( unsigned long op, void *args) { return _hypercall2(int, kexec_op, op, args); } #endif /* __HYPERCALL_H__ */ /* * Local variables: * c-file-style: "linux" * indent-tabs-mode: t * c-indent-level: 8 * c-basic-offset: 8 * tab-width: 8 * End: */ Index: head/sys/i386/include/xen/xen-os.h =================================================================== --- head/sys/i386/include/xen/xen-os.h (revision 282273) +++ head/sys/i386/include/xen/xen-os.h (revision 282274) @@ -1,287 +1,188 @@ /***************************************************************************** * i386/xen/xen-os.h * * Random collection of macros and definition * * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef _MACHINE_XEN_XEN_OS_H_ #define _MACHINE_XEN_XEN_OS_H_ #ifdef PAE #define CONFIG_X86_PAE #endif /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ static inline void rep_nop(void) { __asm__ __volatile__ ( "rep;nop" : : : "memory" ); } #define cpu_relax() rep_nop() -#ifndef XENHVM - -#ifdef SMP -extern int gdtset; - -#include /* XXX for pcpu.h */ -#include /* XXX for PCPU_GET */ -static inline int -smp_processor_id(void) -{ - if (__predict_true(gdtset)) - return PCPU_GET(cpuid); - return 0; -} - -#else -#define smp_processor_id() 0 -#endif - -#ifndef PANIC_IF -#define PANIC_IF(exp) if (__predict_false(exp)) {printf("panic - %s: %s:%d\n",#exp, __FILE__, __LINE__); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} -#endif - -/* - * Crude memory allocator for memory allocation early in boot. 
- */ -void *bootmem_alloc(unsigned int size); -void bootmem_free(void *ptr, unsigned int size); - -/* - * STI/CLI equivalents. These basically set and clear the virtual - * event_enable flag in the shared_info structure. Note that when - * the enable bit is set, there may be pending events to be handled. - * We may therefore call into do_hypervisor_callback() directly. - */ - -#define __cli() \ -do { \ - vcpu_info_t *_vcpu; \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ - _vcpu->evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - -#define __sti() \ -do { \ - vcpu_info_t *_vcpu; \ - barrier(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ - _vcpu->evtchn_upcall_mask = 0; \ - barrier(); /* unmask then check (avoid races) */ \ - if (__predict_false(_vcpu->evtchn_upcall_pending)) \ - force_evtchn_callback(); \ -} while (0) - -#define __restore_flags(x) \ -do { \ - vcpu_info_t *_vcpu; \ - barrier(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ - if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ - barrier(); /* unmask then check (avoid races) */ \ - if (__predict_false(_vcpu->evtchn_upcall_pending)) \ - force_evtchn_callback(); \ - } \ -} while (0) - -/* - * Add critical_{enter, exit}? - * - */ -#define __save_and_cli(x) \ -do { \ - vcpu_info_t *_vcpu; \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ - (x) = _vcpu->evtchn_upcall_mask; \ - _vcpu->evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) -#define save_and_cli(x) __save_and_cli(x) - -#define local_irq_save(x) __save_and_cli(x) -#define local_irq_restore(x) __restore_flags(x) -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - -#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));} -#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } -#define spin_lock_irqsave mtx_lock_irqsave -#define spin_unlock_irqrestore mtx_unlock_irqrestore - -#endif /* !XENHVM */ - /* This is a barrier for the compiler only, NOT the processor! */ #define barrier() __asm__ __volatile__("": : :"memory") #define LOCK_PREFIX "" #define LOCK "" #define ADDR (*(volatile long *) addr) /* * Make sure gcc doesn't try to be clever and move things around * on us. We need to use _exactly_ the address the user gave us, * not some alias that contains the same information. */ typedef struct { volatile int counter; } atomic_t; #define xen_xchg(ptr,v) \ ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((volatile struct __xchg_dummy *)(x)) static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) { switch (size) { case 1: __asm__ __volatile__("xchgb %b0,%1" :"=q" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; case 2: __asm__ __volatile__("xchgw %w0,%1" :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; case 4: __asm__ __volatile__("xchgl %0,%1" :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; } return x; } /** * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
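 *
 * Usage sketch (illustrative only; port, pending and handle_port are
 * hypothetical names, not part of this header):
 *
 *	if (test_and_clear_bit(port, &pending))
 *		handle_port(port);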
*/ static __inline int test_and_clear_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( LOCK_PREFIX "btrl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"Ir" (nr) : "memory"); return oldbit; } static __inline int constant_test_bit(int nr, const volatile void * addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } static __inline int variable_test_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( "btl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit) :"m" (ADDR),"Ir" (nr)); return oldbit; } #define test_bit(nr,addr) \ (__builtin_constant_p(nr) ? \ constant_test_bit((nr),(addr)) : \ variable_test_bit((nr),(addr))) /** * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * This function is atomic and may not be reordered. See __set_bit() * if you do not require the atomic guarantees. * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __inline__ void set_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btsl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } /** * clear_bit - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and may not be reordered. However, it does * not contain a memory barrier, so if it is used for locking purposes, * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ static __inline__ void clear_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btrl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } /** * atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. Note that the guaranteed * useful range of an atomic_t is only 24 bits. */ static __inline__ void atomic_inc(atomic_t *v) { __asm__ __volatile__( LOCK "incl %0" :"=m" (v->counter) :"m" (v->counter)); } #define rdtscll(val) \ __asm__ __volatile__("rdtsc" : "=A" (val)) #endif /* !__ASSEMBLY__ */ #endif /* _MACHINE_XEN_XEN_OS_H_ */ Index: head/sys/i386/include/xen/xenfunc.h =================================================================== --- head/sys/i386/include/xen/xenfunc.h (revision 282273) +++ head/sys/i386/include/xen/xenfunc.h (revision 282274) @@ -1,82 +1,81 @@ /*- * Copyright (c) 2004, 2005 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _XEN_XENFUNC_H_ #define _XEN_XENFUNC_H_ #include #include #include -#include #include #include #define BKPT __asm__("int3"); #define XPQ_CALL_DEPTH 5 #define XPQ_CALL_COUNT 2 #define PG_PRIV PG_AVAIL3 typedef struct { unsigned long pt_ref; unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH]; } pteinfo_t; extern pteinfo_t *pteinfo_list; #ifdef XENDEBUG_LOW #define __PRINTK(x) printk x #else #define __PRINTK(x) #endif char *xen_setbootenv(char *cmd_line); int xen_boothowto(char *envp); void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line); #ifdef INVARIANTS #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__) #else #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0) #endif void xen_update_descriptor(union descriptor *, union descriptor *); extern struct mtx balloon_lock; #if 0 #define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags) #define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags) #else #define balloon_lock(__flags) __flags = 1 #define balloon_unlock(__flags) __flags = 0 #endif #endif /* _XEN_XENFUNC_H_ */ Index: head/sys/i386/include/xen/xenvar.h =================================================================== --- head/sys/i386/include/xen/xenvar.h (revision 282273) +++ head/sys/i386/include/xen/xenvar.h (revision 282274) @@ -1,119 +1,36 @@ /*- * Copyright (c) 2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef XENVAR_H_ #define XENVAR_H_ -#include +#include -#if defined(XEN) - -#define XBOOTUP 0x1 -#define XPMAP 0x2 -extern int xendebug_flags; -#ifndef NOXENDEBUG -/* Print directly to the Xen console during debugging. 
*/ -#define XENPRINTF xc_printf -#else -#define XENPRINTF printf -#endif - -extern xen_pfn_t *xen_phys_machine; -extern xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; -extern xen_pfn_t *xen_pfn_to_mfn_frame_list_list; - -#if 0 -#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__) -#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__) -#define TRACE_DEBUG(argflags, _f, _a...) \ -if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a); -#else -#define TRACE_ENTER -#define TRACE_EXIT -#define TRACE_DEBUG(argflags, _f, _a...) -#endif - -extern xen_pfn_t *xen_machine_phys; -/* Xen starts physical pages after the 4MB ISA hole - - * FreeBSD doesn't - */ - - -#undef ADD_ISA_HOLE /* XXX */ - -#ifdef ADD_ISA_HOLE -#define ISA_INDEX_OFFSET 1024 -#define ISA_PDR_OFFSET 1 -#else -#define ISA_INDEX_OFFSET 0 -#define ISA_PDR_OFFSET 0 -#endif - - -#define PFNTOMFN(i) (xen_phys_machine[(i)]) -#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)]) - -#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE) -#define PTOV(x) (((uintptr_t)(x)) + KERNBASE) - -#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT) -#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT) - -#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT) -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) - -#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT) -#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT) - - -void xpq_init(void); - -#define BITS_PER_LONG 32 -#define NR_CPUS XEN_LEGACY_MAX_VCPUS - -#define BITS_TO_LONGS(bits) \ - (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) -#define DECLARE_BITMAP(name,bits) \ - unsigned long name[BITS_TO_LONGS(bits)] - -int xen_create_contiguous_region(vm_page_t pages, int npages); - -void xen_destroy_contiguous_region(void * addr, int npages); - -#elif defined(XENHVM) - #define vtomach(va) pmap_kextract((vm_offset_t) (va)) -#define PFNTOMFN(pa) (pa) -#define MFNTOPFN(ma) (ma) - -#define set_phys_to_machine(pfn, mfn) ((void)0) -#define phys_to_machine_mapping_valid(pfn) (TRUE) - -#endif /* !XEN && !XENHVM */ #endif Index: head/sys/i386/isa/npx.c =================================================================== --- head/sys/i386/isa/npx.c (revision 282273) +++ head/sys/i386/isa/npx.c (revision 282274) @@ -1,1410 +1,1401 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include -#ifdef XEN -#include -#include -#endif #ifdef DEV_ISA #include #endif #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) #ifdef CPU_ENABLE_SSE #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) static __inline void xrstor(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi)); } static __inline void xsave(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } static __inline void xsaveopt(char *addr, uint64_t mask) { uint32_t low, hi; low = mask; hi = mask >> 32; __asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) : "memory"); } #endif #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fp_divide_by_0(void); void frstor(caddr_t addr); #ifdef CPU_ENABLE_SSE void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void ldmxcsr(u_int csr); void stmxcsr(u_int *csr); void xrstor(char *addr, uint64_t mask); void xsave(char *addr, uint64_t mask); void xsaveopt(char *addr, uint64_t mask); #endif #endif /* __GNUCLIKE_ASM && !lint */ -#ifdef XEN -#define start_emulating() (HYPERVISOR_fpu_taskswitch(1)) -#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0)) -#else #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() -#endif #ifdef CPU_ENABLE_SSE #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? 
\ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) #else /* CPU_ENABLE_SSE */ #define GET_FPU_CW(thread) \ (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) \ (savefpu)->sv_87.sv_env.en_cw = (value) #endif /* CPU_ENABLE_SSE */ #ifdef CPU_ENABLE_SSE CTASSERT(sizeof(union savefpu) == 512); CTASSERT(sizeof(struct xstate_hdr) == 64); CTASSERT(sizeof(struct savefpu_ymm) == 832); /* * This requirement is to make it easier for asm code to calculate * offset of the fpu save area from the pcb address. FPU save area * must be 64-byte aligned. */ CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); /* * Ensure the copy of XCR0 saved in a core is contained in the padding * area. */ CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) && X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm)); static void fpu_clean_state(void); #endif static void fpusave(union savefpu *); static void fpurstor(union savefpu *); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); #ifdef CPU_ENABLE_SSE int use_xsave; uint64_t xsave_mask; #endif static uma_zone_t fpu_save_area_zone; static union savefpu *npx_initialstate; #ifdef CPU_ENABLE_SSE struct xsave_area_elm_descr { u_int offset; u_int size; } *xsave_area_desc; static int use_xsaveopt; #endif static volatile u_int npx_traps_while_probing; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Determine if an FPU is present and how to use it. */ static int npx_probe(void) { struct gate_descriptor save_idt_npxtrap; u_short control, status; /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; return (1); } save_idt_npxtrap = idt[IDT_MF]; setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ stop_emulating(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. */ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); #ifdef FPU_ERROR_BROKEN /* * FPU error signal doesn't work on some CPU * accelerator board. 
*/ hw_float = 1; return (1); #endif npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. */ hw_float = 1; goto cleanup; } printf( "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. */ printf("WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float); } #ifdef CPU_ENABLE_SSE /* * Enable XSAVE if supported and allowed by user. * Calculate the xsave_mask. */ static void npxinit_bsp1(void) { u_int cp[4]; uint64_t xsave_mask_user; if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; if ((cp[0] & xsave_mask) != xsave_mask) panic("CPU0 does not support X87 or SSE: %x", cp[0]); xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; xsave_mask_user = xsave_mask; TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512) xsave_mask &= ~XFEATURE_AVX512; if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX) xsave_mask &= ~XFEATURE_MPX; cpuid_count(0xd, 0x1, cp); if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) use_xsaveopt = 1; } #endif /* * Calculate the fpu save area size. */ static void npxinit_bsp2(void) { #ifdef CPU_ENABLE_SSE u_int cp[4]; if (use_xsave) { cpuid_count(0xd, 0x0, cp); cpu_max_ext_state_size = cp[1]; /* * Reload the cpu_feature2, since we enabled OSXSAVE. */ do_cpuid(1, cp); cpu_feature2 = cp[2]; } else #endif cpu_max_ext_state_size = sizeof(union savefpu); } /* * Initialize floating point unit. */ void npxinit(bool bsp) { static union savefpu dummy; register_t saveintr; #ifdef CPU_ENABLE_SSE u_int mxcsr; #endif u_short control; if (bsp) { if (!npx_probe()) return; #ifdef CPU_ENABLE_SSE npxinit_bsp1(); #endif } #ifdef CPU_ENABLE_SSE if (use_xsave) { load_cr4(rcr4() | CR4_XSAVE); load_xcr(XCR0, xsave_mask); } #endif /* * XCR0 shall be set up before CPU can report the save area size. */ if (bsp) npxinit_bsp2(); /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. fpusave() initializes * the fpu. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); stop_emulating(); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fninit(); else #endif fnsave(&dummy); control = __INITIAL_NPXCW__; fldcw(control); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { mxcsr = __INITIAL_MXCSR__; ldmxcsr(mxcsr); } #endif start_emulating(); intr_restore(saveintr); } /* * On the boot CPU we generate a clean state that is used to * initialize the floating point unit when it is first used by a * process. */ static void npxinitstate(void *arg __unused) { register_t saveintr; #ifdef CPU_ENABLE_SSE int cp[4], i, max_ext_n; #endif if (!hw_float) return; npx_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); saveintr = intr_disable(); stop_emulating(); fpusave(npx_initialstate); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; /* * The fninit instruction does not modify XMM * registers. 
The fpusave call dumped the garbage * contained in the registers after reset to the * initial state saved. Clear XMM registers file * image to make the startup program state and signal * handler XMM register content predictable. */ bzero(npx_initialstate->sv_xmm.sv_fp, sizeof(npx_initialstate->sv_xmm.sv_fp)); bzero(npx_initialstate->sv_xmm.sv_xmm, sizeof(npx_initialstate->sv_xmm.sv_xmm)); } else #endif bzero(npx_initialstate->sv_87.sv_ac, sizeof(npx_initialstate->sv_87.sv_ac)); #ifdef CPU_ENABLE_SSE /* * Create a table describing the layout of the CPU Extended * Save Area. */ if (use_xsave) { if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); xsave_area_desc = malloc(max_ext_n * sizeof(struct xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); /* x87 state */ xsave_area_desc[0].offset = 0; xsave_area_desc[0].size = 160; /* XMM */ xsave_area_desc[1].offset = 160; xsave_area_desc[1].size = 288 - 160; for (i = 2; i < max_ext_n; i++) { cpuid_count(0xd, i, cp); xsave_area_desc[i].offset = cp[1]; xsave_area_desc[i].size = cp[0]; } } #endif fpu_save_area_zone = uma_zcreate("FPU_save_area", cpu_max_ext_state_size, NULL, NULL, NULL, NULL, XSAVE_AREA_ALIGN - 1, 0); start_emulating(); intr_restore(saveintr); } SYSINIT(npxinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, npxinitstate, NULL); /* * Free coprocessor (if we have it). */ void npxexit(td) struct thread *td; { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); PCPU_SET(fpcurthread, NULL); } critical_exit(); #ifdef NPX_DEBUG if (hw_float) { u_int masked_exceptions; masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). */ if (masked_exceptions & 0x0d) log(LOG_ERR, "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", td->td_proc->p_pid, td->td_proc->p_comm, masked_exceptions); } #endif } int npxformat() { if (!hw_float) return (_MC_FPFMT_NODEV); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) return (_MC_FPFMT_XMM); #endif return (_MC_FPFMT_387); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". * * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. 
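 *
 * Worked example (illustrative): with a control word that masks every
 * exception except divide-by-zero (ZM, bit 2, clear) and a status word of
 * 0x24 (DZ and IMP set), (~control & 0x3f) | 0x40 evaluates to 0x44, the
 * masked status is 0x04, and fpetable[0x04] is FPE_FLTDIV.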
* * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 
FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. */ int npxtrap_x87(void) { u_short control, status; if (!hw_float) { printf( "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. 
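 * Concretely: if this thread no longer owns the FPU, the control and
 * status words are read from its PCB save area; otherwise they are read
 * directly from the hardware with fnstcw/fnstsw.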
*/ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } #ifdef CPU_ENABLE_SSE int npxtrap_sse(void) { u_int mxcsr; if (!hw_float) { printf( "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } #endif /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; int npxdna(void) { if (!hw_float) return (0); critical_enter(); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); critical_exit(); return (1); } if (PCPU_GET(fpcurthread) != NULL) { printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("npxdna"); } stop_emulating(); /* * Record new context early in case frstor causes a trap. */ PCPU_SET(fpcurthread, curthread); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpu_clean_state(); #endif if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. * * We prefer to restore the state from the actual save * area in PCB instead of directly loading from * npx_initialstate, to ignite the XSAVEOPT * tracking engine. */ bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); fpurstor(curpcb->pcb_save); if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(curpcb->pcb_initial_npxcw); curpcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(curpcb)) curpcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { fpurstor(curpcb->pcb_save); } critical_exit(); return (1); } /* * Wrapper for fpusave() called from context switch routines. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. */ void npxsave(addr) union savefpu *addr; { stop_emulating(); #ifdef CPU_ENABLE_SSE if (use_xsaveopt) xsaveopt((char *)addr, xsave_mask); else #endif fpusave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } /* * Unconditionally save the current co-processor state across suspend and * resume. */ void npxsuspend(union savefpu *addr) { register_t cr0; if (!hw_float) return; if (PCPU_GET(fpcurthread) == NULL) { bcopy(npx_initialstate, addr, cpu_max_ext_state_size); return; } cr0 = rcr0(); stop_emulating(); fpusave(addr); load_cr0(cr0); } void npxresume(union savefpu *addr) { register_t cr0; if (!hw_float) return; cr0 = rcr0(); npxinit(false); stop_emulating(); fpurstor(addr); load_cr0(cr0); } void npxdrop() { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. 
*/ #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int npxgetregs(struct thread *td) { struct pcb *pcb; #ifdef CPU_ENABLE_SSE uint64_t *xstate_bv, bit; char *sa; int max_ext_n, i; #endif int owned; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb), cpu_max_ext_state_size); SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw); npxuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread)) { fpusave(get_pcb_user_save_pcb(pcb)); #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); owned = _MC_FPOWNED_FPU; } else { owned = _MC_FPOWNED_PCB; } critical_exit(); #ifdef CPU_ENABLE_SSE if (use_xsave) { /* * Handle partially saved state. */ sa = (char *)get_pcb_user_save_pcb(pcb); xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) + offsetof(struct xstate_hdr, xstate_bv)); if (xsave_mask >> 32 != 0) max_ext_n = fls(xsave_mask >> 32) + 32; else max_ext_n = fls(xsave_mask); for (i = 0; i < max_ext_n; i++) { bit = 1ULL << i; if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0) continue; bcopy((char *)npx_initialstate + xsave_area_desc[i].offset, sa + xsave_area_desc[i].offset, xsave_area_desc[i].size); *xstate_bv |= bit; } } #endif return (owned); } void npxuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } #ifdef CPU_ENABLE_SSE int npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) { struct xstate_hdr *hdr, *ehdr; size_t len, max_len; uint64_t bv; /* XXXKIB should we clear all extended state in xstate_bv instead ? */ if (xfpustate == NULL) return (0); if (!use_xsave) return (EOPNOTSUPP); len = xfpustate_size; if (len < sizeof(struct xstate_hdr)) return (EINVAL); max_len = cpu_max_ext_state_size - sizeof(union savefpu); if (len > max_len) return (EINVAL); ehdr = (struct xstate_hdr *)xfpustate; bv = ehdr->xstate_bv; /* * Avoid #gp. */ if (bv & ~xsave_mask) return (EINVAL); hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); hdr->xstate_bv = bv; bcopy(xfpustate + sizeof(struct xstate_hdr), (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); return (0); } #endif int npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate, size_t xfpustate_size) { struct pcb *pcb; #ifdef CPU_ENABLE_SSE int error; #endif if (!hw_float) return (ENXIO); pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { #ifdef CPU_ENABLE_SSE error = npxsetxstate(td, xfpustate, xfpustate_size); if (error != 0) { critical_exit(); return (error); } if (!cpu_fxsr) #endif fnclex(); /* As in npxdrop(). 
*/ bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); fpurstor(get_pcb_user_save_td(td)); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); #ifdef CPU_ENABLE_SSE error = npxsetxstate(td, xfpustate, xfpustate_size); if (error != 0) return (error); #endif bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); npxuserinited(td); } return (0); } static void fpusave(addr) union savefpu *addr; { #ifdef CPU_ENABLE_SSE if (use_xsave) xsave((char *)addr, xsave_mask); else if (cpu_fxsr) fxsave(addr); else #endif fnsave(addr); } #ifdef CPU_ENABLE_SSE /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). */ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } #endif /* CPU_ENABLE_SSE */ static void fpurstor(addr) union savefpu *addr; { #ifdef CPU_ENABLE_SSE if (use_xsave) xrstor((char *)addr, xsave_mask); else if (cpu_fxsr) fxrstor(addr); else #endif frstor(addr); } #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); #ifndef PC98 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define FPU_KERN_CTX_NPXINITDONE 0x01 #define FPU_KERN_CTX_DUMMY 0x02 struct fpu_kern_ctx { union savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + cpu_max_ext_state_size; res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { /* XXXKIB clear the memory ? 
*/ free(ctx, M_FPUKERN_CTX); } static union savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((union savefpu *)p); } int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY; return (0); } pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = 0; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; return (0); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); pcb->pcb_save = ctx->prev; if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; pcb->pcb_flags &= ~PCB_KERNNPX; } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb), ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); curpcb->pcb_flags |= PCB_KERNNPX; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX) != 0); } /* * FPU save area alloc/free/init utility routines */ union savefpu * fpu_save_area_alloc(void) { return (uma_zalloc(fpu_save_area_zone, 0)); } void fpu_save_area_free(union savefpu *fsa) { uma_zfree(fpu_save_area_zone, fsa); } void fpu_save_area_reset(union savefpu *fsa) { bcopy(npx_initialstate, fsa, cpu_max_ext_state_size); } Index: head/sys/i386/pci/pci_cfgreg.c =================================================================== --- head/sys/i386/pci/pci_cfgreg.c (revision 282273) +++ head/sys/i386/pci/pci_cfgreg.c (revision 282274) @@ -1,728 +1,718 @@ /*- * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi * Copyright (c) 2004, Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef XBOX #include #endif #define PRVERB(a) do { \ if (bootverbose) \ printf a ; \ } while(0) #define PCIE_CACHE 8 struct pcie_cfg_elem { TAILQ_ENTRY(pcie_cfg_elem) elem; vm_offset_t vapage; vm_paddr_t papage; }; enum { CFGMECH_NONE = 0, CFGMECH_1, CFGMECH_2, CFGMECH_PCIE, }; SYSCTL_DECL(_hw_pci); static TAILQ_HEAD(pcie_cfg_list, pcie_cfg_elem) pcie_list[MAXCPU]; static uint64_t pcie_base; static int pcie_minbus, pcie_maxbus; static uint32_t pcie_badslots; static int cfgmech; static int devmax; static struct mtx pcicfg_mtx; static int mcfg_enable = 1; SYSCTL_INT(_hw_pci, OID_AUTO, mcfg, CTLFLAG_RDTUN, &mcfg_enable, 0, "Enable support for PCI-e memory mapped config access"); static uint32_t pci_docfgregread(int bus, int slot, int func, int reg, int bytes); static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes); static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes); -#ifndef XEN static int pcireg_cfgopen(void); -#endif static int pciereg_cfgread(int bus, unsigned slot, unsigned func, unsigned reg, unsigned bytes); static void pciereg_cfgwrite(int bus, unsigned slot, unsigned func, unsigned reg, int data, unsigned bytes); /* * Some BIOS writers seem to want to ignore the spec and put * 0 in the intline rather than 255 to indicate none. Some use * numbers in the range 128-254 to indicate something strange and * apparently undocumented anywhere. Assume these are completely bogus * and map them to 255, which means "none". */ static __inline int pci_i386_map_intline(int line) { if (line == 0 || line >= 128) return (PCI_INVALID_IRQ); return (line); } -#ifndef XEN static u_int16_t pcibios_get_version(void) { struct bios_regs args; if (PCIbios.ventry == 0) { PRVERB(("pcibios: No call entry point\n")); return (0); } args.eax = PCIBIOS_BIOS_PRESENT; if (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))) { PRVERB(("pcibios: BIOS_PRESENT call failed\n")); return (0); } if (args.edx != 0x20494350) { PRVERB(("pcibios: BIOS_PRESENT didn't return 'PCI ' in edx\n")); return (0); } return (args.ebx & 0xffff); } -#endif /* * Initialise access to PCI configuration space */ int pci_cfgregopen(void) { -#ifdef XEN - return (0); -#else static int opened = 0; uint64_t pciebar; u_int16_t vid, did; u_int16_t v; if (opened) return (1); if (cfgmech == CFGMECH_NONE && pcireg_cfgopen() == 0) return (0); v = pcibios_get_version(); if (v > 0) PRVERB(("pcibios: BIOS version %x.%02x\n", (v & 0xff00) >> 8, v & 0xff)); mtx_init(&pcicfg_mtx, "pcicfg", NULL, MTX_SPIN); opened = 1; /* $PIR requires PCI BIOS 2.10 or greater. */ if (v >= 0x0210) pci_pir_open(); if (cfgmech == CFGMECH_PCIE) return (1); /* * Grope around in the PCI config space to see if this is a * chipset that is capable of doing memory-mapped config cycles. 
* This also implies that it can do PCIe extended config cycles. */ /* Check for supported chipsets */ vid = pci_cfgregread(0, 0, 0, PCIR_VENDOR, 2); did = pci_cfgregread(0, 0, 0, PCIR_DEVICE, 2); switch (vid) { case 0x8086: switch (did) { case 0x3590: case 0x3592: /* Intel 7520 or 7320 */ pciebar = pci_cfgregread(0, 0, 0, 0xce, 2) << 16; pcie_cfgregopen(pciebar, 0, 255); break; case 0x2580: case 0x2584: case 0x2590: /* Intel 915, 925, or 915GM */ pciebar = pci_cfgregread(0, 0, 0, 0x48, 4); pcie_cfgregopen(pciebar, 0, 255); break; } } return(1); -#endif } static uint32_t pci_docfgregread(int bus, int slot, int func, int reg, int bytes) { if (cfgmech == CFGMECH_PCIE && (bus >= pcie_minbus && bus <= pcie_maxbus) && (bus != 0 || !(1 << slot & pcie_badslots))) return (pciereg_cfgread(bus, slot, func, reg, bytes)); else return (pcireg_cfgread(bus, slot, func, reg, bytes)); } /* * Read configuration space register */ u_int32_t pci_cfgregread(int bus, int slot, int func, int reg, int bytes) { uint32_t line; /* * Some BIOS writers seem to want to ignore the spec and put * 0 in the intline rather than 255 to indicate none. The rest of * the code uses 255 as an invalid IRQ. */ if (reg == PCIR_INTLINE && bytes == 1) { line = pci_docfgregread(bus, slot, func, PCIR_INTLINE, 1); return (pci_i386_map_intline(line)); } return (pci_docfgregread(bus, slot, func, reg, bytes)); } /* * Write configuration space register */ void pci_cfgregwrite(int bus, int slot, int func, int reg, u_int32_t data, int bytes) { if (cfgmech == CFGMECH_PCIE && (bus >= pcie_minbus && bus <= pcie_maxbus) && (bus != 0 || !(1 << slot & pcie_badslots))) pciereg_cfgwrite(bus, slot, func, reg, data, bytes); else pcireg_cfgwrite(bus, slot, func, reg, data, bytes); } /* * Configuration space access using direct register operations */ /* enable configuration space accesses and return data port address */ static int pci_cfgenable(unsigned bus, unsigned slot, unsigned func, int reg, int bytes) { int dataport = 0; #ifdef XBOX if (arch_i386_is_xbox) { /* * The Xbox MCPX chipset is a derivative of the nForce 1 * chipset. It almost has the same bus layout; some devices * cannot be used, because they have been removed. */ /* * Devices 00:00.1 and 00:00.2 used to be memory controllers on * the nForce chipset, but on the Xbox, using them will lockup * the chipset. */ if (bus == 0 && slot == 0 && (func == 1 || func == 2)) return dataport; /* * Bus 1 only contains a VGA controller at 01:00.0. When you try * to probe beyond that device, you only get garbage, which * could cause lockups. */ if (bus == 1 && (slot != 0 || func != 0)) return dataport; /* * Bus 2 used to contain the AGP controller, but the Xbox MCPX * doesn't have one. Probing it can cause lockups. */ if (bus >= 2) return dataport; } #endif if (bus <= PCI_BUSMAX && slot < devmax && func <= PCI_FUNCMAX && (unsigned)reg <= PCI_REGMAX && bytes != 3 && (unsigned)bytes <= 4 && (reg & (bytes - 1)) == 0) { switch (cfgmech) { case CFGMECH_PCIE: case CFGMECH_1: outl(CONF1_ADDR_PORT, (1U << 31) | (bus << 16) | (slot << 11) | (func << 8) | (reg & ~0x03)); dataport = CONF1_DATA_PORT + (reg & 0x03); break; case CFGMECH_2: outb(CONF2_ENABLE_PORT, 0xf0 | (func << 1)); outb(CONF2_FORWARD_PORT, bus); dataport = 0xc000 | (slot << 8) | reg; break; } } return (dataport); } /* disable configuration space accesses */ static void pci_cfgdisable(void) { switch (cfgmech) { case CFGMECH_PCIE: case CFGMECH_1: /* * Do nothing for the config mechanism 1 case. 
* Writing a 0 to the address port can apparently * confuse some bridges and cause spurious * access failures. */ break; case CFGMECH_2: outb(CONF2_ENABLE_PORT, 0); break; } } static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes) { int data = -1; int port; mtx_lock_spin(&pcicfg_mtx); port = pci_cfgenable(bus, slot, func, reg, bytes); if (port != 0) { switch (bytes) { case 1: data = inb(port); break; case 2: data = inw(port); break; case 4: data = inl(port); break; } pci_cfgdisable(); } mtx_unlock_spin(&pcicfg_mtx); return (data); } static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes) { int port; mtx_lock_spin(&pcicfg_mtx); port = pci_cfgenable(bus, slot, func, reg, bytes); if (port != 0) { switch (bytes) { case 1: outb(port, data); break; case 2: outw(port, data); break; case 4: outl(port, data); break; } pci_cfgdisable(); } mtx_unlock_spin(&pcicfg_mtx); } -#ifndef XEN /* check whether the configuration mechanism has been correctly identified */ static int pci_cfgcheck(int maxdev) { uint32_t id, class; uint8_t header; uint8_t device; int port; if (bootverbose) printf("pci_cfgcheck:\tdevice "); for (device = 0; device < maxdev; device++) { if (bootverbose) printf("%d ", device); port = pci_cfgenable(0, device, 0, 0, 4); id = inl(port); if (id == 0 || id == 0xffffffff) continue; port = pci_cfgenable(0, device, 0, 8, 4); class = inl(port) >> 8; if (bootverbose) printf("[class=%06x] ", class); if (class == 0 || (class & 0xf870ff) != 0) continue; port = pci_cfgenable(0, device, 0, 14, 1); header = inb(port); if (bootverbose) printf("[hdr=%02x] ", header); if ((header & 0x7e) != 0) continue; if (bootverbose) printf("is there (id=%08x)\n", id); pci_cfgdisable(); return (1); } if (bootverbose) printf("-- nothing found\n"); pci_cfgdisable(); return (0); } static int pcireg_cfgopen(void) { uint32_t mode1res, oldval1; uint8_t mode2res, oldval2; /* Check for type #1 first. */ oldval1 = inl(CONF1_ADDR_PORT); if (bootverbose) { printf("pci_open(1):\tmode 1 addr port (0x0cf8) is 0x%08x\n", oldval1); } cfgmech = CFGMECH_1; devmax = 32; outl(CONF1_ADDR_PORT, CONF1_ENABLE_CHK); DELAY(1); mode1res = inl(CONF1_ADDR_PORT); outl(CONF1_ADDR_PORT, oldval1); if (bootverbose) printf("pci_open(1a):\tmode1res=0x%08x (0x%08lx)\n", mode1res, CONF1_ENABLE_CHK); if (mode1res) { if (pci_cfgcheck(32)) return (cfgmech); } outl(CONF1_ADDR_PORT, CONF1_ENABLE_CHK1); mode1res = inl(CONF1_ADDR_PORT); outl(CONF1_ADDR_PORT, oldval1); if (bootverbose) printf("pci_open(1b):\tmode1res=0x%08x (0x%08lx)\n", mode1res, CONF1_ENABLE_CHK1); if ((mode1res & CONF1_ENABLE_MSK1) == CONF1_ENABLE_RES1) { if (pci_cfgcheck(32)) return (cfgmech); } /* Type #1 didn't work, so try type #2. */ oldval2 = inb(CONF2_ENABLE_PORT); if (bootverbose) { printf("pci_open(2):\tmode 2 enable port (0x0cf8) is 0x%02x\n", oldval2); } if ((oldval2 & 0xf0) == 0) { cfgmech = CFGMECH_2; devmax = 16; outb(CONF2_ENABLE_PORT, CONF2_ENABLE_CHK); mode2res = inb(CONF2_ENABLE_PORT); outb(CONF2_ENABLE_PORT, oldval2); if (bootverbose) printf("pci_open(2a):\tmode2res=0x%02x (0x%02x)\n", mode2res, CONF2_ENABLE_CHK); if (mode2res == CONF2_ENABLE_RES) { if (bootverbose) printf("pci_open(2a):\tnow trying mechanism 2\n"); if (pci_cfgcheck(16)) return (cfgmech); } } /* Nothing worked, so punt. 
*/ cfgmech = CFGMECH_NONE; devmax = 0; return (cfgmech); } int pcie_cfgregopen(uint64_t base, uint8_t minbus, uint8_t maxbus) { struct pcie_cfg_list *pcielist; struct pcie_cfg_elem *pcie_array, *elem; #ifdef SMP struct pcpu *pc; #endif vm_offset_t va; uint32_t val1, val2; int i, slot; if (!mcfg_enable) return (0); if (minbus != 0) return (0); #ifndef PAE if (base >= 0x100000000) { if (bootverbose) printf( "PCI: Memory Mapped PCI configuration area base 0x%jx too high\n", (uintmax_t)base); return (0); } #endif if (bootverbose) printf("PCIe: Memory Mapped configuration base @ 0x%jx\n", (uintmax_t)base); #ifdef SMP STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) #endif { pcie_array = malloc(sizeof(struct pcie_cfg_elem) * PCIE_CACHE, M_DEVBUF, M_NOWAIT); if (pcie_array == NULL) return (0); va = kva_alloc(PCIE_CACHE * PAGE_SIZE); if (va == 0) { free(pcie_array, M_DEVBUF); return (0); } #ifdef SMP pcielist = &pcie_list[pc->pc_cpuid]; #else pcielist = &pcie_list[0]; #endif TAILQ_INIT(pcielist); for (i = 0; i < PCIE_CACHE; i++) { elem = &pcie_array[i]; elem->vapage = va + (i * PAGE_SIZE); elem->papage = 0; TAILQ_INSERT_HEAD(pcielist, elem, elem); } } pcie_base = base; pcie_minbus = minbus; pcie_maxbus = maxbus; cfgmech = CFGMECH_PCIE; devmax = 32; /* * On some AMD systems, some of the devices on bus 0 are * inaccessible using memory-mapped PCI config access. Walk * bus 0 looking for such devices. For these devices, we will * fall back to using type 1 config access instead. */ if (pci_cfgregopen() != 0) { for (slot = 0; slot <= PCI_SLOTMAX; slot++) { val1 = pcireg_cfgread(0, slot, 0, 0, 4); if (val1 == 0xffffffff) continue; val2 = pciereg_cfgread(0, slot, 0, 0, 4); if (val2 != val1) pcie_badslots |= (1 << slot); } } return (1); } -#endif /* !XEN */ #define PCIE_PADDR(base, reg, bus, slot, func) \ ((base) + \ ((((bus) & 0xff) << 20) | \ (((slot) & 0x1f) << 15) | \ (((func) & 0x7) << 12) | \ ((reg) & 0xfff))) static __inline vm_offset_t pciereg_findaddr(int bus, unsigned slot, unsigned func, unsigned reg) { struct pcie_cfg_list *pcielist; struct pcie_cfg_elem *elem; vm_paddr_t pa, papage; pa = PCIE_PADDR(pcie_base, reg, bus, slot, func); papage = pa & ~PAGE_MASK; /* * Find an element in the cache that matches the physical page desired, * or create a new mapping from the least recently used element. * A very simple LRU algorithm is used here, does it need to be more * efficient? */ pcielist = &pcie_list[PCPU_GET(cpuid)]; TAILQ_FOREACH(elem, pcielist, elem) { if (elem->papage == papage) break; } if (elem == NULL) { elem = TAILQ_LAST(pcielist, pcie_cfg_list); if (elem->papage != 0) { pmap_kremove(elem->vapage); invlpg(elem->vapage); } pmap_kenter(elem->vapage, papage); elem->papage = papage; } if (elem != TAILQ_FIRST(pcielist)) { TAILQ_REMOVE(pcielist, elem, elem); TAILQ_INSERT_HEAD(pcielist, elem, elem); } return (elem->vapage | (pa & PAGE_MASK)); } /* * AMD BIOS And Kernel Developer's Guides for CPU families starting with 10h * have a requirement that all accesses to the memory mapped PCI configuration * space are done using AX class of registers. * Since other vendors do not currently have any contradicting requirements * the AMD access pattern is applied universally. 
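 *
 * Concretely, that is why the inline assembly in pciereg_cfgread() and
 * pciereg_cfgwrite() below uses the "a" register constraint: 4-, 2- and
 * 1-byte accesses are forced through %eax, %ax and %al respectively,
 * rather than whatever register the compiler might otherwise pick.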
*/ static int pciereg_cfgread(int bus, unsigned slot, unsigned func, unsigned reg, unsigned bytes) { vm_offset_t va; int data = -1; if (bus < pcie_minbus || bus > pcie_maxbus || slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX) return (-1); critical_enter(); va = pciereg_findaddr(bus, slot, func, reg); switch (bytes) { case 4: __asm("movl %1, %0" : "=a" (data) : "m" (*(volatile uint32_t *)va)); break; case 2: __asm("movzwl %1, %0" : "=a" (data) : "m" (*(volatile uint16_t *)va)); break; case 1: __asm("movzbl %1, %0" : "=a" (data) : "m" (*(volatile uint8_t *)va)); break; } critical_exit(); return (data); } static void pciereg_cfgwrite(int bus, unsigned slot, unsigned func, unsigned reg, int data, unsigned bytes) { vm_offset_t va; if (bus < pcie_minbus || bus > pcie_maxbus || slot > PCI_SLOTMAX || func > PCI_FUNCMAX || reg > PCIE_REGMAX) return; critical_enter(); va = pciereg_findaddr(bus, slot, func, reg); switch (bytes) { case 4: __asm("movl %1, %0" : "=m" (*(volatile uint32_t *)va) : "a" (data)); break; case 2: __asm("movw %1, %0" : "=m" (*(volatile uint16_t *)va) : "a" ((uint16_t)data)); break; case 1: __asm("movb %1, %0" : "=m" (*(volatile uint8_t *)va) : "a" ((uint8_t)data)); break; } critical_exit(); } Index: head/sys/i386/pci/pci_pir.c =================================================================== --- head/sys/i386/pci/pci_pir.c (revision 282273) +++ head/sys/i386/pci/pci_pir.c (revision 282274) @@ -1,754 +1,747 @@ /*- * Copyright (c) 1997, Stefan Esser * Copyright (c) 2000, Michael Smith * Copyright (c) 2000, BSDi * Copyright (c) 2004, John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUM_ISA_INTERRUPTS 16 /* * A link device. Loosely based on the ACPI PCI link device. This doesn't * try to support priorities for different ISA interrupts. 
*/ struct pci_link { TAILQ_ENTRY(pci_link) pl_links; uint8_t pl_id; uint8_t pl_irq; uint16_t pl_irqmask; int pl_references; int pl_routed; }; struct pci_link_lookup { struct pci_link **pci_link_ptr; int bus; int device; int pin; }; struct pci_dev_lookup { uint8_t link; int bus; int device; int pin; }; typedef void pir_entry_handler(struct PIR_entry *entry, struct PIR_intpin* intpin, void *arg); static void pci_print_irqmask(u_int16_t irqs); static int pci_pir_biosroute(int bus, int device, int func, int pin, int irq); static int pci_pir_choose_irq(struct pci_link *pci_link, int irqmask); static void pci_pir_create_links(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_dump_links(void); static struct pci_link *pci_pir_find_link(uint8_t link_id); static void pci_pir_find_link_handler(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_initial_irqs(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg); static void pci_pir_parse(void); static uint8_t pci_pir_search_irq(int bus, int device, int pin); static int pci_pir_valid_irq(struct pci_link *pci_link, int irq); static void pci_pir_walk_table(pir_entry_handler *handler, void *arg); static MALLOC_DEFINE(M_PIR, "$PIR", "$PIR structures"); static struct PIR_table *pci_route_table; static device_t pir_device; static int pci_route_count, pir_bios_irqs, pir_parsed; static TAILQ_HEAD(, pci_link) pci_links; static int pir_interrupt_weight[NUM_ISA_INTERRUPTS]; /* sysctl vars */ SYSCTL_DECL(_hw_pci); /* XXX this likely should live in a header file */ #ifdef PC98 /* IRQs 3, 5, 7, 9, 10, 11, 12, 13 */ #define PCI_IRQ_OVERRIDE_MASK 0x3e68 #else /* IRQs 3, 4, 5, 6, 7, 9, 10, 11, 12, 14, 15 */ #define PCI_IRQ_OVERRIDE_MASK 0xdef8 #endif static uint32_t pci_irq_override_mask = PCI_IRQ_OVERRIDE_MASK; SYSCTL_INT(_hw_pci, OID_AUTO, irq_override_mask, CTLFLAG_RDTUN, &pci_irq_override_mask, PCI_IRQ_OVERRIDE_MASK, "Mask of allowed irqs to try to route when it has no good clue about\n" "which irqs it should use."); /* * Look for the interrupt routing table. * * We use PCI BIOS's PIR table if it's available. $PIR is the standard way * to do this. Sadly, some machines are not standards conforming and have * _PIR instead. We shrug and cope by looking for both. */ void pci_pir_open(void) { struct PIR_table *pt; uint32_t sigaddr; int i; uint8_t ck, *cv; -#ifdef XEN - return; -#else /* Don't try if we've already found a table. */ if (pci_route_table != NULL) return; /* Look for $PIR and then _PIR. */ sigaddr = bios_sigsearch(0, "$PIR", 4, 16, 0); if (sigaddr == 0) sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0); if (sigaddr == 0) return; -#endif + /* If we found something, check the checksum and length. */ /* XXX - Use pmap_mapdev()? */ pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); if (pt->pt_header.ph_length <= sizeof(struct PIR_header)) return; for (cv = (u_int8_t *)pt, ck = 0, i = 0; i < (pt->pt_header.ph_length); i++) ck += cv[i]; if (ck != 0) return; /* Ok, we've got a valid table. */ pci_route_table = pt; pci_route_count = (pt->pt_header.ph_length - sizeof(struct PIR_header)) / sizeof(struct PIR_entry); } /* * Find the pci_link structure for a given link ID. */ static struct pci_link * pci_pir_find_link(uint8_t link_id) { struct pci_link *pci_link; TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (pci_link->pl_id == link_id) return (pci_link); } return (NULL); } /* * Find the link device associated with a PCI device in the table. 
*/ static void pci_pir_find_link_handler(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link_lookup *lookup; lookup = (struct pci_link_lookup *)arg; if (entry->pe_bus == lookup->bus && entry->pe_device == lookup->device && intpin - entry->pe_intpin == lookup->pin) *lookup->pci_link_ptr = pci_pir_find_link(intpin->link); } /* * Check to see if a possible IRQ setting is valid. */ static int pci_pir_valid_irq(struct pci_link *pci_link, int irq) { if (!PCI_INTERRUPT_VALID(irq)) return (0); return (pci_link->pl_irqmask & (1 << irq)); } /* * Walk the $PIR executing the worker function for each valid intpin entry * in the table. The handler is passed a pointer to both the entry and * the intpin in the entry. */ static void pci_pir_walk_table(pir_entry_handler *handler, void *arg) { struct PIR_entry *entry; struct PIR_intpin *intpin; int i, pin; entry = &pci_route_table->pt_entry[0]; for (i = 0; i < pci_route_count; i++, entry++) { intpin = &entry->pe_intpin[0]; for (pin = 0; pin < 4; pin++, intpin++) if (intpin->link != 0) handler(entry, intpin, arg); } } static void pci_pir_create_links(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link *pci_link; pci_link = pci_pir_find_link(intpin->link); if (pci_link != NULL) { pci_link->pl_references++; if (intpin->irqs != pci_link->pl_irqmask) { if (bootverbose) printf( "$PIR: Entry %d.%d.INT%c has different mask for link %#x, merging\n", entry->pe_bus, entry->pe_device, (intpin - entry->pe_intpin) + 'A', pci_link->pl_id); pci_link->pl_irqmask &= intpin->irqs; } } else { pci_link = malloc(sizeof(struct pci_link), M_PIR, M_WAITOK); pci_link->pl_id = intpin->link; pci_link->pl_irqmask = intpin->irqs; pci_link->pl_irq = PCI_INVALID_IRQ; pci_link->pl_references = 1; pci_link->pl_routed = 0; TAILQ_INSERT_TAIL(&pci_links, pci_link, pl_links); } } /* * Look to see if any of the function on the PCI device at bus/device have * an interrupt routed to intpin 'pin' by the BIOS. */ static uint8_t pci_pir_search_irq(int bus, int device, int pin) { uint32_t value; uint8_t func, maxfunc; /* See if we have a valid device at function 0. */ value = pci_cfgregread(bus, device, 0, PCIR_HDRTYPE, 1); if ((value & PCIM_HDRTYPE) > PCI_MAXHDRTYPE) return (PCI_INVALID_IRQ); if (value & PCIM_MFDEV) maxfunc = PCI_FUNCMAX; else maxfunc = 0; /* Scan all possible functions at this device. */ for (func = 0; func <= maxfunc; func++) { value = pci_cfgregread(bus, device, func, PCIR_DEVVENDOR, 4); if (value == 0xffffffff) continue; value = pci_cfgregread(bus, device, func, PCIR_INTPIN, 1); /* * See if it uses the pin in question. Note that the passed * in pin uses 0 for A, .. 3 for D whereas the intpin * register uses 0 for no interrupt, 1 for A, .. 4 for D. */ if (value != pin + 1) continue; value = pci_cfgregread(bus, device, func, PCIR_INTLINE, 1); if (bootverbose) printf( "$PIR: Found matching pin for %d.%d.INT%c at func %d: %d\n", bus, device, pin + 'A', func, value); if (value != PCI_INVALID_IRQ) return (value); } return (PCI_INVALID_IRQ); } /* * Try to initialize IRQ based on this device's IRQ. */ static void pci_pir_initial_irqs(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_link *pci_link; uint8_t irq, pin; pin = intpin - entry->pe_intpin; pci_link = pci_pir_find_link(intpin->link); irq = pci_pir_search_irq(entry->pe_bus, entry->pe_device, pin); if (irq == PCI_INVALID_IRQ || irq == pci_link->pl_irq) return; /* Don't trust any BIOS IRQs greater than 15. 
*/ if (irq >= NUM_ISA_INTERRUPTS) { printf( "$PIR: Ignoring invalid BIOS IRQ %d from %d.%d.INT%c for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); return; } /* * If we don't have an IRQ for this link yet, then we trust the * BIOS, even if it seems invalid from the $PIR entries. */ if (pci_link->pl_irq == PCI_INVALID_IRQ) { if (!pci_pir_valid_irq(pci_link, irq)) printf( "$PIR: Using invalid BIOS IRQ %d from %d.%d.INT%c for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); pci_link->pl_irq = irq; pci_link->pl_routed = 1; return; } /* * We have an IRQ and it doesn't match the current IRQ for this * link. If the new IRQ is invalid, then warn about it and ignore * it. If the old IRQ is invalid and the new IRQ is valid, then * prefer the new IRQ instead. If both IRQs are valid, then just * use the first one. Note that if we ever get into this situation * we are having to guess which setting the BIOS actually routed. * Perhaps we should just give up instead. */ if (!pci_pir_valid_irq(pci_link, irq)) { printf( "$PIR: BIOS IRQ %d for %d.%d.INT%c is not valid for link %#x\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id); } else if (!pci_pir_valid_irq(pci_link, pci_link->pl_irq)) { printf( "$PIR: Preferring valid BIOS IRQ %d from %d.%d.INT%c for link %#x to IRQ %d\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id, pci_link->pl_irq); pci_link->pl_irq = irq; pci_link->pl_routed = 1; } else printf( "$PIR: BIOS IRQ %d for %d.%d.INT%c does not match link %#x irq %d\n", irq, entry->pe_bus, entry->pe_device, pin + 'A', pci_link->pl_id, pci_link->pl_irq); } /* * Parse $PIR to enumerate link devices and attempt to determine their * initial state. This could perhaps be cleaner if we had drivers for the * various interrupt routers as they could read the initial IRQ for each * link. */ static void pci_pir_parse(void) { char tunable_buffer[64]; struct pci_link *pci_link; int i, irq; /* Only parse once. */ if (pir_parsed) return; pir_parsed = 1; /* Enumerate link devices. */ TAILQ_INIT(&pci_links); pci_pir_walk_table(pci_pir_create_links, NULL); if (bootverbose) { printf("$PIR: Links after initial probe:\n"); pci_pir_dump_links(); } /* * Check to see if the BIOS has already routed any of the links by * checking each device connected to each link to see if it has a * valid IRQ. */ pci_pir_walk_table(pci_pir_initial_irqs, NULL); if (bootverbose) { printf("$PIR: Links after initial IRQ discovery:\n"); pci_pir_dump_links(); } /* * Allow the user to override the IRQ for a given link device. We * allow invalid IRQs to be specified but warn about them. An IRQ * of 255 or 0 clears any preset IRQ. */ i = 0; TAILQ_FOREACH(pci_link, &pci_links, pl_links) { snprintf(tunable_buffer, sizeof(tunable_buffer), "hw.pci.link.%#x.irq", pci_link->pl_id); if (getenv_int(tunable_buffer, &irq) == 0) continue; if (irq == 0) irq = PCI_INVALID_IRQ; if (irq != PCI_INVALID_IRQ && !pci_pir_valid_irq(pci_link, irq) && bootverbose) printf( "$PIR: Warning, IRQ %d for link %#x is not listed as valid\n", irq, pci_link->pl_id); pci_link->pl_routed = 0; pci_link->pl_irq = irq; i = 1; } if (bootverbose && i) { printf("$PIR: Links after tunable overrides:\n"); pci_pir_dump_links(); } /* * Build initial interrupt weights as well as bitmap of "known-good" * IRQs that the BIOS has already used for PCI link devices. 
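 *
 * An IRQ's weight is the total number of $PIR interrupt-pin references
 * behind the links already using it; pci_pir_choose_irq() later picks
 * the candidate IRQ with the lowest weight, so a lightly loaded IRQ is
 * preferred over one shared by several devices.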
*/ TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) continue; pir_bios_irqs |= 1 << pci_link->pl_irq; pir_interrupt_weight[pci_link->pl_irq] += pci_link->pl_references; } if (bootverbose) { printf("$PIR: IRQs used by BIOS: "); pci_print_irqmask(pir_bios_irqs); printf("\n"); printf("$PIR: Interrupt Weights:\n[ "); for (i = 0; i < NUM_ISA_INTERRUPTS; i++) printf(" %3d", i); printf(" ]\n[ "); for (i = 0; i < NUM_ISA_INTERRUPTS; i++) printf(" %3d", pir_interrupt_weight[i]); printf(" ]\n"); } } /* * Use the PCI BIOS to route an interrupt for a given device. * * Input: * AX = PCIBIOS_ROUTE_INTERRUPT * BH = bus * BL = device [7:3] / function [2:0] * CH = IRQ * CL = Interrupt Pin (0x0A = A, ... 0x0D = D) */ static int pci_pir_biosroute(int bus, int device, int func, int pin, int irq) { struct bios_regs args; args.eax = PCIBIOS_ROUTE_INTERRUPT; args.ebx = (bus << 8) | (device << 3) | func; args.ecx = (irq << 8) | (0xa + pin); -#ifdef XEN - return (0); -#else return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))); -#endif } /* * Route a PCI interrupt using a link device from the $PIR. */ int pci_pir_route_interrupt(int bus, int device, int func, int pin) { struct pci_link_lookup lookup; struct pci_link *pci_link; int error, irq; if (pci_route_table == NULL) return (PCI_INVALID_IRQ); /* Lookup link device for this PCI device/pin. */ pci_link = NULL; lookup.bus = bus; lookup.device = device; lookup.pin = pin - 1; lookup.pci_link_ptr = &pci_link; pci_pir_walk_table(pci_pir_find_link_handler, &lookup); if (pci_link == NULL) { printf("$PIR: No matching entry for %d.%d.INT%c\n", bus, device, pin - 1 + 'A'); return (PCI_INVALID_IRQ); } /* * Pick a new interrupt if we don't have one already. We look * for an interrupt from several different sets. First, if * this link only has one valid IRQ, use that. Second, we * check the set of PCI only interrupts from the $PIR. Third, * we check the set of known-good interrupts that the BIOS has * already used. Lastly, we check the "all possible valid * IRQs" set. */ if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) { if (pci_link->pl_irqmask != 0 && powerof2(pci_link->pl_irqmask)) irq = ffs(pci_link->pl_irqmask) - 1; else irq = pci_pir_choose_irq(pci_link, pci_route_table->pt_header.ph_pci_irqs); if (!PCI_INTERRUPT_VALID(irq)) irq = pci_pir_choose_irq(pci_link, pir_bios_irqs); if (!PCI_INTERRUPT_VALID(irq)) irq = pci_pir_choose_irq(pci_link, pci_irq_override_mask); if (!PCI_INTERRUPT_VALID(irq)) { if (bootverbose) printf( "$PIR: Failed to route interrupt for %d:%d INT%c\n", bus, device, pin - 1 + 'A'); return (PCI_INVALID_IRQ); } pci_link->pl_irq = irq; } /* Ask the BIOS to route this IRQ if we haven't done so already. */ if (!pci_link->pl_routed) { error = pci_pir_biosroute(bus, device, func, pin - 1, pci_link->pl_irq); /* Ignore errors when routing a unique interrupt. */ if (error && !powerof2(pci_link->pl_irqmask)) { printf("$PIR: ROUTE_INTERRUPT failed.\n"); return (PCI_INVALID_IRQ); } pci_link->pl_routed = 1; /* Ensure the interrupt is set to level/low trigger. */ KASSERT(pir_device != NULL, ("missing pir device")); BUS_CONFIG_INTR(pir_device, pci_link->pl_irq, INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW); } if (bootverbose) printf("$PIR: %d:%d INT%c routed to irq %d\n", bus, device, pin - 1 + 'A', pci_link->pl_irq); return (pci_link->pl_irq); } /* * Try to pick an interrupt for the specified link from the interrupts * set in the mask. 
*/ static int pci_pir_choose_irq(struct pci_link *pci_link, int irqmask) { int i, irq, realmask; /* XXX: Need to have a #define of known bad IRQs to also mask out? */ realmask = pci_link->pl_irqmask & irqmask; if (realmask == 0) return (PCI_INVALID_IRQ); /* Find IRQ with lowest weight. */ irq = PCI_INVALID_IRQ; for (i = 0; i < NUM_ISA_INTERRUPTS; i++) { if (!(realmask & 1 << i)) continue; if (irq == PCI_INVALID_IRQ || pir_interrupt_weight[i] < pir_interrupt_weight[irq]) irq = i; } if (bootverbose && PCI_INTERRUPT_VALID(irq)) { printf("$PIR: Found IRQ %d for link %#x from ", irq, pci_link->pl_id); pci_print_irqmask(realmask); printf("\n"); } return (irq); } static void pci_print_irqmask(u_int16_t irqs) { int i, first; if (irqs == 0) { printf("none"); return; } first = 1; for (i = 0; i < 16; i++, irqs >>= 1) if (irqs & 1) { if (!first) printf(" "); else first = 0; printf("%d", i); } } /* * Display link devices. */ static void pci_pir_dump_links(void) { struct pci_link *pci_link; printf("Link IRQ Rtd Ref IRQs\n"); TAILQ_FOREACH(pci_link, &pci_links, pl_links) { printf("%#4x %3d %c %3d ", pci_link->pl_id, pci_link->pl_irq, pci_link->pl_routed ? 'Y' : 'N', pci_link->pl_references); pci_print_irqmask(pci_link->pl_irqmask); printf("\n"); } } /* * See if any interrupts for a given PCI bus are routed in the PIR. Don't * even bother looking if the BIOS doesn't support routing anyways. If we * are probing a PCI-PCI bridge, then require_parse will be true and we should * only succeed if a host-PCI bridge has already attached and parsed the PIR. */ int pci_pir_probe(int bus, int require_parse) { int i; if (pci_route_table == NULL || (require_parse && !pir_parsed)) return (0); for (i = 0; i < pci_route_count; i++) if (pci_route_table->pt_entry[i].pe_bus == bus) return (1); return (0); } /* * The driver for the new-bus psuedo device pir0 for the $PIR table. */ static int pir_probe(device_t dev) { char buf[64]; snprintf(buf, sizeof(buf), "PCI Interrupt Routing Table: %d Entries", pci_route_count); device_set_desc_copy(dev, buf); return (0); } static int pir_attach(device_t dev) { pci_pir_parse(); KASSERT(pir_device == NULL, ("Multiple pir devices")); pir_device = dev; return (0); } static void pir_resume_find_device(struct PIR_entry *entry, struct PIR_intpin *intpin, void *arg) { struct pci_dev_lookup *pd; pd = (struct pci_dev_lookup *)arg; if (intpin->link != pd->link || pd->bus != -1) return; pd->bus = entry->pe_bus; pd->device = entry->pe_device; pd->pin = intpin - entry->pe_intpin; } static int pir_resume(device_t dev) { struct pci_dev_lookup pd; struct pci_link *pci_link; int error; /* Ask the BIOS to re-route each link that was already routed. 
*/ TAILQ_FOREACH(pci_link, &pci_links, pl_links) { if (!PCI_INTERRUPT_VALID(pci_link->pl_irq)) { KASSERT(!pci_link->pl_routed, ("link %#x is routed but has invalid PCI IRQ", pci_link->pl_id)); continue; } if (pci_link->pl_routed) { pd.bus = -1; pd.link = pci_link->pl_id; pci_pir_walk_table(pir_resume_find_device, &pd); KASSERT(pd.bus != -1, ("did not find matching entry for link %#x in the $PIR table", pci_link->pl_id)); if (bootverbose) device_printf(dev, "Using %d.%d.INT%c to route link %#x to IRQ %d\n", pd.bus, pd.device, pd.pin + 'A', pci_link->pl_id, pci_link->pl_irq); error = pci_pir_biosroute(pd.bus, pd.device, 0, pd.pin, pci_link->pl_irq); if (error) device_printf(dev, "ROUTE_INTERRUPT on resume for link %#x failed.\n", pci_link->pl_id); } } return (0); } static device_method_t pir_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pir_probe), DEVMETHOD(device_attach, pir_attach), DEVMETHOD(device_resume, pir_resume), { 0, 0 } }; static driver_t pir_driver = { "pir", pir_methods, 1, }; static devclass_t pir_devclass; DRIVER_MODULE(pir, legacy, pir_driver, pir_devclass, 0, 0); Index: head/sys/kern/kern_intr.c =================================================================== --- head/sys/kern/kern_intr.c (revision 282273) +++ head/sys/kern/kern_intr.c (revision 282274) @@ -1,1925 +1,1920 @@ /*- * Copyright (c) 1997, Stefan Esser * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_kstack_usage_prof.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif /* * Describe an interrupt thread. There is one of these per interrupt event. */ struct intr_thread { struct intr_event *it_event; struct thread *it_thread; /* Kernel thread. */ int it_flags; /* (j) IT_* flags. */ int it_need; /* Needs service. */ }; /* Interrupt thread flags kept in it_flags */ #define IT_DEAD 0x000001 /* Thread is waiting to exit. */ #define IT_WAIT 0x000002 /* Thread is waiting for completion. 
*/ struct intr_entropy { struct thread *td; uintptr_t event; }; struct intr_event *clk_intr_event; struct intr_event *tty_intr_event; void *vm_ih; struct proc *intrproc; static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads"); static int intr_storm_threshold = 1000; SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RWTUN, &intr_storm_threshold, 0, "Number of consecutive interrupts before storm protection is enabled"); static TAILQ_HEAD(, intr_event) event_list = TAILQ_HEAD_INITIALIZER(event_list); static struct mtx event_lock; MTX_SYSINIT(intr_event_list, &event_lock, "intr event list", MTX_DEF); static void intr_event_update(struct intr_event *ie); #ifdef INTR_FILTER static int intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *ithd); static int intr_filter_loop(struct intr_event *ie, struct trapframe *frame, struct intr_thread **ithd); static struct intr_thread *ithread_create(const char *name, struct intr_handler *ih); #else static int intr_event_schedule_thread(struct intr_event *ie); static struct intr_thread *ithread_create(const char *name); #endif static void ithread_destroy(struct intr_thread *ithread); static void ithread_execute_handlers(struct proc *p, struct intr_event *ie); #ifdef INTR_FILTER static void priv_ithread_execute_handler(struct proc *p, struct intr_handler *ih); #endif static void ithread_loop(void *); static void ithread_update(struct intr_thread *ithd); static void start_softintr(void *); /* Map an interrupt type to an ithread priority. */ u_char intr_priority(enum intr_type flags) { u_char pri; flags &= (INTR_TYPE_TTY | INTR_TYPE_BIO | INTR_TYPE_NET | INTR_TYPE_CAM | INTR_TYPE_MISC | INTR_TYPE_CLK | INTR_TYPE_AV); switch (flags) { case INTR_TYPE_TTY: pri = PI_TTY; break; case INTR_TYPE_BIO: pri = PI_DISK; break; case INTR_TYPE_NET: pri = PI_NET; break; case INTR_TYPE_CAM: pri = PI_DISK; break; case INTR_TYPE_AV: pri = PI_AV; break; case INTR_TYPE_CLK: pri = PI_REALTIME; break; case INTR_TYPE_MISC: pri = PI_DULL; /* don't care */ break; default: /* We didn't specify an interrupt level. */ panic("intr_priority: no interrupt type in flags"); } return pri; } /* * Update an ithread based on the associated intr_event. */ static void ithread_update(struct intr_thread *ithd) { struct intr_event *ie; struct thread *td; u_char pri; ie = ithd->it_event; td = ithd->it_thread; /* Determine the overall priority of this event. */ if (TAILQ_EMPTY(&ie->ie_handlers)) pri = PRI_MAX_ITHD; else pri = TAILQ_FIRST(&ie->ie_handlers)->ih_pri; /* Update name and priority. */ strlcpy(td->td_name, ie->ie_fullname, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif thread_lock(td); sched_prio(td, pri); thread_unlock(td); } /* * Regenerate the full name of an interrupt event and update its priority. */ static void intr_event_update(struct intr_event *ie) { struct intr_handler *ih; char *last; int missed, space; /* Start off with no entropy and just the name of the event. */ mtx_assert(&ie->ie_lock, MA_OWNED); strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname)); ie->ie_flags &= ~IE_ENTROPY; missed = 0; space = 1; /* Run through all the handlers updating values. */ TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { if (strlen(ie->ie_fullname) + strlen(ih->ih_name) + 1 < sizeof(ie->ie_fullname)) { strcat(ie->ie_fullname, " "); strcat(ie->ie_fullname, ih->ih_name); space = 0; } else missed++; if (ih->ih_flags & IH_ENTROPY) ie->ie_flags |= IE_ENTROPY; } /* * If the handler names were too long, add +'s to indicate missing * names. 
If we run out of room and still have +'s to add, change * the last character from a + to a *. */ last = &ie->ie_fullname[sizeof(ie->ie_fullname) - 2]; while (missed-- > 0) { if (strlen(ie->ie_fullname) + 1 == sizeof(ie->ie_fullname)) { if (*last == '+') { *last = '*'; break; } else *last = '+'; } else if (space) { strcat(ie->ie_fullname, " +"); space = 0; } else strcat(ie->ie_fullname, "+"); } /* * If this event has an ithread, update it's priority and * name. */ if (ie->ie_thread != NULL) ithread_update(ie->ie_thread); CTR2(KTR_INTR, "%s: updated %s", __func__, ie->ie_fullname); } int intr_event_create(struct intr_event **event, void *source, int flags, int irq, void (*pre_ithread)(void *), void (*post_ithread)(void *), void (*post_filter)(void *), int (*assign_cpu)(void *, int), const char *fmt, ...) { struct intr_event *ie; va_list ap; /* The only valid flag during creation is IE_SOFT. */ if ((flags & ~IE_SOFT) != 0) return (EINVAL); ie = malloc(sizeof(struct intr_event), M_ITHREAD, M_WAITOK | M_ZERO); ie->ie_source = source; ie->ie_pre_ithread = pre_ithread; ie->ie_post_ithread = post_ithread; ie->ie_post_filter = post_filter; ie->ie_assign_cpu = assign_cpu; ie->ie_flags = flags; ie->ie_irq = irq; ie->ie_cpu = NOCPU; TAILQ_INIT(&ie->ie_handlers); mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF); va_start(ap, fmt); vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap); va_end(ap); strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname)); mtx_lock(&event_lock); TAILQ_INSERT_TAIL(&event_list, ie, ie_list); mtx_unlock(&event_lock); if (event != NULL) *event = ie; CTR2(KTR_INTR, "%s: created %s", __func__, ie->ie_name); return (0); } /* * Bind an interrupt event to the specified CPU. Note that not all * platforms support binding an interrupt to a CPU. For those * platforms this request will fail. For supported platforms, any * associated ithreads as well as the primary interrupt context will * be bound to the specificed CPU. Using a cpu id of NOCPU unbinds * the interrupt event. */ int intr_event_bind(struct intr_event *ie, int cpu) { lwpid_t id; int error; /* Need a CPU to bind to. */ if (cpu != NOCPU && CPU_ABSENT(cpu)) return (EINVAL); if (ie->ie_assign_cpu == NULL) return (EOPNOTSUPP); error = priv_check(curthread, PRIV_SCHED_CPUSET_INTR); if (error) return (error); /* * If we have any ithreads try to set their mask first to verify * permissions, etc. */ mtx_lock(&ie->ie_lock); if (ie->ie_thread != NULL) { id = ie->ie_thread->it_thread->td_tid; mtx_unlock(&ie->ie_lock); error = cpuset_setithread(id, cpu); if (error) return (error); } else mtx_unlock(&ie->ie_lock); error = ie->ie_assign_cpu(ie->ie_source, cpu); if (error) { mtx_lock(&ie->ie_lock); if (ie->ie_thread != NULL) { cpu = ie->ie_cpu; id = ie->ie_thread->it_thread->td_tid; mtx_unlock(&ie->ie_lock); (void)cpuset_setithread(id, cpu); } else mtx_unlock(&ie->ie_lock); return (error); } mtx_lock(&ie->ie_lock); ie->ie_cpu = cpu; mtx_unlock(&ie->ie_lock); return (error); } static struct intr_event * intr_lookup(int irq) { struct intr_event *ie; mtx_lock(&event_lock); TAILQ_FOREACH(ie, &event_list, ie_list) if (ie->ie_irq == irq && (ie->ie_flags & IE_SOFT) == 0 && TAILQ_FIRST(&ie->ie_handlers) != NULL) break; mtx_unlock(&event_lock); return (ie); } int intr_setaffinity(int irq, void *m) { struct intr_event *ie; cpuset_t *mask; int cpu, n; mask = m; cpu = NOCPU; /* * If we're setting all cpus we can unbind. Otherwise make sure * only one cpu is in the set. 
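 *
 * In other words, a mask equal to cpuset_root maps to NOCPU and
 * unbinds the interrupt; otherwise exactly one CPU is expected in the
 * mask and the interrupt is bound to it, while a mask with more than
 * one CPU set is rejected with EINVAL.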
*/ if (CPU_CMP(cpuset_root, mask)) { for (n = 0; n < CPU_SETSIZE; n++) { if (!CPU_ISSET(n, mask)) continue; if (cpu != NOCPU) return (EINVAL); cpu = n; } } ie = intr_lookup(irq); if (ie == NULL) return (ESRCH); return (intr_event_bind(ie, cpu)); } int intr_getaffinity(int irq, void *m) { struct intr_event *ie; cpuset_t *mask; mask = m; ie = intr_lookup(irq); if (ie == NULL) return (ESRCH); CPU_ZERO(mask); mtx_lock(&ie->ie_lock); if (ie->ie_cpu == NOCPU) CPU_COPY(cpuset_root, mask); else CPU_SET(ie->ie_cpu, mask); mtx_unlock(&ie->ie_lock); return (0); } int intr_event_destroy(struct intr_event *ie) { mtx_lock(&event_lock); mtx_lock(&ie->ie_lock); if (!TAILQ_EMPTY(&ie->ie_handlers)) { mtx_unlock(&ie->ie_lock); mtx_unlock(&event_lock); return (EBUSY); } TAILQ_REMOVE(&event_list, ie, ie_list); #ifndef notyet if (ie->ie_thread != NULL) { ithread_destroy(ie->ie_thread); ie->ie_thread = NULL; } #endif mtx_unlock(&ie->ie_lock); mtx_unlock(&event_lock); mtx_destroy(&ie->ie_lock); free(ie, M_ITHREAD); return (0); } #ifndef INTR_FILTER static struct intr_thread * ithread_create(const char *name) { struct intr_thread *ithd; struct thread *td; int error; ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO); error = kproc_kthread_add(ithread_loop, ithd, &intrproc, &td, RFSTOPPED | RFHIGHPID, 0, "intr", "%s", name); if (error) panic("kproc_create() failed with %d", error); thread_lock(td); sched_class(td, PRI_ITHD); TD_SET_IWAIT(td); thread_unlock(td); td->td_pflags |= TDP_ITHREAD; ithd->it_thread = td; CTR2(KTR_INTR, "%s: created %s", __func__, name); return (ithd); } #else static struct intr_thread * ithread_create(const char *name, struct intr_handler *ih) { struct intr_thread *ithd; struct thread *td; int error; ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO); error = kproc_kthread_add(ithread_loop, ih, &intrproc, &td, RFSTOPPED | RFHIGHPID, 0, "intr", "%s", name); if (error) panic("kproc_create() failed with %d", error); thread_lock(td); sched_class(td, PRI_ITHD); TD_SET_IWAIT(td); thread_unlock(td); td->td_pflags |= TDP_ITHREAD; ithd->it_thread = td; CTR2(KTR_INTR, "%s: created %s", __func__, name); return (ithd); } #endif static void ithread_destroy(struct intr_thread *ithread) { struct thread *td; CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name); td = ithread->it_thread; thread_lock(td); ithread->it_flags |= IT_DEAD; if (TD_AWAITING_INTR(td)) { TD_CLR_IWAIT(td); sched_add(td, SRQ_INTR); } thread_unlock(td); } #ifndef INTR_FILTER int intr_event_add_handler(struct intr_event *ie, const char *name, driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri, enum intr_type flags, void **cookiep) { struct intr_handler *ih, *temp_ih; struct intr_thread *it; if (ie == NULL || name == NULL || (handler == NULL && filter == NULL)) return (EINVAL); /* Allocate and populate an interrupt handler structure. */ ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO); ih->ih_filter = filter; ih->ih_handler = handler; ih->ih_argument = arg; strlcpy(ih->ih_name, name, sizeof(ih->ih_name)); ih->ih_event = ie; ih->ih_pri = pri; if (flags & INTR_EXCL) ih->ih_flags = IH_EXCLUSIVE; if (flags & INTR_MPSAFE) ih->ih_flags |= IH_MPSAFE; if (flags & INTR_ENTROPY) ih->ih_flags |= IH_ENTROPY; /* We can only have one exclusive handler in a event. 
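The affinity rule in intr_setaffinity() above is: a mask equal to the root set means "unbind" (NOCPU), otherwise exactly one CPU may be set. A minimal userspace sketch of that rule, using a plain 64-bit mask in place of cpuset_t; the harness, the allmask parameter and the -2 error code are invented for illustration and are not part of this change:

    #include <stdio.h>
    #include <stdint.h>

    #define NOCPU   (-1)            /* same sentinel the kernel uses */

    /*
     * Return NOCPU when every online CPU is in the mask (unbind),
     * the single CPU index when exactly one bit is set, or -2 on error.
     * "allmask" stands in for cpuset_root in the real code.
     */
    static int
    pick_bind_target(uint64_t mask, uint64_t allmask)
    {
        int cpu = NOCPU;

        if (mask == allmask)
            return (NOCPU);                /* full set: unbind */
        for (int n = 0; n < 64; n++) {
            if ((mask & (UINT64_C(1) << n)) == 0)
                continue;
            if (cpu != NOCPU)
                return (-2);               /* more than one CPU: EINVAL */
            cpu = n;
        }
        return (cpu);
    }

    int
    main(void)
    {
        uint64_t all = 0xf;                /* pretend 4 CPUs are online */

        printf("%d\n", pick_bind_target(all, all));   /* -1: unbind */
        printf("%d\n", pick_bind_target(0x4, all));   /*  2: bind to CPU 2 */
        printf("%d\n", pick_bind_target(0x6, all));   /* -2: invalid mask */
        return (0);
    }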
*/ mtx_lock(&ie->ie_lock); if (!TAILQ_EMPTY(&ie->ie_handlers)) { if ((flags & INTR_EXCL) || (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) { mtx_unlock(&ie->ie_lock); free(ih, M_ITHREAD); return (EINVAL); } } /* Create a thread if we need one. */ while (ie->ie_thread == NULL && handler != NULL) { if (ie->ie_flags & IE_ADDING_THREAD) msleep(ie, &ie->ie_lock, 0, "ithread", 0); else { ie->ie_flags |= IE_ADDING_THREAD; mtx_unlock(&ie->ie_lock); it = ithread_create("intr: newborn"); mtx_lock(&ie->ie_lock); ie->ie_flags &= ~IE_ADDING_THREAD; ie->ie_thread = it; it->it_event = ie; ithread_update(it); wakeup(ie); } } /* Add the new handler to the event in priority order. */ TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { if (temp_ih->ih_pri > ih->ih_pri) break; } if (temp_ih == NULL) TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); else TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); intr_event_update(ie); CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, ie->ie_name); mtx_unlock(&ie->ie_lock); if (cookiep != NULL) *cookiep = ih; return (0); } #else int intr_event_add_handler(struct intr_event *ie, const char *name, driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri, enum intr_type flags, void **cookiep) { struct intr_handler *ih, *temp_ih; struct intr_thread *it; if (ie == NULL || name == NULL || (handler == NULL && filter == NULL)) return (EINVAL); /* Allocate and populate an interrupt handler structure. */ ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO); ih->ih_filter = filter; ih->ih_handler = handler; ih->ih_argument = arg; strlcpy(ih->ih_name, name, sizeof(ih->ih_name)); ih->ih_event = ie; ih->ih_pri = pri; if (flags & INTR_EXCL) ih->ih_flags = IH_EXCLUSIVE; if (flags & INTR_MPSAFE) ih->ih_flags |= IH_MPSAFE; if (flags & INTR_ENTROPY) ih->ih_flags |= IH_ENTROPY; /* We can only have one exclusive handler in a event. */ mtx_lock(&ie->ie_lock); if (!TAILQ_EMPTY(&ie->ie_handlers)) { if ((flags & INTR_EXCL) || (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) { mtx_unlock(&ie->ie_lock); free(ih, M_ITHREAD); return (EINVAL); } } /* For filtered handlers, create a private ithread to run on. */ if (filter != NULL && handler != NULL) { mtx_unlock(&ie->ie_lock); it = ithread_create("intr: newborn", ih); mtx_lock(&ie->ie_lock); it->it_event = ie; ih->ih_thread = it; ithread_update(it); /* XXX - do we really need this?!?!? */ } else { /* Create the global per-event thread if we need one. */ while (ie->ie_thread == NULL && handler != NULL) { if (ie->ie_flags & IE_ADDING_THREAD) msleep(ie, &ie->ie_lock, 0, "ithread", 0); else { ie->ie_flags |= IE_ADDING_THREAD; mtx_unlock(&ie->ie_lock); it = ithread_create("intr: newborn", ih); mtx_lock(&ie->ie_lock); ie->ie_flags &= ~IE_ADDING_THREAD; ie->ie_thread = it; it->it_event = ie; ithread_update(it); wakeup(ie); } } } /* Add the new handler to the event in priority order. */ TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { if (temp_ih->ih_pri > ih->ih_pri) break; } if (temp_ih == NULL) TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); else TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); intr_event_update(ie); CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, ie->ie_name); mtx_unlock(&ie->ie_lock); if (cookiep != NULL) *cookiep = ih; return (0); } #endif /* * Append a description preceded by a ':' to the name of the specified * interrupt handler. 
*/ int intr_event_describe_handler(struct intr_event *ie, void *cookie, const char *descr) { struct intr_handler *ih; size_t space; char *start; mtx_lock(&ie->ie_lock); #ifdef INVARIANTS TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { if (ih == cookie) break; } if (ih == NULL) { mtx_unlock(&ie->ie_lock); panic("handler %p not found in interrupt event %p", cookie, ie); } #endif ih = cookie; /* * Look for an existing description by checking for an * existing ":". This assumes device names do not include * colons. If one is found, prepare to insert the new * description at that point. If one is not found, find the * end of the name to use as the insertion point. */ start = strchr(ih->ih_name, ':'); if (start == NULL) start = strchr(ih->ih_name, 0); /* * See if there is enough remaining room in the string for the * description + ":". The "- 1" leaves room for the trailing * '\0'. The "+ 1" accounts for the colon. */ space = sizeof(ih->ih_name) - (start - ih->ih_name) - 1; if (strlen(descr) + 1 > space) { mtx_unlock(&ie->ie_lock); return (ENOSPC); } /* Append a colon followed by the description. */ *start = ':'; strcpy(start + 1, descr); intr_event_update(ie); mtx_unlock(&ie->ie_lock); return (0); } /* * Return the ie_source field from the intr_event an intr_handler is * associated with. */ void * intr_handler_source(void *cookie) { struct intr_handler *ih; struct intr_event *ie; ih = (struct intr_handler *)cookie; if (ih == NULL) return (NULL); ie = ih->ih_event; KASSERT(ie != NULL, ("interrupt handler \"%s\" has a NULL interrupt event", ih->ih_name)); return (ie->ie_source); } /* * Sleep until an ithread finishes executing an interrupt handler. * * XXX Doesn't currently handle interrupt filters or fast interrupt * handlers. This is intended for compatibility with linux drivers * only. Do not use in BSD code. */ void _intr_drain(int irq) { struct intr_event *ie; struct intr_thread *ithd; struct thread *td; ie = intr_lookup(irq); if (ie == NULL) return; if (ie->ie_thread == NULL) return; ithd = ie->ie_thread; td = ithd->it_thread; /* * We set the flag and wait for it to be cleared to avoid * long delays with potentially busy interrupt handlers * were we to only sample TD_AWAITING_INTR() every tick. */ thread_lock(td); if (!TD_AWAITING_INTR(td)) { ithd->it_flags |= IT_WAIT; while (ithd->it_flags & IT_WAIT) { thread_unlock(td); pause("idrain", 1); thread_lock(td); } } thread_unlock(td); return; } #ifndef INTR_FILTER int intr_event_remove_handler(void *cookie) { struct intr_handler *handler = (struct intr_handler *)cookie; struct intr_event *ie; #ifdef INVARIANTS struct intr_handler *ih; #endif #ifdef notyet int dead; #endif if (handler == NULL) return (EINVAL); ie = handler->ih_event; KASSERT(ie != NULL, ("interrupt handler \"%s\" has a NULL interrupt event", handler->ih_name)); mtx_lock(&ie->ie_lock); CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name, ie->ie_name); #ifdef INVARIANTS TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) if (ih == handler) goto ok; mtx_unlock(&ie->ie_lock); panic("interrupt handler \"%s\" not found in interrupt event \"%s\"", ih->ih_name, ie->ie_name); ok: #endif /* * If there is no ithread, then just remove the handler and return. * XXX: Note that an INTR_FAST handler might be running on another * CPU! 
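intr_event_describe_handler() above rewrites a fixed-size handler name in place: find the first ':' (or the end of the string), check that ':' plus the new description still fits, then overwrite. A standalone sketch of the same string handling; NAMELEN and the sample names are arbitrary here, whereas the kernel uses sizeof(ih->ih_name):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    #define NAMELEN 32      /* illustrative; the kernel uses sizeof(ih_name) */

    /* Append or replace a ":descr" suffix if it fits in the buffer. */
    static int
    describe(char name[NAMELEN], const char *descr)
    {
        char *start;
        size_t space;

        start = strchr(name, ':');
        if (start == NULL)
            start = strchr(name, '\0');
        space = NAMELEN - (start - name) - 1;   /* room left, minus NUL */
        if (strlen(descr) + 1 > space)          /* +1 for the ':' */
            return (ENOSPC);
        *start = ':';
        strcpy(start + 1, descr);
        return (0);
    }

    int
    main(void)
    {
        char name[NAMELEN] = "em0";

        describe(name, "irq256");
        printf("%s\n", name);   /* em0:irq256 */
        describe(name, "rxq0");
        printf("%s\n", name);   /* em0:rxq0 (old description replaced) */
        return (0);
    }

As in the kernel routine, an existing description is replaced rather than appended, which is why device names are assumed not to contain colons.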
*/ if (ie->ie_thread == NULL) { TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); mtx_unlock(&ie->ie_lock); free(handler, M_ITHREAD); return (0); } /* * If the interrupt thread is already running, then just mark this * handler as being dead and let the ithread do the actual removal. * * During a cold boot while cold is set, msleep() does not sleep, * so we have to remove the handler here rather than letting the * thread do it. */ thread_lock(ie->ie_thread->it_thread); if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) { handler->ih_flags |= IH_DEAD; /* * Ensure that the thread will process the handler list * again and remove this handler if it has already passed * it on the list. */ atomic_store_rel_int(&ie->ie_thread->it_need, 1); } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); thread_unlock(ie->ie_thread->it_thread); while (handler->ih_flags & IH_DEAD) msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); intr_event_update(ie); #ifdef notyet /* * XXX: This could be bad in the case of ppbus(8). Also, I think * this could lead to races of stale data when servicing an * interrupt. */ dead = 1; TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { if (!(ih->ih_flags & IH_FAST)) { dead = 0; break; } } if (dead) { ithread_destroy(ie->ie_thread); ie->ie_thread = NULL; } #endif mtx_unlock(&ie->ie_lock); free(handler, M_ITHREAD); return (0); } static int intr_event_schedule_thread(struct intr_event *ie) { struct intr_entropy entropy; struct intr_thread *it; struct thread *td; struct thread *ctd; struct proc *p; /* * If no ithread or no handlers, then we have a stray interrupt. */ if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || ie->ie_thread == NULL) return (EINVAL); ctd = curthread; it = ie->ie_thread; td = it->it_thread; p = td->td_proc; /* * If any of the handlers for this ithread claim to be good * sources of entropy, then gather some. */ if (ie->ie_flags & IE_ENTROPY) { entropy.event = (uintptr_t)ie; entropy.td = ctd; random_harvest(&entropy, sizeof(entropy), 2, RANDOM_INTERRUPT); } KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name)); /* * Set it_need to tell the thread to keep running if it is already * running. Then, lock the thread and see if we actually need to * put it on the runqueue. */ atomic_store_rel_int(&it->it_need, 1); thread_lock(td); if (TD_AWAITING_INTR(td)) { CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, td->td_name); TD_CLR_IWAIT(td); sched_add(td, SRQ_INTR); } else { CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", __func__, p->p_pid, td->td_name, it->it_need, td->td_state); } thread_unlock(td); return (0); } #else int intr_event_remove_handler(void *cookie) { struct intr_handler *handler = (struct intr_handler *)cookie; struct intr_event *ie; struct intr_thread *it; #ifdef INVARIANTS struct intr_handler *ih; #endif #ifdef notyet int dead; #endif if (handler == NULL) return (EINVAL); ie = handler->ih_event; KASSERT(ie != NULL, ("interrupt handler \"%s\" has a NULL interrupt event", handler->ih_name)); mtx_lock(&ie->ie_lock); CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name, ie->ie_name); #ifdef INVARIANTS TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) if (ih == handler) goto ok; mtx_unlock(&ie->ie_lock); panic("interrupt handler \"%s\" not found in interrupt event \"%s\"", ih->ih_name, ie->ie_name); ok: #endif /* * If there are no ithreads (per event and per handler), then * just remove the handler and return. * XXX: Note that an INTR_FAST handler might be running on another CPU! 
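intr_event_schedule_thread() above always publishes it_need with a release store, and only moves the ithread to a run queue when it is parked in interrupt-wait; a thread that is already running simply re-scans its handler list. A compressed userspace model of that producer side (thread_lock(), TD_AWAITING_INTR() and sched_add() are reduced to a bool and comments; this is a sketch, not kernel code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct ithread_model {
        atomic_int  it_need;    /* "re-scan the handler list" flag */
        bool        awaiting;   /* stands in for TD_AWAITING_INTR(td) */
    };

    /*
     * Producer side of the handshake: set it_need first (release store),
     * then wake the thread only if it is idle in interrupt-wait.
     */
    static bool
    schedule_model(struct ithread_model *it)
    {
        atomic_store_explicit(&it->it_need, 1, memory_order_release);
        /* the kernel holds thread_lock(td) around the check below */
        if (it->awaiting) {
            it->awaiting = false;   /* TD_CLR_IWAIT() + sched_add() */
            return (true);          /* thread was put on a run queue */
        }
        return (false);             /* already running; the flag suffices */
    }

    int
    main(void)
    {
        struct ithread_model it = { .it_need = 0, .awaiting = true };

        printf("%d\n", schedule_model(&it));    /* 1: was idle, scheduled */
        printf("%d\n", schedule_model(&it));    /* 0: already running */
        return (0);
    }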
*/ if (ie->ie_thread == NULL && handler->ih_thread == NULL) { TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); mtx_unlock(&ie->ie_lock); free(handler, M_ITHREAD); return (0); } /* Private or global ithread? */ it = (handler->ih_thread) ? handler->ih_thread : ie->ie_thread; /* * If the interrupt thread is already running, then just mark this * handler as being dead and let the ithread do the actual removal. * * During a cold boot while cold is set, msleep() does not sleep, * so we have to remove the handler here rather than letting the * thread do it. */ thread_lock(it->it_thread); if (!TD_AWAITING_INTR(it->it_thread) && !cold) { handler->ih_flags |= IH_DEAD; /* * Ensure that the thread will process the handler list * again and remove this handler if it has already passed * it on the list. */ atomic_store_rel_int(&it->it_need, 1); } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); thread_unlock(it->it_thread); while (handler->ih_flags & IH_DEAD) msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); /* * At this point, the handler has been disconnected from the event, * so we can kill the private ithread if any. */ if (handler->ih_thread) { ithread_destroy(handler->ih_thread); handler->ih_thread = NULL; } intr_event_update(ie); #ifdef notyet /* * XXX: This could be bad in the case of ppbus(8). Also, I think * this could lead to races of stale data when servicing an * interrupt. */ dead = 1; TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { if (handler != NULL) { dead = 0; break; } } if (dead) { ithread_destroy(ie->ie_thread); ie->ie_thread = NULL; } #endif mtx_unlock(&ie->ie_lock); free(handler, M_ITHREAD); return (0); } static int intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it) { struct intr_entropy entropy; struct thread *td; struct thread *ctd; struct proc *p; /* * If no ithread or no handlers, then we have a stray interrupt. */ if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || it == NULL) return (EINVAL); ctd = curthread; td = it->it_thread; p = td->td_proc; /* * If any of the handlers for this ithread claim to be good * sources of entropy, then gather some. */ if (ie->ie_flags & IE_ENTROPY) { entropy.event = (uintptr_t)ie; entropy.td = ctd; random_harvest(&entropy, sizeof(entropy), 2, RANDOM_INTERRUPT); } KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name)); /* * Set it_need to tell the thread to keep running if it is already * running. Then, lock the thread and see if we actually need to * put it on the runqueue. */ atomic_store_rel_int(&it->it_need, 1); thread_lock(td); if (TD_AWAITING_INTR(td)) { CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, td->td_name); TD_CLR_IWAIT(td); sched_add(td, SRQ_INTR); } else { CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", __func__, p->p_pid, td->td_name, it->it_need, td->td_state); } thread_unlock(td); return (0); } #endif /* * Allow interrupt event binding for software interrupt handlers -- a no-op, * since interrupts are generated in software rather than being directed by * a PIC. */ static int swi_assign_cpu(void *arg, int cpu) { return (0); } /* * Add a software interrupt handler to a specified event. If a given event * is not specified, then a new event is created. */ int swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler, void *arg, int pri, enum intr_type flags, void **cookiep) { struct intr_event *ie; int error; if (flags & INTR_ENTROPY) return (EINVAL); ie = (eventp != NULL) ? 
*eventp : NULL; if (ie != NULL) { if (!(ie->ie_flags & IE_SOFT)) return (EINVAL); } else { error = intr_event_create(&ie, NULL, IE_SOFT, 0, NULL, NULL, NULL, swi_assign_cpu, "swi%d:", pri); if (error) return (error); if (eventp != NULL) *eventp = ie; } error = intr_event_add_handler(ie, name, NULL, handler, arg, PI_SWI(pri), flags, cookiep); return (error); } /* * Schedule a software interrupt thread. */ void swi_sched(void *cookie, int flags) { struct intr_handler *ih = (struct intr_handler *)cookie; struct intr_event *ie = ih->ih_event; struct intr_entropy entropy; int error; CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name, ih->ih_need); entropy.event = (uintptr_t)ih; entropy.td = curthread; random_harvest(&entropy, sizeof(entropy), 1, RANDOM_SWI); /* * Set ih_need for this handler so that if the ithread is already * running it will execute this handler on the next pass. Otherwise, * it will execute it the next time it runs. */ atomic_store_rel_int(&ih->ih_need, 1); if (!(flags & SWI_DELAY)) { PCPU_INC(cnt.v_soft); #ifdef INTR_FILTER error = intr_event_schedule_thread(ie, ie->ie_thread); #else error = intr_event_schedule_thread(ie); #endif KASSERT(error == 0, ("stray software interrupt")); } } /* * Remove a software interrupt handler. Currently this code does not * remove the associated interrupt event if it becomes empty. Calling code * may do so manually via intr_event_destroy(), but that's not really * an optimal interface. */ int swi_remove(void *cookie) { return (intr_event_remove_handler(cookie)); } #ifdef INTR_FILTER static void priv_ithread_execute_handler(struct proc *p, struct intr_handler *ih) { struct intr_event *ie; ie = ih->ih_event; /* * If this handler is marked for death, remove it from * the list of handlers and wake up the sleeper. */ if (ih->ih_flags & IH_DEAD) { mtx_lock(&ie->ie_lock); TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next); ih->ih_flags &= ~IH_DEAD; wakeup(ih); mtx_unlock(&ie->ie_lock); return; } /* Execute this handler. */ CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x", __func__, p->p_pid, (void *)ih->ih_handler, ih->ih_argument, ih->ih_name, ih->ih_flags); if (!(ih->ih_flags & IH_MPSAFE)) mtx_lock(&Giant); ih->ih_handler(ih->ih_argument); if (!(ih->ih_flags & IH_MPSAFE)) mtx_unlock(&Giant); } #endif /* * This is a public function for use by drivers that mux interrupt * handlers for child devices from their interrupt handler. */ void intr_event_execute_handlers(struct proc *p, struct intr_event *ie) { struct intr_handler *ih, *ihn; TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) { /* * If this handler is marked for death, remove it from * the list of handlers and wake up the sleeper. */ if (ih->ih_flags & IH_DEAD) { mtx_lock(&ie->ie_lock); TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next); ih->ih_flags &= ~IH_DEAD; wakeup(ih); mtx_unlock(&ie->ie_lock); continue; } /* Skip filter only handlers */ if (ih->ih_handler == NULL) continue; /* * For software interrupt threads, we only execute * handlers that have their need flag set. Hardware * interrupt threads always invoke all of their handlers. */ if (ie->ie_flags & IE_SOFT) { if (atomic_load_acq_int(&ih->ih_need) == 0) continue; else atomic_store_rel_int(&ih->ih_need, 0); } /* Execute this handler. 
*/ CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x", __func__, p->p_pid, (void *)ih->ih_handler, ih->ih_argument, ih->ih_name, ih->ih_flags); if (!(ih->ih_flags & IH_MPSAFE)) mtx_lock(&Giant); ih->ih_handler(ih->ih_argument); if (!(ih->ih_flags & IH_MPSAFE)) mtx_unlock(&Giant); } } static void ithread_execute_handlers(struct proc *p, struct intr_event *ie) { /* Interrupt handlers should not sleep. */ if (!(ie->ie_flags & IE_SOFT)) THREAD_NO_SLEEPING(); intr_event_execute_handlers(p, ie); if (!(ie->ie_flags & IE_SOFT)) THREAD_SLEEPING_OK(); /* * Interrupt storm handling: * * If this interrupt source is currently storming, then throttle * it to only fire the handler once per clock tick. * * If this interrupt source is not currently storming, but the * number of back to back interrupts exceeds the storm threshold, * then enter storming mode. */ if (intr_storm_threshold != 0 && ie->ie_count >= intr_storm_threshold && !(ie->ie_flags & IE_SOFT)) { /* Report the message only once every second. */ if (ppsratecheck(&ie->ie_warntm, &ie->ie_warncnt, 1)) { printf( "interrupt storm detected on \"%s\"; throttling interrupt source\n", ie->ie_name); } pause("istorm", 1); } else ie->ie_count++; /* * Now that all the handlers have had a chance to run, reenable * the interrupt source. */ if (ie->ie_post_ithread != NULL) ie->ie_post_ithread(ie->ie_source); } #ifndef INTR_FILTER /* * This is the main code for interrupt threads. */ static void ithread_loop(void *arg) { struct intr_thread *ithd; struct intr_event *ie; struct thread *td; struct proc *p; int wake; td = curthread; p = td->td_proc; ithd = (struct intr_thread *)arg; KASSERT(ithd->it_thread == td, ("%s: ithread and proc linkage out of sync", __func__)); ie = ithd->it_event; ie->ie_count = 0; wake = 0; /* * As long as we have interrupts outstanding, go through the * list of handlers, giving each one a go at it. */ for (;;) { /* * If we are an orphaned thread, then just die. */ if (ithd->it_flags & IT_DEAD) { CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__, p->p_pid, td->td_name); free(ithd, M_ITHREAD); kthread_exit(); } /* * Service interrupts. If another interrupt arrives while * we are running, it will set it_need to note that we * should make another pass. */ while (atomic_load_acq_int(&ithd->it_need) != 0) { /* * This might need a full read and write barrier * to make sure that this write posts before any * of the memory or device accesses in the * handlers. */ atomic_store_rel_int(&ithd->it_need, 0); ithread_execute_handlers(p, ie); } WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); mtx_assert(&Giant, MA_NOTOWNED); /* * Processed all our interrupts. Now get the sched * lock. This may take a while and it_need may get * set again, so we have to check it again. */ thread_lock(td); if ((atomic_load_acq_int(&ithd->it_need) == 0) && !(ithd->it_flags & (IT_DEAD | IT_WAIT))) { TD_SET_IWAIT(td); ie->ie_count = 0; mi_switch(SW_VOL | SWT_IWAIT, NULL); } if (ithd->it_flags & IT_WAIT) { wake = 1; ithd->it_flags &= ~IT_WAIT; } thread_unlock(td); if (wake) { wakeup(ithd); wake = 0; } } } /* * Main interrupt handling body. * * Input: * o ie: the event connected to this interrupt. * o frame: some archs (i.e. i386) pass a frame to some. * handlers as their main argument. * Return value: * o 0: everything ok. * o EINVAL: stray interrupt. 
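The storm logic in ithread_execute_handlers() above counts back-to-back interrupts and, once the count reaches hw.intr_storm_threshold (default 1000), pauses one tick per pass instead of re-running at full speed; the count is only reset when the ithread goes back to interrupt-wait. A toy model of just that decision, with ppsratecheck() and pause() replaced by a return value:

    #include <stdbool.h>
    #include <stdio.h>

    static int intr_storm_threshold = 1000;  /* sysctl default shown above */

    /* Returns true when the ithread should sleep one tick before re-running. */
    static bool
    storm_check(int *ie_count)
    {
        if (intr_storm_threshold != 0 && *ie_count >= intr_storm_threshold)
            return (true);          /* throttle: pause("istorm", 1) */
        (*ie_count)++;              /* one more back-to-back interrupt */
        return (false);
    }

    int
    main(void)
    {
        int count = 0, throttled = 0;

        for (int i = 0; i < 1500; i++)
            if (storm_check(&count))
                throttled++;
        /* first 1000 passes count up, the remaining 500 are throttled */
        printf("count=%d throttled=%d\n", count, throttled);
        return (0);
    }

Setting the sysctl to 0 disables the check entirely, as the first condition above shows.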
*/ int intr_event_handle(struct intr_event *ie, struct trapframe *frame) { struct intr_handler *ih; struct trapframe *oldframe; struct thread *td; int error, ret, thread; td = curthread; #ifdef KSTACK_USAGE_PROF intr_prof_stack_use(td, frame); #endif /* An interrupt with no event or handlers is a stray interrupt. */ if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers)) return (EINVAL); /* * Execute fast interrupt handlers directly. * To support clock handlers, if a handler registers * with a NULL argument, then we pass it a pointer to * a trapframe as its argument. */ td->td_intr_nesting_level++; thread = 0; ret = 0; critical_enter(); oldframe = td->td_intr_frame; td->td_intr_frame = frame; TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { if (ih->ih_filter == NULL) { thread = 1; continue; } CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__, ih->ih_filter, ih->ih_argument == NULL ? frame : ih->ih_argument, ih->ih_name); if (ih->ih_argument == NULL) ret = ih->ih_filter(frame); else ret = ih->ih_filter(ih->ih_argument); KASSERT(ret == FILTER_STRAY || ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 && (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0), ("%s: incorrect return value %#x from %s", __func__, ret, ih->ih_name)); /* * Wrapper handler special handling: * * in some particular cases (like pccard and pccbb), * the _real_ device handler is wrapped in a couple of * functions - a filter wrapper and an ithread wrapper. * In this case (and just in this case), the filter wrapper * could ask the system to schedule the ithread and mask * the interrupt source if the wrapped handler is composed * of just an ithread handler. * * TODO: write a generic wrapper to avoid people rolling * their own */ if (!thread) { if (ret == FILTER_SCHEDULE_THREAD) thread = 1; } } td->td_intr_frame = oldframe; if (thread) { if (ie->ie_pre_ithread != NULL) ie->ie_pre_ithread(ie->ie_source); } else { if (ie->ie_post_filter != NULL) ie->ie_post_filter(ie->ie_source); } /* Schedule the ithread if needed. */ if (thread) { error = intr_event_schedule_thread(ie); -#ifndef XEN KASSERT(error == 0, ("bad stray interrupt")); -#else - if (error != 0) - log(LOG_WARNING, "bad stray interrupt"); -#endif } critical_exit(); td->td_intr_nesting_level--; return (0); } #else /* * This is the main code for interrupt threads. */ static void ithread_loop(void *arg) { struct intr_thread *ithd; struct intr_handler *ih; struct intr_event *ie; struct thread *td; struct proc *p; int priv; int wake; td = curthread; p = td->td_proc; ih = (struct intr_handler *)arg; priv = (ih->ih_thread != NULL) ? 1 : 0; ithd = (priv) ? ih->ih_thread : ih->ih_event->ie_thread; KASSERT(ithd->it_thread == td, ("%s: ithread and proc linkage out of sync", __func__)); ie = ithd->it_event; ie->ie_count = 0; wake = 0; /* * As long as we have interrupts outstanding, go through the * list of handlers, giving each one a go at it. */ for (;;) { /* * If we are an orphaned thread, then just die. */ if (ithd->it_flags & IT_DEAD) { CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__, p->p_pid, td->td_name); free(ithd, M_ITHREAD); kthread_exit(); } /* * Service interrupts. If another interrupt arrives while * we are running, it will set it_need to note that we * should make another pass. */ while (atomic_load_acq_int(&ithd->it_need) != 0) { /* * This might need a full read and write barrier * to make sure that this write posts before any * of the memory or device accesses in the * handlers. 
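The KASSERT above encodes the filter return contract: a filter must return exactly FILTER_STRAY, or any combination of FILTER_HANDLED and FILTER_SCHEDULE_THREAD with no other bits set. A standalone check of the same condition; the flag values below are illustrative placeholders, the real definitions live in <sys/bus.h>:

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative values; see <sys/bus.h> for the real definitions. */
    #define FILTER_STRAY            0x01
    #define FILTER_HANDLED          0x02
    #define FILTER_SCHEDULE_THREAD  0x04

    /* Mirrors the KASSERT condition in intr_event_handle(). */
    static bool
    filter_return_ok(int ret)
    {
        if (ret == FILTER_STRAY)
            return (true);
        return ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 &&
            (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0);
    }

    int
    main(void)
    {
        printf("%d\n", filter_return_ok(FILTER_HANDLED));               /* 1 */
        printf("%d\n", filter_return_ok(FILTER_HANDLED |
            FILTER_SCHEDULE_THREAD));                                   /* 1 */
        printf("%d\n", filter_return_ok(FILTER_STRAY));                 /* 1 */
        printf("%d\n", filter_return_ok(FILTER_STRAY | FILTER_HANDLED));/* 0 */
        printf("%d\n", filter_return_ok(0));                            /* 0 */
        return (0);
    }

The combined FILTER_HANDLED | FILTER_SCHEDULE_THREAD case is exactly the wrapper-handler situation described in the comment above.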
*/ atomic_store_rel_int(&ithd->it_need, 0); if (priv) priv_ithread_execute_handler(p, ih); else ithread_execute_handlers(p, ie); } WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); mtx_assert(&Giant, MA_NOTOWNED); /* * Processed all our interrupts. Now get the sched * lock. This may take a while and it_need may get * set again, so we have to check it again. */ thread_lock(td); if ((atomic_load_acq_int(&ithd->it_need) == 0) && !(ithd->it_flags & (IT_DEAD | IT_WAIT))) { TD_SET_IWAIT(td); ie->ie_count = 0; mi_switch(SW_VOL | SWT_IWAIT, NULL); } if (ithd->it_flags & IT_WAIT) { wake = 1; ithd->it_flags &= ~IT_WAIT; } thread_unlock(td); if (wake) { wakeup(ithd); wake = 0; } } } /* * Main loop for interrupt filter. * * Some architectures (i386, amd64 and arm) require the optional frame * parameter, and use it as the main argument for fast handler execution * when ih_argument == NULL. * * Return value: * o FILTER_STRAY: No filter recognized the event, and no * filter-less handler is registered on this * line. * o FILTER_HANDLED: A filter claimed the event and served it. * o FILTER_SCHEDULE_THREAD: No filter claimed the event, but there's at * least one filter-less handler on this line. * o FILTER_HANDLED | * FILTER_SCHEDULE_THREAD: A filter claimed the event, and asked for * scheduling the per-handler ithread. * * In case an ithread has to be scheduled, in *ithd there will be a * pointer to a struct intr_thread containing the thread to be * scheduled. */ static int intr_filter_loop(struct intr_event *ie, struct trapframe *frame, struct intr_thread **ithd) { struct intr_handler *ih; void *arg; int ret, thread_only; ret = 0; thread_only = 0; TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { /* * Execute fast interrupt handlers directly. * To support clock handlers, if a handler registers * with a NULL argument, then we pass it a pointer to * a trapframe as its argument. */ arg = ((ih->ih_argument == NULL) ? frame : ih->ih_argument); CTR5(KTR_INTR, "%s: exec %p/%p(%p) for %s", __func__, ih->ih_filter, ih->ih_handler, arg, ih->ih_name); if (ih->ih_filter != NULL) ret = ih->ih_filter(arg); else { thread_only = 1; continue; } KASSERT(ret == FILTER_STRAY || ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 && (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0), ("%s: incorrect return value %#x from %s", __func__, ret, ih->ih_name)); if (ret & FILTER_STRAY) continue; else { *ithd = ih->ih_thread; return (ret); } } /* * No filters handled the interrupt and we have at least * one handler without a filter. In this case, we schedule * all of the filter-less handlers to run in the ithread. */ if (thread_only) { *ithd = ie->ie_thread; return (FILTER_SCHEDULE_THREAD); } return (FILTER_STRAY); } /* * Main interrupt handling body. * * Input: * o ie: the event connected to this interrupt. * o frame: some archs (i.e. i386) pass a frame to some. * handlers as their main argument. * Return value: * o 0: everything ok. * o EINVAL: stray interrupt. 
*/ int intr_event_handle(struct intr_event *ie, struct trapframe *frame) { struct intr_thread *ithd; struct trapframe *oldframe; struct thread *td; int thread; ithd = NULL; td = curthread; if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers)) return (EINVAL); td->td_intr_nesting_level++; thread = 0; critical_enter(); oldframe = td->td_intr_frame; td->td_intr_frame = frame; thread = intr_filter_loop(ie, frame, &ithd); if (thread & FILTER_HANDLED) { if (ie->ie_post_filter != NULL) ie->ie_post_filter(ie->ie_source); } else { if (ie->ie_pre_ithread != NULL) ie->ie_pre_ithread(ie->ie_source); } td->td_intr_frame = oldframe; critical_exit(); /* Interrupt storm logic */ if (thread & FILTER_STRAY) { ie->ie_count++; if (ie->ie_count < intr_storm_threshold) printf("Interrupt stray detection not present\n"); } /* Schedule an ithread if needed. */ if (thread & FILTER_SCHEDULE_THREAD) { if (intr_event_schedule_thread(ie, ithd) != 0) panic("%s: impossible stray interrupt", __func__); } td->td_intr_nesting_level--; return (0); } #endif #ifdef DDB /* * Dump details about an interrupt handler */ static void db_dump_intrhand(struct intr_handler *ih) { int comma; db_printf("\t%-10s ", ih->ih_name); switch (ih->ih_pri) { case PI_REALTIME: db_printf("CLK "); break; case PI_AV: db_printf("AV "); break; case PI_TTY: db_printf("TTY "); break; case PI_NET: db_printf("NET "); break; case PI_DISK: db_printf("DISK"); break; case PI_DULL: db_printf("DULL"); break; default: if (ih->ih_pri >= PI_SOFT) db_printf("SWI "); else db_printf("%4u", ih->ih_pri); break; } db_printf(" "); if (ih->ih_filter != NULL) { db_printf("[F]"); db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC); } if (ih->ih_handler != NULL) { if (ih->ih_filter != NULL) db_printf(","); db_printf("[H]"); db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC); } db_printf("(%p)", ih->ih_argument); if (ih->ih_need || (ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD | IH_MPSAFE)) != 0) { db_printf(" {"); comma = 0; if (ih->ih_flags & IH_EXCLUSIVE) { if (comma) db_printf(", "); db_printf("EXCL"); comma = 1; } if (ih->ih_flags & IH_ENTROPY) { if (comma) db_printf(", "); db_printf("ENTROPY"); comma = 1; } if (ih->ih_flags & IH_DEAD) { if (comma) db_printf(", "); db_printf("DEAD"); comma = 1; } if (ih->ih_flags & IH_MPSAFE) { if (comma) db_printf(", "); db_printf("MPSAFE"); comma = 1; } if (ih->ih_need) { if (comma) db_printf(", "); db_printf("NEED"); } db_printf("}"); } db_printf("\n"); } /* * Dump details about a event. 
*/ void db_dump_intr_event(struct intr_event *ie, int handlers) { struct intr_handler *ih; struct intr_thread *it; int comma; db_printf("%s ", ie->ie_fullname); it = ie->ie_thread; if (it != NULL) db_printf("(pid %d)", it->it_thread->td_proc->p_pid); else db_printf("(no thread)"); if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY | IE_ADDING_THREAD)) != 0 || (it != NULL && it->it_need)) { db_printf(" {"); comma = 0; if (ie->ie_flags & IE_SOFT) { db_printf("SOFT"); comma = 1; } if (ie->ie_flags & IE_ENTROPY) { if (comma) db_printf(", "); db_printf("ENTROPY"); comma = 1; } if (ie->ie_flags & IE_ADDING_THREAD) { if (comma) db_printf(", "); db_printf("ADDING_THREAD"); comma = 1; } if (it != NULL && it->it_need) { if (comma) db_printf(", "); db_printf("NEED"); } db_printf("}"); } db_printf("\n"); if (handlers) TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) db_dump_intrhand(ih); } /* * Dump data about interrupt handlers */ DB_SHOW_COMMAND(intr, db_show_intr) { struct intr_event *ie; int all, verbose; verbose = strchr(modif, 'v') != NULL; all = strchr(modif, 'a') != NULL; TAILQ_FOREACH(ie, &event_list, ie_list) { if (!all && TAILQ_EMPTY(&ie->ie_handlers)) continue; db_dump_intr_event(ie, verbose); if (db_pager_quit) break; } } #endif /* DDB */ /* * Start standard software interrupt threads */ static void start_softintr(void *dummy) { if (swi_add(NULL, "vm", swi_vm, NULL, SWI_VM, INTR_MPSAFE, &vm_ih)) panic("died while creating vm swi ithread"); } SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, NULL); /* * Sysctls used by systat and others: hw.intrnames and hw.intrcnt. * The data for this machine dependent, and the declarations are in machine * dependent code. The layout of intrnames and intrcnt however is machine * independent. * * We do not know the length of intrcnt and intrnames at compile time, so * calculate things at run time. */ static int sysctl_intrnames(SYSCTL_HANDLER_ARGS) { return (sysctl_handle_opaque(oidp, intrnames, sintrnames, req)); } SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, sysctl_intrnames, "", "Interrupt Names"); static int sysctl_intrcnt(SYSCTL_HANDLER_ARGS) { #ifdef SCTL_MASK32 uint32_t *intrcnt32; unsigned i; int error; if (req->flags & SCTL_MASK32) { if (!req->oldptr) return (sysctl_handle_opaque(oidp, NULL, sintrcnt / 2, req)); intrcnt32 = malloc(sintrcnt / 2, M_TEMP, M_NOWAIT); if (intrcnt32 == NULL) return (ENOMEM); for (i = 0; i < sintrcnt / sizeof (u_long); i++) intrcnt32[i] = intrcnt[i]; error = sysctl_handle_opaque(oidp, intrcnt32, sintrcnt / 2, req); free(intrcnt32, M_TEMP); return (error); } #endif return (sysctl_handle_opaque(oidp, intrcnt, sintrcnt, req)); } SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, sysctl_intrcnt, "", "Interrupt Counts"); #ifdef DDB /* * DDB command to dump the interrupt statistics. */ DB_SHOW_COMMAND(intrcnt, db_show_intrcnt) { u_long *i; char *cp; u_int j; cp = intrnames; j = 0; for (i = intrcnt; j < (sintrcnt / sizeof(u_long)) && !db_pager_quit; i++, j++) { if (*cp == '\0') break; if (*i != 0) db_printf("%s\t%lu\n", cp, *i); cp += strlen(cp) + 1; } } #endif Index: head/sys/kern/kern_synch.c =================================================================== --- head/sys/kern/kern_synch.c (revision 282273) +++ head/sys/kern/kern_synch.c (revision 282274) @@ -1,620 +1,611 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #include -#ifdef XEN -#include -#include -#include -#endif - #define KTDSTATE(td) \ (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ ((td)->td_inhibitors & TDI_IWAIT) != 0 ? "iwait" : "yielding") static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL); int hogticks; static uint8_t pause_wchan[MAXCPU]; static struct callout loadav_callout; struct loadavg averunnable = { {0, 0, 0}, FSCALE }; /* load average, of runnable procs */ /* * Constants for averages over 1, 5, and 15 minutes * when sampling at 5 second intervals. */ static fixpt_t cexp[3] = { 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 0.9944598480048967 * FSCALE, /* exp(-1/180) */ }; /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */ SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE, ""); static void loadav(void *arg); SDT_PROVIDER_DECLARE(sched); SDT_PROBE_DEFINE(sched, , , preempt); static void sleepinit(void *unused) { hogticks = (hz / 10) * 2; /* Default only. */ init_sleepqueues(); } /* * vmem tries to lock the sleepq mutexes when free'ing kva, so make sure * it is available. */ SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, 0); /* * General sleep call. 
Suspends the current thread until a wakeup is * performed on the specified identifier. The thread will then be made * runnable with the specified priority. Sleeps at most sbt units of time * (0 means no timeout). If pri includes the PCATCH flag, let signals * interrupt the sleep, otherwise ignore them while sleeping. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal becomes pending, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). * * The lock argument is unlocked before the caller is suspended, and * re-locked before _sleep() returns. If priority includes the PDROP * flag the lock is not re-locked before returning. */ int _sleep(void *ident, struct lock_object *lock, int priority, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td; struct proc *p; struct lock_class *class; uintptr_t lock_state; int catch, pri, rval, sleepq_flags; WITNESS_SAVE_DECL(lock_witness); td = curthread; p = td->td_proc; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0, wmesg); #endif WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Sleeping on \"%s\"", wmesg); KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL, ("sleeping without a lock")); KASSERT(p != NULL, ("msleep1")); KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); if (priority & PDROP) KASSERT(lock != NULL && lock != &Giant.lock_object, ("PDROP requires a non-Giant lock")); if (lock != NULL) class = LOCK_CLASS(lock); else class = NULL; if (cold || SCHEDULER_STOPPED()) { /* * During autoconfiguration, just return; * don't run any other threads or panic below, * in case this is the idle thread and already asleep. * XXX: this used to do "s = splhigh(); splx(safepri); * splx(s);" to give interrupts a chance, but there is * no way to give interrupts a chance now. */ if (lock != NULL && priority & PDROP) class->lc_unlock(lock); return (0); } catch = priority & PCATCH; pri = priority & PRIMASK; /* * If we are already on a sleep queue, then remove us from that * sleep queue first. We have to do this to handle recursive * sleeps. */ if (TD_ON_SLEEPQ(td)) sleepq_remove(td, td->td_wchan); if ((uint8_t *)ident >= &pause_wchan[0] && (uint8_t *)ident <= &pause_wchan[MAXCPU - 1]) sleepq_flags = SLEEPQ_PAUSE; else sleepq_flags = SLEEPQ_SLEEP; if (catch) sleepq_flags |= SLEEPQ_INTERRUPTIBLE; sleepq_lock(ident); CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, p->p_pid, td->td_name, wmesg, ident); if (lock == &Giant.lock_object) mtx_assert(&Giant, MA_OWNED); DROP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(class->lc_flags & LC_SLEEPABLE)) { WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); } else /* GCC needs to follow the Yellow Brick Road */ lock_state = -1; /* * We put ourselves on the sleep queue and start our timeout * before calling thread_suspend_check, as we could stop there, * and a wakeup or a SIGCONT (or both) could occur while we were * stopped without resuming us. Thus, we must be ready for sleep * when cursig() is called. If the wakeup happens while we're * stopped, then td will no longer be on a sleep queue upon * return from cursig(). 
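The contract documented above for _sleep() is what msleep(9) and tsleep(9) expose to drivers. A hedged usage sketch, assuming a hypothetical softc with its own mutex (sc_mtx), a completion flag (sc_done) and an invented wait message; none of these names come from this change:

    /*
     * Illustrative only: wait up to one second, interruptibly, for another
     * thread or an interrupt handler to set sc->sc_done and wakeup(9) us.
     */
    mtx_lock(&sc->sc_mtx);
    while (!sc->sc_done) {
        error = msleep(&sc->sc_done, &sc->sc_mtx, PCATCH, "xmplwt", hz);
        if (error == EWOULDBLOCK)       /* timed out */
            break;
        if (error != 0)                 /* EINTR/ERESTART: signal caught */
            break;
    }
    mtx_unlock(&sc->sc_mtx);

The mutex is dropped while sleeping and reacquired before msleep() returns, exactly as the lock/PDROP rules above describe.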
*/ sleepq_add(ident, lock, wmesg, sleepq_flags, 0); if (sbt != 0) sleepq_set_timeout_sbt(ident, sbt, pr, flags); if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { sleepq_release(ident); WITNESS_SAVE(lock, lock_witness); lock_state = class->lc_unlock(lock); sleepq_lock(ident); } if (sbt != 0 && catch) rval = sleepq_timedwait_sig(ident, pri); else if (sbt != 0) rval = sleepq_timedwait(ident, pri); else if (catch) rval = sleepq_wait_sig(ident, pri); else { sleepq_wait(ident, pri); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) { class->lc_lock(lock, lock_state); WITNESS_RESTORE(lock, lock_witness); } return (rval); } int msleep_spin_sbt(void *ident, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td; struct proc *p; int rval; WITNESS_SAVE_DECL(mtx); td = curthread; p = td->td_proc; KASSERT(mtx != NULL, ("sleeping without a mutex")); KASSERT(p != NULL, ("msleep1")); KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); if (cold || SCHEDULER_STOPPED()) { /* * During autoconfiguration, just return; * don't run any other threads or panic below, * in case this is the idle thread and already asleep. * XXX: this used to do "s = splhigh(); splx(safepri); * splx(s);" to give interrupts a chance, but there is * no way to give interrupts a chance now. */ return (0); } sleepq_lock(ident); CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)", td->td_tid, p->p_pid, td->td_name, wmesg, ident); DROP_GIANT(); mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED); WITNESS_SAVE(&mtx->lock_object, mtx); mtx_unlock_spin(mtx); /* * We put ourselves on the sleep queue and start our timeout. */ sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0); if (sbt != 0) sleepq_set_timeout_sbt(ident, sbt, pr, flags); /* * Can't call ktrace with any spin locks held so it can lock the * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold * any spin lock. Thus, we have to drop the sleepq spin lock while * we handle those requests. This is safe since we have placed our * thread on the sleep queue already. */ #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) { sleepq_release(ident); ktrcsw(1, 0, wmesg); sleepq_lock(ident); } #endif #ifdef WITNESS sleepq_release(ident); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"", wmesg); sleepq_lock(ident); #endif if (sbt != 0) rval = sleepq_timedwait(ident, 0); else { sleepq_wait(ident, 0); rval = 0; } #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 0, wmesg); #endif PICKUP_GIANT(); mtx_lock_spin(mtx); WITNESS_RESTORE(&mtx->lock_object, mtx); return (rval); } /* * pause() delays the calling thread by the given number of system ticks. * During cold bootup, pause() uses the DELAY() function instead of * the tsleep() function to do the waiting. The "timo" argument must be * greater than or equal to zero. A "timo" value of zero is equivalent * to a "timo" value of one. 
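pause(9), described above, gives a plain tick-based delay on the same machinery; it needs no wait channel, nothing ever calls wakeup() on it, and during early boot it spins in DELAY() instead of sleeping. A brief usage sketch (device_is_busy() is a hypothetical helper and the ~100 ms figure assumes the default hz):

    /* Illustrative only: back off roughly 100 ms per retry. */
    while (device_is_busy(sc))
        pause("devbsy", hz / 10);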
*/ int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { KASSERT(sbt >= 0, ("pause: timeout must be >= 0")); /* silently convert invalid timeouts */ if (sbt == 0) sbt = tick_sbt; if (cold || kdb_active) { /* * We delay one second at a time to avoid overflowing the * system specific DELAY() function(s): */ while (sbt >= SBT_1S) { DELAY(1000000); sbt -= SBT_1S; } /* Do the delay remainder, if any */ sbt = (sbt + SBT_1US - 1) / SBT_1US; if (sbt > 0) DELAY(sbt); return (0); } return (_sleep(&pause_wchan[curcpu], NULL, 0, wmesg, sbt, pr, flags)); } /* * Make all threads sleeping on the specified identifier runnable. */ void wakeup(void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); if (wakeup_swapper) { KASSERT(ident != &proc0, ("wakeup and wakeup_swapper and proc0")); kick_proc0(); } } /* * Make a thread sleeping on the specified identifier runnable. * May wake more than one thread if a target thread is currently * swapped out. */ void wakeup_one(void *ident) { int wakeup_swapper; sleepq_lock(ident); wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0); sleepq_release(ident); if (wakeup_swapper) kick_proc0(); } static void kdb_switch(void) { thread_unlock(curthread); kdb_backtrace(); kdb_reenter(); panic("%s: did not reenter debugger", __func__); } /* * The machine independent parts of context switching. */ void mi_switch(int flags, struct thread *newtd) { uint64_t runtime, new_switchtime; struct thread *td; struct proc *p; td = curthread; /* XXX */ THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); p = td->td_proc; /* XXX */ KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code")); #ifdef INVARIANTS if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td)) mtx_assert(&Giant, MA_NOTOWNED); #endif KASSERT(td->td_critnest == 1 || panicstr, ("mi_switch: switch in a critical section")); KASSERT((flags & (SW_INVOL | SW_VOL)) != 0, ("mi_switch: switch must be voluntary or involuntary")); KASSERT(newtd != curthread, ("mi_switch: preempting back to ourself")); /* * Don't perform context switches from the debugger. */ if (kdb_active) kdb_switch(); if (SCHEDULER_STOPPED()) return; if (flags & SW_VOL) { td->td_ru.ru_nvcsw++; td->td_swvoltick = ticks; } else td->td_ru.ru_nivcsw++; #ifdef SCHED_STATS SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]); #endif /* * Compute the amount of time during which the current * thread was running, and add that to its total so far. 
*/ new_switchtime = cpu_ticks(); runtime = new_switchtime - PCPU_GET(switchtime); td->td_runtime += runtime; td->td_incruntime += runtime; PCPU_SET(switchtime, new_switchtime); td->td_generation++; /* bump preempt-detect counter */ PCPU_INC(cnt.v_swtch); PCPU_SET(switchticks, ticks); CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td->td_sched, p->p_pid, td->td_name); #if (KTR_COMPILE & KTR_SCHED) != 0 if (TD_IS_IDLETHREAD(td)) KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle", "prio:%d", td->td_priority); else KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td), "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, "lockname:\"%s\"", td->td_lockname); #endif SDT_PROBE0(sched, , , preempt); -#ifdef XEN - PT_UPDATES_FLUSH(); -#endif sched_switch(td, newtd, flags); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", "prio:%d", td->td_priority); CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td->td_sched, p->p_pid, td->td_name); /* * If the last thread was exiting, finish cleaning it up. */ if ((td = PCPU_GET(deadthread))) { PCPU_SET(deadthread, NULL); thread_stash(td); } } /* * Change thread state to be runnable, placing it on the run queue if * it is in memory. If it is swapped out, return true so our caller * will know to awaken the swapper. */ int setrunnable(struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(td->td_proc->p_state != PRS_ZOMBIE, ("setrunnable: pid %d is a zombie", td->td_proc->p_pid)); switch (td->td_state) { case TDS_RUNNING: case TDS_RUNQ: return (0); case TDS_INHIBITED: /* * If we are only inhibited because we are swapped out * then arange to swap in this process. Otherwise just return. */ if (td->td_inhibitors != TDI_SWAPPED) return (0); /* FALLTHROUGH */ case TDS_CAN_RUN: break; default: printf("state is 0x%x", td->td_state); panic("setrunnable(2)"); } if ((td->td_flags & TDF_INMEM) == 0) { if ((td->td_flags & TDF_SWAPINREQ) == 0) { td->td_flags |= TDF_SWAPINREQ; return (1); } } else sched_wakeup(td); return (0); } /* * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. */ static void loadav(void *arg) { int i, nrun; struct loadavg *avg; nrun = sched_load(); avg = &averunnable; for (i = 0; i < 3; i++) avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; /* * Schedule the next update to occur after 5 seconds, but add a * random variation to avoid synchronisation with processes that * run at regular intervals. */ callout_reset_sbt(&loadav_callout, SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US, loadav, NULL, C_DIRECT_EXEC | C_PREL(32)); } /* ARGSUSED */ static void synch_setup(void *dummy) { callout_init(&loadav_callout, CALLOUT_MPSAFE); /* Kick off timeout driven events by calling first time. */ loadav(NULL); } int should_yield(void) { return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks); } void maybe_yield(void) { if (should_yield()) kern_yield(PRI_USER); } void kern_yield(int prio) { struct thread *td; td = curthread; DROP_GIANT(); thread_lock(td); if (prio == PRI_USER) prio = td->td_user_pri; if (prio >= 0) sched_prio(td, prio); mi_switch(SW_VOL | SWT_RELINQUISH, NULL); thread_unlock(td); PICKUP_GIANT(); } /* * General purpose yield system call. 
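loadav() above keeps the three load averages in fixed point, scaled by FSCALE, and decays them every ~5 seconds with the precomputed exp() factors. A standalone rendering of one such series; FSHIFT/FSCALE mirror <sys/param.h>, the constant is the 1-minute factor from cexp[0] above, and the fixed nrun is an assumption for the demonstration:

    #include <stdio.h>

    #define FSHIFT  11              /* as in <sys/param.h> */
    #define FSCALE  (1 << FSHIFT)

    int
    main(void)
    {
        /* exp(-1/12) * FSCALE, the 1-minute factor from cexp[0] above */
        long cexp0 = (long)(0.9200444146293232 * FSCALE);
        long ldavg = 0;             /* fixed-point load, scaled by FSCALE */
        int nrun = 2;               /* pretend sched_load() always says 2 */

        /* One update per 5-second sample; converges towards nrun. */
        for (int i = 1; i <= 24; i++) {
            ldavg = (cexp0 * ldavg + nrun * FSCALE * (FSCALE - cexp0))
                >> FSHIFT;
            if (i % 6 == 0)
                printf("after %3d s: %.3f\n", i * 5,
                    (double)ldavg / FSCALE);
        }
        return (0);
    }

The 5- and 15-minute averages use the same update, only with the slower-decaying factors from the cexp table.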
*/ int sys_yield(struct thread *td, struct yield_args *uap) { thread_lock(td); if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE) sched_prio(td, PRI_MAX_TIMESHARE); mi_switch(SW_VOL | SWT_RELINQUISH, NULL); thread_unlock(td); td->td_retval[0] = 0; return (0); } Index: head/sys/kern/subr_param.c =================================================================== --- head/sys/kern/subr_param.c (revision 282273) +++ head/sys/kern/subr_param.c (revision 282274) @@ -1,304 +1,300 @@ /*- * Copyright (c) 1980, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)param.c 8.3 (Berkeley) 8/20/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_param.h" #include "opt_msgbuf.h" #include "opt_maxusers.h" #include #include #include #include #include #include #include #include #include #include /* * System parameter formulae. */ #ifndef HZ # if defined(__mips__) || defined(__arm__) # define HZ 100 # else # define HZ 1000 # endif # ifndef HZ_VM # define HZ_VM 100 # endif #else # ifndef HZ_VM # define HZ_VM HZ # endif #endif #define NPROC (20 + 16 * maxusers) #ifndef NBUF #define NBUF 0 #endif #ifndef MAXFILES #define MAXFILES (maxproc * 2) #endif static int sysctl_kern_vm_guest(SYSCTL_HANDLER_ARGS); int hz; /* system clock's frequency */ int tick; /* usec per tick (1000000 / hz) */ struct bintime tick_bt; /* bintime per tick (1s / hz) */ sbintime_t tick_sbt; int maxusers; /* base tunable */ int maxproc; /* maximum # of processes */ int maxprocperuid; /* max # of procs per user */ int maxfiles; /* sys. 
wide open files limit */ int maxfilesperproc; /* per-proc open files limit */ int msgbufsize; /* size of kernel message buffer */ int nbuf; int bio_transient_maxcnt; int ngroups_max; /* max # groups per process */ int nswbuf; pid_t pid_max = PID_MAX; long maxswzone; /* max swmeta KVA storage */ long maxbcache; /* max buffer cache KVA storage */ long maxpipekva; /* Limit on pipe KVA */ -#ifdef XEN -int vm_guest = VM_GUEST_XEN; -#else int vm_guest = VM_GUEST_NO; /* Running as virtual machine guest? */ -#endif u_long maxtsiz; /* max text size */ u_long dfldsiz; /* initial data size limit */ u_long maxdsiz; /* max data size */ u_long dflssiz; /* initial stack size limit */ u_long maxssiz; /* max stack size */ u_long sgrowsiz; /* amount to grow stack */ SYSCTL_INT(_kern, OID_AUTO, hz, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &hz, 0, "Number of clock ticks per second"); SYSCTL_INT(_kern, OID_AUTO, nbuf, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &nbuf, 0, "Number of buffers in the buffer cache"); SYSCTL_INT(_kern, OID_AUTO, nswbuf, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &nswbuf, 0, "Number of swap buffers"); SYSCTL_INT(_kern, OID_AUTO, msgbufsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &msgbufsize, 0, "Size of the kernel message buffer"); SYSCTL_LONG(_kern, OID_AUTO, maxswzone, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxswzone, 0, "Maximum memory for swap metadata"); SYSCTL_LONG(_kern, OID_AUTO, maxbcache, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxbcache, 0, "Maximum value of vfs.maxbufspace"); SYSCTL_INT(_kern, OID_AUTO, bio_transient_maxcnt, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &bio_transient_maxcnt, 0, "Maximum number of transient BIOs mappings"); SYSCTL_ULONG(_kern, OID_AUTO, maxtsiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &maxtsiz, 0, "Maximum text size"); SYSCTL_ULONG(_kern, OID_AUTO, dfldsiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &dfldsiz, 0, "Initial data size limit"); SYSCTL_ULONG(_kern, OID_AUTO, maxdsiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &maxdsiz, 0, "Maximum data size"); SYSCTL_ULONG(_kern, OID_AUTO, dflssiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &dflssiz, 0, "Initial stack size limit"); SYSCTL_ULONG(_kern, OID_AUTO, maxssiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &maxssiz, 0, "Maximum stack size"); SYSCTL_ULONG(_kern, OID_AUTO, sgrowsiz, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, &sgrowsiz, 0, "Amount to grow stack on a stack fault"); SYSCTL_PROC(_kern, OID_AUTO, vm_guest, CTLFLAG_RD | CTLTYPE_STRING, NULL, 0, sysctl_kern_vm_guest, "A", "Virtual machine guest detected?"); /* * These have to be allocated somewhere; allocating * them here forces loader errors if this file is omitted * (if they've been externed everywhere else; hah!). */ struct buf *swbuf; /* * The elements of this array are ordered based upon the values of the * corresponding enum VM_GUEST members. */ static const char *const vm_guest_sysctl_names[] = { "none", "generic", "xen", "hv", "vmware", NULL }; CTASSERT(nitems(vm_guest_sysctl_names) - 1 == VM_LAST); /* * Boot time overrides that are not scaled against main memory */ void init_param1(void) { hz = -1; TUNABLE_INT_FETCH("kern.hz", &hz); if (hz == -1) hz = vm_guest > VM_GUEST_NO ? 
HZ_VM : HZ; tick = 1000000 / hz; tick_sbt = SBT_1S / hz; tick_bt = sbttobt(tick_sbt); #ifdef VM_SWZONE_SIZE_MAX maxswzone = VM_SWZONE_SIZE_MAX; #endif TUNABLE_LONG_FETCH("kern.maxswzone", &maxswzone); #ifdef VM_BCACHE_SIZE_MAX maxbcache = VM_BCACHE_SIZE_MAX; #endif TUNABLE_LONG_FETCH("kern.maxbcache", &maxbcache); msgbufsize = MSGBUF_SIZE; TUNABLE_INT_FETCH("kern.msgbufsize", &msgbufsize); maxtsiz = MAXTSIZ; TUNABLE_ULONG_FETCH("kern.maxtsiz", &maxtsiz); dfldsiz = DFLDSIZ; TUNABLE_ULONG_FETCH("kern.dfldsiz", &dfldsiz); maxdsiz = MAXDSIZ; TUNABLE_ULONG_FETCH("kern.maxdsiz", &maxdsiz); dflssiz = DFLSSIZ; TUNABLE_ULONG_FETCH("kern.dflssiz", &dflssiz); maxssiz = MAXSSIZ; TUNABLE_ULONG_FETCH("kern.maxssiz", &maxssiz); sgrowsiz = SGROWSIZ; TUNABLE_ULONG_FETCH("kern.sgrowsiz", &sgrowsiz); /* * Let the administrator set {NGROUPS_MAX}, but disallow values * less than NGROUPS_MAX which would violate POSIX.1-2008 or * greater than INT_MAX-1 which would result in overflow. */ ngroups_max = NGROUPS_MAX; TUNABLE_INT_FETCH("kern.ngroups", &ngroups_max); if (ngroups_max < NGROUPS_MAX) ngroups_max = NGROUPS_MAX; /* * Only allow to lower the maximal pid. * Prevent setting up a non-bootable system if pid_max is too low. */ TUNABLE_INT_FETCH("kern.pid_max", &pid_max); if (pid_max > PID_MAX) pid_max = PID_MAX; else if (pid_max < 300) pid_max = 300; TUNABLE_INT_FETCH("vfs.unmapped_buf_allowed", &unmapped_buf_allowed); } /* * Boot time overrides that are scaled against main memory */ void init_param2(long physpages) { /* Base parameters */ maxusers = MAXUSERS; TUNABLE_INT_FETCH("kern.maxusers", &maxusers); if (maxusers == 0) { maxusers = physpages / (2 * 1024 * 1024 / PAGE_SIZE); if (maxusers < 32) maxusers = 32; #ifdef VM_MAX_AUTOTUNE_MAXUSERS if (maxusers > VM_MAX_AUTOTUNE_MAXUSERS) maxusers = VM_MAX_AUTOTUNE_MAXUSERS; #endif /* * Scales down the function in which maxusers grows once * we hit 384. */ if (maxusers > 384) maxusers = 384 + ((maxusers - 384) / 8); } /* * The following can be overridden after boot via sysctl. Note: * unless overriden, these macros are ultimately based on maxusers. * Limit maxproc so that kmap entries cannot be exhausted by * processes. */ maxproc = NPROC; TUNABLE_INT_FETCH("kern.maxproc", &maxproc); if (maxproc > (physpages / 12)) maxproc = physpages / 12; maxprocperuid = (maxproc * 9) / 10; /* * The default limit for maxfiles is 1/12 of the number of * physical page but not less than 16 times maxusers. * At most it can be 1/6 the number of physical pages. */ maxfiles = imax(MAXFILES, physpages / 8); TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles); if (maxfiles > (physpages / 4)) maxfiles = physpages / 4; maxfilesperproc = (maxfiles / 10) * 9; /* * Cannot be changed after boot. */ nbuf = NBUF; TUNABLE_INT_FETCH("kern.nbuf", &nbuf); TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt); /* * The default for maxpipekva is min(1/64 of the kernel address space, * max(1/64 of main memory, 512KB)). See sys_pipe.c for more details. */ maxpipekva = (physpages / 64) * PAGE_SIZE; TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva); if (maxpipekva < 512 * 1024) maxpipekva = 512 * 1024; if (maxpipekva > (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64) maxpipekva = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) / 64; } /* * Sysctl stringifying handler for kern.vm_guest. 
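/*
 * Illustrative sketch (not part of this patch): a standalone program that
 * mirrors the init_param2() auto-tuning above for maxusers, maxproc and
 * maxfiles.  It assumes a 4 KB page size and omits the
 * VM_MAX_AUTOTUNE_MAXUSERS clamp; treat it as a back-of-the-envelope aid,
 * not as the kernel's authoritative computation.
 */
#include <stdio.h>
#include <stdlib.h>

#define SK_PAGE_SIZE	4096LL

int
main(int argc, char **argv)
{
	long long membytes, physpages, maxusers, maxproc, maxfiles;

	membytes = (argc > 1) ? strtoll(argv[1], NULL, 0) : (8LL << 30);
	physpages = membytes / SK_PAGE_SIZE;

	/* One "user" per 2 MB of RAM, never fewer than 32. */
	maxusers = physpages / (2 * 1024 * 1024 / SK_PAGE_SIZE);
	if (maxusers < 32)
		maxusers = 32;
	/* Grow only 1/8 as fast once past 384, as init_param2() does. */
	if (maxusers > 384)
		maxusers = 384 + (maxusers - 384) / 8;

	/* NPROC formula, capped so kmap entries cannot be exhausted. */
	maxproc = 20 + 16 * maxusers;
	if (maxproc > physpages / 12)
		maxproc = physpages / 12;

	/* maxfiles: at least physpages / 8, at most physpages / 4. */
	maxfiles = maxproc * 2;
	if (maxfiles < physpages / 8)
		maxfiles = physpages / 8;
	if (maxfiles > physpages / 4)
		maxfiles = physpages / 4;

	printf("%lld MB RAM: maxusers=%lld maxproc=%lld maxfiles=%lld\n",
	    membytes >> 20, maxusers, maxproc, maxfiles);
	return (0);
}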
*/ static int sysctl_kern_vm_guest(SYSCTL_HANDLER_ARGS) { return (SYSCTL_OUT_STR(req, vm_guest_sysctl_names[vm_guest])); } Index: head/sys/kern/subr_trap.c =================================================================== --- head/sys/kern/subr_trap.c (revision 282273) +++ head/sys/kern/subr_trap.c (revision 282274) @@ -1,308 +1,299 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 2007 The FreeBSD Foundation * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #include #include #ifdef VIMAGE #include #endif -#ifdef XEN -#include -#include -#include -#endif - #ifdef HWPMC_HOOKS #include #endif #include /* * Define the code needed before returning to user mode, for trap and * syscall. */ void userret(struct thread *td, struct trapframe *frame) { struct proc *p = td->td_proc; CTR3(KTR_SYSC, "userret: thread %p (pid %d, %s)", td, p->p_pid, td->td_name); KASSERT((p->p_flag & P_WEXIT) == 0, ("Exiting process returns to usermode")); #if 0 #ifdef DIAGNOSTIC /* Check that we called signotify() enough. 
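/*
 * Illustrative sketch (not part of this patch): with the XEN-specific
 * initializer removed above, vm_guest always starts as VM_GUEST_NO and is
 * left to run-time hypervisor detection.  init_param1() also uses it to
 * pick the lower HZ_VM tick rate for guests, and the handler above reports
 * it through the kern.vm_guest sysctl as one of the strings in
 * vm_guest_sysctl_names ("none", "generic", "xen", "hv", "vmware").
 * A minimal userland reader:
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	char buf[32];
	size_t len = sizeof(buf);

	if (sysctlbyname("kern.vm_guest", buf, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	/* Prints e.g. "xen" in a Xen HVM guest, "none" on bare metal. */
	printf("kern.vm_guest: %.*s\n", (int)len, buf);
	return (0);
}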
*/ PROC_LOCK(p); thread_lock(td); if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 || (td->td_flags & TDF_ASTPENDING) == 0)) printf("failed to set signal flags properly for ast()\n"); thread_unlock(td); PROC_UNLOCK(p); #endif #endif #ifdef KTRACE KTRUSERRET(td); #endif /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio); /* * Let the scheduler adjust our priority etc. */ sched_userret(td); -#ifdef XEN - PT_UPDATES_FLUSH(); -#endif /* * Check for misbehavior. * * In case there is a callchain tracing ongoing because of * hwpmc(4), skip the scheduler pinning check. * hwpmc(4) subsystem, infact, will collect callchain informations * at ast() checkpoint, which is past userret(). */ WITNESS_WARN(WARN_PANIC, NULL, "userret: returning"); KASSERT(td->td_critnest == 0, ("userret: Returning in a critical section")); KASSERT(td->td_locks == 0, ("userret: Returning with %d locks held", td->td_locks)); KASSERT(td->td_rw_rlocks == 0, ("userret: Returning with %d rwlocks held in read mode", td->td_rw_rlocks)); KASSERT((td->td_pflags & TDP_NOFAULTING) == 0, ("userret: Returning with pagefaults disabled")); KASSERT(td->td_no_sleeping == 0, ("userret: Returning with sleep disabled")); KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, ("userret: Returning with with pinned thread")); KASSERT(td->td_vp_reserv == 0, ("userret: Returning while holding vnode reservation")); KASSERT((td->td_flags & TDF_SBDRY) == 0, ("userret: Returning with stop signals deferred")); #ifdef VIMAGE /* Unfortunately td_vnet_lpush needs VNET_DEBUG. */ VNET_ASSERT(curvnet == NULL, ("%s: Returning on td %p (pid %d, %s) with vnet %p set in %s", __func__, td, p->p_pid, td->td_name, curvnet, (td->td_vnet_lpush != NULL) ? td->td_vnet_lpush : "N/A")); #endif #ifdef RACCT if (racct_enable) { PROC_LOCK(p); while (p->p_throttled == 1) msleep(p->p_racct, &p->p_mtx, 0, "racct", 0); PROC_UNLOCK(p); } #endif } /* * Process an asynchronous software trap. * This is relatively easy. * This function will return with preemption disabled. */ void ast(struct trapframe *framep) { struct thread *td; struct proc *p; int flags; int sig; td = curthread; p = td->td_proc; CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, p->p_pid, p->p_comm); KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode")); WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode"); mtx_assert(&Giant, MA_NOTOWNED); THREAD_LOCK_ASSERT(td, MA_NOTOWNED); td->td_frame = framep; td->td_pticks = 0; /* * This updates the td_flag's for the checks below in one * "atomic" operation with turning off the astpending flag. * If another AST is triggered while we are handling the * AST's saved in flags, the astpending flag will be set and * ast() will be called again. */ thread_lock(td); flags = td->td_flags; td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK | TDF_NEEDRESCHED | TDF_ALRMPEND | TDF_PROFPEND | TDF_MACPEND); thread_unlock(td); PCPU_INC(cnt.v_trap); if (td->td_ucred != p->p_ucred) cred_update_thread(td); if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) { addupc_task(td, td->td_profil_addr, td->td_profil_ticks); td->td_profil_ticks = 0; td->td_pflags &= ~TDP_OWEUPC; } #ifdef HWPMC_HOOKS /* Handle Software PMC callchain capture. 
*/ if (PMC_IS_PENDING_CALLCHAIN(td)) PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_USER_CALLCHAIN_SOFT, (void *) framep); #endif if (flags & TDF_ALRMPEND) { PROC_LOCK(p); kern_psignal(p, SIGVTALRM); PROC_UNLOCK(p); } if (flags & TDF_PROFPEND) { PROC_LOCK(p); kern_psignal(p, SIGPROF); PROC_UNLOCK(p); } #ifdef MAC if (flags & TDF_MACPEND) mac_thread_userret(td); #endif if (flags & TDF_NEEDRESCHED) { #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 1, __func__); #endif thread_lock(td); sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL); thread_unlock(td); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 1, __func__); #endif } /* * Check for signals. Unlocked reads of p_pendingcnt or * p_siglist might cause process-directed signal to be handled * later. */ if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 || !SIGISEMPTY(p->p_siglist)) { PROC_LOCK(p); mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) postsig(sig); mtx_unlock(&p->p_sigacts->ps_mtx); PROC_UNLOCK(p); } /* * We need to check to see if we have to exit or wait due to a * single threading requirement or some other STOP condition. */ if (flags & TDF_NEEDSUSPCHK) { PROC_LOCK(p); thread_suspend_check(0); PROC_UNLOCK(p); } if (td->td_pflags & TDP_OLDMASK) { td->td_pflags &= ~TDP_OLDMASK; kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0); } userret(td, framep); } const char * syscallname(struct proc *p, u_int code) { static const char unknown[] = "unknown"; struct sysentvec *sv; sv = p->p_sysent; if (sv->sv_syscallnames == NULL || code >= sv->sv_size) return (unknown); return (sv->sv_syscallnames[code]); } Index: head/sys/vm/vm_page.c =================================================================== --- head/sys/vm/vm_page.c (revision 282273) +++ head/sys/vm/vm_page.c (revision 282274) @@ -1,3373 +1,3365 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 */ /*- * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * GENERAL RULES ON VM_PAGE MANIPULATION * * - A page queue lock is required when adding or removing a page from a * page queue regardless of other locks or the busy state of a page. * * * In general, no thread besides the page daemon can acquire or * hold more than one page queue lock at a time. * * * The page daemon can acquire and hold any pair of page queue * locks in any order. * * - The object lock is required when inserting or removing * pages from an object (vm_page_insert() or vm_page_remove()). * */ /* * Resident memory management module. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Associated with page of user-allocatable memory is a * page structure. 
*/ struct vm_domain vm_dom[MAXMEMDOM]; struct mtx_padalign vm_page_queue_free_mtx; struct mtx_padalign pa_lock[PA_LOCK_COUNT]; vm_page_t vm_page_array; long vm_page_array_size; long first_page; int vm_page_zero_count; static int boot_pages = UMA_BOOT_PAGES; SYSCTL_INT(_vm, OID_AUTO, boot_pages, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &boot_pages, 0, "number of pages allocated for bootstrapping the VM system"); static int pa_tryrelock_restart; SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD, &pa_tryrelock_restart, 0, "Number of tryrelock restarts"); static TAILQ_HEAD(, vm_page) blacklist_head; static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, page_blacklist, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_page_blacklist, "A", "Blacklist pages"); static uma_zone_t fakepg_zone; static struct vnode *vm_page_alloc_init(vm_page_t m); static void vm_page_cache_turn_free(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); static void vm_page_init_fakepg(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); SYSINIT(vm_page, SI_SUB_VM, SI_ORDER_SECOND, vm_page_init_fakepg, NULL); static void vm_page_init_fakepg(void *dummy) { fakepg_zone = uma_zcreate("fakepg", sizeof(struct vm_page), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); } /* Make sure that u_long is at least 64 bits when PAGE_SIZE is 32K. */ #if PAGE_SIZE == 32768 #ifdef CTASSERT CTASSERT(sizeof(u_long) >= 8); #endif #endif /* * Try to acquire a physical address lock while a pmap is locked. If we * fail to trylock we unlock and lock the pmap directly and cache the * locked pa in *locked. The caller should then restart their loop in case * the virtual to physical mapping has changed. */ int vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) { vm_paddr_t lockpa; lockpa = *locked; *locked = pa; if (lockpa) { PA_LOCK_ASSERT(lockpa, MA_OWNED); if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa)) return (0); PA_UNLOCK(lockpa); } if (PA_TRYLOCK(pa)) return (0); PMAP_UNLOCK(pmap); atomic_add_int(&pa_tryrelock_restart, 1); PA_LOCK(pa); PMAP_LOCK(pmap); return (EAGAIN); } /* * vm_set_page_size: * * Sets the page size, perhaps based upon the memory * size. Must be called before any use of page-size * dependent functions. */ void vm_set_page_size(void) { if (vm_cnt.v_page_size == 0) vm_cnt.v_page_size = PAGE_SIZE; if (((vm_cnt.v_page_size - 1) & vm_cnt.v_page_size) != 0) panic("vm_set_page_size: page size not a power of two"); } /* * vm_page_blacklist_next: * * Find the next entry in the provided string of blacklist * addresses. Entries are separated by space, comma, or newline. * If an invalid integer is encountered then the rest of the * string is skipped. Updates the list pointer to the next * character, or NULL if the string is exhausted or invalid. */ static vm_paddr_t vm_page_blacklist_next(char **list, char *end) { vm_paddr_t bad; char *cp, *pos; if (list == NULL || *list == NULL) return (0); if (**list =='\0') { *list = NULL; return (0); } /* * If there's no end pointer then the buffer is coming from * the kenv and we know it's null-terminated. 
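/*
 * Illustrative sketch (not part of this patch): the same "trylock, else
 * back off and tell the caller to restart" idea used by
 * vm_page_pa_tryrelock() above, reduced to two pthread mutexes.  All names
 * here are invented for the example.
 *
 * Lock 'inner' while 'outer' is already held.  If 'inner' cannot be taken
 * immediately, drop 'outer' to avoid a lock-order reversal, take both in
 * the safe order, and return EAGAIN so the caller can re-validate whatever
 * 'outer' was protecting, mirroring the EAGAIN contract above.
 */
#include <errno.h>
#include <pthread.h>

int
tryrelock(pthread_mutex_t *outer, pthread_mutex_t *inner)
{
	if (pthread_mutex_trylock(inner) == 0)
		return (0);		/* fast path: no restart needed */
	pthread_mutex_unlock(outer);
	pthread_mutex_lock(inner);
	pthread_mutex_lock(outer);
	return (EAGAIN);		/* caller must restart its loop */
}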
*/ if (end == NULL) end = *list + strlen(*list); /* Ensure that strtoq() won't walk off the end */ if (*end != '\0') { if (*end == '\n' || *end == ' ' || *end == ',') *end = '\0'; else { printf("Blacklist not terminated, skipping\n"); *list = NULL; return (0); } } for (pos = *list; *pos != '\0'; pos = cp) { bad = strtoq(pos, &cp, 0); if (*cp == '\0' || *cp == ' ' || *cp == ',' || *cp == '\n') { if (bad == 0) { if (++cp < end) continue; else break; } } else break; if (*cp == '\0' || ++cp >= end) *list = NULL; else *list = cp; return (trunc_page(bad)); } printf("Garbage in RAM blacklist, skipping\n"); *list = NULL; return (0); } /* * vm_page_blacklist_check: * * Iterate through the provided string of blacklist addresses, pulling * each entry out of the physical allocator free list and putting it * onto a list for reporting via the vm.page_blacklist sysctl. */ static void vm_page_blacklist_check(char *list, char *end) { vm_paddr_t pa; vm_page_t m; char *next; int ret; next = list; while (next != NULL) { if ((pa = vm_page_blacklist_next(&next, end)) == 0) continue; m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) continue; mtx_lock(&vm_page_queue_free_mtx); ret = vm_phys_unfree_page(m); mtx_unlock(&vm_page_queue_free_mtx); if (ret == TRUE) { TAILQ_INSERT_TAIL(&blacklist_head, m, listq); if (bootverbose) printf("Skipping page with pa 0x%jx\n", (uintmax_t)pa); } } } /* * vm_page_blacklist_load: * * Search for a special module named "ram_blacklist". It'll be a * plain text file provided by the user via the loader directive * of the same name. */ static void vm_page_blacklist_load(char **list, char **end) { void *mod; u_char *ptr; u_int len; mod = NULL; ptr = NULL; mod = preload_search_by_type("ram_blacklist"); if (mod != NULL) { ptr = preload_fetch_addr(mod); len = preload_fetch_size(mod); } *list = ptr; if (ptr != NULL) *end = ptr + len; else *end = NULL; return; } static int sysctl_vm_page_blacklist(SYSCTL_HANDLER_ARGS) { vm_page_t m; struct sbuf sbuf; int error, first; first = 1; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); TAILQ_FOREACH(m, &blacklist_head, listq) { sbuf_printf(&sbuf, "%s%#jx", first ? "" : ",", (uintmax_t)m->phys_addr); first = 0; } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } static void vm_page_domain_init(struct vm_domain *vmd) { struct vm_pagequeue *pq; int i; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_name) = "vm inactive pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_INACTIVE].pq_vcnt) = &vm_cnt.v_inactive_count; *__DECONST(char **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_name) = "vm active pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = &vm_cnt.v_active_count; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; vmd->vmd_oom = FALSE; vmd->vmd_pass = 0; for (i = 0; i < PQ_COUNT; i++) { pq = &vmd->vmd_pagequeues[i]; TAILQ_INIT(&pq->pq_pl); mtx_init(&pq->pq_mutex, pq->pq_name, "vm pagequeue", MTX_DEF | MTX_DUPOK); } } /* * vm_page_startup: * * Initializes the resident memory module. * * Allocates memory for the page cells, and * for the object/offset-to-page hash table headers. * Each page cell is initialized and placed on the free list. 
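/*
 * Illustrative sketch (not part of this patch): the RAM blacklist handled
 * by the functions above comes either from a preloaded "ram_blacklist"
 * module or from the vm.blacklist tunable checked later in
 * vm_page_startup(), e.g. in loader.conf:
 *
 *	vm.blacklist="0x1a2b3000,0x1a2b4000 0x20000000"
 *
 * Entries may be separated by spaces, commas or newlines.  A standalone
 * parser in the same spirit as vm_page_blacklist_next(), assuming a 4 KB
 * page size for the truncation step:
 */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	char buf[] = "0x1a2b3000,0x1a2b4000 0x20000000";
	char *cp, *pos;
	unsigned long long pa;

	for (pos = buf; *pos != '\0'; pos = cp) {
		pa = strtoull(pos, &cp, 0);
		if (cp == pos) {
			/* Not a number: mimic "Garbage in RAM blacklist". */
			fprintf(stderr, "garbage at \"%s\"\n", pos);
			break;
		}
		/* Truncate to a page boundary, as trunc_page() does. */
		printf("blacklisted page at 0x%llx\n", pa & ~0xfffULL);
		while (*cp == ' ' || *cp == ',' || *cp == '\n')
			cp++;
	}
	return (0);
}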
*/ vm_offset_t vm_page_startup(vm_offset_t vaddr) { vm_offset_t mapped; vm_paddr_t page_range; vm_paddr_t new_end; int i; vm_paddr_t pa; vm_paddr_t last_pa; char *list, *listend; vm_paddr_t end; vm_paddr_t biggestsize; vm_paddr_t low_water, high_water; int biggestone; biggestsize = 0; biggestone = 0; vaddr = round_page(vaddr); for (i = 0; phys_avail[i + 1]; i += 2) { phys_avail[i] = round_page(phys_avail[i]); phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); } -#ifdef XEN - /* - * There is no obvious reason why i386 PV Xen needs vm_page structs - * created for these pseudo-physical addresses. XXX - */ - vm_phys_add_seg(0, phys_avail[0]); -#endif - low_water = phys_avail[0]; high_water = phys_avail[1]; for (i = 0; i < vm_phys_nsegs; i++) { if (vm_phys_segs[i].start < low_water) low_water = vm_phys_segs[i].start; if (vm_phys_segs[i].end > high_water) high_water = vm_phys_segs[i].end; } for (i = 0; phys_avail[i + 1]; i += 2) { vm_paddr_t size = phys_avail[i + 1] - phys_avail[i]; if (size > biggestsize) { biggestone = i; biggestsize = size; } if (phys_avail[i] < low_water) low_water = phys_avail[i]; if (phys_avail[i + 1] > high_water) high_water = phys_avail[i + 1]; } end = phys_avail[biggestone+1]; /* * Initialize the page and queue locks. */ mtx_init(&vm_page_queue_free_mtx, "vm page free queue", NULL, MTX_DEF); for (i = 0; i < PA_LOCK_COUNT; i++) mtx_init(&pa_lock[i], "vm page", NULL, MTX_DEF); for (i = 0; i < vm_ndomains; i++) vm_page_domain_init(&vm_dom[i]); /* * Allocate memory for use when boot strapping the kernel memory * allocator. * * CTFLAG_RDTUN doesn't work during the early boot process, so we must * manually fetch the value. */ TUNABLE_INT_FETCH("vm.boot_pages", &boot_pages); new_end = end - (boot_pages * UMA_SLAB_SIZE); new_end = trunc_page(new_end); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)mapped, end - new_end); uma_startup((void *)mapped, boot_pages); #if defined(__amd64__) || defined(__i386__) || defined(__arm__) || \ defined(__mips__) /* * Allocate a bitmap to indicate that a random physical page * needs to be included in a minidump. * * The amd64 port needs this to indicate which direct map pages * need to be dumped, via calls to dump_add_page()/dump_drop_page(). * * However, i386 still needs this workspace internally within the * minidump code. In theory, they are not needed on i386, but are * included should the sf_buf code decide to use them. */ last_pa = 0; for (i = 0; dump_avail[i + 1] != 0; i += 2) if (dump_avail[i + 1] > last_pa) last_pa = dump_avail[i + 1]; page_range = last_pa / PAGE_SIZE; vm_page_dump_size = round_page(roundup2(page_range, NBBY) / NBBY); new_end -= vm_page_dump_size; vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end, new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)vm_page_dump, vm_page_dump_size); #endif #ifdef __amd64__ /* * Request that the physical pages underlying the message buffer be * included in a crash dump. Since the message buffer is accessed * through the direct map, they are not automatically included. */ pa = DMAP_TO_PHYS((vm_offset_t)msgbufp->msg_ptr); last_pa = pa + round_page(msgbufsize); while (pa < last_pa) { dump_add_page(pa); pa += PAGE_SIZE; } #endif /* * Compute the number of pages of memory that will be available for * use (taking into account the overhead of a page structure per * page). 
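/*
 * Illustrative sketch (not part of this patch): how large the vm_page_dump
 * minidump bitmap reserved above ends up being.  One bit per physical
 * page, rounded up to whole bytes (NBBY) and then to a whole page, exactly
 * as the round_page(roundup2(page_range, NBBY) / NBBY) expression does.
 * A 4 KB page size is assumed for the arithmetic.
 */
#include <stdio.h>

#define SK_PAGE_SIZE	4096ULL
#define SK_NBBY		8ULL

int
main(void)
{
	unsigned long long last_pa, pages, bits, bytes, dumpsize;

	last_pa = 8ULL << 30;			/* highest dumpable address: 8 GB */
	pages = last_pa / SK_PAGE_SIZE;		/* one bit per page */
	bits = (pages + SK_NBBY - 1) & ~(SK_NBBY - 1);	/* roundup2(pages, NBBY) */
	bytes = bits / SK_NBBY;
	dumpsize = (bytes + SK_PAGE_SIZE - 1) & ~(SK_PAGE_SIZE - 1);	/* round_page() */
	printf("%llu pages -> %llu byte dump bitmap\n", pages, dumpsize);
	return (0);
}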
*/ first_page = low_water / PAGE_SIZE; #ifdef VM_PHYSSEG_SPARSE page_range = 0; for (i = 0; i < vm_phys_nsegs; i++) { page_range += atop(vm_phys_segs[i].end - vm_phys_segs[i].start); } for (i = 0; phys_avail[i + 1] != 0; i += 2) page_range += atop(phys_avail[i + 1] - phys_avail[i]); #elif defined(VM_PHYSSEG_DENSE) page_range = high_water / PAGE_SIZE - first_page; #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif end = new_end; /* * Reserve an unmapped guard page to trap access to vm_page_array[-1]. */ vaddr += PAGE_SIZE; /* * Initialize the mem entry structures now, and put them in the free * queue. */ new_end = trunc_page(end - page_range * sizeof(struct vm_page)); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); vm_page_array = (vm_page_t) mapped; #if VM_NRESERVLEVEL > 0 /* * Allocate memory for the reservation management system's data * structures. */ new_end = vm_reserv_startup(&vaddr, new_end, high_water); #endif #if defined(__amd64__) || defined(__mips__) /* * pmap_map on amd64 and mips can come out of the direct-map, not kvm * like i386, so the pages must be tracked for a crashdump to include * this data. This includes the vm_page_array and the early UMA * bootstrap pages. */ for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE) dump_add_page(pa); #endif phys_avail[biggestone + 1] = new_end; /* * Add physical memory segments corresponding to the available * physical pages. */ for (i = 0; phys_avail[i + 1] != 0; i += 2) vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); /* * Clear all of the page structures */ bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); for (i = 0; i < page_range; i++) vm_page_array[i].order = VM_NFREEORDER; vm_page_array_size = page_range; /* * Initialize the physical memory allocator. */ vm_phys_init(); /* * Add every available physical page that is not blacklisted to * the free lists. */ vm_cnt.v_page_count = 0; vm_cnt.v_free_count = 0; for (i = 0; phys_avail[i + 1] != 0; i += 2) { pa = phys_avail[i]; last_pa = phys_avail[i + 1]; while (pa < last_pa) { vm_phys_add_page(pa); pa += PAGE_SIZE; } } TAILQ_INIT(&blacklist_head); vm_page_blacklist_load(&list, &listend); vm_page_blacklist_check(list, listend); list = kern_getenv("vm.blacklist"); vm_page_blacklist_check(list, NULL); freeenv(list); #if VM_NRESERVLEVEL > 0 /* * Initialize the reservation management system. */ vm_reserv_init(); #endif return (vaddr); } void vm_page_reference(vm_page_t m) { vm_page_aflag_set(m, PGA_REFERENCED); } /* * vm_page_busy_downgrade: * * Downgrade an exclusive busy page into a single shared busy page. */ void vm_page_busy_downgrade(vm_page_t m) { u_int x; vm_page_assert_xbusied(m); for (;;) { x = m->busy_lock; x &= VPB_BIT_WAITERS; if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1) | x)) break; } } /* * vm_page_sbusied: * * Return a positive value if the page is shared busied, 0 otherwise. */ int vm_page_sbusied(vm_page_t m) { u_int x; x = m->busy_lock; return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED); } /* * vm_page_sunbusy: * * Shared unbusy a page. 
*/ void vm_page_sunbusy(vm_page_t m) { u_int x; vm_page_assert_sbusied(m); for (;;) { x = m->busy_lock; if (VPB_SHARERS(x) > 1) { if (atomic_cmpset_int(&m->busy_lock, x, x - VPB_ONE_SHARER)) break; continue; } if ((x & VPB_BIT_WAITERS) == 0) { KASSERT(x == VPB_SHARERS_WORD(1), ("vm_page_sunbusy: invalid lock state")); if (atomic_cmpset_int(&m->busy_lock, VPB_SHARERS_WORD(1), VPB_UNBUSIED)) break; continue; } KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS), ("vm_page_sunbusy: invalid lock state for waiters")); vm_page_lock(m); if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) { vm_page_unlock(m); continue; } wakeup(m); vm_page_unlock(m); break; } } /* * vm_page_busy_sleep: * * Sleep and release the page lock, using the page pointer as wchan. * This is used to implement the hard-path of busying mechanism. * * The given page must be locked. */ void vm_page_busy_sleep(vm_page_t m, const char *wmesg) { u_int x; vm_page_lock_assert(m, MA_OWNED); x = m->busy_lock; if (x == VPB_UNBUSIED) { vm_page_unlock(m); return; } if ((x & VPB_BIT_WAITERS) == 0 && !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS)) { vm_page_unlock(m); return; } msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0); } /* * vm_page_trysbusy: * * Try to shared busy a page. * If the operation succeeds 1 is returned otherwise 0. * The operation never sleeps. */ int vm_page_trysbusy(vm_page_t m) { u_int x; for (;;) { x = m->busy_lock; if ((x & VPB_BIT_SHARED) == 0) return (0); if (atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER)) return (1); } } /* * vm_page_xunbusy_hard: * * Called after the first try the exclusive unbusy of a page failed. * It is assumed that the waiters bit is on. */ void vm_page_xunbusy_hard(vm_page_t m) { vm_page_assert_xbusied(m); vm_page_lock(m); atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); wakeup(m); vm_page_unlock(m); } /* * vm_page_flash: * * Wakeup anyone waiting for the page. * The ownership bits do not change. * * The given page must be locked. */ void vm_page_flash(vm_page_t m) { u_int x; vm_page_lock_assert(m, MA_OWNED); for (;;) { x = m->busy_lock; if ((x & VPB_BIT_WAITERS) == 0) return; if (atomic_cmpset_int(&m->busy_lock, x, x & (~VPB_BIT_WAITERS))) break; } wakeup(m); } /* * Keep page from being freed by the page daemon * much of the same effect as wiring, except much lower * overhead and should be used only for *very* temporary * holding ("wiring"). */ void vm_page_hold(vm_page_t mem) { vm_page_lock_assert(mem, MA_OWNED); mem->hold_count++; } void vm_page_unhold(vm_page_t mem) { vm_page_lock_assert(mem, MA_OWNED); KASSERT(mem->hold_count >= 1, ("vm_page_unhold: hold count < 0!!!")); --mem->hold_count; if (mem->hold_count == 0 && (mem->flags & PG_UNHOLDFREE) != 0) vm_page_free_toq(mem); } /* * vm_page_unhold_pages: * * Unhold each of the pages that is referenced by the given array. */ void vm_page_unhold_pages(vm_page_t *ma, int count) { struct mtx *mtx, *new_mtx; mtx = NULL; for (; count != 0; count--) { /* * Avoid releasing and reacquiring the same page lock. 
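/*
 * Illustrative sketch (not part of this patch): the compare-and-swap retry
 * loop that vm_page_trysbusy() and vm_page_sunbusy() above apply to
 * busy_lock, reduced to a toy shared-busy counter with C11 atomics.  The
 * bit layout here is invented for the example and is not the real VPB_*
 * encoding.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define TOY_SHARED	0x1u		/* low bit: shared mode */
#define TOY_ONE_SHARER	0x2u		/* sharer count lives above it */

/* Try to add one shared holder; fail if the word is not in shared mode. */
bool
toy_trysbusy(_Atomic unsigned int *busy)
{
	unsigned int x;

	for (;;) {
		x = atomic_load(busy);
		if ((x & TOY_SHARED) == 0)
			return (false);		/* exclusively busied */
		if (atomic_compare_exchange_weak(busy, &x, x + TOY_ONE_SHARER))
			return (true);		/* became one of the sharers */
		/* Lost the race; reread and retry, as the kernel loop does. */
	}
}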
*/ new_mtx = vm_page_lockptr(*ma); if (mtx != new_mtx) { if (mtx != NULL) mtx_unlock(mtx); mtx = new_mtx; mtx_lock(mtx); } vm_page_unhold(*ma); ma++; } if (mtx != NULL) mtx_unlock(mtx); } vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa) { vm_page_t m; #ifdef VM_PHYSSEG_SPARSE m = vm_phys_paddr_to_vm_page(pa); if (m == NULL) m = vm_phys_fictitious_to_vm_page(pa); return (m); #elif defined(VM_PHYSSEG_DENSE) long pi; pi = atop(pa); if (pi >= first_page && (pi - first_page) < vm_page_array_size) { m = &vm_page_array[pi - first_page]; return (m); } return (vm_phys_fictitious_to_vm_page(pa)); #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif } /* * vm_page_getfake: * * Create a fictitious page with the specified physical address and * memory attribute. The memory attribute is the only the machine- * dependent aspect of a fictitious page that must be initialized. */ vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr) { vm_page_t m; m = uma_zalloc(fakepg_zone, M_WAITOK | M_ZERO); vm_page_initfake(m, paddr, memattr); return (m); } void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { if ((m->flags & PG_FICTITIOUS) != 0) { /* * The page's memattr might have changed since the * previous initialization. Update the pmap to the * new memattr. */ goto memattr; } m->phys_addr = paddr; m->queue = PQ_NONE; /* Fictitious pages don't use "segind". */ m->flags = PG_FICTITIOUS; /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_SINGLE_EXCLUSIVER; m->wire_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); } /* * vm_page_putfake: * * Release a fictitious page. */ void vm_page_putfake(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) != 0, ("managed %p", m)); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_putfake: bad page %p", m)); uma_zfree(fakepg_zone, m); } /* * vm_page_updatefake: * * Update the given fictitious page to the specified physical address and * memory attribute. */ void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) { KASSERT((m->flags & PG_FICTITIOUS) != 0, ("vm_page_updatefake: bad page %p", m)); m->phys_addr = paddr; pmap_page_set_memattr(m, memattr); } /* * vm_page_free: * * Free a page. */ void vm_page_free(vm_page_t m) { m->flags &= ~PG_ZERO; vm_page_free_toq(m); } /* * vm_page_free_zero: * * Free a page to the zerod-pages queue */ void vm_page_free_zero(vm_page_t m) { m->flags |= PG_ZERO; vm_page_free_toq(m); } /* * Unbusy and handle the page queueing for a page from the VOP_GETPAGES() * array which is not the request page. */ void vm_page_readahead_finish(vm_page_t m) { if (m->valid != 0) { /* * Since the page is not the requested page, whether * it should be activated or deactivated is not * obvious. Empirical results have shown that * deactivating the page is usually the best choice, * unless the page is wanted by another thread. */ vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); vm_page_unlock(m); vm_page_xunbusy(m); } else { /* * Free the completely invalid page. Such page state * occurs due to the short read operation which did * not covered our page at all, or in case when a read * error happens. */ vm_page_lock(m); vm_page_free(m); vm_page_unlock(m); } } /* * vm_page_sleep_if_busy: * * Sleep and release the page queues lock if the page is busied. * Returns TRUE if the thread slept. * * The given page must be unlocked and object containing it must * be locked. 
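/*
 * Illustrative sketch (not part of this patch): the VM_PHYSSEG_DENSE arm
 * of PHYS_TO_VM_PAGE() above is plain array arithmetic: the page frame
 * number of the physical address, minus the frame number of the first
 * managed page, indexes vm_page_array.  A 4 KB page size is assumed and
 * the sample addresses are invented.
 */
#include <stdio.h>

#define SK_PAGE_SHIFT	12		/* atop(pa) == pa >> PAGE_SHIFT for 4 KB pages */

int
main(void)
{
	unsigned long long pa = 0x12345000ULL;		/* physical address to look up */
	unsigned long long first_page = 0x100ULL;	/* frame of lowest managed page */
	unsigned long long pi = pa >> SK_PAGE_SHIFT;

	printf("pa 0x%llx -> vm_page_array[%llu]\n", pa, pi - first_page);
	return (0);
}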
*/ int vm_page_sleep_if_busy(vm_page_t m, const char *msg) { vm_object_t obj; vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); if (vm_page_busied(m)) { /* * The page-specific object must be cached because page * identity can change during the sleep, causing the * re-lock of a different object. * It is assumed that a reference to the object is already * held by the callers. */ obj = m->object; vm_page_lock(m); VM_OBJECT_WUNLOCK(obj); vm_page_busy_sleep(m, msg); VM_OBJECT_WLOCK(obj); return (TRUE); } return (FALSE); } /* * vm_page_dirty_KBI: [ internal use only ] * * Set all bits in the page's dirty field. * * The object containing the specified page must be locked if the * call is made from the machine-independent layer. * * See vm_page_clear_dirty_mask(). * * This function should only be called by vm_page_dirty(). */ void vm_page_dirty_KBI(vm_page_t m) { /* These assertions refer to this operation by its public name. */ KASSERT((m->flags & PG_CACHED) == 0, ("vm_page_dirty: page in cache!")); KASSERT(m->valid == VM_PAGE_BITS_ALL, ("vm_page_dirty: page is invalid!")); m->dirty = VM_PAGE_BITS_ALL; } /* * vm_page_insert: [ internal use only ] * * Inserts the given mem entry into the object and object list. * * The object must be locked. */ int vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex) { vm_page_t mpred; VM_OBJECT_ASSERT_WLOCKED(object); mpred = vm_radix_lookup_le(&object->rtree, pindex); return (vm_page_insert_after(m, object, pindex, mpred)); } /* * vm_page_insert_after: * * Inserts the page "m" into the specified object at offset "pindex". * * The page "mpred" must immediately precede the offset "pindex" within * the specified object. * * The object must be locked. */ static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred) { vm_pindex_t sidx; vm_object_t sobj; vm_page_t msucc; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(m->object == NULL, ("vm_page_insert_after: page already inserted")); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); msucc = TAILQ_NEXT(mpred, listq); } else msucc = TAILQ_FIRST(&object->memq); if (msucc != NULL) KASSERT(msucc->pindex > pindex, ("vm_page_insert_after: msucc doesn't succeed pindex")); /* * Record the object/offset pair in this page */ sobj = m->object; sidx = m->pindex; m->object = object; m->pindex = pindex; /* * Now link into the object's ordered list of backed pages. */ if (vm_radix_insert(&object->rtree, m)) { m->object = sobj; m->pindex = sidx; return (1); } vm_page_insert_radixdone(m, object, mpred); return (0); } /* * vm_page_insert_radixdone: * * Complete page "m" insertion into the specified object after the * radix trie hooking. * * The page "mpred" must precede the offset "m->pindex" within the * specified object. * * The object must be locked. 
*/ static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); if (mpred != NULL) { KASSERT(mpred->object == object, ("vm_page_insert_after: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); } if (mpred != NULL) TAILQ_INSERT_AFTER(&object->memq, mpred, m, listq); else TAILQ_INSERT_HEAD(&object->memq, m, listq); /* * Show that the object has one more resident page. */ object->resident_page_count++; /* * Hold the vnode until the last page is released. */ if (object->resident_page_count == 1 && object->type == OBJT_VNODE) vhold(object->handle); /* * Since we are inserting a new and possibly dirty page, * update the object's OBJ_MIGHTBEDIRTY flag. */ if (pmap_page_is_write_mapped(m)) vm_object_set_writeable_dirty(object); } /* * vm_page_remove: * * Removes the given mem entry from the object/offset-page * table and the object page list, but do not invalidate/terminate * the backing store. * * The object must be locked. The page must be locked if it is managed. */ void vm_page_remove(vm_page_t m) { vm_object_t object; boolean_t lockacq; if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); if ((object = m->object) == NULL) return; VM_OBJECT_ASSERT_WLOCKED(object); if (vm_page_xbusied(m)) { lockacq = FALSE; if ((m->oflags & VPO_UNMANAGED) != 0 && !mtx_owned(vm_page_lockptr(m))) { lockacq = TRUE; vm_page_lock(m); } vm_page_flash(m); atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED); if (lockacq) vm_page_unlock(m); } /* * Now remove from the object's list of backed pages. */ vm_radix_remove(&object->rtree, m->pindex); TAILQ_REMOVE(&object->memq, m, listq); /* * And show that the object has one fewer resident page. */ object->resident_page_count--; /* * The vnode may now be recycled. */ if (object->resident_page_count == 0 && object->type == OBJT_VNODE) vdrop(object->handle); m->object = NULL; } /* * vm_page_lookup: * * Returns the page associated with the object/offset * pair specified; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_lookup(vm_object_t object, vm_pindex_t pindex) { VM_OBJECT_ASSERT_LOCKED(object); return (vm_radix_lookup(&object->rtree, pindex)); } /* * vm_page_find_least: * * Returns the page associated with the object with least pindex * greater than or equal to the parameter pindex, or NULL. * * The object must be locked. */ vm_page_t vm_page_find_least(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; VM_OBJECT_ASSERT_LOCKED(object); if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex) m = vm_radix_lookup_ge(&object->rtree, pindex); return (m); } /* * Returns the given page's successor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. */ vm_page_t vm_page_next(vm_page_t m) { vm_page_t next; VM_OBJECT_ASSERT_WLOCKED(m->object); if ((next = TAILQ_NEXT(m, listq)) != NULL && next->pindex != m->pindex + 1) next = NULL; return (next); } /* * Returns the given page's predecessor (by pindex) within the object if it is * resident; if none is found, NULL is returned. * * The object must be locked. 
*/ vm_page_t vm_page_prev(vm_page_t m) { vm_page_t prev; VM_OBJECT_ASSERT_WLOCKED(m->object); if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL && prev->pindex != m->pindex - 1) prev = NULL; return (prev); } /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. * * The existing page must not be on a paging queue. */ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) { vm_page_t mold, mpred; VM_OBJECT_ASSERT_WLOCKED(object); /* * This function mostly follows vm_page_insert() and * vm_page_remove() without the radix, object count and vnode * dance. Double check such functions for more comments. */ mpred = vm_radix_lookup(&object->rtree, pindex); KASSERT(mpred != NULL, ("vm_page_replace: replacing page not present with pindex")); mpred = TAILQ_PREV(mpred, respgs, listq); if (mpred != NULL) KASSERT(mpred->pindex < pindex, ("vm_page_insert_after: mpred doesn't precede pindex")); mnew->object = object; mnew->pindex = pindex; mold = vm_radix_replace(&object->rtree, mnew); KASSERT(mold->queue == PQ_NONE, ("vm_page_replace: mold is on a paging queue")); /* Detach the old page from the resident tailq. */ TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; vm_page_xunbusy(mold); /* Insert the new page in the resident tailq. */ if (mpred != NULL) TAILQ_INSERT_AFTER(&object->memq, mpred, mnew, listq); else TAILQ_INSERT_HEAD(&object->memq, mnew, listq); if (pmap_page_is_write_mapped(mnew)) vm_object_set_writeable_dirty(object); return (mold); } /* * vm_page_rename: * * Move the given memory entry from its * current object to the specified target object/offset. * * Note: swap associated with the page must be invalidated by the move. We * have to do this for several reasons: (1) we aren't freeing the * page, (2) we are dirtying the page, (3) the VM system is probably * moving the page from object A to B, and will then later move * the backing store from A to B and we can't have a conflict. * * Note: we *always* dirty the page. It is necessary both for the * fact that we moved it, and because we may be invalidating * swap. If the page is on the cache, we have to deactivate it * or vm_page_dirty() will panic. Dirty pages are not allowed * on the cache. * * The objects must be locked. */ int vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) { vm_page_t mpred; vm_pindex_t opidx; VM_OBJECT_ASSERT_WLOCKED(new_object); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); /* * Create a custom version of vm_page_insert() which does not depend * by m_prev and can cheat on the implementation aspects of the * function. */ opidx = m->pindex; m->pindex = new_pindex; if (vm_radix_insert(&new_object->rtree, m)) { m->pindex = opidx; return (1); } /* * The operation cannot fail anymore. The removal must happen before * the listq iterator is tainted. */ m->pindex = opidx; vm_page_lock(m); vm_page_remove(m); /* Return back to the new pindex to complete vm_page_insert(). */ m->pindex = new_pindex; m->object = new_object; vm_page_unlock(m); vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); return (0); } /* * Convert all of the given object's cached pages that have a * pindex within the given range into free pages. If the value * zero is given for "end", then the range's upper bound is * infinity. 
If the given object is backed by a vnode and it * transitions from having one or more cached pages to none, the * vnode's hold count is reduced. */ void vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { vm_page_t m; boolean_t empty; mtx_lock(&vm_page_queue_free_mtx); if (__predict_false(vm_radix_is_empty(&object->cache))) { mtx_unlock(&vm_page_queue_free_mtx); return; } while ((m = vm_radix_lookup_ge(&object->cache, start)) != NULL) { if (end != 0 && m->pindex >= end) break; vm_radix_remove(&object->cache, m->pindex); vm_page_cache_turn_free(m); } empty = vm_radix_is_empty(&object->cache); mtx_unlock(&vm_page_queue_free_mtx); if (object->type == OBJT_VNODE && empty) vdrop(object->handle); } /* * Returns the cached page that is associated with the given * object and offset. If, however, none exists, returns NULL. * * The free page queue must be locked. */ static inline vm_page_t vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); return (vm_radix_lookup(&object->cache, pindex)); } /* * Remove the given cached page from its containing object's * collection of cached pages. * * The free page queue must be locked. */ static void vm_page_cache_remove(vm_page_t m) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_remove: page %p is not cached", m)); vm_radix_remove(&m->object->cache, m->pindex); m->object = NULL; vm_cnt.v_cache_count--; } /* * Transfer all of the cached pages with offset greater than or * equal to 'offidxstart' from the original object's cache to the * new object's cache. However, any cached pages with offset * greater than or equal to the new object's size are kept in the * original object. Initially, the new object's cache must be * empty. Offset 'offidxstart' in the original object must * correspond to offset zero in the new object. * * The new object must be locked. */ void vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart, vm_object_t new_object) { vm_page_t m; /* * Insertion into an object's collection of cached pages * requires the object to be locked. In contrast, removal does * not. */ VM_OBJECT_ASSERT_WLOCKED(new_object); KASSERT(vm_radix_is_empty(&new_object->cache), ("vm_page_cache_transfer: object %p has cached pages", new_object)); mtx_lock(&vm_page_queue_free_mtx); while ((m = vm_radix_lookup_ge(&orig_object->cache, offidxstart)) != NULL) { /* * Transfer all of the pages with offset greater than or * equal to 'offidxstart' from the original object's * cache to the new object's cache. */ if ((m->pindex - offidxstart) >= new_object->size) break; vm_radix_remove(&orig_object->cache, m->pindex); /* Update the page's object and offset. */ m->object = new_object; m->pindex -= offidxstart; if (vm_radix_insert(&new_object->cache, m)) vm_page_cache_turn_free(m); } mtx_unlock(&vm_page_queue_free_mtx); } /* * Returns TRUE if a cached page is associated with the given object and * offset, and FALSE otherwise. * * The object must be locked. */ boolean_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex) { vm_page_t m; /* * Insertion into an object's collection of cached pages requires the * object to be locked. Therefore, if the object is locked and the * object's collection is empty, there is no need to acquire the free * page queues lock in order to prove that the specified page doesn't * exist. 
*/ VM_OBJECT_ASSERT_WLOCKED(object); if (__predict_true(vm_object_cache_is_empty(object))) return (FALSE); mtx_lock(&vm_page_queue_free_mtx); m = vm_page_cache_lookup(object, pindex); mtx_unlock(&vm_page_queue_free_mtx); return (m != NULL); } /* * vm_page_alloc: * * Allocate and return a page that is associated with the specified * object and offset pair. By default, this page is exclusive busied. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_IFCACHED return page only if it is cached * VM_ALLOC_IFNOTCACHED return NULL, do not reactivate if the page * is cached * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NODUMP do not include the page in a kernel core dump * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) { struct vnode *vp = NULL; vm_object_t m_object; vm_page_t m, mpred; int flags, req_class; mpred = 0; /* XXX: pacify gcc */ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object, req)); if (object != NULL) VM_OBJECT_ASSERT_WLOCKED(object); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; if (object != NULL) { mpred = vm_radix_lookup_le(&object->rtree, pindex); KASSERT(mpred == NULL || mpred->pindex != pindex, ("vm_page_alloc: pindex already allocated")); } /* * The page allocation request can came from consumers which already * hold the free page queue mutex, like vm_page_insert() in * vm_page_cache(). */ mtx_lock_flags(&vm_page_queue_free_mtx, MTX_RECURSE); if (vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) { /* * Allocate from the free queue if the number of free pages * exceeds the minimum for the request class. */ if (object != NULL && (m = vm_page_cache_lookup(object, pindex)) != NULL) { if ((req & VM_ALLOC_IFNOTCACHED) != 0) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); } if (vm_phys_unfree_page(m)) vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, 0); #if VM_NRESERVLEVEL > 0 else if (!vm_reserv_reactivate_page(m)) #else else #endif panic("vm_page_alloc: cache page %p is missing" " from the free queue", m); } else if ((req & VM_ALLOC_IFCACHED) != 0) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); #if VM_NRESERVLEVEL > 0 } else if (object == NULL || (object->flags & (OBJ_COLORED | OBJ_FICTITIOUS)) != OBJ_COLORED || (m = vm_reserv_alloc_page(object, pindex, mpred)) == NULL) { #else } else { #endif m = vm_phys_alloc_pages(object != NULL ? 
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); #if VM_NRESERVLEVEL > 0 if (m == NULL && vm_reserv_reclaim_inactive()) { m = vm_phys_alloc_pages(object != NULL ? VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0); } #endif } } else { /* * Not allocatable, give up. */ mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); pagedaemon_wakeup(); return (NULL); } /* * At this point we had better have found a good page. */ KASSERT(m != NULL, ("vm_page_alloc: missing page")); KASSERT(m->queue == PQ_NONE, ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue)); KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m)); KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m)); KASSERT(!vm_page_sbusied(m), ("vm_page_alloc: page %p is busy", m)); KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("vm_page_alloc: page %p has unexpected memattr %d", m, pmap_page_get_memattr(m))); if ((m->flags & PG_CACHED) != 0) { KASSERT((m->flags & PG_ZERO) == 0, ("vm_page_alloc: cached page %p is PG_ZERO", m)); KASSERT(m->valid != 0, ("vm_page_alloc: cached page %p is invalid", m)); if (m->object == object && m->pindex == pindex) vm_cnt.v_reactivated++; else m->valid = 0; m_object = m->object; vm_page_cache_remove(m); if (m_object->type == OBJT_VNODE && vm_object_cache_is_empty(m_object)) vp = m_object->handle; } else { KASSERT(m->valid == 0, ("vm_page_alloc: free page %p is valid", m)); vm_phys_freecnt_adj(m, -1); if ((m->flags & PG_ZERO) != 0) vm_page_zero_count--; } mtx_unlock(&vm_page_queue_free_mtx); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; flags &= m->flags; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; m->flags = flags; m->aflags = 0; m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0; m->busy_lock = VPB_UNBUSIED; if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0) m->busy_lock = VPB_SINGLE_EXCLUSIVER; if ((req & VM_ALLOC_SBUSY) != 0) m->busy_lock = VPB_SHARERS_WORD(1); if (req & VM_ALLOC_WIRED) { /* * The page lock is not required for wiring a page until that * page is inserted into the object. */ atomic_add_int(&vm_cnt.v_wire_count, 1); m->wire_count = 1; } m->act_count = 0; if (object != NULL) { if (vm_page_insert_after(m, object, pindex, mpred)) { /* See the comment below about hold count. */ if (vp != NULL) vdrop(vp); pagedaemon_wakeup(); if (req & VM_ALLOC_WIRED) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); m->wire_count = 0; } m->object = NULL; vm_page_free(m); return (NULL); } /* Ignore device objects; the pager sets "memattr" for them. */ if (object->memattr != VM_MEMATTR_DEFAULT && (object->flags & OBJ_FICTITIOUS) == 0) pmap_page_set_memattr(m, object->memattr); } else m->pindex = pindex; /* * The following call to vdrop() must come after the above call * to vm_page_insert() in case both affect the same object and * vnode. Otherwise, the affected vnode's hold count could * temporarily become zero. */ if (vp != NULL) vdrop(vp); /* * Don't wakeup too often - wakeup the pageout daemon when * we would be nearly out of memory. 
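/*
 * Illustrative sketch (not part of this patch, and only meaningful inside
 * the kernel, so not buildable standalone): a typical caller of
 * vm_page_alloc() using the allocation classes and flags documented above.
 * It asks for a wired page with no backing object and retries via VM_WAIT
 * when the free lists are depleted; VM_ALLOC_ZERO only expresses a
 * preference, so the caller still zeroes the page when PG_ZERO is unset.
 * The helper name is invented for the example.
 */
#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

static vm_page_t
alloc_wired_zeroed_page(void)
{
	vm_page_t m;

	while ((m = vm_page_alloc(NULL, 0,
	    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
	    VM_ALLOC_ZERO)) == NULL)
		VM_WAIT;		/* sleep until the page daemon frees memory */
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);
	return (m);
}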
*/ if (vm_paging_needed()) pagedaemon_wakeup(); return (m); } static void vm_page_alloc_contig_vdrop(struct spglist *lst) { while (!SLIST_EMPTY(lst)) { vdrop((struct vnode *)SLIST_FIRST(lst)-> plinks.s.pv); SLIST_REMOVE_HEAD(lst, plinks.s.ss); } } /* * vm_page_alloc_contig: * * Allocate a contiguous set of physical pages of the given size "npages" * from the free lists. All of the physical pages must be at or above * the given physical address "low" and below the given physical address * "high". The given value "alignment" determines the alignment of the * first physical page in the set. If the given value "boundary" is * non-zero, then the set of physical pages cannot cross any physical * address boundary that is a multiple of that value. Both "alignment" * and "boundary" must be a power of two. * * If the specified memory attribute, "memattr", is VM_MEMATTR_DEFAULT, * then the memory attribute setting for the physical pages is configured * to the object's memory attribute setting. Otherwise, the memory * attribute setting for the physical pages is configured to "memattr", * overriding the object's memory attribute setting. However, if the * object's memory attribute setting is not VM_MEMATTR_DEFAULT, then the * memory attribute setting for the physical pages cannot be configured * to VM_MEMATTR_DEFAULT. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_NOBUSY do not exclusive busy the page * VM_ALLOC_NOOBJ page is not associated with an object and * should not be exclusive busy * VM_ALLOC_SBUSY shared busy the allocated page * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr) { struct vnode *drop; struct spglist deferred_vdrop_list; vm_page_t m, m_tmp, m_ret; u_int flags; int req_class; KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) && (object != NULL || (req & VM_ALLOC_SBUSY) == 0) && ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)), ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object, req)); if (object != NULL) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_PHYS, ("vm_page_alloc_contig: object %p isn't OBJT_PHYS", object)); } KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero")); req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. 
*/ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; SLIST_INIT(&deferred_vdrop_list); mtx_lock(&vm_page_queue_free_mtx); if (vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages + vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages + vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages)) { #if VM_NRESERVLEVEL > 0 retry: if (object == NULL || (object->flags & OBJ_COLORED) == 0 || (m_ret = vm_reserv_alloc_contig(object, pindex, npages, low, high, alignment, boundary)) == NULL) #endif m_ret = vm_phys_alloc_contig(npages, low, high, alignment, boundary); } else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, npages); pagedaemon_wakeup(); return (NULL); } if (m_ret != NULL) for (m = m_ret; m < &m_ret[npages]; m++) { drop = vm_page_alloc_init(m); if (drop != NULL) { /* * Enqueue the vnode for deferred vdrop(). */ m->plinks.s.pv = drop; SLIST_INSERT_HEAD(&deferred_vdrop_list, m, plinks.s.ss); } } else { #if VM_NRESERVLEVEL > 0 if (vm_reserv_reclaim_contig(npages, low, high, alignment, boundary)) goto retry; #endif } mtx_unlock(&vm_page_queue_free_mtx); if (m_ret == NULL) return (NULL); /* * Initialize the pages. Only the PG_ZERO flag is inherited. */ flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; if ((req & VM_ALLOC_NODUMP) != 0) flags |= PG_NODUMP; if ((req & VM_ALLOC_WIRED) != 0) atomic_add_int(&vm_cnt.v_wire_count, npages); if (object != NULL) { if (object->memattr != VM_MEMATTR_DEFAULT && memattr == VM_MEMATTR_DEFAULT) memattr = object->memattr; } for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = VPB_UNBUSIED; if (object != NULL) { if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) m->busy_lock = VPB_SINGLE_EXCLUSIVER; if ((req & VM_ALLOC_SBUSY) != 0) m->busy_lock = VPB_SHARERS_WORD(1); } if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 1; /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; if (object != NULL) { if (vm_page_insert(m, object, pindex)) { vm_page_alloc_contig_vdrop( &deferred_vdrop_list); if (vm_paging_needed()) pagedaemon_wakeup(); if ((req & VM_ALLOC_WIRED) != 0) atomic_subtract_int(&vm_cnt.v_wire_count, npages); for (m_tmp = m, m = m_ret; m < &m_ret[npages]; m++) { if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 0; if (m >= m_tmp) m->object = NULL; vm_page_free(m); } return (NULL); } } else m->pindex = pindex; if (memattr != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, memattr); pindex++; } vm_page_alloc_contig_vdrop(&deferred_vdrop_list); if (vm_paging_needed()) pagedaemon_wakeup(); return (m_ret); } /* * Initialize a page that has been freshly dequeued from a freelist. * The caller has to drop the vnode returned, if it is not NULL. * * This function may only be used to initialize unmanaged pages. * * To be called with vm_page_queue_free_mtx held. 
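 *
 * (vm_page_alloc_contig() above defers those vnodes on
 * deferred_vdrop_list and releases them via vm_page_alloc_contig_vdrop()
 * only after the free-queue mutex has been dropped.)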
*/ static struct vnode * vm_page_alloc_init(vm_page_t m) { struct vnode *drop; vm_object_t m_object; KASSERT(m->queue == PQ_NONE, ("vm_page_alloc_init: page %p has unexpected queue %d", m, m->queue)); KASSERT(m->wire_count == 0, ("vm_page_alloc_init: page %p is wired", m)); KASSERT(m->hold_count == 0, ("vm_page_alloc_init: page %p is held", m)); KASSERT(!vm_page_sbusied(m), ("vm_page_alloc_init: page %p is busy", m)); KASSERT(m->dirty == 0, ("vm_page_alloc_init: page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("vm_page_alloc_init: page %p has unexpected memattr %d", m, pmap_page_get_memattr(m))); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); drop = NULL; if ((m->flags & PG_CACHED) != 0) { KASSERT((m->flags & PG_ZERO) == 0, ("vm_page_alloc_init: cached page %p is PG_ZERO", m)); m->valid = 0; m_object = m->object; vm_page_cache_remove(m); if (m_object->type == OBJT_VNODE && vm_object_cache_is_empty(m_object)) drop = m_object->handle; } else { KASSERT(m->valid == 0, ("vm_page_alloc_init: free page %p is valid", m)); vm_phys_freecnt_adj(m, -1); if ((m->flags & PG_ZERO) != 0) vm_page_zero_count--; } return (drop); } /* * vm_page_alloc_freelist: * * Allocate a physical page from the specified free page list. * * The caller must always specify an allocation class. * * allocation classes: * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request * * optional allocation flags: * VM_ALLOC_COUNT(number) the number of additional pages that the caller * intends to allocate * VM_ALLOC_WIRED wire the allocated page * VM_ALLOC_ZERO prefer a zeroed page * * This routine may not sleep. */ vm_page_t vm_page_alloc_freelist(int flind, int req) { struct vnode *drop; vm_page_t m; u_int flags; int req_class; req_class = req & VM_ALLOC_CLASS_MASK; /* * The page daemon is allowed to dig deeper into the free page list. */ if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT) req_class = VM_ALLOC_SYSTEM; /* * Do not allocate reserved pages unless the req has asked for it. */ mtx_lock_flags(&vm_page_queue_free_mtx, MTX_RECURSE); if (vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM && vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT && vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0); else { mtx_unlock(&vm_page_queue_free_mtx); atomic_add_int(&vm_pageout_deficit, max((u_int)req >> VM_ALLOC_COUNT_SHIFT, 1)); pagedaemon_wakeup(); return (NULL); } if (m == NULL) { mtx_unlock(&vm_page_queue_free_mtx); return (NULL); } drop = vm_page_alloc_init(m); mtx_unlock(&vm_page_queue_free_mtx); /* * Initialize the page. Only the PG_ZERO flag is inherited. */ m->aflags = 0; flags = 0; if ((req & VM_ALLOC_ZERO) != 0) flags = PG_ZERO; m->flags &= flags; if ((req & VM_ALLOC_WIRED) != 0) { /* * The page lock is not required for wiring a page that does * not belong to an object. */ atomic_add_int(&vm_cnt.v_wire_count, 1); m->wire_count = 1; } /* Unmanaged pages don't use "act_count". */ m->oflags = VPO_UNMANAGED; if (drop != NULL) vdrop(drop); if (vm_paging_needed()) pagedaemon_wakeup(); return (m); } /* * vm_wait: (also see VM_WAIT macro) * * Sleep until free pages are available for allocation. * - Called in various places before memory allocations. 
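 *
 * A typical caller simply retries its allocation after waking up.  As a
 * sketch (hypothetical caller, lock handling omitted; not part of this
 * file):
 *
 *	while ((m = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL)) == NULL)
 *		VM_WAIT;
 *
 * vm_page_grab() below follows the same retry pattern.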
*/ void vm_wait(void) { mtx_lock(&vm_page_queue_free_mtx); if (curproc == pageproc) { vm_pageout_pages_needed = 1; msleep(&vm_pageout_pages_needed, &vm_page_queue_free_mtx, PDROP | PSWP, "VMWait", 0); } else { if (!vm_pages_needed) { vm_pages_needed = 1; wakeup(&vm_pages_needed); } msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PVM, "vmwait", 0); } } /* * vm_waitpfault: (also see VM_WAITPFAULT macro) * * Sleep until free pages are available for allocation. * - Called only in vm_fault so that processes page faulting * can be easily tracked. * - Sleeps at a lower priority than vm_wait() so that vm_wait()ing * processes will be able to grab memory first. Do not change * this balance without careful testing first. */ void vm_waitpfault(void) { mtx_lock(&vm_page_queue_free_mtx); if (!vm_pages_needed) { vm_pages_needed = 1; wakeup(&vm_pages_needed); } msleep(&vm_cnt.v_free_count, &vm_page_queue_free_mtx, PDROP | PUSER, "pfault", 0); } struct vm_pagequeue * vm_page_pagequeue(vm_page_t m) { return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]); } /* * vm_page_dequeue: * * Remove the given page from its current page queue. * * The page must be locked. */ void vm_page_dequeue(vm_page_t m) { struct vm_pagequeue *pq; vm_page_assert_locked(m); KASSERT(m->queue < PQ_COUNT, ("vm_page_dequeue: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); vm_pagequeue_unlock(pq); } /* * vm_page_dequeue_locked: * * Remove the given page from its current page queue. * * The page and page queue must be locked. */ void vm_page_dequeue_locked(vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); m->queue = PQ_NONE; TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); } /* * vm_page_enqueue: * * Add the given page to the specified page queue. * * The page must be locked. */ static void vm_page_enqueue(uint8_t queue, vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); KASSERT(queue < PQ_COUNT, ("vm_page_enqueue: invalid queue %u request for page %p", queue, m)); pq = &vm_phys_domain(m)->vmd_pagequeues[queue]; vm_pagequeue_lock(pq); m->queue = queue; TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } /* * vm_page_requeue: * * Move the given page to the tail of its current page queue. * * The page must be locked. */ void vm_page_requeue(vm_page_t m) { struct vm_pagequeue *pq; vm_page_lock_assert(m, MA_OWNED); KASSERT(m->queue != PQ_NONE, ("vm_page_requeue: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_lock(pq); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_unlock(pq); } /* * vm_page_requeue_locked: * * Move the given page to the tail of its current page queue. * * The page queue must be locked. */ void vm_page_requeue_locked(vm_page_t m) { struct vm_pagequeue *pq; KASSERT(m->queue != PQ_NONE, ("vm_page_requeue_locked: page %p is not queued", m)); pq = vm_page_pagequeue(m); vm_pagequeue_assert_locked(pq); TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); } /* * vm_page_activate: * * Put the specified page on the active list (if appropriate). * Ensure that act_count is at least ACT_INIT but do not otherwise * mess with it. * * The page must be locked. 
*/ void vm_page_activate(vm_page_t m) { int queue; vm_page_lock_assert(m, MA_OWNED); if ((queue = m->queue) != PQ_ACTIVE) { if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; if (queue != PQ_NONE) vm_page_dequeue(m); vm_page_enqueue(PQ_ACTIVE, m); } else KASSERT(queue == PQ_NONE, ("vm_page_activate: wired page %p is queued", m)); } else { if (m->act_count < ACT_INIT) m->act_count = ACT_INIT; } } /* * vm_page_free_wakeup: * * Helper routine for vm_page_free_toq() and vm_page_cache(). This * routine is called when a page has been added to the cache or free * queues. * * The page queues must be locked. */ static inline void vm_page_free_wakeup(void) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); /* * if pageout daemon needs pages, then tell it that there are * some free. */ if (vm_pageout_pages_needed && vm_cnt.v_cache_count + vm_cnt.v_free_count >= vm_cnt.v_pageout_free_min) { wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } /* * wakeup processes that are waiting on memory if we hit a * high water mark. And wakeup scheduler process if we have * lots of memory. this process will swapin processes. */ if (vm_pages_needed && !vm_page_count_min()) { vm_pages_needed = 0; wakeup(&vm_cnt.v_free_count); } } /* * Turn a cached page into a free page, by changing its attributes. * Keep the statistics up-to-date. * * The free page queue must be locked. */ static void vm_page_cache_turn_free(vm_page_t m) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); m->object = NULL; m->valid = 0; KASSERT((m->flags & PG_CACHED) != 0, ("vm_page_cache_turn_free: page %p is not cached", m)); m->flags &= ~PG_CACHED; vm_cnt.v_cache_count--; vm_phys_freecnt_adj(m, 1); } /* * vm_page_free_toq: * * Returns the given page to the free list, * disassociating it with any VM object. * * The object must be locked. The page must be locked if it is managed. */ void vm_page_free_toq(vm_page_t m) { if ((m->oflags & VPO_UNMANAGED) == 0) { vm_page_lock_assert(m, MA_OWNED); KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_toq: freeing mapped page %p", m)); } else KASSERT(m->queue == PQ_NONE, ("vm_page_free_toq: unmanaged page %p is queued", m)); PCPU_INC(cnt.v_tfree); if (vm_page_sbusied(m)) panic("vm_page_free: freeing busy page %p", m); /* * Unqueue, then remove page. Note that we cannot destroy * the page here because we do not want to call the pager's * callback routine until after we've put the page on the * appropriate free queue. */ vm_page_remque(m); vm_page_remove(m); /* * If fictitious remove object association and * return, otherwise delay object association removal. */ if ((m->flags & PG_FICTITIOUS) != 0) { return; } m->valid = 0; vm_page_undirty(m); if (m->wire_count != 0) panic("vm_page_free: freeing wired page %p", m); if (m->hold_count != 0) { m->flags &= ~PG_ZERO; KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("vm_page_free: freeing PG_UNHOLDFREE page %p", m)); m->flags |= PG_UNHOLDFREE; } else { /* * Restore the default memory attribute to the page. */ if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); /* * Insert the page into the physical memory allocator's * cache/free page queues. 
*/ mtx_lock(&vm_page_queue_free_mtx); vm_phys_freecnt_adj(m, 1); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) #else if (TRUE) #endif vm_phys_free_pages(m, 0); if ((m->flags & PG_ZERO) != 0) ++vm_page_zero_count; else vm_page_zero_idle_wakeup(); vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); } } /* * vm_page_wire: * * Mark this page as wired down by yet * another map, removing it from paging queues * as necessary. * * If the page is fictitious, then its wire count must remain one. * * The page must be locked. */ void vm_page_wire(vm_page_t m) { /* * Only bump the wire statistics if the page is not already wired, * and only unqueue the page if it is on some queue (if it is unmanaged * it is already off the queues). */ vm_page_lock_assert(m, MA_OWNED); if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->wire_count == 1, ("vm_page_wire: fictitious page %p's wire count isn't one", m)); return; } if (m->wire_count == 0) { KASSERT((m->oflags & VPO_UNMANAGED) == 0 || m->queue == PQ_NONE, ("vm_page_wire: unmanaged page %p is queued", m)); vm_page_remque(m); atomic_add_int(&vm_cnt.v_wire_count, 1); } m->wire_count++; KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); } /* * vm_page_unwire: * * Release one wiring of the specified page, potentially enabling it to be * paged again. If paging is enabled, then the value of the parameter * "queue" determines the queue to which the page is added. * * However, unless the page belongs to an object, it is not enqueued because * it cannot be paged out. * * If a page is fictitious, then its wire count must always be one. * * A managed page must be locked. */ void vm_page_unwire(vm_page_t m, uint8_t queue) { KASSERT(queue < PQ_COUNT, ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_lock_assert(m, MA_OWNED); if ((m->flags & PG_FICTITIOUS) != 0) { KASSERT(m->wire_count == 1, ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); return; } if (m->wire_count > 0) { m->wire_count--; if (m->wire_count == 0) { atomic_subtract_int(&vm_cnt.v_wire_count, 1); if ((m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL) return; if (queue == PQ_INACTIVE) m->flags &= ~PG_WINATCFLS; vm_page_enqueue(queue, m); } } else panic("vm_page_unwire: page %p's wire count is zero", m); } /* * Move the specified page to the inactive queue. * * Many pages placed on the inactive queue should actually go * into the cache, but it is difficult to figure out which. What * we do instead, if the inactive target is well met, is to put * clean pages at the head of the inactive queue instead of the tail. * This will cause them to be moved to the cache more quickly and * if not actively re-referenced, reclaimed more quickly. If we just * stick these pages at the end of the inactive queue, heavy filesystem * meta-data accesses can cause an unnecessary paging load on memory bound * processes. This optimization causes one-time-use metadata to be * reused more quickly. * * Normally athead is 0 resulting in LRU operation. athead is set * to 1 if we want this page to be 'as if it were placed in the cache', * except without unmapping it from the process address space. * * The page must be locked. */ static inline void _vm_page_deactivate(vm_page_t m, int athead) { struct vm_pagequeue *pq; int queue; vm_page_lock_assert(m, MA_OWNED); /* * Ignore if already inactive. 
*/ if ((queue = m->queue) == PQ_INACTIVE) return; if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) { if (queue != PQ_NONE) vm_page_dequeue(m); m->flags &= ~PG_WINATCFLS; pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE]; vm_pagequeue_lock(pq); m->queue = PQ_INACTIVE; if (athead) TAILQ_INSERT_HEAD(&pq->pq_pl, m, plinks.q); else TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_inc(pq); vm_pagequeue_unlock(pq); } } /* * Move the specified page to the inactive queue. * * The page must be locked. */ void vm_page_deactivate(vm_page_t m) { _vm_page_deactivate(m, 0); } /* * vm_page_try_to_cache: * * Returns 0 on failure, 1 on success */ int vm_page_try_to_cache(vm_page_t m) { vm_page_lock_assert(m, MA_OWNED); VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty || m->hold_count || m->wire_count || (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) return (0); pmap_remove_all(m); if (m->dirty) return (0); vm_page_cache(m); return (1); } /* * vm_page_try_to_free() * * Attempt to free the page. If we cannot free it, we do nothing. * 1 is returned on success, 0 on failure. */ int vm_page_try_to_free(vm_page_t m) { vm_page_lock_assert(m, MA_OWNED); if (m->object != NULL) VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty || m->hold_count || m->wire_count || (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) return (0); pmap_remove_all(m); if (m->dirty) return (0); vm_page_free(m); return (1); } /* * vm_page_cache * * Put the specified page onto the page cache queue (if appropriate). * * The object and page must be locked. */ void vm_page_cache(vm_page_t m) { vm_object_t object; boolean_t cache_was_empty; vm_page_lock_assert(m, MA_OWNED); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) || m->hold_count || m->wire_count) panic("vm_page_cache: attempting to cache busy page"); KASSERT(!pmap_page_is_mapped(m), ("vm_page_cache: page %p is mapped", m)); KASSERT(m->dirty == 0, ("vm_page_cache: page %p is dirty", m)); if (m->valid == 0 || object->type == OBJT_DEFAULT || (object->type == OBJT_SWAP && !vm_pager_has_page(object, m->pindex, NULL, NULL))) { /* * Hypothesis: A cache-eligible page belonging to a * default object or swap object but without a backing * store must be zero filled. */ vm_page_free(m); return; } KASSERT((m->flags & PG_CACHED) == 0, ("vm_page_cache: page %p is already cached", m)); /* * Remove the page from the paging queues. */ vm_page_remque(m); /* * Remove the page from the object's collection of resident * pages. */ vm_radix_remove(&object->rtree, m->pindex); TAILQ_REMOVE(&object->memq, m, listq); object->resident_page_count--; /* * Restore the default memory attribute to the page. */ if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); /* * Insert the page into the object's collection of cached pages * and the physical memory allocator's cache/free page queues. */ m->flags &= ~PG_ZERO; mtx_lock(&vm_page_queue_free_mtx); cache_was_empty = vm_radix_is_empty(&object->cache); if (vm_radix_insert(&object->cache, m)) { mtx_unlock(&vm_page_queue_free_mtx); if (object->resident_page_count == 0) vdrop(object->handle); m->object = NULL; vm_page_free(m); return; } /* * The above call to vm_radix_insert() could reclaim the one pre- * existing cached page from this object, resulting in a call to * vdrop(). 
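 * For that reason the value of cache_was_empty computed above may be
 * stale; it is re-derived below with vm_radix_is_singleton(): if the new
 * page is now the only cached page, the insertion effectively
 * repopulated an empty cache and the vnode hold count bookkeeping at the
 * end of this function must treat it as such.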
*/ if (!cache_was_empty) cache_was_empty = vm_radix_is_singleton(&object->cache); m->flags |= PG_CACHED; vm_cnt.v_cache_count++; PCPU_INC(cnt.v_tcached); #if VM_NRESERVLEVEL > 0 if (!vm_reserv_free_page(m)) { #else if (TRUE) { #endif vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0); vm_phys_free_pages(m, 0); } vm_page_free_wakeup(); mtx_unlock(&vm_page_queue_free_mtx); /* * Increment the vnode's hold count if this is the object's only * cached page. Decrement the vnode's hold count if this was * the object's only resident page. */ if (object->type == OBJT_VNODE) { if (cache_was_empty && object->resident_page_count != 0) vhold(object->handle); else if (!cache_was_empty && object->resident_page_count == 0) vdrop(object->handle); } } /* * vm_page_advise * * Cache, deactivate, or do nothing as appropriate. This routine * is used by madvise(). * * Generally speaking we want to move the page into the cache so * it gets reused quickly. However, this can result in a silly syndrome * due to the page recycling too quickly. Small objects will not be * fully cached. On the other hand, if we move the page to the inactive * queue we wind up with a problem whereby very large objects * unnecessarily blow away our inactive and cache queues. * * The solution is to move the pages based on a fixed weighting. We * either leave them alone, deactivate them, or move them to the cache, * where moving them to the cache has the highest weighting. * By forcing some pages into other queues we eventually force the * system to balance the queues, potentially recovering other unrelated * space from active. The idea is to not force this to happen too * often. * * The object and page must be locked. */ void vm_page_advise(vm_page_t m, int advice) { int dnw, head; vm_page_assert_locked(m); VM_OBJECT_ASSERT_WLOCKED(m->object); if (advice == MADV_FREE) { /* * Mark the page clean. This will allow the page to be freed * up by the system. However, such pages are often reused * quickly by malloc() so we do not do anything that would * cause a page fault if we can help it. * * Specifically, we do not try to actually free the page now * nor do we try to put it in the cache (which would cause a * page fault on reuse). * * But we do make the page is freeable as we can without * actually taking the step of unmapping it. */ m->dirty = 0; m->act_count = 0; } else if (advice != MADV_DONTNEED) return; dnw = PCPU_GET(dnweight); PCPU_INC(dnweight); /* * Occasionally leave the page alone. */ if ((dnw & 0x01F0) == 0 || m->queue == PQ_INACTIVE) { if (m->act_count >= ACT_INIT) --m->act_count; return; } /* * Clear any references to the page. Otherwise, the page daemon will * immediately reactivate the page. */ vm_page_aflag_clear(m, PGA_REFERENCED); if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m)) vm_page_dirty(m); if (m->dirty || (dnw & 0x0070) == 0) { /* * Deactivate the page 3 times out of 32. */ head = 0; } else { /* * Cache the page 28 times out of every 32. Note that * the page is deactivated instead of cached, but placed * at the head of the queue instead of the tail. */ head = 1; } _vm_page_deactivate(m, head); } /* * Grab a page, waiting until we are waken up due to the page * changing state. We keep on waiting, if the page continues * to be in the object. If the page doesn't exist, first allocate it * and then conditionally zero it. * * This routine may sleep. * * The object must be locked on entry. The lock will, however, be released * and reacquired if the routine sleeps. 
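 *
 * A minimal usage sketch (hypothetical caller; "obj" and "pindex" are
 * illustrative, and the page is returned exclusive busied by default):
 *
 *	VM_OBJECT_WLOCK(obj);
 *	m = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_ZERO);
 *	... use the page ...
 *	vm_page_xunbusy(m);
 *	VM_OBJECT_WUNLOCK(obj);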
*/ vm_page_t vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags) { vm_page_t m; int sleep; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || (allocflags & VM_ALLOC_IGN_SBUSY) != 0, ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); retrylookup: if ((m = vm_page_lookup(object, pindex)) != NULL) { sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? vm_page_xbusied(m) : vm_page_busied(m); if (sleep) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(m, PGA_REFERENCED); vm_page_lock(m); VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(m, "pgrbwt"); VM_OBJECT_WLOCK(object); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) { vm_page_lock(m); vm_page_wire(m); vm_page_unlock(m); } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); if ((allocflags & VM_ALLOC_SBUSY) != 0) vm_page_sbusy(m); return (m); } } m = vm_page_alloc(object, pindex, allocflags); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) return (NULL); VM_OBJECT_WUNLOCK(object); VM_WAIT; VM_OBJECT_WLOCK(object); goto retrylookup; } else if (m->valid != 0) return (m); if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0) pmap_zero_page(m); return (m); } /* * Mapping function for valid or dirty bits in a page. * * Inputs are required to range within a page. */ vm_page_bits_t vm_page_bits(int base, int size) { int first_bit; int last_bit; KASSERT( base + size <= PAGE_SIZE, ("vm_page_bits: illegal base/size %d/%d", base, size) ); if (size == 0) /* handle degenerate case */ return (0); first_bit = base >> DEV_BSHIFT; last_bit = (base + size - 1) >> DEV_BSHIFT; return (((vm_page_bits_t)2 << last_bit) - ((vm_page_bits_t)1 << first_bit)); } /* * vm_page_set_valid_range: * * Sets portions of a page valid. The arguments are expected * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zeroed. * * (base + size) must be less then or equal to PAGE_SIZE. */ void vm_page_set_valid_range(vm_page_t m, int base, int size) { int endoff, frag; VM_OBJECT_ASSERT_WLOCKED(m->object); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = base & ~(DEV_BSIZE - 1)) != base && (m->valid & (1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Assert that no previously invalid block that is now being validated * is already dirty. */ KASSERT((~m->valid & vm_page_bits(base, size) & m->dirty) == 0, ("vm_page_set_valid_range: page %p is dirty", m)); /* * Set valid bits inclusive of any overlap. */ m->valid |= vm_page_bits(base, size); } /* * Clear the given bits from the specified page's dirty field. 
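 * As an illustration (assuming the usual DEV_BSIZE of 512):
 * vm_page_bits(0, 1024) above evaluates to
 * ((vm_page_bits_t)2 << 1) - 1 == 0x3, so vm_page_clear_dirty() below,
 * called with a base of 0 and a size of 1024, clears the dirty bits of
 * the first two 512-byte blocks of the page.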
*/ static __inline void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) { uintptr_t addr; #if PAGE_SIZE < 16384 int shift; #endif /* * If the object is locked and the page is neither exclusive busy nor * write mapped, then the page's dirty field cannot possibly be * set by a concurrent pmap operation. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) m->dirty &= ~pagebits; else { /* * The pmap layer can call vm_page_dirty() without * holding a distinguished lock. The combination of * the object's lock and an atomic operation suffice * to guarantee consistency of the page dirty field. * * For PAGE_SIZE == 32768 case, compiler already * properly aligns the dirty field, so no forcible * alignment is needed. Only require existence of * atomic_clear_64 when page size is 32768. */ addr = (uintptr_t)&m->dirty; #if PAGE_SIZE == 32768 atomic_clear_64((uint64_t *)addr, pagebits); #elif PAGE_SIZE == 16384 atomic_clear_32((uint32_t *)addr, pagebits); #else /* PAGE_SIZE <= 8192 */ /* * Use a trick to perform a 32-bit atomic on the * containing aligned word, to not depend on the existence * of atomic_clear_{8, 16}. */ shift = addr & (sizeof(uint32_t) - 1); #if BYTE_ORDER == BIG_ENDIAN shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY; #else shift *= NBBY; #endif addr &= ~(sizeof(uint32_t) - 1); atomic_clear_32((uint32_t *)addr, pagebits << shift); #endif /* PAGE_SIZE */ } } /* * vm_page_set_validclean: * * Sets portions of a page valid and clean. The arguments are expected * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive * of any partial chunks touched by the range. The invalid portion of * such chunks will be zero'd. * * (base + size) must be less then or equal to PAGE_SIZE. */ void vm_page_set_validclean(vm_page_t m, int base, int size) { vm_page_bits_t oldvalid, pagebits; int endoff, frag; VM_OBJECT_ASSERT_WLOCKED(m->object); if (size == 0) /* handle degenerate case */ return; /* * If the base is not DEV_BSIZE aligned and the valid * bit is clear, we have to zero out a portion of the * first block. */ if ((frag = base & ~(DEV_BSIZE - 1)) != base && (m->valid & ((vm_page_bits_t)1 << (base >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, frag, base - frag); /* * If the ending offset is not DEV_BSIZE aligned and the * valid bit is clear, we have to zero out a portion of * the last block. */ endoff = base + size; if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && (m->valid & ((vm_page_bits_t)1 << (endoff >> DEV_BSHIFT))) == 0) pmap_zero_page_area(m, endoff, DEV_BSIZE - (endoff & (DEV_BSIZE - 1))); /* * Set valid, clear dirty bits. If validating the entire * page we can safely clear the pmap modify bit. We also * use this opportunity to clear the VPO_NOSYNC flag. If a process * takes a write fault on a MAP_NOSYNC memory area the flag will * be set again. * * We set valid bits inclusive of any overlap, but we can only * clear dirty bits for DEV_BSIZE chunks that are fully within * the range. */ oldvalid = m->valid; pagebits = vm_page_bits(base, size); m->valid |= pagebits; #if 0 /* NOT YET */ if ((frag = base & (DEV_BSIZE - 1)) != 0) { frag = DEV_BSIZE - frag; base += frag; size -= frag; if (size < 0) size = 0; } pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1)); #endif if (base == 0 && size == PAGE_SIZE) { /* * The page can only be modified within the pmap if it is * mapped, and it can only be mapped if it was previously * fully valid. 
*/ if (oldvalid == VM_PAGE_BITS_ALL) /* * Perform the pmap_clear_modify() first. Otherwise, * a concurrent pmap operation, such as * pmap_protect(), could clear a modification in the * pmap and set the dirty field on the page before * pmap_clear_modify() had begun and after the dirty * field was cleared here. */ pmap_clear_modify(m); m->dirty = 0; m->oflags &= ~VPO_NOSYNC; } else if (oldvalid != VM_PAGE_BITS_ALL) m->dirty &= ~pagebits; else vm_page_clear_dirty_mask(m, pagebits); } void vm_page_clear_dirty(vm_page_t m, int base, int size) { vm_page_clear_dirty_mask(m, vm_page_bits(base, size)); } /* * vm_page_set_invalid: * * Invalidates DEV_BSIZE'd chunks within a page. Both the * valid and dirty bits for the effected areas are cleared. */ void vm_page_set_invalid(vm_page_t m, int base, int size) { vm_page_bits_t bits; vm_object_t object; object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) + size >= object->un_pager.vnp.vnp_size) bits = VM_PAGE_BITS_ALL; else bits = vm_page_bits(base, size); if (m->valid == VM_PAGE_BITS_ALL && bits != 0) pmap_remove_all(m); KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) || !pmap_page_is_mapped(m), ("vm_page_set_invalid: page %p is mapped", m)); m->valid &= ~bits; m->dirty &= ~bits; } /* * vm_page_zero_invalid() * * The kernel assumes that the invalid portions of a page contain * garbage, but such pages can be mapped into memory by user code. * When this occurs, we must zero out the non-valid portions of the * page so user code sees what it expects. * * Pages are most often semi-valid when the end of a file is mapped * into memory and the file's size is not page aligned. */ void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) { int b; int i; VM_OBJECT_ASSERT_WLOCKED(m->object); /* * Scan the valid bits looking for invalid sections that * must be zerod. Invalid sub-DEV_BSIZE'd areas ( where the * valid bit may be set ) have already been zerod by * vm_page_set_validclean(). */ for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { if (i == (PAGE_SIZE / DEV_BSIZE) || (m->valid & ((vm_page_bits_t)1 << i))) { if (i > b) { pmap_zero_page_area(m, b << DEV_BSHIFT, (i - b) << DEV_BSHIFT); } b = i + 1; } } /* * setvalid is TRUE when we can safely set the zero'd areas * as being valid. We can do this if there are no cache consistancy * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. */ if (setvalid) m->valid = VM_PAGE_BITS_ALL; } /* * vm_page_is_valid: * * Is (partial) page valid? Note that the case where size == 0 * will return FALSE in the degenerate case where the page is * entirely invalid, and TRUE otherwise. */ int vm_page_is_valid(vm_page_t m, int base, int size) { vm_page_bits_t bits; VM_OBJECT_ASSERT_LOCKED(m->object); bits = vm_page_bits(base, size); return (m->valid != 0 && (m->valid & bits) == bits); } /* * vm_page_ps_is_valid: * * Returns TRUE if the entire (super)page is valid and FALSE otherwise. */ boolean_t vm_page_ps_is_valid(vm_page_t m) { int i, npages; VM_OBJECT_ASSERT_LOCKED(m->object); npages = atop(pagesizes[m->psind]); /* * The physically contiguous pages that make up a superpage, i.e., a * page with a page size index ("psind") greater than zero, will * occupy adjacent entries in vm_page_array[]. */ for (i = 0; i < npages; i++) { if (m[i].valid != VM_PAGE_BITS_ALL) return (FALSE); } return (TRUE); } /* * Set the page's dirty bits if the page is modified. 
*/ void vm_page_test_dirty(vm_page_t m) { VM_OBJECT_ASSERT_WLOCKED(m->object); if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) vm_page_dirty(m); } void vm_page_lock_KBI(vm_page_t m, const char *file, int line) { mtx_lock_flags_(vm_page_lockptr(m), 0, file, line); } void vm_page_unlock_KBI(vm_page_t m, const char *file, int line) { mtx_unlock_flags_(vm_page_lockptr(m), 0, file, line); } int vm_page_trylock_KBI(vm_page_t m, const char *file, int line) { return (mtx_trylock_flags_(vm_page_lockptr(m), 0, file, line)); } #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT) void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line) { vm_page_lock_assert_KBI(m, MA_OWNED, file, line); } void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line) { mtx_assert_(vm_page_lockptr(m), a, file, line); } #endif #ifdef INVARIANTS void vm_page_object_lock_assert(vm_page_t m) { /* * Certain of the page's fields may only be modified by the * holder of the containing object's lock or the exclusive busy. * holder. Unfortunately, the holder of the write busy is * not recorded, and thus cannot be checked here. */ if (m->object != NULL && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_WLOCKED(m->object); } void vm_page_assert_pga_writeable(vm_page_t m, uint8_t bits) { if ((bits & PGA_WRITEABLE) == 0) return; /* * The PGA_WRITEABLE flag can only be set if the page is * managed, is exclusively busied or the object is locked. * Currently, this flag is only set by pmap_enter(). */ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("PGA_WRITEABLE on unmanaged page")); if (!vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); } #endif #include "opt_ddb.h" #ifdef DDB #include #include DB_SHOW_COMMAND(page, vm_page_print_page_info) { db_printf("vm_cnt.v_free_count: %d\n", vm_cnt.v_free_count); db_printf("vm_cnt.v_cache_count: %d\n", vm_cnt.v_cache_count); db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count); db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count); db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count); db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved); db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min); db_printf("vm_cnt.v_free_target: %d\n", vm_cnt.v_free_target); db_printf("vm_cnt.v_cache_min: %d\n", vm_cnt.v_cache_min); db_printf("vm_cnt.v_inactive_target: %d\n", vm_cnt.v_inactive_target); } DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) { int dom; db_printf("pq_free %d pq_cache %d\n", vm_cnt.v_free_count, vm_cnt.v_cache_count); for (dom = 0; dom < vm_ndomains; dom++) { db_printf( "dom %d page_cnt %d free %d pq_act %d pq_inact %d pass %d\n", dom, vm_dom[dom].vmd_page_count, vm_dom[dom].vmd_free_count, vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt, vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt, vm_dom[dom].vmd_pass); } } DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) { vm_page_t m; boolean_t phys; if (!have_addr) { db_printf("show pginfo addr\n"); return; } phys = strchr(modif, 'p') != NULL; if (phys) m = PHYS_TO_VM_PAGE(addr); else m = (vm_page_t)addr; db_printf( "page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags, m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ Index: head/sys/x86/include/segments.h =================================================================== --- head/sys/x86/include/segments.h 
(revision 282273) +++ head/sys/x86/include/segments.h (revision 282274) @@ -1,287 +1,279 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 * $FreeBSD$ */ #ifndef _X86_SEGMENTS_H_ #define _X86_SEGMENTS_H_ /* * X86 Segmentation Data Structures and definitions */ /* * Selectors */ #define SEL_RPL_MASK 3 /* requester priv level */ #define ISPL(s) ((s)&3) /* priority level of a selector */ -#ifdef XEN -#define SEL_KPL 1 /* kernel priority level */ -#else #define SEL_KPL 0 /* kernel priority level */ -#endif #define SEL_UPL 3 /* user priority level */ #define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */ #define SEL_LDT 4 /* local descriptor table */ #define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */ #define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local selector */ #define GSEL(s,r) (((s)<<3) | r) /* a global selector */ /* * User segment descriptors (%cs, %ds etc for i386 apps. 64 bit wide) * For long-mode apps, %cs only has the conforming bit in sd_type, the sd_dpl, * sd_p, sd_l and sd_def32 which must be zero). %ds only has sd_p. 
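 *
 * The 32-bit segment base is split across the descriptor: the low 24
 * bits live in sd_lobase and the high 8 bits in sd_hibase.  As an
 * illustrative example, a base of 0x12345678 is stored as
 * sd_lobase == 0x345678 and sd_hibase == 0x12; the USD_GETBASE() and
 * USD_SETBASE() macros below perform the reassembly and the split.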
*/ struct segment_descriptor { unsigned sd_lolimit:16; /* segment extent (lsb) */ unsigned sd_lobase:24; /* segment base address (lsb) */ unsigned sd_type:5; /* segment type */ unsigned sd_dpl:2; /* segment descriptor priority level */ unsigned sd_p:1; /* segment descriptor present */ unsigned sd_hilimit:4; /* segment extent (msb) */ unsigned sd_xx:2; /* unused */ unsigned sd_def32:1; /* default 32 vs 16 bit size */ unsigned sd_gran:1; /* limit granularity (byte/page units)*/ unsigned sd_hibase:8; /* segment base address (msb) */ } __packed; struct user_segment_descriptor { unsigned sd_lolimit:16; /* segment extent (lsb) */ unsigned sd_lobase:24; /* segment base address (lsb) */ unsigned sd_type:5; /* segment type */ unsigned sd_dpl:2; /* segment descriptor priority level */ unsigned sd_p:1; /* segment descriptor present */ unsigned sd_hilimit:4; /* segment extent (msb) */ unsigned sd_xx:1; /* unused */ unsigned sd_long:1; /* long mode (cs only) */ unsigned sd_def32:1; /* default 32 vs 16 bit size */ unsigned sd_gran:1; /* limit granularity (byte/page units)*/ unsigned sd_hibase:8; /* segment base address (msb) */ } __packed; #define USD_GETBASE(sd) (((sd)->sd_lobase) | (sd)->sd_hibase << 24) #define USD_SETBASE(sd, b) (sd)->sd_lobase = (b); \ (sd)->sd_hibase = ((b) >> 24); #define USD_GETLIMIT(sd) (((sd)->sd_lolimit) | (sd)->sd_hilimit << 16) #define USD_SETLIMIT(sd, l) (sd)->sd_lolimit = (l); \ (sd)->sd_hilimit = ((l) >> 16); #ifdef __i386__ /* * Gate descriptors (e.g. indirect descriptors) */ struct gate_descriptor { unsigned gd_looffset:16; /* gate offset (lsb) */ unsigned gd_selector:16; /* gate segment selector */ unsigned gd_stkcpy:5; /* number of stack wds to cpy */ unsigned gd_xx:3; /* unused */ unsigned gd_type:5; /* segment type */ unsigned gd_dpl:2; /* segment descriptor priority level */ unsigned gd_p:1; /* segment descriptor present */ unsigned gd_hioffset:16; /* gate offset (msb) */ } __packed; /* * Generic descriptor */ union descriptor { struct segment_descriptor sd; struct gate_descriptor gd; }; #else /* * Gate descriptors (e.g. indirect descriptors, trap, interrupt etc. 128 bit) * Only interrupt and trap gates have gd_ist. 
*/ struct gate_descriptor { uint64_t gd_looffset:16; /* gate offset (lsb) */ uint64_t gd_selector:16; /* gate segment selector */ uint64_t gd_ist:3; /* IST table index */ uint64_t gd_xx:5; /* unused */ uint64_t gd_type:5; /* segment type */ uint64_t gd_dpl:2; /* segment descriptor priority level */ uint64_t gd_p:1; /* segment descriptor present */ uint64_t gd_hioffset:48; /* gate offset (msb) */ uint64_t sd_xx1:32; } __packed; /* * Generic descriptor */ union descriptor { struct user_segment_descriptor sd; struct gate_descriptor gd; }; #endif /* system segments and gate types */ #define SDT_SYSNULL 0 /* system null */ #define SDT_SYS286TSS 1 /* system 286 TSS available */ #define SDT_SYSLDT 2 /* system local descriptor table */ #define SDT_SYS286BSY 3 /* system 286 TSS busy */ #define SDT_SYS286CGT 4 /* system 286 call gate */ #define SDT_SYSTASKGT 5 /* system task gate */ #define SDT_SYS286IGT 6 /* system 286 interrupt gate */ #define SDT_SYS286TGT 7 /* system 286 trap gate */ #define SDT_SYSNULL2 8 /* system null again */ #define SDT_SYS386TSS 9 /* system 386 TSS available */ #define SDT_SYSTSS 9 /* system available 64 bit TSS */ #define SDT_SYSNULL3 10 /* system null again */ #define SDT_SYS386BSY 11 /* system 386 TSS busy */ #define SDT_SYSBSY 11 /* system busy 64 bit TSS */ #define SDT_SYS386CGT 12 /* system 386 call gate */ #define SDT_SYSCGT 12 /* system 64 bit call gate */ #define SDT_SYSNULL4 13 /* system null again */ #define SDT_SYS386IGT 14 /* system 386 interrupt gate */ #define SDT_SYSIGT 14 /* system 64 bit interrupt gate */ #define SDT_SYS386TGT 15 /* system 386 trap gate */ #define SDT_SYSTGT 15 /* system 64 bit trap gate */ /* memory segment types */ #define SDT_MEMRO 16 /* memory read only */ #define SDT_MEMROA 17 /* memory read only accessed */ #define SDT_MEMRW 18 /* memory read write */ #define SDT_MEMRWA 19 /* memory read write accessed */ #define SDT_MEMROD 20 /* memory read only expand dwn limit */ #define SDT_MEMRODA 21 /* memory read only expand dwn limit accessed */ #define SDT_MEMRWD 22 /* memory read write expand dwn limit */ #define SDT_MEMRWDA 23 /* memory read write expand dwn limit accessed*/ #define SDT_MEME 24 /* memory execute only */ #define SDT_MEMEA 25 /* memory execute only accessed */ #define SDT_MEMER 26 /* memory execute read */ #define SDT_MEMERA 27 /* memory execute read accessed */ #define SDT_MEMEC 28 /* memory execute only conforming */ #define SDT_MEMEAC 29 /* memory execute only accessed conforming */ #define SDT_MEMERC 30 /* memory execute read conforming */ #define SDT_MEMERAC 31 /* memory execute read accessed conforming */ /* * Size of IDT table */ #define NIDT 256 /* 32 reserved, 0x80 syscall, most are h/w */ #define NRSVIDT 32 /* reserved entries for cpu exceptions */ /* * Entries in the Interrupt Descriptor Table (IDT) */ #define IDT_DE 0 /* #DE: Divide Error */ #define IDT_DB 1 /* #DB: Debug */ #define IDT_NMI 2 /* Nonmaskable External Interrupt */ #define IDT_BP 3 /* #BP: Breakpoint */ #define IDT_OF 4 /* #OF: Overflow */ #define IDT_BR 5 /* #BR: Bound Range Exceeded */ #define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */ #define IDT_NM 7 /* #NM: No Math Coprocessor */ #define IDT_DF 8 /* #DF: Double Fault */ #define IDT_FPUGP 9 /* Coprocessor Segment Overrun */ #define IDT_TS 10 /* #TS: Invalid TSS */ #define IDT_NP 11 /* #NP: Segment Not Present */ #define IDT_SS 12 /* #SS: Stack Segment Fault */ #define IDT_GP 13 /* #GP: General Protection Fault */ #define IDT_PF 14 /* #PF: Page Fault */ #define IDT_MF 16 /* #MF: FPU 
Floating-Point Error */ #define IDT_AC 17 /* #AC: Alignment Check */ #define IDT_MC 18 /* #MC: Machine Check */ #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ #define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ #define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */ #if defined(__i386__) /* * Entries in the Global Descriptor Table (GDT) * Note that each 4 entries share a single 32 byte L1 cache line. * Some of the fast syscall instructions require a specific order here. */ #define GNULL_SEL 0 /* Null Descriptor */ #define GPRIV_SEL 1 /* SMP Per-Processor Private Data */ #define GUFS_SEL 2 /* User %fs Descriptor (order critical: 1) */ #define GUGS_SEL 3 /* User %gs Descriptor (order critical: 2) */ #define GCODE_SEL 4 /* Kernel Code Descriptor (order critical: 1) */ #define GDATA_SEL 5 /* Kernel Data Descriptor (order critical: 2) */ #define GUCODE_SEL 6 /* User Code Descriptor (order critical: 3) */ #define GUDATA_SEL 7 /* User Data Descriptor (order critical: 4) */ #define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */ #define GPROC0_SEL 9 /* Task state process slot zero and up */ #define GLDT_SEL 10 /* Default User LDT */ #define GUSERLDT_SEL 11 /* User LDT */ #define GPANIC_SEL 12 /* Task state to consider panic from */ #define GBIOSCODE32_SEL 13 /* BIOS interface (32bit Code) */ #define GBIOSCODE16_SEL 14 /* BIOS interface (16bit Code) */ #define GBIOSDATA_SEL 15 /* BIOS interface (Data) */ #define GBIOSUTIL_SEL 16 /* BIOS interface (Utility) */ #define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */ #define GNDIS_SEL 18 /* For the NDIS layer */ -#ifdef XEN -#define NGDT 9 -#else #define NGDT 19 -#endif /* * Entries in the Local Descriptor Table (LDT) */ #define LSYS5CALLS_SEL 0 /* forced by intel BCS */ #define LSYS5SIGR_SEL 1 #define L43BSDCALLS_SEL 2 /* notyet */ #define LUCODE_SEL 3 #define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */ #define LUDATA_SEL 5 /* separate stack, es,fs,gs sels ? */ /* #define LPOSIXCALLS_SEL 5*/ /* notyet */ #define LBSDICALLS_SEL 16 /* BSDI system call gate */ #define NLDT (LBSDICALLS_SEL + 1) #else /* !__i386__ */ /* * Entries in the Global Descriptor Table (GDT) */ #define GNULL_SEL 0 /* Null Descriptor */ #define GNULL2_SEL 1 /* Null Descriptor */ #define GUFS32_SEL 2 /* User 32 bit %fs Descriptor */ #define GUGS32_SEL 3 /* User 32 bit %gs Descriptor */ #define GCODE_SEL 4 /* Kernel Code Descriptor */ #define GDATA_SEL 5 /* Kernel Data Descriptor */ #define GUCODE32_SEL 6 /* User 32 bit code Descriptor */ #define GUDATA_SEL 7 /* User 32/64 bit Data Descriptor */ #define GUCODE_SEL 8 /* User 64 bit Code Descriptor */ #define GPROC0_SEL 9 /* TSS for entering kernel etc */ /* slot 10 is second half of GPROC0_SEL */ #define GUSERLDT_SEL 11 /* LDT */ /* slot 12 is second half of GUSERLDT_SEL */ #define NGDT 13 #endif /* __i386__ */ #endif /* !_X86_SEGMENTS_H_ */ Index: head/sys/x86/x86/busdma_bounce.c =================================================================== --- head/sys/x86/x86/busdma_bounce.c (revision 282273) +++ head/sys/x86/x86/busdma_bounce.c (revision 282274) @@ -1,1078 +1,1073 @@ /*- * Copyright (c) 1997, 1998 Justin T. Gibbs. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define MAX_BPAGES 512 #else #define MAX_BPAGES 8192 #endif enum { BUS_DMA_COULD_BOUNCE = 0x01, BUS_DMA_MIN_ALLOC_COMP = 0x02, BUS_DMA_KMEM_ALLOC = 0x04, }; struct bounce_zone; struct bus_dma_tag { struct bus_dma_tag_common common; int map_count; int bounce_flags; bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; }; struct bounce_page { vm_offset_t vaddr; /* kva of bounce buffer */ bus_addr_t busaddr; /* Physical address */ vm_offset_t datavaddr; /* kva of client data */ bus_addr_t dataaddr; /* client physical address */ bus_size_t datacount; /* client data count */ STAILQ_ENTRY(bounce_page) links; }; int busdma_swi_pending; struct bounce_zone { STAILQ_ENTRY(bounce_zone) links; STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; int total_bpages; int free_bpages; int reserved_bpages; int active_bpages; int total_bounced; int total_deferred; int map_count; bus_size_t alignment; bus_addr_t lowaddr; char zoneid[8]; char lowaddrid[20]; struct sysctl_ctx_list sysctl_tree; struct sysctl_oid *sysctl_tree_top; }; static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; static STAILQ_HEAD(, bounce_zone) bounce_zone_list; static SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); struct bus_dmamap { struct bp_list bpages; int pagesneeded; int pagesreserved; bus_dma_tag_t dmat; struct memdesc mem; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; }; static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; static struct bus_dmamap nobounce_dmamap; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit); static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); int run_filter(bus_dma_tag_t dmat, 
bus_addr_t paddr); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags); static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags); -#ifdef XEN -#undef pmap_kextract -#define pmap_kextract pmap_kextract_ma -#endif - /* * Allocate a device specific dma_tag. */ static int bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr, bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat) { bus_dma_tag_t newtag; int error; *dmat = NULL; error = common_bus_dma_tag_create(parent != NULL ? &parent->common : NULL, alignment, boundary, lowaddr, highaddr, filter, filterarg, maxsize, nsegments, maxsegsz, flags, lockfunc, lockfuncarg, sizeof (struct bus_dma_tag), (void **)&newtag); if (error != 0) return (error); newtag->common.impl = &bus_dma_bounce_impl; newtag->map_count = 0; newtag->segments = NULL; if (parent != NULL && ((newtag->common.filter != NULL) || ((parent->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0))) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) || newtag->common.alignment > 1) newtag->bounce_flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && (flags & BUS_DMA_ALLOCNOW) != 0) { struct bounce_zone *bz; /* Must bounce */ if ((error = alloc_bounce_zone(newtag)) != 0) { free(newtag, M_DEVBUF); return (error); } bz = newtag->bounce_zone; if (ptoa(bz->total_bpages) < maxsize) { int pages; pages = atop(maxsize) - bz->total_bpages; /* Add pages to our bounce pool */ if (alloc_bounce_pages(newtag, pages) < pages) error = ENOMEM; } /* Performed initial allocation */ newtag->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; } else error = 0; if (error != 0) free(newtag, M_DEVBUF); else *dmat = newtag; CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", __func__, newtag, (newtag != NULL ? newtag->common.flags : 0), error); return (error); } static int bounce_bus_dma_tag_destroy(bus_dma_tag_t dmat) { bus_dma_tag_t dmat_copy, parent; int error; error = 0; dmat_copy = dmat; if (dmat != NULL) { if (dmat->map_count != 0) { error = EBUSY; goto out; } while (dmat != NULL) { parent = (bus_dma_tag_t)dmat->common.parent; atomic_subtract_int(&dmat->common.ref_count, 1); if (dmat->common.ref_count == 0) { if (dmat->segments != NULL) free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so * release our reference * count on our parent. */ dmat = parent; } else dmat = NULL; } } out: CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); return (error); } /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. 
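 *
 * For illustration (driver values are hypothetical): a tag created with,
 * say, a lowaddr of BUS_SPACE_MAXADDR_32BIT on a machine with more than
 * 4GB of RAM, or with an alignment greater than 1, had
 * BUS_DMA_COULD_BOUNCE set in bounce_bus_dma_tag_create() above, and the
 * map created here will pre-reserve bounce pages for it.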
*/ static int bounce_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { struct bounce_zone *bz; int error, maxpages, pages; error = 0; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, M_NOWAIT); if (dmat->segments == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } } /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or * an active address boundary. */ if (dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) { /* Must bounce */ if (dmat->bounce_zone == NULL) { if ((error = alloc_bounce_zone(dmat)) != 0) return (error); } bz = dmat->bounce_zone; *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, M_NOWAIT | M_ZERO); if (*mapp == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); /* * Attempt to add pages to our pool on a per-instance * basis up to a sane limit. */ if (dmat->common.alignment > 1) maxpages = MAX_BPAGES; else maxpages = MIN(MAX_BPAGES, Maxmem - atop(dmat->common.lowaddr)); if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || (bz->map_count > 0 && bz->total_bpages < maxpages)) { pages = MAX(atop(dmat->common.maxsize), 1); pages = MIN(maxpages - bz->total_bpages, pages); pages = MAX(pages, 1); if (alloc_bounce_pages(dmat, pages) < pages) error = ENOMEM; if ((dmat->bounce_flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { if (error == 0) { dmat->bounce_flags |= BUS_DMA_MIN_ALLOC_COMP; } } else error = 0; } bz->map_count++; } else { *mapp = NULL; } if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, error); return (error); } /* * Destroy a handle for mapping from kva/uva/physical * address space into bus device space. */ static int bounce_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { if (map != NULL && map != &nobounce_dmamap) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); return (EBUSY); } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; free(map, M_DEVBUF); } dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); } /* * Allocate a piece of memory that can be efficiently mapped into * bus device space based on the constraints lited in the dma tag. * A dmamap to for use with dmamap_load is also allocated. */ static int bounce_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, bus_dmamap_t *mapp) { vm_memattr_t attr; int mflags; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; /* If we succeed, no mapping/bouncing will be required */ *mapp = NULL; if (dmat->segments == NULL) { dmat->segments = (bus_dma_segment_t *)malloc( sizeof(bus_dma_segment_t) * dmat->common.nsegments, M_DEVBUF, mflags); if (dmat->segments == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } } if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; if (flags & BUS_DMA_NOCACHE) attr = VM_MEMATTR_UNCACHEABLE; else attr = VM_MEMATTR_DEFAULT; /* * XXX: * (dmat->alignment < dmat->maxsize) is just a quick hack; the exact * alignment guarantees of malloc need to be nailed down, and the * code below should be rewritten to take that into account. * * In the meantime, we'll warn the user if malloc gets it wrong. 
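 *
 * In short: small (at most one page), loosely aligned, cacheable
 * allocations with no addressing restriction are served by malloc();
 * page-based multi-segment cases go through kmem_alloc_attr(); anything
 * else falls back to kmem_alloc_contig().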
*/ if ((dmat->common.maxsize <= PAGE_SIZE) && (dmat->common.alignment < dmat->common.maxsize) && dmat->common.lowaddr >= ptoa((vm_paddr_t)Maxmem) && attr == VM_MEMATTR_DEFAULT) { *vaddr = malloc(dmat->common.maxsize, M_DEVBUF, mflags); } else if (dmat->common.nsegments >= btoc(dmat->common.maxsize) && dmat->common.alignment <= PAGE_SIZE && (dmat->common.boundary == 0 || dmat->common.boundary >= dmat->common.lowaddr)) { /* Page-based multi-segment allocations allowed */ *vaddr = (void *)kmem_alloc_attr(kernel_arena, dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } else { *vaddr = (void *)kmem_alloc_contig(kernel_arena, dmat->common.maxsize, mflags, 0ul, dmat->common.lowaddr, dmat->common.alignment != 0 ? dmat->common.alignment : 1ul, dmat->common.boundary, attr); dmat->bounce_flags |= BUS_DMA_KMEM_ALLOC; } if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, ENOMEM); return (ENOMEM); } else if (vtophys(*vaddr) & (dmat->common.alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->common.flags, 0); return (0); } /* * Free a piece of memory and it's allociated dmamap, that was allocated * via bus_dmamem_alloc. Make the same choice for free/contigfree. */ static void bounce_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { /* * dmamem does not need to be bounced, so the map should be * NULL and the BUS_DMA_KMEM_ALLOC flag cleared if malloc() * was used and set if kmem_alloc_contig() was used. */ if (map != NULL) panic("bus_dmamem_free: Invalid map freed\n"); if ((dmat->bounce_flags & BUS_DMA_KMEM_ALLOC) == 0) free(vaddr, M_DEVBUF); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->common.maxsize); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->bounce_flags); } static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags) { bus_addr_t curaddr; bus_size_t sgsize; if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { /* * Count the number of bounce pages * needed in order to complete this transfer */ curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->common.maxsegsz); if (bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE); map->pagesneeded++; } curaddr += sgsize; buflen -= sgsize; } CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap, void *buf, bus_size_t buflen, int flags) { vm_offset_t vaddr; vm_offset_t vendaddr; bus_addr_t paddr; bus_size_t sg_len; if ((map != &nobounce_dmamap && map->pagesneeded == 0)) { CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->common.lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->common.boundary, dmat->common.alignment); CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", map, &nobounce_dmamap, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer */ vaddr = (vm_offset_t)buf; vendaddr = (vm_offset_t)buf + buflen; while (vaddr < vendaddr) { sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); if (pmap == kernel_pmap) paddr = pmap_kextract(vaddr); else paddr = pmap_extract(pmap, vaddr); if (bus_dma_run_filter(&dmat->common, paddr) != 0) { sg_len = roundup2(sg_len, dmat->common.alignment); map->pagesneeded++; } vaddr += sg_len; } 
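/*
 * Simplified userspace model of the counting loop in
 * _bus_dmamap_count_pages() above: walk the buffer one page-aligned chunk
 * at a time, translate each chunk to a physical address, and count the
 * chunks the tag's filter would reject and therefore bounce (the alignment
 * round-up is left out).  The toy translation table and the 32-bit cut-off
 * are invented for the example.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PG_SIZE 4096u
#define PG_MASK (PG_SIZE - 1)

/* Pretend physical addresses for three consecutive virtual pages. */
static const uint64_t toy_vtophys[] = {
        0x00000000fe000000ULL,          /* below 4 GB: device-reachable */
        0x0000000123456000ULL,          /* above 4 GB: needs a bounce page */
        0x0000000123457000ULL,          /* above 4 GB: needs a bounce page */
};

int
main(void)
{
        uint64_t lowaddr = 0xffffffffULL;       /* device only reaches 32 bits */
        uint64_t vaddr = 0x1000;                /* page-aligned start VA */
        size_t buflen = 3 * PG_SIZE - 512;      /* buffer ends mid-page */
        size_t off = 0, pagesneeded = 0;

        while (off < buflen) {
                size_t sg_len = PG_SIZE - ((vaddr + off) & PG_MASK);
                uint64_t paddr = toy_vtophys[off / PG_SIZE];

                if (paddr > lowaddr)    /* stand-in for bus_dma_run_filter() */
                        pagesneeded++;
                off += sg_len;
        }
        printf("pagesneeded = %zu\n", pagesneeded);     /* prints 2 */
        return (0);
}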
CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); } } static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { /* Reserve Necessary Bounce Pages */ mtx_lock(&bounce_lock); if (flags & BUS_DMA_NOWAIT) { if (reserve_bounce_pages(dmat, map, 0) != 0) { mtx_unlock(&bounce_lock); return (ENOMEM); } } else { if (reserve_bounce_pages(dmat, map, 1) != 0) { /* Queue us for resources */ STAILQ_INSERT_TAIL(&bounce_map_waitinglist, map, links); mtx_unlock(&bounce_lock); return (EINPROGRESS); } } mtx_unlock(&bounce_lock); return (0); } /* * Add a single contiguous physical range to the segment list. */ static int _bus_dmamap_addseg(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t curaddr, bus_size_t sgsize, bus_dma_segment_t *segs, int *segp) { bus_addr_t baddr, bmask; int seg; /* * Make sure we don't cross any boundaries. */ bmask = ~(dmat->common.boundary - 1); if (dmat->common.boundary > 0) { baddr = (curaddr + dmat->common.boundary) & bmask; if (sgsize > (baddr - curaddr)) sgsize = (baddr - curaddr); } /* * Insert chunk into a segment, coalescing with * previous segment if possible. */ seg = *segp; if (seg == -1) { seg = 0; segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } else { if (curaddr == segs[seg].ds_addr + segs[seg].ds_len && (segs[seg].ds_len + sgsize) <= dmat->common.maxsegsz && (dmat->common.boundary == 0 || (segs[seg].ds_addr & bmask) == (curaddr & bmask))) segs[seg].ds_len += sgsize; else { if (++seg >= dmat->common.nsegments) return (0); segs[seg].ds_addr = curaddr; segs[seg].ds_len = sgsize; } } *segp = seg; return (sgsize); } /* * Utility function to load a physical buffer. segp contains * the starting segment on entrace, and the ending segment on exit. */ static int bounce_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map, vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize; bus_addr_t curaddr; int error; if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->common.maxsegsz); if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; buf += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } /* * Utility function to load a linear buffer. segp contains * the starting segment on entrace, and the ending segment on exit. 
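/*
 * Worked example of the boundary clipping in _bus_dmamap_addseg() above.
 * With a 64 KB boundary, a chunk that starts 0x1800 bytes below the next
 * 64 KB line gets clipped so the first segment stops exactly on the line;
 * the remainder is loaded as a new segment on a later pass.  The addresses
 * and sizes are invented; this is not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t boundary = 64 * 1024;
        uint64_t bmask = ~(boundary - 1);
        uint64_t curaddr = 0x12e800;            /* 0x130000 is the next line */
        uint64_t sgsize = 0x4000;               /* caller wants 16 KB */
        uint64_t baddr;

        if (boundary > 0) {
                baddr = (curaddr + boundary) & bmask;   /* 0x130000 */
                if (sgsize > baddr - curaddr)
                        sgsize = baddr - curaddr;       /* clip to 0x1800 */
        }
        printf("segment: addr 0x%jx len 0x%jx\n",
            (uintmax_t)curaddr, (uintmax_t)sgsize);
        return (0);
}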
*/ static int bounce_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { bus_size_t sgsize, max_sgsize; bus_addr_t curaddr; vm_offset_t vaddr; int error; if (map == NULL) map = &nobounce_dmamap; if (segs == NULL) segs = dmat->segments; if ((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) { _bus_dmamap_count_pages(dmat, map, pmap, buf, buflen, flags); if (map->pagesneeded != 0) { error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); } } vaddr = (vm_offset_t)buf; while (buflen > 0) { /* * Get the physical address for this segment. */ if (pmap == kernel_pmap) curaddr = pmap_kextract(vaddr); else curaddr = pmap_extract(pmap, vaddr); /* * Compute the segment size, and adjust counts. */ max_sgsize = MIN(buflen, dmat->common.maxsegsz); sgsize = PAGE_SIZE - ((vm_offset_t)curaddr & PAGE_MASK); if (((dmat->bounce_flags & BUS_DMA_COULD_BOUNCE) != 0) && map->pagesneeded != 0 && bus_dma_run_filter(&dmat->common, curaddr)) { sgsize = roundup2(sgsize, dmat->common.alignment); sgsize = MIN(sgsize, max_sgsize); curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { sgsize = MIN(sgsize, max_sgsize); } sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize, segs, segp); if (sgsize == 0) break; vaddr += sgsize; buflen -= sgsize; } /* * Did we fit? */ return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */ } static void bounce_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { if (map == NULL) return; map->mem = *mem; map->dmat = dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * bounce_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dma_segment_t *segs, int nsegs, int error) { if (segs == NULL) segs = dmat->segments; return (segs); } /* * Release the mapping held by map. */ static void bounce_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); } } static void bounce_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) { struct bounce_page *bpage; if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { /* * Handle data bouncing. 
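/*
 * Minimal model of the two copy directions handled just below in
 * bounce_bus_dmamap_sync(): PREWRITE copies the driver's data into the
 * bounce page before the device reads it, POSTREAD copies the device's
 * output back out of the bounce page after the DMA completes.  The buffers
 * and contents are invented; this only illustrates the direction of each
 * copy.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
        char data[16] = "driver buffer";        /* datavaddr in the code above */
        char bounce[16] = { 0 };                /* low, device-reachable page */

        /* BUS_DMASYNC_PREWRITE: device will read, so stage the data first. */
        memcpy(bounce, data, sizeof(bounce));

        /* ... device DMA happens against the bounce page here ... */
        snprintf(bounce, sizeof(bounce), "device result");

        /* BUS_DMASYNC_POSTREAD: device wrote, so copy the result back. */
        memcpy(data, bounce, sizeof(data));
        printf("%s\n", data);
        return (0);
}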
We might also * want to add support for invalidating * the caches on broken hardware */ CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " "performing bounce", __func__, dmat, dmat->common.flags, op); if ((op & BUS_DMASYNC_PREWRITE) != 0) { while (bpage != NULL) { if (bpage->datavaddr != 0) { bcopy((void *)bpage->datavaddr, (void *)bpage->vaddr, bpage->datacount); } else { physcopyout(bpage->dataaddr, (void *)bpage->vaddr, bpage->datacount); } bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } if ((op & BUS_DMASYNC_POSTREAD) != 0) { while (bpage != NULL) { if (bpage->datavaddr != 0) { bcopy((void *)bpage->vaddr, (void *)bpage->datavaddr, bpage->datacount); } else { physcopyin((void *)bpage->vaddr, bpage->dataaddr, bpage->datacount); } bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; } } } static void init_bounce_pages(void *dummy __unused) { total_bpages = 0; STAILQ_INIT(&bounce_zone_list); STAILQ_INIT(&bounce_map_waitinglist); STAILQ_INIT(&bounce_map_callbacklist); mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); } SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); static struct sysctl_ctx_list * busdma_sysctl_tree(struct bounce_zone *bz) { return (&bz->sysctl_tree); } static struct sysctl_oid * busdma_sysctl_tree_top(struct bounce_zone *bz) { return (bz->sysctl_tree_top); } static int alloc_bounce_zone(bus_dma_tag_t dmat) { struct bounce_zone *bz; /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { if ((dmat->common.alignment <= bz->alignment) && (dmat->common.lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } } if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, M_NOWAIT | M_ZERO)) == NULL) return (ENOMEM); STAILQ_INIT(&bz->bounce_page_list); bz->free_bpages = 0; bz->reserved_bpages = 0; bz->active_bpages = 0; bz->lowaddr = dmat->common.lowaddr; bz->alignment = MAX(dmat->common.alignment, PAGE_SIZE); bz->map_count = 0; snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); busdma_zonecount++; snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); dmat->bounce_zone = bz; sysctl_ctx_init(&bz->sysctl_tree); bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, CTLFLAG_RD, 0, ""); if (bz->sysctl_tree_top == NULL) { sysctl_ctx_free(&bz->sysctl_tree); return (0); /* XXX error code? 
*/ } SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, "Total bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, "Free bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, "Reserved bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, "Active bounce pages"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, "Total bounce requests"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, "Total bounce requests that were deferred"); SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); SYSCTL_ADD_UAUTO(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "alignment", CTLFLAG_RD, &bz->alignment, ""); return (0); } static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) { struct bounce_zone *bz; int count; bz = dmat->bounce_zone; count = 0; while (numpages > 0) { struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; } bpage->busaddr = pmap_kextract(bpage->vaddr); mtx_lock(&bounce_lock); STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); total_bpages++; bz->total_bpages++; bz->free_bpages++; mtx_unlock(&bounce_lock); count++; numpages--; } return (count); } static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) { struct bounce_zone *bz; int pages; mtx_assert(&bounce_lock, MA_OWNED); bz = dmat->bounce_zone; pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) return (map->pagesneeded - (map->pagesreserved + pages)); bz->free_bpages -= pages; bz->reserved_bpages += pages; map->pagesreserved += pages; pages = map->pagesneeded - map->pagesreserved; return (pages); } static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, bus_addr_t addr, bus_size_t size) { struct bounce_zone *bz; struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); KASSERT(map != NULL && map != &nobounce_dmamap, ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesneeded--; if (map->pagesreserved == 0) panic("add_bounce_page: map doesn't need any pages"); map->pagesreserved--; mtx_lock(&bounce_lock); bpage = STAILQ_FIRST(&bz->bounce_page_list); if (bpage == NULL) panic("add_bounce_page: free page list is empty"); STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); bz->reserved_bpages--; bz->active_bpages++; mtx_unlock(&bounce_lock); if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* Page offset needs to be preserved. 
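/*
 * Userspace model of the bounce-page accounting done by
 * reserve_bounce_pages() and add_bounce_page() above: pages move from the
 * zone's free count to its reserved count when a map reserves them, and
 * from reserved to active as each one is actually assigned to a mapping.
 * The zone size and request are arbitrary.
 */
#include <stdio.h>

struct toy_zone {
        int free_bpages, reserved_bpages, active_bpages;
};

/* Reserve up to 'wanted' pages; return how many are still missing. */
static int
toy_reserve(struct toy_zone *bz, int wanted)
{
        int pages = wanted < bz->free_bpages ? wanted : bz->free_bpages;

        bz->free_bpages -= pages;
        bz->reserved_bpages += pages;
        return (wanted - pages);
}

/* Consume one reserved page for an actual bounce mapping. */
static void
toy_add_bounce_page(struct toy_zone *bz)
{
        bz->reserved_bpages--;
        bz->active_bpages++;
}

int
main(void)
{
        struct toy_zone bz = { .free_bpages = 8 };
        int missing;

        missing = toy_reserve(&bz, 3);  /* 5 free, 3 reserved, 0 missing */
        toy_add_bounce_page(&bz);       /* 2 reserved, 1 active */
        printf("free %d reserved %d active %d missing %d\n",
            bz.free_bpages, bz.reserved_bpages, bz.active_bpages, missing);
        return (0);
}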
*/ bpage->vaddr |= addr & PAGE_MASK; bpage->busaddr |= addr & PAGE_MASK; } bpage->datavaddr = vaddr; bpage->dataaddr = addr; bpage->datacount = size; STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); return (bpage->busaddr); } static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) { struct bus_dmamap *map; struct bounce_zone *bz; bz = dmat->bounce_zone; bpage->datavaddr = 0; bpage->datacount = 0; if (dmat->common.flags & BUS_DMA_KEEP_PG_OFFSET) { /* * Reset the bounce page to start at offset 0. Other uses * of this bounce page may need to store a full page of * data and/or assume it starts on a page boundary. */ bpage->vaddr &= ~PAGE_MASK; bpage->busaddr &= ~PAGE_MASK; } mtx_lock(&bounce_lock); STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); bz->free_bpages++; bz->active_bpages--; if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); } } mtx_unlock(&bounce_lock); } void busdma_swi(void) { bus_dma_tag_t dmat; struct bus_dmamap *map; mtx_lock(&bounce_lock); while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); (dmat->common.lockfunc)(dmat->common.lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock); } struct bus_dma_impl bus_dma_bounce_impl = { .tag_create = bounce_bus_dma_tag_create, .tag_destroy = bounce_bus_dma_tag_destroy, .map_create = bounce_bus_dmamap_create, .map_destroy = bounce_bus_dmamap_destroy, .mem_alloc = bounce_bus_dmamem_alloc, .mem_free = bounce_bus_dmamem_free, .load_phys = bounce_bus_dmamap_load_phys, .load_buffer = bounce_bus_dmamap_load_buffer, .load_ma = bus_dmamap_load_ma_triv, .map_waitok = bounce_bus_dmamap_waitok, .map_complete = bounce_bus_dmamap_complete, .map_unload = bounce_bus_dmamap_unload, .map_sync = bounce_bus_dmamap_sync }; Index: head/sys/x86/x86/cpu_machdep.c =================================================================== --- head/sys/x86/x86/cpu_machdep.c (revision 282273) +++ head/sys/x86/x86/cpu_machdep.c (revision 282274) @@ -1,533 +1,486 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_platform.h" #ifdef __i386__ #include "opt_npx.h" #include "opt_apic.h" #include "opt_xbox.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include -#ifdef XEN -/* XEN includes */ -#include -#include -#include -#include -#include -#endif - /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* Not applicable */ } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { uint64_t tsc1, tsc2; uint64_t acnt, mcnt, perf; register_t reg; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); #ifdef __i386__ if ((cpu_feature & CPUID_TSC) == 0) return (EOPNOTSUPP); #endif /* * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, * DELAY(9) based logic fails. */ if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); } #endif /* Calibrate by measuring a short delay. 
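/*
 * Worked example of the estimate computed just below in cpu_est_clockrate():
 * DELAY(1000) waits one millisecond, so (tsc2 - tsc1) * 1000 scales the
 * measured TSC delta up to ticks per second, and the APERF/MPERF ratio
 * corrects for the CPU running at a different effective speed than the
 * invariant TSC.  The counter values are made up.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t tsc_delta = 3000000;   /* TSC ticks seen during 1 ms */
        uint64_t acnt = 750000;         /* APERF: actual cycles executed */
        uint64_t mcnt = 1500000;        /* MPERF: cycles at nominal speed */
        uint64_t perf, rate;

        perf = 1000 * acnt / mcnt;      /* 500: running at half speed */
        rate = tsc_delta * perf;        /* 1.5 GHz effective clock rate */
        printf("estimated rate: %ju Hz\n", (uintmax_t)rate);
        return (0);
}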
*/ reg = intr_disable(); if (tsc_is_invariant) { wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); tsc1 = rdtsc(); DELAY(1000); mcnt = rdmsr(MSR_MPERF); acnt = rdmsr(MSR_APERF); tsc2 = rdtsc(); intr_restore(reg); perf = 1000 * acnt / mcnt; *rate = (tsc2 - tsc1) * perf; } else { tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); *rate = (tsc2 - tsc1) * 1000; } #ifdef SMP if (smp_cpus > 1) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } #endif return (0); } -#if defined(__i386__) && defined(XEN) - -static void -idle_block(void) -{ - - HYPERVISOR_sched_op(SCHEDOP_block, 0); -} - -void -cpu_halt(void) -{ - HYPERVISOR_shutdown(SHUTDOWN_poweroff); -} - -int scheduler_running; - -static void -cpu_idle_hlt(sbintime_t sbt) -{ - - scheduler_running = 1; - enable_intr(); - idle_block(); -} - -#else /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) halt(); } -#endif - void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait, 0, "Use MONITOR/MWAIT for short idle"); #define STATE_RUNNING 0x0 #define STATE_MWAIT 0x1 #define STATE_SLEEPING 0x2 #ifndef PC98 static void cpu_idle_acpi(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(sbt); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } #endif /* !PC98 */ -#if !defined(__i386__) || !defined(XEN) static void cpu_idle_hlt(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* * Since we may be in a critical section from cpu_idle(), if * an interrupt fires during that critical section we may have * a pending preemption. If the CPU halts, then that thread * may not execute until a later interrupt awakens the CPU. * To handle this race, check for a runnable thread after * disabling interrupts and immediately return if one is * found. Also, we must absolutely guarentee that hlt is * the next instruction after sti. This ensures that any * interrupt that fires after the call to disable_intr() will * immediately awaken the CPU from hlt. Finally, please note * that on x86 this works fine because of interrupts enabled only * after the instruction following sti takes place, while IF is set * to 1 immediately, allowing hlt instruction to acknowledge the * interrupt. */ disable_intr(); if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } -#endif static void cpu_idle_mwait(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_MWAIT; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) { enable_intr(); *state = STATE_RUNNING; return; } cpu_monitor(state, 0, 0); if (*state == STATE_MWAIT) __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else enable_intr(); *state = STATE_RUNNING; } static void cpu_idle_spin(sbintime_t sbt) { int *state; int i; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_RUNNING; /* * The sched_runnable() call is racy but as long as there is * a loop missing it one time will have just a little impact if any * (and it is much better than missing the check at all). 
*/ for (i = 0; i < 1000; i++) { if (sched_runnable()) return; cpu_spinwait(); } } /* * C1E renders the local APIC timer dead, so we disable it by * reading the Interrupt Pending Message register and clearing * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). * * Reference: * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" * #32559 revision 3.00+ */ #define MSR_AMDK8_IPM 0xc0010055 #define AMDK8_SMIONCMPHALT (1ULL << 27) #define AMDK8_C1EONCMPHALT (1ULL << 28) #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) void cpu_probe_amdc1e(void) { /* * Detect the presence of C1E capability mostly on latest * dual-cores (or future) k8 family. */ if (cpu_vendor_id == CPU_VENDOR_AMD && (cpu_id & 0x00000f00) == 0x00000f00 && (cpu_id & 0x0fff0000) >= 0x00040000) { cpu_ident_amdc1e = 1; } } -#if defined(__i386__) && (defined(PC98) || defined(XEN)) +#if defined(__i386__) && defined(PC98) void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt; #else void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; #endif void cpu_idle(int busy) { -#if !defined(__i386__) || !defined(XEN) uint64_t msr; -#endif sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); -#if defined(MP_WATCHDOG) && (!defined(__i386__) || !defined(XEN)) +#ifdef MP_WATCHDOG ap_watchdog(PCPU_GET(cpuid)); #endif -#if !defined(__i386__) || !defined(XEN) + /* If we are busy - try to use fast methods. */ if (busy) { if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { cpu_idle_mwait(busy); goto out; } } -#endif /* If we have time - switch timers into idle mode. */ if (!busy) { critical_enter(); sbt = cpu_idleclock(); } -#if !defined(__i386__) || !defined(XEN) /* Apply AMD APIC timer C1E workaround. */ if (cpu_ident_amdc1e && cpu_disable_c3_sleep) { msr = rdmsr(MSR_AMDK8_IPM); if (msr & AMDK8_CMPHALT) wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } -#endif /* Call main idle method. */ cpu_idle_fn(sbt); /* Switch timers back into active mode. */ if (!busy) { cpu_activeclock(); critical_exit(); } -#if !defined(__i386__) || !defined(XEN) out: -#endif CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; int *state; pcpu = pcpu_find(cpu); state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ if (*state == STATE_SLEEPING) return (0); if (*state == STATE_MWAIT) *state = STATE_RUNNING; return (1); } /* * Ordered by speed/power consumption. */ struct { void *id_fn; char *id_name; } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, { cpu_idle_hlt, "hlt" }, #if !defined(__i386__) || !defined(PC98) { cpu_idle_acpi, "acpi" }, #endif { NULL, NULL } }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; #if !defined(__i386__) || !defined(PC98) if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; #endif p += sprintf(p, "%s%s", p != avail ? 
", " : "", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); static int idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; char *p; int i; p = "unknown"; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; #if !defined(__i386__) || !defined(PC98) if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; #endif if (strcmp(idle_tbl[i].id_name, buf)) continue; cpu_idle_fn = idle_tbl[i].id_fn; return (0); } return (EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); Index: head/sys/x86/x86/identcpu.c =================================================================== --- head/sys/x86/x86/identcpu.c (revision 282273) +++ head/sys/x86/x86/identcpu.c (revision 282274) @@ -1,2163 +1,2159 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * Copyright (c) 1997 KATO Takenori. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define IDENTBLUE_CYRIX486 0 #define IDENTBLUE_IBMCPU 1 #define IDENTBLUE_CYRIXM2 2 static void identifycyrix(void); static void print_transmeta_info(void); #endif static u_int find_cpu_vendor_id(void); static void print_AMD_info(void); static void print_INTEL_info(void); static void print_INTEL_TLB(u_int data); static void print_hypervisor_info(void); static void print_svm_info(void); static void print_via_padlock_info(void); static void print_vmx_info(void); int cpu_class; char machine[] = MACHINE; #ifdef __amd64__ #ifdef SCTL_MASK32 extern int adaptive_machine_arch; #endif static int sysctl_hw_machine(SYSCTL_HANDLER_ARGS) { #ifdef SCTL_MASK32 static const char machine32[] = "i386"; #endif int error; #ifdef SCTL_MASK32 if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch) error = SYSCTL_OUT(req, machine32, sizeof(machine32)); else #endif error = SYSCTL_OUT(req, machine, sizeof(machine)); return (error); } SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_hw_machine, "A", "Machine class"); #else SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); #endif static char cpu_model[128]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "Machine model"); static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); u_int hv_high; char hv_vendor[16]; SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD, hv_vendor, 0, "Hypervisor vendor"); static eventhandler_tag tsc_post_tag; static char cpu_brand[48]; #ifdef __i386__ #define MAX_BRAND_INDEX 8 static const char *cpu_brandtable[MAX_BRAND_INDEX + 1] = { NULL, /* No brand */ "Intel Celeron", "Intel Pentium III", "Intel Pentium III Xeon", NULL, NULL, NULL, NULL, "Intel Pentium 4" }; #endif static struct { char *cpu_name; int cpu_class; } cpus[] = { #ifdef __i386__ { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ { "i386DX", CPUCLASS_386 }, /* CPU_386 */ { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ { "i486DX", CPUCLASS_486 }, /* CPU_486 */ { "Pentium", CPUCLASS_586 }, /* CPU_586 */ { "Cyrix 486", CPUCLASS_486 }, /* CPU_486DLC */ { "Pentium Pro", CPUCLASS_686 }, /* CPU_686 */ { "Cyrix 5x86", CPUCLASS_486 }, /* CPU_M1SC */ { "Cyrix 6x86", CPUCLASS_486 }, /* CPU_M1 */ { "Blue Lightning", CPUCLASS_486 }, /* CPU_BLUE */ { "Cyrix 6x86MX", CPUCLASS_686 }, /* CPU_M2 */ { "NexGen 586", CPUCLASS_386 }, /* CPU_NX586 (XXX) */ { "Cyrix 486S/DX", CPUCLASS_486 }, /* CPU_CY486DX */ { "Pentium II", CPUCLASS_686 }, /* CPU_PII */ { "Pentium III", CPUCLASS_686 }, /* CPU_PIII */ { "Pentium 4", CPUCLASS_686 }, /* CPU_P4 */ #else { "Clawhammer", CPUCLASS_K8 }, /* CPU_CLAWHAMMER */ { "Sledgehammer", CPUCLASS_K8 }, /* CPU_SLEDGEHAMMER */ #endif }; static struct { char *vendor; u_int vendor_id; } cpu_vendors[] = { { INTEL_VENDOR_ID, CPU_VENDOR_INTEL }, /* GenuineIntel */ { AMD_VENDOR_ID, CPU_VENDOR_AMD }, /* AuthenticAMD */ { CENTAUR_VENDOR_ID, CPU_VENDOR_CENTAUR }, /* CentaurHauls */ #ifdef __i386__ { NSC_VENDOR_ID, CPU_VENDOR_NSC }, /* Geode by NSC */ { CYRIX_VENDOR_ID, CPU_VENDOR_CYRIX }, /* CyrixInstead */ { TRANSMETA_VENDOR_ID, CPU_VENDOR_TRANSMETA }, /* GenuineTMx86 */ { SIS_VENDOR_ID, 
CPU_VENDOR_SIS }, /* SiS SiS SiS */ { UMC_VENDOR_ID, CPU_VENDOR_UMC }, /* UMC UMC UMC */ { NEXGEN_VENDOR_ID, CPU_VENDOR_NEXGEN }, /* NexGenDriven */ { RISE_VENDOR_ID, CPU_VENDOR_RISE }, /* RiseRiseRise */ #if 0 /* XXX CPUID 8000_0000h and 8086_0000h, not 0000_0000h */ { "TransmetaCPU", CPU_VENDOR_TRANSMETA }, #endif #endif }; void printcpuinfo(void) { u_int regs[4], i; char *brand; cpu_class = cpus[cpu].cpu_class; printf("CPU: "); strncpy(cpu_model, cpus[cpu].cpu_name, sizeof (cpu_model)); /* Check for extended CPUID information and a processor name. */ if (cpu_exthigh >= 0x80000004) { brand = cpu_brand; for (i = 0x80000002; i < 0x80000005; i++) { do_cpuid(i, regs); memcpy(brand, regs, sizeof(regs)); brand += sizeof(regs); } } switch (cpu_vendor_id) { case CPU_VENDOR_INTEL: #ifdef __i386__ if ((cpu_id & 0xf00) > 0x300) { u_int brand_index; cpu_model[0] = '\0'; switch (cpu_id & 0x3000) { case 0x1000: strcpy(cpu_model, "Overdrive "); break; case 0x2000: strcpy(cpu_model, "Dual "); break; } switch (cpu_id & 0xf00) { case 0x400: strcat(cpu_model, "i486 "); /* Check the particular flavor of 486 */ switch (cpu_id & 0xf0) { case 0x00: case 0x10: strcat(cpu_model, "DX"); break; case 0x20: strcat(cpu_model, "SX"); break; case 0x30: strcat(cpu_model, "DX2"); break; case 0x40: strcat(cpu_model, "SL"); break; case 0x50: strcat(cpu_model, "SX2"); break; case 0x70: strcat(cpu_model, "DX2 Write-Back Enhanced"); break; case 0x80: strcat(cpu_model, "DX4"); break; } break; case 0x500: /* Check the particular flavor of 586 */ strcat(cpu_model, "Pentium"); switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, " A-step"); break; case 0x10: strcat(cpu_model, "/P5"); break; case 0x20: strcat(cpu_model, "/P54C"); break; case 0x30: strcat(cpu_model, "/P24T"); break; case 0x40: strcat(cpu_model, "/P55C"); break; case 0x70: strcat(cpu_model, "/P54C"); break; case 0x80: strcat(cpu_model, "/P55C (quarter-micron)"); break; default: /* nothing */ break; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) /* * XXX - If/when Intel fixes the bug, this * should also check the version of the * CPU, not just that it's a Pentium. */ has_f00f_bug = 1; #endif break; case 0x600: /* Check the particular flavor of 686 */ switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, "Pentium Pro A-step"); break; case 0x10: strcat(cpu_model, "Pentium Pro"); break; case 0x30: case 0x50: case 0x60: strcat(cpu_model, "Pentium II/Pentium II Xeon/Celeron"); cpu = CPU_PII; break; case 0x70: case 0x80: case 0xa0: case 0xb0: strcat(cpu_model, "Pentium III/Pentium III Xeon/Celeron"); cpu = CPU_PIII; break; default: strcat(cpu_model, "Unknown 80686"); break; } break; case 0xf00: strcat(cpu_model, "Pentium 4"); cpu = CPU_P4; break; default: strcat(cpu_model, "unknown"); break; } /* * If we didn't get a brand name from the extended * CPUID, try to look it up in the brand table. */ if (cpu_high > 0 && *cpu_brand == '\0') { brand_index = cpu_procinfo & CPUID_BRAND_INDEX; if (brand_index <= MAX_BRAND_INDEX && cpu_brandtable[brand_index] != NULL) strcpy(cpu_brand, cpu_brandtable[brand_index]); } } #else /* Please make up your mind folks! */ strcat(cpu_model, "EM64T"); #endif break; case CPU_VENDOR_AMD: /* * Values taken from AMD Processor Recognition * http://www.amd.com/K6/k6docs/pdf/20734g.pdf * (also describes ``Features'' encodings. 
*/ strcpy(cpu_model, "AMD "); #ifdef __i386__ switch (cpu_id & 0xFF0) { case 0x410: strcat(cpu_model, "Standard Am486DX"); break; case 0x430: strcat(cpu_model, "Enhanced Am486DX2 Write-Through"); break; case 0x470: strcat(cpu_model, "Enhanced Am486DX2 Write-Back"); break; case 0x480: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Through"); break; case 0x490: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Back"); break; case 0x4E0: strcat(cpu_model, "Am5x86 Write-Through"); break; case 0x4F0: strcat(cpu_model, "Am5x86 Write-Back"); break; case 0x500: strcat(cpu_model, "K5 model 0"); break; case 0x510: strcat(cpu_model, "K5 model 1"); break; case 0x520: strcat(cpu_model, "K5 PR166 (model 2)"); break; case 0x530: strcat(cpu_model, "K5 PR200 (model 3)"); break; case 0x560: strcat(cpu_model, "K6"); break; case 0x570: strcat(cpu_model, "K6 266 (model 1)"); break; case 0x580: strcat(cpu_model, "K6-2"); break; case 0x590: strcat(cpu_model, "K6-III"); break; case 0x5a0: strcat(cpu_model, "Geode LX"); break; default: strcat(cpu_model, "Unknown"); break; } #else if ((cpu_id & 0xf00) == 0xf00) strcat(cpu_model, "AMD64 Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_CYRIX: strcpy(cpu_model, "Cyrix "); switch (cpu_id & 0xff0) { case 0x440: strcat(cpu_model, "MediaGX"); break; case 0x520: strcat(cpu_model, "6x86"); break; case 0x540: cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); break; case 0x600: strcat(cpu_model, "6x86MX"); break; default: /* * Even though CPU supports the cpuid * instruction, it can be disabled. * Therefore, this routine supports all Cyrix * CPUs. */ switch (cyrix_did & 0xf0) { case 0x00: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486SLC"); break; case 0x01: strcat(cpu_model, "486DLC"); break; case 0x02: strcat(cpu_model, "486SLC2"); break; case 0x03: strcat(cpu_model, "486DLC2"); break; case 0x04: strcat(cpu_model, "486SRx"); break; case 0x05: strcat(cpu_model, "486DRx"); break; case 0x06: strcat(cpu_model, "486SRx2"); break; case 0x07: strcat(cpu_model, "486DRx2"); break; case 0x08: strcat(cpu_model, "486SRu"); break; case 0x09: strcat(cpu_model, "486DRu"); break; case 0x0a: strcat(cpu_model, "486SRu2"); break; case 0x0b: strcat(cpu_model, "486DRu2"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x10: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486S"); break; case 0x01: strcat(cpu_model, "486S2"); break; case 0x02: strcat(cpu_model, "486Se"); break; case 0x03: strcat(cpu_model, "486S2e"); break; case 0x0a: strcat(cpu_model, "486DX"); break; case 0x0b: strcat(cpu_model, "486DX2"); break; case 0x0f: strcat(cpu_model, "486DX4"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x20: if ((cyrix_did & 0x0f) < 8) strcat(cpu_model, "6x86"); /* Where did you get it? 
*/ else strcat(cpu_model, "5x86"); break; case 0x30: strcat(cpu_model, "6x86"); break; case 0x40: if ((cyrix_did & 0xf000) == 0x3000) { cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); } else strcat(cpu_model, "MediaGX"); break; case 0x50: strcat(cpu_model, "6x86MX"); break; case 0xf0: switch (cyrix_did & 0x0f) { case 0x0d: strcat(cpu_model, "Overdrive CPU"); break; case 0x0e: strcpy(cpu_model, "Texas Instruments 486SXL"); break; case 0x0f: strcat(cpu_model, "486SLC/DLC"); break; default: strcat(cpu_model, "Unknown"); break; } break; default: strcat(cpu_model, "Unknown"); break; } break; } break; case CPU_VENDOR_RISE: strcpy(cpu_model, "Rise "); switch (cpu_id & 0xff0) { case 0x500: /* 6401 and 6441 (Kirin) */ case 0x520: /* 6510 (Lynx) */ strcat(cpu_model, "mP6"); break; default: strcat(cpu_model, "Unknown"); } break; #endif case CPU_VENDOR_CENTAUR: #ifdef __i386__ switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "IDT WinChip C6"); break; case 0x580: strcpy(cpu_model, "IDT WinChip 2"); break; case 0x590: strcpy(cpu_model, "IDT WinChip 3"); break; case 0x660: strcpy(cpu_model, "VIA C3 Samuel"); break; case 0x670: if (cpu_id & 0x8) strcpy(cpu_model, "VIA C3 Ezra"); else strcpy(cpu_model, "VIA C3 Samuel 2"); break; case 0x680: strcpy(cpu_model, "VIA C3 Ezra-T"); break; case 0x690: strcpy(cpu_model, "VIA C3 Nehemiah"); break; case 0x6a0: case 0x6d0: strcpy(cpu_model, "VIA C7 Esther"); break; case 0x6f0: strcpy(cpu_model, "VIA Nano"); break; default: strcpy(cpu_model, "VIA/IDT Unknown"); } #else strcpy(cpu_model, "VIA "); if ((cpu_id & 0xff0) == 0x6f0) strcat(cpu_model, "Nano Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_IBM: strcpy(cpu_model, "Blue Lightning CPU"); break; case CPU_VENDOR_NSC: switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "Geode SC1100"); cpu = CPU_GEODE1100; break; default: strcpy(cpu_model, "Geode/NSC unknown"); break; } break; #endif default: strcat(cpu_model, "Unknown"); break; } /* * Replace cpu_model with cpu_brand minus leading spaces if * we have one. */ brand = cpu_brand; while (*brand == ' ') ++brand; if (*brand != '\0') strcpy(cpu_model, brand); printf("%s (", cpu_model); if (tsc_freq != 0) { hw_clockrate = (tsc_freq + 5000) / 1000000; printf("%jd.%02d-MHz ", (intmax_t)(tsc_freq + 4999) / 1000000, (u_int)((tsc_freq + 4999) / 10000) % 100); } switch(cpu_class) { #ifdef __i386__ case CPUCLASS_286: printf("286"); break; case CPUCLASS_386: printf("386"); break; #if defined(I486_CPU) case CPUCLASS_486: printf("486"); break; #endif #if defined(I586_CPU) case CPUCLASS_586: printf("586"); break; #endif #if defined(I686_CPU) case CPUCLASS_686: printf("686"); break; #endif #else case CPUCLASS_K8: printf("K8"); break; #endif default: printf("Unknown"); /* will panic below... 
*/ } printf("-class CPU)\n"); if (*cpu_vendor) printf(" Origin=\"%s\"", cpu_vendor); if (cpu_id) printf(" Id=0x%x", cpu_id); if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_CENTAUR || #ifdef __i386__ cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_RISE || cpu_vendor_id == CPU_VENDOR_NSC || (cpu_vendor_id == CPU_VENDOR_CYRIX && ((cpu_id & 0xf00) > 0x500)) || #endif 0) { printf(" Family=0x%x", CPUID_TO_FAMILY(cpu_id)); printf(" Model=0x%x", CPUID_TO_MODEL(cpu_id)); printf(" Stepping=%u", cpu_id & CPUID_STEPPING); #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) printf("\n DIR=0x%04x", cyrix_did); #endif /* * AMD CPUID Specification * http://support.amd.com/us/Embedded_TechDocs/25481.pdf * * Intel Processor Identification and CPUID Instruction * http://www.intel.com/assets/pdf/appnote/241618.pdf */ if (cpu_high > 0) { /* * Here we should probably set up flags indicating * whether or not various features are available. * The interesting ones are probably VME, PSE, PAE, * and PGE. The code already assumes without bothering * to check that all CPUs >= Pentium have a TSC and * MSRs. */ printf("\n Features=0x%b", cpu_feature, "\020" "\001FPU" /* Integral FPU */ "\002VME" /* Extended VM86 mode support */ "\003DE" /* Debugging Extensions (CR4.DE) */ "\004PSE" /* 4MByte page tables */ "\005TSC" /* Timestamp counter */ "\006MSR" /* Machine specific registers */ "\007PAE" /* Physical address extension */ "\010MCE" /* Machine Check support */ "\011CX8" /* CMPEXCH8 instruction */ "\012APIC" /* SMP local APIC */ "\013oldMTRR" /* Previous implementation of MTRR */ "\014SEP" /* Fast System Call */ "\015MTRR" /* Memory Type Range Registers */ "\016PGE" /* PG_G (global bit) support */ "\017MCA" /* Machine Check Architecture */ "\020CMOV" /* CMOV instruction */ "\021PAT" /* Page attributes table */ "\022PSE36" /* 36 bit address space support */ "\023PN" /* Processor Serial number */ "\024CLFLUSH" /* Has the CLFLUSH instruction */ "\025" "\026DTS" /* Debug Trace Store */ "\027ACPI" /* ACPI support */ "\030MMX" /* MMX instructions */ "\031FXSR" /* FXSAVE/FXRSTOR */ "\032SSE" /* Streaming SIMD Extensions */ "\033SSE2" /* Streaming SIMD Extensions #2 */ "\034SS" /* Self snoop */ "\035HTT" /* Hyperthreading (see EBX bit 16-23) */ "\036TM" /* Thermal Monitor clock slowdown */ "\037IA64" /* CPU can execute IA64 instructions */ "\040PBE" /* Pending Break Enable */ ); if (cpu_feature2 != 0) { printf("\n Features2=0x%b", cpu_feature2, "\020" "\001SSE3" /* SSE3 */ "\002PCLMULQDQ" /* Carry-Less Mul Quadword */ "\003DTES64" /* 64-bit Debug Trace */ "\004MON" /* MONITOR/MWAIT Instructions */ "\005DS_CPL" /* CPL Qualified Debug Store */ "\006VMX" /* Virtual Machine Extensions */ "\007SMX" /* Safer Mode Extensions */ "\010EST" /* Enhanced SpeedStep */ "\011TM2" /* Thermal Monitor 2 */ "\012SSSE3" /* SSSE3 */ "\013CNXT-ID" /* L1 context ID available */ "\014SDBG" /* IA32 silicon debug */ "\015FMA" /* Fused Multiply Add */ "\016CX16" /* CMPXCHG16B Instruction */ "\017xTPR" /* Send Task Priority Messages*/ "\020PDCM" /* Perf/Debug Capability MSR */ "\021" "\022PCID" /* Process-context Identifiers*/ "\023DCA" /* Direct Cache Access */ "\024SSE4.1" /* SSE 4.1 */ "\025SSE4.2" /* SSE 4.2 */ "\026x2APIC" /* xAPIC Extensions */ "\027MOVBE" /* MOVBE Instruction */ "\030POPCNT" /* POPCNT Instruction */ "\031TSCDLT" /* TSC-Deadline Timer */ "\032AESNI" /* AES Crypto */ "\033XSAVE" /* XSAVE/XRSTOR States */ "\034OSXSAVE" /* OS-Enabled State Management*/ 
"\035AVX" /* Advanced Vector Extensions */ "\036F16C" /* Half-precision conversions */ "\037RDRAND" /* RDRAND Instruction */ "\040HV" /* Hypervisor */ ); } if (amd_feature != 0) { printf("\n AMD Features=0x%b", amd_feature, "\020" /* in hex */ "\001" /* Same */ "\002" /* Same */ "\003" /* Same */ "\004" /* Same */ "\005" /* Same */ "\006" /* Same */ "\007" /* Same */ "\010" /* Same */ "\011" /* Same */ "\012" /* Same */ "\013" /* Undefined */ "\014SYSCALL" /* Have SYSCALL/SYSRET */ "\015" /* Same */ "\016" /* Same */ "\017" /* Same */ "\020" /* Same */ "\021" /* Same */ "\022" /* Same */ "\023" /* Reserved, unknown */ "\024MP" /* Multiprocessor Capable */ "\025NX" /* Has EFER.NXE, NX */ "\026" /* Undefined */ "\027MMX+" /* AMD MMX Extensions */ "\030" /* Same */ "\031" /* Same */ "\032FFXSR" /* Fast FXSAVE/FXRSTOR */ "\033Page1GB" /* 1-GB large page support */ "\034RDTSCP" /* RDTSCP */ "\035" /* Undefined */ "\036LM" /* 64 bit long mode */ "\0373DNow!+" /* AMD 3DNow! Extensions */ "\0403DNow!" /* AMD 3DNow! */ ); } if (amd_feature2 != 0) { printf("\n AMD Features2=0x%b", amd_feature2, "\020" "\001LAHF" /* LAHF/SAHF in long mode */ "\002CMP" /* CMP legacy */ "\003SVM" /* Secure Virtual Mode */ "\004ExtAPIC" /* Extended APIC register */ "\005CR8" /* CR8 in legacy mode */ "\006ABM" /* LZCNT instruction */ "\007SSE4A" /* SSE4A */ "\010MAS" /* Misaligned SSE mode */ "\011Prefetch" /* 3DNow! Prefetch/PrefetchW */ "\012OSVW" /* OS visible workaround */ "\013IBS" /* Instruction based sampling */ "\014XOP" /* XOP extended instructions */ "\015SKINIT" /* SKINIT/STGI */ "\016WDT" /* Watchdog timer */ "\017" "\020LWP" /* Lightweight Profiling */ "\021FMA4" /* 4-operand FMA instructions */ "\022TCE" /* Translation Cache Extension */ "\023" "\024NodeId" /* NodeId MSR support */ "\025" "\026TBM" /* Trailing Bit Manipulation */ "\027Topology" /* Topology Extensions */ "\030PCXC" /* Core perf count */ "\031PNXC" /* NB perf count */ "\032" "\033DBE" /* Data Breakpoint extension */ "\034PTSC" /* Performance TSC */ "\035PL2I" /* L2I perf count */ "\036" "\037" "\040" ); } if (cpu_stdext_feature != 0) { printf("\n Structured Extended Features=0x%b", cpu_stdext_feature, "\020" /* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ "\001FSGSBASE" "\002TSCADJ" /* Bit Manipulation Instructions */ "\004BMI1" /* Hardware Lock Elision */ "\005HLE" /* Advanced Vector Instructions 2 */ "\006AVX2" /* Supervisor Mode Execution Prot. */ "\010SMEP" /* Bit Manipulation Instructions */ "\011BMI2" "\012ERMS" /* Invalidate Processor Context ID */ "\013INVPCID" /* Restricted Transactional Memory */ "\014RTM" /* Intel Memory Protection Extensions */ "\017MPX" /* AVX512 Foundation */ "\021AVX512F" /* Enhanced NRBG */ "\023RDSEED" /* ADCX + ADOX */ "\024ADX" /* Supervisor Mode Access Prevention */ "\025SMAP" "\030CLFLUSHOPT" "\032PROCTRACE" "\033AVX512PF" "\034AVX512ER" "\035AVX512CD" "\036SHA" ); } if ((cpu_feature2 & CPUID2_XSAVE) != 0) { cpuid_count(0xd, 0x1, regs); if (regs[0] != 0) { printf("\n XSAVE Features=0x%b", regs[0], "\020" "\001XSAVEOPT" "\002XSAVEC" "\003XINUSE" "\004XSAVES"); } } if (via_feature_rng != 0 || via_feature_xcrypt != 0) print_via_padlock_info(); if (cpu_feature2 & CPUID2_VMX) print_vmx_info(); if (amd_feature2 & AMDID2_SVM) print_svm_info(); if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_AMD) cpu_feature &= ~CPUID_HTT; /* * If this CPU supports P-state invariant TSC then * mention the capability. 
*/ if (tsc_is_invariant) { printf("\n TSC: P-state invariant"); if (tsc_perf_stat) printf(", performance statistics"); } } #ifdef __i386__ } else if (cpu_vendor_id == CPU_VENDOR_CYRIX) { printf(" DIR=0x%04x", cyrix_did); printf(" Stepping=%u", (cyrix_did & 0xf000) >> 12); printf(" Revision=%u", (cyrix_did & 0x0f00) >> 8); #ifndef CYRIX_CACHE_REALLY_WORKS if (cpu == CPU_M1 && (cyrix_did & 0xff00) < 0x1700) printf("\n CPU cache: write-through mode"); #endif #endif } /* Avoid ugly blank lines: only print newline when we have to. */ if (*cpu_vendor || cpu_id) printf("\n"); if (bootverbose) { if (cpu_vendor_id == CPU_VENDOR_AMD) print_AMD_info(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) print_INTEL_info(); #ifdef __i386__ else if (cpu_vendor_id == CPU_VENDOR_TRANSMETA) print_transmeta_info(); #endif } print_hypervisor_info(); } void panicifcpuunsupported(void) { #ifdef __i386__ #if !defined(lint) #if !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU) #error This kernel is not configured for one of the supported CPUs #endif #else /* lint */ #endif /* lint */ #else /* __amd64__ */ #ifndef HAMMER #error "You need to specify a cpu type" #endif #endif /* * Now that we have told the user what they have, * let them know if that machine type isn't configured. */ switch (cpu_class) { #ifdef __i386__ case CPUCLASS_286: /* a 286 should not make it this far, anyway */ case CPUCLASS_386: #if !defined(I486_CPU) case CPUCLASS_486: #endif #if !defined(I586_CPU) case CPUCLASS_586: #endif #if !defined(I686_CPU) case CPUCLASS_686: #endif #else /* __amd64__ */ case CPUCLASS_X86: #ifndef HAMMER case CPUCLASS_K8: #endif #endif panic("CPU class not configured"); default: break; } } #ifdef __i386__ static volatile u_int trap_by_rdmsr; /* * Special exception 6 handler. * The rdmsr instruction generates invalid opcodes fault on 486-class * Cyrix CPU. Stacked eip register points the rdmsr instruction in the * function identblue() when this handler is called. Stacked eip should * be advanced. */ inthand_t bluetrap6; #ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap6)) ",@function \n\ " __XSTRING(CNAME(bluetrap6)) ": \n\ ss \n\ movl $0xa8c1d," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); #endif /* * Special exception 13 handler. * Accessing non-existent MSR generates general protection fault. */ inthand_t bluetrap13; #ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap13)) ",@function \n\ " __XSTRING(CNAME(bluetrap13)) ": \n\ ss \n\ movl $0xa89c4," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ popl %eax /* discard error code */ \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); #endif /* * Distinguish IBM Blue Lightning CPU from Cyrix CPUs that does not * support cpuid instruction. This function should be called after * loading interrupt descriptor table register. * * I don't like this method that handles fault, but I couldn't get * information for any other methods. Does blue giant know? */ static int identblue(void) { trap_by_rdmsr = 0; /* * Cyrix 486-class CPU does not support rdmsr instruction. * The rdmsr instruction generates invalid opcode fault, and exception * will be trapped by bluetrap6() on Cyrix 486-class CPU. The * bluetrap6() set the magic number to trap_by_rdmsr. */ setidt(IDT_UD, bluetrap6, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Certain BIOS disables cpuid instruction of Cyrix 6x86MX CPU. 
* In this case, rdmsr generates general protection fault, and * exception will be trapped by bluetrap13(). */ setidt(IDT_GP, bluetrap13, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); rdmsr(0x1002); /* Cyrix CPU generates fault. */ if (trap_by_rdmsr == 0xa8c1d) return IDENTBLUE_CYRIX486; else if (trap_by_rdmsr == 0xa89c4) return IDENTBLUE_CYRIXM2; return IDENTBLUE_IBMCPU; } /* * identifycyrix() set lower 16 bits of cyrix_did as follows: * * F E D C B A 9 8 7 6 5 4 3 2 1 0 * +-------+-------+---------------+ * | SID | RID | Device ID | * | (DIR 1) | (DIR 0) | * +-------+-------+---------------+ */ static void identifycyrix(void) { register_t saveintr; int ccr2_test = 0, dir_test = 0; u_char ccr2, ccr3; saveintr = intr_disable(); ccr2 = read_cyrix_reg(CCR2); write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); read_cyrix_reg(CCR2); if (read_cyrix_reg(CCR2) != ccr2) ccr2_test = 1; write_cyrix_reg(CCR2, ccr2); ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, ccr3 ^ CCR3_MAPEN3); read_cyrix_reg(CCR3); if (read_cyrix_reg(CCR3) != ccr3) dir_test = 1; /* CPU supports DIRs. */ write_cyrix_reg(CCR3, ccr3); if (dir_test) { /* Device ID registers are available. */ cyrix_did = read_cyrix_reg(DIR1) << 8; cyrix_did += read_cyrix_reg(DIR0); } else if (ccr2_test) cyrix_did = 0x0010; /* 486S A-step */ else cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ intr_restore(saveintr); } #endif /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; /* Total setting for this level gives the new frequency in MHz. */ hw_clockrate = level->total_set.freq; } static void hook_tsc_freq(void *arg __unused) { if (tsc_is_invariant) return; tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY); } SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL); -#ifndef XEN static const char *const vm_bnames[] = { "QEMU", /* QEMU */ "Plex86", /* Plex86 */ "Bochs", /* Bochs */ "Xen", /* Xen */ "BHYVE", /* bhyve */ "Seabios", /* KVM */ NULL }; static const char *const vm_pnames[] = { "VMware Virtual Platform", /* VMWare VM */ "Virtual Machine", /* Microsoft VirtualPC */ "VirtualBox", /* Sun xVM VirtualBox */ "Parallels Virtual Platform", /* Parallels VM */ "KVM", /* KVM */ NULL }; static void identify_hypervisor(void) { u_int regs[4]; char *p; int i; /* * [RFC] CPUID usage for interaction between Hypervisors and Linux. * http://lkml.org/lkml/2008/10/1/246 * * KB1009458: Mechanisms to determine if software is running in * a VMware virtual machine * http://kb.vmware.com/kb/1009458 */ if (cpu_feature2 & CPUID2_HV) { vm_guest = VM_GUEST_VM; do_cpuid(0x40000000, regs); if (regs[0] >= 0x40000000) { hv_high = regs[0]; ((u_int *)&hv_vendor)[0] = regs[1]; ((u_int *)&hv_vendor)[1] = regs[2]; ((u_int *)&hv_vendor)[2] = regs[3]; hv_vendor[12] = '\0'; if (strcmp(hv_vendor, "VMwareVMware") == 0) vm_guest = VM_GUEST_VMWARE; } return; } /* * Examine SMBIOS strings for older hypervisors. */ p = kern_getenv("smbios.system.serial"); if (p != NULL) { if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) { vmware_hvcall(VMW_HVCMD_GETVERSION, regs); if (regs[1] == VMW_HVMAGIC) { vm_guest = VM_GUEST_VMWARE; freeenv(p); return; } } freeenv(p); } /* * XXX: Some of these entries may not be needed since they were * added to FreeBSD before the checks above. 
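/*
 * Model of the vendor-string assembly done in identify_hypervisor() above:
 * CPUID leaf 0x40000000 returns the highest hypervisor leaf in EAX and a
 * 12-byte vendor signature packed into EBX, ECX and EDX.  The register
 * values below are the little-endian encoding of "VMwareVMware", the
 * signature the code above compares against; the EAX value is a placeholder
 * and on real hardware all four would come from the cpuid instruction.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
        uint32_t regs[4] = {
                0x40000010,             /* EAX: highest hypervisor leaf */
                0x61774d56,             /* EBX: "VMwa" */
                0x4d566572,             /* ECX: "reVM" */
                0x65726177,             /* EDX: "ware" */
        };
        char hv_vendor[13];

        /* EBX, ECX, EDX back to back, as on a little-endian x86 host. */
        memcpy(hv_vendor, &regs[1], 12);
        hv_vendor[12] = '\0';
        printf("hypervisor vendor: %s\n", hv_vendor);   /* VMwareVMware */
        return (0);
}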
*/ p = kern_getenv("smbios.bios.vendor"); if (p != NULL) { for (i = 0; vm_bnames[i] != NULL; i++) if (strcmp(p, vm_bnames[i]) == 0) { vm_guest = VM_GUEST_VM; freeenv(p); return; } freeenv(p); } p = kern_getenv("smbios.system.product"); if (p != NULL) { for (i = 0; vm_pnames[i] != NULL; i++) if (strcmp(p, vm_pnames[i]) == 0) { vm_guest = VM_GUEST_VM; freeenv(p); return; } freeenv(p); } } -#endif /* * Final stage of CPU identification. */ #ifdef __i386__ void finishidentcpu(void) #else void identify_cpu(void) #endif { u_int regs[4], cpu_stdext_disable; #ifdef __i386__ u_char ccr3; #endif #ifdef __amd64__ do_cpuid(0, regs); cpu_high = regs[0]; ((u_int *)&cpu_vendor)[0] = regs[1]; ((u_int *)&cpu_vendor)[1] = regs[3]; ((u_int *)&cpu_vendor)[2] = regs[2]; cpu_vendor[12] = '\0'; do_cpuid(1, regs); cpu_id = regs[0]; cpu_procinfo = regs[1]; cpu_feature = regs[3]; cpu_feature2 = regs[2]; #endif -#ifndef XEN identify_hypervisor(); -#endif cpu_vendor_id = find_cpu_vendor_id(); /* * Clear "Limit CPUID Maxval" bit and get the largest standard CPUID * function number again if it is set from BIOS. It is necessary * for probing correct CPU topology later. * XXX This is only done on the BSP package. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high > 0 && cpu_high < 4 && ((CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x3) || (CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xe))) { uint64_t msr; msr = rdmsr(MSR_IA32_MISC_ENABLE); if ((msr & 0x400000ULL) != 0) { wrmsr(MSR_IA32_MISC_ENABLE, msr & ~0x400000ULL); do_cpuid(0, regs); cpu_high = regs[0]; } } if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) { do_cpuid(5, regs); cpu_mon_mwait_flags = regs[2]; cpu_mon_min_size = regs[0] & CPUID5_MON_MIN_SIZE; cpu_mon_max_size = regs[1] & CPUID5_MON_MAX_SIZE; } if (cpu_high >= 7) { cpuid_count(7, 0, regs); cpu_stdext_feature = regs[1]; /* * Some hypervisors fail to filter out unsupported * extended features. For now, disable the * extensions, activation of which requires setting a * bit in CR4, and which VM monitors do not support. */ if (cpu_feature2 & CPUID2_HV) { cpu_stdext_disable = CPUID_STDEXT_FSGSBASE | CPUID_STDEXT_SMEP; } else cpu_stdext_disable = 0; TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable); cpu_stdext_feature &= ~cpu_stdext_disable; } #ifdef __i386__ if (cpu_high > 0 && (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_CENTAUR || cpu_vendor_id == CPU_VENDOR_NSC)) { do_cpuid(0x80000000, regs); if (regs[0] >= 0x80000000) cpu_exthigh = regs[0]; } #else if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_CENTAUR) { do_cpuid(0x80000000, regs); cpu_exthigh = regs[0]; } #endif if (cpu_exthigh >= 0x80000001) { do_cpuid(0x80000001, regs); amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff); amd_feature2 = regs[2]; } if (cpu_exthigh >= 0x80000007) { do_cpuid(0x80000007, regs); amd_pminfo = regs[3]; } if (cpu_exthigh >= 0x80000008) { do_cpuid(0x80000008, regs); cpu_maxphyaddr = regs[0] & 0xff; cpu_procinfo2 = regs[2]; } else { cpu_maxphyaddr = (cpu_feature & CPUID_PAE) != 0 ? 36 : 32; } #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) { if (cpu == CPU_486) { /* * These conditions are equivalent to: * - CPU does not support cpuid instruction. * - Cyrix/IBM CPU is detected. 
*/ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } switch (cpu_id & 0xf00) { case 0x600: /* * Cyrix's datasheet does not describe DIRs. * Therefore, I assume it does not have them * and use the result of the cpuid instruction. * XXX they seem to have it for now at least. -Peter */ identifycyrix(); cpu = CPU_M2; break; default: identifycyrix(); /* * This routine contains a trick. * Don't check (cpu_id & 0x00f0) == 0x50 to detect M2, now. */ switch (cyrix_did & 0x00f0) { case 0x00: case 0xf0: cpu = CPU_486DLC; break; case 0x10: cpu = CPU_CY486DX; break; case 0x20: if ((cyrix_did & 0x000f) < 8) cpu = CPU_M1; else cpu = CPU_M1SC; break; case 0x30: cpu = CPU_M1; break; case 0x40: /* MediaGX CPU */ cpu = CPU_M1SC; break; default: /* M2 and later CPUs are treated as M2. */ cpu = CPU_M2; /* * Enable the cpuid instruction. */ ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); write_cyrix_reg(CCR4, read_cyrix_reg(CCR4) | CCR4_CPUID); write_cyrix_reg(CCR3, ccr3); do_cpuid(0, regs); cpu_high = regs[0]; /* eax */ do_cpuid(1, regs); cpu_id = regs[0]; /* eax */ cpu_feature = regs[3]; /* edx */ break; } } } else if (cpu == CPU_486 && *cpu_vendor == '\0') { /* * There are BlueLightning CPUs that do not change * the undefined flags when dividing 5 by 2. In this case, * the CPU identification routine in locore.s leaves * cpu_vendor as a null string and puts CPU_486 into * cpu. */ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } #else /* XXX */ cpu = CPU_CLAWHAMMER; #endif } static u_int find_cpu_vendor_id(void) { int i; for (i = 0; i < sizeof(cpu_vendors) / sizeof(cpu_vendors[0]); i++) if (strcmp(cpu_vendor, cpu_vendors[i].vendor) == 0) return (cpu_vendors[i].vendor_id); return (0); } static void print_AMD_assoc(int i) { if (i == 255) printf(", fully associative\n"); else printf(", %d-way associative\n", i); } static void print_AMD_l2_assoc(int i) { switch (i & 0x0f) { case 0: printf(", disabled/not present\n"); break; case 1: printf(", direct mapped\n"); break; case 2: printf(", 2-way associative\n"); break; case 4: printf(", 4-way associative\n"); break; case 6: printf(", 8-way associative\n"); break; case 8: printf(", 16-way associative\n"); break; case 15: printf(", fully associative\n"); break; default: printf(", reserved configuration\n"); break; } } static void print_AMD_info(void) { #ifdef __i386__ uint64_t amd_whcr; #endif u_int regs[4]; if (cpu_exthigh >= 0x80000005) { do_cpuid(0x80000005, regs); printf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff); print_AMD_assoc(regs[0] >> 24); printf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff); print_AMD_assoc((regs[0] >> 8) & 0xff); printf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff); print_AMD_assoc(regs[1] >> 24); printf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff); print_AMD_assoc((regs[1] >> 8) & 0xff); printf("L1 data cache: %d kbytes", regs[2] >> 24); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0xff); print_AMD_assoc((regs[2] >> 16) & 0xff); printf("L1 instruction cache: %d kbytes", regs[3] >> 24); printf(", %d bytes/line", regs[3] & 0xff); printf(", %d lines/tag", (regs[3] >> 8) & 0xff); print_AMD_assoc((regs[3] >> 16) & 0xff); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); if ((regs[0] >> 16) != 0) { printf("L2 2MB data TLB: %d entries", (regs[0] >> 16) & 0xfff); print_AMD_l2_assoc(regs[0] >> 28);
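/* * The decoding below assumes the usual AMD CPUID Fn8000_0006 layout: EAX and EBX carry the L2 TLB geometry for 2MB/4MB and 4KB pages respectively (entry count in the low 12 bits of each 16-bit half, 4-bit associativity code in the top bits of each half), while ECX describes the unified L2 cache (size in KB in bits 31:16, associativity code in bits 15:12, lines per tag in bits 11:8, line size in bits 7:0). print_AMD_l2_assoc() above maps the 4-bit associativity codes to strings. */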
printf("L2 2MB instruction TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } else { printf("L2 2MB unified TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } if ((regs[1] >> 16) != 0) { printf("L2 4KB data TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc(regs[1] >> 28); printf("L2 4KB instruction TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } else { printf("L2 4KB unified TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } printf("L2 unified cache: %d kbytes", regs[2] >> 16); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0x0f); print_AMD_l2_assoc((regs[2] >> 12) & 0x0f); } #ifdef __i386__ if (((cpu_id & 0xf00) == 0x500) && (((cpu_id & 0x0f0) > 0x80) || (((cpu_id & 0x0f0) == 0x80) && (cpu_id & 0x00f) > 0x07))) { /* K6-2(new core [Stepping 8-F]), K6-III or later */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x3ff << 22))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x3ff << 22)) >> 22) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & (1 << 16)) ? "Enable" : "Disable"); } } else if (((cpu_id & 0xf00) == 0x500) && ((cpu_id & 0x0f0) > 0x50)) { /* K6, K6-2(old core) */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x7f << 1))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x7f << 1)) >> 1) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & 0x0001) ? "Enable" : "Disable"); printf("Hardware Write Allocate Control: %s\n", (amd_whcr & 0x0100) ? "Enable" : "Disable"); } } #endif /* * Opteron Rev E shows a bug as in very rare occasions a read memory * barrier is not performed as expected if it is followed by a * non-atomic read-modify-write instruction. * As long as that bug pops up very rarely (intensive machine usage * on other operating systems generally generates one unexplainable * crash any 2 months) and as long as a model specific fix would be * impratical at this stage, print out a warning string if the broken * model and family are identified. 
*/ if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x20 && CPUID_TO_MODEL(cpu_id) <= 0x3f) printf("WARNING: This architecture revision has known SMP " "hardware bugs which may cause random instability\n"); } static void print_INTEL_info(void) { u_int regs[4]; u_int rounds, regnum; u_int nwaycode, nway; if (cpu_high >= 2) { rounds = 0; do { do_cpuid(0x2, regs); if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0) break; /* we have a buggy CPU */ for (regnum = 0; regnum <= 3; ++regnum) { if (regs[regnum] & (1<<31)) continue; if (regnum != 0) print_INTEL_TLB(regs[regnum] & 0xff); print_INTEL_TLB((regs[regnum] >> 8) & 0xff); print_INTEL_TLB((regs[regnum] >> 16) & 0xff); print_INTEL_TLB((regs[regnum] >> 24) & 0xff); } } while (--rounds > 0); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); nwaycode = (regs[2] >> 12) & 0x0f; if (nwaycode >= 0x02 && nwaycode <= 0x08) nway = 1 << (nwaycode / 2); else nway = 0; printf("L2 cache: %u kbytes, %u-way associative, %u bytes/line\n", (regs[2] >> 16) & 0xffff, nway, regs[2] & 0xff); } } static void print_INTEL_TLB(u_int data) { switch (data) { case 0x0: case 0x40: default: break; case 0x1: printf("Instruction TLB: 4 KB pages, 4-way set associative, 32 entries\n"); break; case 0x2: printf("Instruction TLB: 4 MB pages, fully associative, 2 entries\n"); break; case 0x3: printf("Data TLB: 4 KB pages, 4-way set associative, 64 entries\n"); break; case 0x4: printf("Data TLB: 4 MB Pages, 4-way set associative, 8 entries\n"); break; case 0x6: printf("1st-level instruction cache: 8 KB, 4-way set associative, 32 byte line size\n"); break; case 0x8: printf("1st-level instruction cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0xa: printf("1st-level data cache: 8 KB, 2-way set associative, 32 byte line size\n"); break; case 0xc: printf("1st-level data cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0x22: printf("3rd-level cache: 512 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x23: printf("3rd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x25: printf("3rd-level cache: 2 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x29: printf("3rd-level cache: 4 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x2c: printf("1st-level data cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x30: printf("1st-level instruction cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x39: printf("2nd-level cache: 128 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3b: printf("2nd-level cache: 128 KB, 2-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3c: printf("2nd-level cache: 256 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x41: printf("2nd-level cache: 128 KB, 4-way set associative, 32 byte line size\n"); break; case 0x42: printf("2nd-level cache: 256 KB, 4-way set associative, 32 byte line size\n"); break; case 0x43: printf("2nd-level cache: 512 KB, 4-way set associative, 32 byte line size\n"); break; case 0x44: printf("2nd-level cache: 1 MB, 4-way set associative, 32 byte line size\n"); break; case 0x45: printf("2nd-level cache: 2 MB, 4-way set associative, 32 byte line size\n"); break; case 0x46: printf("3rd-level cache: 4 MB, 4-way set associative, 64 byte line size\n"); break; case 0x47: printf("3rd-level cache: 8 MB, 8-way set 
associative, 64 byte line size\n"); break; case 0x50: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x51: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x52: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x5b: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x5c: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x5d: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x60: printf("1st-level data cache: 16 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x66: printf("1st-level data cache: 8 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x67: printf("1st-level data cache: 16 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x68: printf("1st-level data cache: 32 KB, 4 way set associative, sectored cache, 64 byte line size\n"); break; case 0x70: printf("Trace cache: 12K-uops, 8-way set associative\n"); break; case 0x71: printf("Trace cache: 16K-uops, 8-way set associative\n"); break; case 0x72: printf("Trace cache: 32K-uops, 8-way set associative\n"); break; case 0x78: printf("2nd-level cache: 1 MB, 4-way set associative, 64-byte line size\n"); break; case 0x79: printf("2nd-level cache: 128 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7a: printf("2nd-level cache: 256 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7b: printf("2nd-level cache: 512 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7c: printf("2nd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7d: printf("2nd-level cache: 2-MB, 8-way set associative, 64-byte line size\n"); break; case 0x7f: printf("2nd-level cache: 512-KB, 2-way set associative, 64-byte line size\n"); break; case 0x82: printf("2nd-level cache: 256 KB, 8-way set associative, 32 byte line size\n"); break; case 0x83: printf("2nd-level cache: 512 KB, 8-way set associative, 32 byte line size\n"); break; case 0x84: printf("2nd-level cache: 1 MB, 8-way set associative, 32 byte line size\n"); break; case 0x85: printf("2nd-level cache: 2 MB, 8-way set associative, 32 byte line size\n"); break; case 0x86: printf("2nd-level cache: 512 KB, 4-way set associative, 64 byte line size\n"); break; case 0x87: printf("2nd-level cache: 1 MB, 8-way set associative, 64 byte line size\n"); break; case 0xb0: printf("Instruction TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb3: printf("Data TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; } } static void print_svm_info(void) { u_int features, regs[4]; uint64_t msr; int comma; printf("\n SVM: "); do_cpuid(0x8000000A, regs); features = regs[3]; msr = rdmsr(MSR_VM_CR); if ((msr & VM_CR_SVMDIS) == VM_CR_SVMDIS) printf("(disabled in BIOS) "); if (!bootverbose) { comma = 0; if (features & (1 << 0)) { printf("%sNP", comma ? "," : ""); comma = 1; } if (features & (1 << 3)) { printf("%sNRIP", comma ? "," : ""); comma = 1; } if (features & (1 << 5)) { printf("%sVClean", comma ? "," : ""); comma = 1; } if (features & (1 << 6)) { printf("%sAFlush", comma ? "," : ""); comma = 1; } if (features & (1 << 7)) { printf("%sDAssist", comma ? "," : ""); comma = 1; } printf("%sNAsids=%d", comma ? 
"," : "", regs[1]); return; } printf("Features=0x%b", features, "\020" "\001NP" /* Nested paging */ "\002LbrVirt" /* LBR virtualization */ "\003SVML" /* SVM lock */ "\004NRIPS" /* NRIP save */ "\005TscRateMsr" /* MSR based TSC rate control */ "\006VmcbClean" /* VMCB clean bits */ "\007FlushByAsid" /* Flush by ASID */ "\010DecodeAssist" /* Decode assist */ "\011" "\012" "\013PauseFilter" /* PAUSE intercept filter */ "\014" "\015PauseFilterThreshold" /* PAUSE filter threshold */ "\016AVIC" /* virtual interrupt controller */ ); printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]); } #ifdef __i386__ static void print_transmeta_info(void) { u_int regs[4], nreg = 0; do_cpuid(0x80860000, regs); nreg = regs[0]; if (nreg >= 0x80860001) { do_cpuid(0x80860001, regs); printf(" Processor revision %u.%u.%u.%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff); } if (nreg >= 0x80860002) { do_cpuid(0x80860002, regs); printf(" Code Morphing Software revision %u.%u.%u-%u-%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff, regs[2]); } if (nreg >= 0x80860006) { char info[65]; do_cpuid(0x80860003, (u_int*) &info[0]); do_cpuid(0x80860004, (u_int*) &info[16]); do_cpuid(0x80860005, (u_int*) &info[32]); do_cpuid(0x80860006, (u_int*) &info[48]); info[64] = 0; printf(" %s\n", info); } } #endif static void print_via_padlock_info(void) { u_int regs[4]; do_cpuid(0xc0000001, regs); printf("\n VIA Padlock Features=0x%b", regs[3], "\020" "\003RNG" /* RNG */ "\007AES" /* ACE */ "\011AES-CTR" /* ACE2 */ "\013SHA1,SHA256" /* PHE */ "\015RSA" /* PMM */ ); } static uint32_t vmx_settable(uint64_t basic, int msr, int true_msr) { uint64_t val; if (basic & (1ULL << 55)) val = rdmsr(true_msr); else val = rdmsr(msr); /* Just report the controls that can be set to 1. */ return (val >> 32); } static void print_vmx_info(void) { uint64_t basic, msr; uint32_t entry, exit, mask, pin, proc, proc2; int comma; printf("\n VT-x: "); msr = rdmsr(MSR_IA32_FEATURE_CONTROL); if (!(msr & IA32_FEATURE_CONTROL_VMX_EN)) printf("(disabled in BIOS) "); basic = rdmsr(MSR_VMX_BASIC); pin = vmx_settable(basic, MSR_VMX_PINBASED_CTLS, MSR_VMX_TRUE_PINBASED_CTLS); proc = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS); if (proc & PROCBASED_SECONDARY_CONTROLS) proc2 = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2); else proc2 = 0; exit = vmx_settable(basic, MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS); entry = vmx_settable(basic, MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS); if (!bootverbose) { comma = 0; if (exit & VM_EXIT_SAVE_PAT && exit & VM_EXIT_LOAD_PAT && entry & VM_ENTRY_LOAD_PAT) { printf("%sPAT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_HLT_EXITING) { printf("%sHLT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_MTF) { printf("%sMTF", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_PAUSE_EXITING) { printf("%sPAUSE", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_EPT) { printf("%sEPT", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_UNRESTRICTED_GUEST) { printf("%sUG", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_VPID) { printf("%sVPID", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_USE_TPR_SHADOW && proc2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES && proc2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE && proc2 & PROCBASED2_APIC_REGISTER_VIRTUALIZATION && proc2 & PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY) { printf("%sVID", comma ? 
"," : ""); comma = 1; if (pin & PINBASED_POSTED_INTERRUPT) printf(",PostIntr"); } return; } mask = basic >> 32; printf("Basic Features=0x%b", mask, "\020" "\02132PA" /* 32-bit physical addresses */ "\022SMM" /* SMM dual-monitor */ "\027INS/OUTS" /* VM-exit info for INS and OUTS */ "\030TRUE" /* TRUE_CTLS MSRs */ ); printf("\n Pin-Based Controls=0x%b", pin, "\020" "\001ExtINT" /* External-interrupt exiting */ "\004NMI" /* NMI exiting */ "\006VNMI" /* Virtual NMIs */ "\007PreTmr" /* Activate VMX-preemption timer */ "\010PostIntr" /* Process posted interrupts */ ); printf("\n Primary Processor Controls=0x%b", proc, "\020" "\003INTWIN" /* Interrupt-window exiting */ "\004TSCOff" /* Use TSC offsetting */ "\010HLT" /* HLT exiting */ "\012INVLPG" /* INVLPG exiting */ "\013MWAIT" /* MWAIT exiting */ "\014RDPMC" /* RDPMC exiting */ "\015RDTSC" /* RDTSC exiting */ "\020CR3-LD" /* CR3-load exiting */ "\021CR3-ST" /* CR3-store exiting */ "\024CR8-LD" /* CR8-load exiting */ "\025CR8-ST" /* CR8-store exiting */ "\026TPR" /* Use TPR shadow */ "\027NMIWIN" /* NMI-window exiting */ "\030MOV-DR" /* MOV-DR exiting */ "\031IO" /* Unconditional I/O exiting */ "\032IOmap" /* Use I/O bitmaps */ "\034MTF" /* Monitor trap flag */ "\035MSRmap" /* Use MSR bitmaps */ "\036MONITOR" /* MONITOR exiting */ "\037PAUSE" /* PAUSE exiting */ ); if (proc & PROCBASED_SECONDARY_CONTROLS) printf("\n Secondary Processor Controls=0x%b", proc2, "\020" "\001APIC" /* Virtualize APIC accesses */ "\002EPT" /* Enable EPT */ "\003DT" /* Descriptor-table exiting */ "\004RDTSCP" /* Enable RDTSCP */ "\005x2APIC" /* Virtualize x2APIC mode */ "\006VPID" /* Enable VPID */ "\007WBINVD" /* WBINVD exiting */ "\010UG" /* Unrestricted guest */ "\011APIC-reg" /* APIC-register virtualization */ "\012VID" /* Virtual-interrupt delivery */ "\013PAUSE-loop" /* PAUSE-loop exiting */ "\014RDRAND" /* RDRAND exiting */ "\015INVPCID" /* Enable INVPCID */ "\016VMFUNC" /* Enable VM functions */ "\017VMCS" /* VMCS shadowing */ "\020EPT#VE" /* EPT-violation #VE */ "\021XSAVES" /* Enable XSAVES/XRSTORS */ ); printf("\n Exit Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore Host address-space size */ "\015PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\020AckInt" /* Acknowledge interrupt on exit */ "\023PAT-SV" /* Save MSR_PAT */ "\024PAT-LD" /* Load MSR_PAT */ "\025EFER-SV" /* Save MSR_EFER */ "\026EFER-LD" /* Load MSR_EFER */ "\027PTMR-SV" /* Save VMX-preemption timer value */ ); printf("\n Entry Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore IA-32e mode guest */ /* Ignore Entry to SMM */ /* Ignore Deactivate dual-monitor treatment */ "\016PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\017PAT" /* Load MSR_PAT */ "\020EFER" /* Load MSR_EFER */ ); if (proc & PROCBASED_SECONDARY_CONTROLS && (proc2 & (PROCBASED2_ENABLE_EPT | PROCBASED2_ENABLE_VPID)) != 0) { msr = rdmsr(MSR_VMX_EPT_VPID_CAP); mask = msr; printf("\n EPT Features=0x%b", mask, "\020" "\001XO" /* Execute-only translations */ "\007PW4" /* Page-walk length of 4 */ "\011UC" /* EPT paging-structure mem can be UC */ "\017WB" /* EPT paging-structure mem can be WB */ "\0212M" /* EPT PDE can map a 2-Mbyte page */ "\0221G" /* EPT PDPTE can map a 1-Gbyte page */ "\025INVEPT" /* INVEPT is supported */ "\026AD" /* Accessed and dirty flags for EPT */ "\032single" /* INVEPT single-context type */ "\033all" /* INVEPT all-context type */ ); mask = msr >> 32; printf("\n VPID Features=0x%b", mask, "\020" "\001INVVPID" /* INVVPID is supported */ "\011individual" /* INVVPID 
individual-address type */ "\012single" /* INVVPID single-context type */ "\013all" /* INVVPID all-context type */ /* INVVPID single-context-retaining-globals type */ "\014single-globals" ); } } static void print_hypervisor_info(void) { if (*hv_vendor) printf("Hypervisor: Origin = \"%s\"\n", hv_vendor); } Index: head/sys/x86/x86/intr_machdep.c =================================================================== --- head/sys/x86/x86/intr_machdep.c (revision 282273) +++ head/sys/x86/x86/intr_machdep.c (revision 282274) @@ -1,582 +1,575 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Machine dependent interrupt code for x86. For x86, we have to * deal with different PICs. Thus, we use the passed in vector to lookup * an interrupt source associated with that vector. The interrupt source * describes which PIC the source belongs to and includes methods to handle * that source. */ #include "opt_atpic.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #ifndef DEV_ATPIC #include #include #include #include #ifdef PC98 #include #else #include #endif #endif #define MAX_STRAY_LOG 5 typedef void (*mask_fn)(void *); static int intrcnt_index; static struct intsrc *interrupt_sources[NUM_IO_INTS]; static struct mtx intr_table_lock; static struct mtx intrcnt_lock; static TAILQ_HEAD(pics_head, pic) pics; #ifdef SMP static int assign_cpu; #endif u_long intrcnt[INTRCNT_COUNT]; char intrnames[INTRCNT_COUNT * (MAXCOMLEN + 1)]; size_t sintrcnt = sizeof(intrcnt); size_t sintrnames = sizeof(intrnames); static int intr_assign_cpu(void *arg, int cpu); static void intr_disable_src(void *arg); static void intr_init(void *__dummy); static int intr_pic_registered(struct pic *pic); static void intrcnt_setname(const char *name, int index); static void intrcnt_updatename(struct intsrc *is); static void intrcnt_register(struct intsrc *is); static int intr_pic_registered(struct pic *pic) { struct pic *p; TAILQ_FOREACH(p, &pics, pics) { if (p == pic) return (1); } return (0); } /* * Register a new interrupt controller (PIC). This is to support suspend * and resume where we suspend/resume controllers rather than individual * sources. 
This also allows controllers with no active sources (such as * 8259As in a system using the APICs) to participate in suspend and resume. */ int intr_register_pic(struct pic *pic) { int error; mtx_lock(&intr_table_lock); if (intr_pic_registered(pic)) error = EBUSY; else { TAILQ_INSERT_TAIL(&pics, pic, pics); error = 0; } mtx_unlock(&intr_table_lock); return (error); } /* * Register a new interrupt source with the global interrupt system. * The global interrupts need to be disabled when this function is * called. */ int intr_register_source(struct intsrc *isrc) { int error, vector; KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC")); vector = isrc->is_pic->pic_vector(isrc); if (interrupt_sources[vector] != NULL) return (EEXIST); error = intr_event_create(&isrc->is_event, isrc, 0, vector, intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source, (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:", vector); if (error) return (error); mtx_lock(&intr_table_lock); if (interrupt_sources[vector] != NULL) { mtx_unlock(&intr_table_lock); intr_event_destroy(isrc->is_event); return (EEXIST); } intrcnt_register(isrc); interrupt_sources[vector] = isrc; isrc->is_handlers = 0; mtx_unlock(&intr_table_lock); return (0); } struct intsrc * intr_lookup_source(int vector) { return (interrupt_sources[vector]); } int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_add_handler(isrc->is_event, name, filter, handler, arg, intr_priority(flags), flags, cookiep); if (error == 0) { mtx_lock(&intr_table_lock); intrcnt_updatename(isrc); isrc->is_handlers++; if (isrc->is_handlers == 1) { isrc->is_pic->pic_enable_intr(isrc); isrc->is_pic->pic_enable_source(isrc); } mtx_unlock(&intr_table_lock); } return (error); } int intr_remove_handler(void *cookie) { struct intsrc *isrc; int error; isrc = intr_handler_source(cookie); error = intr_event_remove_handler(cookie); if (error == 0) { mtx_lock(&intr_table_lock); isrc->is_handlers--; if (isrc->is_handlers == 0) { isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI); isrc->is_pic->pic_disable_intr(isrc); } intrcnt_updatename(isrc); mtx_unlock(&intr_table_lock); } return (error); } int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (isrc->is_pic->pic_config_intr(isrc, trig, pol)); } static void intr_disable_src(void *arg) { struct intsrc *isrc; isrc = arg; isrc->is_pic->pic_disable_source(isrc, PIC_EOI); } void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) { struct intr_event *ie; int vector; /* * We count software interrupts when we process them. The * code here follows previous practice, but there's an * argument for counting hardware interrupts when they're * processed too. */ (*isrc->is_count)++; PCPU_INC(cnt.v_intr); ie = isrc->is_event; /* * XXX: We assume that IRQ 0 is only used for the ISA timer * device (clk). */ vector = isrc->is_pic->pic_vector(isrc); if (vector == 0) clkintr_pending = 1; /* * For stray interrupts, mask and EOI the source, bump the * stray count, and log the condition. 
*/ if (intr_event_handle(ie, frame) != 0) { isrc->is_pic->pic_disable_source(isrc, PIC_EOI); (*isrc->is_straycount)++; if (*isrc->is_straycount < MAX_STRAY_LOG) log(LOG_ERR, "stray irq%d\n", vector); else if (*isrc->is_straycount == MAX_STRAY_LOG) log(LOG_CRIT, "too many stray irq %d's: not logging anymore\n", vector); } } void intr_resume(bool suspend_cancelled) { struct pic *pic; #ifndef DEV_ATPIC atpic_reset(); #endif mtx_lock(&intr_table_lock); TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) pic->pic_resume(pic, suspend_cancelled); } mtx_unlock(&intr_table_lock); } void intr_suspend(void) { struct pic *pic; mtx_lock(&intr_table_lock); TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) { if (pic->pic_suspend != NULL) pic->pic_suspend(pic); } mtx_unlock(&intr_table_lock); } static int intr_assign_cpu(void *arg, int cpu) { #ifdef SMP struct intsrc *isrc; int error; /* * Don't do anything during early boot. We will pick up the * assignment once the APs are started. */ if (assign_cpu && cpu != NOCPU) { isrc = arg; mtx_lock(&intr_table_lock); error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); mtx_unlock(&intr_table_lock); } else error = 0; return (error); #else return (EOPNOTSUPP); #endif } static void intrcnt_setname(const char *name, int index) { snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } static void intrcnt_updatename(struct intsrc *is) { intrcnt_setname(is->is_event->ie_fullname, is->is_index); } static void intrcnt_register(struct intsrc *is) { char straystr[MAXCOMLEN + 1]; KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__)); mtx_lock_spin(&intrcnt_lock); is->is_index = intrcnt_index; intrcnt_index += 2; snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", is->is_pic->pic_vector(is)); intrcnt_updatename(is); is->is_count = &intrcnt[is->is_index]; intrcnt_setname(straystr, is->is_index + 1); is->is_straycount = &intrcnt[is->is_index + 1]; mtx_unlock_spin(&intrcnt_lock); } void intrcnt_add(const char *name, u_long **countp) { mtx_lock_spin(&intrcnt_lock); *countp = &intrcnt[intrcnt_index]; intrcnt_setname(name, intrcnt_index); intrcnt_index++; mtx_unlock_spin(&intrcnt_lock); } static void intr_init(void *dummy __unused) { intrcnt_setname("???", 0); intrcnt_index = 1; TAILQ_INIT(&pics); mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); #ifndef DEV_ATPIC /* Initialize the two 8259A's to a known-good shutdown state. */ void atpic_reset(void) { outb(IO_ICU1, ICW1_RESET | ICW1_IC4); outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS); outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID)); outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE); outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU1, OCW3_SEL | OCW3_RR); outb(IO_ICU2, ICW1_RESET | ICW1_IC4); outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8); outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID); outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE); outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff); outb(IO_ICU2, OCW3_SEL | OCW3_RR); } #endif /* Add a description to an active interrupt handler. 
*/ int intr_describe(u_int vector, void *ih, const char *descr) { struct intsrc *isrc; int error; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); error = intr_event_describe_handler(isrc->is_event, ih, descr); if (error) return (error); intrcnt_updatename(isrc); return (0); } void intr_reprogram(void) { struct intsrc *is; int v; mtx_lock(&intr_table_lock); for (v = 0; v < NUM_IO_INTS; v++) { is = interrupt_sources[v]; if (is == NULL) continue; if (is->is_pic->pic_reprogram_pin != NULL) is->is_pic->pic_reprogram_pin(is); } mtx_unlock(&intr_table_lock); } #ifdef DDB /* * Dump data about interrupt handlers */ DB_SHOW_COMMAND(irqs, db_show_irqs) { struct intsrc **isrc; int i, verbose; if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; isrc = interrupt_sources; for (i = 0; i < NUM_IO_INTS && !db_pager_quit; i++, isrc++) if (*isrc != NULL) db_dump_intr_event((*isrc)->is_event, verbose); } #endif #ifdef SMP /* * Support for balancing interrupt sources across CPUs. For now we just * allocate CPUs round-robin. */ static cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1); static int current_cpu; /* * Return the CPU that the next interrupt source should use. For now * this just returns the next local APIC according to round-robin. */ u_int intr_next_cpu(void) { u_int apic_id; /* Leave all interrupts on the BSP during boot. */ if (!assign_cpu) return (PCPU_GET(apic_id)); mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu]; do { current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } /* Attempt to bind the specified IRQ to the specified CPU. */ int intr_bind(u_int vector, u_char cpu) { struct intsrc *isrc; isrc = intr_lookup_source(vector); if (isrc == NULL) return (EINVAL); return (intr_event_bind(isrc->is_event, cpu)); } /* * Add a CPU to our mask of valid CPUs that can be destinations of * interrupts. */ void intr_add_cpu(u_int cpu) { if (cpu >= MAXCPU) panic("%s: Invalid CPU ID", __func__); if (bootverbose) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); CPU_SET(cpu, &intr_cpus); } /* * Distribute all the interrupt sources among the available CPUs once the * AP's have been launched. */ static void intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; int i; -#ifdef XEN - /* - * Doesn't work yet - */ - return; -#endif - /* Don't bother on UP. */ if (mp_ncpus == 1) return; /* Round-robin assign a CPU to each enabled source. */ mtx_lock(&intr_table_lock); assign_cpu = 1; for (i = 0; i < NUM_IO_INTS; i++) { isrc = interrupt_sources[i]; if (isrc != NULL && isrc->is_handlers > 0) { /* * If this event is already bound to a CPU, * then assign the source to that CPU instead * of picking one via round-robin. Note that * this is careful to only advance the * round-robin if the CPU assignment succeeds. */ if (isrc->is_event->ie_cpu != NOCPU) (void)isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[isrc->is_event->ie_cpu]); else if (isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[current_cpu]) == 0) (void)intr_next_cpu(); } } mtx_unlock(&intr_table_lock); } SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL); #else /* * Always route interrupts to the current processor in the UP case. 
*/ u_int intr_next_cpu(void) { return (PCPU_GET(apic_id)); } #endif Index: head/sys/x86/x86/local_apic.c =================================================================== --- head/sys/x86/x86/local_apic.c (revision 282273) +++ head/sys/x86/x86/local_apic.c (revision 282274) @@ -1,1824 +1,1820 @@ /*- * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Local APIC support on Pentium and later processors. */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define SDT_APICT SDT_SYSIGT #define GSEL_APIC 0 #else #define SDT_APIC SDT_SYS386IGT #define SDT_APICT SDT_SYS386TGT #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) #endif /* Sanity checks on IDT vectors. */ CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* Magic IRQ values for the timer and syscalls. */ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) #define IRQ_EVTCHN (NUM_IO_INTS + 4) /* * Support for local APICs. Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts * from I/O devices and then forward them on to the local APICs. * * Local APICs can also send interrupts to each other thus providing the * mechanism for IPIs. 
*/ struct lvt { u_int lvt_edgetrigger:1; u_int lvt_activehi:1; u_int lvt_masked:1; u_int lvt_active:1; u_int lvt_mode:16; u_int lvt_vector:8; }; struct lapic { struct lvt la_lvts[APIC_LVT_MAX + 1]; u_int la_id:8; u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; u_long *la_timer_count; u_long la_timer_period; u_int la_timer_mode; uint32_t lvt_timer_cache; /* Include IDT_SYSCALL to make indexing easier. */ int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static lapics[MAX_APIC_ID + 1]; /* Global defaults for local APIC LVT entries. */ static struct lvt lvts[APIC_LVT_MAX + 1] = { { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ }; static inthand_t *ioint_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1), /* 32 - 63 */ IDTVEC(apic_isr2), /* 64 - 95 */ IDTVEC(apic_isr3), /* 96 - 127 */ IDTVEC(apic_isr4), /* 128 - 159 */ IDTVEC(apic_isr5), /* 160 - 191 */ IDTVEC(apic_isr6), /* 192 - 223 */ IDTVEC(apic_isr7), /* 224 - 255 */ }; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; extern inthand_t IDTVEC(rsvd); volatile char *lapic_map; vm_paddr_t lapic_paddr; int x2apic_mode; int lapic_eoi_suppression; static u_long lapic_timer_divisor; static struct eventtimer lapic_et; SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options"); SYSCTL_INT(_hw_apic, OID_AUTO, x2apic_mode, CTLFLAG_RD, &x2apic_mode, 0, ""); SYSCTL_INT(_hw_apic, OID_AUTO, eoi_suppression, CTLFLAG_RD, &lapic_eoi_suppression, 0, ""); static uint32_t lapic_read32(enum LAPIC_REGISTERS reg) { uint32_t res; if (x2apic_mode) { res = rdmsr32(MSR_APIC_000 + reg); } else { res = *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL); } return (res); } static void lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { mfence(); wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } static void lapic_write32_nofence(enum LAPIC_REGISTERS reg, uint32_t val) { if (x2apic_mode) { wrmsr(MSR_APIC_000 + reg, val); } else { *(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val; } } static uint64_t lapic_read_icr(void) { uint64_t v; uint32_t vhi, vlo; if (x2apic_mode) { v = rdmsr(MSR_APIC_000 + LAPIC_ICR_LO); } else { vhi = lapic_read32(LAPIC_ICR_HI); vlo = lapic_read32(LAPIC_ICR_LO); v = ((uint64_t)vhi << 32) | vlo; } return (v); } static uint64_t lapic_read_icr_lo(void) { return (lapic_read32(LAPIC_ICR_LO)); } static void lapic_write_icr(uint32_t vhi, uint32_t vlo) { uint64_t v; if (x2apic_mode) { v = ((uint64_t)vhi << 32) | vlo; mfence(); wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v); } else { lapic_write32(LAPIC_ICR_HI, vhi); lapic_write32(LAPIC_ICR_LO, vlo); } } static void native_lapic_enable_x2apic(void) { uint64_t apic_base; apic_base = rdmsr(MSR_APICBASE); apic_base |= APICBASE_X2APIC | APICBASE_ENABLED; wrmsr(MSR_APICBASE, apic_base); } static void lapic_enable(void); static void lapic_resume(struct pic *pic, bool suspend_cancelled); static void lapic_timer_oneshot(struct lapic *, u_int count, int enable_int); static void lapic_timer_periodic(struct lapic *, u_int count, int enable_int); static 
void lapic_timer_stop(struct lapic *); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period); static int lapic_et_stop(struct eventtimer *et); static u_int apic_idt_to_irq(u_int apic_id, u_int vector); static void lapic_set_tpr(u_int vector); struct pic lapic_pic = { .pic_resume = lapic_resume }; /* Forward declarations for apic_ops */ static void native_lapic_create(u_int apic_id, int boot_cpu); static void native_lapic_init(vm_paddr_t addr); static void native_lapic_xapic_mode(void); static void native_lapic_setup(int boot); static void native_lapic_dump(const char *str); static void native_lapic_disable(void); static void native_lapic_eoi(void); static int native_lapic_id(void); static int native_lapic_intr_pending(u_int vector); static u_int native_apic_cpuid(u_int apic_id); static u_int native_apic_alloc_vector(u_int apic_id, u_int irq); static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); static void native_apic_disable_vector(u_int apic_id, u_int vector); static void native_apic_enable_vector(u_int apic_id, u_int vector); static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq); static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); static int native_lapic_enable_pmc(void); static void native_lapic_disable_pmc(void); static void native_lapic_reenable_pmc(void); static void native_lapic_enable_cmc(void); static void native_lapic_ipi_raw(register_t icrlo, u_int dest); static void native_lapic_ipi_vectored(u_int vector, int dest); static int native_lapic_ipi_wait(int delay); static int native_lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); static int native_lapic_set_lvt_mode(u_int apic_id, u_int lvt, uint32_t mode); static int native_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); static int native_lapic_ipi_alloc(inthand_t *ipifunc); static void native_lapic_ipi_free(int vector); struct apic_ops apic_ops = { .create = native_lapic_create, .init = native_lapic_init, .xapic_mode = native_lapic_xapic_mode, .setup = native_lapic_setup, .dump = native_lapic_dump, .disable = native_lapic_disable, .eoi = native_lapic_eoi, .id = native_lapic_id, .intr_pending = native_lapic_intr_pending, .set_logical_id = native_lapic_set_logical_id, .cpuid = native_apic_cpuid, .alloc_vector = native_apic_alloc_vector, .alloc_vectors = native_apic_alloc_vectors, .enable_vector = native_apic_enable_vector, .disable_vector = native_apic_disable_vector, .free_vector = native_apic_free_vector, .enable_pmc = native_lapic_enable_pmc, .disable_pmc = native_lapic_disable_pmc, .reenable_pmc = native_lapic_reenable_pmc, .enable_cmc = native_lapic_enable_cmc, #ifdef SMP .ipi_raw = native_lapic_ipi_raw, .ipi_vectored = native_lapic_ipi_vectored, .ipi_wait = native_lapic_ipi_wait, .ipi_alloc = native_lapic_ipi_alloc, .ipi_free = native_lapic_ipi_free, #endif .set_lvt_mask = native_lapic_set_lvt_mask, .set_lvt_mode = native_lapic_set_lvt_mode, .set_lvt_polarity = native_lapic_set_lvt_polarity, .set_lvt_triggermode = native_lapic_set_lvt_triggermode, }; static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value) { struct lvt *lvt; KASSERT(pin <= APIC_LVT_MAX, ("%s: pin %u out of range", __func__, pin)); if 
(la->la_lvts[pin].lvt_active) lvt = &la->la_lvts[pin]; else lvt = &lvts[pin]; value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | APIC_LVT_VECTOR); if (lvt->lvt_edgetrigger == 0) value |= APIC_LVT_TM; if (lvt->lvt_activehi == 0) value |= APIC_LVT_IIPP_INTALO; if (lvt->lvt_masked) value |= APIC_LVT_M; value |= lvt->lvt_mode; switch (lvt->lvt_mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: if (!lvt->lvt_edgetrigger && bootverbose) { printf("lapic%u: Forcing LINT%u to edge trigger\n", la->la_id, pin); value |= APIC_LVT_TM; } /* Use a vector of 0. */ break; case APIC_LVT_DM_FIXED: value |= lvt->lvt_vector; break; default: panic("bad APIC LVT delivery mode: %#x\n", value); } return (value); } /* * Map the local APIC and setup necessary interrupt vectors. */ static void native_lapic_init(vm_paddr_t addr) { uint32_t ver; u_int regs[4]; int i, arat; /* * Enable x2APIC mode if possible. Map the local APIC * registers page. * * Keep the LAPIC registers page mapped uncached for x2APIC * mode too, to have direct map page attribute set to * uncached. This is needed to work around CPU errata present * on all Intel processors. */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic_paddr = addr; lapic_map = pmap_mapdev(addr, PAGE_SIZE); if (x2apic_mode) { native_lapic_enable_x2apic(); lapic_map = NULL; } /* Setup the spurious interrupt handler. */ setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Local APIC error interrupt. */ setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC); /* XXX: Thermal interrupt */ /* Local APIC CMCI. */ setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC); if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { arat = 0; /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { do_cpuid(0x06, regs); if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; lapic_et.et_quality -= 200; } lapic_et.et_frequency = 0; /* We don't know frequency yet, so trying to guess. */ lapic_et.et_min_period = 0x00001000LL; lapic_et.et_max_period = SBT_1S; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; et_register(&lapic_et); } /* * Set lapic_eoi_suppression after lapic_enable(), to not * enable suppression in the hardware prematurely. Note that * we by default enable suppression even when system only has * one IO-APIC, since EOI is broadcasted to all APIC agents, * including CPUs, otherwise. */ ver = lapic_read32(LAPIC_VERSION); if ((ver & APIC_VER_EOI_SUPPRESSION) != 0) { lapic_eoi_suppression = 1; TUNABLE_INT_FETCH("hw.lapic_eoi_suppression", &lapic_eoi_suppression); } } /* * Create a local APIC instance. 
*/ static void native_lapic_create(u_int apic_id, int boot_cpu) { int i; if (apic_id > MAX_APIC_ID) { printf("APIC: Ignoring local APIC with ID %d\n", apic_id); if (boot_cpu) panic("Can't ignore BSP"); return; } KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", apic_id)); /* * Assume no local LVT overrides and a cluster of 0 and * intra-cluster ID of 0. */ lapics[apic_id].la_present = 1; lapics[apic_id].la_id = apic_id; for (i = 0; i <= APIC_LVT_MAX; i++) { lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) lapics[apic_id].la_ioint_irqs[i] = -1; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif #ifdef XENHVM lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; #endif #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } /* * Dump contents of local APIC registers */ static void native_lapic_dump(const char* str) { uint32_t maxlvt; maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x", lapic_read32(LAPIC_ID), lapic_read32(LAPIC_VERSION), lapic_read32(LAPIC_LDR), x2apic_mode ? 0 : lapic_read32(LAPIC_DFR)); if ((cpu_feature2 & CPUID2_X2APIC) != 0) printf(" x2APIC: %d", x2apic_mode); printf("\n lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", lapic_read32(LAPIC_LVT_LINT0), lapic_read32(LAPIC_LVT_LINT1), lapic_read32(LAPIC_TPR), lapic_read32(LAPIC_SVR)); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", lapic_read32(LAPIC_LVT_TIMER), lapic_read32(LAPIC_LVT_THERMAL), lapic_read32(LAPIC_LVT_ERROR)); if (maxlvt >= APIC_LVT_PMC) printf(" pmc: 0x%08x", lapic_read32(LAPIC_LVT_PCINT)); printf("\n"); if (maxlvt >= APIC_LVT_CMCI) printf(" cmci: 0x%08x\n", lapic_read32(LAPIC_LVT_CMCI)); } static void native_lapic_xapic_mode(void) { register_t saveintr; saveintr = intr_disable(); if (x2apic_mode) native_lapic_enable_x2apic(); intr_restore(saveintr); } static void native_lapic_setup(int boot) { struct lapic *la; uint32_t maxlvt; register_t saveintr; char buf[MAXCOMLEN + 1]; saveintr = intr_disable(); la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); /* Setup spurious vector and enable the local APIC. */ lapic_enable(); /* Program LINT[01] LVT entries. */ lapic_write32(LAPIC_LVT_LINT0, lvt_mode(la, APIC_LVT_LINT0, lapic_read32(LAPIC_LVT_LINT0))); lapic_write32(LAPIC_LVT_LINT1, lvt_mode(la, APIC_LVT_LINT1, lapic_read32(LAPIC_LVT_LINT1))); /* Program the PMC LVT entry if present. */ if (maxlvt >= APIC_LVT_PMC) { lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, LAPIC_LVT_PCINT)); } /* Program timer LVT and setup handler. */ la->lvt_timer_cache = lvt_mode(la, APIC_LVT_TIMER, lapic_read32(LAPIC_LVT_TIMER)); lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_cache); if (boot) { snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); intrcnt_add(buf, &la->la_timer_count); } /* Setup the timer if configured. 
*/ if (la->la_timer_mode != 0) { KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", lapic_id())); lapic_timer_set_divisor(lapic_timer_divisor); if (la->la_timer_mode == 1) lapic_timer_periodic(la, la->la_timer_period, 1); else lapic_timer_oneshot(la, la->la_timer_period, 1); } /* Program error LVT and clear any existing errors. */ lapic_write32(LAPIC_LVT_ERROR, lvt_mode(la, APIC_LVT_ERROR, lapic_read32(LAPIC_LVT_ERROR))); lapic_write32(LAPIC_ESR, 0); /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ if (maxlvt >= APIC_LVT_CMCI) { lapic_write32(LAPIC_LVT_CMCI, lvt_mode(la, APIC_LVT_CMCI, lapic_read32(LAPIC_LVT_CMCI))); } intr_restore(saveintr); } static void native_lapic_reenable_pmc(void) { #ifdef HWPMC_HOOKS uint32_t value; value = lapic_read32(LAPIC_LVT_PCINT); value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_PCINT, value); #endif } #ifdef HWPMC_HOOKS static void lapic_update_pmc(void *dummy) { struct lapic *la; la = &lapics[lapic_id()]; lapic_write32(LAPIC_LVT_PCINT, lvt_mode(la, APIC_LVT_PMC, lapic_read32(LAPIC_LVT_PCINT))); } #endif static int native_lapic_enable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return (0); /* Fail if the PMC LVT is not present. */ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return (0); lvts[APIC_LVT_PMC].lvt_masked = 0; #ifdef SMP /* * If hwpmc was loaded at boot time then the APs may not be * started yet. In that case, don't forward the request to * them as they will program the lvt when they start. */ if (smp_started) smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); else #endif lapic_update_pmc(NULL); return (1); #else return (0); #endif } static void native_lapic_disable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (!x2apic_mode && lapic_map == NULL) return; /* Fail if the PMC LVT is not present. */ maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < APIC_LVT_PMC) return; lvts[APIC_LVT_PMC].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); #endif smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #endif } static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lapic *la; u_long value; la = &lapics[PCPU_GET(apic_id)]; if (et->et_frequency == 0) { /* Start off with a divisor of 2 (power on reset default). */ lapic_timer_divisor = 2; /* Try to calibrate the local APIC timer. 
*/ do { lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(la, APIC_TIMER_MAX_COUNT, 0); DELAY(1000000); value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER); if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; } while (lapic_timer_divisor <= 128); if (lapic_timer_divisor > 128) panic("lapic: Divisor too big"); if (bootverbose) printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); et->et_frequency = value; et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; } if (la->la_timer_mode == 0) lapic_timer_set_divisor(lapic_timer_divisor); if (period != 0) { la->la_timer_mode = 1; la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32; lapic_timer_periodic(la, la->la_timer_period, 1); } else { la->la_timer_mode = 2; la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32; lapic_timer_oneshot(la, la->la_timer_period, 1); } return (0); } static int lapic_et_stop(struct eventtimer *et) { struct lapic *la = &lapics[PCPU_GET(apic_id)]; la->la_timer_mode = 0; lapic_timer_stop(la); return (0); } static void native_lapic_disable(void) { uint32_t value; /* Software disable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~APIC_SVR_SWEN; lapic_write32(LAPIC_SVR, value); } static void lapic_enable(void) { uint32_t value; /* Program the spurious vector to enable the local APIC. */ value = lapic_read32(LAPIC_SVR); value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT; if (lapic_eoi_suppression) value |= APIC_SVR_EOI_SUPPRESSION; lapic_write32(LAPIC_SVR, value); } /* Reset the local APIC on the BSP during resume. */ static void lapic_resume(struct pic *pic, bool suspend_cancelled) { lapic_setup(0); } static int native_lapic_id(void) { uint32_t v; KASSERT(x2apic_mode || lapic_map != NULL, ("local APIC is not mapped")); v = lapic_read32(LAPIC_ID); if (!x2apic_mode) v >>= APIC_ID_SHIFT; return (v); } static int native_lapic_intr_pending(u_int vector) { uint32_t irr; /* * The IRR registers are an array of registers each of which * only describes 32 interrupts in the low 32 bits. Thus, we * divide the vector by 32 to get the register index. * Finally, we modulus the vector by 32 to determine the * individual bit to test. */ irr = lapic_read32(LAPIC_IRR0 + vector / 32); return (irr & 1 << (vector % 32)); } static void native_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { struct lapic *la; KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", __func__, apic_id)); KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", __func__, cluster)); KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, ("%s: intra cluster id %u too big", __func__, cluster_id)); la = &lapics[apic_id]; la->la_cluster = cluster; la->la_cluster_id = cluster_id; } static int native_lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) { if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_masked = masked; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_masked = masked; lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u %s\n", pin, masked ? 
"masked" : "unmasked"); return (0); } static int native_lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) { struct lvt *lvt; if (pin > APIC_LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvt = &lvts[pin]; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lvt = &lapics[apic_id].la_lvts[pin]; lvt->lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } lvt->lvt_mode = mode; switch (mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: lvt->lvt_edgetrigger = 1; lvt->lvt_activehi = 1; if (mode == APIC_LVT_DM_EXTINT) lvt->lvt_masked = 1; else lvt->lvt_masked = 0; break; default: panic("Unsupported delivery mode: 0x%x\n", mode); } if (bootverbose) { printf(" Routing "); switch (mode) { case APIC_LVT_DM_NMI: printf("NMI"); break; case APIC_LVT_DM_SMI: printf("SMI"); break; case APIC_LVT_DM_INIT: printf("INIT"); break; case APIC_LVT_DM_EXTINT: printf("ExtINT"); break; } printf(" -> LINT%u\n", pin); } return (0); } static int native_lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) { if (pin > APIC_LVT_MAX || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_active = 1; lapics[apic_id].la_lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u polarity: %s\n", pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } static int native_lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) { if (pin > APIC_LVT_MAX || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u trigger: %s\n", pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Adjust the TPR of the current CPU so that it blocks all interrupts below * the passed in vector. */ static void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR lapic_write32(LAPIC_TPR, vector); #else uint32_t tpr; tpr = lapic_read32(LAPIC_TPR) & ~APIC_TPR_PRIO; tpr |= vector; lapic_write32(LAPIC_TPR, tpr); #endif } static void native_lapic_eoi(void) { lapic_write32_nofence(LAPIC_EOI, 0); } void lapic_handle_intr(int vector, struct trapframe *frame) { struct intsrc *isrc; isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), vector)); intr_execute_handlers(isrc, frame); } void lapic_handle_timer(struct trapframe *frame) { struct lapic *la; struct trapframe *oldframe; struct thread *td; /* Send EOI first thing. */ lapic_eoi(); #if defined(SMP) && !defined(SCHED_ULE) /* * Don't do any accounting for the disabled HTT cores, since it * will provide misleading numbers for the userland. * * No locking is necessary here, since even if we lose the race * when hlt_cpus_mask changes it is not a big deal, really. 
* * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask * and unlike other schedulers it actually schedules threads to * those CPUs. */ if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif /* Look up our local APIC structure for the tick counters. */ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } critical_exit(); } static void lapic_timer_set_divisor(u_int divisor) { KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) / sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor)); lapic_write32(LAPIC_DCR_TIMER, lapic_timer_divisors[ffs(divisor) - 1]); } static void lapic_timer_oneshot(struct lapic *la, u_int count, int enable_int) { uint32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT; if (enable_int) value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, count); } static void lapic_timer_periodic(struct lapic *la, u_int count, int enable_int) { uint32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_PERIODIC; if (enable_int) value &= ~APIC_LVT_M; lapic_write32(LAPIC_LVT_TIMER, value); lapic_write32(LAPIC_ICR_TIMER, count); } static void lapic_timer_stop(struct lapic *la) { uint32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVT_M; lapic_write32(LAPIC_LVT_TIMER, value); } void lapic_handle_cmc(void) { lapic_eoi(); cmc_intr(); } /* * Called from the mca_init() to activate the CMC interrupt if this CPU is * responsible for monitoring any MC banks for CMC events. Since mca_init() * is called prior to lapic_setup() during boot, this just needs to unmask * this CPU's LVT_CMCI entry. */ static void native_lapic_enable_cmc(void) { u_int apic_id; #ifdef DEV_ATPIC if (!x2apic_mode && lapic_map == NULL) return; #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_masked = 0; lapics[apic_id].la_lvts[APIC_LVT_CMCI].lvt_active = 1; if (bootverbose) printf("lapic%u: CMCI unmasked\n", apic_id); } void lapic_handle_error(void) { uint32_t esr; /* * Read the contents of the error status register. Write to * the register first before reading from it to force the APIC * to update its value to indicate any errors that have * occurred since the previous write to the register. */ lapic_write32(LAPIC_ESR, 0); esr = lapic_read32(LAPIC_ESR); printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); } static u_int native_apic_cpuid(u_int apic_id) { #ifdef SMP return apic_cpuids[apic_id]; #else return 0; #endif } /* Request a free IDT vector to be used by the specified IRQ. */ static u_int native_apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple * algorithm to find the first free vector. 
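 * A slot is free when la_ioint_irqs[] holds -1; the loop below does a
 * linear first-fit scan under icu_lock and returns the matching IDT
 * vector (slot index plus APIC_IO_INTS), or 0 if no free slot exists.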
*/ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { if (lapics[apic_id].la_ioint_irqs[vector] != -1) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); return (0); } /* * Request 'count' free contiguous IDT vectors to be used by 'count' * IRQs. 'count' must be a power of two and the vectors will be * aligned on a boundary of 'align'. If the request cannot be * satisfied, 0 is returned. */ static u_int native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; KASSERT(powerof2(count), ("bad count")); KASSERT(powerof2(align), ("bad align")); KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif /* * Search for 'count' free vectors. As with apic_alloc_vector(), * this just uses a simple first fit algorithm. */ run = 0; first = 0; mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ if (lapics[apic_id].la_ioint_irqs[vector] != -1) { run = 0; first = 0; continue; } /* Start a new run if run == 0 and vector is aligned. */ if (run == 0) { if ((vector & (align - 1)) != 0) continue; first = vector; } run++; /* Keep looping if the run isn't long enough yet. */ if (run < count) continue; /* Found a run, assign IRQs and return the first vector. */ for (vector = 0; vector < count; vector++) lapics[apic_id].la_ioint_irqs[first + vector] = irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); return (0); } /* * Enable a vector for a particular apic_id. Since all lapics share idt * entries and ioint_handlers this enables the vector on all lapics. lapics * which do not have the vector configured would report spurious interrupts * should it fire. */ static void native_apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } static void native_apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet /* * We can not currently clear the idt entry because other cpus * may have a valid vector at this offset. */ setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); #endif } /* Release an APIC vector when it's no longer in use. 
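 * native_apic_free_vector() below simply marks the slot free (-1) again
 * under icu_lock, temporarily binding the freeing thread to the owning
 * CPU so an in-flight delivery is not lost.
 *
 * As an aside, the aligned first-fit run search used by
 * native_apic_alloc_vectors() above can be modelled in isolation by the
 * sketch below.  It is illustrative only and not compiled as part of this
 * file: the helper name and parameters are made up, slots[] stands in for
 * la_ioint_irqs[], and align is assumed to be a power of two, as the
 * KASSERTs in the real function require.
 */
#if 0	/* Illustrative sketch only. */
static u_int
find_aligned_run(const int *slots, u_int nslots, u_int count, u_int align)
{
	u_int first = 0, run = 0, i;

	for (i = 0; i < nslots; i++) {
		if (slots[i] != -1) {
			/* Slot in use; any run in progress is broken. */
			run = 0;
			continue;
		}
		if (run == 0) {
			/* A run may only start on an aligned slot. */
			if ((i & (align - 1)) != 0)
				continue;
			first = i;
		}
		if (++run == count)
			return (first);	/* Found count free, aligned slots. */
	}
	return (nslots);		/* No suitable run; caller must check. */
}
#endif
/*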
*/ static void native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) { struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif /* * Bind us to the cpu that owned the vector before freeing it so * we don't lose an interrupt delivery race. */ td = curthread; if (!rebooting) { thread_lock(td); if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); thread_unlock(td); } mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); sched_unbind(td); thread_unlock(td); } } /* Map an IDT vector (APIC) to an IRQ (interrupt source). */ static u_int apic_idt_to_irq(u_int apic_id, u_int vector) { int irq; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; return (irq); } #ifdef DDB /* * Dump data about APIC IDT vector mappings. */ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) verbose = 2; else if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { if (lapics[apic_id].la_present == 0) continue; db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == -1 || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) continue; #endif #ifdef XENHVM if (irq == IRQ_EVTCHN) continue; #endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); else if (irq < NUM_IO_INTS) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); else db_dump_intr_event(isrc->is_event, verbose == 2); } else db_printf("IRQ %u ???\n", irq); } } } static void dump_mask(const char *prefix, uint32_t v, int base) { int i, first; first = 1; for (i = 0; i < 32; i++) if (v & (1 << i)) { if (first) { db_printf("%s:", prefix); first = 0; } db_printf(" %02x", base + i); } if (!first) db_printf("\n"); } /* Show info from the lapic regs for this CPU. */ DB_SHOW_COMMAND(lapic, db_show_lapic) { uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); v = lapic_read32(LAPIC_VERSION); db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); v = lapic_read32(LAPIC_SVR); db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? 
"enabled" : "disabled"); db_printf("TPR = %02x\n", lapic_read32(LAPIC_TPR)); #define dump_field(prefix, regn, index) \ dump_mask(__XSTRING(prefix ## index), \ lapic_read32(LAPIC_ ## regn ## index), \ index * 32) db_printf("In-service Interrupts:\n"); dump_field(isr, ISR, 0); dump_field(isr, ISR, 1); dump_field(isr, ISR, 2); dump_field(isr, ISR, 3); dump_field(isr, ISR, 4); dump_field(isr, ISR, 5); dump_field(isr, ISR, 6); dump_field(isr, ISR, 7); db_printf("TMR Interrupts:\n"); dump_field(tmr, TMR, 0); dump_field(tmr, TMR, 1); dump_field(tmr, TMR, 2); dump_field(tmr, TMR, 3); dump_field(tmr, TMR, 4); dump_field(tmr, TMR, 5); dump_field(tmr, TMR, 6); dump_field(tmr, TMR, 7); db_printf("IRR Interrupts:\n"); dump_field(irr, IRR, 0); dump_field(irr, IRR, 1); dump_field(irr, IRR, 2); dump_field(irr, IRR, 3); dump_field(irr, IRR, 4); dump_field(irr, IRR, 5); dump_field(irr, IRR, 6); dump_field(irr, IRR, 7); #undef dump_field } #endif /* * APIC probing support code. This includes code to manage enumerators. */ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; void apic_register_enumerator(struct apic_enumerator *enumerator) { #ifdef INVARIANTS struct apic_enumerator *apic_enum; SLIST_FOREACH(apic_enum, &enumerators, apic_next) { if (apic_enum == enumerator) panic("%s: Duplicate register of %s", __func__, enumerator->apic_name); } #endif SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); } /* * We have to look for CPU's very, very early because certain subsystems * want to know how many CPU's we have extremely early on in the boot * process. */ static void apic_init(void *dummy __unused) { struct apic_enumerator *enumerator; int retval, best; /* We only support built in local APICs. */ if (!(cpu_feature & CPUID_APIC)) return; /* Don't probe if APIC mode is disabled. */ if (resource_disabled("apic", 0)) return; /* Probe all the enumerators to find the best match. */ best_enum = NULL; best = 0; SLIST_FOREACH(enumerator, &enumerators, apic_next) { retval = enumerator->apic_probe(); if (retval > 0) continue; if (best_enum == NULL || best < retval) { best_enum = enumerator; best = retval; } } if (best_enum == NULL) { if (bootverbose) printf("APIC: Could not find any APICs.\n"); #ifndef DEV_ATPIC panic("running without device atpic requires a local APIC"); #endif return; } if (bootverbose) printf("APIC: Using the %s enumerator.\n", best_enum->apic_name); #ifdef I686_CPU /* * To work around an errata, we disable the local APIC on some * CPUs during early startup. We need to turn the local APIC back * on on such CPUs now. */ ppro_reenable_apic(); #endif /* Probe the CPU's in the system. */ retval = best_enum->apic_probe_cpus(); if (retval != 0) printf("%s: Failed to probe CPUs: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); /* * Setup the local APIC. We have to do this prior to starting up the APs * in the SMP case. */ static void apic_setup_local(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* Initialize the local APIC. */ retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); /* * Setup the I/O APICs. 
*/ static void apic_setup_io(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* * Local APIC must be registered before other PICs and pseudo PICs * for proper suspend/resume order. */ -#ifndef XEN intr_register_pic(&lapic_pic); -#endif retval = best_enum->apic_setup_io(); if (retval != 0) printf("%s: Failed to setup I/O APICs: returned %d\n", best_enum->apic_name, retval); -#ifdef XEN - return; -#endif + /* * Finish setting up the local APIC on the BSP once we know * how to properly program the LINT pins. In particular, this * enables the EOI suppression mode, if the LAPIC supports it and the * user did not disable the mode. */ lapic_setup(1); if (bootverbose) lapic_dump("BSP"); /* Enable the MSI "pic". */ init_ops.msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); #ifdef SMP /* * Inter Processor Interrupt functions. The lapic_ipi_*() functions are * private to the MD code. The public interface for the rest of the * kernel is defined in mp_machdep.c. */ static int native_lapic_ipi_wait(int delay) { int x; /* LAPIC_ICR.APIC_DELSTAT_MASK is undefined in x2APIC mode */ if (x2apic_mode) return (1); /* * Wait delay microseconds for IPI to be sent. If delay is * -1, we wait forever. */ if (delay == -1) { while ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) != APIC_DELSTAT_IDLE) ia32_pause(); return (1); } for (x = 0; x < delay; x += 5) { if ((lapic_read_icr_lo() & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); DELAY(5); } return (0); } static void native_lapic_ipi_raw(register_t icrlo, u_int dest) { uint64_t icr; uint32_t vhi, vlo; register_t saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(x2apic_mode || lapic_map != NULL, ("%s called too early", __func__)); KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used. */ saveintr = intr_disable(); if (!x2apic_mode) icr = lapic_read_icr(); if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { if (x2apic_mode) { vhi = dest; } else { vhi = icr >> 32; vhi &= ~APIC_ID_MASK; vhi |= dest << APIC_ID_SHIFT; } } else { vhi = 0; } /* Program the contents of the IPI and dispatch it. */ if (x2apic_mode) { vlo = icrlo; } else { vlo = icr; vlo &= APIC_ICRLO_RESV_MASK; vlo |= icrlo; } lapic_write_icr(vhi, vlo); intr_restore(saveintr); } #define BEFORE_SPIN 50000 #ifdef DETECT_DEADLOCK #define AFTER_SPIN 50 #endif static void native_lapic_ipi_vectored(u_int vector, int dest) { register_t icrlo, destfield; KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; /* * IPI_STOP_HARD is just a "fake" vector used to send an NMI. * Use special rules regarding NMI if passed, otherwise specify * the vector. */ if (vector == IPI_STOP_HARD) icrlo |= APIC_DELMODE_NMI; else icrlo |= vector | APIC_DELMODE_FIXED; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: icrlo |= APIC_DEST_SELF; break; case APIC_IPI_DEST_ALL: icrlo |= APIC_DEST_ALLISELF; break; case APIC_IPI_DEST_OTHERS: icrlo |= APIC_DEST_ALLESELF; break; default: KASSERT(x2apic_mode || (dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid destination 0x%x", __func__, dest)); destfield = dest; } /* Wait for an earlier IPI to finish.
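 * The wait is bounded by BEFORE_SPIN, which lapic_ipi_wait() above
 * interprets in microseconds (it polls the delivery status in 5us steps),
 * so a stuck IPI is detected after roughly 50ms and causes the panic
 * below unless the system is already panicking.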
*/ if (!lapic_ipi_wait(BEFORE_SPIN)) { if (panicstr != NULL) return; else panic("APIC: Previous IPI is stuck"); } lapic_ipi_raw(icrlo, destfield); #ifdef DETECT_DEADLOCK /* Wait for IPI to be delivered. */ if (!lapic_ipi_wait(AFTER_SPIN)) { #ifdef needsattention /* * XXX FIXME: * * The above function waits for the message to actually be * delivered. It breaks out after an arbitrary timeout * since the message should eventually be delivered (at * least in theory) and that if it wasn't we would catch * the failure with the check above when the next IPI is * sent. * * We could skip this wait entirely, EXCEPT it probably * protects us from other routines that assume that the * message was delivered and acted upon when this function * returns. */ printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until mesage is sent without a timeout. */ while (lapic_read_icr_lo() & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } #endif /* DETECT_DEADLOCK */ } /* * Since the IDT is shared by all CPUs the IPI slot update needs to be globally * visible. * * Consider the case where an IPI is generated immediately after allocation: * vector = lapic_ipi_alloc(ipifunc); * ipi_selected(other_cpus, vector); * * In xAPIC mode a write to ICR_LO has serializing semantics because the * APIC page is mapped as an uncached region. In x2APIC mode there is an * explicit 'mfence' before the ICR MSR is written. Therefore in both cases * the IDT slot update is globally visible before the IPI is delivered. */ static int native_lapic_ipi_alloc(inthand_t *ipifunc) { struct gate_descriptor *ip; long func; int idx, vector; KASSERT(ipifunc != &IDTVEC(rsvd), ("invalid ipifunc %p", ipifunc)); vector = -1; mtx_lock_spin(&icu_lock); for (idx = IPI_DYN_FIRST; idx <= IPI_DYN_LAST; idx++) { ip = &idt[idx]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; if (func == (uintptr_t)&IDTVEC(rsvd)) { vector = idx; setidt(vector, ipifunc, SDT_APIC, SEL_KPL, GSEL_APIC); break; } } mtx_unlock_spin(&icu_lock); return (vector); } static void native_lapic_ipi_free(int vector) { struct gate_descriptor *ip; long func; KASSERT(vector >= IPI_DYN_FIRST && vector <= IPI_DYN_LAST, ("%s: invalid vector %d", __func__, vector)); mtx_lock_spin(&icu_lock); ip = &idt[vector]; func = (ip->gd_hioffset << 16) | ip->gd_looffset; KASSERT(func != (uintptr_t)&IDTVEC(rsvd), ("invalid idtfunc %#lx", func)); setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); mtx_unlock_spin(&icu_lock); } #endif /* SMP */ Index: head/sys/x86/xen/xen_intr.c =================================================================== --- head/sys/x86/xen/xen_intr.c (revision 282273) +++ head/sys/x86/xen/xen_intr.c (revision 282274) @@ -1,1634 +1,1632 @@ /****************************************************************************** * xen_intr.c * - * Xen event and interrupt services for x86 PV and HVM guests. + * Xen event and interrupt services for x86 HVM guests. 
* * Copyright (c) 2002-2005, K A Fraser * Copyright (c) 2005, Intel Corporation * Copyright (c) 2012, Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services"); /** * Per-cpu event channel processing state. */ struct xen_intr_pcpu_data { /** * The last event channel bitmap section (level one bit) processed. * This is used to ensure we scan all ports before * servicing an already servied port again. */ u_int last_processed_l1i; /** * The last event channel processed within the event channel * bitmap being scanned. */ u_int last_processed_l2i; /** Pointer to this CPU's interrupt statistic counter. */ u_long *evtchn_intrcnt; /** * A bitmap of ports that can be serviced from this CPU. * A set bit means interrupt handling is enabled. */ u_long evtchn_enabled[sizeof(u_long) * 8]; }; /* * Start the scan at port 0 by initializing the last scanned * location as the highest numbered event channel port. */ DPCPU_DEFINE(struct xen_intr_pcpu_data, xen_intr_pcpu) = { .last_processed_l1i = LONG_BIT - 1, .last_processed_l2i = LONG_BIT - 1 }; DPCPU_DECLARE(struct vcpu_info *, vcpu_info); #define XEN_EEXIST 17 /* Xen "already exists" error */ #define XEN_ALLOCATE_VECTOR 0 /* Allocate a vector for this event channel */ #define XEN_INVALID_EVTCHN 0 /* Invalid event channel */ #define is_valid_evtchn(x) ((x) != XEN_INVALID_EVTCHN) struct xenisrc { struct intsrc xi_intsrc; enum evtchn_type xi_type; int xi_cpu; /* VCPU for delivery. */ int xi_vector; /* Global isrc vector number. */ evtchn_port_t xi_port; int xi_pirq; int xi_virq; void *xi_cookie; u_int xi_close:1; /* close on unbind? 
*/ u_int xi_activehi:1; u_int xi_edgetrigger:1; u_int xi_masked:1; }; #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) static void xen_intr_suspend(struct pic *); static void xen_intr_resume(struct pic *, bool suspend_cancelled); static void xen_intr_enable_source(struct intsrc *isrc); static void xen_intr_disable_source(struct intsrc *isrc, int eoi); static void xen_intr_eoi_source(struct intsrc *isrc); static void xen_intr_enable_intr(struct intsrc *isrc); static void xen_intr_disable_intr(struct intsrc *isrc); static int xen_intr_vector(struct intsrc *isrc); static int xen_intr_source_pending(struct intsrc *isrc); static int xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static int xen_intr_assign_cpu(struct intsrc *isrc, u_int apic_id); static void xen_intr_pirq_enable_source(struct intsrc *isrc); static void xen_intr_pirq_disable_source(struct intsrc *isrc, int eoi); static void xen_intr_pirq_eoi_source(struct intsrc *isrc); static void xen_intr_pirq_enable_intr(struct intsrc *isrc); static void xen_intr_pirq_disable_intr(struct intsrc *isrc); static int xen_intr_pirq_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); /** * PIC interface for all event channel port types except physical IRQs. */ struct pic xen_intr_pic = { .pic_enable_source = xen_intr_enable_source, .pic_disable_source = xen_intr_disable_source, .pic_eoi_source = xen_intr_eoi_source, .pic_enable_intr = xen_intr_enable_intr, .pic_disable_intr = xen_intr_disable_intr, .pic_vector = xen_intr_vector, .pic_source_pending = xen_intr_source_pending, .pic_suspend = xen_intr_suspend, .pic_resume = xen_intr_resume, .pic_config_intr = xen_intr_config_intr, .pic_assign_cpu = xen_intr_assign_cpu }; /** * PIC interface for all event channel representing * physical interrupt sources. */ struct pic xen_intr_pirq_pic = { .pic_enable_source = xen_intr_pirq_enable_source, .pic_disable_source = xen_intr_pirq_disable_source, .pic_eoi_source = xen_intr_pirq_eoi_source, .pic_enable_intr = xen_intr_pirq_enable_intr, .pic_disable_intr = xen_intr_pirq_disable_intr, .pic_vector = xen_intr_vector, .pic_source_pending = xen_intr_source_pending, .pic_config_intr = xen_intr_pirq_config_intr, .pic_assign_cpu = xen_intr_assign_cpu }; static struct mtx xen_intr_isrc_lock; static int xen_intr_auto_vector_count; static struct xenisrc *xen_intr_port_to_isrc[NR_EVENT_CHANNELS]; static u_long *xen_intr_pirq_eoi_map; static boolean_t xen_intr_pirq_eoi_map_enabled; /*------------------------- Private Functions --------------------------------*/ /** * Disable signal delivery for an event channel port on the * specified CPU. * * \param port The event channel port to mask. * * This API is used to manage the port<=>CPU binding of event * channel handlers. * * \note This operation does not preclude reception of an event * for this event channel on another CPU. To mask the * event channel globally, use evtchn_mask(). */ static inline void evtchn_cpu_mask_port(u_int cpu, evtchn_port_t port) { struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); clear_bit(port, pcpu->evtchn_enabled); } /** * Enable signal delivery for an event channel port on the * specified CPU. * * \param port The event channel port to unmask. * * This API is used to manage the port<=>CPU binding of event * channel handlers. * * \note This operation does not guarantee that event delivery * is enabled for this event channel port. The port must * also be globally enabled. See evtchn_unmask(). 
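 *
 * In other words, an event channel is serviced on a given CPU only when
 * three conditions hold at once: the port's bit is set in the shared-info
 * pending array, clear in the shared-info mask array, and set in that
 * CPU's evtchn_enabled[] bitmap; see xen_intr_active_ports() below.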
*/ static inline void evtchn_cpu_unmask_port(u_int cpu, evtchn_port_t port) { struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); set_bit(port, pcpu->evtchn_enabled); } /** * Allocate and register a per-cpu Xen upcall interrupt counter. * * \param cpu The cpu for which to register this interrupt count. */ static void xen_intr_intrcnt_add(u_int cpu) { char buf[MAXCOMLEN + 1]; struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); if (pcpu->evtchn_intrcnt != NULL) return; snprintf(buf, sizeof(buf), "cpu%d:xen", cpu); intrcnt_add(buf, &pcpu->evtchn_intrcnt); } /** * Search for an already allocated but currently unused Xen interrupt * source object. * * \param type Restrict the search to interrupt sources of the given * type. * * \return A pointer to a free Xen interrupt source object or NULL. */ static struct xenisrc * xen_intr_find_unused_isrc(enum evtchn_type type) { int isrc_idx; KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn isrc lock not held")); for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx ++) { struct xenisrc *isrc; u_int vector; vector = FIRST_EVTCHN_INT + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc != NULL && isrc->xi_type == EVTCHN_TYPE_UNBOUND) { KASSERT(isrc->xi_intsrc.is_handlers == 0, ("Free evtchn still has handlers")); isrc->xi_type = type; return (isrc); } } return (NULL); } /** * Allocate a Xen interrupt source object. * * \param type The type of interrupt source to create. * * \return A pointer to a newly allocated Xen interrupt source * object or NULL. */ static struct xenisrc * xen_intr_alloc_isrc(enum evtchn_type type, int vector) { static int warned; struct xenisrc *isrc; KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn alloc lock not held")); if (xen_intr_auto_vector_count > NR_EVENT_CHANNELS) { if (!warned) { warned = 1; printf("xen_intr_alloc: Event channels exhausted.\n"); } return (NULL); } if (type != EVTCHN_TYPE_PIRQ) { vector = FIRST_EVTCHN_INT + xen_intr_auto_vector_count; xen_intr_auto_vector_count++; } KASSERT((intr_lookup_source(vector) == NULL), ("Trying to use an already allocated vector")); mtx_unlock(&xen_intr_isrc_lock); isrc = malloc(sizeof(*isrc), M_XENINTR, M_WAITOK | M_ZERO); isrc->xi_intsrc.is_pic = (type == EVTCHN_TYPE_PIRQ) ? &xen_intr_pirq_pic : &xen_intr_pic; isrc->xi_vector = vector; isrc->xi_type = type; intr_register_source(&isrc->xi_intsrc); mtx_lock(&xen_intr_isrc_lock); return (isrc); } /** * Attempt to free an active Xen interrupt source object. * * \param isrc The interrupt source object to release. * * \returns EBUSY if the source is still in use, otherwise 0. */ static int xen_intr_release_isrc(struct xenisrc *isrc) { mtx_lock(&xen_intr_isrc_lock); if (isrc->xi_intsrc.is_handlers != 0) { mtx_unlock(&xen_intr_isrc_lock); return (EBUSY); } evtchn_mask_port(isrc->xi_port); evtchn_clear_port(isrc->xi_port); /* Rebind port to CPU 0. */ evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); evtchn_cpu_unmask_port(0, isrc->xi_port); if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) { struct evtchn_close close = { .port = isrc->xi_port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); } xen_intr_port_to_isrc[isrc->xi_port] = NULL; isrc->xi_cpu = 0; isrc->xi_type = EVTCHN_TYPE_UNBOUND; isrc->xi_port = 0; isrc->xi_cookie = NULL; mtx_unlock(&xen_intr_isrc_lock); return (0); } /** * Associate an interrupt handler with an already allocated local Xen * event channel port. 
* * \param isrcp The returned Xen interrupt object associated with * the specified local port. * \param local_port The event channel to bind. * \param type The event channel type of local_port. * \param intr_owner The device making this bind request. * \param filter An interrupt filter handler. Specify NULL * to always dispatch to the ithread handler. * \param handler An interrupt ithread handler. Optional (can * specify NULL) if all necessary event actions * are performed by filter. * \param arg Argument to present to both filter and handler. * \param irqflags Interrupt handler flags. See sys/bus.h. * \param handlep Pointer to an opaque handle used to manage this * registration. * * \returns 0 on success, otherwise an errno. */ static int xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port, enum evtchn_type type, device_t intr_owner, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; int error; *isrcp = NULL; if (port_handlep == NULL) { device_printf(intr_owner, "xen_intr_bind_isrc: Bad event handle\n"); return (EINVAL); } mtx_lock(&xen_intr_isrc_lock); isrc = xen_intr_find_unused_isrc(type); if (isrc == NULL) { isrc = xen_intr_alloc_isrc(type, XEN_ALLOCATE_VECTOR); if (isrc == NULL) { mtx_unlock(&xen_intr_isrc_lock); return (ENOSPC); } } isrc->xi_port = local_port; xen_intr_port_to_isrc[local_port] = isrc; mtx_unlock(&xen_intr_isrc_lock); /* Assign the opaque handler (the event channel port) */ *port_handlep = &isrc->xi_port; #ifdef SMP if (type == EVTCHN_TYPE_PORT) { /* * By default all interrupts are assigned to vCPU#0 * unless specified otherwise, so shuffle them to balance * the interrupt load. */ xen_intr_assign_cpu(&isrc->xi_intsrc, intr_next_cpu()); } #endif if (filter == NULL && handler == NULL) { /* * No filter/handler provided, leave the event channel * masked and without a valid handler, the caller is * in charge of setting that up. */ *isrcp = isrc; return (0); } error = xen_intr_add_handler(intr_owner, filter, handler, arg, flags, *port_handlep); if (error != 0) { xen_intr_release_isrc(isrc); return (error); } *isrcp = isrc; return (0); } /** * Lookup a Xen interrupt source object given an interrupt binding handle. * * \param handle A handle initialized by a previous call to * xen_intr_bind_isrc(). * * \returns A pointer to the Xen interrupt source object associated * with the given interrupt handle. NULL if no association * currently exists. */ static struct xenisrc * xen_intr_isrc(xen_intr_handle_t handle) { evtchn_port_t port; if (handle == NULL) return (NULL); port = *(evtchn_port_t *)handle; if (!is_valid_evtchn(port) || port >= NR_EVENT_CHANNELS) return (NULL); return (xen_intr_port_to_isrc[port]); } /** * Determine the event channel ports at the given section of the * event port bitmap which have pending events for the given cpu. * * \param pcpu The Xen interrupt pcpu data for the cpu being querried. * \param sh The Xen shared info area. * \param idx The index of the section of the event channel bitmap to * inspect. * * \returns A u_long with bits set for every event channel with pending * events. */ static inline u_long xen_intr_active_ports(struct xen_intr_pcpu_data *pcpu, shared_info_t *sh, u_int idx) { return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx] & pcpu->evtchn_enabled[idx]); } /** * Interrupt handler for processing all Xen event channel events. * * \param trap_frame The trap frame context for the current interrupt. 
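 *
 * The scan below is two-level: each bit of the per-vCPU selector word
 * (evtchn_pending_sel) says "look at that word of the pending array", and
 * every pending word is filtered through xen_intr_active_ports().  The
 * walk also resumes just after the last (l1, l2) position processed so
 * that low-numbered ports cannot starve higher ones.  A minimal
 * user-space model of the basic walk, with made-up names, plain unsigned
 * longs instead of the shared-info page, and the fairness rotation left
 * out, is sketched below; it is illustrative only and not compiled as
 * part of this file.
 */
#if 0	/* Illustrative sketch only. */
#include <strings.h>		/* ffsl(), as used by the real handler. */

#define	MODEL_LONG_BIT	((u_int)(sizeof(unsigned long) * 8))

static void
model_scan(unsigned long sel, const unsigned long *pending_words,
    void (*service)(u_int port))
{
	unsigned long pending;
	u_int l1i, l2i;

	while (sel != 0) {
		l1i = ffsl(sel) - 1;		/* Lowest selected word. */
		sel &= ~(1UL << l1i);
		pending = pending_words[l1i];
		while (pending != 0) {
			l2i = ffsl(pending) - 1;
			pending &= ~(1UL << l2i);
			service(l1i * MODEL_LONG_BIT + l2i);
		}
	}
}
#endif
/*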
*/ void xen_intr_handle_upcall(struct trapframe *trap_frame) { u_int l1i, l2i, port, cpu; u_long masked_l1, masked_l2; struct xenisrc *isrc; shared_info_t *s; vcpu_info_t *v; struct xen_intr_pcpu_data *pc; u_long l1, l2; /* * Disable preemption in order to always check and fire events * on the right vCPU */ critical_enter(); cpu = PCPU_GET(cpuid); pc = DPCPU_PTR(xen_intr_pcpu); s = HYPERVISOR_shared_info; v = DPCPU_GET(vcpu_info); if (xen_hvm_domain() && !xen_vector_callback_enabled) { KASSERT((cpu == 0), ("Fired PCI event callback on wrong CPU")); } v->evtchn_upcall_pending = 0; #if 0 #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ /* Clear master flag /before/ clearing selector flag. */ wmb(); #endif #endif l1 = atomic_readandclear_long(&v->evtchn_pending_sel); l1i = pc->last_processed_l1i; l2i = pc->last_processed_l2i; (*pc->evtchn_intrcnt)++; while (l1 != 0) { l1i = (l1i + 1) % LONG_BIT; masked_l1 = l1 & ((~0UL) << l1i); if (masked_l1 == 0) { /* * if we masked out all events, wrap around * to the beginning. */ l1i = LONG_BIT - 1; l2i = LONG_BIT - 1; continue; } l1i = ffsl(masked_l1) - 1; do { l2 = xen_intr_active_ports(pc, s, l1i); l2i = (l2i + 1) % LONG_BIT; masked_l2 = l2 & ((~0UL) << l2i); if (masked_l2 == 0) { /* if we masked out all events, move on */ l2i = LONG_BIT - 1; break; } l2i = ffsl(masked_l2) - 1; /* process port */ port = (l1i * LONG_BIT) + l2i; synch_clear_bit(port, &s->evtchn_pending[0]); isrc = xen_intr_port_to_isrc[port]; if (__predict_false(isrc == NULL)) continue; /* Make sure we are firing on the right vCPU */ KASSERT((isrc->xi_cpu == PCPU_GET(cpuid)), ("Received unexpected event on vCPU#%d, event bound to vCPU#%d", PCPU_GET(cpuid), isrc->xi_cpu)); intr_execute_handlers(&isrc->xi_intsrc, trap_frame); /* * If this is the final port processed, * we'll pick up here+1 next time. */ pc->last_processed_l1i = l1i; pc->last_processed_l2i = l2i; } while (l2i != LONG_BIT - 1); l2 = xen_intr_active_ports(pc, s, l1i); if (l2 == 0) { /* * We handled all ports, so we can clear the * selector bit. */ l1 &= ~(1UL << l1i); } } critical_exit(); } static int xen_intr_init(void *dummy __unused) { shared_info_t *s = HYPERVISOR_shared_info; struct xen_intr_pcpu_data *pcpu; struct physdev_pirq_eoi_gmfn eoi_gmfn; int i, rc; if (!xen_domain()) return (0); mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); /* * Register interrupt count manually as we aren't * guaranteed to see a call to xen_intr_assign_cpu() * before our first interrupt. Also set the per-cpu * mask of CPU#0 to enable all, since by default * all event channels are bound to CPU#0. */ CPU_FOREACH(i) { pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); memset(pcpu->evtchn_enabled, i == 0 ? 
~0 : 0, sizeof(pcpu->evtchn_enabled)); xen_intr_intrcnt_add(i); } for (i = 0; i < nitems(s->evtchn_mask); i++) atomic_store_rel_long(&s->evtchn_mask[i], ~0); /* Try to register PIRQ EOI map */ xen_intr_pirq_eoi_map = malloc(PAGE_SIZE, M_XENINTR, M_WAITOK | M_ZERO); eoi_gmfn.gmfn = atop(vtophys(xen_intr_pirq_eoi_map)); rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); if (rc != 0 && bootverbose) printf("Xen interrupts: unable to register PIRQ EOI map\n"); else xen_intr_pirq_eoi_map_enabled = true; intr_register_pic(&xen_intr_pic); intr_register_pic(&xen_intr_pirq_pic); if (bootverbose) printf("Xen interrupt system initialized\n"); return (0); } SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_SECOND, xen_intr_init, NULL); /*--------------------------- Common PIC Functions ---------------------------*/ /** * Prepare this PIC for system suspension. */ static void xen_intr_suspend(struct pic *unused) { } static void xen_rebind_ipi(struct xenisrc *isrc) { #ifdef SMP int cpu = isrc->xi_cpu; int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; int error; struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id }; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); if (error != 0) panic("unable to rebind xen IPI: %d", error); isrc->xi_port = bind_ipi.port; isrc->xi_cpu = 0; xen_intr_port_to_isrc[bind_ipi.port] = isrc; error = xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); if (error) panic("unable to bind xen IPI to CPU#%d: %d", cpu, error); evtchn_unmask_port(bind_ipi.port); #else panic("Resume IPI event channel on UP"); #endif } static void xen_rebind_virq(struct xenisrc *isrc) { int cpu = isrc->xi_cpu; int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; int error; struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq, .vcpu = vcpu_id }; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (error != 0) panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error); isrc->xi_port = bind_virq.port; isrc->xi_cpu = 0; xen_intr_port_to_isrc[bind_virq.port] = isrc; #ifdef SMP error = xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); if (error) panic("unable to bind xen VIRQ#%d to CPU#%d: %d", isrc->xi_virq, cpu, error); #endif evtchn_unmask_port(bind_virq.port); } /** * Return this PIC to service after being suspended. */ static void xen_intr_resume(struct pic *unused, bool suspend_cancelled) { shared_info_t *s = HYPERVISOR_shared_info; struct xenisrc *isrc; u_int isrc_idx; int i; if (suspend_cancelled) return; /* Reset the per-CPU masks */ CPU_FOREACH(i) { struct xen_intr_pcpu_data *pcpu; pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled)); } /* Mask all event channels. */ for (i = 0; i < nitems(s->evtchn_mask); i++) atomic_store_rel_long(&s->evtchn_mask[i], ~0); /* Remove port -> isrc mappings */ memset(xen_intr_port_to_isrc, 0, sizeof(xen_intr_port_to_isrc)); /* Free unused isrcs and rebind VIRQs and IPIs */ for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx++) { u_int vector; vector = FIRST_EVTCHN_INT + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc != NULL) { isrc->xi_port = 0; switch (isrc->xi_type) { case EVTCHN_TYPE_IPI: xen_rebind_ipi(isrc); break; case EVTCHN_TYPE_VIRQ: xen_rebind_virq(isrc); break; default: isrc->xi_cpu = 0; break; } } } } /** * Disable a Xen interrupt source. * * \param isrc The interrupt source to disable. 
*/ static void xen_intr_disable_intr(struct intsrc *base_isrc) { struct xenisrc *isrc = (struct xenisrc *)base_isrc; evtchn_mask_port(isrc->xi_port); } /** * Determine the global interrupt vector number for * a Xen interrupt source. * * \param isrc The interrupt source to query. * * \return The vector number corresponding to the given interrupt source. */ static int xen_intr_vector(struct intsrc *base_isrc) { struct xenisrc *isrc = (struct xenisrc *)base_isrc; return (isrc->xi_vector); } /** * Determine whether or not interrupt events are pending on the * the given interrupt source. * * \param isrc The interrupt source to query. * * \returns 0 if no events are pending, otherwise non-zero. */ static int xen_intr_source_pending(struct intsrc *isrc) { /* * EventChannels are edge triggered and never masked. * There can be no pending events. */ return (0); } /** * Perform configuration of an interrupt source. * * \param isrc The interrupt source to configure. * \param trig Edge or level. * \param pol Active high or low. * * \returns 0 if no events are pending, otherwise non-zero. */ static int xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol) { /* Configuration is only possible via the evtchn apis. */ return (ENODEV); } /** * Configure CPU affinity for interrupt source event delivery. * * \param isrc The interrupt source to configure. * \param apic_id The apic id of the CPU for handling future events. * * \returns 0 if successful, otherwise an errno. */ static int xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) { #ifdef SMP struct evtchn_bind_vcpu bind_vcpu; struct xenisrc *isrc; u_int to_cpu, vcpu_id; int error, masked; -#ifdef XENHVM if (xen_vector_callback_enabled == 0) return (EOPNOTSUPP); -#endif to_cpu = apic_cpuid(apic_id); vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id; xen_intr_intrcnt_add(to_cpu); mtx_lock(&xen_intr_isrc_lock); isrc = (struct xenisrc *)base_isrc; if (!is_valid_evtchn(isrc->xi_port)) { mtx_unlock(&xen_intr_isrc_lock); return (EINVAL); } /* * Mask the event channel while binding it to prevent interrupt * delivery with an inconsistent state in isrc->xi_cpu. */ masked = evtchn_test_and_set_mask(isrc->xi_port); if ((isrc->xi_type == EVTCHN_TYPE_VIRQ) || (isrc->xi_type == EVTCHN_TYPE_IPI)) { /* * Virtual IRQs are associated with a cpu by * the Hypervisor at evtchn_bind_virq time, so * all we need to do is update the per-CPU masks. */ evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); isrc->xi_cpu = to_cpu; evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port); goto out; } bind_vcpu.port = isrc->xi_port; bind_vcpu.vcpu = vcpu_id; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu); if (isrc->xi_cpu != to_cpu) { if (error == 0) { /* Commit to new binding by removing the old one. */ evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); isrc->xi_cpu = to_cpu; evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port); } } out: if (masked == 0) evtchn_unmask_port(isrc->xi_port); mtx_unlock(&xen_intr_isrc_lock); return (0); #else return (EOPNOTSUPP); #endif } /*------------------- Virtual Interrupt Source PIC Functions -----------------*/ /* * Mask a level triggered interrupt source. * * \param isrc The interrupt source to mask (if necessary). * \param eoi If non-zero, perform any necessary end-of-interrupt * acknowledgements. 
*/ static void xen_intr_disable_source(struct intsrc *base_isrc, int eoi) { struct xenisrc *isrc; isrc = (struct xenisrc *)base_isrc; /* * NB: checking if the event channel is already masked is * needed because the event channel user-space device * masks event channels on its filter as part of its * normal operation, and those shouldn't be automatically * unmasked by the generic interrupt code. The event channel * device will unmask them when needed. */ isrc->xi_masked = !!evtchn_test_and_set_mask(isrc->xi_port); } /* * Unmask a level triggered interrupt source. * * \param isrc The interrupt source to unmask (if necessary). */ static void xen_intr_enable_source(struct intsrc *base_isrc) { struct xenisrc *isrc; isrc = (struct xenisrc *)base_isrc; if (isrc->xi_masked == 0) evtchn_unmask_port(isrc->xi_port); } /* * Perform any necessary end-of-interrupt acknowledgements. * * \param isrc The interrupt source to EOI. */ static void xen_intr_eoi_source(struct intsrc *base_isrc) { } /* * Enable and unmask the interrupt source. * * \param isrc The interrupt source to enable. */ static void xen_intr_enable_intr(struct intsrc *base_isrc) { struct xenisrc *isrc = (struct xenisrc *)base_isrc; evtchn_unmask_port(isrc->xi_port); } /*------------------ Physical Interrupt Source PIC Functions -----------------*/ /* * Mask a level triggered interrupt source. * * \param isrc The interrupt source to mask (if necessary). * \param eoi If non-zero, perform any necessary end-of-interrupt * acknowledgements. */ static void xen_intr_pirq_disable_source(struct intsrc *base_isrc, int eoi) { struct xenisrc *isrc; isrc = (struct xenisrc *)base_isrc; if (isrc->xi_edgetrigger == 0) evtchn_mask_port(isrc->xi_port); if (eoi == PIC_EOI) xen_intr_pirq_eoi_source(base_isrc); } /* * Unmask a level triggered interrupt source. * * \param isrc The interrupt source to unmask (if necessary). */ static void xen_intr_pirq_enable_source(struct intsrc *base_isrc) { struct xenisrc *isrc; isrc = (struct xenisrc *)base_isrc; if (isrc->xi_edgetrigger == 0) evtchn_unmask_port(isrc->xi_port); } /* * Perform any necessary end-of-interrupt acknowledgements. * * \param isrc The interrupt source to EOI. */ static void xen_intr_pirq_eoi_source(struct intsrc *base_isrc) { struct xenisrc *isrc; int error; isrc = (struct xenisrc *)base_isrc; if (test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map)) { struct physdev_eoi eoi = { .irq = isrc->xi_pirq }; error = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); if (error != 0) panic("Unable to EOI PIRQ#%d: %d\n", isrc->xi_pirq, error); } } /* * Enable and unmask the interrupt source. * * \param isrc The interrupt source to enable. */ static void xen_intr_pirq_enable_intr(struct intsrc *base_isrc) { struct xenisrc *isrc; struct evtchn_bind_pirq bind_pirq; struct physdev_irq_status_query irq_status; int error; isrc = (struct xenisrc *)base_isrc; if (!xen_intr_pirq_eoi_map_enabled) { irq_status.irq = isrc->xi_pirq; error = HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status); if (error) panic("unable to get status of IRQ#%d", isrc->xi_pirq); if (irq_status.flags & XENIRQSTAT_needs_eoi) { /* * Since the dynamic PIRQ EOI map is not available * mark the PIRQ as needing EOI unconditionally. */ set_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map); } } bind_pirq.pirq = isrc->xi_pirq; bind_pirq.flags = isrc->xi_edgetrigger ?
0 : BIND_PIRQ__WILL_SHARE; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); if (error) panic("unable to bind IRQ#%d", isrc->xi_pirq); isrc->xi_port = bind_pirq.port; mtx_lock(&xen_intr_isrc_lock); KASSERT((xen_intr_port_to_isrc[bind_pirq.port] == NULL), ("trying to override an already setup event channel port")); xen_intr_port_to_isrc[bind_pirq.port] = isrc; mtx_unlock(&xen_intr_isrc_lock); evtchn_unmask_port(isrc->xi_port); } /* * Disable an interrupt source. * * \param isrc The interrupt source to disable. */ static void xen_intr_pirq_disable_intr(struct intsrc *base_isrc) { struct xenisrc *isrc; struct evtchn_close close; int error; isrc = (struct xenisrc *)base_isrc; evtchn_mask_port(isrc->xi_port); close.port = isrc->xi_port; error = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); if (error) panic("unable to close event channel %d IRQ#%d", isrc->xi_port, isrc->xi_pirq); mtx_lock(&xen_intr_isrc_lock); xen_intr_port_to_isrc[isrc->xi_port] = NULL; mtx_unlock(&xen_intr_isrc_lock); isrc->xi_port = 0; } /** * Perform configuration of an interrupt source. * * \param isrc The interrupt source to configure. * \param trig Edge or level. * \param pol Active high or low. * * \returns 0 if no events are pending, otherwise non-zero. */ static int xen_intr_pirq_config_intr(struct intsrc *base_isrc, enum intr_trigger trig, enum intr_polarity pol) { struct xenisrc *isrc = (struct xenisrc *)base_isrc; struct physdev_setup_gsi setup_gsi; int error; KASSERT(!(trig == INTR_TRIGGER_CONFORM || pol == INTR_POLARITY_CONFORM), ("%s: Conforming trigger or polarity\n", __func__)); setup_gsi.gsi = isrc->xi_pirq; setup_gsi.triggering = trig == INTR_TRIGGER_EDGE ? 0 : 1; setup_gsi.polarity = pol == INTR_POLARITY_HIGH ? 0 : 1; error = HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi); if (error == -XEN_EEXIST) { if ((isrc->xi_edgetrigger && (trig != INTR_TRIGGER_EDGE)) || (isrc->xi_activehi && (pol != INTR_POLARITY_HIGH))) panic("unable to reconfigure interrupt IRQ#%d", isrc->xi_pirq); error = 0; } if (error) panic("unable to configure IRQ#%d\n", isrc->xi_pirq); isrc->xi_activehi = pol == INTR_POLARITY_HIGH ? 1 : 0; isrc->xi_edgetrigger = trig == INTR_TRIGGER_EDGE ? 1 : 0; return (0); } /*--------------------------- Public Functions -------------------------------*/ /*------- API comments for these methods can be found in xen/xenintr.h -------*/ int xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; int error; error = xen_intr_bind_isrc(&isrc, local_port, EVTCHN_TYPE_PORT, dev, filter, handler, arg, flags, port_handlep); if (error != 0) return (error); /* * The Event Channel API didn't open this port, so it is not * responsible for closing it automatically on unbind. */ isrc->xi_close = 0; return (0); } int xen_intr_alloc_and_bind_local_port(device_t dev, u_int remote_domain, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; struct evtchn_alloc_unbound alloc_unbound; int error; alloc_unbound.dom = DOMID_SELF; alloc_unbound.remote_dom = remote_domain; error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc_unbound); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. 
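 * (The hypercall reports failure Linux-style, as a negative errno value,
 * while the rest of the kernel expects positive errno values, hence the
 * negation on the return below.)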
*/ return (-error); } error = xen_intr_bind_isrc(&isrc, alloc_unbound.port, EVTCHN_TYPE_PORT, dev, filter, handler, arg, flags, port_handlep); if (error != 0) { evtchn_close_t close = { .port = alloc_unbound.port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } isrc->xi_close = 1; return (0); } int xen_intr_bind_remote_port(device_t dev, u_int remote_domain, u_int remote_port, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { struct xenisrc *isrc; struct evtchn_bind_interdomain bind_interdomain; int error; bind_interdomain.remote_dom = remote_domain; bind_interdomain.remote_port = remote_port; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. */ return (-error); } error = xen_intr_bind_isrc(&isrc, bind_interdomain.local_port, EVTCHN_TYPE_PORT, dev, filter, handler, arg, flags, port_handlep); if (error) { evtchn_close_t close = { .port = bind_interdomain.local_port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; return (0); } int xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) { int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; struct xenisrc *isrc; struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = vcpu_id }; int error; /* Ensure the target CPU is ready to handle evtchn interrupts. */ xen_intr_intrcnt_add(cpu); isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. */ return (-error); } error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, dev, filter, handler, arg, flags, port_handlep); #ifdef SMP if (error == 0) error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); #endif if (error != 0) { evtchn_close_t close = { .port = bind_virq.port }; xen_intr_unbind(*port_handlep); if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } #ifdef SMP if (isrc->xi_cpu != cpu) { /* * Too early in the boot process for the generic interrupt * code to perform the binding. Update our event channel * masks manually so events can't fire on the wrong cpu * during AP startup. */ xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); } #endif /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; isrc->xi_virq = virq; return (0); } int xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu, driver_filter_t filter, enum intr_type flags, xen_intr_handle_t *port_handlep) { #ifdef SMP int vcpu_id = pcpu_find(cpu)->pc_vcpu_id; struct xenisrc *isrc; struct evtchn_bind_ipi bind_ipi = { .vcpu = vcpu_id }; int error; /* Ensure the target CPU is ready to handle evtchn interrupts. */ xen_intr_intrcnt_add(cpu); isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); if (error != 0) { /* * XXX Trap Hypercall error code Linuxisms in * the HYPERCALL layer. 
*/ return (-error); } error = xen_intr_bind_isrc(&isrc, bind_ipi.port, EVTCHN_TYPE_IPI, dev, filter, NULL, NULL, flags, port_handlep); if (error == 0) error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); if (error != 0) { evtchn_close_t close = { .port = bind_ipi.port }; xen_intr_unbind(*port_handlep); if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); return (error); } if (isrc->xi_cpu != cpu) { /* * Too early in the boot process for the generic interrupt * code to perform the binding. Update our event channel * masks manually so events can't fire on the wrong cpu * during AP startup. */ xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); } /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; return (0); #else return (EOPNOTSUPP); #endif } int xen_register_pirq(int vector, enum intr_trigger trig, enum intr_polarity pol) { struct physdev_map_pirq map_pirq; struct xenisrc *isrc; int error; if (vector == 0) return (EINVAL); if (bootverbose) printf("xen: register IRQ#%d\n", vector); map_pirq.domid = DOMID_SELF; map_pirq.type = MAP_PIRQ_TYPE_GSI; map_pirq.index = vector; map_pirq.pirq = vector; error = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_pirq); if (error) { printf("xen: unable to map IRQ#%d\n", vector); return (error); } mtx_lock(&xen_intr_isrc_lock); isrc = xen_intr_alloc_isrc(EVTCHN_TYPE_PIRQ, vector); mtx_unlock(&xen_intr_isrc_lock); KASSERT((isrc != NULL), ("xen: unable to allocate isrc for interrupt")); isrc->xi_pirq = vector; isrc->xi_activehi = pol == INTR_POLARITY_HIGH ? 1 : 0; isrc->xi_edgetrigger = trig == INTR_TRIGGER_EDGE ? 1 : 0; return (0); } int xen_register_msi(device_t dev, int vector, int count) { struct physdev_map_pirq msi_irq; struct xenisrc *isrc; int ret; memset(&msi_irq, 0, sizeof(msi_irq)); msi_irq.domid = DOMID_SELF; msi_irq.type = count == 1 ? MAP_PIRQ_TYPE_MSI_SEG : MAP_PIRQ_TYPE_MULTI_MSI; msi_irq.index = -1; msi_irq.pirq = -1; msi_irq.bus = pci_get_bus(dev) | (pci_get_domain(dev) << 16); msi_irq.devfn = (pci_get_slot(dev) << 3) | pci_get_function(dev); msi_irq.entry_nr = count; ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &msi_irq); if (ret != 0) return (ret); if (count != msi_irq.entry_nr) { panic("unable to setup all requested MSI vectors " "(expected %d got %d)", count, msi_irq.entry_nr); } mtx_lock(&xen_intr_isrc_lock); for (int i = 0; i < count; i++) { isrc = xen_intr_alloc_isrc(EVTCHN_TYPE_PIRQ, vector + i); KASSERT(isrc != NULL, ("xen: unable to allocate isrc for interrupt")); isrc->xi_pirq = msi_irq.pirq + i; /* MSI interrupts are always edge triggered */ isrc->xi_edgetrigger = 1; } mtx_unlock(&xen_intr_isrc_lock); return (0); } int xen_release_msi(int vector) { struct physdev_unmap_pirq unmap; struct xenisrc *isrc; int ret; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc == NULL) return (ENXIO); unmap.pirq = isrc->xi_pirq; ret = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap); if (ret != 0) return (ret); xen_intr_release_isrc(isrc); return (0); } int xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) 
{
	char descr[MAXCOMLEN + 1];
	struct xenisrc *isrc;
	va_list ap;

	isrc = xen_intr_isrc(port_handle);
	if (isrc == NULL)
		return (EINVAL);

	va_start(ap, fmt);
	vsnprintf(descr, sizeof(descr), fmt, ap);
	va_end(ap);
	return (intr_describe(isrc->xi_vector, isrc->xi_cookie, descr));
}

void
xen_intr_unbind(xen_intr_handle_t *port_handlep)
{
	struct xenisrc *isrc;

	KASSERT(port_handlep != NULL,
	    ("NULL xen_intr_handle_t passed to xen_intr_unbind"));

	isrc = xen_intr_isrc(*port_handlep);
	*port_handlep = NULL;
	if (isrc == NULL)
		return;

	if (isrc->xi_cookie != NULL)
		intr_remove_handler(isrc->xi_cookie);
	xen_intr_release_isrc(isrc);
}

void
xen_intr_signal(xen_intr_handle_t handle)
{
	struct xenisrc *isrc;

	isrc = xen_intr_isrc(handle);
	if (isrc != NULL) {
		KASSERT(isrc->xi_type == EVTCHN_TYPE_PORT ||
		    isrc->xi_type == EVTCHN_TYPE_IPI,
		    ("evtchn_signal on something other than a local port"));
		struct evtchn_send send = { .port = isrc->xi_port };
		(void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
	}
}

evtchn_port_t
xen_intr_port(xen_intr_handle_t handle)
{
	struct xenisrc *isrc;

	isrc = xen_intr_isrc(handle);
	if (isrc == NULL)
		return (0);

	return (isrc->xi_port);
}

int
xen_intr_add_handler(device_t dev, driver_filter_t filter,
    driver_intr_t handler, void *arg, enum intr_type flags,
    xen_intr_handle_t handle)
{
	struct xenisrc *isrc;
	int error;

	isrc = xen_intr_isrc(handle);
	if (isrc == NULL || isrc->xi_cookie != NULL)
		return (EINVAL);

	error = intr_add_handler(device_get_nameunit(dev), isrc->xi_vector,
	    filter, handler, arg, flags|INTR_EXCL, &isrc->xi_cookie);
	if (error != 0) {
		device_printf(dev,
		    "xen_intr_add_handler: intr_add_handler failed: %d\n",
		    error);
	}

	return (error);
}

#ifdef DDB
static const char *
xen_intr_print_type(enum evtchn_type type)
{
	static const char *evtchn_type_to_string[EVTCHN_TYPE_COUNT] = {
		[EVTCHN_TYPE_UNBOUND]	= "UNBOUND",
		[EVTCHN_TYPE_PIRQ]	= "PIRQ",
		[EVTCHN_TYPE_VIRQ]	= "VIRQ",
		[EVTCHN_TYPE_IPI]	= "IPI",
		[EVTCHN_TYPE_PORT]	= "PORT",
	};

	if (type >= EVTCHN_TYPE_COUNT)
		return ("UNKNOWN");

	return (evtchn_type_to_string[type]);
}

static void
xen_intr_dump_port(struct xenisrc *isrc)
{
	struct xen_intr_pcpu_data *pcpu;
	shared_info_t *s = HYPERVISOR_shared_info;
	int i;

	db_printf("Port %d Type: %s\n",
	    isrc->xi_port, xen_intr_print_type(isrc->xi_type));
	if (isrc->xi_type == EVTCHN_TYPE_PIRQ) {
		db_printf("\tPirq: %d ActiveHi: %d EdgeTrigger: %d "
		    "NeedsEOI: %d\n",
		    isrc->xi_pirq, isrc->xi_activehi, isrc->xi_edgetrigger,
		    !!test_bit(isrc->xi_pirq, xen_intr_pirq_eoi_map));
	}
	if (isrc->xi_type == EVTCHN_TYPE_VIRQ)
		db_printf("\tVirq: %d\n", isrc->xi_virq);

	db_printf("\tMasked: %d Pending: %d\n",
	    !!test_bit(isrc->xi_port, &s->evtchn_mask[0]),
	    !!test_bit(isrc->xi_port, &s->evtchn_pending[0]));

	db_printf("\tPer-CPU Masks: ");
	CPU_FOREACH(i) {
		pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu);
		db_printf("cpu#%d: %d ", i,
		    !!test_bit(isrc->xi_port, pcpu->evtchn_enabled));
	}
	db_printf("\n");
}

DB_SHOW_COMMAND(xen_evtchn, db_show_xen_evtchn)
{
	int i;

	if (!xen_domain()) {
		db_printf("Only available on Xen guests\n");
		return;
	}

	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
		struct xenisrc *isrc;

		isrc = xen_intr_port_to_isrc[i];
		if (isrc == NULL)
			continue;

		xen_intr_dump_port(isrc);
	}
}
#endif /* DDB */
Index: head/sys/x86/xen/xen_nexus.c
===================================================================
--- head/sys/x86/xen/xen_nexus.c	(revision 282273)
+++ head/sys/x86/xen/xen_nexus.c	(revision 282274)
@@ -1,173 +1,167 @@
/*
 * Copyright (c) 2013 Roger Pau Monné
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "pcib_if.h"

/*
 * Xen nexus(4) driver.
 */

static int
nexus_xen_probe(device_t dev)
{

	if (!xen_pv_domain())
		return (ENXIO);

	return (BUS_PROBE_SPECIFIC);
}

static int
nexus_xen_attach(device_t dev)
{
	int error;
-#ifndef XEN
	device_t acpi_dev = NULL;
-#endif

	nexus_init_resources();
	bus_generic_probe(dev);

-#ifndef XEN
	if (xen_initial_domain()) {
		/* Disable some ACPI devices that are not usable by Dom0 */
		acpi_cpu_disabled = true;
		acpi_hpet_disabled = true;
		acpi_timer_disabled = true;

		acpi_dev = BUS_ADD_CHILD(dev, 10, "acpi", 0);
		if (acpi_dev == NULL)
			panic("Unable to add ACPI bus to Xen Dom0");
	}
-#endif

	error = bus_generic_attach(dev);
-#ifndef XEN
	if (xen_initial_domain() && (error == 0))
		acpi_install_wakeup_handler(device_get_softc(acpi_dev));
-#endif

	return (error);
}

static int
nexus_xen_config_intr(device_t dev, int irq, enum intr_trigger trig,
    enum intr_polarity pol)
{
	int ret;

	/*
	 * ISA and PCI intline IRQs are not preregistered on Xen, so
	 * intercept calls to configure those and register them on the fly.
	 */
	if ((irq < FIRST_MSI_INT) && (intr_lookup_source(irq) == NULL)) {
		ret = xen_register_pirq(irq, trig, pol);
		if (ret != 0)
			return (ret);
		nexus_add_irq(irq);
	}

	return (intr_config_intr(irq, trig, pol));
}

static int
nexus_xen_alloc_msix(device_t pcib, device_t dev, int *irq)
{

	return (xen_msix_alloc(dev, irq));
}

static int
nexus_xen_release_msix(device_t pcib, device_t dev, int irq)
{

	return (xen_msix_release(irq));
}

static int
nexus_xen_alloc_msi(device_t pcib, device_t dev, int count, int maxcount,
    int *irqs)
{

	return (xen_msi_alloc(dev, count, maxcount, irqs));
}

static int
nexus_xen_release_msi(device_t pcib, device_t dev, int count, int *irqs)
{

	return (xen_msi_release(irqs, count));
}

static int
nexus_xen_map_msi(device_t pcib, device_t dev, int irq, uint64_t *addr,
    uint32_t *data)
{

	return (xen_msi_map(irq, addr, data));
}

static device_method_t nexus_xen_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		nexus_xen_probe),
	DEVMETHOD(device_attach,	nexus_xen_attach),

	/* INTR */
	DEVMETHOD(bus_config_intr,	nexus_xen_config_intr),

	/* MSI */
	DEVMETHOD(pcib_alloc_msi,	nexus_xen_alloc_msi),
	DEVMETHOD(pcib_release_msi,	nexus_xen_release_msi),
	DEVMETHOD(pcib_alloc_msix,	nexus_xen_alloc_msix),
	DEVMETHOD(pcib_release_msix,	nexus_xen_release_msix),
	DEVMETHOD(pcib_map_msi,		nexus_xen_map_msi),

	{ 0, 0 }
};

DEFINE_CLASS_1(nexus, nexus_xen_driver, nexus_xen_methods, 1, nexus_driver);
static devclass_t nexus_devclass;

DRIVER_MODULE(nexus_xen, root, nexus_xen_driver, nexus_devclass, 0, 0);
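
The xen_intr.c functions above make up the event-channel binding API that para-virtualized front-end drivers consume (xen_intr_bind_remote_port(), xen_intr_signal(), xen_intr_unbind(), and friends). The following sketch is illustrative only and is not part of this commit: struct xnf_softc, xnf_filter(), xnf_connect(), xnf_disconnect(), and the remote domain/port values are hypothetical placeholders showing one plausible way a front end might bind, notify, and tear down an interdomain event channel using the signatures defined above.

/*
 * Illustrative sketch only (not part of this change).  The softc layout,
 * xnf_filter(), and the remote domain/port values are hypothetical; only
 * the xen_intr_*() calls follow the signatures defined in xen_intr.c above.
 */
#include <sys/param.h>
#include <sys/bus.h>

#include <xen/xen_intr.h>

struct xnf_softc {				/* hypothetical front-end softc */
	device_t		dev;
	xen_intr_handle_t	xen_intr_handle;
};

static int
xnf_filter(void *arg)
{
	struct xnf_softc *sc = arg;

	/* Acknowledge the event; defer real work to an ithread or taskqueue. */
	(void)sc;
	return (FILTER_HANDLED);
}

static int
xnf_connect(struct xnf_softc *sc, u_int remote_domid, u_int remote_port)
{
	int error;

	/* Bind the backend's event channel and install our filter. */
	error = xen_intr_bind_remote_port(sc->dev, remote_domid, remote_port,
	    xnf_filter, NULL, sc, INTR_TYPE_NET, &sc->xen_intr_handle);
	if (error != 0)
		return (error);

	/* Kick the backend once the shared ring is ready. */
	xen_intr_signal(sc->xen_intr_handle);
	return (0);
}

static void
xnf_disconnect(struct xnf_softc *sc)
{

	/* Also closes the local port, since xi_close is set at bind time. */
	xen_intr_unbind(&sc->xen_intr_handle);
}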