diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index 646524717c2c..7551cc5c8b93 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -1,326 +1,342 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD$ */ /* * Interrupt entry points for external interrupts triggered by I/O APICs * as well as IPI handlers. */ #include "opt_smp.h" #include #include #include "assym.s" /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word * in the ISR. The handler determines the highest bit set in the ISR, * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. */ #define ISR_VEC(index, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ FAKE_MCOUNT(TF_RIP(%rsp)) ; \ movq lapic, %rdx ; /* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%rdx), %eax ; /* load ISR */ \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ jz 1f ; \ addl $(32 * index),%eax ; \ movq %rsp, %rsi ; \ movl %eax, %edi ; /* pass the IRQ */ \ call lapic_handle_intr ; \ 1: ; \ MEXITCOUNT ; \ jmp doreti /* * Handle "spurious INTerrupts". * Notes: * This is different than the "spurious INTerrupt" generated by an * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. */ .text SUPERALIGN_TEXT IDTVEC(spuriousint) /* No EOI cycle used here */ jmp doreti_iret ISR_VEC(1, apic_isr1) ISR_VEC(2, apic_isr2) ISR_VEC(3, apic_isr3) ISR_VEC(4, apic_isr4) ISR_VEC(5, apic_isr5) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) /* * Local APIC periodic timer handler. */ .text SUPERALIGN_TEXT IDTVEC(timerint) PUSH_FRAME FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp, %rdi call lapic_handle_timer MEXITCOUNT jmp doreti /* * Local APIC CMCI handler. 
*/ .text SUPERALIGN_TEXT IDTVEC(cmcint) PUSH_FRAME FAKE_MCOUNT(TF_RIP(%rsp)) call lapic_handle_cmc MEXITCOUNT jmp doreti /* * Local APIC error interrupt handler. */ .text SUPERALIGN_TEXT IDTVEC(errorint) PUSH_FRAME FAKE_MCOUNT(TF_RIP(%rsp)) call lapic_handle_error MEXITCOUNT jmp doreti +#ifdef XENHVM +/* + * Xen event channel upcall interrupt handler. + * Only used when the hypervisor supports direct vector callbacks. + */ + .text + SUPERALIGN_TEXT +IDTVEC(xen_intr_upcall) + PUSH_FRAME + FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp, %rdi + call xen_intr_handle_upcall + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invltlb) #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME movl PCPU(CPUID), %eax #ifdef COUNT_XINVLTLB_HITS incl xhits_gbl(,%rax,4) #endif #ifdef COUNT_IPIS movq ipi_invltlb_counts(,%rax,8),%rax incq (%rax) #endif POP_FRAME #endif pushq %rax movq %cr3, %rax /* invalidate the TLB */ movq %rax, %cr3 movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rax jmp doreti_iret /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT IDTVEC(invlpg) #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME movl PCPU(CPUID), %eax #ifdef COUNT_XINVLTLB_HITS incl xhits_pg(,%rax,4) #endif #ifdef COUNT_IPIS movq ipi_invlpg_counts(,%rax,8),%rax incq (%rax) #endif POP_FRAME #endif pushq %rax movq smp_tlb_addr1, %rax invlpg (%rax) /* invalidate single page */ movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rax jmp doreti_iret /* * Page range TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invlrng) #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) PUSH_FRAME movl PCPU(CPUID), %eax #ifdef COUNT_XINVLTLB_HITS incl xhits_rng(,%rax,4) #endif #ifdef COUNT_IPIS movq ipi_invlrng_counts(,%rax,8),%rax incq (%rax) #endif POP_FRAME #endif pushq %rax pushq %rdx movq smp_tlb_addr1, %rdx movq smp_tlb_addr2, %rax 1: invlpg (%rdx) /* invalidate single page */ addq $PAGE_SIZE, %rdx cmpq %rax, %rdx jb 1b movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rdx popq %rax jmp doreti_iret /* * Invalidate cache. */ .text SUPERALIGN_TEXT IDTVEC(invlcache) #ifdef COUNT_IPIS PUSH_FRAME movl PCPU(CPUID), %eax movq ipi_invlcache_counts(,%rax,8),%rax incq (%rax) POP_FRAME #endif pushq %rax wbinvd movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popq %rax jmp doreti_iret /* * Handler for IPIs sent via the per-cpu IPI bitmap. */ .text SUPERALIGN_TEXT IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME movq lapic, %rdx movl $0, LA_EOI(%rdx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(TF_RIP(%rsp)) call ipi_bitmap_handler MEXITCOUNT jmp doreti /* * Executed by a CPU when it receives an IPI_STOP from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpustop) PUSH_FRAME movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ call cpustop_handler jmp doreti /* * Executed by a CPU when it receives an IPI_SUSPEND from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpususpend) PUSH_FRAME call cpususpend_handler movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ jmp doreti /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. * * - Calls the generic rendezvous action function. 
*/ .text SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME #ifdef COUNT_IPIS movl PCPU(CPUID), %eax movq ipi_rendezvous_counts(,%rax,8), %rax incq (%rax) #endif call smp_rendezvous_action movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ jmp doreti #endif /* SMP */ diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 7a39ef8f43b0..7f7e54a5a191 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1,2594 +1,2592 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atalk.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_sched.h" #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_ATPIC #include #else #include #endif #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern u_int64_t hammer_time(u_int64_t, u_int64_t); extern void printcpuinfo(void); /* XXX header file */ extern void identify_cpu(void); extern void panicifcpuunsupported(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its value is * the physical address at which the kernel is loaded. */ extern char kernphys[]; #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif struct msgbuf *msgbufp; /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; struct mtx dt_lock; /* lock for GDT and LDT */ static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif realmem = Maxmem; /* * Display physical memory if SMBIOS reports reasonable amount. 
*/ memsize = 0; sysenv = getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct pcb *pcb; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; pcb = td->td_pcb; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, sizeof(sf.sf_uc.uc_mcontext.mc_spare)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned long)sp & ~0x3Ful); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. 
*/ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = p->p_sysent->sv_sigcode_base; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct pcb *pcb; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; long rflags; int cs, error, ret; ksiginfo_t ksi; pcb = td->td_pcb; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) { uprintf("pid %d (%s): sigreturn copyin failed\n", p->p_pid, td->td_name); return (error); } ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid, td->td_name, rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) { uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); set_pcb_flags(pcb, PCB_FULL_IRET); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* Not applicable */ } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { uint64_t tsc1, tsc2; uint64_t acnt, mcnt, perf; register_t reg; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); /* * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, * DELAY(9) based logic fails. */ if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); } #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); if (tsc_is_invariant) { wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); tsc1 = rdtsc(); DELAY(1000); mcnt = rdmsr(MSR_MPERF); acnt = rdmsr(MSR_APERF); tsc2 = rdtsc(); intr_restore(reg); perf = 1000 * acnt / mcnt; *rate = (tsc2 - tsc1) * perf; } else { tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); *rate = (tsc2 - tsc1) * 1000; } #ifdef SMP if (smp_cpus > 1) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } #endif return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) halt(); } void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. 
*/ TUNABLE_INT("machdep.idle_mwait", &idle_mwait); SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, 0, "Use MONITOR/MWAIT for short idle"); #define STATE_RUNNING 0x0 #define STATE_MWAIT 0x1 #define STATE_SLEEPING 0x2 static void cpu_idle_acpi(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(sbt); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } static void cpu_idle_hlt(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* * Since we may be in a critical section from cpu_idle(), if * an interrupt fires during that critical section we may have * a pending preemption. If the CPU halts, then that thread * may not execute until a later interrupt awakens the CPU. * To handle this race, check for a runnable thread after * disabling interrupts and immediately return if one is * found. Also, we must absolutely guarentee that hlt is * the next instruction after sti. This ensures that any * interrupt that fires after the call to disable_intr() will * immediately awaken the CPU from hlt. Finally, please note * that on x86 this works fine because of interrupts enabled only * after the instruction following sti takes place, while IF is set * to 1 immediately, allowing hlt instruction to acknowledge the * interrupt. */ disable_intr(); if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } /* * MWAIT cpu power states. Lower 4 bits are sub-states. */ #define MWAIT_C0 0xf0 #define MWAIT_C1 0x00 #define MWAIT_C2 0x10 #define MWAIT_C3 0x20 #define MWAIT_C4 0x30 static void cpu_idle_mwait(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_MWAIT; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) { enable_intr(); *state = STATE_RUNNING; return; } cpu_monitor(state, 0, 0); if (*state == STATE_MWAIT) __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else enable_intr(); *state = STATE_RUNNING; } static void cpu_idle_spin(sbintime_t sbt) { int *state; int i; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_RUNNING; /* * The sched_runnable() call is racy but as long as there is * a loop missing it one time will have just a little impact if any * (and it is much better than missing the check at all). */ for (i = 0; i < 1000; i++) { if (sched_runnable()) return; cpu_spinwait(); } } /* * C1E renders the local APIC timer dead, so we disable it by * reading the Interrupt Pending Message register and clearing * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). * * Reference: * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" * #32559 revision 3.00+ */ #define MSR_AMDK8_IPM 0xc0010055 #define AMDK8_SMIONCMPHALT (1ULL << 27) #define AMDK8_C1EONCMPHALT (1ULL << 28) #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) static void cpu_probe_amdc1e(void) { /* * Detect the presence of C1E capability mostly on latest * dual-cores (or future) k8 family. 
*/ if (cpu_vendor_id == CPU_VENDOR_AMD && (cpu_id & 0x00000f00) == 0x00000f00 && (cpu_id & 0x0fff0000) >= 0x00040000) { cpu_ident_amdc1e = 1; } } void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; void cpu_idle(int busy) { uint64_t msr; sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); #ifdef MP_WATCHDOG ap_watchdog(PCPU_GET(cpuid)); #endif /* If we are busy - try to use fast methods. */ if (busy) { if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { cpu_idle_mwait(busy); goto out; } } /* If we have time - switch timers into idle mode. */ if (!busy) { critical_enter(); sbt = cpu_idleclock(); } /* Apply AMD APIC timer C1E workaround. */ if (cpu_ident_amdc1e && cpu_disable_deep_sleep) { msr = rdmsr(MSR_AMDK8_IPM); if (msr & AMDK8_CMPHALT) wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } /* Call main idle method. */ cpu_idle_fn(sbt); /* Switch timers mack into active mode. */ if (!busy) { cpu_activeclock(); critical_exit(); } out: CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; int *state; pcpu = pcpu_find(cpu); state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ if (*state == STATE_SLEEPING) return (0); if (*state == STATE_MWAIT) *state = STATE_RUNNING; return (1); } /* * Ordered by speed/power consumption. */ struct { void *id_fn; char *id_name; } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, { cpu_idle_hlt, "hlt" }, { cpu_idle_acpi, "acpi" }, { NULL, NULL } }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; p += sprintf(p, "%s%s", p != avail ? ", " : "", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); static int idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; char *p; int i; p = "unknown"; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; if (strcmp(idle_tbl[i].id_name, buf)) continue; cpu_idle_fn = idle_tbl[i].id_fn; return (0); } return (EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); /* * Reset registers to default values on exec. 
*/ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; regs->tf_rdi = stack; /* argv */ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; td->td_retval[1] = 0; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { register_t cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); } /* * Initialize amd64 and configure to run kernel */ /* * Initialize segments & interrupt table */ struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; /* * Software prototypes -- in more palatable form. * * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same * slots as corresponding segments for i386 kernel. 
*/ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GNULL2_SEL 1 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 8 64 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE - 1, .ssd_type = SDT_SYSTSS, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Actually, the TSS is a system descriptor which is double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 LDT Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 12 LDT Descriptor, double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; void setidt(idx, func, typ, dpl, ist) int idx; inthand_t *func; int typ; int dpl; int ist; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((uintptr_t)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), +#endif +#ifdef XENHVM + IDTVEC(xen_intr_upcall), #endif IDTVEC(fast_syscall), IDTVEC(fast_syscall32); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. 
*/ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { struct { uint16_t limit; uint64_t base; } __packed idtr, gdtr; uint16_t ldt, tr; __asm __volatile("sidt %0" : "=m" (idtr)); db_printf("idtr\t0x%016lx/%04x\n", (u_long)idtr.base, (u_int)idtr.limit); __asm __volatile("sgdt %0" : "=m" (gdtr)); db_printf("gdtr\t0x%016lx/%04x\n", (u_long)gdtr.base, (u_int)gdtr.limit); __asm __volatile("sldt %0" : "=r" (ldt)); db_printf("ldtr\t0x%04x\n", ldt); __asm __volatile("str %0" : "=r" (tr)); db_printf("tr\t0x%04x\n", tr); db_printf("cr0\t0x%016lx\n", rcr0()); db_printf("cr2\t0x%016lx\n", rcr2()); db_printf("cr3\t0x%016lx\n", rcr3()); db_printf("cr4\t0x%016lx\n", rcr4()); db_printf("EFER\t%016lx\n", rdmsr(MSR_EFER)); db_printf("FEATURES_CTL\t%016lx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); db_printf("DEBUG_CTL\t%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); db_printf("PAT\t%016lx\n", rdmsr(MSR_PAT)); db_printf("GSBASE\t%016lx\n", rdmsr(MSR_GSBASE)); } #endif void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct soft_segment_descriptor *ssd; struct system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } #if !defined(DEV_ATPIC) && defined(DEV_ISA) #include #include /* * Return a bitmap of the current interrupt requests. This is 8259-specific * and is only suitable for use at probe time. * This is only here to pacify sio. It is NOT FATAL if this doesn't work. * It shouldn't be here. There should probably be an APIC centric * implementation in the apic driver code, if at all. */ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } #endif u_int basemem; static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); if (smap->length == 0) return (0); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. 
*/ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (smap->base + smap->length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && smap->base + smap->length == physmap[insert_idx]) { physmap[insert_idx] = smap->base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && smap->base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += smap->length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = smap->base; physmap[insert_idx + 1] = smap->base + smap->length; return (1); } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx, da_indx; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; u_long physmem_start, physmem_tunable, memtest; pt_entry_t *pte; struct bios_smap *smapbase, *smap, *smapend; u_int32_t smapsize; quad_t dcons_addr, dcons_size; bzero(physmap, sizeof(physmap)); basemem = 0; physmap_idx = 0; /* * get memory map from INT 15:E820, kindly supplied by the loader. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. */ smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) panic("No BIOS smap info from loader!"); smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, &physmap_idx)) break; /* * Find the 'base memory' segment for SMP */ basemem = 0; for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) panic("BIOS smap did not include a basemem segment!"); #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 
0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); /* * Don't allow MAXMEM or hw.physmem to extend the amount of memory * in the system. */ if (Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. * * XXX Some BIOSes corrupt low 64KB between suspend and resume. * By default, mask off the first 16 pages unless we appear to be * running in a VM. */ physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); if (physmem_start < PAGE_SIZE) physmap[0] = PAGE_SIZE; else if (physmem_start >= physmap[1]) physmap[0] = round_page(physmap[1] - PAGE_SIZE); else physmap[0] = round_page(physmem_start); pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= (vm_paddr_t)kernphys && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. 
*/ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]); } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; struct nmi_pcpu *np; struct xstate_hdr *xhdr; u_int64_t msr; char *env; size_t kstack0_sz; thread0.td_kstack = physfree + KERNBASE; thread0.td_kstack_pages = KSTACK_PAGES; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; bzero((void *)thread0.td_kstack, kstack0_sz); physfree += kstack0_sz; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE; #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); #endif /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments */ for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); dpcpu_init((void *)(physfree + KERNBASE), 0); physfree += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(tssp, &common_tss[0]); PCPU_SET(commontssp, &common_tss[0]); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. 
*/ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); #endif +#ifdef XENHVM + setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); #endif #else #error "have you forgotten the isa device?"; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif identify_cpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* * NMI stack, runs on ist2. The pcpu pointer is stored just * above the start of the ist2 stack. 
*/ np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist2 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, msgbufsize); fpuinit(); /* * Set up thread0 pcb after fpuinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; /* Ensure the stack is aligned to 16 bytes */ common_tss[0].tss_rsp0 &= ~0xFul; PCPU_SET(rsp0, common_tss[0].tss_rsp0); PCPU_SET(curpcb, thread0.td_pcb); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); _ufssel = GSEL(GUFS32_SEL, SEL_UPL); _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_cr3 = KPML4phys; thread0.td_frame = &proc0_tf; env = getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); -#ifdef XENHVM - if (inw(0x10) == 0x49d2) { - if (bootverbose) - printf("Xen detected: disabling emulated block and network devices\n"); - outw(0x10, 3); - } -#endif - cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_r12 = tf->tf_r12; pcb->pcb_r13 = tf->tf_r13; pcb->pcb_r14 = tf->tf_r14; pcb->pcb_r15 = tf->tf_r15; pcb->pcb_rbp = tf->tf_rbp; pcb->pcb_rbx = tf->tf_rbx; pcb->pcb_rip = tf->tf_rip; pcb->pcb_rsp = tf->tf_rsp; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_rflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_rflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; tp = td->td_frame; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; if (tp->tf_flags & TF_HASSEGS) { regs->r_ds = tp->tf_ds; regs->r_es = tp->tf_es; regs->r_fs = tp->tf_fs; regs->r_gs = tp->tf_gs; } else { regs->r_ds = 0; regs->r_es = 0; regs->r_fs = 0; regs->r_gs = 0; } return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; register_t rflags; tp = td->td_frame; rflags = regs->r_rflags & 0xffffffff; if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; if (0) { /* XXXKIB */ tp->tf_ds = regs->r_ds; tp->tf_es = regs->r_es; tp->tf_fs = regs->r_fs; tp->tf_gs = regs->r_gs; tp->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } return (0); } /* XXX check all this stuff! 
*/ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); fpugetregs(td); fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); fpuuserinited(td); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; struct trapframe *tp; pcb = td->td_pcb; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; mcp->mc_rflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; mcp->mc_rflags &= ~PSL_C; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_ds = tp->tf_ds; mcp->mc_es = tp->tf_es; mcp->mc_fs = tp->tf_fs; mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
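 *
 * Concretely, the rflags handed in are merged below as
 *
 *      rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
 *               (tp->tf_rflags & ~PSL_USERCHANGE);
 *
 * so a context may change the arithmetic flags, PSL_T and the other
 * user-modifiable bits, while attempts to raise IOPL or clear PSL_I are
 * silently ignored.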
*/ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tp; char *xfpustate; long rflags; int ret; pcb = td->td_pcb; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; tp->tf_r14 = mcp->mc_r14; tp->tf_r13 = mcp->mc_r13; tp->tf_r12 = mcp->mc_r12; tp->tf_r11 = mcp->mc_r11; tp->tf_r10 = mcp->mc_r10; tp->tf_r9 = mcp->mc_r9; tp->tf_r8 = mcp->mc_r8; tp->tf_rdi = mcp->mc_rdi; tp->tf_rsi = mcp->mc_rsi; tp->tf_rbp = mcp->mc_rbp; tp->tf_rbx = mcp->mc_rbx; tp->tf_rdx = mcp->mc_rdx; tp->tf_rcx = mcp->mc_rcx; tp->tf_rax = mcp->mc_rax; tp->tf_rip = mcp->mc_rip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; tp->tf_flags = mcp->mc_flags; if (tp->tf_flags & TF_HASSEGS) { tp->tf_ds = mcp->mc_ds; tp->tf_es = mcp->mc_es; tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } set_pcb_flags(pcb, PCB_FULL_IRET); return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; mcp->mc_ownedfp = fpugetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(struct savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { struct savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); if (PCPU_GET(fpcurthread) == td) fpudrop(); /* * XXX force a full drop of the fpu. The above only drops it if we * owned it. * * XXX I don't much like fpugetuserregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of fpugetuserregs()... perhaps we just * have too many layers. 
*/ clear_pcb_flags(curthread->td_pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[8] = 0; dbregs->dr[9] = 0; dbregs->dr[10] = 0; dbregs->dr[11] = 0; dbregs->dr[12] = 0; dbregs->dr[13] = 0; dbregs->dr[14] = 0; dbregs->dr[15] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP or a general protection fault right here. * Upper bits of dr6 and dr7 must not be set */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (td->td_frame->tf_cs == _ucode32sel && DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) return (EINVAL); } if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || (dbregs->dr[7] & 0xffffffff00000000ul) != 0) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; set_pcb_flags(pcb, PCB_DBREGS); } return (0); } void reset_dbregs(void) { load_dr7(0); /* Turn off the control bits first */ load_dr0(0); load_dr1(0); load_dr2(0); load_dr3(0); load_dr6(0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
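 *
 * For reference, the debug register bits consulted below (architectural
 * layout, summarized here rather than defined in this file):
 *
 *      %dr7 bits 0..7:  L0,G0, L1,G1, L2,G2, L3,G3   per-breakpoint enables
 *      %dr6 bits 0..3:  B0..B3                       which breakpoint(s) fired
 *
 * Worked example: %dr6 == 0x5 means breakpoints 0 and 2 triggered, so the
 * addresses in %dr0 and %dr2 are the ones compared against
 * VM_MAXUSER_ADDRESS.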
*/ int user_dbreg_trap(void) { u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int64_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 79aeb9c32291..267b933cefd2 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1,1497 +1,1506 @@ /*- * Copyright (c) 1996, by Steve Passe * Copyright (c) 2003, by Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_sched.h" #include "opt_smp.h" #include #include #include #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#ifdef XENHVM +#include +#endif + #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) /* lock region used by kernel profiling */ int mcount_lock; int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern struct pcpu __pcpu[]; /* AP uses this during bootstrap. Do not staticize. */ char *bootSTK; static int bootAP; /* Free these after use */ void *bootstacks[MAXCPU]; /* Temporary variables for init_secondary() */ char *doublefault_stack; char *nmi_stack; void *dpcpu; struct pcb stoppcbs[MAXCPU]; struct pcb **susppcbs; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; #ifdef COUNT_IPIS /* Interrupt counts. */ static u_long *ipi_preempt_counts[MAXCPU]; static u_long *ipi_ast_counts[MAXCPU]; u_long *ipi_invltlb_counts[MAXCPU]; u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); /* * Local data and functions. */ static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready = 0; /* * Store data from cpu_add() until later in the boot when we actually setup * the APs. */ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; int cpu_hyperthread:1; } static cpu_info[MAX_APIC_ID + 1]; int cpu_apic_ids[MAXCPU]; int apic_cpuids[MAX_APIC_ID + 1]; /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; static int cpu_logical; /* logical cpus per core */ static int cpu_cores; /* cores per package */ static void assign_cpu_ids(void); static void set_interrupt_apic_ids(void); static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ static int hyperthreading_allowed = 1; static u_int bootMP_size; static void mem_range_AP_init(void) { if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) mem_range_softc.mr_op->initAP(&mem_range_softc); } static void topo_probe_amd(void) { int core_id_bits; int id; /* AMD processors do not support HTT. */ cpu_logical = 1; if ((amd_feature2 & AMDID2_CMP) == 0) { cpu_cores = 1; return; } core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >> AMDID_COREID_SIZE_SHIFT; if (core_id_bits == 0) { cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; return; } /* Fam 10h and newer should get here. */ for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. */ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. 
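 *
 * Worked example (illustrative numbers, not from a particular part): with
 * core_id_bits == 2 the package ID is APIC ID >> 2, so for boot_cpu_id == 1:
 *
 *      id = 3  ->  3 >> 2 == 0 == 1 >> 2    same package, counted
 *      id = 5  ->  5 >> 2 == 1 != 0         other package, skipped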
*/ if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits)) continue; cpu_cores++; } } /* * Round up to the next power of two, if necessary, and then * take log2. * Returns -1 if argument is zero. */ static __inline int mask_width(u_int x) { return (fls(x << (1 - powerof2(x))) - 1); } static void topo_probe_0x4(void) { u_int p[4]; int pkg_id_bits; int core_id_bits; int max_cores; int max_logical; int id; /* Both zero and one here mean one logical processor per package. */ max_logical = (cpu_feature & CPUID_HTT) != 0 ? (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; if (max_logical <= 1) return; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. Further, we count number of * logical processors that belong to the same core * as BSP thus deducing number of threads per core. */ if (cpu_high >= 0x4) { cpuid_count(0x04, 0, p); max_cores = ((p[0] >> 26) & 0x3f) + 1; } else max_cores = 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; pkg_id_bits = core_id_bits + mask_width(max_cores); for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. */ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. */ if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits)) continue; cpu_cores++; /* Check if logical CPU has the same package and core IDs. */ if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits)) cpu_logical++; } KASSERT(cpu_cores >= 1 && cpu_logical >= 1, ("topo_probe_0x4 couldn't find BSP")); cpu_cores /= cpu_logical; hyperthreading_cpus = cpu_logical; } static void topo_probe_0xb(void) { u_int p[4]; int bits; int cnt; int i; int logical; int type; int x; /* We only support three levels for now. */ for (i = 0; i < 3; i++) { cpuid_count(0x0b, i, p); /* Fall back if CPU leaf 11 doesn't really exist. */ if (i == 0 && p[1] == 0) { topo_probe_0x4(); return; } bits = p[0] & 0x1f; logical = p[1] &= 0xffff; type = (p[2] >> 8) & 0xff; if (type == 0 || logical == 0) break; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. */ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_disabled) continue; if (x >> bits == boot_cpu_id >> bits) cnt++; } if (type == CPUID_TYPE_SMT) cpu_logical = cnt; else if (type == CPUID_TYPE_CORE) cpu_cores = cnt; } if (cpu_logical == 0) cpu_logical = 1; cpu_cores /= cpu_logical; } /* * Both topology discovery code and code that consumes topology * information assume top-down uniformity of the topology. * That is, all physical packages must be identical and each * core in a package must have the same number of threads. * Topology information is queried only on BSP, on which this * code runs and for which it can query CPUID information. * Then topology is extrapolated on all packages using the * uniformity assumption. */ static void topo_probe(void) { static int cpu_topo_probed = 0; if (cpu_topo_probed) return; CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) topo_probe_amd(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) { /* * See Intel(R) 64 Architecture Processor * Topology Enumeration article for details. * * Note that 0x1 <= cpu_high < 4 case should be * compatible with topo_probe_0x4() logic when * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) * or it should trigger the fallback otherwise. 
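 *
 * A worked example of the derivation topo_probe_0x4() performs (numbers
 * are illustrative): with CPUID.1:EBX[23:16] == 16 logical CPUs per
 * package and CPUID.4 reporting max_cores == 6,
 *
 *      core_id_bits = mask_width(16 / 6) = mask_width(2) = 1
 *      pkg_id_bits  = 1 + mask_width(6)  = 1 + 3         = 4
 *
 * so APIC IDs that agree from bit 4 up share a package, and IDs that agree
 * from bit 1 up share a core.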
*/ if (cpu_high >= 0xb) topo_probe_0xb(); else if (cpu_high >= 0x1) topo_probe_0x4(); } /* * Fallback: assume each logical CPU is in separate * physical package. That is, no multi-core, no SMT. */ if (cpu_cores == 0 || cpu_logical == 0) cpu_cores = cpu_logical = 1; cpu_topo_probed = 1; } struct cpu_group * cpu_topo(void) { int cg_flags; /* * Determine whether any threading flags are * necessry. */ topo_probe(); if (cpu_logical > 1 && hyperthreading_cpus) cg_flags = CG_FLAG_HTT; else if (cpu_logical > 1) cg_flags = CG_FLAG_SMT; else cg_flags = 0; if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { printf("WARNING: Non-uniform processors.\n"); printf("WARNING: Using suboptimal topology.\n"); return (smp_topo_none()); } /* * No multi-core or hyper-threaded. */ if (cpu_logical * cpu_cores == 1) return (smp_topo_none()); /* * Only HTT no multi-core. */ if (cpu_logical > 1 && cpu_cores == 1) return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags)); /* * Only multi-core no HTT. */ if (cpu_cores > 1 && cpu_logical == 1) return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags)); /* * Both HTT and multi-core. */ return (smp_topo_2level(CG_SHARE_L2, cpu_cores, CG_SHARE_L1, cpu_logical, cg_flags)); } /* * Calculate usable address in base memory for AP trampoline code. */ u_int mp_bootaddress(u_int basemem) { bootMP_size = mptramp_end - mptramp_start; boot_address = trunc_page(basemem * 1024); /* round down to 4k boundary */ if (((basemem * 1024) - boot_address) < bootMP_size) boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ /* 3 levels of page table pages */ mptramp_pagetables = boot_address - (PAGE_SIZE * 3); return mptramp_pagetables; } void cpu_add(u_int apic_id, char boot_cpu) { if (apic_id > MAX_APIC_ID) { panic("SMP: APIC ID %d too high", apic_id); return; } KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", apic_id)); cpu_info[apic_id].cpu_present = 1; if (boot_cpu) { KASSERT(boot_cpu_id == -1, ("CPU %d claims to be BSP, but CPU %d already is", apic_id, boot_cpu_id)); boot_cpu_id = apic_id; cpu_info[apic_id].cpu_bsp = 1; } if (mp_ncpus < MAXCPU) { mp_ncpus++; mp_maxid = mp_ncpus - 1; } if (bootverbose) printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : "AP"); } void cpu_mp_setmaxid(void) { /* * mp_maxid should be already set by calls to cpu_add(). * Just sanity check its value here. */ if (mp_ncpus == 0) KASSERT(mp_maxid == 0, ("%s: mp_ncpus is zero, but mp_maxid is not", __func__)); else if (mp_ncpus == 1) mp_maxid = 0; else KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } int cpu_mp_probe(void) { /* * Always record BSP in CPU map so that the mbuf init code works * correctly. */ CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup * the variables to represent a system with a single CPU * with an id of 0. */ mp_ncpus = 1; return (0); } /* At least one CPU was found. */ if (mp_ncpus == 1) { /* * One CPU was found, so this must be a UP system with * an I/O APIC. */ mp_maxid = 0; return (0); } /* At least two CPUs were found. */ return (1); } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. 
*/ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYSIGT, SEL_KPL, 0); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. */ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); set_interrupt_apic_ids(); } /* * Print various information about the SMP system hardware and setup. */ void cpu_mp_announce(void) { const char *hyperthread; int i; printf("FreeBSD/SMP: %d package(s) x %d core(s)", mp_ncpus / (cpu_cores * cpu_logical), cpu_cores); if (hyperthreading_cpus > 1) printf(" x %d HTT threads", cpu_logical); else if (cpu_logical > 1) printf(" x %d SMT threads", cpu_logical); printf("\n"); /* List active CPUs first. */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1; i < mp_ncpus; i++) { if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread, cpu_apic_ids[i]); } /* List disabled CPUs last. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled) continue; if (cpu_info[i].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread, i); } } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr, cr0; u_int cpuid; int cpu, gsel_tss, x; struct region_descriptor ap_gdt; /* Set by the startup code for us to use */ cpu = bootAP; /* Init tss */ common_tss[cpu] = common_tss[0]; common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */ common_tss[cpu].tss_iobase = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE; common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE]; /* The NMI stack runs on IST2. 
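 *
 * Sketch of the page laid out below (one page per AP; the nmi_pcpu sits at
 * the very top and the stack grows down from just beneath it):
 *
 *      np          = (struct nmi_pcpu *)&nmi_stack[PAGE_SIZE] - 1;
 *      tss_ist2    = (long)np;            IST2 stack pointer
 *      np->np_pcpu = (register_t)pc;      saved per-CPU pointer
 *
 * An NMI can arrive before swapgs has run, so the handler recovers its
 * pcpu pointer from np_pcpu at the top of the IST2 stack instead of
 * trusting the current %gs base.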
*/ np = ((struct nmi_pcpu *) &nmi_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist2 = (long) np; /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1)) ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]); } ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]); ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; ap_gdt.rd_base = (long) &gdt[NGDT * cpu]; lgdt(&ap_gdt); /* does magic intra-segment return */ /* Get per-cpu data */ pc = &__pcpu[cpu]; /* prime data page for it to use */ pcpu_init(pc, cpu, sizeof(struct pcpu)); dpcpu_init(dpcpu, cpu); pc->pc_apic_id = cpu_apic_ids[cpu]; pc->pc_prvspace = pc; pc->pc_curthread = 0; pc->pc_tssp = &common_tss[cpu]; pc->pc_commontssp = &common_tss[cpu]; pc->pc_rsp0 = 0; pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]; pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL]; pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GUSERLDT_SEL]; /* Save the per-cpu pointer for use by the NMI handler. */ np->np_pcpu = (register_t) pc; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ lidt(&r_idt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); /* Disable local APIC just to be sure. */ lapic_disable(); /* signal our startup to the BSP. */ mp_naps++; /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); /* Initialize the PAT MSR. */ pmap_init_pat(); /* set up CPU registers and state */ cpu_setregs(); /* set up SSE/NX registers */ initializecpu(); /* set up FPU state on the AP */ fpuinit(); +#ifdef XENHVM + /* register vcpu_info area */ + xen_hvm_init_cpu(); +#endif + /* A quick check from sanity claus */ cpuid = PCPU_GET(cpuid); if (PCPU_GET(apic_id) != lapic_id()) { printf("SMP: cpuid = %d\n", cpuid); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); panic("cpuid mismatch! boom!!"); } /* Initialize curthread. */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); mca_init(); mtx_lock_spin(&ap_boot_mtx); /* Init local apic for irq's */ lapic_setup(1); /* Set memory range attributes for this CPU to match the BSP */ mem_range_AP_init(); smp_cpus++; CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); printf("SMP: AP CPU #%d Launched!\n", cpuid); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 
2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) CPU_SET(cpuid, &logical_cpus_mask); if (bootverbose) lapic_dump("AP"); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); smp_active = 1; /* historic */ } /* * Enable global pages TLB extension * This also implicitly flushes the TLB */ load_cr4(rcr4() | CR4_PGE); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); mtx_unlock_spin(&ap_boot_mtx); /* Wait until all the AP's are up. */ while (smp_started == 0) ia32_pause(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } /******************************************************************* * local functions and data */ /* * We tell the I/O APIC code about all the CPUs we want to receive * interrupts. If we don't want certain CPUs to receive IRQs we * can simply not tell the I/O APIC code about them in this function. * We also do not tell it about the BSP since it tells itself about * the BSP internally to work with UP kernels and on UP machines. */ static void set_interrupt_apic_ids(void) { u_int i, apic_id; for (i = 0; i < MAXCPU; i++) { apic_id = cpu_apic_ids[i]; if (apic_id == -1) continue; if (cpu_info[apic_id].cpu_bsp) continue; if (cpu_info[apic_id].cpu_disabled) continue; /* Don't let hyperthreads service interrupts. */ if (hyperthreading_cpus > 1 && apic_id % hyperthreading_cpus != 0) continue; intr_add_cpu(i); } } /* * Assign logical CPU IDs to local APICs. */ static void assign_cpu_ids(void) { u_int i; TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", &hyperthreading_allowed); /* Check for explicitly disabled CPUs. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) continue; if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; /* * Don't use HT CPU if it has been disabled by a * tunable. */ if (hyperthreading_allowed == 0) { cpu_info[i].cpu_disabled = 1; continue; } } /* Don't use this CPU if it has been disabled by a tunable. */ if (resource_disabled("lapic", i)) { cpu_info[i].cpu_disabled = 1; continue; } } if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { hyperthreading_cpus = 0; cpu_logical = 1; } /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. * * To minimize confusion for userland, we attempt to number * CPUs such that all threads and cores in a package are * grouped together. For now we assume that the BSP is always * the first thread in a package and just start adding APs * starting with the BSP's APIC ID. */ mp_ncpus = 1; cpu_apic_ids[0] = boot_cpu_id; apic_cpuids[boot_cpu_id] = 0; for (i = boot_cpu_id + 1; i != boot_cpu_id; i == MAX_APIC_ID ? 
i = 0 : i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || cpu_info[i].cpu_disabled) continue; if (mp_ncpus < MAXCPU) { cpu_apic_ids[mp_ncpus] = i; apic_cpuids[i] = mp_ncpus; mp_ncpus++; } else cpu_info[i].cpu_disabled = 1; } KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } /* * start each AP in our list */ static int start_all_aps(void) { vm_offset_t va = boot_address + KERNBASE; u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; int apic_id, cpu, i; u_char mpbiosreason; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* install the AP 1st level boot code */ pmap_kenter(va, boot_address); pmap_invalidate_page(kernel_pmap, va); bcopy(mptramp_start, (void *)va, bootMP_size); /* Locate the page tables, they'll be below the trampoline */ pt4 = (u_int64_t *)(uintptr_t)(mptramp_pagetables + KERNBASE); pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t); pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t); /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { /* Each slot of the level 4 pages points to the same level 3 page */ pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + PAGE_SIZE); pt4[i] |= PG_V | PG_RW | PG_U; /* Each slot of the level 3 pages points to the same level 2 page */ pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables + (2 * PAGE_SIZE)); pt3[i] |= PG_V | PG_RW | PG_U; /* The level 2 page slots are mapped with 2MB pages for 1GB. */ pt2[i] = i * (2 * 1024 * 1024); pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; } /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up an idle stack data page */ bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, KSTACK_PAGES * PAGE_SIZE, M_WAITOK | M_ZERO); doublefault_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; /* attempt to start the Application Processor */ if (!start_ap(apic_id)) { /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); /* number of APs actually started */ return mp_naps; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. 
*/ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } #ifdef COUNT_XINVLTLB_HITS u_int xhits_gbl[MAXCPU]; u_int xhits_pg[MAXCPU]; u_int xhits_rng[MAXCPU]; static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, sizeof(xhits_gbl), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, sizeof(xhits_pg), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, sizeof(xhits_rng), "IU", ""); u_int ipi_global; u_int ipi_page; u_int ipi_range; u_int ipi_range_size; SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, 0, ""); u_int ipi_masked_global; u_int ipi_masked_page; u_int ipi_masked_range; u_int ipi_masked_range_size; SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, &ipi_masked_global, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, &ipi_masked_page, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, &ipi_masked_range, 0, ""); SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ /* * Init and startup IPI. */ void ipi_startup(int apic_id, int vector) { /* * first we do an INIT IPI: this INIT IPI might be run, resetting * and running the target CPU. OR this INIT IPI might be latched (P5 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be * ignored. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); lapic_ipi_wait(-1); DELAY(10000); /* wait ~10mS */ /* * next we do a STARTUP IPI: the previous INIT IPI might still be * latched, (P5 bug) this 1st STARTUP would then terminate * immediately, and the previously started INIT IPI would continue. OR * the previous INIT IPI has already run. and this STARTUP IPI will * run. OR the previous INIT IPI was ignored. and this STARTUP IPI * will run. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ /* * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is * recognized after hardware RESET or INIT IPI. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ } /* * Send an IPI to specified CPU handling the bitmap logic. 
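 *
 * Sketch of the bitmap path taken below for the small IPIs (IPI_PREEMPT,
 * IPI_AST, IPI_HARDCLOCK), which all share one vector:
 *
 *      bitmap = 1 << ipi;
 *      atomically OR bitmap into cpu_ipi_pending[cpu];
 *      if the word was already non-zero, return;     a delivery is in flight
 *      lapic_ipi_vectored(IPI_BITMAP_VECTOR, cpu_apic_ids[cpu]);
 *
 * At most one IPI_BITMAP_VECTOR interrupt per destination CPU is therefore
 * outstanding for the whole group, which keeps within the two-entry APIC
 * receive FIFO budget described in apicvar.h.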
*/ static void ipi_send_cpu(int cpu, u_int ipi) { u_int bitmap, old_pending, new_pending; KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; do { old_pending = cpu_ipi_pending[cpu]; new_pending = old_pending | bitmap; } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending)); if (old_pending) return; } lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); } /* * Flush the TLB on all other CPU's */ static void smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) return; /* no other cpus */ if (!(read_rflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); ipi_all_but_self(vector); while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } static void smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; if (CPU_ISFULLSET(&mask)) { if (othercpus < 1) return; } else { CPU_CLR(PCPU_GET(cpuid), &mask); if (CPU_EMPTY(&mask)) return; } if (!(read_rflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); if (CPU_ISFULLSET(&mask)) { ncpu = othercpus; ipi_all_but_self(vector); } else { ncpu = 0; while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, vector); ipi_send_cpu(cpu, vector); ncpu++; } } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } void smp_cache_flush(void) { if (smp_started) smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); } void smp_invltlb(void) { if (smp_started) { smp_tlb_shootdown(IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_global++; #endif } } void smp_invlpg(vm_offset_t addr) { if (smp_started) { smp_tlb_shootdown(IPI_INVLPG, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_page++; #endif } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_range++; ipi_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void smp_masked_invltlb(cpuset_t mask) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_global++; #endif } } void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_page++; #endif } } void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_masked_range++; ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void ipi_bitmap_handler(struct trapframe frame) { struct trapframe *oldframe; struct thread *td; int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; critical_enter(); td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = &frame; ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); if (ipi_bitmap & (1 << IPI_PREEMPT)) { #ifdef COUNT_IPIS (*ipi_preempt_counts[cpu])++; #endif sched_preempt(td); } if (ipi_bitmap & (1 << IPI_AST)) { #ifdef COUNT_IPIS (*ipi_ast_counts[cpu])++; #endif /* 
Nothing to do for AST */ } if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { #ifdef COUNT_IPIS (*ipi_hardclock_counts[cpu])++; #endif hardclockintr(); } td->td_intr_frame = oldframe; td->td_intr_nesting_level--; critical_exit(); } /* * send an IPI to a set of cpus. */ void ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } /* * send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } /* * send an IPI to all CPUs EXCEPT myself */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(other_cpus, ipi); return; } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int ipi_nmi_handler() { u_int cpuid; /* * As long as there is not a simple way to know about a NMI's * source, if the bitmask for the current CPU is present in * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) return (1); CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); cpustop_handler(); return (0); } /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. */ void cpustop_handler(void) { u_int cpu; cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); #ifdef DDB amd64_db_resume_dbreg(); #endif if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); cpustop_restartfunc = NULL; } } /* * Handle an IPI_SUSPEND by saving our current context and spinning until we * are resumed. */ void cpususpend_handler(void) { u_int cpu; cpu = PCPU_GET(cpuid); if (savectx(susppcbs[cpu])) { ctx_fpusave(susppcbs[cpu]->pcb_fpususpend); wbinvd(); CPU_SET_ATOMIC(cpu, &suspended_cpus); } else { pmap_init_pat(); initializecpu(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); /* Indicate that we are resumed */ CPU_CLR_ATOMIC(cpu, &suspended_cpus); } /* Wait for resume */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); /* Resume MCA and local APIC */ mca_resume(); lapic_setup(0); CPU_CLR_ATOMIC(cpu, &started_cpus); } /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. 
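 *
 * Handshake sketch; the AP side lives in init_secondary() above:
 *
 *      BSP (release_aps)                     AP (init_secondary)
 *      aps_ready = 1;              ----->    while (!aps_ready) ia32_pause();
 *                                            per-CPU setup, smp_cpus++;
 *      while (smp_started == 0)    <-----    last AP sets smp_started = 1
 *              ia32_pause();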
*/ static void release_aps(void *dummy __unused) { if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); while (smp_started == 0) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. */ static void mp_ipi_intrcnt(void *dummy) { char buf[64]; int i; CPU_FOREACH(i) { snprintf(buf, sizeof(buf), "cpu%d:invltlb", i); intrcnt_add(buf, &ipi_invltlb_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlrng", i); intrcnt_add(buf, &ipi_invlrng_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlpg", i); intrcnt_add(buf, &ipi_invlpg_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlcache", i); intrcnt_add(buf, &ipi_invlcache_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:preempt", i); intrcnt_add(buf, &ipi_preempt_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:ast", i); intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); #endif diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h index ae2f5b90791e..9cd4c95aff8d 100644 --- a/sys/amd64/include/apicvar.h +++ b/sys/amd64/include/apicvar.h @@ -1,232 +1,233 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_APICVAR_H_ #define _MACHINE_APICVAR_H_ #include /* * Local && I/O APIC variable definitions. 
*/ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define MAX_APIC_ID 0xfe #define APIC_ID_ALL 0xff /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 6) /* IPIs handled by IPI_BITMAPED_VECTOR (XXX ups is there a better place?) */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */ #define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. 
(Does not use the APIC's interrupt fifo) */ #define APIC_SPURIOUS_INT 255 #define LVT_LINT0 0 #define LVT_LINT1 1 #define LVT_TIMER 2 #define LVT_ERROR 3 #define LVT_PMC 4 #define LVT_THERMAL 5 #define LVT_CMCI 6 #define LVT_MAX LVT_CMCI #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI /* * An APIC enumerator is a psuedo bus driver that enumerates APIC's including * CPU's and I/O APIC's. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint); extern vm_paddr_t lapic_paddr; extern int apic_cpuids[]; u_int apic_alloc_vector(u_int apic_id, u_int irq); u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); void apic_disable_vector(u_int apic_id, u_int vector); void apic_enable_vector(u_int apic_id, u_int vector); void apic_free_vector(u_int apic_id, u_int vector, u_int irq); u_int apic_idt_to_irq(u_int apic_id, u_int vector); void apic_register_enumerator(struct apic_enumerator *enumerator); u_int apic_cpuid(u_int apic_id); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); void lapic_create(u_int apic_id, int boot_cpu); void lapic_disable(void); void lapic_disable_pmc(void); void lapic_dump(const char *str); void lapic_enable_cmc(void); int lapic_enable_pmc(void); void lapic_eoi(void); int lapic_id(void); void lapic_init(vm_paddr_t addr); int lapic_intr_pending(u_int vector); void lapic_ipi_raw(register_t icrlo, u_int dest); void lapic_ipi_vectored(u_int vector, int dest); int lapic_ipi_wait(int delay); void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); void lapic_reenable_pmc(void); void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode); int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(int boot); +void xen_intr_handle_upcall(struct trapframe *frame); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h index 8671605f8aed..c8b454d6d783 100644 --- a/sys/amd64/include/intr_machdep.h +++ b/sys/amd64/include/intr_machdep.h @@ -1,174 +1,186 @@ /*- * Copyright (c) 2003 
John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid - * confusion since 255 is used in PCI to indicate an invalid IRQ. + * IRQ values from 256 to 767 are used by MSI. When running under the Xen + * Hypervisor, IRQ values from 768 to 4863 are available for binding to + * event channel events. We leave 255 unused to avoid confusion since 255 is + * used in PCI to indicate an invalid IRQ. */ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) +#ifdef XENHVM +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT \ + (FIRST_MSI_INT + NUM_MSI_INTS) +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#else +#define NUM_EVTCHN_INTS 0 +#endif +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 8 counters for each CPU for IPI counters for SMP. 
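 *
 * Worked example of the sizing this feeds (using the values from the IRQ
 * layout comment above: 256 values below FIRST_MSI_INT for ISA and PCI
 * intline IRQs, 512 MSI IRQs and, under XENHVM, the 768..4863 event
 * channel range, i.e. NR_EVENT_CHANNELS == 4096):
 *
 *      NUM_IO_INTS          = 256 + 512 + 4096 = 4864
 *      FIRST_EVTCHN_INT     = 768,  LAST_EVTCHN_INT = 4863
 *      INTRCNT_COUNT (SMP)  = 1 + 2 * 4864 + (1 + 8) * MAXCPU
 *
 * Without XENHVM the event channel term is zero and NUM_IO_INTS stays at
 * 768.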
*/ #ifdef SMP #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 8) * MAXCPU) #else #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif #ifndef LOCORE typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; }; struct trapframe; /* * The following data structure holds per-cpu data, and is placed just * above the top of the space used for the NMI stack. */ struct nmi_pcpu { register_t np_pcpu; register_t __padding; /* pad to 16 bytes */ }; extern struct mtx icu_lock; extern int elcr_found; #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. */ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(void); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int *irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__MACHINE_INTR_MACHDEP_H__ */ diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index ba4c61870797..1c83c2a625ee 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -1,266 +1,256 @@ /*- * Copyright (c) Peter Wemm * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #ifndef _SYS_CDEFS_H_ #error "sys/cdefs.h is a prerequisite for this file" #endif #if defined(XEN) || defined(XENHVM) #ifndef NR_VIRQS #define NR_VIRQS 24 #endif #ifndef NR_IPIS #define NR_IPIS 2 #endif #endif -#ifdef XENHVM -#define PCPU_XEN_FIELDS \ - ; \ - unsigned int pc_last_processed_l1i; \ - unsigned int pc_last_processed_l2i -#else -#define PCPU_XEN_FIELDS -#endif - /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. 
* The reason for doing it via a struct is so that an array of pointers * to each CPU's data can be set up for things like "check curproc on all * other processors" */ #define PCPU_MD_FIELDS \ char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \ struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \ register_t pc_rsp0; \ register_t pc_scratch_rsp; /* User %rsp in syscall */ \ u_int pc_apic_id; \ u_int pc_acpi_id; /* ACPI CPU id */ \ /* Pointer to the CPU %fs descriptor */ \ struct user_segment_descriptor *pc_fs32p; \ /* Pointer to the CPU %gs descriptor */ \ struct user_segment_descriptor *pc_gs32p; \ /* Pointer to the CPU LDT descriptor */ \ struct system_segment_descriptor *pc_ldt; \ /* Pointer to the CPU TSS descriptor */ \ struct system_segment_descriptor *pc_tss; \ - u_int pc_cmci_mask /* MCx banks for CMCI */ \ - PCPU_XEN_FIELDS; \ + u_int pc_cmci_mask; /* MCx banks for CMCI */ \ uint64_t pc_dbreg[16]; /* ddb debugging regs */ \ int pc_dbreg_cmd; /* ddb debugging reg cmd */ \ char __pad[161] /* be divisor of PAGE_SIZE \ after cache alignment */ #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 #ifdef _KERNEL #ifdef lint extern struct pcpu *pcpup; #define PCPU_GET(member) (pcpup->pc_ ## member) #define PCPU_ADD(member, val) (pcpup->pc_ ## member += (val)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&pcpup->pc_ ## member) #define PCPU_SET(member, val) (pcpup->pc_ ## member = (val)) #elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) /* * Evaluates to the byte offset of the per-cpu variable name. */ #define __pcpu_offset(name) \ __offsetof(struct pcpu, name) /* * Evaluates to the type of the per-cpu variable name. */ #define __pcpu_type(name) \ __typeof(((struct pcpu *)0)->name) /* * Evaluates to the address of the per-cpu variable name. */ #define __PCPU_PTR(name) __extension__ ({ \ __pcpu_type(name) *__p; \ \ __asm __volatile("movq %%gs:%1,%0; addq %2,%0" \ : "=r" (__p) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ "i" (__pcpu_offset(name))); \ \ __p; \ }) /* * Evaluates to the value of the per-cpu variable name. */ #define __PCPU_GET(name) __extension__ ({ \ __pcpu_type(name) __res; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ if (sizeof(__res) == 1 || sizeof(__res) == 2 || \ sizeof(__res) == 4 || sizeof(__res) == 8) { \ __asm __volatile("mov %%gs:%1,%0" \ : "=r" (__s) \ : "m" (*(struct __s *)(__pcpu_offset(name)))); \ *(struct __s *)(void *)&__res = __s; \ } else { \ __res = *__PCPU_PTR(name); \ } \ __res; \ }) /* * Adds the value to the per-cpu counter name. The implementation * must be atomic with respect to interrupts. */ #define __PCPU_ADD(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4 || sizeof(__val) == 8) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("add %1,%%gs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else \ *__PCPU_PTR(name) += __val; \ } while (0) /* * Increments the value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. 
*/ #define __PCPU_INC(name) do { \ CTASSERT(sizeof(__pcpu_type(name)) == 1 || \ sizeof(__pcpu_type(name)) == 2 || \ sizeof(__pcpu_type(name)) == 4 || \ sizeof(__pcpu_type(name)) == 8); \ if (sizeof(__pcpu_type(name)) == 1) { \ __asm __volatile("incb %%gs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 2) { \ __asm __volatile("incw %%gs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 4) { \ __asm __volatile("incl %%gs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 8) { \ __asm __volatile("incq %%gs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } \ } while (0) /* * Sets the value of the per-cpu variable name to value val. */ #define __PCPU_SET(name, val) { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__pcpu_type(name)), 8)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4 || sizeof(__val) == 8) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("mov %1,%%gs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else { \ *__PCPU_PTR(name) = __val; \ } \ } #define PCPU_GET(member) __PCPU_GET(pc_ ## member) #define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) #define PCPU_INC(member) __PCPU_INC(pc_ ## member) #define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) #define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) #define OFFSETOF_CURTHREAD 0 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnull-dereference" #endif static __inline __pure2 struct thread * __curthread(void) { struct thread *td; __asm("movq %%gs:%1,%0" : "=r" (td) : "m" (*(char *)OFFSETOF_CURTHREAD)); return (td); } #ifdef __clang__ #pragma clang diagnostic pop #endif #define curthread (__curthread()) #define OFFSETOF_CURPCB 32 static __inline __pure2 struct pcb * __curpcb(void) { struct pcb *pcb; __asm("movq %%gs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB)); return (pcb); } #define curpcb (__curpcb()) #define IS_BSP() (PCPU_GET(cpuid) == 0) #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */ #error "this file needs to be ported to your compiler" #endif /* lint, etc. */ #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */ diff --git a/sys/amd64/include/xen/hypercall.h b/sys/amd64/include/xen/hypercall.h index 50fa376ff90b..a1b2a5cdeb3a 100644 --- a/sys/amd64/include/xen/hypercall.h +++ b/sys/amd64/include/xen/hypercall.h @@ -1,415 +1,417 @@ /****************************************************************************** * hypercall.h * - * Linux-specific hypervisor handling. + * FreeBSD-specific hypervisor handling. 
* * Copyright (c) 2002-2004, K A Fraser * * 64-bit updates: * Benjamin Liu * Jun Nakajima * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. + * + * $FreeBSD$ */ #ifndef __MACHINE_XEN_HYPERCALL_H__ #define __MACHINE_XEN_HYPERCALL_H__ #include #ifndef __XEN_HYPERVISOR_H__ # error "please don't include this file directly" #endif #define __STR(x) #x #define STR(x) __STR(x) #define ENOXENSYS 38 #define CONFIG_XEN_COMPAT 0x030002 #define __must_check #ifdef XEN #define HYPERCALL_STR(name) \ "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)" #else #define HYPERCALL_STR(name) \ "mov $("STR(__HYPERVISOR_##name)" * 32),%%eax; "\ "add hypercall_stubs(%%rip),%%rax; " \ "call *%%rax" #endif #define _hypercall0(type, name) \ ({ \ type __res; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res) \ : \ : "memory" ); \ __res; \ }) #define _hypercall1(type, name, a1) \ ({ \ type __res; \ long __ign1; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1) \ : "1" ((long)(a1)) \ : "memory" ); \ __res; \ }) #define _hypercall2(type, name, a1, a2) \ ({ \ type __res; \ long __ign1, __ign2; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ : "1" ((long)(a1)), "2" ((long)(a2)) \ : "memory" ); \ __res; \ }) #define _hypercall3(type, name, a1, a2, a3) \ ({ \ type __res; \ long __ign1, __ign2, __ign3; \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ "=d" (__ign3) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)) \ : "memory" ); \ __res; \ }) #define _hypercall4(type, name, a1, a2, a3, a4) \ ({ \ type __res; \ long __ign1, __ign2, __ign3; \ register long __arg4 __asm__("r10") = (long)(a4); \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ "=d" (__ign3), "+r" (__arg4) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)) \ : "memory" ); \ __res; \ }) #define _hypercall5(type, name, a1, a2, a3, a4, a5) \ ({ \ type __res; \ long __ign1, __ign2, __ign3; \ register long __arg4 __asm__("r10") = (long)(a4); \ register long __arg5 __asm__("r8") = (long)(a5); \ __asm__ volatile ( \ HYPERCALL_STR(name) \ : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ "=d" (__ign3), "+r" (__arg4), 
"+r" (__arg5) \ : "1" ((long)(a1)), "2" ((long)(a2)), \ "3" ((long)(a3)) \ : "memory" ); \ __res; \ }) static inline int __must_check HYPERVISOR_set_trap_table( const trap_info_t *table) { return _hypercall1(int, set_trap_table, table); } static inline int __must_check HYPERVISOR_mmu_update( mmu_update_t *req, unsigned int count, unsigned int *success_count, domid_t domid) { return _hypercall4(int, mmu_update, req, count, success_count, domid); } static inline int __must_check HYPERVISOR_mmuext_op( struct mmuext_op *op, unsigned int count, unsigned int *success_count, domid_t domid) { return _hypercall4(int, mmuext_op, op, count, success_count, domid); } static inline int __must_check HYPERVISOR_set_gdt( unsigned long *frame_list, unsigned int entries) { return _hypercall2(int, set_gdt, frame_list, entries); } static inline int __must_check HYPERVISOR_stack_switch( unsigned long ss, unsigned long esp) { return _hypercall2(int, stack_switch, ss, esp); } static inline int __must_check HYPERVISOR_set_callbacks( unsigned long event_address, unsigned long failsafe_address, unsigned long syscall_address) { return _hypercall3(int, set_callbacks, event_address, failsafe_address, syscall_address); } static inline int HYPERVISOR_fpu_taskswitch( int set) { return _hypercall1(int, fpu_taskswitch, set); } static inline int __must_check HYPERVISOR_sched_op_compat( int cmd, unsigned long arg) { return _hypercall2(int, sched_op_compat, cmd, arg); } static inline int __must_check HYPERVISOR_sched_op( int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); } static inline long __must_check HYPERVISOR_set_timer_op( uint64_t timeout) { return _hypercall1(long, set_timer_op, timeout); } static inline int __must_check HYPERVISOR_platform_op( struct xen_platform_op *platform_op) { platform_op->interface_version = XENPF_INTERFACE_VERSION; return _hypercall1(int, platform_op, platform_op); } static inline int __must_check HYPERVISOR_set_debugreg( unsigned int reg, unsigned long value) { return _hypercall2(int, set_debugreg, reg, value); } static inline unsigned long __must_check HYPERVISOR_get_debugreg( unsigned int reg) { return _hypercall1(unsigned long, get_debugreg, reg); } static inline int __must_check HYPERVISOR_update_descriptor( unsigned long ma, unsigned long word) { return _hypercall2(int, update_descriptor, ma, word); } static inline int __must_check HYPERVISOR_memory_op( unsigned int cmd, void *arg) { return _hypercall2(int, memory_op, cmd, arg); } static inline int __must_check HYPERVISOR_multicall( multicall_entry_t *call_list, unsigned int nr_calls) { return _hypercall2(int, multicall, call_list, nr_calls); } static inline int __must_check HYPERVISOR_update_va_mapping( unsigned long va, uint64_t new_val, unsigned long flags) { return _hypercall3(int, update_va_mapping, va, new_val, flags); } static inline int __must_check HYPERVISOR_event_channel_op( int cmd, void *arg) { int rc = _hypercall2(int, event_channel_op, cmd, arg); #if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOXENSYS)) { + if (__predict_false(rc == -ENOXENSYS)) { struct evtchn_op op; op.cmd = cmd; memcpy(&op.u, arg, sizeof(op.u)); rc = _hypercall1(int, event_channel_op_compat, &op); memcpy(arg, &op.u, sizeof(op.u)); } #endif return rc; } static inline int __must_check HYPERVISOR_xen_version( int cmd, void *arg) { return _hypercall2(int, xen_version, cmd, arg); } static inline int __must_check HYPERVISOR_console_io( int cmd, unsigned int count, char *str) { return _hypercall3(int, console_io, cmd, count, str); } 
static inline int __must_check HYPERVISOR_physdev_op( int cmd, void *arg) { int rc = _hypercall2(int, physdev_op, cmd, arg); #if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOXENSYS)) { + if (__predict_false(rc == -ENOXENSYS)) { struct physdev_op op; op.cmd = cmd; memcpy(&op.u, arg, sizeof(op.u)); rc = _hypercall1(int, physdev_op_compat, &op); memcpy(arg, &op.u, sizeof(op.u)); } #endif return rc; } static inline int __must_check HYPERVISOR_grant_table_op( unsigned int cmd, void *uop, unsigned int count) { return _hypercall3(int, grant_table_op, cmd, uop, count); } static inline int __must_check HYPERVISOR_update_va_mapping_otherdomain( unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid) { return _hypercall4(int, update_va_mapping_otherdomain, va, new_val, flags, domid); } static inline int __must_check HYPERVISOR_vm_assist( unsigned int cmd, unsigned int type) { return _hypercall2(int, vm_assist, cmd, type); } static inline int __must_check HYPERVISOR_vcpu_op( int cmd, unsigned int vcpuid, void *extra_args) { return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } static inline int __must_check HYPERVISOR_set_segment_base( int reg, unsigned long value) { return _hypercall2(int, set_segment_base, reg, value); } static inline int __must_check HYPERVISOR_suspend( unsigned long srec) { struct sched_shutdown sched_shutdown = { .reason = SHUTDOWN_suspend }; int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, &sched_shutdown, srec); #if CONFIG_XEN_COMPAT <= 0x030002 if (rc == -ENOXENSYS) rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, SHUTDOWN_suspend, srec); #endif return rc; } #if CONFIG_XEN_COMPAT <= 0x030002 static inline int HYPERVISOR_nmi_op( unsigned long op, void *arg) { return _hypercall2(int, nmi_op, op, arg); } #endif #ifndef CONFIG_XEN static inline unsigned long __must_check HYPERVISOR_hvm_op( int op, void *arg) { return _hypercall2(unsigned long, hvm_op, op, arg); } #endif static inline int __must_check HYPERVISOR_callback_op( int cmd, const void *arg) { return _hypercall2(int, callback_op, cmd, arg); } static inline int __must_check HYPERVISOR_xenoprof_op( int op, void *arg) { return _hypercall2(int, xenoprof_op, op, arg); } static inline int __must_check HYPERVISOR_kexec_op( unsigned long op, void *args) { return _hypercall2(int, kexec_op, op, args); } #undef __must_check #endif /* __MACHINE_XEN_HYPERCALL_H__ */ diff --git a/sys/amd64/include/xen/xen-os.h b/sys/amd64/include/xen/xen-os.h index 89743237ff90..ee498b9d09ac 100644 --- a/sys/amd64/include/xen/xen-os.h +++ b/sys/amd64/include/xen/xen-os.h @@ -1,299 +1,132 @@ /****************************************************************************** - * os.h + * amd64/xen/xen-os.h * - * random collection of macros and definition + * Random collection of macros and definition + * + * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ -#ifndef _XEN_OS_H_ -#define _XEN_OS_H_ +#ifndef _MACHINE_XEN_XEN_OS_H_ +#define _MACHINE_XEN_XEN_OS_H_ #ifdef PAE #define CONFIG_X86_PAE #endif -#ifdef LOCORE -#define __ASSEMBLY__ -#endif - -#if !defined(__XEN_INTERFACE_VERSION__) -#define __XEN_INTERFACE_VERSION__ 0x00030208 -#endif - -#define GRANT_REF_INVALID 0xffffffff - -#include - /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ -/* Force a proper event-channel callback from Xen. */ -void force_evtchn_callback(void); - -extern int gdtset; - -extern shared_info_t *HYPERVISOR_shared_info; - /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ static inline void rep_nop(void) { __asm__ __volatile__ ( "rep;nop" : : : "memory" ); } #define cpu_relax() rep_nop() -/* crude memory allocator for memory allocation early in - * boot - */ -void *bootmem_alloc(unsigned int size); -void bootmem_free(void *ptr, unsigned int size); - -void printk(const char *fmt, ...); - -/* some function prototypes */ -void trap_init(void); - -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) - -#ifndef XENHVM - -/* - * STI/CLI equivalents. These basically set and clear the virtual - * event_enable flag in the shared_info structure. Note that when - * the enable bit is set, there may be pending events to be handled. - * We may therefore call into do_hypervisor_callback() directly. - */ - -#define __cli() \ -do { \ - vcpu_info_t *_vcpu; \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ - _vcpu->evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - -#define __sti() \ -do { \ - vcpu_info_t *_vcpu; \ - barrier(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ - _vcpu->evtchn_upcall_mask = 0; \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ -} while (0) - -#define __restore_flags(x) \ -do { \ - vcpu_info_t *_vcpu; \ - barrier(); \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ - if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ - barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ - force_evtchn_callback(); \ - } \ -} while (0) - -/* - * Add critical_{enter, exit}? 
- * - */ -#define __save_and_cli(x) \ -do { \ - vcpu_info_t *_vcpu; \ - _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ - (x) = _vcpu->evtchn_upcall_mask; \ - _vcpu->evtchn_upcall_mask = 1; \ - barrier(); \ -} while (0) - - -#define cli() __cli() -#define sti() __sti() -#define save_flags(x) __save_flags(x) -#define restore_flags(x) __restore_flags(x) -#define save_and_cli(x) __save_and_cli(x) - -#define local_irq_save(x) __save_and_cli(x) -#define local_irq_restore(x) __restore_flags(x) -#define local_irq_disable() __cli() -#define local_irq_enable() __sti() - -#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));} -#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } -#define spin_lock_irqsave mtx_lock_irqsave -#define spin_unlock_irqrestore mtx_unlock_irqrestore - -#else -#endif - -#ifndef xen_mb -#define xen_mb() mb() -#endif -#ifndef xen_rmb -#define xen_rmb() rmb() -#endif -#ifndef xen_wmb -#define xen_wmb() wmb() -#endif -#ifdef SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - - /* This is a barrier for the compiler only, NOT the processor! */ #define barrier() __asm__ __volatile__("": : :"memory") #define LOCK_PREFIX "" #define LOCK "" #define ADDR (*(volatile long *) addr) -/* - * Make sure gcc doesn't try to be clever and move things around - * on us. We need to use _exactly_ the address the user gave us, - * not some alias that contains the same information. - */ -typedef struct { volatile int counter; } atomic_t; - - - -#define xen_xchg(ptr,v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((volatile struct __xchg_dummy *)(x)) -static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, - int size) -{ - switch (size) { - case 1: - __asm__ __volatile__("xchgb %b0,%1" - :"=q" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 2: - __asm__ __volatile__("xchgw %w0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - case 4: - __asm__ __volatile__("xchgl %0,%1" - :"=r" (x) - :"m" (*__xg(ptr)), "0" (x) - :"memory"); - break; - } - return x; -} /** * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static __inline int test_and_clear_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( LOCK_PREFIX "btrl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"Ir" (nr) : "memory"); return oldbit; } static __inline int constant_test_bit(int nr, const volatile void * addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } static __inline int variable_test_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( "btl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit) :"m" (ADDR),"Ir" (nr)); return oldbit; } #define test_bit(nr,addr) \ (__builtin_constant_p(nr) ? 
\ constant_test_bit((nr),(addr)) : \ variable_test_bit((nr),(addr))) - /** * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * This function is atomic and may not be reordered. See __set_bit() * if you do not require the atomic guarantees. * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __inline__ void set_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btsl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } /** * clear_bit - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and may not be reordered. However, it does * not contain a memory barrier, so if it is used for locking purposes, * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ static __inline__ void clear_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btrl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } -/** - * atomic_inc - increment atomic variable - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1. Note that the guaranteed - * useful range of an atomic_t is only 24 bits. - */ -static __inline__ void atomic_inc(atomic_t *v) -{ - __asm__ __volatile__( - LOCK "incl %0" - :"=m" (v->counter) - :"m" (v->counter)); -} - - -#define rdtscll(val) \ - __asm__ __volatile__("rdtsc" : "=A" (val)) - #endif /* !__ASSEMBLY__ */ -#endif /* _OS_H_ */ +#endif /* _MACHINE_XEN_XEN_OS_H_ */ diff --git a/sys/conf/files b/sys/conf/files index ec59771573ae..8fda4772420a 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,3833 +1,3831 @@ # $FreeBSD$ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. 
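# For illustration only, a hypothetical entry (all names below are made up)
# written to those limitations: each quoted compile-with command stays on a
# single logical line, and only the first dependency line is relied upon.
#example_gen.h	optional example				\
#	dependency	"$S/tools/example2h.awk $S/dev/example/example.def" \
#	compile-with	"${AWK} -f $S/tools/example2h.awk $S/dev/example/example.def" \
#	no-obj no-implicit-rule before-depend			\
#	clean		"example_gen.h"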
# acpi_quirks.h optional acpi \ dependency "$S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \ compile-with "${AWK} -f $S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \ no-obj no-implicit-rule before-depend \ clean "acpi_quirks.h" aicasm optional ahc | ahd \ dependency "$S/dev/aic7xxx/aicasm/*.[chyl]" \ compile-with "CC='${CC}' ${MAKE} -f $S/dev/aic7xxx/aicasm/Makefile MAKESRCPATH=$S/dev/aic7xxx/aicasm" \ no-obj no-implicit-rule \ clean "aicasm* y.tab.h" aic7xxx_seq.h optional ahc \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic7xxx_seq.h -r aic7xxx_reg.h -p aic7xxx_reg_print.c -i $S/dev/aic7xxx/aic7xxx_osm.h $S/dev/aic7xxx/aic7xxx.seq" \ no-obj no-implicit-rule before-depend local \ clean "aic7xxx_seq.h" \ dependency "$S/dev/aic7xxx/aic7xxx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic7xxx_reg.h optional ahc \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic7xxx_seq.h -r aic7xxx_reg.h -p aic7xxx_reg_print.c -i $S/dev/aic7xxx/aic7xxx_osm.h $S/dev/aic7xxx/aic7xxx.seq" \ no-obj no-implicit-rule before-depend local \ clean "aic7xxx_reg.h" \ dependency "$S/dev/aic7xxx/aic7xxx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic7xxx_reg_print.c optional ahc \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic7xxx_seq.h -r aic7xxx_reg.h -p aic7xxx_reg_print.c -i $S/dev/aic7xxx/aic7xxx_osm.h $S/dev/aic7xxx/aic7xxx.seq" \ no-obj no-implicit-rule local \ clean "aic7xxx_reg_print.c" \ dependency "$S/dev/aic7xxx/aic7xxx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic7xxx_reg_print.o optional ahc ahc_reg_pretty_print \ compile-with "${NORMAL_C}" \ no-implicit-rule local aic79xx_seq.h optional ahd pci \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic79xx_seq.h -r aic79xx_reg.h -p aic79xx_reg_print.c -i $S/dev/aic7xxx/aic79xx_osm.h $S/dev/aic7xxx/aic79xx.seq" \ no-obj no-implicit-rule before-depend local \ clean "aic79xx_seq.h" \ dependency "$S/dev/aic7xxx/aic79xx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic79xx_reg.h optional ahd pci \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic79xx_seq.h -r aic79xx_reg.h -p aic79xx_reg_print.c -i $S/dev/aic7xxx/aic79xx_osm.h $S/dev/aic7xxx/aic79xx.seq" \ no-obj no-implicit-rule before-depend local \ clean "aic79xx_reg.h" \ dependency "$S/dev/aic7xxx/aic79xx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic79xx_reg_print.c optional ahd pci \ compile-with "./aicasm ${INCLUDES} -I$S/cam/scsi -I$S/dev/aic7xxx -o aic79xx_seq.h -r aic79xx_reg.h -p aic79xx_reg_print.c -i $S/dev/aic7xxx/aic79xx_osm.h $S/dev/aic7xxx/aic79xx.seq" \ no-obj no-implicit-rule local \ clean "aic79xx_reg_print.c" \ dependency "$S/dev/aic7xxx/aic79xx.{reg,seq} $S/cam/scsi/scsi_message.h aicasm" aic79xx_reg_print.o optional ahd pci ahd_reg_pretty_print \ compile-with "${NORMAL_C}" \ no-implicit-rule local # # The 'fdt_dtb_file' target covers an actual DTB file name, which is derived # from the specified source (DTS) file: .dts -> .dtb # fdt_dtb_file optional fdt \ compile-with "if [ -f $S/boot/fdt/dts/${FDT_DTS_FILE} ]; then dtc -O dtb -o ${FDT_DTS_FILE:R}.dtb -b 0 -p 1024 $S/boot/fdt/dts/${FDT_DTS_FILE}; fi" \ no-obj no-implicit-rule before-depend \ clean "${FDT_DTS_FILE:R}.dtb" fdt_static_dtb.h optional fdt fdt_dtb_static \ compile-with "sh $S/tools/fdt/make_dtbh.sh ${FDT_DTS_FILE} ." 
\ no-obj no-implicit-rule before-depend \ clean "fdt_static_dtb.h" feeder_eq_gen.h optional sound \ dependency "$S/tools/sound/feeder_eq_mkfilter.awk" \ compile-with "${AWK} -f $S/tools/sound/feeder_eq_mkfilter.awk -- ${FEEDER_EQ_PRESETS} > feeder_eq_gen.h" \ no-obj no-implicit-rule before-depend \ clean "feeder_eq_gen.h" feeder_rate_gen.h optional sound \ dependency "$S/tools/sound/feeder_rate_mkfilter.awk" \ compile-with "${AWK} -f $S/tools/sound/feeder_rate_mkfilter.awk -- ${FEEDER_RATE_PRESETS} > feeder_rate_gen.h" \ no-obj no-implicit-rule before-depend \ clean "feeder_rate_gen.h" snd_fxdiv_gen.h optional sound \ dependency "$S/tools/sound/snd_fxdiv_gen.awk" \ compile-with "${AWK} -f $S/tools/sound/snd_fxdiv_gen.awk -- > snd_fxdiv_gen.h" \ no-obj no-implicit-rule before-depend \ clean "snd_fxdiv_gen.h" miidevs.h optional miibus | mii \ dependency "$S/tools/miidevs2h.awk $S/dev/mii/miidevs" \ compile-with "${AWK} -f $S/tools/miidevs2h.awk $S/dev/mii/miidevs" \ no-obj no-implicit-rule before-depend \ clean "miidevs.h" pccarddevs.h standard \ dependency "$S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \ compile-with "${AWK} -f $S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \ no-obj no-implicit-rule before-depend \ clean "pccarddevs.h" teken_state.h optional sc \ dependency "$S/teken/gensequences $S/teken/sequences" \ compile-with "${AWK} -f $S/teken/gensequences $S/teken/sequences > teken_state.h" \ no-obj no-implicit-rule before-depend \ clean "teken_state.h" usbdevs.h optional usb \ dependency "$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \ compile-with "${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -h" \ no-obj no-implicit-rule before-depend \ clean "usbdevs.h" usbdevs_data.h optional usb \ dependency "$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \ compile-with "${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -d" \ no-obj no-implicit-rule before-depend \ clean "usbdevs_data.h" cam/cam.c optional scbus cam/cam_compat.c optional scbus cam/cam_periph.c optional scbus cam/cam_queue.c optional scbus cam/cam_sim.c optional scbus cam/cam_xpt.c optional scbus cam/ata/ata_all.c optional scbus cam/ata/ata_xpt.c optional scbus cam/ata/ata_pmp.c optional scbus cam/scsi/scsi_xpt.c optional scbus cam/scsi/scsi_all.c optional scbus cam/scsi/scsi_cd.c optional cd cam/scsi/scsi_ch.c optional ch cam/ata/ata_da.c optional ada | da cam/ctl/ctl.c optional ctl cam/ctl/ctl_backend.c optional ctl cam/ctl/ctl_backend_block.c optional ctl cam/ctl/ctl_backend_ramdisk.c optional ctl cam/ctl/ctl_cmd_table.c optional ctl cam/ctl/ctl_frontend.c optional ctl cam/ctl/ctl_frontend_cam_sim.c optional ctl cam/ctl/ctl_frontend_internal.c optional ctl cam/ctl/ctl_mem_pool.c optional ctl cam/ctl/ctl_scsi_all.c optional ctl cam/ctl/ctl_error.c optional ctl cam/ctl/ctl_util.c optional ctl cam/ctl/scsi_ctl.c optional ctl cam/scsi/scsi_da.c optional da cam/scsi/scsi_low.c optional ct | ncv | nsp | stg cam/scsi/scsi_pass.c optional pass cam/scsi/scsi_pt.c optional pt cam/scsi/scsi_sa.c optional sa cam/scsi/scsi_enc.c optional ses cam/scsi/scsi_enc_ses.c optional ses cam/scsi/scsi_enc_safte.c optional ses cam/scsi/scsi_sg.c optional sg cam/scsi/scsi_targ_bh.c optional targbh cam/scsi/scsi_target.c optional targ cam/scsi/smp_all.c optional scbus # shared between zfs and dtrace cddl/compat/opensolaris/kern/opensolaris.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_cmn_err.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kmem.c optional zfs compile-with 
"${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_misc.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_sunddi.c optional zfs compile-with "${ZFS_C}" # zfs specific cddl/compat/opensolaris/kern/opensolaris_acl.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kobj.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_kstat.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_lookup.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_policy.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_string.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_sysevent.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_taskq.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_uio.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_vfs.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_vm.c optional zfs compile-with "${ZFS_C}" cddl/compat/opensolaris/kern/opensolaris_zone.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/acl/acl_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/avl/avl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/fnvpair.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/nvpair.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/nvpair/nvpair_alloc_fixed.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/unicode/u8_textprep.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfeature_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_comutil.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_deleg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zfs_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zpool_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/common/zfs/zprop_common.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/gfs.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/vnode.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/ddt_zap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c 
optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c optional zfs compile-with "${ZFS_C}" \ warning "kernel contains CDDL licensed ZFS filesystem" cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/gzip.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/lzjb.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/sha256.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/unique.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c optional zfs compile-with "${ZFS_C}" 
cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_debug.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zle.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/callb.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/fm.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/list.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c optional zfs compile-with "${ZFS_C}" 
cddl/contrib/opensolaris/uts/common/zmod/adler32.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/deflate.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inffast.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inflate.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/inftrees.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/opensolaris_crc32.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/trees.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zmod.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zmod_subr.c optional zfs compile-with "${ZFS_C}" cddl/contrib/opensolaris/uts/common/zmod/zutil.c optional zfs compile-with "${ZFS_C}" compat/freebsd32/freebsd32_capability.c optional compat_freebsd32 compat/freebsd32/freebsd32_ioctl.c optional compat_freebsd32 compat/freebsd32/freebsd32_misc.c optional compat_freebsd32 compat/freebsd32/freebsd32_syscalls.c optional compat_freebsd32 compat/freebsd32/freebsd32_sysent.c optional compat_freebsd32 contrib/altq/altq/altq_cbq.c optional altq contrib/altq/altq/altq_cdnr.c optional altq contrib/altq/altq/altq_hfsc.c optional altq contrib/altq/altq/altq_priq.c optional altq contrib/altq/altq/altq_red.c optional altq contrib/altq/altq/altq_rio.c optional altq contrib/altq/altq/altq_rmclass.c optional altq contrib/altq/altq/altq_subr.c optional altq contrib/dev/acpica/components/debugger/dbcmds.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbconvert.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbdisply.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbexec.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbfileio.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbhistry.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbinput.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbmethod.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbnames.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbstats.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbutils.c optional acpi acpi_debug contrib/dev/acpica/components/debugger/dbxface.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmbuffer.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmdeferred.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmnames.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmopcode.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmobject.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrc.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcl.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcl2.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmresrcs.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmutils.c optional acpi acpi_debug contrib/dev/acpica/components/disassembler/dmwalk.c optional acpi acpi_debug contrib/dev/acpica/components/dispatcher/dsargs.c optional acpi contrib/dev/acpica/components/dispatcher/dscontrol.c optional acpi contrib/dev/acpica/components/dispatcher/dsfield.c optional acpi 
contrib/dev/acpica/components/dispatcher/dsinit.c optional acpi contrib/dev/acpica/components/dispatcher/dsmethod.c optional acpi contrib/dev/acpica/components/dispatcher/dsmthdat.c optional acpi contrib/dev/acpica/components/dispatcher/dsobject.c optional acpi contrib/dev/acpica/components/dispatcher/dsopcode.c optional acpi contrib/dev/acpica/components/dispatcher/dsutils.c optional acpi contrib/dev/acpica/components/dispatcher/dswexec.c optional acpi contrib/dev/acpica/components/dispatcher/dswload.c optional acpi contrib/dev/acpica/components/dispatcher/dswload2.c optional acpi contrib/dev/acpica/components/dispatcher/dswscope.c optional acpi contrib/dev/acpica/components/dispatcher/dswstate.c optional acpi contrib/dev/acpica/components/events/evevent.c optional acpi contrib/dev/acpica/components/events/evglock.c optional acpi contrib/dev/acpica/components/events/evgpe.c optional acpi contrib/dev/acpica/components/events/evgpeblk.c optional acpi contrib/dev/acpica/components/events/evgpeinit.c optional acpi contrib/dev/acpica/components/events/evgpeutil.c optional acpi contrib/dev/acpica/components/events/evhandler.c optional acpi contrib/dev/acpica/components/events/evmisc.c optional acpi contrib/dev/acpica/components/events/evregion.c optional acpi contrib/dev/acpica/components/events/evrgnini.c optional acpi contrib/dev/acpica/components/events/evsci.c optional acpi contrib/dev/acpica/components/events/evxface.c optional acpi contrib/dev/acpica/components/events/evxfevnt.c optional acpi contrib/dev/acpica/components/events/evxfgpe.c optional acpi contrib/dev/acpica/components/events/evxfregn.c optional acpi contrib/dev/acpica/components/executer/exconfig.c optional acpi contrib/dev/acpica/components/executer/exconvrt.c optional acpi contrib/dev/acpica/components/executer/excreate.c optional acpi contrib/dev/acpica/components/executer/exdebug.c optional acpi contrib/dev/acpica/components/executer/exdump.c optional acpi contrib/dev/acpica/components/executer/exfield.c optional acpi contrib/dev/acpica/components/executer/exfldio.c optional acpi contrib/dev/acpica/components/executer/exmisc.c optional acpi contrib/dev/acpica/components/executer/exmutex.c optional acpi contrib/dev/acpica/components/executer/exnames.c optional acpi contrib/dev/acpica/components/executer/exoparg1.c optional acpi contrib/dev/acpica/components/executer/exoparg2.c optional acpi contrib/dev/acpica/components/executer/exoparg3.c optional acpi contrib/dev/acpica/components/executer/exoparg6.c optional acpi contrib/dev/acpica/components/executer/exprep.c optional acpi contrib/dev/acpica/components/executer/exregion.c optional acpi contrib/dev/acpica/components/executer/exresnte.c optional acpi contrib/dev/acpica/components/executer/exresolv.c optional acpi contrib/dev/acpica/components/executer/exresop.c optional acpi contrib/dev/acpica/components/executer/exstore.c optional acpi contrib/dev/acpica/components/executer/exstoren.c optional acpi contrib/dev/acpica/components/executer/exstorob.c optional acpi contrib/dev/acpica/components/executer/exsystem.c optional acpi contrib/dev/acpica/components/executer/exutils.c optional acpi contrib/dev/acpica/components/hardware/hwacpi.c optional acpi contrib/dev/acpica/components/hardware/hwesleep.c optional acpi contrib/dev/acpica/components/hardware/hwgpe.c optional acpi contrib/dev/acpica/components/hardware/hwpci.c optional acpi contrib/dev/acpica/components/hardware/hwregs.c optional acpi contrib/dev/acpica/components/hardware/hwsleep.c optional acpi 
contrib/dev/acpica/components/hardware/hwtimer.c optional acpi contrib/dev/acpica/components/hardware/hwvalid.c optional acpi contrib/dev/acpica/components/hardware/hwxface.c optional acpi contrib/dev/acpica/components/hardware/hwxfsleep.c optional acpi contrib/dev/acpica/components/namespace/nsaccess.c optional acpi contrib/dev/acpica/components/namespace/nsalloc.c optional acpi contrib/dev/acpica/components/namespace/nsarguments.c optional acpi contrib/dev/acpica/components/namespace/nsconvert.c optional acpi contrib/dev/acpica/components/namespace/nsdump.c optional acpi contrib/dev/acpica/components/namespace/nseval.c optional acpi contrib/dev/acpica/components/namespace/nsinit.c optional acpi contrib/dev/acpica/components/namespace/nsload.c optional acpi contrib/dev/acpica/components/namespace/nsnames.c optional acpi contrib/dev/acpica/components/namespace/nsobject.c optional acpi contrib/dev/acpica/components/namespace/nsparse.c optional acpi contrib/dev/acpica/components/namespace/nspredef.c optional acpi contrib/dev/acpica/components/namespace/nsprepkg.c optional acpi contrib/dev/acpica/components/namespace/nsrepair.c optional acpi contrib/dev/acpica/components/namespace/nsrepair2.c optional acpi contrib/dev/acpica/components/namespace/nssearch.c optional acpi contrib/dev/acpica/components/namespace/nsutils.c optional acpi contrib/dev/acpica/components/namespace/nswalk.c optional acpi contrib/dev/acpica/components/namespace/nsxfeval.c optional acpi contrib/dev/acpica/components/namespace/nsxfname.c optional acpi contrib/dev/acpica/components/namespace/nsxfobj.c optional acpi contrib/dev/acpica/components/parser/psargs.c optional acpi contrib/dev/acpica/components/parser/psloop.c optional acpi contrib/dev/acpica/components/parser/psobject.c optional acpi contrib/dev/acpica/components/parser/psopcode.c optional acpi contrib/dev/acpica/components/parser/psopinfo.c optional acpi contrib/dev/acpica/components/parser/psparse.c optional acpi contrib/dev/acpica/components/parser/psscope.c optional acpi contrib/dev/acpica/components/parser/pstree.c optional acpi contrib/dev/acpica/components/parser/psutils.c optional acpi contrib/dev/acpica/components/parser/pswalk.c optional acpi contrib/dev/acpica/components/parser/psxface.c optional acpi contrib/dev/acpica/components/resources/rsaddr.c optional acpi contrib/dev/acpica/components/resources/rscalc.c optional acpi contrib/dev/acpica/components/resources/rscreate.c optional acpi contrib/dev/acpica/components/resources/rsdump.c optional acpi contrib/dev/acpica/components/resources/rsdumpinfo.c optional acpi contrib/dev/acpica/components/resources/rsinfo.c optional acpi contrib/dev/acpica/components/resources/rsio.c optional acpi contrib/dev/acpica/components/resources/rsirq.c optional acpi contrib/dev/acpica/components/resources/rslist.c optional acpi contrib/dev/acpica/components/resources/rsmemory.c optional acpi contrib/dev/acpica/components/resources/rsmisc.c optional acpi contrib/dev/acpica/components/resources/rsserial.c optional acpi contrib/dev/acpica/components/resources/rsutils.c optional acpi contrib/dev/acpica/components/resources/rsxface.c optional acpi contrib/dev/acpica/components/tables/tbfadt.c optional acpi contrib/dev/acpica/components/tables/tbfind.c optional acpi contrib/dev/acpica/components/tables/tbinstal.c optional acpi contrib/dev/acpica/components/tables/tbprint.c optional acpi contrib/dev/acpica/components/tables/tbutils.c optional acpi contrib/dev/acpica/components/tables/tbxface.c optional acpi 
contrib/dev/acpica/components/tables/tbxfload.c	optional acpi
contrib/dev/acpica/components/tables/tbxfroot.c	optional acpi
contrib/dev/acpica/components/utilities/utaddress.c	optional acpi
contrib/dev/acpica/components/utilities/utalloc.c	optional acpi
contrib/dev/acpica/components/utilities/utbuffer.c	optional acpi
contrib/dev/acpica/components/utilities/utcache.c	optional acpi
contrib/dev/acpica/components/utilities/utcopy.c	optional acpi
contrib/dev/acpica/components/utilities/utdebug.c	optional acpi
contrib/dev/acpica/components/utilities/utdecode.c	optional acpi
contrib/dev/acpica/components/utilities/utdelete.c	optional acpi
contrib/dev/acpica/components/utilities/uterror.c	optional acpi
contrib/dev/acpica/components/utilities/uteval.c	optional acpi
contrib/dev/acpica/components/utilities/utexcep.c	optional acpi
contrib/dev/acpica/components/utilities/utglobal.c	optional acpi
contrib/dev/acpica/components/utilities/utids.c	optional acpi
contrib/dev/acpica/components/utilities/utinit.c	optional acpi
contrib/dev/acpica/components/utilities/utlock.c	optional acpi
contrib/dev/acpica/components/utilities/utmath.c	optional acpi
contrib/dev/acpica/components/utilities/utmisc.c	optional acpi
contrib/dev/acpica/components/utilities/utmutex.c	optional acpi
contrib/dev/acpica/components/utilities/utobject.c	optional acpi
contrib/dev/acpica/components/utilities/utosi.c	optional acpi
contrib/dev/acpica/components/utilities/utownerid.c	optional acpi
contrib/dev/acpica/components/utilities/utpredef.c	optional acpi
contrib/dev/acpica/components/utilities/utresrc.c	optional acpi
contrib/dev/acpica/components/utilities/utstate.c	optional acpi
contrib/dev/acpica/components/utilities/utstring.c	optional acpi
contrib/dev/acpica/components/utilities/utxface.c	optional acpi
contrib/dev/acpica/components/utilities/utxferror.c	optional acpi
contrib/dev/acpica/components/utilities/utxfinit.c	optional acpi
#contrib/dev/acpica/components/utilities/utxfmutex.c	optional acpi
contrib/ipfilter/netinet/fil.c	optional ipfilter inet \
	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_auth.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_fil_freebsd.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_frag.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_log.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_nat.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_proxy.c	optional ipfilter inet \
	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_state.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_lookup.c	optional ipfilter inet \
	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-error -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_pool.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_htable.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/ip_sync.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/ipfilter/netinet/mlfk_ipl.c	optional ipfilter inet \
	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
contrib/libfdt/fdt.c	optional fdt
contrib/libfdt/fdt_ro.c	optional fdt
contrib/libfdt/fdt_rw.c	optional fdt
contrib/libfdt/fdt_strerror.c	optional fdt
contrib/libfdt/fdt_sw.c	optional fdt
contrib/libfdt/fdt_wip.c	optional fdt
contrib/ngatm/netnatm/api/cc_conn.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C_NOWERROR} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_data.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_dump.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_port.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_sig.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/cc_user.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/api/unisap.c	optional ngatm_ccatm \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/misc/straddr.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/misc/unimsg_common.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/msg/traffic.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/msg/uni_ie.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/msg/uni_msg.c	optional ngatm_atmbase \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/saal/saal_sscfu.c	optional ngatm_sscfu \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/saal/saal_sscop.c	optional ngatm_sscop \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_call.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_coord.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_party.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_print.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_reset.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_uni.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_unimsgcpy.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
contrib/ngatm/netnatm/sig/sig_verify.c	optional ngatm_uni \
	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
crypto/blowfish/bf_ecb.c	optional ipsec
crypto/blowfish/bf_skey.c	optional crypto | ipsec
crypto/camellia/camellia.c	optional crypto | ipsec
crypto/camellia/camellia-api.c	optional crypto | ipsec
crypto/des/des_ecb.c	optional crypto | ipsec | netsmb
crypto/des/des_setkey.c	optional crypto | ipsec | netsmb
crypto/rc4/rc4.c	optional netgraph_mppc_encryption | kgssapi
crypto/rijndael/rijndael-alg-fst.c	optional crypto | geom_bde | \
	ipsec | random | wlan_ccmp
crypto/rijndael/rijndael-api-fst.c	optional geom_bde | random
crypto/rijndael/rijndael-api.c	optional crypto | ipsec | wlan_ccmp
crypto/sha1.c	optional carp | crypto | ipsec | \
	netgraph_mppc_encryption | sctp
crypto/sha2/sha2.c	optional crypto | geom_bde | ipsec | random | \
	sctp | zfs
crypto/siphash/siphash.c	optional inet | inet6
crypto/siphash/siphash_test.c	optional inet | inet6
ddb/db_access.c	optional ddb
ddb/db_break.c	optional ddb
ddb/db_capture.c	optional ddb
ddb/db_command.c	optional ddb
ddb/db_examine.c	optional ddb
ddb/db_expr.c	optional ddb
ddb/db_input.c	optional ddb
ddb/db_lex.c optional ddb ddb/db_main.c optional ddb ddb/db_output.c optional ddb ddb/db_print.c optional ddb ddb/db_ps.c optional ddb ddb/db_run.c optional ddb ddb/db_script.c optional ddb ddb/db_sym.c optional ddb ddb/db_thread.c optional ddb ddb/db_textdump.c optional ddb ddb/db_variables.c optional ddb ddb/db_watch.c optional ddb ddb/db_write_cmd.c optional ddb #dev/dpt/dpt_control.c optional dpt dev/aac/aac.c optional aac dev/aac/aac_cam.c optional aacp aac dev/aac/aac_debug.c optional aac dev/aac/aac_disk.c optional aac dev/aac/aac_linux.c optional aac compat_linux dev/aac/aac_pci.c optional aac pci dev/aacraid/aacraid.c optional aacraid dev/aacraid/aacraid_cam.c optional aacraid scbus dev/aacraid/aacraid_debug.c optional aacraid dev/aacraid/aacraid_linux.c optional aacraid compat_linux dev/aacraid/aacraid_pci.c optional aacraid pci dev/acpi_support/acpi_wmi.c optional acpi_wmi acpi dev/acpi_support/acpi_asus.c optional acpi_asus acpi dev/acpi_support/acpi_asus_wmi.c optional acpi_asus_wmi acpi dev/acpi_support/acpi_fujitsu.c optional acpi_fujitsu acpi dev/acpi_support/acpi_hp.c optional acpi_hp acpi dev/acpi_support/acpi_ibm.c optional acpi_ibm acpi dev/acpi_support/acpi_panasonic.c optional acpi_panasonic acpi dev/acpi_support/acpi_sony.c optional acpi_sony acpi dev/acpi_support/acpi_toshiba.c optional acpi_toshiba acpi dev/acpi_support/atk0110.c optional aibs acpi dev/acpica/Osd/OsdDebug.c optional acpi dev/acpica/Osd/OsdHardware.c optional acpi dev/acpica/Osd/OsdInterrupt.c optional acpi dev/acpica/Osd/OsdMemory.c optional acpi dev/acpica/Osd/OsdSchedule.c optional acpi dev/acpica/Osd/OsdStream.c optional acpi dev/acpica/Osd/OsdSynch.c optional acpi dev/acpica/Osd/OsdTable.c optional acpi dev/acpica/acpi.c optional acpi dev/acpica/acpi_acad.c optional acpi dev/acpica/acpi_battery.c optional acpi dev/acpica/acpi_button.c optional acpi dev/acpica/acpi_cmbat.c optional acpi dev/acpica/acpi_cpu.c optional acpi dev/acpica/acpi_ec.c optional acpi dev/acpica/acpi_hpet.c optional acpi dev/acpica/acpi_isab.c optional acpi isa dev/acpica/acpi_lid.c optional acpi dev/acpica/acpi_package.c optional acpi dev/acpica/acpi_pci.c optional acpi pci dev/acpica/acpi_pci_link.c optional acpi pci dev/acpica/acpi_pcib.c optional acpi pci dev/acpica/acpi_pcib_acpi.c optional acpi pci dev/acpica/acpi_pcib_pci.c optional acpi pci dev/acpica/acpi_perf.c optional acpi dev/acpica/acpi_powerres.c optional acpi dev/acpica/acpi_quirk.c optional acpi dev/acpica/acpi_resource.c optional acpi dev/acpica/acpi_smbat.c optional acpi dev/acpica/acpi_thermal.c optional acpi dev/acpica/acpi_throttle.c optional acpi dev/acpica/acpi_timer.c optional acpi dev/acpica/acpi_video.c optional acpi_video acpi dev/acpica/acpi_dock.c optional acpi_dock acpi dev/adlink/adlink.c optional adlink dev/advansys/adv_eisa.c optional adv eisa dev/advansys/adv_pci.c optional adv pci dev/advansys/advansys.c optional adv dev/advansys/advlib.c optional adv dev/advansys/advmcode.c optional adv dev/advansys/adw_pci.c optional adw pci dev/advansys/adwcam.c optional adw dev/advansys/adwlib.c optional adw dev/advansys/adwmcode.c optional adw dev/ae/if_ae.c optional ae pci dev/age/if_age.c optional age pci dev/agp/agp.c optional agp pci dev/agp/agp_if.m optional agp pci dev/aha/aha.c optional aha dev/aha/aha_isa.c optional aha isa dev/aha/aha_mca.c optional aha mca dev/ahb/ahb.c optional ahb eisa dev/ahci/ahci.c optional ahci pci dev/ahci/ahciem.c optional ahci pci dev/aic/aic.c optional aic dev/aic/aic_pccard.c optional aic pccard 
dev/aic7xxx/ahc_eisa.c optional ahc eisa dev/aic7xxx/ahc_isa.c optional ahc isa dev/aic7xxx/ahc_pci.c optional ahc pci \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/aic7xxx/ahd_pci.c optional ahd pci \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/aic7xxx/aic7770.c optional ahc dev/aic7xxx/aic79xx.c optional ahd pci dev/aic7xxx/aic79xx_osm.c optional ahd pci dev/aic7xxx/aic79xx_pci.c optional ahd pci dev/aic7xxx/aic7xxx.c optional ahc dev/aic7xxx/aic7xxx_93cx6.c optional ahc dev/aic7xxx/aic7xxx_osm.c optional ahc dev/aic7xxx/aic7xxx_pci.c optional ahc pci dev/alc/if_alc.c optional alc pci dev/ale/if_ale.c optional ale pci dev/altera/avgen/altera_avgen.c optional altera_avgen dev/altera/avgen/altera_avgen_fdt.c optional altera_avgen fdt dev/altera/avgen/altera_avgen_nexus.c optional altera_avgen dev/altera/sdcard/altera_sdcard.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_disk.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_io.c optional altera_sdcard dev/altera/sdcard/altera_sdcard_fdt.c optional altera_sdcard fdt dev/altera/sdcard/altera_sdcard_nexus.c optional altera_sdcard dev/amr/amr.c optional amr dev/amr/amr_cam.c optional amrp amr dev/amr/amr_disk.c optional amr dev/amr/amr_linux.c optional amr compat_linux dev/amr/amr_pci.c optional amr pci dev/an/if_an.c optional an dev/an/if_an_isa.c optional an isa dev/an/if_an_pccard.c optional an pccard dev/an/if_an_pci.c optional an pci dev/asr/asr.c optional asr pci \ compile-with "${NORMAL_C} ${NO_WARRAY_BOUNDS}" # dev/ata/ata_if.m optional ata | atacore dev/ata/ata-all.c optional ata | atacore dev/ata/ata-dma.c optional ata | atacore dev/ata/ata-lowlevel.c optional ata | atacore dev/ata/ata-sata.c optional ata | atacore dev/ata/ata-card.c optional ata pccard | atapccard dev/ata/ata-cbus.c optional ata pc98 | atapc98 dev/ata/ata-isa.c optional ata isa | ataisa dev/ata/ata-pci.c optional ata pci | atapci dev/ata/chipsets/ata-ahci.c optional ata pci | ataahci | ataacerlabs | \ ataati | ataintel | atajmicron | \ atavia | atanvidia dev/ata/chipsets/ata-acard.c optional ata pci | ataacard dev/ata/chipsets/ata-acerlabs.c optional ata pci | ataacerlabs dev/ata/chipsets/ata-adaptec.c optional ata pci | ataadaptec dev/ata/chipsets/ata-amd.c optional ata pci | ataamd dev/ata/chipsets/ata-ati.c optional ata pci | ataati dev/ata/chipsets/ata-cenatek.c optional ata pci | atacenatek dev/ata/chipsets/ata-cypress.c optional ata pci | atacypress dev/ata/chipsets/ata-cyrix.c optional ata pci | atacyrix dev/ata/chipsets/ata-highpoint.c optional ata pci | atahighpoint dev/ata/chipsets/ata-intel.c optional ata pci | ataintel dev/ata/chipsets/ata-ite.c optional ata pci | ataite dev/ata/chipsets/ata-jmicron.c optional ata pci | atajmicron dev/ata/chipsets/ata-marvell.c optional ata pci | atamarvell | ataadaptec dev/ata/chipsets/ata-micron.c optional ata pci | atamicron dev/ata/chipsets/ata-national.c optional ata pci | atanational dev/ata/chipsets/ata-netcell.c optional ata pci | atanetcell dev/ata/chipsets/ata-nvidia.c optional ata pci | atanvidia dev/ata/chipsets/ata-promise.c optional ata pci | atapromise dev/ata/chipsets/ata-serverworks.c optional ata pci | ataserverworks dev/ata/chipsets/ata-siliconimage.c optional ata pci | atasiliconimage | ataati dev/ata/chipsets/ata-sis.c optional ata pci | atasis dev/ata/chipsets/ata-via.c optional ata pci | atavia # dev/ath/if_ath_pci.c optional ath_pci pci \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/if_ath_ahb.c optional ath_ahb \ compile-with "${NORMAL_C} 
-I$S/dev/ath" # dev/ath/if_ath.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_beacon.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_btcoex.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_debug.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_keycache.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_led.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_lna_div.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx_edma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tx_ht.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_tdma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_sysctl.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_rx.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_rx_edma.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/if_ath_spectral.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ah_osdep.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/ath/ath_hal/ah.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v1.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v3.c optional ath_hal | ath_ar5211 | ath_ar5212 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v14.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_v4k.c \ optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_eeprom_9287.c \ optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_hal/ah_regdomain.c optional ath \ compile-with "${NORMAL_C} ${NO_WSHIFT_COUNT_NEGATIVE} ${NO_WSHIFT_COUNT_OVERFLOW} -I$S/dev/ath" # ar5210 dev/ath/ath_hal/ar5210/ar5210_attach.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_beacon.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_interrupts.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_keycache.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_misc.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_phy.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_power.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_recv.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_reset.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5210/ar5210_xmit.c optional ath_hal | ath_ar5210 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5211 dev/ath/ath_hal/ar5211/ar5211_attach.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_beacon.c 
optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_interrupts.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_keycache.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_misc.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_phy.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_power.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_recv.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_reset.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5211/ar5211_xmit.c optional ath_hal | ath_ar5211 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5212 dev/ath/ath_hal/ar5212/ar5212_ani.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_attach.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_beacon.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_eeprom.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_gpio.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_interrupts.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_keycache.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_misc.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_phy.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_power.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_recv.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_reset.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" 
dev/ath/ath_hal/ar5212/ar5212_rfgain.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5212_xmit.c \ optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \ ath_ar9285 ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar5416 (depends on ar5212) dev/ath/ath_hal/ar5416/ar5416_ani.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_attach.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_beacon.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_btcoex.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_iq.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcgain.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_cal_adcdc.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_eeprom.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_gpio.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_interrupts.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_keycache.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_misc.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_phy.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_power.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_radar.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_recv.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 
\ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_reset.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_spectral.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar5416_xmit.c \ optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \ ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9130 (depends upon ar5416) - also requires AH_SUPPORT_AR9130 # # Since this is an embedded MAC SoC, there's no need to compile it into the # default HAL. dev/ath/ath_hal/ar9001/ar9130_attach.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9001/ar9130_phy.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9001/ar9130_eeprom.c optional ath_ar9130 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9160 (depends on ar5416) dev/ath/ath_hal/ar9001/ar9160_attach.c optional ath_hal | ath_ar9160 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9280 (depends on ar5416) dev/ath/ath_hal/ar9002/ar9280_attach.c optional ath_hal | ath_ar9280 | \ ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9280_olc.c optional ath_hal | ath_ar9280 | \ ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9285 (depends on ar5416 and ar9280) dev/ath/ath_hal/ar9002/ar9285_attach.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_btcoex.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_reset.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_cal.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_phy.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285_diversity.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9287 (depends on ar5416) dev/ath/ath_hal/ar9002/ar9287_attach.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_reset.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_cal.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287_olc.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ar9300 contrib/dev/ath/ath_hal/ar9300/ar9300_ani.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_attach.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" 
contrib/dev/ath/ath_hal/ar9300/ar9300_eeprom.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WCONSTANT_CONVERSION}" contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_interrupts.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_keycache.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_mci.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_paprd.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_phy.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_power.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_radar.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_radio.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_recv.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_recv_ds.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WSOMETIMES_UNINITIALIZED}" contrib/dev/ath/ath_hal/ar9300/ar9300_stub.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_stub_funcs.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_timer.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" contrib/dev/ath/ath_hal/ar9300/ar9300_xmit_ds.c optional ath_hal | ath_ar9300 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal" # rf backends dev/ath/ath_hal/ar5212/ar2316.c optional ath_rf2316 \ compile-with "${NORMAL_C} -I$S/dev/ath 
-I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2317.c optional ath_rf2317 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2413.c optional ath_hal | ath_rf2413 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar2425.c optional ath_hal | ath_rf2425 | ath_rf2417 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5111.c optional ath_hal | ath_rf5111 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5112.c optional ath_hal | ath_rf5112 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5212/ar5413.c optional ath_hal | ath_rf5413 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar5416/ar2133.c optional ath_hal | ath_ar5416 | \ ath_ar9130 | ath_ar9160 | ath_ar9280 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9280.c optional ath_hal | ath_ar9280 | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9285.c optional ath_hal | ath_ar9285 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" dev/ath/ath_hal/ar9002/ar9287.c optional ath_hal | ath_ar9287 \ compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal" # ath rate control algorithms dev/ath/ath_rate/amrr/amrr.c optional ath_rate_amrr \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/onoe/onoe.c optional ath_rate_onoe \ compile-with "${NORMAL_C} -I$S/dev/ath" dev/ath/ath_rate/sample/sample.c optional ath_rate_sample \ compile-with "${NORMAL_C} -I$S/dev/ath" # ath DFS modules dev/ath/ath_dfs/null/dfs_null.c optional ath \ compile-with "${NORMAL_C} -I$S/dev/ath" # dev/bce/if_bce.c optional bce dev/bfe/if_bfe.c optional bfe dev/bge/if_bge.c optional bge dev/bktr/bktr_audio.c optional bktr pci dev/bktr/bktr_card.c optional bktr pci dev/bktr/bktr_core.c optional bktr pci dev/bktr/bktr_i2c.c optional bktr pci smbus dev/bktr/bktr_os.c optional bktr pci dev/bktr/bktr_tuner.c optional bktr pci dev/bktr/msp34xx.c optional bktr pci dev/buslogic/bt.c optional bt dev/buslogic/bt_eisa.c optional bt eisa dev/buslogic/bt_isa.c optional bt isa dev/buslogic/bt_mca.c optional bt mca dev/buslogic/bt_pci.c optional bt pci dev/bwi/bwimac.c optional bwi dev/bwi/bwiphy.c optional bwi dev/bwi/bwirf.c optional bwi dev/bwi/if_bwi.c optional bwi dev/bwi/if_bwi_pci.c optional bwi pci # XXX Work around clang warning, until maintainer approves fix. 
dev/bwn/if_bwn.c optional bwn siba_bwn \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/bxe/if_bxe.c optional bxe dev/bxe/bxe_link.c optional bxe dev/cardbus/cardbus.c optional cardbus dev/cardbus/cardbus_cis.c optional cardbus dev/cardbus/cardbus_device.c optional cardbus dev/cas/if_cas.c optional cas dev/cfi/cfi_bus_fdt.c optional cfi fdt dev/cfi/cfi_bus_nexus.c optional cfi dev/cfi/cfi_core.c optional cfi dev/cfi/cfi_dev.c optional cfi dev/cfi/cfi_disk.c optional cfid dev/ciss/ciss.c optional ciss dev/cm/smc90cx6.c optional cm dev/cmx/cmx.c optional cmx dev/cmx/cmx_pccard.c optional cmx pccard dev/cpufreq/ichss.c optional cpufreq dev/cs/if_cs.c optional cs dev/cs/if_cs_isa.c optional cs isa dev/cs/if_cs_pccard.c optional cs pccard dev/cxgb/cxgb_main.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_sge.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mc5.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc7323.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_vsc8211.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_ael1002.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_aq100x.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_mv88e1xxx.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_xgmac.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_t3_hw.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/common/cxgb_tn1010.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/sys/uipc_mvec.c optional cxgb pci \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \ compile-with "${NORMAL_C} -I$S/dev/cxgb" dev/cxgbe/t4_main.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_sge.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_l2t.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_tracer.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/common/t4_hw.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" t4fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk t4fw_cfg.fw:t4fw_cfg t4fw_cfg_uwire.fw:t4fw_cfg_uwire t4fw.fw:t4fw -mt4fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t4fw_cfg.c" t4fw_cfg.fwo optional cxgbe \ dependency "t4fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw_cfg.fwo" t4fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t4fw_cfg.fw" t4fw_cfg_uwire.fwo optional cxgbe \ dependency "t4fw_cfg_uwire.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw_cfg_uwire.fwo" t4fw_cfg_uwire.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw_cfg_uwire.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t4fw_cfg_uwire.fw" t4fw.fwo optional cxgbe \ dependency "t4fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t4fw.fwo" t4fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t4fw-1.8.11.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t4fw.fw" t5fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk 
t5fw_cfg.fw:t5fw_cfg t5fw.fw:t5fw -mt5fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "t5fw_cfg.c" t5fw_cfg.fwo optional cxgbe \ dependency "t5fw_cfg.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t5fw_cfg.fwo" t5fw_cfg.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t5fw_cfg.txt" \ compile-with "${CP} ${.ALLSRC} ${.TARGET}" \ no-obj no-implicit-rule \ clean "t5fw_cfg.fw" t5fw.fwo optional cxgbe \ dependency "t5fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "t5fw.fwo" t5fw.fw optional cxgbe \ dependency "$S/dev/cxgbe/firmware/t5fw-1.8.22.0.bin.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "t5fw.fw" dev/cy/cy.c optional cy dev/cy/cy_isa.c optional cy isa dev/cy/cy_pci.c optional cy pci dev/dc/if_dc.c optional dc pci dev/dc/dcphy.c optional dc pci dev/dc/pnphy.c optional dc pci dev/dcons/dcons.c optional dcons dev/dcons/dcons_crom.c optional dcons_crom dev/dcons/dcons_os.c optional dcons dev/de/if_de.c optional de pci dev/digi/CX.c optional digi_CX dev/digi/CX_PCI.c optional digi_CX_PCI dev/digi/EPCX.c optional digi_EPCX dev/digi/EPCX_PCI.c optional digi_EPCX_PCI dev/digi/Xe.c optional digi_Xe dev/digi/Xem.c optional digi_Xem dev/digi/Xr.c optional digi_Xr dev/digi/digi.c optional digi dev/digi/digi_isa.c optional digi isa dev/digi/digi_pci.c optional digi pci dev/dpt/dpt_eisa.c optional dpt eisa dev/dpt/dpt_pci.c optional dpt pci dev/dpt/dpt_scsi.c optional dpt dev/drm/ati_pcigart.c optional drm dev/drm/drm_agpsupport.c optional drm dev/drm/drm_auth.c optional drm dev/drm/drm_bufs.c optional drm dev/drm/drm_context.c optional drm dev/drm/drm_dma.c optional drm dev/drm/drm_drawable.c optional drm dev/drm/drm_drv.c optional drm dev/drm/drm_fops.c optional drm dev/drm/drm_hashtab.c optional drm dev/drm/drm_ioctl.c optional drm dev/drm/drm_irq.c optional drm dev/drm/drm_lock.c optional drm dev/drm/drm_memory.c optional drm dev/drm/drm_mm.c optional drm dev/drm/drm_pci.c optional drm dev/drm/drm_scatter.c optional drm dev/drm/drm_sman.c optional drm dev/drm/drm_sysctl.c optional drm dev/drm/drm_vm.c optional drm dev/drm/i915_dma.c optional i915drm dev/drm/i915_drv.c optional i915drm dev/drm/i915_irq.c optional i915drm dev/drm/i915_mem.c optional i915drm dev/drm/i915_suspend.c optional i915drm dev/drm/mach64_dma.c optional mach64drm dev/drm/mach64_drv.c optional mach64drm dev/drm/mach64_irq.c optional mach64drm dev/drm/mach64_state.c optional mach64drm dev/drm/mga_dma.c optional mgadrm dev/drm/mga_drv.c optional mgadrm dev/drm/mga_irq.c optional mgadrm dev/drm/mga_state.c optional mgadrm dev/drm/mga_warp.c optional mgadrm dev/drm/r128_cce.c optional r128drm \ compile-with "${NORMAL_C} ${NO_WUNUSED_VALUE} ${NO_WCONSTANT_CONVERSION}" dev/drm/r128_drv.c optional r128drm dev/drm/r128_irq.c optional r128drm dev/drm/r128_state.c optional r128drm \ compile-with "${NORMAL_C} ${NO_WUNUSED_VALUE}" dev/drm/r300_cmdbuf.c optional radeondrm dev/drm/r600_blit.c optional radeondrm dev/drm/r600_cp.c optional radeondrm \ compile-with "${NORMAL_C} ${NO_WUNUSED_VALUE} ${NO_WCONSTANT_CONVERSION}" dev/drm/radeon_cp.c optional radeondrm \ compile-with "${NORMAL_C} ${NO_WUNUSED_VALUE} ${NO_WCONSTANT_CONVERSION}" dev/drm/radeon_cs.c optional radeondrm dev/drm/radeon_drv.c optional radeondrm dev/drm/radeon_irq.c optional radeondrm dev/drm/radeon_mem.c optional radeondrm dev/drm/radeon_state.c optional radeondrm dev/drm/savage_bci.c optional savagedrm dev/drm/savage_drv.c optional savagedrm dev/drm/savage_state.c optional savagedrm 
dev/drm/sis_drv.c optional sisdrm dev/drm/sis_ds.c optional sisdrm dev/drm/sis_mm.c optional sisdrm dev/drm/tdfx_drv.c optional tdfxdrm dev/drm/via_dma.c optional viadrm dev/drm/via_dmablit.c optional viadrm dev/drm/via_drv.c optional viadrm dev/drm/via_irq.c optional viadrm dev/drm/via_map.c optional viadrm dev/drm/via_mm.c optional viadrm dev/drm/via_verifier.c optional viadrm dev/drm/via_video.c optional viadrm dev/ed/if_ed.c optional ed dev/ed/if_ed_novell.c optional ed dev/ed/if_ed_rtl80x9.c optional ed dev/ed/if_ed_pccard.c optional ed pccard dev/ed/if_ed_pci.c optional ed pci dev/eisa/eisa_if.m standard dev/eisa/eisaconf.c optional eisa dev/e1000/if_em.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/if_lem.c optional em \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/if_igb.c optional igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_80003es2lan.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82540.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82541.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82542.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82543.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82571.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_82575.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_ich8lan.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_i210.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_api.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_mac.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_manage.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_nvm.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_phy.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_vf.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_mbx.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/e1000/e1000_osdep.c optional em | igb \ compile-with "${NORMAL_C} -I$S/dev/e1000" dev/et/if_et.c optional et dev/en/if_en_pci.c optional en pci dev/en/midway.c optional en dev/ep/if_ep.c optional ep dev/ep/if_ep_eisa.c optional ep eisa dev/ep/if_ep_isa.c optional ep isa dev/ep/if_ep_mca.c optional ep mca dev/ep/if_ep_pccard.c optional ep pccard dev/esp/esp_pci.c optional esp pci dev/esp/ncr53c9x.c optional esp dev/etherswitch/arswitch/arswitch.c optional arswitch dev/etherswitch/arswitch/arswitch_reg.c optional arswitch dev/etherswitch/arswitch/arswitch_phy.c optional arswitch dev/etherswitch/arswitch/arswitch_8216.c optional arswitch dev/etherswitch/arswitch/arswitch_8226.c optional arswitch dev/etherswitch/arswitch/arswitch_8316.c optional arswitch dev/etherswitch/arswitch/arswitch_7240.c optional arswitch dev/etherswitch/arswitch/arswitch_vlans.c optional arswitch dev/etherswitch/etherswitch.c optional etherswitch dev/etherswitch/etherswitch_if.m optional etherswitch dev/etherswitch/ip17x/ip17x.c optional ip17x dev/etherswitch/ip17x/ip175c.c optional ip17x dev/etherswitch/ip17x/ip175d.c optional ip17x dev/etherswitch/ip17x/ip17x_phy.c optional ip17x dev/etherswitch/ip17x/ip17x_vlans.c optional ip17x dev/etherswitch/mdio_if.m optional miiproxy dev/etherswitch/mdio.c optional 
miiproxy dev/etherswitch/miiproxy.c optional miiproxy dev/etherswitch/rtl8366/rtl8366rb.c optional rtl8366rb dev/etherswitch/ukswitch/ukswitch.c optional ukswitch dev/ex/if_ex.c optional ex dev/ex/if_ex_isa.c optional ex isa dev/ex/if_ex_pccard.c optional ex pccard dev/exca/exca.c optional cbb dev/fatm/if_fatm.c optional fatm pci dev/fb/splash.c optional splash dev/fdt/fdt_common.c optional fdt dev/fdt/fdt_pci.c optional fdt pci dev/fdt/fdt_slicer.c optional fdt cfi | fdt nand dev/fdt/fdt_static_dtb.S optional fdt fdt_dtb_static \ dependency "$S/boot/fdt/dts/${FDT_DTS_FILE}" dev/fdt/fdtbus.c optional fdt dev/fdt/simplebus.c optional fdt dev/fe/if_fe.c optional fe dev/fe/if_fe_pccard.c optional fe pccard dev/filemon/filemon.c optional filemon dev/firewire/firewire.c optional firewire dev/firewire/fwcrom.c optional firewire dev/firewire/fwdev.c optional firewire dev/firewire/fwdma.c optional firewire dev/firewire/fwmem.c optional firewire dev/firewire/fwohci.c optional firewire dev/firewire/fwohci_pci.c optional firewire pci dev/firewire/if_fwe.c optional fwe dev/firewire/if_fwip.c optional fwip dev/firewire/sbp.c optional sbp dev/firewire/sbp_targ.c optional sbp_targ dev/flash/at45d.c optional at45d dev/flash/mx25l.c optional mx25l dev/fxp/if_fxp.c optional fxp dev/fxp/inphy.c optional fxp dev/gem/if_gem.c optional gem dev/gem/if_gem_pci.c optional gem pci dev/gem/if_gem_sbus.c optional gem sbus dev/gpio/gpiobus.c optional gpio \ dependency "gpiobus_if.h" dev/gpio/gpioc.c optional gpio \ dependency "gpio_if.h" dev/gpio/gpioiic.c optional gpioiic dev/gpio/gpioled.c optional gpioled dev/gpio/gpio_if.m optional gpio dev/gpio/gpiobus_if.m optional gpio dev/hatm/if_hatm.c optional hatm pci dev/hatm/if_hatm_intr.c optional hatm pci dev/hatm/if_hatm_ioctl.c optional hatm pci dev/hatm/if_hatm_rx.c optional hatm pci dev/hatm/if_hatm_tx.c optional hatm pci dev/hifn/hifn7751.c optional hifn dev/hme/if_hme.c optional hme dev/hme/if_hme_pci.c optional hme pci dev/hme/if_hme_sbus.c optional hme sbus dev/hptiop/hptiop.c optional hptiop scbus dev/hwpmc/hwpmc_logging.c optional hwpmc dev/hwpmc/hwpmc_mod.c optional hwpmc dev/hwpmc/hwpmc_soft.c optional hwpmc dev/ichsmb/ichsmb.c optional ichsmb dev/ichsmb/ichsmb_pci.c optional ichsmb pci dev/ida/ida.c optional ida dev/ida/ida_disk.c optional ida dev/ida/ida_eisa.c optional ida eisa dev/ida/ida_pci.c optional ida pci dev/ie/if_ie.c optional ie isa nowerror dev/ie/if_ie_isa.c optional ie isa dev/ieee488/ibfoo.c optional pcii | tnt4882 dev/ieee488/pcii.c optional pcii dev/ieee488/tnt4882.c optional tnt4882 dev/ieee488/upd7210.c optional pcii | tnt4882 dev/iicbus/ad7418.c optional ad7418 dev/iicbus/ds133x.c optional ds133x dev/iicbus/ds1374.c optional ds1374 dev/iicbus/ds1672.c optional ds1672 dev/iicbus/icee.c optional icee dev/iicbus/if_ic.c optional ic dev/iicbus/iic.c optional iic dev/iicbus/iicbb.c optional iicbb dev/iicbus/iicbb_if.m optional iicbb dev/iicbus/iicbus.c optional iicbus dev/iicbus/iicbus_if.m optional iicbus dev/iicbus/iiconf.c optional iicbus dev/iicbus/iicsmb.c optional iicsmb \ dependency "iicbus_if.h" dev/iicbus/iicoc.c optional iicoc dev/iicbus/pcf8563.c optional pcf8563 dev/iicbus/s35390a.c optional s35390a dev/iir/iir.c optional iir dev/iir/iir_ctrl.c optional iir dev/iir/iir_pci.c optional iir pci # XXX Work around clang warning, until maintainer approves fix. 
dev/ips/ips.c optional ips \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/ips/ips_commands.c optional ips dev/ips/ips_disk.c optional ips dev/ips/ips_ioctl.c optional ips dev/ips/ips_pci.c optional ips pci dev/ipw/if_ipw.c optional ipw ipwbssfw.c optional ipwbssfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_bss.fw:ipw_bss:130 -lintel_ipw -mipw_bss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwbssfw.c" ipw_bss.fwo optional ipwbssfw | ipwfw \ dependency "ipw_bss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_bss.fwo" ipw_bss.fw optional ipwbssfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_bss.fw" ipwibssfw.c optional ipwibssfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_ibss.fw:ipw_ibss:130 -lintel_ipw -mipw_ibss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwibssfw.c" ipw_ibss.fwo optional ipwibssfw | ipwfw \ dependency "ipw_ibss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_ibss.fwo" ipw_ibss.fw optional ipwibssfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3-i.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_ibss.fw" ipwmonitorfw.c optional ipwmonitorfw | ipwfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk ipw_monitor.fw:ipw_monitor:130 -lintel_ipw -mipw_monitor -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "ipwmonitorfw.c" ipw_monitor.fwo optional ipwmonitorfw | ipwfw \ dependency "ipw_monitor.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "ipw_monitor.fwo" ipw_monitor.fw optional ipwmonitorfw | ipwfw \ dependency "$S/contrib/dev/ipw/ipw2100-1.3-p.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "ipw_monitor.fw" dev/iscsi_initiator/iscsi.c optional iscsi_initiator scbus dev/iscsi_initiator/iscsi_subr.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_cam.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_soc.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_sm.c optional iscsi_initiator scbus dev/iscsi_initiator/isc_subr.c optional iscsi_initiator scbus dev/isf/isf.c optional isf dev/isf/isf_fdt.c optional isf fdt dev/isf/isf_nexus.c optional isf dev/isp/isp.c optional isp dev/isp/isp_freebsd.c optional isp dev/isp/isp_library.c optional isp dev/isp/isp_pci.c optional isp pci dev/isp/isp_sbus.c optional isp sbus dev/isp/isp_target.c optional isp dev/ispfw/ispfw.c optional ispfw dev/iwi/if_iwi.c optional iwi iwibssfw.c optional iwibssfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_bss.fw:iwi_bss:300 -lintel_iwi -miwi_bss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwibssfw.c" iwi_bss.fwo optional iwibssfw | iwifw \ dependency "iwi_bss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_bss.fwo" iwi_bss.fw optional iwibssfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-bss.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_bss.fw" iwiibssfw.c optional iwiibssfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_ibss.fw:iwi_ibss:300 -lintel_iwi -miwi_ibss -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwiibssfw.c" iwi_ibss.fwo optional iwiibssfw | iwifw \ dependency "iwi_ibss.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_ibss.fwo" iwi_ibss.fw optional iwiibssfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-ibss.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj 
no-implicit-rule \ clean "iwi_ibss.fw" iwimonitorfw.c optional iwimonitorfw | iwifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwi_monitor.fw:iwi_monitor:300 -lintel_iwi -miwi_monitor -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwimonitorfw.c" iwi_monitor.fwo optional iwimonitorfw | iwifw \ dependency "iwi_monitor.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwi_monitor.fwo" iwi_monitor.fw optional iwimonitorfw | iwifw \ dependency "$S/contrib/dev/iwi/ipw2200-sniffer.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwi_monitor.fw" dev/iwn/if_iwn.c optional iwn iwn1000fw.c optional iwn1000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn1000.fw:iwn1000fw -miwn1000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn1000fw.c" iwn1000fw.fwo optional iwn1000fw | iwnfw \ dependency "iwn1000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn1000fw.fwo" iwn1000.fw optional iwn1000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-1000-39.31.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn1000.fw" iwn4965fw.c optional iwn4965fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn4965.fw:iwn4965fw -miwn4965fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn4965fw.c" iwn4965fw.fwo optional iwn4965fw | iwnfw \ dependency "iwn4965.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn4965fw.fwo" iwn4965.fw optional iwn4965fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-4965-228.61.2.24.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn4965.fw" iwn5000fw.c optional iwn5000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn5000.fw:iwn5000fw -miwn5000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn5000fw.c" iwn5000fw.fwo optional iwn5000fw | iwnfw \ dependency "iwn5000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn5000fw.fwo" iwn5000.fw optional iwn5000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-5000-8.83.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn5000.fw" iwn5150fw.c optional iwn5150fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn5150.fw:iwn5150fw -miwn5150fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn5150fw.c" iwn5150fw.fwo optional iwn5150fw | iwnfw \ dependency "iwn5150.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn5150fw.fwo" iwn5150.fw optional iwn5150fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-5150-8.24.2.2.fw.uu"\ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn5150.fw" iwn6000fw.c optional iwn6000fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000.fw:iwn6000fw -miwn6000fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000fw.c" iwn6000fw.fwo optional iwn6000fw | iwnfw \ dependency "iwn6000.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000fw.fwo" iwn6000.fw optional iwn6000fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000-9.221.4.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000.fw" iwn6000g2afw.c optional iwn6000g2afw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000g2a.fw:iwn6000g2afw -miwn6000g2afw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000g2afw.c" iwn6000g2afw.fwo optional iwn6000g2afw | iwnfw \ dependency "iwn6000g2a.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean 
"iwn6000g2afw.fwo" iwn6000g2a.fw optional iwn6000g2afw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000g2a-17.168.5.2.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000g2a.fw" iwn6000g2bfw.c optional iwn6000g2bfw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6000g2b.fw:iwn6000g2bfw -miwn6000g2bfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6000g2bfw.c" iwn6000g2bfw.fwo optional iwn6000g2bfw | iwnfw \ dependency "iwn6000g2b.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6000g2bfw.fwo" iwn6000g2b.fw optional iwn6000g2bfw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6000g2b-17.168.5.2.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6000g2b.fw" iwn6050fw.c optional iwn6050fw | iwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk iwn6050.fw:iwn6050fw -miwn6050fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "iwn6050fw.c" iwn6050fw.fwo optional iwn6050fw | iwnfw \ dependency "iwn6050.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "iwn6050fw.fwo" iwn6050.fw optional iwn6050fw | iwnfw \ dependency "$S/contrib/dev/iwn/iwlwifi-6050-41.28.5.1.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "iwn6050.fw" dev/ixgb/if_ixgb.c optional ixgb dev/ixgb/ixgb_ee.c optional ixgb dev/ixgb/ixgb_hw.c optional ixgb dev/ixgbe/ixgbe.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP -DIXGBE_FDIR" dev/ixgbe/ixv.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_phy.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_api.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_common.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_mbx.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_vf.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_82598.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_82599.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_x540.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82598.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/ixgbe/ixgbe_dcb_82599.c optional ixgbe inet \ compile-with "${NORMAL_C} -I$S/dev/ixgbe" dev/jme/if_jme.c optional jme pci dev/joy/joy.c optional joy dev/joy/joy_isa.c optional joy isa dev/joy/joy_pccard.c optional joy pccard dev/kbdmux/kbdmux.c optional kbdmux dev/ksyms/ksyms.c optional ksyms dev/le/am7990.c optional le dev/le/am79900.c optional le dev/le/if_le_pci.c optional le pci dev/le/lance.c optional le dev/led/led.c standard dev/lge/if_lge.c optional lge dev/lmc/if_lmc.c optional lmc dev/malo/if_malo.c optional malo dev/malo/if_malohal.c optional malo dev/malo/if_malo_pci.c optional malo pci dev/mc146818/mc146818.c optional mc146818 dev/mca/mca_bus.c optional mca dev/mcd/mcd.c optional mcd isa nowerror dev/mcd/mcd_isa.c optional mcd isa nowerror dev/md/md.c optional md dev/mem/memdev.c optional mem dev/mem/memutil.c optional mem dev/mfi/mfi.c optional mfi dev/mfi/mfi_debug.c optional mfi dev/mfi/mfi_pci.c optional mfi pci dev/mfi/mfi_disk.c optional mfi dev/mfi/mfi_syspd.c optional mfi dev/mfi/mfi_tbolt.c optional mfi dev/mfi/mfi_linux.c optional mfi compat_linux dev/mfi/mfi_cam.c optional mfip 
scbus dev/mii/acphy.c optional miibus | acphy dev/mii/amphy.c optional miibus | amphy dev/mii/atphy.c optional miibus | atphy dev/mii/axphy.c optional miibus | axphy dev/mii/bmtphy.c optional miibus | bmtphy dev/mii/brgphy.c optional miibus | brgphy dev/mii/ciphy.c optional miibus | ciphy dev/mii/e1000phy.c optional miibus | e1000phy dev/mii/gentbi.c optional miibus | gentbi dev/mii/icsphy.c optional miibus | icsphy dev/mii/ip1000phy.c optional miibus | ip1000phy dev/mii/jmphy.c optional miibus | jmphy dev/mii/lxtphy.c optional miibus | lxtphy dev/mii/mii.c optional miibus | mii dev/mii/mii_bitbang.c optional miibus | mii_bitbang dev/mii/mii_physubr.c optional miibus | mii dev/mii/miibus_if.m optional miibus | mii dev/mii/mlphy.c optional miibus | mlphy dev/mii/nsgphy.c optional miibus | nsgphy dev/mii/nsphy.c optional miibus | nsphy dev/mii/nsphyter.c optional miibus | nsphyter dev/mii/pnaphy.c optional miibus | pnaphy dev/mii/qsphy.c optional miibus | qsphy dev/mii/rdcphy.c optional miibus | rdcphy dev/mii/rgephy.c optional miibus | rgephy dev/mii/rlphy.c optional miibus | rlphy dev/mii/rlswitch.c optional rlswitch dev/mii/smcphy.c optional miibus | smcphy dev/mii/smscphy.c optional miibus | smscphy dev/mii/tdkphy.c optional miibus | tdkphy dev/mii/tlphy.c optional miibus | tlphy dev/mii/truephy.c optional miibus | truephy dev/mii/ukphy.c optional miibus | mii dev/mii/ukphy_subr.c optional miibus | mii dev/mii/xmphy.c optional miibus | xmphy dev/mk48txx/mk48txx.c optional mk48txx dev/mlx/mlx.c optional mlx dev/mlx/mlx_disk.c optional mlx dev/mlx/mlx_pci.c optional mlx pci dev/mly/mly.c optional mly dev/mmc/mmc.c optional mmc dev/mmc/mmcbr_if.m standard dev/mmc/mmcbus_if.m standard dev/mmc/mmcsd.c optional mmcsd dev/mn/if_mn.c optional mn pci dev/mps/mps.c optional mps dev/mps/mps_config.c optional mps # XXX Work around clang warning, until maintainer approves fix. 
dev/mps/mps_mapping.c optional mps \ compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}" dev/mps/mps_pci.c optional mps pci dev/mps/mps_sas.c optional mps \ compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}" dev/mps/mps_sas_lsi.c optional mps dev/mps/mps_table.c optional mps dev/mps/mps_user.c optional mps dev/mpt/mpt.c optional mpt dev/mpt/mpt_cam.c optional mpt dev/mpt/mpt_debug.c optional mpt dev/mpt/mpt_pci.c optional mpt pci dev/mpt/mpt_raid.c optional mpt dev/mpt/mpt_user.c optional mpt dev/msk/if_msk.c optional msk dev/mvs/mvs.c optional mvs dev/mvs/mvs_if.m optional mvs dev/mvs/mvs_pci.c optional mvs pci dev/mwl/if_mwl.c optional mwl dev/mwl/if_mwl_pci.c optional mwl pci dev/mwl/mwlhal.c optional mwl mwlfw.c optional mwlfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk mw88W8363.fw:mw88W8363fw mwlboot.fw:mwlboot -mmwl -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "mwlfw.c" mw88W8363.fwo optional mwlfw \ dependency "mw88W8363.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "mw88W8363.fwo" mw88W8363.fw optional mwlfw \ dependency "$S/contrib/dev/mwl/mw88W8363.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "mw88W8363.fw" mwlboot.fwo optional mwlfw \ dependency "mwlboot.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "mwlboot.fwo" mwlboot.fw optional mwlfw \ dependency "$S/contrib/dev/mwl/mwlboot.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "mwlboot.fw" dev/mxge/if_mxge.c optional mxge pci dev/mxge/mxge_eth_z8e.c optional mxge pci dev/mxge/mxge_ethp_z8e.c optional mxge pci dev/mxge/mxge_rss_eth_z8e.c optional mxge pci dev/mxge/mxge_rss_ethp_z8e.c optional mxge pci dev/my/if_my.c optional my dev/nand/nand.c optional nand dev/nand/nand_bbt.c optional nand dev/nand/nand_cdev.c optional nand dev/nand/nand_generic.c optional nand dev/nand/nand_geom.c optional nand dev/nand/nand_id.c optional nand dev/nand/nandbus.c optional nand dev/nand/nandbus_if.m optional nand dev/nand/nand_if.m optional nand dev/nand/nandsim.c optional nandsim nand dev/nand/nandsim_chip.c optional nandsim nand dev/nand/nandsim_ctrl.c optional nandsim nand dev/nand/nandsim_log.c optional nandsim nand dev/nand/nandsim_swap.c optional nandsim nand dev/nand/nfc_if.m optional nand dev/ncv/ncr53c500.c optional ncv dev/ncv/ncr53c500_pccard.c optional ncv pccard dev/netmap/netmap.c optional netmap dev/nge/if_nge.c optional nge dev/nxge/if_nxge.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-device.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-mm.c optional nxge dev/nxge/xgehal/xge-queue.c optional nxge dev/nxge/xgehal/xgehal-driver.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-ring.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-channel.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-fifo.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-stats.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nxge/xgehal/xgehal-config.c optional nxge dev/nxge/xgehal/xgehal-mgmt.c optional nxge \ compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}" dev/nmdm/nmdm.c optional nmdm dev/nsp/nsp.c optional nsp dev/nsp/nsp_pccard.c optional nsp pccard dev/null/null.c standard dev/oce/oce_hw.c optional oce pci dev/oce/oce_if.c optional oce pci dev/oce/oce_mbox.c optional oce pci dev/oce/oce_queue.c optional 
oce pci dev/oce/oce_sysctl.c optional oce pci dev/oce/oce_util.c optional oce pci dev/ofw/ofw_bus_if.m optional fdt dev/ofw/ofw_bus_subr.c optional fdt dev/ofw/ofw_fdt.c optional fdt dev/ofw/ofw_if.m optional fdt dev/ofw/openfirm.c optional fdt dev/ofw/openfirmio.c optional fdt dev/patm/if_patm.c optional patm pci dev/patm/if_patm_attach.c optional patm pci dev/patm/if_patm_intr.c optional patm pci dev/patm/if_patm_ioctl.c optional patm pci dev/patm/if_patm_rtables.c optional patm pci dev/patm/if_patm_rx.c optional patm pci dev/patm/if_patm_tx.c optional patm pci dev/pbio/pbio.c optional pbio isa dev/pccard/card_if.m standard dev/pccard/pccard.c optional pccard dev/pccard/pccard_cis.c optional pccard dev/pccard/pccard_cis_quirks.c optional pccard dev/pccard/pccard_device.c optional pccard dev/pccard/power_if.m standard dev/pccbb/pccbb.c optional cbb dev/pccbb/pccbb_isa.c optional cbb isa dev/pccbb/pccbb_pci.c optional cbb pci dev/pcf/pcf.c optional pcf dev/pci/eisa_pci.c optional pci eisa dev/pci/fixup_pci.c optional pci dev/pci/hostb_pci.c optional pci dev/pci/ignore_pci.c optional pci dev/pci/isa_pci.c optional pci isa dev/pci/pci.c optional pci dev/pci/pci_if.m standard dev/pci/pci_pci.c optional pci dev/pci/pci_subr.c optional pci dev/pci/pci_user.c optional pci dev/pci/pcib_if.m standard dev/pci/vga_pci.c optional pci dev/pcn/if_pcn.c optional pcn pci dev/pdq/if_fea.c optional fea eisa dev/pdq/if_fpa.c optional fpa pci dev/pdq/pdq.c optional nowerror fea eisa | fpa pci dev/pdq/pdq_ifsubr.c optional nowerror fea eisa | fpa pci dev/ppbus/if_plip.c optional plip dev/ppbus/immio.c optional vpo dev/ppbus/lpbb.c optional lpbb dev/ppbus/lpt.c optional lpt dev/ppbus/pcfclock.c optional pcfclock dev/ppbus/ppb_1284.c optional ppbus dev/ppbus/ppb_base.c optional ppbus dev/ppbus/ppb_msq.c optional ppbus dev/ppbus/ppbconf.c optional ppbus dev/ppbus/ppbus_if.m optional ppbus dev/ppbus/ppi.c optional ppi dev/ppbus/pps.c optional pps dev/ppbus/vpo.c optional vpo dev/ppbus/vpoio.c optional vpo dev/ppc/ppc.c optional ppc dev/ppc/ppc_acpi.c optional ppc acpi dev/ppc/ppc_isa.c optional ppc isa dev/ppc/ppc_pci.c optional ppc pci dev/ppc/ppc_puc.c optional ppc puc dev/pst/pst-iop.c optional pst dev/pst/pst-pci.c optional pst pci dev/pst/pst-raid.c optional pst dev/pty/pty.c optional pty dev/puc/puc.c optional puc dev/puc/puc_cfg.c optional puc dev/puc/puc_pccard.c optional puc pccard dev/puc/puc_pci.c optional puc pci dev/puc/pucdata.c optional puc pci dev/quicc/quicc_core.c optional quicc dev/ral/rt2560.c optional ral dev/ral/rt2661.c optional ral dev/ral/rt2860.c optional ral dev/ral/if_ral_pci.c optional ral pci rt2561fw.c optional rt2561fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2561.fw:rt2561fw -mrt2561 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2561fw.c" rt2561fw.fwo optional rt2561fw | ralfw \ dependency "rt2561.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2561fw.fwo" rt2561.fw optional rt2561fw | ralfw \ dependency "$S/contrib/dev/ral/rt2561.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2561.fw" rt2561sfw.c optional rt2561sfw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2561s.fw:rt2561sfw -mrt2561s -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2561sfw.c" rt2561sfw.fwo optional rt2561sfw | ralfw \ dependency "rt2561s.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2561sfw.fwo" rt2561s.fw optional rt2561sfw | ralfw \ dependency "$S/contrib/dev/ral/rt2561s.fw.uu" \ 
compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2561s.fw" rt2661fw.c optional rt2661fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2661.fw:rt2661fw -mrt2661 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2661fw.c" rt2661fw.fwo optional rt2661fw | ralfw \ dependency "rt2661.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2661fw.fwo" rt2661.fw optional rt2661fw | ralfw \ dependency "$S/contrib/dev/ral/rt2661.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2661.fw" rt2860fw.c optional rt2860fw | ralfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rt2860.fw:rt2860fw -mrt2860 -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rt2860fw.c" rt2860fw.fwo optional rt2860fw | ralfw \ dependency "rt2860.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rt2860fw.fwo" rt2860.fw optional rt2860fw | ralfw \ dependency "$S/contrib/dev/ral/rt2860.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rt2860.fw" dev/random/harvest.c standard dev/random/hash.c optional random dev/random/probe.c optional random dev/random/random_adaptors.c standard dev/random/randomdev.c optional random dev/random/randomdev_soft.c optional random dev/random/yarrow.c optional random dev/rc/rc.c optional rc dev/re/if_re.c optional re dev/rndtest/rndtest.c optional rndtest dev/rp/rp.c optional rp dev/rp/rp_isa.c optional rp isa dev/rp/rp_pci.c optional rp pci dev/safe/safe.c optional safe dev/scc/scc_if.m optional scc dev/scc/scc_bfe_ebus.c optional scc ebus dev/scc/scc_bfe_quicc.c optional scc quicc dev/scc/scc_bfe_sbus.c optional scc fhc | scc sbus dev/scc/scc_core.c optional scc dev/scc/scc_dev_quicc.c optional scc quicc dev/scc/scc_dev_sab82532.c optional scc dev/scc/scc_dev_z8530.c optional scc dev/scd/scd.c optional scd isa dev/scd/scd_isa.c optional scd isa dev/sdhci/sdhci.c optional sdhci dev/sdhci/sdhci_if.m optional sdhci dev/sdhci/sdhci_pci.c optional sdhci pci dev/sf/if_sf.c optional sf pci dev/sge/if_sge.c optional sge pci dev/si/si.c optional si dev/si/si2_z280.c optional si dev/si/si3_t225.c optional si dev/si/si_eisa.c optional si eisa dev/si/si_isa.c optional si isa dev/si/si_pci.c optional si pci dev/siba/siba.c optional siba dev/siba/siba_bwn.c optional siba_bwn pci dev/siba/siba_cc.c optional siba dev/siba/siba_core.c optional siba | siba_bwn pci dev/siba/siba_pcib.c optional siba pci dev/siis/siis.c optional siis pci dev/sis/if_sis.c optional sis pci dev/sk/if_sk.c optional sk pci dev/smbus/smb.c optional smb dev/smbus/smbconf.c optional smbus dev/smbus/smbus.c optional smbus dev/smbus/smbus_if.m optional smbus dev/smc/if_smc.c optional smc dev/sn/if_sn.c optional sn dev/sn/if_sn_isa.c optional sn isa dev/sn/if_sn_pccard.c optional sn pccard dev/snp/snp.c optional snp dev/sound/clone.c optional sound dev/sound/unit.c optional sound dev/sound/isa/ad1816.c optional snd_ad1816 isa dev/sound/isa/ess.c optional snd_ess isa dev/sound/isa/gusc.c optional snd_gusc isa dev/sound/isa/mss.c optional snd_mss isa dev/sound/isa/sb16.c optional snd_sb16 isa dev/sound/isa/sb8.c optional snd_sb8 isa dev/sound/isa/sbc.c optional snd_sbc isa dev/sound/isa/sndbuf_dma.c optional sound isa dev/sound/pci/als4000.c optional snd_als4000 pci dev/sound/pci/atiixp.c optional snd_atiixp pci dev/sound/pci/cmi.c optional snd_cmi pci dev/sound/pci/cs4281.c optional snd_cs4281 pci dev/sound/pci/csa.c optional snd_csa pci dev/sound/pci/csapcm.c optional snd_csa pci dev/sound/pci/ds1.c optional snd_ds1 pci 
dev/sound/pci/emu10k1.c optional snd_emu10k1 pci dev/sound/pci/emu10kx.c optional snd_emu10kx pci dev/sound/pci/emu10kx-pcm.c optional snd_emu10kx pci dev/sound/pci/emu10kx-midi.c optional snd_emu10kx pci dev/sound/pci/envy24.c optional snd_envy24 pci dev/sound/pci/envy24ht.c optional snd_envy24ht pci dev/sound/pci/es137x.c optional snd_es137x pci dev/sound/pci/fm801.c optional snd_fm801 pci dev/sound/pci/ich.c optional snd_ich pci dev/sound/pci/maestro.c optional snd_maestro pci dev/sound/pci/maestro3.c optional snd_maestro3 pci dev/sound/pci/neomagic.c optional snd_neomagic pci dev/sound/pci/solo.c optional snd_solo pci dev/sound/pci/spicds.c optional snd_spicds pci dev/sound/pci/t4dwave.c optional snd_t4dwave pci dev/sound/pci/via8233.c optional snd_via8233 pci dev/sound/pci/via82c686.c optional snd_via82c686 pci dev/sound/pci/vibes.c optional snd_vibes pci dev/sound/pci/hda/hdaa.c optional snd_hda pci dev/sound/pci/hda/hdaa_patches.c optional snd_hda pci dev/sound/pci/hda/hdac.c optional snd_hda pci dev/sound/pci/hda/hdac_if.m optional snd_hda pci dev/sound/pci/hda/hdacc.c optional snd_hda pci dev/sound/pci/hdspe.c optional snd_hdspe pci dev/sound/pci/hdspe-pcm.c optional snd_hdspe pci dev/sound/pcm/ac97.c optional sound dev/sound/pcm/ac97_if.m optional sound dev/sound/pcm/ac97_patch.c optional sound dev/sound/pcm/buffer.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/channel.c optional sound dev/sound/pcm/channel_if.m optional sound dev/sound/pcm/dsp.c optional sound dev/sound/pcm/feeder.c optional sound dev/sound/pcm/feeder_chain.c optional sound dev/sound/pcm/feeder_eq.c optional sound \ dependency "feeder_eq_gen.h" \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_if.m optional sound dev/sound/pcm/feeder_format.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_matrix.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_mixer.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_rate.c optional sound \ dependency "feeder_rate_gen.h" \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/feeder_volume.c optional sound \ dependency "snd_fxdiv_gen.h" dev/sound/pcm/mixer.c optional sound dev/sound/pcm/mixer_if.m optional sound dev/sound/pcm/sndstat.c optional sound dev/sound/pcm/sound.c optional sound dev/sound/pcm/vchan.c optional sound dev/sound/usb/uaudio.c optional snd_uaudio usb dev/sound/usb/uaudio_pcm.c optional snd_uaudio usb dev/sound/midi/midi.c optional sound dev/sound/midi/mpu401.c optional sound dev/sound/midi/mpu_if.m optional sound dev/sound/midi/mpufoi_if.m optional sound dev/sound/midi/sequencer.c optional sound dev/sound/midi/synth_if.m optional sound dev/spibus/spibus.c optional spibus \ dependency "spibus_if.h" dev/spibus/spibus_if.m optional spibus dev/ste/if_ste.c optional ste pci dev/stg/tmc18c30.c optional stg dev/stg/tmc18c30_isa.c optional stg isa dev/stg/tmc18c30_pccard.c optional stg pccard dev/stg/tmc18c30_pci.c optional stg pci dev/stg/tmc18c30_subr.c optional stg dev/stge/if_stge.c optional stge dev/streams/streams.c optional streams dev/sym/sym_hipd.c optional sym \ dependency "$S/dev/sym/sym_{conf,defs}.h" dev/syscons/blank/blank_saver.c optional blank_saver dev/syscons/daemon/daemon_saver.c optional daemon_saver dev/syscons/dragon/dragon_saver.c optional dragon_saver dev/syscons/fade/fade_saver.c optional fade_saver dev/syscons/fire/fire_saver.c optional fire_saver dev/syscons/green/green_saver.c optional green_saver dev/syscons/logo/logo.c optional logo_saver dev/syscons/logo/logo_saver.c optional 
logo_saver dev/syscons/rain/rain_saver.c optional rain_saver dev/syscons/schistory.c optional sc dev/syscons/scmouse.c optional sc dev/syscons/scterm.c optional sc dev/syscons/scvidctl.c optional sc dev/syscons/snake/snake_saver.c optional snake_saver dev/syscons/star/star_saver.c optional star_saver dev/syscons/syscons.c optional sc dev/syscons/sysmouse.c optional sc dev/syscons/warp/warp_saver.c optional warp_saver dev/tdfx/tdfx_linux.c optional tdfx_linux tdfx compat_linux dev/tdfx/tdfx_pci.c optional tdfx pci dev/ti/if_ti.c optional ti pci dev/tl/if_tl.c optional tl pci dev/trm/trm.c optional trm dev/twa/tw_cl_init.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_intr.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_io.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_cl_misc.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_osl_cam.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twa/tw_osl_freebsd.c optional twa \ compile-with "${NORMAL_C} -I$S/dev/twa" dev/twe/twe.c optional twe dev/twe/twe_freebsd.c optional twe dev/tws/tws.c optional tws dev/tws/tws_cam.c optional tws dev/tws/tws_hdm.c optional tws dev/tws/tws_services.c optional tws dev/tws/tws_user.c optional tws dev/tx/if_tx.c optional tx dev/txp/if_txp.c optional txp dev/uart/uart_bus_acpi.c optional uart acpi #dev/uart/uart_bus_cbus.c optional uart cbus dev/uart/uart_bus_ebus.c optional uart ebus dev/uart/uart_bus_fdt.c optional uart fdt dev/uart/uart_bus_isa.c optional uart isa dev/uart/uart_bus_pccard.c optional uart pccard dev/uart/uart_bus_pci.c optional uart pci dev/uart/uart_bus_puc.c optional uart puc dev/uart/uart_bus_scc.c optional uart scc dev/uart/uart_core.c optional uart dev/uart/uart_dbg.c optional uart gdb dev/uart/uart_dev_ns8250.c optional uart uart_ns8250 dev/uart/uart_dev_pl011.c optional uart pl011 dev/uart/uart_dev_quicc.c optional uart quicc dev/uart/uart_dev_sab82532.c optional uart uart_sab82532 dev/uart/uart_dev_sab82532.c optional uart scc dev/uart/uart_dev_z8530.c optional uart uart_z8530 dev/uart/uart_dev_z8530.c optional uart scc dev/uart/uart_if.m optional uart dev/uart/uart_subr.c optional uart dev/uart/uart_tty.c optional uart dev/ubsec/ubsec.c optional ubsec # # USB controller drivers # dev/usb/controller/at91dci.c optional at91dci dev/usb/controller/at91dci_atmelarm.c optional at91dci at91rm9200 dev/usb/controller/musb_otg.c optional musb dev/usb/controller/musb_otg_atmelarm.c optional musb at91rm9200 dev/usb/controller/dwc_otg.c optional dwcotg dev/usb/controller/ehci.c optional ehci dev/usb/controller/ehci_pci.c optional ehci pci dev/usb/controller/ohci.c optional ohci dev/usb/controller/ohci_atmelarm.c optional ohci at91rm9200 dev/usb/controller/ohci_pci.c optional ohci pci dev/usb/controller/uhci.c optional uhci dev/usb/controller/uhci_pci.c optional uhci pci dev/usb/controller/xhci.c optional xhci dev/usb/controller/xhci_pci.c optional xhci pci dev/usb/controller/uss820dci.c optional uss820dci dev/usb/controller/uss820dci_atmelarm.c optional uss820dci at91rm9200 dev/usb/controller/usb_controller.c optional usb # # USB storage drivers # dev/usb/storage/umass.c optional umass dev/usb/storage/urio.c optional urio dev/usb/storage/ustorage_fs.c optional usfs # # USB core # dev/usb/usb_busdma.c optional usb dev/usb/usb_compat_linux.c optional usb dev/usb/usb_core.c optional usb dev/usb/usb_debug.c optional usb dev/usb/usb_dev.c optional usb dev/usb/usb_device.c optional usb dev/usb/usb_dynamic.c 
optional usb dev/usb/usb_error.c optional usb dev/usb/usb_generic.c optional usb dev/usb/usb_handle_request.c optional usb dev/usb/usb_hid.c optional usb dev/usb/usb_hub.c optional usb dev/usb/usb_if.m optional usb dev/usb/usb_lookup.c optional usb dev/usb/usb_mbuf.c optional usb dev/usb/usb_msctest.c optional usb dev/usb/usb_parse.c optional usb dev/usb/usb_pf.c optional usb dev/usb/usb_process.c optional usb dev/usb/usb_request.c optional usb dev/usb/usb_transfer.c optional usb dev/usb/usb_util.c optional usb # # USB network drivers # dev/usb/net/if_aue.c optional aue dev/usb/net/if_axe.c optional axe dev/usb/net/if_cdce.c optional cdce dev/usb/net/if_cue.c optional cue dev/usb/net/if_ipheth.c optional ipheth dev/usb/net/if_kue.c optional kue dev/usb/net/if_mos.c optional mos dev/usb/net/if_rue.c optional rue dev/usb/net/if_smsc.c optional smsc dev/usb/net/if_udav.c optional udav dev/usb/net/if_usie.c optional usie dev/usb/net/ruephy.c optional rue dev/usb/net/usb_ethernet.c optional aue | axe | cdce | cue | kue | mos | \ rue | smsc | udav | ipheth dev/usb/net/uhso.c optional uhso # # USB WLAN drivers # dev/usb/wlan/if_rsu.c optional rsu rsu-rtl8712fw.c optional rsu-rtl8712fw | rsufw \ compile-with "${AWK} -f $S/tools/fw_stub.awk rsu-rtl8712fw.fw:rsu-rtl8712fw:120 -mrsu-rtl8712fw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "rsu-rtl8712fw.c" rsu-rtl8712fw.fwo optional rsu-rtl8712fw | rsufw \ dependency "rsu-rtl8712fw.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "rsu-rtl8712fw.fwo" rsu-rtl8712fw.fw optional rsu-rtl8712.fw | rsufw \ dependency "$S/contrib/dev/rsu/rsu-rtl8712fw.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "rsu-rtl8712fw.fw" dev/usb/wlan/if_rum.c optional rum dev/usb/wlan/if_run.c optional run runfw.c optional runfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk run.fw:runfw -mrunfw -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "runfw.c" runfw.fwo optional runfw \ dependency "run.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "runfw.fwo" run.fw optional runfw \ dependency "$S/contrib/dev/run/rt2870.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "run.fw" dev/usb/wlan/if_uath.c optional uath dev/usb/wlan/if_upgt.c optional upgt dev/usb/wlan/if_ural.c optional ural dev/usb/wlan/if_urtw.c optional urtw dev/usb/wlan/if_urtwn.c optional urtwn urtwn-rtl8192cfwT.c optional urtwn-rtl8192cfwT | urtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwT.fw:urtwn-rtl8192cfwT:111 -murtwn-rtl8192cfwT -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "urtwn-rtl8192cfwT.c" urtwn-rtl8192cfwT.fwo optional urtwn-rtl8192cfwT | urtwnfw \ dependency "urtwn-rtl8192cfwT.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "urtwn-rtl8192cfwT.fwo" urtwn-rtl8192cfwT.fw optional urtwn-rtl8192cfwT | urtwnfw \ dependency "$S/contrib/dev/urtwn/urtwn-rtl8192cfwT.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "urtwn-rtl8192cfwT.fw" urtwn-rtl8192cfwU.c optional urtwn-rtl8192cfwU | urtwnfw \ compile-with "${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwU.fw:urtwn-rtl8192cfwU:111 -murtwn-rtl8192cfwU -c${.TARGET}" \ no-implicit-rule before-depend local \ clean "urtwn-rtl8192cfwU.c" urtwn-rtl8192cfwU.fwo optional urtwn-rtl8192cfwU | urtwnfw \ dependency "urtwn-rtl8192cfwU.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "urtwn-rtl8192cfwU.fwo" urtwn-rtl8192cfwU.fw optional urtwn-rtl8192cfwU | urtwnfw \ dependency 
"$S/contrib/dev/urtwn/urtwn-rtl8192cfwU.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "urtwn-rtl8192cfwU.fw" dev/usb/wlan/if_zyd.c optional zyd # # USB serial and parallel port drivers # dev/usb/serial/u3g.c optional u3g dev/usb/serial/uark.c optional uark dev/usb/serial/ubsa.c optional ubsa dev/usb/serial/ubser.c optional ubser dev/usb/serial/uchcom.c optional uchcom dev/usb/serial/ucycom.c optional ucycom dev/usb/serial/ufoma.c optional ufoma dev/usb/serial/uftdi.c optional uftdi dev/usb/serial/ugensa.c optional ugensa dev/usb/serial/uipaq.c optional uipaq dev/usb/serial/ulpt.c optional ulpt dev/usb/serial/umcs.c optional umcs dev/usb/serial/umct.c optional umct dev/usb/serial/umodem.c optional umodem dev/usb/serial/umoscom.c optional umoscom dev/usb/serial/uplcom.c optional uplcom dev/usb/serial/uslcom.c optional uslcom dev/usb/serial/uvisor.c optional uvisor dev/usb/serial/uvscom.c optional uvscom dev/usb/serial/usb_serial.c optional ucom | u3g | uark | ubsa | ubser | \ uchcom | ucycom | ufoma | uftdi | \ ugensa | uipaq | umcs | umct | \ umodem | umoscom | uplcom | usie | \ uslcom | uvisor | uvscom # # USB misc drivers # dev/usb/misc/ufm.c optional ufm dev/usb/misc/udbp.c optional udbp # # USB input drivers # dev/usb/input/atp.c optional atp dev/usb/input/uep.c optional uep dev/usb/input/uhid.c optional uhid dev/usb/input/ukbd.c optional ukbd dev/usb/input/ums.c optional ums # # USB quirks # dev/usb/quirk/usb_quirk.c optional usb # # USB templates # dev/usb/template/usb_template.c optional usb_template dev/usb/template/usb_template_audio.c optional usb_template dev/usb/template/usb_template_cdce.c optional usb_template dev/usb/template/usb_template_kbd.c optional usb_template dev/usb/template/usb_template_modem.c optional usb_template dev/usb/template/usb_template_mouse.c optional usb_template dev/usb/template/usb_template_msc.c optional usb_template dev/usb/template/usb_template_mtp.c optional usb_template # # USB END # dev/utopia/idtphy.c optional utopia dev/utopia/suni.c optional utopia dev/utopia/utopia.c optional utopia dev/vge/if_vge.c optional vge dev/vkbd/vkbd.c optional vkbd dev/vr/if_vr.c optional vr pci dev/vte/if_vte.c optional vte pci dev/vx/if_vx.c optional vx dev/vx/if_vx_eisa.c optional vx eisa dev/vx/if_vx_pci.c optional vx pci dev/vxge/vxge.c optional vxge dev/vxge/vxgehal/vxgehal-ifmsg.c optional vxge dev/vxge/vxgehal/vxgehal-mrpcim.c optional vxge dev/vxge/vxgehal/vxge-queue.c optional vxge dev/vxge/vxgehal/vxgehal-ring.c optional vxge dev/vxge/vxgehal/vxgehal-swapper.c optional vxge dev/vxge/vxgehal/vxgehal-mgmt.c optional vxge dev/vxge/vxgehal/vxgehal-srpcim.c optional vxge dev/vxge/vxgehal/vxgehal-config.c optional vxge dev/vxge/vxgehal/vxgehal-blockpool.c optional vxge dev/vxge/vxgehal/vxgehal-doorbells.c optional vxge dev/vxge/vxgehal/vxgehal-mgmtaux.c optional vxge dev/vxge/vxgehal/vxgehal-device.c optional vxge dev/vxge/vxgehal/vxgehal-mm.c optional vxge dev/vxge/vxgehal/vxgehal-driver.c optional vxge dev/vxge/vxgehal/vxgehal-virtualpath.c optional vxge dev/vxge/vxgehal/vxgehal-channel.c optional vxge dev/vxge/vxgehal/vxgehal-fifo.c optional vxge dev/watchdog/watchdog.c standard dev/wb/if_wb.c optional wb pci dev/wds/wd7000.c optional wds isa dev/wi/if_wi.c optional wi dev/wi/if_wi_pccard.c optional wi pccard dev/wi/if_wi_pci.c optional wi pci dev/wl/if_wl.c optional wl isa dev/wpi/if_wpi.c optional wpi pci wpifw.c optional wpifw \ compile-with "${AWK} -f $S/tools/fw_stub.awk wpi.fw:wpifw:153229 -mwpi -c${.TARGET}" \ no-implicit-rule 
before-depend local \ clean "wpifw.c" wpifw.fwo optional wpifw \ dependency "wpi.fw" \ compile-with "${NORMAL_FWO}" \ no-implicit-rule \ clean "wpifw.fwo" wpi.fw optional wpifw \ dependency "$S/contrib/dev/wpi/iwlwifi-3945-15.32.2.9.fw.uu" \ compile-with "${NORMAL_FW}" \ no-obj no-implicit-rule \ clean "wpi.fw" dev/xe/if_xe.c optional xe dev/xe/if_xe_pccard.c optional xe pccard dev/xen/balloon/balloon.c optional xen | xenhvm dev/xen/blkfront/blkfront.c optional xen | xenhvm dev/xen/blkback/blkback.c optional xen | xenhvm dev/xen/console/console.c optional xen dev/xen/console/xencons_ring.c optional xen dev/xen/control/control.c optional xen | xenhvm dev/xen/netback/netback.c optional xen | xenhvm dev/xen/netfront/netfront.c optional xen | xenhvm dev/xen/xenpci/xenpci.c optional xenpci -dev/xen/xenpci/evtchn.c optional xenpci dev/xl/if_xl.c optional xl pci dev/xl/xlphy.c optional xl pci fs/deadfs/dead_vnops.c standard fs/devfs/devfs_devs.c standard fs/devfs/devfs_dir.c standard fs/devfs/devfs_rule.c standard fs/devfs/devfs_vfsops.c standard fs/devfs/devfs_vnops.c standard fs/fdescfs/fdesc_vfsops.c optional fdescfs fs/fdescfs/fdesc_vnops.c optional fdescfs fs/fifofs/fifo_vnops.c standard fs/fuse/fuse_device.c optional fuse fs/fuse/fuse_file.c optional fuse fs/fuse/fuse_internal.c optional fuse fs/fuse/fuse_io.c optional fuse fs/fuse/fuse_ipc.c optional fuse fs/fuse/fuse_main.c optional fuse fs/fuse/fuse_node.c optional fuse fs/fuse/fuse_vfsops.c optional fuse fs/fuse/fuse_vnops.c optional fuse fs/msdosfs/msdosfs_conv.c optional msdosfs fs/msdosfs/msdosfs_denode.c optional msdosfs fs/msdosfs/msdosfs_fat.c optional msdosfs fs/msdosfs/msdosfs_fileno.c optional msdosfs fs/msdosfs/msdosfs_iconv.c optional msdosfs_iconv fs/msdosfs/msdosfs_lookup.c optional msdosfs fs/msdosfs/msdosfs_vfsops.c optional msdosfs fs/msdosfs/msdosfs_vnops.c optional msdosfs fs/nandfs/bmap.c optional nandfs fs/nandfs/nandfs_alloc.c optional nandfs fs/nandfs/nandfs_bmap.c optional nandfs fs/nandfs/nandfs_buffer.c optional nandfs fs/nandfs/nandfs_cleaner.c optional nandfs fs/nandfs/nandfs_cpfile.c optional nandfs fs/nandfs/nandfs_dat.c optional nandfs fs/nandfs/nandfs_dir.c optional nandfs fs/nandfs/nandfs_ifile.c optional nandfs fs/nandfs/nandfs_segment.c optional nandfs fs/nandfs/nandfs_subr.c optional nandfs fs/nandfs/nandfs_sufile.c optional nandfs fs/nandfs/nandfs_vfsops.c optional nandfs fs/nandfs/nandfs_vnops.c optional nandfs fs/nfs/nfs_commonkrpc.c optional nfscl | nfsd fs/nfs/nfs_commonsubs.c optional nfscl | nfsd fs/nfs/nfs_commonport.c optional nfscl | nfsd fs/nfs/nfs_commonacl.c optional nfscl | nfsd fs/nfsclient/nfs_clcomsubs.c optional nfscl fs/nfsclient/nfs_clsubs.c optional nfscl fs/nfsclient/nfs_clstate.c optional nfscl fs/nfsclient/nfs_clkrpc.c optional nfscl fs/nfsclient/nfs_clrpcops.c optional nfscl fs/nfsclient/nfs_clvnops.c optional nfscl fs/nfsclient/nfs_clnode.c optional nfscl fs/nfsclient/nfs_clvfsops.c optional nfscl fs/nfsclient/nfs_clport.c optional nfscl fs/nfsclient/nfs_clbio.c optional nfscl fs/nfsclient/nfs_clnfsiod.c optional nfscl fs/nfsserver/nfs_fha_new.c optional nfsd inet fs/nfsserver/nfs_nfsdsocket.c optional nfsd inet fs/nfsserver/nfs_nfsdsubs.c optional nfsd inet fs/nfsserver/nfs_nfsdstate.c optional nfsd inet fs/nfsserver/nfs_nfsdkrpc.c optional nfsd inet fs/nfsserver/nfs_nfsdserv.c optional nfsd inet fs/nfsserver/nfs_nfsdport.c optional nfsd inet fs/nfsserver/nfs_nfsdcache.c optional nfsd inet fs/nullfs/null_subr.c optional nullfs fs/nullfs/null_vfsops.c optional nullfs 
fs/nullfs/null_vnops.c optional nullfs fs/procfs/procfs.c optional procfs fs/procfs/procfs_ctl.c optional procfs fs/procfs/procfs_dbregs.c optional procfs fs/procfs/procfs_fpregs.c optional procfs fs/procfs/procfs_ioctl.c optional procfs fs/procfs/procfs_map.c optional procfs fs/procfs/procfs_mem.c optional procfs fs/procfs/procfs_note.c optional procfs fs/procfs/procfs_osrel.c optional procfs fs/procfs/procfs_regs.c optional procfs fs/procfs/procfs_rlimit.c optional procfs fs/procfs/procfs_status.c optional procfs fs/procfs/procfs_type.c optional procfs fs/pseudofs/pseudofs.c optional pseudofs fs/pseudofs/pseudofs_fileno.c optional pseudofs fs/pseudofs/pseudofs_vncache.c optional pseudofs fs/pseudofs/pseudofs_vnops.c optional pseudofs fs/smbfs/smbfs_io.c optional smbfs fs/smbfs/smbfs_node.c optional smbfs fs/smbfs/smbfs_smb.c optional smbfs fs/smbfs/smbfs_subr.c optional smbfs fs/smbfs/smbfs_vfsops.c optional smbfs fs/smbfs/smbfs_vnops.c optional smbfs fs/udf/osta.c optional udf fs/udf/udf_iconv.c optional udf_iconv fs/udf/udf_vfsops.c optional udf fs/udf/udf_vnops.c optional udf fs/unionfs/union_subr.c optional unionfs fs/unionfs/union_vfsops.c optional unionfs fs/unionfs/union_vnops.c optional unionfs fs/tmpfs/tmpfs_vnops.c optional tmpfs fs/tmpfs/tmpfs_fifoops.c optional tmpfs fs/tmpfs/tmpfs_vfsops.c optional tmpfs fs/tmpfs/tmpfs_subr.c optional tmpfs gdb/gdb_cons.c optional gdb gdb/gdb_main.c optional gdb gdb/gdb_packet.c optional gdb geom/bde/g_bde.c optional geom_bde geom/bde/g_bde_crypt.c optional geom_bde geom/bde/g_bde_lock.c optional geom_bde geom/bde/g_bde_work.c optional geom_bde geom/cache/g_cache.c optional geom_cache geom/concat/g_concat.c optional geom_concat geom/eli/g_eli.c optional geom_eli geom/eli/g_eli_crypto.c optional geom_eli geom/eli/g_eli_ctl.c optional geom_eli geom/eli/g_eli_integrity.c optional geom_eli geom/eli/g_eli_key.c optional geom_eli geom/eli/g_eli_key_cache.c optional geom_eli geom/eli/g_eli_privacy.c optional geom_eli geom/eli/pkcs5v2.c optional geom_eli geom/gate/g_gate.c optional geom_gate geom/geom_aes.c optional geom_aes geom/geom_bsd.c optional geom_bsd geom/geom_bsd_enc.c optional geom_bsd geom/geom_ccd.c optional ccd | geom_ccd geom/geom_ctl.c standard geom/geom_dev.c standard geom/geom_disk.c standard geom/geom_dump.c standard geom/geom_event.c standard geom/geom_fox.c optional geom_fox geom/geom_flashmap.c optional fdt cfi | fdt nand geom/geom_io.c standard geom/geom_kern.c standard geom/geom_map.c optional geom_map geom/geom_mbr.c optional geom_mbr geom/geom_mbr_enc.c optional geom_mbr geom/geom_pc98.c optional geom_pc98 geom/geom_pc98_enc.c optional geom_pc98 geom/geom_redboot.c optional geom_redboot geom/geom_slice.c standard geom/geom_subr.c standard geom/geom_sunlabel.c optional geom_sunlabel geom/geom_sunlabel_enc.c optional geom_sunlabel geom/geom_vfs.c standard geom/geom_vol_ffs.c optional geom_vol geom/journal/g_journal.c optional geom_journal geom/journal/g_journal_ufs.c optional geom_journal geom/label/g_label.c optional geom_label geom/label/g_label_ext2fs.c optional geom_label geom/label/g_label_iso9660.c optional geom_label geom/label/g_label_msdosfs.c optional geom_label geom/label/g_label_ntfs.c optional geom_label geom/label/g_label_reiserfs.c optional geom_label geom/label/g_label_ufs.c optional geom_label geom/label/g_label_gpt.c optional geom_label geom/label/g_label_disk_ident.c optional geom_label geom/linux_lvm/g_linux_lvm.c optional geom_linux_lvm geom/mirror/g_mirror.c optional geom_mirror geom/mirror/g_mirror_ctl.c 
optional geom_mirror geom/mountver/g_mountver.c optional geom_mountver geom/multipath/g_multipath.c optional geom_multipath geom/nop/g_nop.c optional geom_nop geom/part/g_part.c standard geom/part/g_part_if.m standard geom/part/g_part_apm.c optional geom_part_apm geom/part/g_part_bsd.c optional geom_part_bsd geom/part/g_part_ebr.c optional geom_part_ebr geom/part/g_part_gpt.c optional geom_part_gpt geom/part/g_part_ldm.c optional geom_part_ldm geom/part/g_part_mbr.c optional geom_part_mbr geom/part/g_part_pc98.c optional geom_part_pc98 geom/part/g_part_vtoc8.c optional geom_part_vtoc8 geom/raid/g_raid.c optional geom_raid geom/raid/g_raid_ctl.c optional geom_raid geom/raid/g_raid_md_if.m optional geom_raid geom/raid/g_raid_tr_if.m optional geom_raid geom/raid/md_ddf.c optional geom_raid geom/raid/md_intel.c optional geom_raid geom/raid/md_jmicron.c optional geom_raid geom/raid/md_nvidia.c optional geom_raid geom/raid/md_promise.c optional geom_raid geom/raid/md_sii.c optional geom_raid geom/raid/tr_concat.c optional geom_raid geom/raid/tr_raid0.c optional geom_raid geom/raid/tr_raid1.c optional geom_raid geom/raid/tr_raid1e.c optional geom_raid geom/raid/tr_raid5.c optional geom_raid geom/raid3/g_raid3.c optional geom_raid3 geom/raid3/g_raid3_ctl.c optional geom_raid3 geom/shsec/g_shsec.c optional geom_shsec geom/stripe/g_stripe.c optional geom_stripe geom/uncompress/g_uncompress.c optional geom_uncompress contrib/xz-embedded/freebsd/xz_malloc.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_crc32.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_bcj.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_lzma2.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" contrib/xz-embedded/linux/lib/xz/xz_dec_stream.c \ optional xz_embedded | geom_uncompress \ compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/" geom/uzip/g_uzip.c optional geom_uzip geom/virstor/binstream.c optional geom_virstor geom/virstor/g_virstor.c optional geom_virstor geom/virstor/g_virstor_md.c optional geom_virstor geom/zero/g_zero.c optional geom_zero fs/ext2fs/ext2_alloc.c optional ext2fs fs/ext2fs/ext2_balloc.c optional ext2fs fs/ext2fs/ext2_bmap.c optional ext2fs fs/ext2fs/ext2_extents.c optional ext2fs fs/ext2fs/ext2_inode.c optional ext2fs fs/ext2fs/ext2_inode_cnv.c optional ext2fs fs/ext2fs/ext2_hash.c optional ext2fs fs/ext2fs/ext2_htree.c optional ext2fs fs/ext2fs/ext2_lookup.c optional ext2fs fs/ext2fs/ext2_subr.c optional ext2fs fs/ext2fs/ext2_vfsops.c optional ext2fs fs/ext2fs/ext2_vnops.c optional ext2fs gnu/fs/reiserfs/reiserfs_hashes.c optional reiserfs \ warning "kernel contains GPL contaminated ReiserFS filesystem" gnu/fs/reiserfs/reiserfs_inode.c optional reiserfs gnu/fs/reiserfs/reiserfs_item_ops.c optional reiserfs gnu/fs/reiserfs/reiserfs_namei.c 
optional reiserfs gnu/fs/reiserfs/reiserfs_prints.c optional reiserfs gnu/fs/reiserfs/reiserfs_stree.c optional reiserfs gnu/fs/reiserfs/reiserfs_vfsops.c optional reiserfs gnu/fs/reiserfs/reiserfs_vnops.c optional reiserfs # isa/isa_if.m standard isa/isa_common.c optional isa isa/isahint.c optional isa isa/pnp.c optional isa isapnp isa/pnpparse.c optional isa isapnp fs/cd9660/cd9660_bmap.c optional cd9660 fs/cd9660/cd9660_lookup.c optional cd9660 fs/cd9660/cd9660_node.c optional cd9660 fs/cd9660/cd9660_rrip.c optional cd9660 fs/cd9660/cd9660_util.c optional cd9660 fs/cd9660/cd9660_vfsops.c optional cd9660 fs/cd9660/cd9660_vnops.c optional cd9660 fs/cd9660/cd9660_iconv.c optional cd9660_iconv kern/bus_if.m standard kern/clock_if.m standard kern/cpufreq_if.m standard kern/device_if.m standard kern/imgact_elf.c standard kern/imgact_elf32.c optional compat_freebsd32 kern/imgact_shell.c standard kern/inflate.c optional gzip kern/init_main.c standard kern/init_sysent.c standard kern/ksched.c optional _kposix_priority_scheduling kern/kern_acct.c standard kern/kern_alq.c optional alq kern/kern_clock.c standard kern/kern_condvar.c standard kern/kern_conf.c standard kern/kern_cons.c standard kern/kern_cpu.c standard kern/kern_cpuset.c standard kern/kern_context.c standard kern/kern_descrip.c standard kern/kern_dtrace.c optional kdtrace_hooks kern/kern_environment.c standard kern/kern_et.c standard kern/kern_event.c standard kern/kern_exec.c standard kern/kern_exit.c standard kern/kern_fail.c standard kern/kern_ffclock.c standard kern/kern_fork.c standard kern/kern_gzio.c optional gzio kern/kern_hhook.c standard kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard kern/kern_khelp.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr kern/kern_ktrace.c standard kern/kern_linker.c standard kern/kern_lock.c standard kern/kern_lockf.c standard kern/kern_lockstat.c optional kdtrace_hooks kern/kern_loginclass.c standard kern/kern_malloc.c standard kern/kern_mbuf.c standard kern/kern_mib.c standard kern/kern_module.c standard kern/kern_mtxpool.c standard kern/kern_mutex.c standard kern/kern_ntptime.c standard kern/kern_osd.c standard kern/kern_physio.c standard kern/kern_pmc.c standard kern/kern_poll.c optional device_polling kern/kern_priv.c standard kern/kern_proc.c standard kern/kern_prot.c standard kern/kern_racct.c standard kern/kern_rangelock.c standard kern/kern_rctl.c standard kern/kern_resource.c standard kern/kern_rmlock.c standard kern/kern_rwlock.c standard kern/kern_sdt.c optional kdtrace_hooks kern/kern_sema.c standard kern/kern_sharedpage.c standard kern/kern_shutdown.c standard kern/kern_sig.c standard kern/kern_switch.c standard kern/kern_sx.c standard kern/kern_synch.c standard kern/kern_syscalls.c standard kern/kern_sysctl.c standard kern/kern_tc.c standard kern/kern_thr.c standard kern/kern_thread.c standard kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_umtx.c standard kern/kern_uuid.c standard kern/kern_xxx.c standard kern/link_elf.c standard kern/linker_if.m standard kern/md4c.c optional netsmb kern/md5c.c standard kern/p1003_1b.c standard kern/posix4_mib.c standard kern/sched_4bsd.c optional sched_4bsd kern/sched_ule.c optional sched_ule kern/serdev_if.m standard kern/stack_protector.c standard \ compile-with "${NORMAL_C:N-fstack-protector*}" kern/subr_acl_nfs4.c optional ufs_acl | zfs kern/subr_acl_posix1e.c optional ufs_acl kern/subr_autoconf.c standard kern/subr_blist.c standard kern/subr_bus.c standard kern/subr_bus_dma.c 
standard kern/subr_bufring.c standard kern/subr_clock.c standard kern/subr_counter.c standard kern/subr_devstat.c standard kern/subr_disk.c standard kern/subr_eventhandler.c standard kern/subr_fattime.c standard kern/subr_firmware.c optional firmware kern/subr_hash.c standard kern/subr_hints.c standard kern/subr_kdb.c standard kern/subr_kobj.c standard kern/subr_lock.c standard kern/subr_log.c standard kern/subr_mbpool.c optional libmbpool kern/subr_mchain.c optional libmchain kern/subr_module.c standard kern/subr_msgbuf.c standard kern/subr_param.c standard kern/subr_pcpu.c standard kern/subr_pctrie.c standard kern/subr_power.c standard kern/subr_prf.c standard kern/subr_prof.c standard kern/subr_rman.c standard kern/subr_rtc.c standard kern/subr_sbuf.c standard kern/subr_scanf.c standard kern/subr_sglist.c standard kern/subr_sleepqueue.c standard kern/subr_smp.c standard kern/subr_stack.c optional ddb | stack | ktr kern/subr_taskqueue.c standard kern/subr_trap.c standard kern/subr_turnstile.c standard kern/subr_uio.c standard kern/subr_unit.c standard kern/subr_vmem.c standard kern/subr_witness.c optional witness kern/sys_capability.c standard kern/sys_generic.c standard kern/sys_pipe.c standard kern/sys_procdesc.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/syscalls.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg kern/sysv_sem.c optional sysvsem kern/sysv_shm.c optional sysvshm kern/tty.c standard kern/tty_compat.c optional compat_43tty kern/tty_info.c standard kern/tty_inq.c standard kern/tty_outq.c standard kern/tty_pts.c standard kern/tty_tty.c standard kern/tty_ttydisc.c standard kern/uipc_accf.c optional inet kern/uipc_cow.c optional socket_send_cow kern/uipc_debug.c optional ddb kern/uipc_domain.c standard kern/uipc_mbuf.c standard kern/uipc_mbuf2.c standard kern/uipc_mqueue.c optional p1003_1b_mqueue kern/uipc_sem.c optional p1003_1b_semaphores kern/uipc_shm.c standard kern/uipc_sockbuf.c standard kern/uipc_socket.c standard kern/uipc_syscalls.c standard kern/uipc_usrreq.c standard kern/vfs_acl.c standard kern/vfs_aio.c optional vfs_aio kern/vfs_bio.c standard kern/vfs_cache.c standard kern/vfs_cluster.c standard kern/vfs_default.c standard kern/vfs_export.c standard kern/vfs_extattr.c standard kern/vfs_hash.c standard kern/vfs_init.c standard kern/vfs_lookup.c standard kern/vfs_mount.c standard kern/vfs_mountroot.c standard kern/vfs_subr.c standard kern/vfs_syscalls.c standard kern/vfs_vnops.c standard # # Kernel GSS-API # gssd.h optional kgssapi \ dependency "$S/kgssapi/gssd.x" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -hM $S/kgssapi/gssd.x | grep -v pthread.h > gssd.h" \ no-obj no-implicit-rule before-depend local \ clean "gssd.h" gssd_xdr.c optional kgssapi \ dependency "$S/kgssapi/gssd.x gssd.h" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -c $S/kgssapi/gssd.x -o gssd_xdr.c" \ no-implicit-rule before-depend local \ clean "gssd_xdr.c" gssd_clnt.c optional kgssapi \ dependency "$S/kgssapi/gssd.x gssd.h" \ compile-with "RPCGEN_CPP='${CPP}' rpcgen -lM $S/kgssapi/gssd.x | grep -v string.h > gssd_clnt.c" \ no-implicit-rule before-depend local \ clean "gssd_clnt.c" kgssapi/gss_accept_sec_context.c optional kgssapi kgssapi/gss_add_oid_set_member.c optional kgssapi kgssapi/gss_acquire_cred.c optional kgssapi kgssapi/gss_canonicalize_name.c optional kgssapi kgssapi/gss_create_empty_oid_set.c optional kgssapi kgssapi/gss_delete_sec_context.c optional kgssapi kgssapi/gss_display_status.c optional kgssapi kgssapi/gss_export_name.c optional 
kgssapi kgssapi/gss_get_mic.c optional kgssapi kgssapi/gss_init_sec_context.c optional kgssapi kgssapi/gss_impl.c optional kgssapi kgssapi/gss_import_name.c optional kgssapi kgssapi/gss_names.c optional kgssapi kgssapi/gss_pname_to_uid.c optional kgssapi kgssapi/gss_release_buffer.c optional kgssapi kgssapi/gss_release_cred.c optional kgssapi kgssapi/gss_release_name.c optional kgssapi kgssapi/gss_release_oid_set.c optional kgssapi kgssapi/gss_set_cred_option.c optional kgssapi kgssapi/gss_test_oid_set_member.c optional kgssapi kgssapi/gss_unwrap.c optional kgssapi kgssapi/gss_verify_mic.c optional kgssapi kgssapi/gss_wrap.c optional kgssapi kgssapi/gss_wrap_size_limit.c optional kgssapi kgssapi/gssd_prot.c optional kgssapi kgssapi/krb5/krb5_mech.c optional kgssapi kgssapi/krb5/kcrypto.c optional kgssapi kgssapi/krb5/kcrypto_aes.c optional kgssapi kgssapi/krb5/kcrypto_arcfour.c optional kgssapi kgssapi/krb5/kcrypto_des.c optional kgssapi kgssapi/krb5/kcrypto_des3.c optional kgssapi kgssapi/kgss_if.m optional kgssapi kgssapi/gsstest.c optional kgssapi_debug # These files in libkern/ are those needed by all architectures. Some # of the files in libkern/ are only needed on some architectures, e.g., # libkern/divdi3.c is needed by i386 but not alpha. Also, some of these # routines may be optimized for a particular platform. In either case, # the file should be moved to conf/files.<arch> from here. # libkern/arc4random.c standard libkern/bcd.c standard libkern/bsearch.c standard libkern/crc32.c standard libkern/flsll.c standard libkern/fnmatch.c standard libkern/iconv.c optional libiconv libkern/iconv_converter_if.m optional libiconv libkern/iconv_ucs.c optional libiconv libkern/iconv_xlat.c optional libiconv libkern/iconv_xlat16.c optional libiconv libkern/inet_aton.c standard libkern/inet_ntoa.c standard libkern/inet_ntop.c standard libkern/inet_pton.c standard libkern/jenkins_hash.c standard libkern/mcount.c optional profiling-routine libkern/memcchr.c standard libkern/memchr.c optional fdt libkern/memcmp.c standard libkern/qsort.c standard libkern/qsort_r.c standard libkern/random.c standard libkern/scanc.c standard libkern/strcasecmp.c standard libkern/strcat.c standard libkern/strchr.c standard libkern/strcmp.c standard libkern/strcpy.c standard libkern/strcspn.c standard libkern/strdup.c standard libkern/strlcat.c standard libkern/strlcpy.c standard libkern/strlen.c standard libkern/strncmp.c standard libkern/strncpy.c standard libkern/strnlen.c standard libkern/strrchr.c standard libkern/strsep.c standard libkern/strspn.c standard libkern/strstr.c standard libkern/strtol.c standard libkern/strtoq.c standard libkern/strtoul.c standard libkern/strtouq.c standard libkern/strvalid.c standard net/bpf.c standard net/bpf_buffer.c optional bpf net/bpf_jitter.c optional bpf_jitter net/bpf_filter.c optional bpf | netgraph_bpf net/bpf_zerocopy.c optional bpf net/bridgestp.c optional bridge | if_bridge net/flowtable.c optional flowtable inet | flowtable inet6 net/ieee8023ad_lacp.c optional lagg net/if.c standard net/if_arcsubr.c optional arcnet net/if_atmsubr.c optional atm net/if_bridge.c optional bridge inet | if_bridge inet net/if_clone.c standard net/if_dead.c standard net/if_debug.c optional ddb net/if_disc.c optional disc net/if_edsc.c optional edsc net/if_ef.c optional ef net/if_enc.c optional enc ipsec inet | enc ipsec inet6 net/if_epair.c optional epair net/if_ethersubr.c optional ether net/if_faith.c optional faith net/if_fddisubr.c optional fddi net/if_fwsubr.c optional fwip net/if_gif.c
optional gif | netgraph_gif net/if_gre.c optional gre inet net/if_iso88025subr.c optional token net/if_lagg.c optional lagg net/if_loop.c optional loop net/if_llatbl.c standard net/if_media.c standard net/if_mib.c standard net/if_spppfr.c optional sppp | netgraph_sppp net/if_spppsubr.c optional sppp | netgraph_sppp net/if_stf.c optional stf inet inet6 net/if_tun.c optional tun net/if_tap.c optional tap net/if_vlan.c optional vlan net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression net/netisr.c standard net/pfil.c optional ether | inet net/radix.c standard net/radix_mpath.c standard net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard net/rtsock.c standard net/slcompress.c optional netgraph_vjc | sppp | \ netgraph_sppp net/vnet.c optional vimage net/zlib.c optional crypto | geom_uzip | ipsec | \ mxge | netgraph_deflate | \ ddb_ctf | gzio | geom_uncompress net80211/ieee80211.c optional wlan net80211/ieee80211_acl.c optional wlan wlan_acl net80211/ieee80211_action.c optional wlan net80211/ieee80211_ageq.c optional wlan net80211/ieee80211_adhoc.c optional wlan net80211/ieee80211_ageq.c optional wlan net80211/ieee80211_amrr.c optional wlan | wlan_amrr net80211/ieee80211_crypto.c optional wlan net80211/ieee80211_crypto_ccmp.c optional wlan wlan_ccmp net80211/ieee80211_crypto_none.c optional wlan net80211/ieee80211_crypto_tkip.c optional wlan wlan_tkip net80211/ieee80211_crypto_wep.c optional wlan wlan_wep net80211/ieee80211_ddb.c optional wlan ddb net80211/ieee80211_dfs.c optional wlan net80211/ieee80211_freebsd.c optional wlan net80211/ieee80211_hostap.c optional wlan net80211/ieee80211_ht.c optional wlan net80211/ieee80211_hwmp.c optional wlan ieee80211_support_mesh net80211/ieee80211_input.c optional wlan net80211/ieee80211_ioctl.c optional wlan net80211/ieee80211_mesh.c optional wlan ieee80211_support_mesh net80211/ieee80211_monitor.c optional wlan net80211/ieee80211_node.c optional wlan net80211/ieee80211_output.c optional wlan net80211/ieee80211_phy.c optional wlan net80211/ieee80211_power.c optional wlan net80211/ieee80211_proto.c optional wlan net80211/ieee80211_radiotap.c optional wlan net80211/ieee80211_ratectl.c optional wlan net80211/ieee80211_ratectl_none.c optional wlan net80211/ieee80211_regdomain.c optional wlan net80211/ieee80211_rssadapt.c optional wlan wlan_rssadapt net80211/ieee80211_scan.c optional wlan net80211/ieee80211_scan_sta.c optional wlan net80211/ieee80211_sta.c optional wlan net80211/ieee80211_superg.c optional wlan ieee80211_support_superg net80211/ieee80211_tdma.c optional wlan ieee80211_support_tdma net80211/ieee80211_wds.c optional wlan net80211/ieee80211_xauth.c optional wlan wlan_xauth net80211/ieee80211_alq.c optional wlan ieee80211_alq netatalk/aarp.c optional netatalk netatalk/at_control.c optional netatalk netatalk/at_proto.c optional netatalk netatalk/at_rmx.c optional netatalk netatalk/ddp_input.c optional netatalk netatalk/ddp_output.c optional netatalk netatalk/ddp_pcb.c optional netatalk netatalk/ddp_usrreq.c optional netatalk netgraph/atm/ccatm/ng_ccatm.c optional ngatm_ccatm \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/ng_atm.c optional ngatm_atm netgraph/atm/ngatmbase.c optional ngatm_atmbase \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/sscfu/ng_sscfu.c optional ngatm_sscfu \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/sscop/ng_sscop.c optional ngatm_sscop \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/atm/uni/ng_uni.c optional 
ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" netgraph/bluetooth/common/ng_bluetooth.c optional netgraph_bluetooth netgraph/bluetooth/drivers/bt3c/ng_bt3c_pccard.c optional netgraph_bluetooth_bt3c netgraph/bluetooth/drivers/h4/ng_h4.c optional netgraph_bluetooth_h4 netgraph/bluetooth/drivers/ubt/ng_ubt.c optional netgraph_bluetooth_ubt usb netgraph/bluetooth/drivers/ubtbcmfw/ubtbcmfw.c optional netgraph_bluetooth_ubtbcmfw usb netgraph/bluetooth/hci/ng_hci_cmds.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_evnt.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_main.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_misc.c optional netgraph_bluetooth_hci netgraph/bluetooth/hci/ng_hci_ulpi.c optional netgraph_bluetooth_hci netgraph/bluetooth/l2cap/ng_l2cap_cmds.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_evnt.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_llpi.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_main.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_misc.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c optional netgraph_bluetooth_l2cap netgraph/bluetooth/socket/ng_btsocket.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_hci_raw.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_l2cap.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_rfcomm.c optional netgraph_bluetooth_socket netgraph/bluetooth/socket/ng_btsocket_sco.c optional netgraph_bluetooth_socket netgraph/netflow/netflow.c optional netgraph_netflow netgraph/netflow/netflow_v9.c optional netgraph_netflow netgraph/netflow/ng_netflow.c optional netgraph_netflow netgraph/ng_UI.c optional netgraph_UI netgraph/ng_async.c optional netgraph_async netgraph/ng_atmllc.c optional netgraph_atmllc netgraph/ng_base.c optional netgraph netgraph/ng_bpf.c optional netgraph_bpf netgraph/ng_bridge.c optional netgraph_bridge netgraph/ng_car.c optional netgraph_car netgraph/ng_cisco.c optional netgraph_cisco netgraph/ng_deflate.c optional netgraph_deflate netgraph/ng_device.c optional netgraph_device netgraph/ng_echo.c optional netgraph_echo netgraph/ng_eiface.c optional netgraph_eiface netgraph/ng_ether.c optional netgraph_ether netgraph/ng_ether_echo.c optional netgraph_ether_echo netgraph/ng_fec.c optional netgraph_fec netgraph/ng_frame_relay.c optional netgraph_frame_relay netgraph/ng_gif.c optional netgraph_gif netgraph/ng_gif_demux.c optional netgraph_gif_demux netgraph/ng_hole.c optional netgraph_hole netgraph/ng_iface.c optional netgraph_iface netgraph/ng_ip_input.c optional netgraph_ip_input netgraph/ng_ipfw.c optional netgraph_ipfw inet ipfirewall netgraph/ng_ksocket.c optional netgraph_ksocket netgraph/ng_l2tp.c optional netgraph_l2tp netgraph/ng_lmi.c optional netgraph_lmi netgraph/ng_mppc.c optional netgraph_mppc_compression | \ netgraph_mppc_encryption netgraph/ng_nat.c optional netgraph_nat inet libalias netgraph/ng_one2many.c optional netgraph_one2many netgraph/ng_parse.c optional netgraph netgraph/ng_patch.c optional netgraph_patch netgraph/ng_pipe.c optional netgraph_pipe netgraph/ng_ppp.c optional netgraph_ppp netgraph/ng_pppoe.c optional netgraph_pppoe netgraph/ng_pptpgre.c optional netgraph_pptpgre netgraph/ng_pred1.c optional netgraph_pred1 netgraph/ng_rfc1490.c optional netgraph_rfc1490 
netgraph/ng_socket.c optional netgraph_socket netgraph/ng_split.c optional netgraph_split netgraph/ng_sppp.c optional netgraph_sppp netgraph/ng_tag.c optional netgraph_tag netgraph/ng_tcpmss.c optional netgraph_tcpmss netgraph/ng_tee.c optional netgraph_tee netgraph/ng_tty.c optional netgraph_tty netgraph/ng_vjc.c optional netgraph_vjc netgraph/ng_vlan.c optional netgraph_vlan netinet/accf_data.c optional accept_filter_data inet netinet/accf_dns.c optional accept_filter_dns inet netinet/accf_http.c optional accept_filter_http inet netinet/if_atm.c optional atm netinet/if_ether.c optional inet ether netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_debug.c optional inet ddb netinet/in_kdtrace.c optional inet | inet6 netinet/ip_carp.c optional inet carp | inet6 carp netinet/in_gif.c optional gif inet | netgraph_gif inet netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet netinet/in_mcast.c optional inet netinet/in_pcb.c optional inet | inet6 netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup netinet/in_proto.c optional inet | inet6 netinet/in_rmx.c optional inet netinet/ip_divert.c optional inet ipdivert ipfirewall netinet/ip_ecn.c optional inet | inet6 netinet/ip_encap.c optional inet | inet6 netinet/ip_fastfwd.c optional inet netinet/ip_icmp.c optional inet | inet6 netinet/ip_input.c optional inet netinet/ip_ipsec.c optional inet ipsec netinet/ip_mroute.c optional mrouting inet netinet/ip_options.c optional inet netinet/ip_output.c optional inet netinet/raw_ip.c optional inet | inet6 netinet/cc/cc.c optional inet | inet6 netinet/cc/cc_newreno.c optional inet | inet6 netinet/sctp_asconf.c optional inet sctp | inet6 sctp netinet/sctp_auth.c optional inet sctp | inet6 sctp netinet/sctp_bsd_addr.c optional inet sctp | inet6 sctp netinet/sctp_cc_functions.c optional inet sctp | inet6 sctp netinet/sctp_crc32.c optional inet sctp | inet6 sctp netinet/sctp_indata.c optional inet sctp | inet6 sctp netinet/sctp_input.c optional inet sctp | inet6 sctp netinet/sctp_output.c optional inet sctp | inet6 sctp netinet/sctp_pcb.c optional inet sctp | inet6 sctp netinet/sctp_peeloff.c optional inet sctp | inet6 sctp netinet/sctp_ss_functions.c optional inet sctp | inet6 sctp netinet/sctp_sysctl.c optional inet sctp | inet6 sctp netinet/sctp_timer.c optional inet sctp | inet6 sctp netinet/sctp_usrreq.c optional inet sctp | inet6 sctp netinet/sctputil.c optional inet sctp | inet6 sctp netinet/tcp_debug.c optional tcpdebug netinet/tcp_hostcache.c optional inet | inet6 netinet/tcp_input.c optional inet | inet6 netinet/tcp_lro.c optional inet | inet6 netinet/tcp_output.c optional inet | inet6 netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6 netinet/tcp_reass.c optional inet | inet6 netinet/tcp_sack.c optional inet | inet6 netinet/tcp_subr.c optional inet | inet6 netinet/tcp_syncache.c optional inet | inet6 netinet/tcp_timer.c optional inet | inet6 netinet/tcp_timewait.c optional inet | inet6 netinet/tcp_usrreq.c optional inet | inet6 netinet/udp_usrreq.c optional inet | inet6 netinet/libalias/alias.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_db.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_mod.c optional libalias | netgraph_nat netinet/libalias/alias_proxy.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_util.c optional libalias inet | netgraph_nat inet netinet/libalias/alias_sctp.c optional libalias inet | netgraph_nat inet netinet6/dest6.c optional inet6 netinet6/frag6.c optional inet6 
netinet6/icmp6.c optional inet6 netinet6/in6.c optional inet6 netinet6/in6_cksum.c optional inet6 netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6 netinet6/in6_ifattach.c optional inet6 netinet6/in6_mcast.c optional inet6 netinet6/in6_pcb.c optional inet6 netinet6/in6_pcbgroup.c optional inet6 pcbgroup netinet6/in6_proto.c optional inet6 netinet6/in6_rmx.c optional inet6 netinet6/in6_src.c optional inet6 netinet6/ip6_forward.c optional inet6 netinet6/ip6_id.c optional inet6 netinet6/ip6_input.c optional inet6 netinet6/ip6_mroute.c optional mrouting inet6 netinet6/ip6_output.c optional inet6 netinet6/ip6_ipsec.c optional inet6 ipsec netinet6/mld6.c optional inet6 netinet6/nd6.c optional inet6 netinet6/nd6_nbr.c optional inet6 netinet6/nd6_rtr.c optional inet6 netinet6/raw_ip6.c optional inet6 netinet6/route6.c optional inet6 netinet6/scope6.c optional inet6 netinet6/sctp6_usrreq.c optional inet6 sctp netinet6/udp6_usrreq.c optional inet6 netipsec/ipsec.c optional ipsec inet | ipsec inet6 netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 netipsec/key.c optional ipsec inet | ipsec inet6 netipsec/key_debug.c optional ipsec inet | ipsec inet6 netipsec/keysock.c optional ipsec inet | ipsec inet6 netipsec/xform_ah.c optional ipsec inet | ipsec inet6 netipsec/xform_esp.c optional ipsec inet | ipsec inet6 netipsec/xform_ipcomp.c optional ipsec inet | ipsec inet6 netipsec/xform_ipip.c optional ipsec inet | ipsec inet6 netipsec/xform_tcp.c optional ipsec inet tcp_signature | \ ipsec inet6 tcp_signature netipx/ipx.c optional ipx netipx/ipx_cksum.c optional ipx netipx/ipx_input.c optional ipx netipx/ipx_outputfl.c optional ipx netipx/ipx_pcb.c optional ipx netipx/ipx_proto.c optional ipx netipx/ipx_usrreq.c optional ipx netipx/spx_debug.c optional ipx netipx/spx_reass.c optional ipx netipx/spx_usrreq.c optional ipx netnatm/natm.c optional natm netnatm/natm_pcb.c optional natm netnatm/natm_proto.c optional natm netpfil/ipfw/dn_heap.c optional inet dummynet netpfil/ipfw/dn_sched_fifo.c optional inet dummynet netpfil/ipfw/dn_sched_prio.c optional inet dummynet netpfil/ipfw/dn_sched_qfq.c optional inet dummynet netpfil/ipfw/dn_sched_rr.c optional inet dummynet netpfil/ipfw/dn_sched_wf2q.c optional inet dummynet netpfil/ipfw/ip_dummynet.c optional inet dummynet netpfil/ipfw/ip_dn_io.c optional inet dummynet netpfil/ipfw/ip_dn_glue.c optional inet dummynet netpfil/ipfw/ip_fw2.c optional inet ipfirewall netpfil/ipfw/ip_fw_dynamic.c optional inet ipfirewall netpfil/ipfw/ip_fw_log.c optional inet ipfirewall netpfil/ipfw/ip_fw_pfil.c optional inet ipfirewall netpfil/ipfw/ip_fw_sockopt.c optional inet ipfirewall netpfil/ipfw/ip_fw_table.c optional inet ipfirewall netpfil/ipfw/ip_fw_nat.c optional inet ipfirewall_nat netpfil/pf/if_pflog.c optional pflog pf inet netpfil/pf/if_pfsync.c optional pfsync pf inet netpfil/pf/pf.c optional pf inet netpfil/pf/pf_if.c optional pf inet netpfil/pf/pf_ioctl.c optional pf inet netpfil/pf/pf_lb.c optional pf inet netpfil/pf/pf_norm.c optional pf inet netpfil/pf/pf_osfp.c optional pf inet netpfil/pf/pf_ruleset.c optional pf inet netpfil/pf/pf_table.c optional pf inet netpfil/pf/in4_cksum.c optional pf inet netsmb/smb_conn.c optional netsmb netsmb/smb_crypt.c optional netsmb netsmb/smb_dev.c optional netsmb netsmb/smb_iod.c optional netsmb netsmb/smb_rq.c optional netsmb netsmb/smb_smb.c optional netsmb netsmb/smb_subr.c optional netsmb 
netsmb/smb_trantcp.c optional netsmb netsmb/smb_usr.c optional netsmb nfs/bootp_subr.c optional bootp nfsclient | bootp nfscl nfs/krpc_subr.c optional bootp nfsclient | bootp nfscl nfs/nfs_common.c optional nfsclient | nfsserver nfs/nfs_diskless.c optional nfsclient nfs_root | nfscl nfs_root nfs/nfs_fha.c optional nfsserver | nfsd nfs/nfs_lock.c optional nfsclient | nfscl | nfslockd | nfsd nfsclient/nfs_bio.c optional nfsclient nfsclient/nfs_node.c optional nfsclient nfsclient/nfs_krpc.c optional nfsclient nfsclient/nfs_subs.c optional nfsclient nfsclient/nfs_nfsiod.c optional nfsclient nfsclient/nfs_vfsops.c optional nfsclient nfsclient/nfs_vnops.c optional nfsclient nfsserver/nfs_fha_old.c optional nfsserver nfsserver/nfs_serv.c optional nfsserver nfsserver/nfs_srvkrpc.c optional nfsserver nfsserver/nfs_srvsubs.c optional nfsserver nfs/nfs_nfssvc.c optional nfsserver | nfscl | nfsd nlm/nlm_advlock.c optional nfslockd | nfsd nlm/nlm_prot_clnt.c optional nfslockd | nfsd nlm/nlm_prot_impl.c optional nfslockd | nfsd nlm/nlm_prot_server.c optional nfslockd | nfsd nlm/nlm_prot_svc.c optional nfslockd | nfsd nlm/nlm_prot_xdr.c optional nfslockd | nfsd nlm/sm_inter_xdr.c optional nfslockd | nfsd # OpenFabrics Enterprise Distribution (Infiniband) ofed/include/linux/linux_compat.c optional ofed \ no-depend compile-with "${OFED_C}" ofed/include/linux/linux_idr.c optional ofed \ no-depend compile-with "${OFED_C}" ofed/include/linux/linux_radix.c optional ofed \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/core/addr.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/agent.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cache.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" # XXX Mad.c must be ordered before cm.c for sysinit sets to occur in # the correct order. 
ofed/drivers/infiniband/core/mad.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/cma.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/device.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/fmr_pool.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/iwcm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/local_sa.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/mad_rmpp.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/multicast.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/notice.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/packer.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sa_query.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/smi.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/sysfs.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ucm.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ucma.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/ud_header.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/umem.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/user_mad.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_cmd.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_main.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/uverbs_marshall.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/core/verbs.c optional ofed \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/" ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" #ofed/drivers/infiniband/ulp/ipoib/ipoib_fs.c optional ipoib \ # no-depend \ # compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c optional ipoib \ no-depend \ 
compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c optional ipoib \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" #ofed/drivers/infiniband/ulp/ipoib/ipoib_vlan.c optional ipoib \ # no-depend \ # compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/" ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_main.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_rx.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_cma.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/ulp/sdp/sdp_tx.c optional sdp inet \ no-depend \ compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/" ofed/drivers/infiniband/hw/mlx4/ah.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/cq.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/doorbell.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mad.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/main.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/mr.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/qp.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/srq.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/infiniband/hw/mlx4/wc.c optional mlx4ib \ no-depend obj-prefix "mlx4ib_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/" ofed/drivers/net/mlx4/alloc.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/catas.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/cmd.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/cq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/eq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/fw.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/icm.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/intf.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" 
\ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/main.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/mcg.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/mr.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/pd.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/port.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/profile.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/qp.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/reset.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/sense.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/srq.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/xrcd.c optional mlx4ib | mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_cq.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_frag.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_main.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_netdev.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_port.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_resources.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_rx.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/net/mlx4/en_tx.c optional mlxen \ no-depend obj-prefix "mlx4_" \ compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/infiniband/hw/mthca/mthca_allocator.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_av.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_catas.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cmd.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_eq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mad.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_main.c optional mthca \ no-depend compile-with "${OFED_C}" 
ofed/drivers/infiniband/hw/mthca/mthca_mcg.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_memfree.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mr.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_pd.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_profile.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_provider.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_qp.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_reset.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_srq.c optional mthca \ no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_uar.c optional mthca \ no-depend compile-with "${OFED_C}" # crypto support opencrypto/cast.c optional crypto | ipsec opencrypto/criov.c optional crypto opencrypto/crypto.c optional crypto opencrypto/cryptodev.c optional cryptodev opencrypto/cryptodev_if.m optional crypto opencrypto/cryptosoft.c optional crypto opencrypto/deflate.c optional crypto opencrypto/rmd160.c optional crypto | ipsec opencrypto/skipjack.c optional crypto opencrypto/xform.c optional crypto pci/alpm.c optional alpm pci pci/amdpm.c optional amdpm pci | nfpm pci pci/amdsmb.c optional amdsmb pci pci/if_rl.c optional rl pci pci/intpm.c optional intpm pci pci/ncr.c optional ncr pci pci/nfsmb.c optional nfsmb pci pci/viapm.c optional viapm pci rpc/auth_none.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/auth_unix.c optional krpc | nfslockd | nfsclient | nfscl | nfsd rpc/authunix_prot.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/clnt_dg.c optional krpc | nfslockd | nfsclient | nfscl | nfsd rpc/clnt_rc.c optional krpc | nfslockd | nfsclient | nfscl | nfsd rpc/clnt_vc.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/getnetconfig.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/replay.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpc_callmsg.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpc_generic.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpc_prot.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpcb_clnt.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/rpcb_prot.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/svc.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/svc_auth.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/svc_auth_unix.c optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd rpc/svc_dg.c optional krpc | nfslockd | nfsserver | nfscl | nfsd rpc/svc_generic.c optional krpc | nfslockd | nfsserver | nfscl | nfsd rpc/svc_vc.c optional krpc | nfslockd | nfsserver | nfscl | nfsd rpc/rpcsec_gss/rpcsec_gss.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_conf.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_misc.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/rpcsec_gss_prot.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi rpc/rpcsec_gss/svc_rpcsec_gss.c optional krpc kgssapi | 
nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi security/audit/audit.c optional audit security/audit/audit_arg.c optional audit security/audit/audit_bsm.c optional audit security/audit/audit_bsm_klib.c optional audit security/audit/audit_pipe.c optional audit security/audit/audit_syscalls.c standard security/audit/audit_trigger.c optional audit security/audit/audit_worker.c optional audit security/audit/bsm_domain.c optional audit security/audit/bsm_errno.c optional audit security/audit/bsm_fcntl.c optional audit security/audit/bsm_socket_type.c optional audit security/audit/bsm_token.c optional audit security/mac/mac_atalk.c optional mac netatalk security/mac/mac_audit.c optional mac audit security/mac/mac_cred.c optional mac security/mac/mac_framework.c optional mac security/mac/mac_inet.c optional mac inet | mac inet6 security/mac/mac_inet6.c optional mac inet6 security/mac/mac_label.c optional mac security/mac/mac_net.c optional mac security/mac/mac_pipe.c optional mac security/mac/mac_posix_sem.c optional mac security/mac/mac_posix_shm.c optional mac security/mac/mac_priv.c optional mac security/mac/mac_process.c optional mac security/mac/mac_socket.c optional mac security/mac/mac_syscalls.c standard security/mac/mac_system.c optional mac security/mac/mac_sysv_msg.c optional mac security/mac/mac_sysv_sem.c optional mac security/mac/mac_sysv_shm.c optional mac security/mac/mac_vfs.c optional mac security/mac_biba/mac_biba.c optional mac_biba security/mac_bsdextended/mac_bsdextended.c optional mac_bsdextended security/mac_bsdextended/ugidfw_system.c optional mac_bsdextended security/mac_bsdextended/ugidfw_vnode.c optional mac_bsdextended security/mac_ifoff/mac_ifoff.c optional mac_ifoff security/mac_lomac/mac_lomac.c optional mac_lomac security/mac_mls/mac_mls.c optional mac_mls security/mac_none/mac_none.c optional mac_none security/mac_partition/mac_partition.c optional mac_partition security/mac_portacl/mac_portacl.c optional mac_portacl security/mac_seeotheruids/mac_seeotheruids.c optional mac_seeotheruids security/mac_stub/mac_stub.c optional mac_stub security/mac_test/mac_test.c optional mac_test teken/teken.c optional sc ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_inode.c optional ffs ufs/ffs/ffs_snapshot.c optional ffs ufs/ffs/ffs_softdep.c optional ffs ufs/ffs/ffs_subr.c optional ffs ufs/ffs/ffs_tables.c optional ffs ufs/ffs/ffs_vfsops.c optional ffs ufs/ffs/ffs_vnops.c optional ffs ufs/ffs/ffs_rawread.c optional directio ufs/ffs/ffs_suspend.c optional ffs ufs/ufs/ufs_acl.c optional ffs ufs/ufs/ufs_bmap.c optional ffs ufs/ufs/ufs_dirhash.c optional ffs ufs/ufs/ufs_extattr.c optional ffs ufs/ufs/ufs_gjournal.c optional ffs UFS_GJOURNAL ufs/ufs/ufs_inode.c optional ffs ufs/ufs/ufs_lookup.c optional ffs ufs/ufs/ufs_quota.c optional ffs ufs/ufs/ufs_vfsops.c optional ffs ufs/ufs/ufs_vnops.c optional ffs vm/default_pager.c standard vm/device_pager.c standard vm/phys_pager.c standard vm/redzone.c optional DEBUG_REDZONE vm/sg_pager.c standard vm/swap_pager.c standard vm/uma_core.c standard vm/uma_dbg.c standard vm/memguard.c optional DEBUG_MEMGUARD vm/vm_fault.c standard vm/vm_glue.c standard vm/vm_init.c standard vm/vm_kern.c standard vm/vm_map.c standard vm/vm_meter.c standard vm/vm_mmap.c standard vm/vm_object.c standard vm/vm_page.c standard vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_phys.c standard vm/vm_radix.c standard vm/vm_reserv.c standard vm/vm_unix.c standard vm/vm_zeroidle.c standard vm/vnode_pager.c standard xen/gnttab.c 
	optional xen | xenhvm
xen/features.c	optional xen | xenhvm
-xen/evtchn/evtchn.c	optional xen
xen/evtchn/evtchn_dev.c	optional xen | xenhvm
xen/xenbus/xenbus_if.m	optional xen | xenhvm
xen/xenbus/xenbus.c	optional xen | xenhvm
xen/xenbus/xenbusb_if.m	optional xen | xenhvm
xen/xenbus/xenbusb.c	optional xen | xenhvm
xen/xenbus/xenbusb_front.c	optional xen | xenhvm
xen/xenbus/xenbusb_back.c	optional xen | xenhvm
xen/xenstore/xenstore.c	optional xen | xenhvm
xen/xenstore/xenstore_dev.c	optional xen | xenhvm
xdr/xdr.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_array.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_mbuf.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_mem.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_reference.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
xdr/xdr_sizeof.c	optional krpc | nfslockd | nfsclient | nfsserver | nfscl | nfsd
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 1d5fa769b46e..6d35d1f678db 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -1,533 +1,535 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
# $FreeBSD$
#
# The long compile-with and dependency lines are required because of
# limitations in config: backslash-newline doesn't work in strings, and
# dependency lines other than the first are silently ignored.
#
#
linux32_genassym.o	optional compat_linux32 \
	dependency "$S/amd64/linux32/linux32_genassym.c" \
	compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \
	no-obj no-implicit-rule \
	clean "linux32_genassym.o"
#
linux32_assym.h	optional compat_linux32 \
	dependency "$S/kern/genassym.sh linux32_genassym.o" \
	compile-with "sh $S/kern/genassym.sh linux32_genassym.o > ${.TARGET}" \
	no-obj no-implicit-rule before-depend \
	clean "linux32_assym.h"
#
ia32_genassym.o	standard \
	dependency "$S/compat/ia32/ia32_genassym.c" \
	compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \
	no-obj no-implicit-rule \
	clean "ia32_genassym.o"
#
ia32_assym.h	standard \
	dependency "$S/kern/genassym.sh ia32_genassym.o" \
	compile-with "env NM='${NM}' sh $S/kern/genassym.sh ia32_genassym.o > ${.TARGET}" \
	no-obj no-implicit-rule before-depend \
	clean "ia32_assym.h"
#
font.h	optional sc_dflt_font \
	compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \
	no-obj no-implicit-rule before-depend \
	clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
#
atkbdmap.h	optional atkbd_dflt_keymap \
	compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \
	no-obj no-implicit-rule before-depend \
	clean "atkbdmap.h"
#
ukbdmap.h	optional ukbd_dflt_keymap \
	compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \
	no-obj no-implicit-rule before-depend \
	clean
"ukbdmap.h" # nvenetlib.o optional nve pci \ dependency "$S/contrib/dev/nve/amd64/nvenetlib.o.bz2.uu" \ compile-with "uudecode $S/contrib/dev/nve/amd64/nvenetlib.o.bz2.uu ; bzip2 -df nvenetlib.o.bz2" \ no-implicit-rule # os+%DIKED-nve.h optional nve pci \ dependency "$S/contrib/dev/nve/os.h" \ compile-with "sed -e 's/^.*#include.*phy\.h.*$$//' $S/contrib/dev/nve/os.h > os+%DIKED-nve.h" \ no-implicit-rule no-obj before-depend \ clean "os+%DIKED-nve.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/amd64-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/amd64-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/amd64-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/amd64-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/amd64-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/amd64-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/amd64-elf.hptrr_lib.o.uu" \ no-implicit-rule # amd64/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > ${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # amd64/amd64/amd64_mem.c optional mem #amd64/amd64/apic_vector.S standard amd64/amd64/atomic.c standard amd64/amd64/autoconf.c standard amd64/amd64/bios.c standard amd64/amd64/bpf_jit_machdep.c optional bpf_jitter amd64/amd64/cpu_switch.S standard amd64/amd64/db_disasm.c optional ddb amd64/amd64/db_interface.c optional ddb amd64/amd64/db_trace.c optional ddb amd64/amd64/elf_machdep.c standard amd64/amd64/exception.S standard amd64/amd64/fpu.c standard amd64/amd64/gdb_machdep.c optional gdb amd64/amd64/identcpu.c standard amd64/amd64/in_cksum.c optional inet | inet6 amd64/amd64/initcpu.c standard amd64/amd64/io.c optional io amd64/amd64/locore.S standard no-obj amd64/amd64/machdep.c standard amd64/amd64/mem.c optional mem amd64/amd64/minidump_machdep.c standard amd64/amd64/mp_machdep.c optional smp amd64/amd64/mp_watchdog.c optional mp_watchdog smp amd64/amd64/mpboot.S optional smp amd64/amd64/pmap.c standard amd64/amd64/prof_machdep.c optional profiling-routine amd64/amd64/ptrace_machdep.c standard amd64/amd64/sigtramp.S standard amd64/amd64/stack_machdep.c optional ddb | stack amd64/amd64/support.S standard amd64/amd64/sys_machdep.c standard amd64/amd64/trap.c standard amd64/amd64/uio_machdep.c standard amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard amd64/pci/pci_cfgreg.c optional pci cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" crypto/aesni/aesencdec_amd64.S optional aesni crypto/aesni/aeskeys_amd64.S 
optional aesni crypto/aesni/aesni.c optional aesni crypto/aesni/aesni_wrap.c optional aesni crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/acpica/acpi_if.m standard dev/acpi_support/acpi_wmi_if.m standard dev/agp/agp_amd64.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_via.c optional agp dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/coretemp/coretemp.c optional coretemp dev/cpuctl/cpuctl.c optional cpuctl dev/dpms/dpms.c optional dpms # There are no systems with isa slots, so all ed isa entries should go.. dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux32 dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fdt/fdt_x86.c optional fdt dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/kbd/kbd.c optional atkbd | sc | ukbd dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev dev/nfe/if_nfe.c optional nfe pci dev/ntb/if_ntb/if_ntb.c optional if_ntb dev/ntb/ntb_hw/ntb_hw.c optional if_ntb ntb_hw dev/nvd/nvd.c optional nvd nvme dev/nve/if_nve.c optional nve pci dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme 
dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/random/ivy.c optional random rdrand_rng dev/random/nehemiah.c optional random padlock_rng dev/qlxge/qls_dbg.c optional qlxge pci dev/qlxge/qls_dump.c optional qlxge pci dev/qlxge/qls_hw.c optional qlxge pci dev/qlxge/qls_ioctl.c optional qlxge pci dev/qlxge/qls_isr.c optional qlxge pci dev/qlxge/qls_os.c optional qlxge pci dev/qlxgb/qla_dbg.c optional qlxgb pci dev/qlxgb/qla_hw.c optional qlxgb pci dev/qlxgb/qla_ioctl.c optional qlxgb pci dev/qlxgb/qla_isr.c optional qlxgb pci dev/qlxgb/qla_misc.c optional qlxgb pci dev/qlxgb/qla_os.c optional qlxgb pci dev/qlxgbe/ql_dbg.c optional qlxgbe pci dev/qlxgbe/ql_hw.c optional qlxgbe pci dev/qlxgbe/ql_ioctl.c optional qlxgbe pci dev/qlxgbe/ql_isr.c optional qlxgbe pci dev/qlxgbe/ql_misc.c optional qlxgbe pci dev/qlxgbe/ql_os.c optional qlxgbe pci dev/qlxgbe/ql_reset.c optional qlxgbe pci dev/sfxge/common/efx_bootcfg.c optional sfxge inet pci dev/sfxge/common/efx_ev.c optional sfxge inet pci dev/sfxge/common/efx_filter.c optional sfxge inet pci dev/sfxge/common/efx_intr.c optional sfxge inet pci dev/sfxge/common/efx_mac.c optional sfxge inet pci dev/sfxge/common/efx_mcdi.c optional sfxge inet pci dev/sfxge/common/efx_mon.c optional sfxge inet pci dev/sfxge/common/efx_nic.c optional sfxge inet pci dev/sfxge/common/efx_nvram.c optional sfxge inet pci dev/sfxge/common/efx_phy.c optional sfxge inet pci dev/sfxge/common/efx_port.c optional sfxge inet pci dev/sfxge/common/efx_rx.c optional sfxge inet pci dev/sfxge/common/efx_sram.c optional sfxge inet pci dev/sfxge/common/efx_tx.c optional sfxge inet pci dev/sfxge/common/efx_vpd.c optional sfxge inet pci dev/sfxge/common/efx_wol.c optional sfxge inet pci dev/sfxge/common/siena_mac.c optional sfxge inet pci dev/sfxge/common/siena_mon.c optional sfxge inet pci dev/sfxge/common/siena_nic.c optional sfxge inet pci dev/sfxge/common/siena_nvram.c optional sfxge inet pci dev/sfxge/common/siena_phy.c optional sfxge inet pci dev/sfxge/common/siena_sram.c optional sfxge inet pci dev/sfxge/common/siena_vpd.c optional sfxge inet pci dev/sfxge/sfxge.c optional sfxge inet pci dev/sfxge/sfxge_dma.c optional sfxge inet pci dev/sfxge/sfxge_ev.c optional sfxge inet pci dev/sfxge/sfxge_intr.c optional sfxge inet pci dev/sfxge/sfxge_mcdi.c optional sfxge inet pci dev/sfxge/sfxge_port.c optional sfxge inet pci dev/sfxge/sfxge_rx.c optional sfxge inet pci dev/sfxge/sfxge_tx.c optional sfxge inet pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional wpi dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional 
isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c 
optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci dev/virtio/virtio.c optional virtio dev/virtio/virtqueue.c optional virtio dev/virtio/virtio_bus_if.m optional virtio dev/virtio/virtio_if.m optional virtio dev/virtio/pci/virtio_pci.c optional virtio_pci dev/virtio/network/if_vtnet.c optional vtnet dev/virtio/block/virtio_blk.c optional virtio_blk dev/virtio/balloon/virtio_balloon.c optional virtio_balloon dev/virtio/scsi/virtio_scsi.c optional virtio_scsi isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/link_elf_obj.c standard # # IA32 binary support # #amd64/ia32/ia32_exception.S optional compat_freebsd32 amd64/ia32/ia32_reg.c optional compat_freebsd32 amd64/ia32/ia32_signal.c optional compat_freebsd32 amd64/ia32/ia32_sigtramp.S optional compat_freebsd32 amd64/ia32/ia32_syscall.c optional compat_freebsd32 amd64/ia32/ia32_misc.c optional compat_freebsd32 compat/ia32/ia32_sysvec.c optional compat_freebsd32 compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs # # Linux/i386 binary support # amd64/linux32/linux32_dummy.c optional compat_linux32 amd64/linux32/linux32_locore.s optional compat_linux32 \ dependency "linux32_assym.h" amd64/linux32/linux32_machdep.c optional compat_linux32 amd64/linux32/linux32_support.s optional compat_linux32 \ dependency "linux32_assym.h" amd64/linux32/linux32_sysent.c optional compat_linux32 amd64/linux32/linux32_sysvec.c optional compat_linux32 compat/linux/linux_emul.c optional compat_linux32 compat/linux/linux_file.c optional compat_linux32 compat/linux/linux_fork.c optional compat_linux32 compat/linux/linux_futex.c optional compat_linux32 compat/linux/linux_getcwd.c optional compat_linux32 compat/linux/linux_ioctl.c optional compat_linux32 compat/linux/linux_ipc.c optional compat_linux32 compat/linux/linux_mib.c optional compat_linux32 compat/linux/linux_misc.c optional compat_linux32 
compat/linux/linux_signal.c	optional compat_linux32
compat/linux/linux_socket.c	optional compat_linux32
compat/linux/linux_stats.c	optional compat_linux32
compat/linux/linux_sysctl.c	optional compat_linux32
compat/linux/linux_time.c	optional compat_linux32
compat/linux/linux_uid16.c	optional compat_linux32
compat/linux/linux_util.c	optional compat_linux32
dev/amr/amr_linux.c	optional compat_linux32 amr
dev/mfi/mfi_linux.c	optional compat_linux32 mfi
#
# Windows NDIS driver support
#
compat/ndis/kern_ndis.c	optional ndisapi pci
compat/ndis/kern_windrv.c	optional ndisapi pci
compat/ndis/subr_hal.c	optional ndisapi pci
compat/ndis/subr_ndis.c	optional ndisapi pci
compat/ndis/subr_ntoskrnl.c	optional ndisapi pci
compat/ndis/subr_pe.c	optional ndisapi pci
compat/ndis/subr_usbd.c	optional ndisapi pci
compat/ndis/winx64_wrap.S	optional ndisapi pci
#
libkern/memmove.c	standard
libkern/memset.c	standard
#
# x86 real mode BIOS emulator, required by atkbdc/dpms/vesa
#
compat/x86bios/x86bios.c	optional x86bios | atkbd | dpms | vesa
contrib/x86emu/x86emu.c	optional x86bios | atkbd | dpms | vesa
#
# bvm console
#
dev/bvm/bvm_console.c	optional bvmconsole
dev/bvm/bvm_dbg.c	optional bvmdebug
#
# x86 shared code between IA32, AMD64 and PC98 architectures
#
x86/acpica/OsdEnvironment.c	optional acpi
x86/acpica/acpi_apm.c	optional acpi
x86/acpica/acpi_wakeup.c	optional acpi
x86/acpica/madt.c	optional acpi
x86/acpica/srat.c	optional acpi
x86/bios/smbios.c	optional smbios
x86/bios/vpd.c	optional vpd
x86/cpufreq/powernow.c	optional cpufreq
x86/cpufreq/est.c	optional cpufreq
x86/cpufreq/hwpstate.c	optional cpufreq
x86/cpufreq/p4tcc.c	optional cpufreq
x86/isa/atpic.c	optional atpic isa
x86/isa/atrtc.c	standard
x86/isa/clock.c	standard
x86/isa/elcr.c	optional atpic isa | mptable
x86/isa/isa.c	standard
x86/isa/isa_dma.c	standard
x86/isa/nmi.c	standard
x86/isa/orm.c	optional isa
x86/pci/pci_bus.c	optional pci
x86/pci/qpi.c	optional pci
x86/x86/busdma_machdep.c	standard
x86/x86/dump_machdep.c	standard
x86/x86/fdt_machdep.c	optional fdt
x86/x86/intr_machdep.c	standard
x86/x86/io_apic.c	standard
x86/x86/legacy.c	standard
x86/x86/local_apic.c	standard
x86/x86/mca.c	standard
x86/x86/mptable.c	optional mptable
x86/x86/mptable_pci.c	optional mptable pci
x86/x86/msi.c	optional pci
x86/x86/nexus.c	standard
x86/x86/tsc.c	standard
+x86/xen/hvm.c	optional xenhvm
+x86/xen/xen_intr.c	optional xen | xenhvm
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 4f8f8ed16a18..0613337132c3 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -1,570 +1,572 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
# $FreeBSD$
#
# The long compile-with and dependency lines are required because of
# limitations in config: backslash-newline doesn't work in strings, and
# dependency lines other than the first are silently ignored.
# linux_genassym.o optional compat_linux \ dependency "$S/i386/linux/linux_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "linux_genassym.o" # linux_assym.h optional compat_linux \ dependency "$S/kern/genassym.sh linux_genassym.o" \ compile-with "sh $S/kern/genassym.sh linux_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "linux_assym.h" # svr4_genassym.o optional compat_svr4 \ dependency "$S/i386/svr4/svr4_genassym.c" \ compile-with "${CC} ${CFLAGS:N-fno-common} -c ${.IMPSRC}" \ no-obj no-implicit-rule \ clean "svr4_genassym.o" # svr4_assym.h optional compat_svr4 \ dependency "$S/kern/genassym.sh svr4_genassym.o" \ compile-with "sh $S/kern/genassym.sh svr4_genassym.o > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "svr4_assym.h" # font.h optional sc_dflt_font \ compile-with "uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x16.fnt && file2c 'static u_char dflt_font_16[16*256] = {' '};' < ${SC_DFLT_FONT}-8x16 > font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x14.fnt && file2c 'static u_char dflt_font_14[14*256] = {' '};' < ${SC_DFLT_FONT}-8x14 >> font.h && uudecode < /usr/share/syscons/fonts/${SC_DFLT_FONT}-8x8.fnt && file2c 'static u_char dflt_font_8[8*256] = {' '};' < ${SC_DFLT_FONT}-8x8 >> font.h" \ no-obj no-implicit-rule before-depend \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" # atkbdmap.h optional atkbd_dflt_keymap \ compile-with "/usr/sbin/kbdcontrol -L ${ATKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > atkbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "atkbdmap.h" # ukbdmap.h optional ukbd_dflt_keymap \ compile-with "/usr/sbin/kbdcontrol -L ${UKBD_DFLT_KEYMAP} | sed -e 's/^static keymap_t.* = /static keymap_t key_map = /' -e 's/^static accentmap_t.* = /static accentmap_t accent_map = /' > ukbdmap.h" \ no-obj no-implicit-rule before-depend \ clean "ukbdmap.h" # nvenetlib.o optional nve pci \ dependency "$S/contrib/dev/nve/i386/nvenetlib.o.bz2.uu" \ compile-with "uudecode $S/contrib/dev/nve/i386/nvenetlib.o.bz2.uu ; bzip2 -df nvenetlib.o.bz2" \ no-implicit-rule # os+%DIKED-nve.h optional nve pci \ dependency "$S/contrib/dev/nve/os.h" \ compile-with "sed -e 's/^.*#include.*phy\.h.*$$//' $S/contrib/dev/nve/os.h > os+%DIKED-nve.h" \ no-implicit-rule no-obj before-depend \ clean "os+%DIKED-nve.h" # hpt27xx_lib.o optional hpt27xx \ dependency "$S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ compile-with "uudecode < $S/dev/hpt27xx/i386-elf.hpt27xx_lib.o.uu" \ no-implicit-rule # hptmvraid.o optional hptmv \ dependency "$S/dev/hptmv/i386-elf.raid.o.uu" \ compile-with "uudecode < $S/dev/hptmv/i386-elf.raid.o.uu" \ no-implicit-rule # hptnr_lib.o optional hptnr \ dependency "$S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptnr/i386-elf.hptnr_lib.o.uu" \ no-implicit-rule # hptrr_lib.o optional hptrr \ dependency "$S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ compile-with "uudecode < $S/dev/hptrr/i386-elf.hptrr_lib.o.uu" \ no-implicit-rule # cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux compat/linux/linux_futex.c optional compat_linux 
compat/linux/linux_getcwd.c optional compat_linux compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux compat/linux/linux_sysctl.c optional compat_linux compat/linux/linux_time.c optional compat_linux compat/linux/linux_uid16.c optional compat_linux compat/linux/linux_util.c optional compat_linux compat/ndis/kern_ndis.c optional ndisapi pci compat/ndis/kern_windrv.c optional ndisapi pci compat/ndis/subr_hal.c optional ndisapi pci compat/ndis/subr_ndis.c optional ndisapi pci compat/ndis/subr_ntoskrnl.c optional ndisapi pci compat/ndis/subr_pe.c optional ndisapi pci compat/ndis/subr_usbd.c optional ndisapi pci compat/ndis/winx32_wrap.S optional ndisapi pci compat/svr4/imgact_svr4.c optional compat_svr4 compat/svr4/svr4_fcntl.c optional compat_svr4 compat/svr4/svr4_filio.c optional compat_svr4 compat/svr4/svr4_ioctl.c optional compat_svr4 compat/svr4/svr4_ipc.c optional compat_svr4 compat/svr4/svr4_misc.c optional compat_svr4 compat/svr4/svr4_resource.c optional compat_svr4 compat/svr4/svr4_signal.c optional compat_svr4 compat/svr4/svr4_socket.c optional compat_svr4 compat/svr4/svr4_sockio.c optional compat_svr4 compat/svr4/svr4_stat.c optional compat_svr4 compat/svr4/svr4_stream.c optional compat_svr4 compat/svr4/svr4_syscallnames.c optional compat_svr4 compat/svr4/svr4_sysent.c optional compat_svr4 compat/svr4/svr4_sysvec.c optional compat_svr4 compat/svr4/svr4_termios.c optional compat_svr4 bf_enc.o optional crypto | ipsec \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule crypto/aesni/aesencdec_i386.S optional aesni crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni crypto/aesni/aesni_wrap.c optional aesni crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock dev/advansys/adv_isa.c optional adv isa dev/agp/agp_ali.c optional agp dev/agp/agp_amd.c optional agp dev/agp/agp_amd64.c optional agp dev/agp/agp_ati.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_intel.c optional agp dev/agp/agp_nvidia.c optional agp dev/agp/agp_sis.c optional agp dev/agp/agp_via.c optional agp dev/aic/aic_isa.c optional aic isa dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdtemp/amdtemp.c optional amdtemp dev/arcmsr/arcmsr.c optional arcmsr pci dev/asmc/asmc.c optional asmc isa dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc dev/atkbdc/atkbdc_isa.c optional atkbdc isa dev/atkbdc/atkbdc_subr.c optional atkbdc dev/atkbdc/psm.c optional psm atkbdc dev/ce/ceddk.c optional ce dev/ce/if_ce.c optional ce dev/ce/tau32-ddk.c optional ce \ compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}" dev/cm/if_cm_isa.c optional cm isa dev/coretemp/coretemp.c optional coretemp dev/cp/cpddk.c optional cp dev/cp/if_cp.c optional cp dev/cpuctl/cpuctl.c optional cpuctl dev/ctau/ctau.c optional ctau dev/ctau/ctddk.c optional ctau dev/ctau/if_ct.c optional ctau dev/cx/csigma.c optional cx dev/cx/cxddk.c optional cx 
dev/cx/if_cx.c optional cx dev/dpms/dpms.c optional dpms dev/ed/if_ed_3c503.c optional ed isa ed_3c503 dev/ed/if_ed_isa.c optional ed isa dev/ed/if_ed_wd80x3.c optional ed isa dev/ed/if_ed_hpp.c optional ed isa ed_hpp dev/ed/if_ed_sic.c optional ed isa ed_sic dev/fb/fb.c optional fb | vga dev/fb/s3_pci.c optional s3pci dev/fb/vesa.c optional vga vesa dev/fb/vga.c optional vga dev/fdc/fdc.c optional fdc dev/fdc/fdc_acpi.c optional fdc dev/fdc/fdc_isa.c optional fdc isa dev/fdc/fdc_pccard.c optional fdc pccard dev/fdt/fdt_x86.c optional fdt dev/fe/if_fe_isa.c optional fe isa dev/glxiic/glxiic.c optional glxiic dev/glxsb/glxsb.c optional glxsb dev/glxsb/glxsb_hash.c optional glxsb dev/hpt27xx/hpt27xx_os_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_osm_bsd.c optional hpt27xx dev/hpt27xx/hpt27xx_config.c optional hpt27xx dev/hptmv/entry.c optional hptmv dev/hptmv/mv.c optional hptmv dev/hptmv/gui_lib.c optional hptmv dev/hptmv/hptproc.c optional hptmv dev/hptmv/ioctl.c optional hptmv dev/hptnr/hptnr_os_bsd.c optional hptnr dev/hptnr/hptnr_osm_bsd.c optional hptnr dev/hptnr/hptnr_config.c optional hptnr dev/hptrr/hptrr_os_bsd.c optional hptrr dev/hptrr/hptrr_osm_bsd.c optional hptrr dev/hptrr/hptrr_config.c optional hptrr dev/hwpmc/hwpmc_amd.c optional hwpmc dev/hwpmc/hwpmc_intel.c optional hwpmc dev/hwpmc/hwpmc_core.c optional hwpmc dev/hwpmc/hwpmc_uncore.c optional hwpmc dev/hwpmc/hwpmc_pentium.c optional hwpmc dev/hwpmc/hwpmc_piv.c optional hwpmc dev/hwpmc/hwpmc_ppro.c optional hwpmc dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/ichwd/ichwd.c optional ichwd dev/if_ndis/if_ndis.c optional ndis dev/if_ndis/if_ndis_pccard.c optional ndis pccard dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci dev/if_ndis/if_ndis_usb.c optional ndis usb dev/io/iodev.c optional io dev/ipmi/ipmi.c optional ipmi dev/ipmi/ipmi_acpi.c optional ipmi acpi dev/ipmi/ipmi_isa.c optional ipmi isa dev/ipmi/ipmi_kcs.c optional ipmi dev/ipmi/ipmi_smic.c optional ipmi dev/ipmi/ipmi_smbus.c optional ipmi smbus dev/ipmi/ipmi_smbios.c optional ipmi dev/ipmi/ipmi_ssif.c optional ipmi smbus dev/ipmi/ipmi_pci.c optional ipmi pci dev/ipmi/ipmi_linux.c optional ipmi compat_linux dev/kbd/kbd.c optional atkbd | sc | ukbd dev/le/if_le_isa.c optional le isa dev/lindev/full.c optional lindev dev/lindev/lindev.c optional lindev dev/mse/mse.c optional mse dev/mse/mse_isa.c optional mse isa dev/nfe/if_nfe.c optional nfe pci dev/nvd/nvd.c optional nvd nvme dev/nve/if_nve.c optional nve pci dev/nvme/nvme.c optional nvme dev/nvme/nvme_ctrlr.c optional nvme dev/nvme/nvme_ctrlr_cmd.c optional nvme dev/nvme/nvme_ns.c optional nvme dev/nvme/nvme_ns_cmd.c optional nvme dev/nvme/nvme_qpair.c optional nvme dev/nvme/nvme_sysctl.c optional nvme dev/nvme/nvme_test.c optional nvme dev/nvme/nvme_util.c optional nvme dev/nvram/nvram.c optional nvram isa dev/pcf/pcf_isa.c optional pcf dev/random/ivy.c optional random rdrand_rng dev/random/nehemiah.c optional random padlock_rng dev/sbni/if_sbni.c optional sbni dev/sbni/if_sbni_isa.c optional sbni isa dev/sbni/if_sbni_pci.c optional sbni pci dev/sio/sio.c optional sio dev/sio/sio_isa.c optional sio isa dev/sio/sio_pccard.c optional sio pccard dev/sio/sio_pci.c optional sio pci dev/sio/sio_puc.c optional sio puc dev/speaker/spkr.c optional speaker dev/syscons/apm/apm_saver.c optional apm_saver apm dev/syscons/scterm-teken.c optional sc dev/syscons/scvesactl.c optional sc vga vesa dev/syscons/scvgarndr.c optional sc vga dev/syscons/scvtb.c optional sc dev/tpm/tpm.c optional 
tpm dev/tpm/tpm_acpi.c optional tpm acpi dev/tpm/tpm_isa.c optional tpm isa dev/uart/uart_cpu_x86.c optional uart dev/viawd/viawd.c optional viawd dev/vmware/vmxnet3/if_vmx.c optional vmx dev/acpica/acpi_if.m standard dev/acpi_support/acpi_wmi_if.m standard dev/wbwd/wbwd.c optional wbwd dev/wpi/if_wpi.c optional wpi dev/isci/isci.c optional isci dev/isci/isci_controller.c optional isci dev/isci/isci_domain.c optional isci dev/isci/isci_interrupt.c optional isci dev/isci/isci_io_request.c optional isci dev/isci/isci_logger.c optional isci dev/isci/isci_oem_parameters.c optional isci dev/isci/isci_remote_device.c optional isci dev/isci/isci_sysctl.c optional isci dev/isci/isci_task_request.c optional isci dev/isci/isci_timer.c optional isci dev/isci/scil/sati.c optional isci dev/isci/scil/sati_abort_task_set.c optional isci dev/isci/scil/sati_atapi.c optional isci dev/isci/scil/sati_device.c optional isci dev/isci/scil/sati_inquiry.c optional isci dev/isci/scil/sati_log_sense.c optional isci dev/isci/scil/sati_lun_reset.c optional isci dev/isci/scil/sati_mode_pages.c optional isci dev/isci/scil/sati_mode_select.c optional isci dev/isci/scil/sati_mode_sense.c optional isci dev/isci/scil/sati_mode_sense_10.c optional isci dev/isci/scil/sati_mode_sense_6.c optional isci dev/isci/scil/sati_move.c optional isci dev/isci/scil/sati_passthrough.c optional isci dev/isci/scil/sati_read.c optional isci dev/isci/scil/sati_read_buffer.c optional isci dev/isci/scil/sati_read_capacity.c optional isci dev/isci/scil/sati_reassign_blocks.c optional isci dev/isci/scil/sati_report_luns.c optional isci dev/isci/scil/sati_request_sense.c optional isci dev/isci/scil/sati_start_stop_unit.c optional isci dev/isci/scil/sati_synchronize_cache.c optional isci dev/isci/scil/sati_test_unit_ready.c optional isci dev/isci/scil/sati_unmap.c optional isci dev/isci/scil/sati_util.c optional isci dev/isci/scil/sati_verify.c optional isci dev/isci/scil/sati_write.c optional isci dev/isci/scil/sati_write_and_verify.c optional isci dev/isci/scil/sati_write_buffer.c optional isci dev/isci/scil/sati_write_long.c optional isci dev/isci/scil/sci_abstract_list.c optional isci dev/isci/scil/sci_base_controller.c optional isci dev/isci/scil/sci_base_domain.c optional isci dev/isci/scil/sci_base_iterator.c optional isci dev/isci/scil/sci_base_library.c optional isci dev/isci/scil/sci_base_logger.c optional isci dev/isci/scil/sci_base_memory_descriptor_list.c optional isci dev/isci/scil/sci_base_memory_descriptor_list_decorator.c optional isci dev/isci/scil/sci_base_object.c optional isci dev/isci/scil/sci_base_observer.c optional isci dev/isci/scil/sci_base_phy.c optional isci dev/isci/scil/sci_base_port.c optional isci dev/isci/scil/sci_base_remote_device.c optional isci dev/isci/scil/sci_base_request.c optional isci dev/isci/scil/sci_base_state_machine.c optional isci dev/isci/scil/sci_base_state_machine_logger.c optional isci dev/isci/scil/sci_base_state_machine_observer.c optional isci dev/isci/scil/sci_base_subject.c optional isci dev/isci/scil/sci_util.c optional isci dev/isci/scil/scic_sds_controller.c optional isci dev/isci/scil/scic_sds_library.c optional isci dev/isci/scil/scic_sds_pci.c optional isci dev/isci/scil/scic_sds_phy.c optional isci dev/isci/scil/scic_sds_port.c optional isci dev/isci/scil/scic_sds_port_configuration_agent.c optional isci dev/isci/scil/scic_sds_remote_device.c optional isci dev/isci/scil/scic_sds_remote_node_context.c optional isci dev/isci/scil/scic_sds_remote_node_table.c optional isci 
dev/isci/scil/scic_sds_request.c optional isci dev/isci/scil/scic_sds_sgpio.c optional isci dev/isci/scil/scic_sds_smp_remote_device.c optional isci dev/isci/scil/scic_sds_smp_request.c optional isci dev/isci/scil/scic_sds_ssp_request.c optional isci dev/isci/scil/scic_sds_stp_packet_request.c optional isci dev/isci/scil/scic_sds_stp_remote_device.c optional isci dev/isci/scil/scic_sds_stp_request.c optional isci dev/isci/scil/scic_sds_unsolicited_frame_control.c optional isci dev/isci/scil/scif_sas_controller.c optional isci dev/isci/scil/scif_sas_controller_state_handlers.c optional isci dev/isci/scil/scif_sas_controller_states.c optional isci dev/isci/scil/scif_sas_domain.c optional isci dev/isci/scil/scif_sas_domain_state_handlers.c optional isci dev/isci/scil/scif_sas_domain_states.c optional isci dev/isci/scil/scif_sas_high_priority_request_queue.c optional isci dev/isci/scil/scif_sas_internal_io_request.c optional isci dev/isci/scil/scif_sas_io_request.c optional isci dev/isci/scil/scif_sas_io_request_state_handlers.c optional isci dev/isci/scil/scif_sas_io_request_states.c optional isci dev/isci/scil/scif_sas_library.c optional isci dev/isci/scil/scif_sas_remote_device.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_ready_substates.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substate_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_starting_substates.c optional isci dev/isci/scil/scif_sas_remote_device_state_handlers.c optional isci dev/isci/scil/scif_sas_remote_device_states.c optional isci dev/isci/scil/scif_sas_request.c optional isci dev/isci/scil/scif_sas_smp_activity_clear_affiliation.c optional isci dev/isci/scil/scif_sas_smp_io_request.c optional isci dev/isci/scil/scif_sas_smp_phy.c optional isci dev/isci/scil/scif_sas_smp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_io_request.c optional isci dev/isci/scil/scif_sas_stp_remote_device.c optional isci dev/isci/scil/scif_sas_stp_task_request.c optional isci dev/isci/scil/scif_sas_task_request.c optional isci dev/isci/scil/scif_sas_task_request_state_handlers.c optional isci dev/isci/scil/scif_sas_task_request_states.c optional isci dev/isci/scil/scif_sas_timer.c optional isci dev/virtio/virtio.c optional virtio dev/virtio/virtqueue.c optional virtio dev/virtio/virtio_bus_if.m optional virtio dev/virtio/virtio_if.m optional virtio dev/virtio/pci/virtio_pci.c optional virtio_pci dev/virtio/network/if_vtnet.c optional vtnet dev/virtio/block/virtio_blk.c optional virtio_blk dev/virtio/balloon/virtio_balloon.c optional virtio_balloon dev/virtio/scsi/virtio_scsi.c optional virtio_scsi i386/acpica/acpi_machdep.c optional acpi acpi_wakecode.o optional acpi \ dependency "$S/i386/acpica/acpi_wakecode.S assym.s" \ compile-with "${NORMAL_S}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.o" acpi_wakecode.bin optional acpi \ dependency "acpi_wakecode.o" \ compile-with "${OBJCOPY} -S -O binary acpi_wakecode.o ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.bin" acpi_wakecode.h optional acpi \ dependency "acpi_wakecode.bin" \ compile-with "file2c -sx 'static char wakecode[] = {' '};' < acpi_wakecode.bin > ${.TARGET}" \ no-obj no-implicit-rule before-depend \ clean "acpi_wakecode.h" acpi_wakedata.h optional acpi \ dependency "acpi_wakecode.o" \ compile-with '${NM} -n --defined-only acpi_wakecode.o | while read offset dummy what; do echo "#define $${what} 0x$${offset}"; done > 
${.TARGET}' \ no-obj no-implicit-rule before-depend \ clean "acpi_wakedata.h" # i386/bios/apm.c optional apm i386/bios/mca_machdep.c optional mca i386/bios/smapi.c optional smapi i386/bios/smapi_bios.S optional smapi #i386/i386/apic_vector.s optional apic i386/i386/atomic.c standard \ compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" i386/i386/autoconf.c standard i386/i386/bios.c optional native i386/i386/bioscall.s optional native i386/i386/bpf_jit_machdep.c optional bpf_jitter i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb i386/i386/elan-mmcr.c optional cpu_elan | cpu_soekris i386/i386/elf_machdep.c standard i386/i386/exception.s optional native i386/xen/exception.s optional xen i386/i386/gdb_machdep.c optional gdb i386/i386/geode.c optional cpu_geode i386/i386/i686_mem.c optional mem i386/i386/identcpu.c standard i386/i386/in_cksum.c optional inet | inet6 i386/i386/initcpu.c standard i386/i386/io.c optional io i386/i386/k6_mem.c optional mem i386/i386/locore.s optional native no-obj i386/xen/locore.s optional xen no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard i386/xen/xen_machdep.c optional xen i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard i386/i386/mp_clock.c optional smp i386/i386/mp_machdep.c optional native smp i386/xen/mp_machdep.c optional xen smp i386/i386/mp_watchdog.c optional mp_watchdog smp i386/i386/mpboot.s optional smp native i386/xen/mptable.c optional apic xen i386/i386/perfmon.c optional perfmon i386/i386/pmap.c optional native i386/xen/pmap.c optional xen i386/i386/ptrace_machdep.c standard i386/i386/stack_machdep.c optional ddb | stack i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard i386/ibcs2/ibcs2_errno.c optional ibcs2 i386/ibcs2/ibcs2_fcntl.c optional ibcs2 i386/ibcs2/ibcs2_ioctl.c optional ibcs2 i386/ibcs2/ibcs2_ipc.c optional ibcs2 i386/ibcs2/ibcs2_isc.c optional ibcs2 i386/ibcs2/ibcs2_isc_sysent.c optional ibcs2 i386/ibcs2/ibcs2_misc.c optional ibcs2 i386/ibcs2/ibcs2_msg.c optional ibcs2 i386/ibcs2/ibcs2_other.c optional ibcs2 i386/ibcs2/ibcs2_signal.c optional ibcs2 i386/ibcs2/ibcs2_socksys.c optional ibcs2 i386/ibcs2/ibcs2_stat.c optional ibcs2 i386/ibcs2/ibcs2_sysent.c optional ibcs2 i386/ibcs2/ibcs2_sysi86.c optional ibcs2 i386/ibcs2/ibcs2_sysvec.c optional ibcs2 i386/ibcs2/ibcs2_util.c optional ibcs2 i386/ibcs2/ibcs2_xenix.c optional ibcs2 i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2 i386/ibcs2/imgact_coff.c optional ibcs2 i386/xen/clock.c optional xen i386/xen/xen_clock_util.c optional xen i386/xen/xen_rtc.c optional xen i386/isa/elink.c optional ep | ie i386/isa/npx.c optional npx i386/isa/pmtimer.c optional pmtimer i386/isa/prof_machdep.c optional profiling-routine i386/isa/spic.c optional spic i386/linux/imgact_linux.c optional compat_linux i386/linux/linux_dummy.c optional compat_linux i386/linux/linux_locore.s optional compat_linux \ dependency "linux_assym.h" i386/linux/linux_machdep.c optional compat_linux i386/linux/linux_ptrace.c optional compat_linux i386/linux/linux_support.s optional compat_linux \ dependency "linux_assym.h" i386/linux/linux_sysent.c optional compat_linux i386/linux/linux_sysvec.c optional compat_linux i386/pci/pci_cfgreg.c optional pci i386/pci/pci_pir.c optional pci i386/svr4/svr4_locore.s optional compat_svr4 \ 
dependency "svr4_assym.h" \ warning "COMPAT_SVR4 is broken and should be avoided" i386/svr4/svr4_machdep.c optional compat_svr4 # isa/syscons_isa.c optional sc isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip libkern/divdi3.c standard libkern/ffsl.c standard libkern/flsl.c standard libkern/memmove.c standard libkern/memset.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard i386/xbox/xbox.c optional xbox i386/xbox/xboxfb.c optional xboxfb dev/fb/boot_font.c optional xboxfb i386/xbox/pic16l.s optional xbox # # x86 real mode BIOS support, required by atkbdc/dpms/vesa # compat/x86bios/x86bios.c optional x86bios | atkbd | dpms | vesa # # x86 shared code between IA32, AMD64 and PC98 architectures # x86/acpica/OsdEnvironment.c optional acpi x86/acpica/acpi_apm.c optional acpi x86/acpica/acpi_wakeup.c optional acpi x86/acpica/madt.c optional acpi apic x86/acpica/srat.c optional acpi x86/bios/smbios.c optional smbios x86/bios/vpd.c optional vpd x86/cpufreq/est.c optional cpufreq x86/cpufreq/hwpstate.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq x86/cpufreq/smist.c optional cpufreq x86/isa/atpic.c optional atpic x86/isa/atrtc.c optional native x86/isa/clock.c optional native x86/isa/elcr.c optional atpic | apic native x86/isa/isa.c optional isa x86/isa/isa_dma.c optional isa x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/intr_machdep.c standard x86/x86/io_apic.c optional apic x86/x86/legacy.c optional native x86/x86/local_apic.c optional apic x86/x86/mca.c standard x86/x86/mptable.c optional apic native x86/x86/mptable_pci.c optional apic native pci x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/tsc.c standard +x86/xen/hvm.c optional xenhvm +x86/xen/xen_intr.c optional xen | xenhvm diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c index d4f94ab9ee00..9021abb1172d 100644 --- a/sys/dev/xen/balloon/balloon.c +++ b/sys/dev/xen/balloon/balloon.c @@ -1,571 +1,572 @@ /****************************************************************************** * balloon.c * * Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include -#include -#include -#include +#include +#include + +#include #include +#include #include -#include -#include +#include static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); struct mtx balloon_mutex; /* * Protects atomic reservation decrease/increase against concurrent increases. * Also protects non-atomic updates of current_pages and driver_pages, and * balloon lists. */ struct mtx balloon_lock; /* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; #define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0])) struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ unsigned long current_pages; unsigned long target_pages; /* We may hit the hard limit in Xen. If we do then we remember it. */ unsigned long hard_limit; /* * Drivers may alter the memory reservation independently, but they * must inform the balloon driver so we avoid hitting the hard limit. */ unsigned long driver_pages; /* Number of pages in high- and low-memory balloons. */ unsigned long balloon_low; unsigned long balloon_high; }; static struct balloon_stats balloon_stats; #define bs balloon_stats SYSCTL_DECL(_dev_xen); static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, &bs.current_pages, 0, "Current allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, &bs.target_pages, 0, "Target allocation"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, &bs.driver_pages, 0, "Driver pages"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, &bs.hard_limit, 0, "Xen hard limit"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, &bs.balloon_low, 0, "Low-mem balloon"); SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, &bs.balloon_high, 0, "High-mem balloon"); struct balloon_entry { vm_page_t page; STAILQ_ENTRY(balloon_entry) list; }; /* List of ballooned pages, threaded through the mem_map array. */ static STAILQ_HEAD(,balloon_entry) ballooned_pages; /* Main work function, always executed in process context. */ static void balloon_process(void *unused); #define IPRINTK(fmt, args...) \ printk(KERN_INFO "xen_mem: " fmt, ##args) #define WPRINTK(fmt, args...) \ printk(KERN_WARNING "xen_mem: " fmt, ##args) /* balloon_append: add the given page to the balloon. */ static void balloon_append(vm_page_t page) { struct balloon_entry *entry; entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK); entry->page = page; STAILQ_INSERT_HEAD(&ballooned_pages, entry, list); bs.balloon_low++; } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. 
*/ static vm_page_t balloon_retrieve(void) { vm_page_t page; struct balloon_entry *entry; if (STAILQ_EMPTY(&ballooned_pages)) return NULL; entry = STAILQ_FIRST(&ballooned_pages); STAILQ_REMOVE_HEAD(&ballooned_pages, list); page = entry->page; free(entry, M_BALLOON); bs.balloon_low--; return page; } static unsigned long current_target(void) { unsigned long target = min(bs.target_pages, bs.hard_limit); if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) target = bs.current_pages + bs.balloon_low + bs.balloon_high; return target; } static unsigned long minimum_target(void) { #ifdef XENHVM #define max_pfn physmem #else #define max_pfn HYPERVISOR_shared_info->arch.max_pfn #endif unsigned long min_pages, curr_pages = current_target(); #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) /* Simple continuous piecewiese linear function: * max MiB -> min MiB gradient * 0 0 * 16 16 * 32 24 * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) * 8192 552 (1/32) * 32768 1320 * 131072 4392 */ if (max_pfn < MB2PAGES(128)) min_pages = MB2PAGES(8) + (max_pfn >> 1); else if (max_pfn < MB2PAGES(512)) min_pages = MB2PAGES(40) + (max_pfn >> 2); else if (max_pfn < MB2PAGES(2048)) min_pages = MB2PAGES(104) + (max_pfn >> 3); else min_pages = MB2PAGES(296) + (max_pfn >> 5); #undef MB2PAGES /* Don't enforce growth */ return min(min_pages, curr_pages); #ifndef CONFIG_XEN #undef max_pfn #endif } static int increase_reservation(unsigned long nr_pages) { unsigned long pfn, i; struct balloon_entry *entry; vm_page_t page; long rc; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); mtx_lock(&balloon_lock); for (entry = STAILQ_FIRST(&ballooned_pages), i = 0; i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) { KASSERT(entry, ("ballooned_pages list corrupt")); page = entry->page; frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op( XENMEM_populate_physmap, &reservation); if (rc < nr_pages) { if (rc > 0) { int ret; /* We hit the Xen hard limit: reprobe. */ reservation.nr_extents = rc; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); } if (rc >= 0) bs.hard_limit = (bs.current_pages + rc - bs.driver_pages); goto out; } for (i = 0; i < nr_pages; i++) { page = balloon_retrieve(); KASSERT(page, ("balloon_retrieve failed")); pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); KASSERT((xen_feature(XENFEAT_auto_translated_physmap) || !phys_to_machine_mapping_valid(pfn)), ("auto translated physmap but mapping is valid")); set_phys_to_machine(pfn, frame_list[i]); #if 0 #ifndef XENHVM /* Link back into the page tables if not highmem. */ if (pfn < max_low_pfn) { int ret; ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), pfn_pte_ma(frame_list[i], PAGE_KERNEL), 0); PASSING(ret == 0, ("HYPERVISOR_update_va_mapping failed")); } #endif #endif /* Relinquish the page back to the allocator. 
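As a side note on the sizing logic earlier in this file: the piecewise-linear floor computed by minimum_target() is easy to sanity-check in isolation. The following stand-alone sketch (not part of the driver; it assumes 4 KiB pages and a hypothetical 1 GiB guest) reproduces that arithmetic and prints a floor of 59392 pages, i.e. 232 MiB, which matches the gradient table in the comment.

#include <stdio.h>

#define PAGE_SHIFT 12                                /* assumption: 4 KiB pages */
#define MB2PAGES(mb) ((unsigned long)(mb) << (20 - PAGE_SHIFT))

/* Stand-alone copy of the piecewise-linear floor used by minimum_target(). */
static unsigned long
balloon_floor_pages(unsigned long max_pfn)
{
        unsigned long min_pages;

        if (max_pfn < MB2PAGES(128))
                min_pages = MB2PAGES(8) + (max_pfn >> 1);
        else if (max_pfn < MB2PAGES(512))
                min_pages = MB2PAGES(40) + (max_pfn >> 2);
        else if (max_pfn < MB2PAGES(2048))
                min_pages = MB2PAGES(104) + (max_pfn >> 3);
        else
                min_pages = MB2PAGES(296) + (max_pfn >> 5);
        return (min_pages);
}

int
main(void)
{
        unsigned long max_pfn = MB2PAGES(1024);      /* hypothetical 1 GiB guest */

        printf("floor = %lu pages (%lu MiB)\n",
            balloon_floor_pages(max_pfn),
            balloon_floor_pages(max_pfn) >> (20 - PAGE_SHIFT));
        return (0);
}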
*/ vm_page_unwire(page, 0); vm_page_free(page); } bs.current_pages += nr_pages; //totalram_pages = bs.current_pages; out: mtx_unlock(&balloon_lock); return 0; } static int decrease_reservation(unsigned long nr_pages) { unsigned long pfn, i; vm_page_t page; int need_sleep = 0; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); for (i = 0; i < nr_pages; i++) { if ((page = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { nr_pages = i; need_sleep = 1; break; } pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); frame_list[i] = PFNTOMFN(pfn); #if 0 if (!PageHighMem(page)) { v = phys_to_virt(pfn << PAGE_SHIFT); scrub_pages(v, 1); #ifdef CONFIG_XEN ret = HYPERVISOR_update_va_mapping( (unsigned long)v, __pte_ma(0), 0); BUG_ON(ret); #endif } #endif #ifdef CONFIG_XEN_SCRUB_PAGES else { v = kmap(page); scrub_pages(v, 1); kunmap(page); } #endif } #ifdef CONFIG_XEN /* Ensure that ballooned highmem pages don't have kmaps. */ kmap_flush_unused(); flush_tlb_all(); #endif mtx_lock(&balloon_lock); /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = MFNTOPFN(frame_list[i]); set_phys_to_machine(pfn, INVALID_P2M_ENTRY); balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT)); } set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); bs.current_pages -= nr_pages; //totalram_pages = bs.current_pages; mtx_unlock(&balloon_lock); return (need_sleep); } /* * We avoid multiple worker processes conflicting via the balloon mutex. * We may of course race updates of the target counts (which are protected * by the balloon lock), or with changes to the Xen hard limit, but we will * recover from these in time. */ static void balloon_process(void *unused) { int need_sleep = 0; long credit; mtx_lock(&balloon_mutex); for (;;) { int sleep_time; do { credit = current_target() - bs.current_pages; if (credit > 0) need_sleep = (increase_reservation(credit) != 0); if (credit < 0) need_sleep = (decrease_reservation(-credit) != 0); } while ((credit != 0) && !need_sleep); /* Schedule more work if there is some still to be done. */ if (current_target() != bs.current_pages) sleep_time = hz; else sleep_time = 0; msleep(balloon_process, &balloon_mutex, 0, "balloon", sleep_time); } mtx_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ static void set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ bs.hard_limit = ~0UL; bs.target_pages = max(target, minimum_target()); wakeup(balloon_process); } static struct xs_watch target_watch = { .node = "memory/target" }; /* React to a change in the target key */ static void watch_target(struct xs_watch *watch, const char **vec, unsigned int len) { unsigned long long new_target; int err; err = xs_scanf(XST_NIL, "memory", "target", NULL, "%llu", &new_target); if (err) { /* This is ok (for domain0 at least) - so just return */ return; } /* The given memory/target value is in KiB, so it needs converting to pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 
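The shift by PAGE_SHIFT - 10 used by set_new_target() below is worth a quick check. A minimal sketch, assuming 4 KiB pages (PAGE_SHIFT == 12) and a hypothetical memory/target of 512 MiB expressed in KiB:

#include <stdio.h>

#define PAGE_SHIFT 12                                /* assumption: 4 KiB pages */

int
main(void)
{
        unsigned long long target_kib = 524288ULL;   /* hypothetical memory/target: 512 MiB in KiB */

        /* 2^10 bytes per KiB, 2^PAGE_SHIFT bytes per page, so shift by PAGE_SHIFT - 10. */
        unsigned long target_pages = (unsigned long)(target_kib >> (PAGE_SHIFT - 10));

        printf("%llu KiB -> %lu pages\n", target_kib, target_pages);
        return (0);
}

With these assumptions the program prints 131072 pages, i.e. exactly 512 MiB worth of 4 KiB pages.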
*/ set_new_target(new_target >> (PAGE_SHIFT - 10)); } static void balloon_init_watcher(void *arg) { int err; if (!is_running_on_xen()) return; err = xs_register_watch(&target_watch); if (err) printf("Failed to set balloon watcher\n"); } SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init_watcher, NULL); static void balloon_init(void *arg) { #ifndef XENHVM vm_page_t page; unsigned long pfn; #define max_pfn HYPERVISOR_shared_info->arch.max_pfn #endif if (!is_running_on_xen()) return; mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF); mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); #ifndef XENHVM bs.current_pages = min(xen_start_info->nr_pages, max_pfn); #else bs.current_pages = physmem; #endif bs.target_pages = bs.current_pages; bs.balloon_low = 0; bs.balloon_high = 0; bs.driver_pages = 0UL; bs.hard_limit = ~0UL; kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); #ifndef XENHVM /* Initialise the balloon with excess memory space. */ for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); balloon_append(page); } #undef max_pfn #endif target_watch.callback = watch_target; return; } SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL); void balloon_update_driver_allowance(long delta); void balloon_update_driver_allowance(long delta) { mtx_lock(&balloon_lock); bs.driver_pages += delta; mtx_unlock(&balloon_lock); } #if 0 static int dealloc_pte_fn( pte_t *pte, struct page *pte_page, unsigned long addr, void *data) { unsigned long mfn = pte_mfn(*pte); int ret; struct xen_memory_reservation reservation = { .extent_start = &mfn, .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF }; set_pte_at(&init_mm, addr, pte, __pte_ma(0)); set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(ret == 1, ("HYPERVISOR_memory_op failed")); return 0; } #endif #if 0 vm_page_t balloon_alloc_empty_page_range(unsigned long nr_pages) { vm_page_t pages; int i, rc; unsigned long *mfn_list; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4) if (pages == NULL) return NULL; mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK); for (i = 0; i < nr_pages; i++) { mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT); PFNTOMFN(i) = INVALID_P2M_ENTRY; reservation.extent_start = mfn_list; reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed")); } current_pages -= nr_pages; wakeup(balloon_process); return pages; } void balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages) { unsigned long i; for (i = 0; i < nr_pages; i++) balloon_append(page + i); wakeup(balloon_process); } #endif diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c index 4208702c4016..33f6fafc6d27 100644 --- a/sys/dev/xen/blkback/blkback.c +++ b/sys/dev/xen/blkback/blkback.c @@ -1,3981 +1,3981 @@ /*- * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. 
Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * Ken Merry (Spectra Logic Corporation) */ #include __FBSDID("$FreeBSD$"); /** * \file blkback.c * * \brief Device driver supporting the vending of block storage from * a FreeBSD domain to other domains. */ #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include +#include #include -#include #include #include #include #include #include /*--------------------------- Compile-time Tunables --------------------------*/ /** * The maximum number of outstanding request blocks (request headers plus * additional segment blocks) we will allow in a negotiated block-front/back * communication channel. */ #define XBB_MAX_REQUESTS 256 /** * \brief Define to force all I/O to be performed on memory owned by the * backend device, with a copy-in/out to the remote domain's memory. * * \note This option is currently required when this driver's domain is * operating in HVM mode on a system using an IOMMU. * * This driver uses Xen's grant table API to gain access to the memory of * the remote domains it serves. When our domain is operating in PV mode, * the grant table mechanism directly updates our domain's page table entries * to point to the physical pages of the remote domain. This scheme guarantees * that blkback and the backing devices it uses can safely perform DMA * operations to satisfy requests. In HVM mode, Xen may use a HW IOMMU to * insure that our domain cannot DMA to pages owned by another domain. As * of Xen 4.0, IOMMU mappings for HVM guests are not updated via the grant * table API. For this reason, in HVM mode, we must bounce all requests into * memory that is mapped into our domain at domain startup and thus has * valid IOMMU mappings. */ #define XBB_USE_BOUNCE_BUFFERS /** * \brief Define to enable rudimentary request logging to the console. */ #undef XBB_DEBUG /*---------------------------------- Macros ----------------------------------*/ /** * Custom malloc type for all driver allocations. */ static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data"); #ifdef XBB_DEBUG #define DPRINTF(fmt, args...) \ printf("xbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) 
do {} while(0) #endif /** * The maximum mapped region size per request we will allow in a negotiated * block-front/back communication channel. */ #define XBB_MAX_REQUEST_SIZE \ MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) /** * The maximum number of segments (within a request header and accompanying * segment blocks) per request we will allow in a negotiated block-front/back * communication channel. */ #define XBB_MAX_SEGMENTS_PER_REQUEST \ (MIN(UIO_MAXIOV, \ MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1))) /** * The maximum number of shared memory ring pages we will allow in a * negotiated block-front/back communication channel. Allow enough * ring space for all requests to be XBB_MAX_REQUEST_SIZE'd. */ #define XBB_MAX_RING_PAGES \ BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBB_MAX_SEGMENTS_PER_REQUEST) \ * XBB_MAX_REQUESTS) /** * The maximum number of ring pages that we can allow per request list. * We limit this to the maximum number of segments per request, because * that is already a reasonable number of segments to aggregate. This * number should never be smaller than XBB_MAX_SEGMENTS_PER_REQUEST, * because that would leave situations where we can't dispatch even one * large request. */ #define XBB_MAX_SEGMENTS_PER_REQLIST XBB_MAX_SEGMENTS_PER_REQUEST /*--------------------------- Forward Declarations ---------------------------*/ struct xbb_softc; struct xbb_xen_req; static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) __attribute__((format(printf, 3, 4))); static int xbb_shutdown(struct xbb_softc *xbb); static int xbb_detach(device_t dev); /*------------------------------ Data Structures -----------------------------*/ STAILQ_HEAD(xbb_xen_req_list, xbb_xen_req); typedef enum { XBB_REQLIST_NONE = 0x00, XBB_REQLIST_MAPPED = 0x01 } xbb_reqlist_flags; struct xbb_xen_reqlist { /** * Back reference to the parent block back instance for this * request. Used during bio_done handling. */ struct xbb_softc *xbb; /** * BLKIF_OP code for this request. */ int operation; /** * Set to BLKIF_RSP_* to indicate request status. * * This field allows an error status to be recorded even if the * delivery of this status must be deferred. Deferred reporting * is necessary, for example, when an error is detected during * completion processing of one bio when other bios for this * request are still outstanding. */ int status; /** * Number of 512 byte sectors not transferred. */ int residual_512b_sectors; /** * Starting sector number of the first request in the list. */ off_t starting_sector_number; /** * If we're going to coalesce, the next contiguous sector would be * this one. */ off_t next_contig_sector; /** * Number of child requests in the list. */ int num_children; /** * Number of I/O requests dispatched to the backend. */ int pendcnt; /** * Total number of segments for requests in the list. */ int nr_segments; /** * Flags for this particular request list. */ xbb_reqlist_flags flags; /** * Kernel virtual address space reserved for this request * list structure and used to map the remote domain's pages for * this I/O, into our domain's address space. */ uint8_t *kva; /** * Base, psuedo-physical address, corresponding to the start * of this request's kva region. */ uint64_t gnt_base; #ifdef XBB_USE_BOUNCE_BUFFERS /** * Pre-allocated domain local memory used to proxy remote * domain memory during I/O operations. */ uint8_t *bounce; #endif /** * Array of grant handles (one per page) used to map this request. 
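To make the negotiated limits defined above concrete, the sketch below plugs in commonly used values; the numeric defines here are illustrative assumptions, not the kernel's authoritative ones. With 4 KiB pages, a 128 KiB MAXPHYS, and 11 blkif segments per request, XBB_MAX_REQUEST_SIZE evaluates to 45056 bytes and XBB_MAX_SEGMENTS_PER_REQUEST to 11.

#include <stdio.h>

/* Hypothetical values for illustration only; the real ones come from kernel headers. */
#define PAGE_SIZE                       4096
#define MAXPHYS                         (128 * 1024)
#define UIO_MAXIOV                      1024
#define BLKIF_MAX_SEGMENTS_PER_REQUEST  11
#define MIN(a, b)                       ((a) < (b) ? (a) : (b))

#define XBB_MAX_REQUEST_SIZE \
        MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE)
#define XBB_MAX_SEGMENTS_PER_REQUEST \
        (MIN(UIO_MAXIOV, \
             MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \
                 (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1)))

int
main(void)
{
        printf("XBB_MAX_REQUEST_SIZE         = %d bytes\n", XBB_MAX_REQUEST_SIZE);
        printf("XBB_MAX_SEGMENTS_PER_REQUEST = %d\n", XBB_MAX_SEGMENTS_PER_REQUEST);
        return (0);
}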
*/ grant_handle_t *gnt_handles; /** * Device statistics request ordering type (ordered or simple). */ devstat_tag_type ds_tag_type; /** * Device statistics request type (read, write, no_data). */ devstat_trans_flags ds_trans_type; /** * The start time for this request. */ struct bintime ds_t0; /** * Linked list of contiguous requests with the same operation type. */ struct xbb_xen_req_list contig_req_list; /** * Linked list links used to aggregate idle requests in the * request list free pool (xbb->reqlist_free_stailq) and pending * requests waiting for execution (xbb->reqlist_pending_stailq). */ STAILQ_ENTRY(xbb_xen_reqlist) links; }; STAILQ_HEAD(xbb_xen_reqlist_list, xbb_xen_reqlist); /** * \brief Object tracking an in-flight I/O from a Xen VBD consumer. */ struct xbb_xen_req { /** * Linked list links used to aggregate requests into a reqlist * and to store them in the request free pool. */ STAILQ_ENTRY(xbb_xen_req) links; /** * The remote domain's identifier for this I/O request. */ uint64_t id; /** * The number of pages currently mapped for this request. */ int nr_pages; /** * The number of 512 byte sectors comprising this requests. */ int nr_512b_sectors; /** * The number of struct bio requests still outstanding for this * request on the backend device. This field is only used for * device (rather than file) backed I/O. */ int pendcnt; /** * BLKIF_OP code for this request. */ int operation; /** * Storage used for non-native ring requests. */ blkif_request_t ring_req_storage; /** * Pointer to the Xen request in the ring. */ blkif_request_t *ring_req; /** * Consumer index for this request. */ RING_IDX req_ring_idx; /** * The start time for this request. */ struct bintime ds_t0; /** * Pointer back to our parent request list. */ struct xbb_xen_reqlist *reqlist; }; SLIST_HEAD(xbb_xen_req_slist, xbb_xen_req); /** * \brief Configuration data for the shared memory request ring * used to communicate with the front-end client of this * this driver. */ struct xbb_ring_config { /** KVA address where ring memory is mapped. */ vm_offset_t va; /** The pseudo-physical address where ring memory is mapped.*/ uint64_t gnt_addr; /** * Grant table handles, one per-ring page, returned by the * hyperpervisor upon mapping of the ring and required to * unmap it when a connection is torn down. */ grant_handle_t handle[XBB_MAX_RING_PAGES]; /** * The device bus address returned by the hypervisor when * mapping the ring and required to unmap it when a connection * is torn down. */ uint64_t bus_addr[XBB_MAX_RING_PAGES]; /** The number of ring pages mapped for the current connection. */ u_int ring_pages; /** * The grant references, one per-ring page, supplied by the * front-end, allowing us to reference the ring pages in the * front-end's domain and to map these pages into our own domain. */ grant_ref_t ring_ref[XBB_MAX_RING_PAGES]; /** The interrupt driven even channel used to signal ring events. */ evtchn_port_t evtchn; }; /** * Per-instance connection state flags. */ typedef enum { /** * The front-end requested a read-only mount of the * back-end device/file. */ XBBF_READ_ONLY = 0x01, /** Communication with the front-end has been established. */ XBBF_RING_CONNECTED = 0x02, /** * Front-end requests exist in the ring and are waiting for * xbb_xen_req objects to free up. */ XBBF_RESOURCE_SHORTAGE = 0x04, /** Connection teardown in progress. */ XBBF_SHUTDOWN = 0x08, /** A thread is already performing shutdown processing. */ XBBF_IN_SHUTDOWN = 0x10 } xbb_flag_t; /** Backend device type. 
*/ typedef enum { /** Backend type unknown. */ XBB_TYPE_NONE = 0x00, /** * Backend type disk (access via cdev switch * strategy routine). */ XBB_TYPE_DISK = 0x01, /** Backend type file (access vnode operations.). */ XBB_TYPE_FILE = 0x02 } xbb_type; /** * \brief Structure used to memoize information about a per-request * scatter-gather list. * * The chief benefit of using this data structure is it avoids having * to reparse the possibly discontiguous S/G list in the original * request. Due to the way that the mapping of the memory backing an * I/O transaction is handled by Xen, a second pass is unavoidable. * At least this way the second walk is a simple array traversal. * * \note A single Scatter/Gather element in the block interface covers * at most 1 machine page. In this context a sector (blkif * nomenclature, not what I'd choose) is a 512b aligned unit * of mapping within the machine page referenced by an S/G * element. */ struct xbb_sg { /** The number of 512b data chunks mapped in this S/G element. */ int16_t nsect; /** * The index (0 based) of the first 512b data chunk mapped * in this S/G element. */ uint8_t first_sect; /** * The index (0 based) of the last 512b data chunk mapped * in this S/G element. */ uint8_t last_sect; }; /** * Character device backend specific configuration data. */ struct xbb_dev_data { /** Cdev used for device backend access. */ struct cdev *cdev; /** Cdev switch used for device backend access. */ struct cdevsw *csw; /** Used to hold a reference on opened cdev backend devices. */ int dev_ref; }; /** * File backend specific configuration data. */ struct xbb_file_data { /** Credentials to use for vnode backed (file based) I/O. */ struct ucred *cred; /** * \brief Array of io vectors used to process file based I/O. * * Only a single file based request is outstanding per-xbb instance, * so we only need one of these. */ struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; #ifdef XBB_USE_BOUNCE_BUFFERS /** * \brief Array of io vectors used to handle bouncing of file reads. * * Vnode operations are free to modify uio data during their * exectuion. In the case of a read with bounce buffering active, * we need some of the data from the original uio in order to * bounce-out the read data. This array serves as the temporary * storage for this saved data. */ struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; /** * \brief Array of memoized bounce buffer kva offsets used * in the file based backend. * * Due to the way that the mapping of the memory backing an * I/O transaction is handled by Xen, a second pass through * the request sg elements is unavoidable. We memoize the computed * bounce address here to reduce the cost of the second walk. */ void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQLIST]; #endif /* XBB_USE_BOUNCE_BUFFERS */ }; /** * Collection of backend type specific data. */ union xbb_backend_data { struct xbb_dev_data dev; struct xbb_file_data file; }; /** * Function signature of backend specific I/O handlers. */ typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int flags); /** * Per-instance configuration data. */ struct xbb_softc { /** * Task-queue used to process I/O requests. */ struct taskqueue *io_taskqueue; /** * Single "run the request queue" task enqueued * on io_taskqueue. */ struct task io_task; /** Device type for this instance. */ xbb_type device_type; /** NewBus device corresponding to this instance. */ device_t dev; /** Backend specific dispatch routine for this instance. 
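The struct xbb_sg memoization described above reduces each S/G element to three small indices. A tiny stand-alone example (hypothetical values) shows how first_sect and nsect translate back into a byte offset and length within one 4 KiB page:

#include <stdio.h>
#include <stdint.h>

/* Minimal stand-in for struct xbb_sg, for illustration only. */
struct sg_example {
        int16_t nsect;          /* number of 512b chunks mapped */
        uint8_t first_sect;     /* first 512b chunk within the page */
        uint8_t last_sect;      /* last 512b chunk within the page */
};

int
main(void)
{
        /* Hypothetical element: 512b chunks 2 through 5 of one 4 KiB page. */
        struct sg_example sg = { .nsect = 4, .first_sect = 2, .last_sect = 5 };

        printf("byte offset %u, length %u within the page\n",
            sg.first_sect * 512U, sg.nsect * 512U);
        return (0);
}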
*/ xbb_dispatch_t dispatch_io; /** The number of requests outstanding on the backend device/file. */ int active_request_count; /** Free pool of request tracking structures. */ struct xbb_xen_req_list request_free_stailq; /** Array, sized at connection time, of request tracking structures. */ struct xbb_xen_req *requests; /** Free pool of request list structures. */ struct xbb_xen_reqlist_list reqlist_free_stailq; /** List of pending request lists awaiting execution. */ struct xbb_xen_reqlist_list reqlist_pending_stailq; /** Array, sized at connection time, of request list structures. */ struct xbb_xen_reqlist *request_lists; /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. */ vm_offset_t kva; /** Psuedo-physical address corresponding to kva. */ uint64_t gnt_base_addr; /** The size of the global kva pool. */ int kva_size; /** The size of the KVA area used for request lists. */ int reqlist_kva_size; /** The number of pages of KVA used for request lists */ int reqlist_kva_pages; /** Bitmap of free KVA pages */ bitstr_t *kva_free; /** * \brief Cached value of the front-end's domain id. * * This value is used at once for each mapped page in * a transaction. We cache it to avoid incuring the * cost of an ivar access every time this is needed. */ domid_t otherend_id; /** * \brief The blkif protocol abi in effect. * * There are situations where the back and front ends can * have a different, native abi (e.g. intel x86_64 and * 32bit x86 domains on the same machine). The back-end * always accomodates the front-end's native abi. That * value is pulled from the XenStore and recorded here. */ int abi; /** * \brief The maximum number of requests and request lists allowed * to be in flight at a time. * * This value is negotiated via the XenStore. */ u_int max_requests; /** * \brief The maximum number of segments (1 page per segment) * that can be mapped by a request. * * This value is negotiated via the XenStore. */ u_int max_request_segments; /** * \brief Maximum number of segments per request list. * * This value is derived from and will generally be larger than * max_request_segments. */ u_int max_reqlist_segments; /** * The maximum size of any request to this back-end * device. * * This value is negotiated via the XenStore. */ u_int max_request_size; /** * The maximum size of any request list. This is derived directly * from max_reqlist_segments. */ u_int max_reqlist_size; /** Various configuration and state bit flags. */ xbb_flag_t flags; /** Ring mapping and interrupt configuration data. */ struct xbb_ring_config ring_config; /** Runtime, cross-abi safe, structures for ring access. */ blkif_back_rings_t rings; /** IRQ mapping for the communication ring event channel. */ - int irq; + xen_intr_handle_t xen_intr_handle; /** * \brief Backend access mode flags (e.g. write, or read-only). * * This value is passed to us by the front-end via the XenStore. */ char *dev_mode; /** * \brief Backend device type (e.g. "disk", "cdrom", "floppy"). * * This value is passed to us by the front-end via the XenStore. * Currently unused. */ char *dev_type; /** * \brief Backend device/file identifier. * * This value is passed to us by the front-end via the XenStore. * We expect this to be a POSIX path indicating the file or * device to open. */ char *dev_name; /** * Vnode corresponding to the backend device node or file * we are acessing. */ struct vnode *vn; union xbb_backend_data backend; /** The native sector size of the backend. */ u_int sector_size; /** log2 of sector_size. 
*/ u_int sector_size_shift; /** Size in bytes of the backend device or file. */ off_t media_size; /** * \brief media_size expressed in terms of the backend native * sector size. * * (e.g. xbb->media_size >> xbb->sector_size_shift). */ uint64_t media_num_sectors; /** * \brief Array of memoized scatter gather data computed during the * conversion of blkif ring requests to internal xbb_xen_req * structures. * * Ring processing is serialized so we only need one of these. */ struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQLIST]; /** * Temporary grant table map used in xbb_dispatch_io(). When * XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping this on the * stack could cause a stack overflow. */ struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQLIST]; /** Mutex protecting per-instance data. */ struct mtx lock; #ifdef XENHVM /** * Resource representing allocated physical address space * associated with our per-instance kva region. */ struct resource *pseudo_phys_res; /** Resource id for allocated physical address space. */ int pseudo_phys_res_id; #endif /** * I/O statistics from BlockBack dispatch down. These are * coalesced requests, and we start them right before execution. */ struct devstat *xbb_stats; /** * I/O statistics coming into BlockBack. These are the requests as * we get them from BlockFront. They are started as soon as we * receive a request, and completed when the I/O is complete. */ struct devstat *xbb_stats_in; /** Disable sending flush to the backend */ int disable_flush; /** Send a real flush for every N flush requests */ int flush_interval; /** Count of flush requests in the interval */ int flush_count; /** Don't coalesce requests if this is set */ int no_coalesce_reqs; /** Number of requests we have received */ uint64_t reqs_received; /** Number of requests we have completed*/ uint64_t reqs_completed; /** How many forced dispatches (i.e. without coalescing) have happend */ uint64_t forced_dispatch; /** How many normal dispatches have happend */ uint64_t normal_dispatch; /** How many total dispatches have happend */ uint64_t total_dispatch; /** How many times we have run out of KVA */ uint64_t kva_shortages; /** How many times we have run out of request structures */ uint64_t request_shortages; }; /*---------------------------- Request Processing ----------------------------*/ /** * Allocate an internal transaction tracking structure from the free pool. * * \param xbb Per-instance xbb configuration structure. * * \return On success, a pointer to the allocated xbb_xen_req structure. * Otherwise NULL. */ static inline struct xbb_xen_req * xbb_get_req(struct xbb_softc *xbb) { struct xbb_xen_req *req; req = NULL; mtx_assert(&xbb->lock, MA_OWNED); if ((req = STAILQ_FIRST(&xbb->request_free_stailq)) != NULL) { STAILQ_REMOVE_HEAD(&xbb->request_free_stailq, links); xbb->active_request_count++; } return (req); } /** * Return an allocated transaction tracking structure to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req The request structure to free. */ static inline void xbb_release_req(struct xbb_softc *xbb, struct xbb_xen_req *req) { mtx_assert(&xbb->lock, MA_OWNED); STAILQ_INSERT_HEAD(&xbb->request_free_stailq, req, links); xbb->active_request_count--; KASSERT(xbb->active_request_count >= 0, ("xbb_release_req: negative active count")); } /** * Return an xbb_xen_req_list of allocated xbb_xen_reqs to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req_list The list of requests to free. 
* \param nreqs The number of items in the list. */ static inline void xbb_release_reqs(struct xbb_softc *xbb, struct xbb_xen_req_list *req_list, int nreqs) { mtx_assert(&xbb->lock, MA_OWNED); STAILQ_CONCAT(&xbb->request_free_stailq, req_list); xbb->active_request_count -= nreqs; KASSERT(xbb->active_request_count >= 0, ("xbb_release_reqs: negative active count")); } /** * Given a page index and 512b sector offset within that page, * calculate an offset into a request's kva region. * * \param reqlist The request structure whose kva region will be accessed. * \param pagenr The page index used to compute the kva offset. * \param sector The 512b sector index used to compute the page relative * kva offset. * * \return The computed global KVA offset. */ static inline uint8_t * xbb_reqlist_vaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { return (reqlist->kva + (PAGE_SIZE * pagenr) + (sector << 9)); } #ifdef XBB_USE_BOUNCE_BUFFERS /** * Given a page index and 512b sector offset within that page, * calculate an offset into a request's local bounce memory region. * * \param reqlist The request structure whose bounce region will be accessed. * \param pagenr The page index used to compute the bounce offset. * \param sector The 512b sector index used to compute the page relative * bounce offset. * * \return The computed global bounce buffer address. */ static inline uint8_t * xbb_reqlist_bounce_addr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { return (reqlist->bounce + (PAGE_SIZE * pagenr) + (sector << 9)); } #endif /** * Given a page number and 512b sector offset within that page, * calculate an offset into the request's memory region that the * underlying backend device/file should use for I/O. * * \param reqlist The request structure whose I/O region will be accessed. * \param pagenr The page index used to compute the I/O offset. * \param sector The 512b sector index used to compute the page relative * I/O offset. * * \return The computed global I/O address. * * Depending on configuration, this will either be a local bounce buffer * or a pointer to the memory mapped in from the front-end domain for * this request. */ static inline uint8_t * xbb_reqlist_ioaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { #ifdef XBB_USE_BOUNCE_BUFFERS return (xbb_reqlist_bounce_addr(reqlist, pagenr, sector)); #else return (xbb_reqlist_vaddr(reqlist, pagenr, sector)); #endif } /** * Given a page index and 512b sector offset within that page, calculate * an offset into the local psuedo-physical address space used to map a * front-end's request data into a request. * * \param reqlist The request list structure whose pseudo-physical region * will be accessed. * \param pagenr The page index used to compute the pseudo-physical offset. * \param sector The 512b sector index used to compute the page relative * pseudo-physical offset. * * \return The computed global pseudo-phsyical address. * * Depending on configuration, this will either be a local bounce buffer * or a pointer to the memory mapped in from the front-end domain for * this request. */ static inline uintptr_t xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { struct xbb_softc *xbb; xbb = reqlist->xbb; return ((uintptr_t)(xbb->gnt_base_addr + (uintptr_t)(reqlist->kva - xbb->kva) + (PAGE_SIZE * pagenr) + (sector << 9))); } /** * Get Kernel Virtual Address space for mapping requests. * * \param xbb Per-instance xbb configuration structure. * \param nr_pages Number of pages needed. 
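The kva, bounce, and pseudo-physical helpers above all share one offset computation: a page-sized stride per mapped page plus a 512-byte stride per sector. A minimal sketch of just that arithmetic, with hypothetical indices and 4 KiB pages assumed:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096                               /* assumption: 4 KiB pages */

/* Offset-only version of xbb_reqlist_vaddr(): page index plus 512b sector index. */
static uintptr_t
reqlist_offset(int pagenr, int sector)
{
        return ((uintptr_t)PAGE_SIZE * pagenr + ((uintptr_t)sector << 9));
}

int
main(void)
{
        /* Hypothetical request: third mapped page, fourth 512b chunk within it. */
        printf("offset = %lu bytes\n", (unsigned long)reqlist_offset(2, 3));
        return (0);
}

With those values the offset is 2 * 4096 + 3 * 512 = 9728 bytes from the start of the request's kva (or bounce) region.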
* \param check_only If set, check for free KVA but don't allocate it. * \param have_lock If set, xbb lock is already held. * * \return On success, a pointer to the allocated KVA region. Otherwise NULL. * * Note: This should be unnecessary once we have either chaining or * scatter/gather support for struct bio. At that point we'll be able to * put multiple addresses and lengths in one bio/bio chain and won't need * to map everything into one virtual segment. */ static uint8_t * xbb_get_kva(struct xbb_softc *xbb, int nr_pages) { intptr_t first_clear; intptr_t num_clear; uint8_t *free_kva; int i; KASSERT(nr_pages != 0, ("xbb_get_kva of zero length")); first_clear = 0; free_kva = NULL; mtx_lock(&xbb->lock); /* * Look for the first available page. If there are none, we're done. */ bit_ffc(xbb->kva_free, xbb->reqlist_kva_pages, &first_clear); if (first_clear == -1) goto bailout; /* * Starting at the first available page, look for consecutive free * pages that will satisfy the user's request. */ for (i = first_clear, num_clear = 0; i < xbb->reqlist_kva_pages; i++) { /* * If this is true, the page is used, so we have to reset * the number of clear pages and the first clear page * (since it pointed to a region with an insufficient number * of clear pages). */ if (bit_test(xbb->kva_free, i)) { num_clear = 0; first_clear = -1; continue; } if (first_clear == -1) first_clear = i; /* * If this is true, we've found a large enough free region * to satisfy the request. */ if (++num_clear == nr_pages) { bit_nset(xbb->kva_free, first_clear, first_clear + nr_pages - 1); free_kva = xbb->kva + (uint8_t *)(first_clear * PAGE_SIZE); KASSERT(free_kva >= (uint8_t *)xbb->kva && free_kva + (nr_pages * PAGE_SIZE) <= (uint8_t *)xbb->ring_config.va, ("Free KVA %p len %d out of range, " "kva = %#jx, ring VA = %#jx\n", free_kva, nr_pages * PAGE_SIZE, (uintmax_t)xbb->kva, (uintmax_t)xbb->ring_config.va)); break; } } bailout: if (free_kva == NULL) { xbb->flags |= XBBF_RESOURCE_SHORTAGE; xbb->kva_shortages++; } mtx_unlock(&xbb->lock); return (free_kva); } /** * Free allocated KVA. * * \param xbb Per-instance xbb configuration structure. * \param kva_ptr Pointer to allocated KVA region. * \param nr_pages Number of pages in the KVA region. */ static void xbb_free_kva(struct xbb_softc *xbb, uint8_t *kva_ptr, int nr_pages) { intptr_t start_page; mtx_assert(&xbb->lock, MA_OWNED); start_page = (intptr_t)(kva_ptr - xbb->kva) >> PAGE_SHIFT; bit_nclear(xbb->kva_free, start_page, start_page + nr_pages - 1); } /** * Unmap the front-end pages associated with this I/O request. * * \param req The request structure to unmap. */ static void xbb_unmap_reqlist(struct xbb_xen_reqlist *reqlist) { struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQLIST]; u_int i; u_int invcount; int error; invcount = 0; for (i = 0; i < reqlist->nr_segments; i++) { if (reqlist->gnt_handles[i] == GRANT_REF_INVALID) continue; unmap[invcount].host_addr = xbb_get_gntaddr(reqlist, i, 0); unmap[invcount].dev_bus_addr = 0; unmap[invcount].handle = reqlist->gnt_handles[i]; reqlist->gnt_handles[i] = GRANT_REF_INVALID; invcount++; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, invcount); KASSERT(error == 0, ("Grant table operation failed")); } /** * Allocate an internal transaction tracking structure from the free pool. * * \param xbb Per-instance xbb configuration structure. * * \return On success, a pointer to the allocated xbb_xen_reqlist structure. * Otherwise NULL. 
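The KVA allocator in xbb_get_kva() above is a first-fit scan for a run of clear bits. The stand-alone sketch below mirrors that walk using a plain bool array in place of the kernel's bitstring(3) macros (bit_ffc/bit_test/bit_nset); the pool layout is hypothetical:

#include <stdbool.h>
#include <stdio.h>

/*
 * First-fit search for nr_pages consecutive clear (free) slots. Returns the
 * index of the first slot of the run and marks it allocated, or returns -1
 * if no run is large enough.
 */
static int
first_fit(bool *used, int total, int nr_pages)
{
        int first_clear = -1;
        int num_clear = 0;
        int i;

        for (i = 0; i < total; i++) {
                if (used[i]) {
                        /* Run broken: forget any partial run seen so far. */
                        num_clear = 0;
                        first_clear = -1;
                        continue;
                }
                if (first_clear == -1)
                        first_clear = i;
                if (++num_clear == nr_pages) {
                        /* Mark the run as allocated, as bit_nset() would. */
                        for (i = first_clear; i < first_clear + nr_pages; i++)
                                used[i] = true;
                        return (first_clear);
                }
        }
        return (-1);
}

int
main(void)
{
        /* Hypothetical 8-page pool with pages 0 and 3 already in use. */
        bool used[8] = { true, false, false, true, false, false, false, false };

        printf("3-page run starts at page %d\n", first_fit(used, 8, 3));
        return (0);
}

Here the scan skips the run of length two at pages 1-2 and settles on pages 4-6, printing 4.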
*/ static inline struct xbb_xen_reqlist * xbb_get_reqlist(struct xbb_softc *xbb) { struct xbb_xen_reqlist *reqlist; reqlist = NULL; mtx_assert(&xbb->lock, MA_OWNED); if ((reqlist = STAILQ_FIRST(&xbb->reqlist_free_stailq)) != NULL) { STAILQ_REMOVE_HEAD(&xbb->reqlist_free_stailq, links); reqlist->flags = XBB_REQLIST_NONE; reqlist->kva = NULL; reqlist->status = BLKIF_RSP_OKAY; reqlist->residual_512b_sectors = 0; reqlist->num_children = 0; reqlist->nr_segments = 0; STAILQ_INIT(&reqlist->contig_req_list); } return (reqlist); } /** * Return an allocated transaction tracking structure to the free pool. * * \param xbb Per-instance xbb configuration structure. * \param req The request list structure to free. * \param wakeup If set, wakeup the work thread if freeing this reqlist * during a resource shortage condition. */ static inline void xbb_release_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int wakeup) { mtx_lock(&xbb->lock); if (wakeup) { wakeup = xbb->flags & XBBF_RESOURCE_SHORTAGE; xbb->flags &= ~XBBF_RESOURCE_SHORTAGE; } if (reqlist->kva != NULL) xbb_free_kva(xbb, reqlist->kva, reqlist->nr_segments); xbb_release_reqs(xbb, &reqlist->contig_req_list, reqlist->num_children); STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); if ((xbb->flags & XBBF_SHUTDOWN) != 0) { /* * Shutdown is in progress. See if we can * progress further now that one more request * has completed and been returned to the * free pool. */ xbb_shutdown(xbb); } mtx_unlock(&xbb->lock); if (wakeup != 0) taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); } /** * Request resources and do basic request setup. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Pointer to reqlist pointer. * \param ring_req Pointer to a block ring request. * \param ring_index The ring index of this request. * * \return 0 for success, non-zero for failure. */ static int xbb_get_resources(struct xbb_softc *xbb, struct xbb_xen_reqlist **reqlist, blkif_request_t *ring_req, RING_IDX ring_idx) { struct xbb_xen_reqlist *nreqlist; struct xbb_xen_req *nreq; nreqlist = NULL; nreq = NULL; mtx_lock(&xbb->lock); /* * We don't allow new resources to be allocated if we're in the * process of shutting down. */ if ((xbb->flags & XBBF_SHUTDOWN) != 0) { mtx_unlock(&xbb->lock); return (1); } /* * Allocate a reqlist if the caller doesn't have one already. */ if (*reqlist == NULL) { nreqlist = xbb_get_reqlist(xbb); if (nreqlist == NULL) goto bailout_error; } /* We always allocate a request. */ nreq = xbb_get_req(xbb); if (nreq == NULL) goto bailout_error; mtx_unlock(&xbb->lock); if (*reqlist == NULL) { *reqlist = nreqlist; nreqlist->operation = ring_req->operation; nreqlist->starting_sector_number = ring_req->sector_number; STAILQ_INSERT_TAIL(&xbb->reqlist_pending_stailq, nreqlist, links); } nreq->reqlist = *reqlist; nreq->req_ring_idx = ring_idx; if (xbb->abi != BLKIF_PROTOCOL_NATIVE) { bcopy(ring_req, &nreq->ring_req_storage, sizeof(*ring_req)); nreq->ring_req = &nreq->ring_req_storage; } else { nreq->ring_req = ring_req; } binuptime(&nreq->ds_t0); devstat_start_transaction(xbb->xbb_stats_in, &nreq->ds_t0); STAILQ_INSERT_TAIL(&(*reqlist)->contig_req_list, nreq, links); (*reqlist)->num_children++; (*reqlist)->nr_segments += ring_req->nr_segments; return (0); bailout_error: /* * We're out of resources, so set the shortage flag. The next time * a request is released, we'll try waking up the work thread to * see if we can allocate more resources. 
*/ xbb->flags |= XBBF_RESOURCE_SHORTAGE; xbb->request_shortages++; if (nreq != NULL) xbb_release_req(xbb, nreq); mtx_unlock(&xbb->lock); if (nreqlist != NULL) xbb_release_reqlist(xbb, nreqlist, /*wakeup*/ 0); return (1); } /** * Create and transmit a response to a blkif request. * * \param xbb Per-instance xbb configuration structure. * \param req The request structure to which to respond. * \param status The status code to report. See BLKIF_RSP_* * in sys/xen/interface/io/blkif.h. */ static void xbb_send_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) { blkif_response_t *resp; int more_to_do; int notify; more_to_do = 0; /* * Place on the response ring for the relevant domain. * For now, only the spacing between entries is different * in the different ABIs, not the response entry layout. */ mtx_lock(&xbb->lock); switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: resp = RING_GET_RESPONSE(&xbb->rings.native, xbb->rings.native.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_32: resp = (blkif_response_t *) RING_GET_RESPONSE(&xbb->rings.x86_32, xbb->rings.x86_32.rsp_prod_pvt); break; case BLKIF_PROTOCOL_X86_64: resp = (blkif_response_t *) RING_GET_RESPONSE(&xbb->rings.x86_64, xbb->rings.x86_64.rsp_prod_pvt); break; default: panic("Unexpected blkif protocol ABI."); } resp->id = req->id; resp->operation = req->operation; resp->status = status; xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages); RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, notify); if (xbb->rings.common.rsp_prod_pvt == xbb->rings.common.req_cons) { /* * Tail check for pending requests. Allows frontend to avoid * notifications if requests are already in flight (lower * overheads and promotes batching). */ RING_FINAL_CHECK_FOR_REQUESTS(&xbb->rings.common, more_to_do); } else if (RING_HAS_UNCONSUMED_REQUESTS(&xbb->rings.common)) { more_to_do = 1; } xbb->reqs_completed++; mtx_unlock(&xbb->lock); if (more_to_do) taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); if (notify) - notify_remote_via_irq(xbb->irq); + xen_intr_signal(xbb->xen_intr_handle); } /** * Complete a request list. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. */ static void xbb_complete_reqlist(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) { struct xbb_xen_req *nreq; off_t sectors_sent; sectors_sent = 0; if (reqlist->flags & XBB_REQLIST_MAPPED) xbb_unmap_reqlist(reqlist); /* * All I/O is done, send the response. A lock should not be * necessary here because the request list is complete, and * therefore this is the only context accessing this request * right now. The functions we call do their own locking if * necessary. */ STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { off_t cur_sectors_sent; xbb_send_response(xbb, nreq, reqlist->status); /* We don't report bytes sent if there is an error. */ if (reqlist->status == BLKIF_RSP_OKAY) cur_sectors_sent = nreq->nr_512b_sectors; else cur_sectors_sent = 0; sectors_sent += cur_sectors_sent; devstat_end_transaction(xbb->xbb_stats_in, /*bytes*/cur_sectors_sent << 9, reqlist->ds_tag_type, reqlist->ds_trans_type, /*now*/NULL, /*then*/&nreq->ds_t0); } /* * Take out any sectors not sent. If we wind up negative (which * might happen if an error is reported as well as a residual), just * report 0 sectors sent. 
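 *
 * All of this accounting is done in 512-byte sectors and converted to
 * bytes with a shift of 9.  As a worked example (numbers purely
 * illustrative):
 *
 *	sectors_sent = 24;		// summed over the child requests
 *	residual     = 32;		// residual exceeds what was sent
 *	sectors_sent -= residual;	// -8
 *	if (sectors_sent < 0)
 *		sectors_sent = 0;	// clamp: report nothing sent
 *	bytes = sectors_sent << 9;	// 0 bytes reported to devstat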
*/ sectors_sent -= reqlist->residual_512b_sectors; if (sectors_sent < 0) sectors_sent = 0; devstat_end_transaction(xbb->xbb_stats, /*bytes*/ sectors_sent << 9, reqlist->ds_tag_type, reqlist->ds_trans_type, /*now*/NULL, /*then*/&reqlist->ds_t0); xbb_release_reqlist(xbb, reqlist, /*wakeup*/ 1); } /** * Completion handler for buffer I/O requests issued by the device * backend driver. * * \param bio The buffer I/O request on which to perform completion * processing. */ static void xbb_bio_done(struct bio *bio) { struct xbb_softc *xbb; struct xbb_xen_reqlist *reqlist; reqlist = bio->bio_caller1; xbb = reqlist->xbb; reqlist->residual_512b_sectors += bio->bio_resid >> 9; /* * This is a bit imprecise. With aggregated I/O a single * request list can contain multiple front-end requests and * a multiple bios may point to a single request. By carefully * walking the request list, we could map residuals and errors * back to the original front-end request, but the interface * isn't sufficiently rich for us to properly report the error. * So, we just treat the entire request list as having failed if an * error occurs on any part. And, if an error occurs, we treat * the amount of data transferred as 0. * * For residuals, we report it on the overall aggregated device, * but not on the individual requests, since we don't currently * do the work to determine which front-end request to which the * residual applies. */ if (bio->bio_error) { DPRINTF("BIO returned error %d for operation on device %s\n", bio->bio_error, xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; if (bio->bio_error == ENXIO && xenbus_get_state(xbb->dev) == XenbusStateConnected) { /* * Backend device has disappeared. Signal the * front-end that we (the device proxy) want to * go away. */ xenbus_set_state(xbb->dev, XenbusStateClosing); } } #ifdef XBB_USE_BOUNCE_BUFFERS if (bio->bio_cmd == BIO_READ) { vm_offset_t kva_offset; kva_offset = (vm_offset_t)bio->bio_data - (vm_offset_t)reqlist->bounce; memcpy((uint8_t *)reqlist->kva + kva_offset, bio->bio_data, bio->bio_bcount); } #endif /* XBB_USE_BOUNCE_BUFFERS */ /* * Decrement the pending count for the request list. When we're * done with the requests, send status back for all of them. */ if (atomic_fetchadd_int(&reqlist->pendcnt, -1) == 1) xbb_complete_reqlist(xbb, reqlist); g_destroy_bio(bio); } /** * Parse a blkif request into an internal request structure and send * it to the backend for processing. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. * * \return On success, 0. For resource shortages, non-zero. * * This routine performs the backend common aspects of request parsing * including compiling an internal request structure, parsing the S/G * list and any secondary ring requests in which they may reside, and * the mapping of front-end I/O pages into our domain. */ static int xbb_dispatch_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) { struct xbb_sg *xbb_sg; struct gnttab_map_grant_ref *map; struct blkif_request_segment *sg; struct blkif_request_segment *last_block_sg; struct xbb_xen_req *nreq; u_int nseg; u_int seg_idx; u_int block_segs; int nr_sects; int total_sects; int operation; uint8_t bio_flags; int error; reqlist->ds_tag_type = DEVSTAT_TAG_SIMPLE; bio_flags = 0; total_sects = 0; nr_sects = 0; /* * First determine whether we have enough free KVA to satisfy this * request list. If not, tell xbb_run_queue() so it can go to * sleep until we have more KVA. 
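 *
 * xbb_get_kva() implements that check as a first-fit scan over a
 * bitstring(3) bitmap with one bit per page of reqlist KVA.  Stripped
 * of locking and the driver's bookkeeping, the scan reduces to the
 * following (hypothetical standalone form):
 *
 *	#include <sys/bitstring.h>
 *
 *	bitstr_t *map;		// set bit == page in use
 *	int	  map_pages;	// pages covered by the bitmap
 *
 *	// Claim the first run of n clear bits; return its index or -1.
 *	static int
 *	claim_free_run(int n)
 *	{
 *		int first, i, run;
 *
 *		bit_ffc(map, map_pages, &first);
 *		if (first == -1)
 *			return (-1);
 *		for (i = first, run = 0; i < map_pages; i++) {
 *			if (bit_test(map, i)) {
 *				run = 0;
 *				first = -1;
 *				continue;
 *			}
 *			if (first == -1)
 *				first = i;
 *			if (++run == n) {
 *				bit_nset(map, first, first + n - 1);
 *				return (first);
 *			}
 *		}
 *		return (-1);
 *	}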
*/ reqlist->kva = NULL; if (reqlist->nr_segments != 0) { reqlist->kva = xbb_get_kva(xbb, reqlist->nr_segments); if (reqlist->kva == NULL) { /* * If we're out of KVA, return ENOMEM. */ return (ENOMEM); } } binuptime(&reqlist->ds_t0); devstat_start_transaction(xbb->xbb_stats, &reqlist->ds_t0); switch (reqlist->operation) { case BLKIF_OP_WRITE_BARRIER: bio_flags |= BIO_ORDERED; reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; /* FALLTHROUGH */ case BLKIF_OP_WRITE: operation = BIO_WRITE; reqlist->ds_trans_type = DEVSTAT_WRITE; if ((xbb->flags & XBBF_READ_ONLY) != 0) { DPRINTF("Attempt to write to read only device %s\n", xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } break; case BLKIF_OP_READ: operation = BIO_READ; reqlist->ds_trans_type = DEVSTAT_READ; break; case BLKIF_OP_FLUSH_DISKCACHE: /* * If this is true, the user has requested that we disable * flush support. So we just complete the requests * successfully. */ if (xbb->disable_flush != 0) { goto send_response; } /* * The user has requested that we only send a real flush * for every N flush requests. So keep count, and either * complete the request immediately or queue it for the * backend. */ if (xbb->flush_interval != 0) { if (++(xbb->flush_count) < xbb->flush_interval) { goto send_response; } else xbb->flush_count = 0; } operation = BIO_FLUSH; reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; reqlist->ds_trans_type = DEVSTAT_NO_DATA; goto do_dispatch; /*NOTREACHED*/ default: DPRINTF("error: unknown block io operation [%d]\n", reqlist->operation); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } reqlist->xbb = xbb; xbb_sg = xbb->xbb_sgs; map = xbb->maps; seg_idx = 0; STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { blkif_request_t *ring_req; RING_IDX req_ring_idx; u_int req_seg_idx; ring_req = nreq->ring_req; req_ring_idx = nreq->req_ring_idx; nr_sects = 0; nseg = ring_req->nr_segments; nreq->id = ring_req->id; nreq->nr_pages = nseg; nreq->nr_512b_sectors = 0; req_seg_idx = 0; sg = NULL; /* Check that number of segments is sane. */ - if (unlikely(nseg == 0) - || unlikely(nseg > xbb->max_request_segments)) { + if (__predict_false(nseg == 0) + || __predict_false(nseg > xbb->max_request_segments)) { DPRINTF("Bad number of segments in request (%d)\n", nseg); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } block_segs = MIN(nreq->nr_pages, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); sg = ring_req->seg; last_block_sg = sg + block_segs; while (1) { while (sg < last_block_sg) { KASSERT(seg_idx < XBB_MAX_SEGMENTS_PER_REQLIST, ("seg_idx %d is too large, max " "segs %d\n", seg_idx, XBB_MAX_SEGMENTS_PER_REQLIST)); xbb_sg->first_sect = sg->first_sect; xbb_sg->last_sect = sg->last_sect; xbb_sg->nsect = (int8_t)(sg->last_sect - sg->first_sect + 1); if ((sg->last_sect >= (PAGE_SIZE >> 9)) || (xbb_sg->nsect <= 0)) { reqlist->status = BLKIF_RSP_ERROR; goto send_response; } nr_sects += xbb_sg->nsect; map->host_addr = xbb_get_gntaddr(reqlist, seg_idx, /*sector*/0); KASSERT(map->host_addr + PAGE_SIZE <= xbb->ring_config.gnt_addr, ("Host address %#jx len %d overlaps " "ring address %#jx\n", (uintmax_t)map->host_addr, PAGE_SIZE, (uintmax_t)xbb->ring_config.gnt_addr)); map->flags = GNTMAP_host_map; map->ref = sg->gref; map->dom = xbb->otherend_id; if (operation == BIO_WRITE) map->flags |= GNTMAP_readonly; sg++; map++; xbb_sg++; seg_idx++; req_seg_idx++; } block_segs = MIN(nseg - req_seg_idx, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); if (block_segs == 0) break; /* * Fetch the next request block full of SG elements. 
* For now, only the spacing between entries is * different in the different ABIs, not the sg entry * layout. */ req_ring_idx++; switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.native, req_ring_idx); break; case BLKIF_PROTOCOL_X86_32: { sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_32, req_ring_idx); break; } case BLKIF_PROTOCOL_X86_64: { sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_64, req_ring_idx); break; } default: panic("Unexpected blkif protocol ABI."); /* NOTREACHED */ } last_block_sg = sg + block_segs; } /* Convert to the disk's sector size */ nreq->nr_512b_sectors = nr_sects; nr_sects = (nr_sects << 9) >> xbb->sector_size_shift; total_sects += nr_sects; if ((nreq->nr_512b_sectors & ((xbb->sector_size >> 9) - 1)) != 0) { device_printf(xbb->dev, "%s: I/O size (%d) is not " "a multiple of the backing store sector " "size (%d)\n", __func__, nreq->nr_512b_sectors << 9, xbb->sector_size); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } } error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, xbb->maps, reqlist->nr_segments); if (error != 0) panic("Grant table operation failed (%d)", error); reqlist->flags |= XBB_REQLIST_MAPPED; for (seg_idx = 0, map = xbb->maps; seg_idx < reqlist->nr_segments; seg_idx++, map++){ - if (unlikely(map->status != 0)) { + if (__predict_false(map->status != 0)) { DPRINTF("invalid buffer -- could not remap " "it (%d)\n", map->status); DPRINTF("Mapping(%d): Host Addr 0x%lx, flags " "0x%x ref 0x%x, dom %d\n", seg_idx, map->host_addr, map->flags, map->ref, map->dom); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } reqlist->gnt_handles[seg_idx] = map->handle; } if (reqlist->starting_sector_number + total_sects > xbb->media_num_sectors) { DPRINTF("%s of [%" PRIu64 ",%" PRIu64 "] " "extends past end of device %s\n", operation == BIO_READ ? "read" : "write", reqlist->starting_sector_number, reqlist->starting_sector_number + total_sects, xbb->dev_name); reqlist->status = BLKIF_RSP_ERROR; goto send_response; } do_dispatch: error = xbb->dispatch_io(xbb, reqlist, operation, bio_flags); if (error != 0) { reqlist->status = BLKIF_RSP_ERROR; goto send_response; } return (0); send_response: xbb_complete_reqlist(xbb, reqlist); return (0); } static __inline int xbb_count_sects(blkif_request_t *ring_req) { int i; int cur_size = 0; for (i = 0; i < ring_req->nr_segments; i++) { int nsect; nsect = (int8_t)(ring_req->seg[i].last_sect - ring_req->seg[i].first_sect + 1); if (nsect <= 0) break; cur_size += nsect; } return (cur_size); } /** * Process incoming requests from the shared communication ring in response * to a signal on the ring's event channel. * * \param context Callback argument registerd during task initialization - * the xbb_softc for this instance. * \param pending The number of taskqueue_enqueue events that have * occurred since this handler was last run. */ static void xbb_run_queue(void *context, int pending) { struct xbb_softc *xbb; blkif_back_rings_t *rings; RING_IDX rp; uint64_t cur_sector; int cur_operation; struct xbb_xen_reqlist *reqlist; xbb = (struct xbb_softc *)context; rings = &xbb->rings; /* * Work gather and dispatch loop. Note that we have a bias here * towards gathering I/O sent by blockfront. We first gather up * everything in the ring, as long as we have resources. Then we * dispatch one request, and then attempt to gather up any * additional requests that have come in while we were dispatching * the request. 
* * This allows us to get a clearer picture (via devstat) of how * many requests blockfront is queueing to us at any given time. */ for (;;) { int retval; /* * Initialize reqlist to the last element in the pending * queue, if there is one. This allows us to add more * requests to that request list, if we have room. */ reqlist = STAILQ_LAST(&xbb->reqlist_pending_stailq, xbb_xen_reqlist, links); if (reqlist != NULL) { cur_sector = reqlist->next_contig_sector; cur_operation = reqlist->operation; } else { cur_operation = 0; cur_sector = 0; } /* * Cache req_prod to avoid accessing a cache line shared * with the frontend. */ rp = rings->common.sring->req_prod; /* Ensure we see queued requests up to 'rp'. */ rmb(); /** * Run so long as there is work to consume and the generation * of a response will not overflow the ring. * * @note There's a 1 to 1 relationship between requests and * responses, so an overflow should never occur. This * test is to protect our domain from digesting bogus * data. Shouldn't we log this? */ while (rings->common.req_cons != rp && RING_REQUEST_CONS_OVERFLOW(&rings->common, rings->common.req_cons) == 0){ blkif_request_t ring_req_storage; blkif_request_t *ring_req; int cur_size; switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: ring_req = RING_GET_REQUEST(&xbb->rings.native, rings->common.req_cons); break; case BLKIF_PROTOCOL_X86_32: { struct blkif_x86_32_request *ring_req32; ring_req32 = RING_GET_REQUEST( &xbb->rings.x86_32, rings->common.req_cons); blkif_get_x86_32_req(&ring_req_storage, ring_req32); ring_req = &ring_req_storage; break; } case BLKIF_PROTOCOL_X86_64: { struct blkif_x86_64_request *ring_req64; ring_req64 =RING_GET_REQUEST(&xbb->rings.x86_64, rings->common.req_cons); blkif_get_x86_64_req(&ring_req_storage, ring_req64); ring_req = &ring_req_storage; break; } default: panic("Unexpected blkif protocol ABI."); /* NOTREACHED */ } /* * Check for situations that would require closing * off this I/O for further coalescing: * - Coalescing is turned off. * - Current I/O is out of sequence with the previous * I/O. * - Coalesced I/O would be too large. */ if ((reqlist != NULL) && ((xbb->no_coalesce_reqs != 0) || ((xbb->no_coalesce_reqs == 0) && ((ring_req->sector_number != cur_sector) || (ring_req->operation != cur_operation) || ((ring_req->nr_segments + reqlist->nr_segments) > xbb->max_reqlist_segments))))) { reqlist = NULL; } /* * Grab and check for all resources in one shot. * If we can't get all of the resources we need, * the shortage is noted and the thread will get * woken up when more resources are available. */ retval = xbb_get_resources(xbb, &reqlist, ring_req, xbb->rings.common.req_cons); if (retval != 0) { /* * Resource shortage has been recorded. * We'll be scheduled to run once a request * object frees up due to a completion. */ break; } /* * Signify that we can overwrite this request with * a response by incrementing our consumer index. * The response won't be generated until after * we've already consumed all necessary data out * of the version of the request in the ring buffer * (for native mode). We must update the consumer * index before issueing back-end I/O so there is * no possibility that it will complete and a * response be generated before we make room in * the queue for that response. 
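 *
 * Taken together, this loop is the standard Xen shared-ring consumer
 * idiom: snapshot the producer index once, issue a read barrier, then
 * consume and advance req_cons before acting on each entry.
 * Condensed (native ABI only, bookkeeping omitted):
 *
 *	rp = rings->common.sring->req_prod;
 *	rmb();			// see all requests published up to rp
 *	while (rings->common.req_cons != rp &&
 *	       RING_REQUEST_CONS_OVERFLOW(&rings->common,
 *	       rings->common.req_cons) == 0) {
 *		ring_req = RING_GET_REQUEST(&xbb->rings.native,
 *		    rings->common.req_cons);
 *		rings->common.req_cons +=
 *		    BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments);
 *		// ... gather the request for later dispatch ...
 *	}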
*/ xbb->rings.common.req_cons += BLKIF_SEGS_TO_BLOCKS(ring_req->nr_segments); xbb->reqs_received++; cur_size = xbb_count_sects(ring_req); cur_sector = ring_req->sector_number + cur_size; reqlist->next_contig_sector = cur_sector; cur_operation = ring_req->operation; } /* Check for I/O to dispatch */ reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); if (reqlist == NULL) { /* * We're out of work to do, put the task queue to * sleep. */ break; } /* * Grab the first request off the queue and attempt * to dispatch it. */ STAILQ_REMOVE_HEAD(&xbb->reqlist_pending_stailq, links); retval = xbb_dispatch_io(xbb, reqlist); if (retval != 0) { /* * xbb_dispatch_io() returns non-zero only when * there is a resource shortage. If that's the * case, re-queue this request on the head of the * queue, and go to sleep until we have more * resources. */ STAILQ_INSERT_HEAD(&xbb->reqlist_pending_stailq, reqlist, links); break; } else { /* * If we still have anything on the queue after * removing the head entry, that is because we * met one of the criteria to create a new * request list (outlined above), and we'll call * that a forced dispatch for statistical purposes. * * Otherwise, if there is only one element on the * queue, we coalesced everything available on * the ring and we'll call that a normal dispatch. */ reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); if (reqlist != NULL) xbb->forced_dispatch++; else xbb->normal_dispatch++; xbb->total_dispatch++; } } } /** * Interrupt handler bound to the shared ring's event channel. * * \param arg Callback argument registerd during event channel * binding - the xbb_softc for this instance. */ -static void -xbb_intr(void *arg) +static int +xbb_filter(void *arg) { struct xbb_softc *xbb; - /* Defer to kernel thread. */ + /* Defer to taskqueue thread. */ xbb = (struct xbb_softc *)arg; taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); + + return (FILTER_HANDLED); } SDT_PROVIDER_DEFINE(xbb); SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, flush, "int"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, read, "int", "uint64_t", "uint64_t"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, write, "int", "uint64_t", "uint64_t"); /*----------------------------- Backend Handlers -----------------------------*/ /** * Backend handler for character device access. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list structure. * \param operation BIO_* I/O operation code. * \param bio_flags Additional bio_flag data to pass to any generated * bios (e.g. BIO_ORDERED).. * * \return 0 for success, errno codes for failure. 
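 *
 * At its core the device dispatch path simply translates each
 * contiguous run of mapped segments into a struct bio and hands it to
 * the cdev's strategy routine, letting the completion callback do the
 * accounting.  A minimal single-segment sketch (offset, kva and length
 * stand in for the values computed below; error handling omitted):
 *
 *	struct bio *bio;
 *
 *	bio = g_new_bio();
 *	bio->bio_cmd     = BIO_READ;
 *	bio->bio_dev     = dev_data->cdev;
 *	bio->bio_offset  = offset;		// byte offset into device
 *	bio->bio_data    = kva;			// mapped front-end pages
 *	bio->bio_length  = length;
 *	bio->bio_bcount  = bio->bio_length;
 *	bio->bio_pblkno  = offset >> xbb->sector_size_shift;
 *	bio->bio_done    = xbb_bio_done;	// completion callback
 *	bio->bio_caller1 = reqlist;
 *	(*dev_data->csw->d_strategy)(bio);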
*/ static int xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int bio_flags) { struct xbb_dev_data *dev_data; struct bio *bios[XBB_MAX_SEGMENTS_PER_REQLIST]; struct xbb_xen_req *nreq; off_t bio_offset; struct bio *bio; struct xbb_sg *xbb_sg; u_int nbio; u_int bio_idx; u_int nseg; u_int seg_idx; int error; dev_data = &xbb->backend.dev; bio_offset = (off_t)reqlist->starting_sector_number << xbb->sector_size_shift; error = 0; nbio = 0; bio_idx = 0; if (operation == BIO_FLUSH) { nreq = STAILQ_FIRST(&reqlist->contig_req_list); bio = g_new_bio(); - if (unlikely(bio == NULL)) { + if (__predict_false(bio == NULL)) { DPRINTF("Unable to allocate bio for BIO_FLUSH\n"); error = ENOMEM; return (error); } bio->bio_cmd = BIO_FLUSH; bio->bio_flags |= BIO_ORDERED; bio->bio_dev = dev_data->cdev; bio->bio_offset = 0; bio->bio_data = 0; bio->bio_done = xbb_bio_done; bio->bio_caller1 = nreq; bio->bio_pblkno = 0; nreq->pendcnt = 1; SDT_PROBE1(xbb, kernel, xbb_dispatch_dev, flush, device_get_unit(xbb->dev)); (*dev_data->csw->d_strategy)(bio); return (0); } xbb_sg = xbb->xbb_sgs; bio = NULL; nseg = reqlist->nr_segments; for (seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { /* * KVA will not be contiguous, so any additional * I/O will need to be represented in a new bio. */ if ((bio != NULL) && (xbb_sg->first_sect != 0)) { if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { printf("%s: Discontiguous I/O request " "from domain %d ends on " "non-sector boundary\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } bio = NULL; } if (bio == NULL) { /* * Make sure that the start of this bio is * aligned to a device sector. */ if ((bio_offset & (xbb->sector_size - 1)) != 0){ printf("%s: Misaligned I/O request " "from domain %d\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } bio = bios[nbio++] = g_new_bio(); - if (unlikely(bio == NULL)) { + if (__predict_false(bio == NULL)) { error = ENOMEM; goto fail_free_bios; } bio->bio_cmd = operation; bio->bio_flags |= bio_flags; bio->bio_dev = dev_data->cdev; bio->bio_offset = bio_offset; bio->bio_data = xbb_reqlist_ioaddr(reqlist, seg_idx, xbb_sg->first_sect); bio->bio_done = xbb_bio_done; bio->bio_caller1 = reqlist; bio->bio_pblkno = bio_offset >> xbb->sector_size_shift; } bio->bio_length += xbb_sg->nsect << 9; bio->bio_bcount = bio->bio_length; bio_offset += xbb_sg->nsect << 9; if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) { if ((bio->bio_length & (xbb->sector_size - 1)) != 0) { printf("%s: Discontiguous I/O request " "from domain %d ends on " "non-sector boundary\n", __func__, xbb->otherend_id); error = EINVAL; goto fail_free_bios; } /* * KVA will not be contiguous, so any additional * I/O will need to be represented in a new bio. 
*/ bio = NULL; } } reqlist->pendcnt = nbio; for (bio_idx = 0; bio_idx < nbio; bio_idx++) { #ifdef XBB_USE_BOUNCE_BUFFERS vm_offset_t kva_offset; kva_offset = (vm_offset_t)bios[bio_idx]->bio_data - (vm_offset_t)reqlist->bounce; if (operation == BIO_WRITE) { memcpy(bios[bio_idx]->bio_data, (uint8_t *)reqlist->kva + kva_offset, bios[bio_idx]->bio_bcount); } #endif if (operation == BIO_READ) { SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, read, device_get_unit(xbb->dev), bios[bio_idx]->bio_offset, bios[bio_idx]->bio_length); } else if (operation == BIO_WRITE) { SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, write, device_get_unit(xbb->dev), bios[bio_idx]->bio_offset, bios[bio_idx]->bio_length); } (*dev_data->csw->d_strategy)(bios[bio_idx]); } return (error); fail_free_bios: for (bio_idx = 0; bio_idx < (nbio-1); bio_idx++) g_destroy_bio(bios[bio_idx]); return (error); } SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, flush, "int"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, read, "int", "uint64_t", "uint64_t"); SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, write, "int", "uint64_t", "uint64_t"); /** * Backend handler for file access. * * \param xbb Per-instance xbb configuration structure. * \param reqlist Allocated internal request list. * \param operation BIO_* I/O operation code. * \param flags Additional bio_flag data to pass to any generated bios * (e.g. BIO_ORDERED).. * * \return 0 for success, errno codes for failure. */ static int xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, int operation, int flags) { struct xbb_file_data *file_data; u_int seg_idx; u_int nseg; off_t sectors_sent; struct uio xuio; struct xbb_sg *xbb_sg; struct iovec *xiovec; #ifdef XBB_USE_BOUNCE_BUFFERS void **p_vaddr; int saved_uio_iovcnt; #endif /* XBB_USE_BOUNCE_BUFFERS */ int error; file_data = &xbb->backend.file; sectors_sent = 0; error = 0; bzero(&xuio, sizeof(xuio)); switch (operation) { case BIO_READ: xuio.uio_rw = UIO_READ; break; case BIO_WRITE: xuio.uio_rw = UIO_WRITE; break; case BIO_FLUSH: { struct mount *mountpoint; SDT_PROBE1(xbb, kernel, xbb_dispatch_file, flush, device_get_unit(xbb->dev)); (void) vn_start_write(xbb->vn, &mountpoint, V_WAIT); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(xbb->vn, MNT_WAIT, curthread); VOP_UNLOCK(xbb->vn, 0); vn_finished_write(mountpoint); goto bailout_send_response; /* NOTREACHED */ } default: panic("invalid operation %d", operation); /* NOTREACHED */ } xuio.uio_offset = (vm_offset_t)reqlist->starting_sector_number << xbb->sector_size_shift; xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = file_data->xiovecs; xuio.uio_iovcnt = 0; xbb_sg = xbb->xbb_sgs; nseg = reqlist->nr_segments; for (xiovec = NULL, seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { /* * If the first sector is not 0, the KVA will * not be contiguous and we'll need to go on * to another segment. */ if (xbb_sg->first_sect != 0) xiovec = NULL; if (xiovec == NULL) { xiovec = &file_data->xiovecs[xuio.uio_iovcnt]; xiovec->iov_base = xbb_reqlist_ioaddr(reqlist, seg_idx, xbb_sg->first_sect); #ifdef XBB_USE_BOUNCE_BUFFERS /* * Store the address of the incoming * buffer at this particular offset * as well, so we can do the copy * later without having to do more * work to recalculate this address. 
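 *
 * The copy referred to here happens on either side of the VOP call:
 * for a write the data is copied from the saved front-end address into
 * the bounce buffer before VOP_WRITE(), and for a read it is copied
 * back out after VOP_READ().  Per iovec, schematically:
 *
 *	if (operation == BIO_WRITE)	// before the VOP call
 *		memcpy(xiovec->iov_base, *p_vaddr, xiovec->iov_len);
 *	...
 *	if (operation == BIO_READ)	// after the VOP call
 *		memcpy(*p_vaddr, xiovec->iov_base, xiovec->iov_len);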
*/ p_vaddr = &file_data->xiovecs_vaddr[xuio.uio_iovcnt]; *p_vaddr = xbb_reqlist_vaddr(reqlist, seg_idx, xbb_sg->first_sect); #endif /* XBB_USE_BOUNCE_BUFFERS */ xiovec->iov_len = 0; xuio.uio_iovcnt++; } xiovec->iov_len += xbb_sg->nsect << 9; xuio.uio_resid += xbb_sg->nsect << 9; /* * If the last sector is not the full page * size count, the next segment will not be * contiguous in KVA and we need a new iovec. */ if (xbb_sg->last_sect != (PAGE_SIZE - 512) >> 9) xiovec = NULL; } xuio.uio_td = curthread; #ifdef XBB_USE_BOUNCE_BUFFERS saved_uio_iovcnt = xuio.uio_iovcnt; if (operation == BIO_WRITE) { /* Copy the write data to the local buffer. */ for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, xiovec = xuio.uio_iov; seg_idx < xuio.uio_iovcnt; seg_idx++, xiovec++, p_vaddr++) { memcpy(xiovec->iov_base, *p_vaddr, xiovec->iov_len); } } else { /* * We only need to save off the iovecs in the case of a * read, because the copy for the read happens after the * VOP_READ(). (The uio will get modified in that call * sequence.) */ memcpy(file_data->saved_xiovecs, xuio.uio_iov, xuio.uio_iovcnt * sizeof(xuio.uio_iov[0])); } #endif /* XBB_USE_BOUNCE_BUFFERS */ switch (operation) { case BIO_READ: SDT_PROBE3(xbb, kernel, xbb_dispatch_file, read, device_get_unit(xbb->dev), xuio.uio_offset, xuio.uio_resid); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); /* * UFS pays attention to IO_DIRECT for reads. If the * DIRECTIO option is configured into the kernel, it calls * ffs_rawread(). But that only works for single-segment * uios with user space addresses. In our case, with a * kernel uio, it still reads into the buffer cache, but it * will just try to release the buffer from the cache later * on in ffs_read(). * * ZFS does not pay attention to IO_DIRECT for reads. * * UFS does not pay attention to IO_SYNC for reads. * * ZFS pays attention to IO_SYNC (which translates into the * Solaris define FRSYNC for zfs_read()) for reads. It * attempts to sync the file before reading. * * So, to attempt to provide some barrier semantics in the * BIO_ORDERED case, set both IO_DIRECT and IO_SYNC. */ error = VOP_READ(xbb->vn, &xuio, (flags & BIO_ORDERED) ? (IO_DIRECT|IO_SYNC) : 0, file_data->cred); VOP_UNLOCK(xbb->vn, 0); break; case BIO_WRITE: { struct mount *mountpoint; SDT_PROBE3(xbb, kernel, xbb_dispatch_file, write, device_get_unit(xbb->dev), xuio.uio_offset, xuio.uio_resid); (void)vn_start_write(xbb->vn, &mountpoint, V_WAIT); vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY); /* * UFS pays attention to IO_DIRECT for writes. The write * is done asynchronously. (Normally the write would just * get put into cache. * * UFS pays attention to IO_SYNC for writes. It will * attempt to write the buffer out synchronously if that * flag is set. * * ZFS does not pay attention to IO_DIRECT for writes. * * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC) * for writes. It will flush the transaction from the * cache before returning. * * So if we've got the BIO_ORDERED flag set, we want * IO_SYNC in either the UFS or ZFS case. */ error = VOP_WRITE(xbb->vn, &xuio, (flags & BIO_ORDERED) ? 
IO_SYNC : 0, file_data->cred); VOP_UNLOCK(xbb->vn, 0); vn_finished_write(mountpoint); break; } default: panic("invalid operation %d", operation); /* NOTREACHED */ } #ifdef XBB_USE_BOUNCE_BUFFERS /* We only need to copy here for read operations */ if (operation == BIO_READ) { for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, xiovec = file_data->saved_xiovecs; seg_idx < saved_uio_iovcnt; seg_idx++, xiovec++, p_vaddr++) { /* * Note that we have to use the copy of the * io vector we made above. uiomove() modifies * the uio and its referenced vector as uiomove * performs the copy, so we can't rely on any * state from the original uio. */ memcpy(*p_vaddr, xiovec->iov_base, xiovec->iov_len); } } #endif /* XBB_USE_BOUNCE_BUFFERS */ bailout_send_response: if (error != 0) reqlist->status = BLKIF_RSP_ERROR; xbb_complete_reqlist(xbb, reqlist); return (0); } /*--------------------------- Backend Configuration --------------------------*/ /** * Close and cleanup any backend device/file specific state for this * block back instance. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_close_backend(struct xbb_softc *xbb) { DROP_GIANT(); DPRINTF("closing dev=%s\n", xbb->dev_name); if (xbb->vn) { int flags = FREAD; if ((xbb->flags & XBBF_READ_ONLY) == 0) flags |= FWRITE; switch (xbb->device_type) { case XBB_TYPE_DISK: if (xbb->backend.dev.csw) { dev_relthread(xbb->backend.dev.cdev, xbb->backend.dev.dev_ref); xbb->backend.dev.csw = NULL; xbb->backend.dev.cdev = NULL; } break; case XBB_TYPE_FILE: break; case XBB_TYPE_NONE: default: panic("Unexpected backend type."); break; } (void)vn_close(xbb->vn, flags, NOCRED, curthread); xbb->vn = NULL; switch (xbb->device_type) { case XBB_TYPE_DISK: break; case XBB_TYPE_FILE: if (xbb->backend.file.cred != NULL) { crfree(xbb->backend.file.cred); xbb->backend.file.cred = NULL; } break; case XBB_TYPE_NONE: default: panic("Unexpected backend type."); break; } } PICKUP_GIANT(); } /** * Open a character device to be used for backend I/O. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. */ static int xbb_open_dev(struct xbb_softc *xbb) { struct vattr vattr; struct cdev *dev; struct cdevsw *devsw; int error; xbb->device_type = XBB_TYPE_DISK; xbb->dispatch_io = xbb_dispatch_dev; xbb->backend.dev.cdev = xbb->vn->v_rdev; xbb->backend.dev.csw = dev_refthread(xbb->backend.dev.cdev, &xbb->backend.dev.dev_ref); if (xbb->backend.dev.csw == NULL) panic("Unable to retrieve device switch"); error = VOP_GETATTR(xbb->vn, &vattr, NOCRED); if (error) { xenbus_dev_fatal(xbb->dev, error, "error getting " "vnode attributes for device %s", xbb->dev_name); return (error); } dev = xbb->vn->v_rdev; devsw = dev->si_devsw; if (!devsw->d_ioctl) { xenbus_dev_fatal(xbb->dev, ENODEV, "no d_ioctl for " "device %s!", xbb->dev_name); return (ENODEV); } error = devsw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&xbb->sector_size, FREAD, curthread); if (error) { xenbus_dev_fatal(xbb->dev, error, "error calling ioctl DIOCGSECTORSIZE " "for device %s", xbb->dev_name); return (error); } error = devsw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&xbb->media_size, FREAD, curthread); if (error) { xenbus_dev_fatal(xbb->dev, error, "error calling ioctl DIOCGMEDIASIZE " "for device %s", xbb->dev_name); return (error); } return (0); } /** * Open a file to be used for backend I/O. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. 
*/ static int xbb_open_file(struct xbb_softc *xbb) { struct xbb_file_data *file_data; struct vattr vattr; int error; file_data = &xbb->backend.file; xbb->device_type = XBB_TYPE_FILE; xbb->dispatch_io = xbb_dispatch_file; error = VOP_GETATTR(xbb->vn, &vattr, curthread->td_ucred); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "error calling VOP_GETATTR()" "for file %s", xbb->dev_name); return (error); } /* * Verify that we have the ability to upgrade to exclusive * access on this file so we can trap errors at open instead * of reporting them during first access. */ if (VOP_ISLOCKED(xbb->vn) != LK_EXCLUSIVE) { vn_lock(xbb->vn, LK_UPGRADE | LK_RETRY); if (xbb->vn->v_iflag & VI_DOOMED) { error = EBADF; xenbus_dev_fatal(xbb->dev, error, "error locking file %s", xbb->dev_name); return (error); } } file_data->cred = crhold(curthread->td_ucred); xbb->media_size = vattr.va_size; /* * XXX KDM vattr.va_blocksize may be larger than 512 bytes here. * With ZFS, it is 131072 bytes. Block sizes that large don't work * with disklabel and UFS on FreeBSD at least. Large block sizes * may not work with other OSes as well. So just export a sector * size of 512 bytes, which should work with any OS or * application. Since our backing is a file, any block size will * work fine for the backing store. */ #if 0 xbb->sector_size = vattr.va_blocksize; #endif xbb->sector_size = 512; /* * Sanity check. The media size has to be at least one * sector long. */ if (xbb->media_size < xbb->sector_size) { error = EINVAL; xenbus_dev_fatal(xbb->dev, error, "file %s size %ju < block size %u", xbb->dev_name, (uintmax_t)xbb->media_size, xbb->sector_size); } return (error); } /** * Open the backend provider for this connection. * * \param xbb Per-instance xbb configuration structure. * * \return 0 for success, errno codes for failure. */ static int xbb_open_backend(struct xbb_softc *xbb) { struct nameidata nd; int flags; int error; flags = FREAD; error = 0; DPRINTF("opening dev=%s\n", xbb->dev_name); if (rootvnode == NULL) { xenbus_dev_fatal(xbb->dev, ENOENT, "Root file system not mounted"); return (ENOENT); } if ((xbb->flags & XBBF_READ_ONLY) == 0) flags |= FWRITE; if (!curthread->td_proc->p_fd->fd_cdir) { curthread->td_proc->p_fd->fd_cdir = rootvnode; VREF(rootvnode); } if (!curthread->td_proc->p_fd->fd_rdir) { curthread->td_proc->p_fd->fd_rdir = rootvnode; VREF(rootvnode); } if (!curthread->td_proc->p_fd->fd_jdir) { curthread->td_proc->p_fd->fd_jdir = rootvnode; VREF(rootvnode); } again: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, xbb->dev_name, curthread); error = vn_open(&nd, &flags, 0, NULL); if (error) { /* * This is the only reasonable guess we can make as far as * path if the user doesn't give us a fully qualified path. * If they want to specify a file, they need to specify the * full path. */ if (xbb->dev_name[0] != '/') { char *dev_path = "/dev/"; char *dev_name; /* Try adding device path at beginning of name */ dev_name = malloc(strlen(xbb->dev_name) + strlen(dev_path) + 1, M_XENBLOCKBACK, M_NOWAIT); if (dev_name) { sprintf(dev_name, "%s%s", dev_path, xbb->dev_name); free(xbb->dev_name, M_XENBLOCKBACK); xbb->dev_name = dev_name; goto again; } } xenbus_dev_fatal(xbb->dev, error, "error opening device %s", xbb->dev_name); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); xbb->vn = nd.ni_vp; /* We only support disks and files. 
*/ if (vn_isdisk(xbb->vn, &error)) { error = xbb_open_dev(xbb); } else if (xbb->vn->v_type == VREG) { error = xbb_open_file(xbb); } else { error = EINVAL; xenbus_dev_fatal(xbb->dev, error, "%s is not a disk " "or file", xbb->dev_name); } VOP_UNLOCK(xbb->vn, 0); if (error != 0) { xbb_close_backend(xbb); return (error); } xbb->sector_size_shift = fls(xbb->sector_size) - 1; xbb->media_num_sectors = xbb->media_size >> xbb->sector_size_shift; DPRINTF("opened %s=%s sector_size=%u media_size=%" PRId64 "\n", (xbb->device_type == XBB_TYPE_DISK) ? "dev" : "file", xbb->dev_name, xbb->sector_size, xbb->media_size); return (0); } /*------------------------ Inter-Domain Communication ------------------------*/ /** * Free dynamically allocated KVA or pseudo-physical address allocations. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_free_communication_mem(struct xbb_softc *xbb) { if (xbb->kva != 0) { #ifndef XENHVM kva_free(xbb->kva, xbb->kva_size); #else if (xbb->pseudo_phys_res != NULL) { bus_release_resource(xbb->dev, SYS_RES_MEMORY, xbb->pseudo_phys_res_id, xbb->pseudo_phys_res); xbb->pseudo_phys_res = NULL; } #endif } xbb->kva = 0; xbb->gnt_base_addr = 0; if (xbb->kva_free != NULL) { free(xbb->kva_free, M_XENBLOCKBACK); xbb->kva_free = NULL; } } /** * Cleanup all inter-domain communication mechanisms. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_disconnect(struct xbb_softc *xbb) { struct gnttab_unmap_grant_ref ops[XBB_MAX_RING_PAGES]; struct gnttab_unmap_grant_ref *op; u_int ring_idx; int error; DPRINTF("\n"); if ((xbb->flags & XBBF_RING_CONNECTED) == 0) return (0); - if (xbb->irq != 0) { - unbind_from_irqhandler(xbb->irq); - xbb->irq = 0; - } + xen_intr_unbind(&xbb->xen_intr_handle); mtx_unlock(&xbb->lock); taskqueue_drain(xbb->io_taskqueue, &xbb->io_task); mtx_lock(&xbb->lock); /* * No new interrupts can generate work, but we must wait * for all currently active requests to drain. */ if (xbb->active_request_count != 0) return (EAGAIN); for (ring_idx = 0, op = ops; ring_idx < xbb->ring_config.ring_pages; ring_idx++, op++) { op->host_addr = xbb->ring_config.gnt_addr + (ring_idx * PAGE_SIZE); op->dev_bus_addr = xbb->ring_config.bus_addr[ring_idx]; op->handle = xbb->ring_config.handle[ring_idx]; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, ops, xbb->ring_config.ring_pages); if (error != 0) panic("Grant table op failed (%d)", error); xbb_free_communication_mem(xbb); if (xbb->requests != NULL) { free(xbb->requests, M_XENBLOCKBACK); xbb->requests = NULL; } if (xbb->request_lists != NULL) { struct xbb_xen_reqlist *reqlist; int i; /* There is one request list for ever allocated request. */ for (i = 0, reqlist = xbb->request_lists; i < xbb->max_requests; i++, reqlist++){ #ifdef XBB_USE_BOUNCE_BUFFERS if (reqlist->bounce != NULL) { free(reqlist->bounce, M_XENBLOCKBACK); reqlist->bounce = NULL; } #endif if (reqlist->gnt_handles != NULL) { free(reqlist->gnt_handles, M_XENBLOCKBACK); reqlist->gnt_handles = NULL; } } free(xbb->request_lists, M_XENBLOCKBACK); xbb->request_lists = NULL; } xbb->flags &= ~XBBF_RING_CONNECTED; return (0); } /** * Map shared memory ring into domain local address space, initialize * ring control structures, and bind an interrupt to the event channel * used to notify us of ring changes. * * \param xbb Per-instance xbb configuration structure. 
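 *
 * Event channel notification goes through the xen_intr interface used
 * elsewhere in this change: bind the frontend's event channel to a
 * filter routine, signal the channel when responses are queued, and
 * unbind on disconnect.  In outline (mirroring the calls made below,
 * in xbb_send_response() and in xbb_disconnect()):
 *
 *	xen_intr_handle_t handle;
 *
 *	error = xen_intr_bind_remote_port(dev, otherend_id, evtchn,
 *	    xbb_filter, NULL, xbb, INTR_TYPE_BIO | INTR_MPSAFE, &handle);
 *	...
 *	xen_intr_signal(handle);	// notify the frontend
 *	...
 *	xen_intr_unbind(&handle);	// tear down on disconnect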
*/ static int xbb_connect_ring(struct xbb_softc *xbb) { struct gnttab_map_grant_ref gnts[XBB_MAX_RING_PAGES]; struct gnttab_map_grant_ref *gnt; u_int ring_idx; int error; if ((xbb->flags & XBBF_RING_CONNECTED) != 0) return (0); /* * Kva for our ring is at the tail of the region of kva allocated * by xbb_alloc_communication_mem(). */ xbb->ring_config.va = xbb->kva + (xbb->kva_size - (xbb->ring_config.ring_pages * PAGE_SIZE)); xbb->ring_config.gnt_addr = xbb->gnt_base_addr + (xbb->kva_size - (xbb->ring_config.ring_pages * PAGE_SIZE)); for (ring_idx = 0, gnt = gnts; ring_idx < xbb->ring_config.ring_pages; ring_idx++, gnt++) { gnt->host_addr = xbb->ring_config.gnt_addr + (ring_idx * PAGE_SIZE); gnt->flags = GNTMAP_host_map; gnt->ref = xbb->ring_config.ring_ref[ring_idx]; gnt->dom = xbb->otherend_id; } error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, gnts, xbb->ring_config.ring_pages); if (error) panic("blkback: Ring page grant table op failed (%d)", error); for (ring_idx = 0, gnt = gnts; ring_idx < xbb->ring_config.ring_pages; ring_idx++, gnt++) { if (gnt->status != 0) { xbb->ring_config.va = 0; xenbus_dev_fatal(xbb->dev, EACCES, "Ring shared page mapping failed. " "Status %d.", gnt->status); return (EACCES); } xbb->ring_config.handle[ring_idx] = gnt->handle; xbb->ring_config.bus_addr[ring_idx] = gnt->dev_bus_addr; } /* Initialize the ring based on ABI. */ switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: { blkif_sring_t *sring; sring = (blkif_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.native, sring, xbb->ring_config.ring_pages * PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_32: { blkif_x86_32_sring_t *sring_x86_32; sring_x86_32 = (blkif_x86_32_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.x86_32, sring_x86_32, xbb->ring_config.ring_pages * PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_64: { blkif_x86_64_sring_t *sring_x86_64; sring_x86_64 = (blkif_x86_64_sring_t *)xbb->ring_config.va; BACK_RING_INIT(&xbb->rings.x86_64, sring_x86_64, xbb->ring_config.ring_pages * PAGE_SIZE); break; } default: panic("Unexpected blkif protocol ABI."); } xbb->flags |= XBBF_RING_CONNECTED; - error = - bind_interdomain_evtchn_to_irqhandler(xbb->otherend_id, - xbb->ring_config.evtchn, - device_get_nameunit(xbb->dev), - xbb_intr, /*arg*/xbb, - INTR_TYPE_BIO | INTR_MPSAFE, - &xbb->irq); + error = xen_intr_bind_remote_port(xbb->dev, + xbb->otherend_id, + xbb->ring_config.evtchn, + xbb_filter, + /*ithread_handler*/NULL, + /*arg*/xbb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xbb->xen_intr_handle); if (error) { (void)xbb_disconnect(xbb); xenbus_dev_fatal(xbb->dev, error, "binding event channel"); return (error); } DPRINTF("rings connected!\n"); return 0; } /* Needed to make bit_alloc() macro work */ #define calloc(count, size) malloc((count)*(size), M_XENBLOCKBACK, \ M_NOWAIT|M_ZERO); /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our * communication ring. * * \param xbb Per-instance xbb configuration structure. * * These address spaces are used to dynamically map pages in the * front-end's domain into our own. 
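 *
 * The size of the region follows directly from the negotiated limits:
 * one page per possible in-flight request segment plus the shared ring
 * pages, i.e. (figures purely illustrative)
 *
 *	reqlist_kva_pages = max_requests * max_request_segments;
 *	kva_size          = (reqlist_kva_pages + ring_pages) * PAGE_SIZE;
 *
 * so 32 requests of 11 segments each with a one page ring would need
 * 353 pages of KVA (and, under XENHVM, the same amount of
 * pseudo-physical address space).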
*/ static int xbb_alloc_communication_mem(struct xbb_softc *xbb) { xbb->reqlist_kva_pages = xbb->max_requests * xbb->max_request_segments; xbb->reqlist_kva_size = xbb->reqlist_kva_pages * PAGE_SIZE; xbb->kva_size = xbb->reqlist_kva_size + (xbb->ring_config.ring_pages * PAGE_SIZE); xbb->kva_free = bit_alloc(xbb->reqlist_kva_pages); if (xbb->kva_free == NULL) return (ENOMEM); DPRINTF("%s: kva_size = %d, reqlist_kva_size = %d\n", device_get_nameunit(xbb->dev), xbb->kva_size, xbb->reqlist_kva_size); #ifndef XENHVM xbb->kva = kva_alloc(xbb->kva_size); if (xbb->kva == 0) return (ENOMEM); xbb->gnt_base_addr = xbb->kva; #else /* XENHVM */ /* * Reserve a range of pseudo physical memory that we can map * into kva. These pages will only be backed by machine * pages ("real memory") during the lifetime of front-end requests * via grant table operations. */ xbb->pseudo_phys_res_id = 0; xbb->pseudo_phys_res = bus_alloc_resource(xbb->dev, SYS_RES_MEMORY, &xbb->pseudo_phys_res_id, 0, ~0, xbb->kva_size, RF_ACTIVE); if (xbb->pseudo_phys_res == NULL) { xbb->kva = 0; return (ENOMEM); } xbb->kva = (vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res); xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res); #endif /* XENHVM */ DPRINTF("%s: kva: %#jx, gnt_base_addr: %#jx\n", device_get_nameunit(xbb->dev), (uintmax_t)xbb->kva, (uintmax_t)xbb->gnt_base_addr); return (0); } /** * Collect front-end information from the XenStore. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_collect_frontend_info(struct xbb_softc *xbb) { char protocol_abi[64]; const char *otherend_path; int error; u_int ring_idx; u_int ring_page_order; size_t ring_size; otherend_path = xenbus_get_otherend_path(xbb->dev); /* * Protocol defaults valid even if all negotiation fails. */ xbb->ring_config.ring_pages = 1; xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE; /* * Mandatory data (used in all versions of the protocol) first. */ error = xs_scanf(XST_NIL, otherend_path, "event-channel", NULL, "%" PRIu32, &xbb->ring_config.evtchn); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to retrieve event-channel information " "from frontend %s. Unable to connect.", xenbus_get_otherend_path(xbb->dev)); return (error); } /* * These fields are initialized to legacy protocol defaults * so we only need to fail if reading the updated value succeeds * and the new value is outside of its allowed range. * * \note xs_gather() returns on the first encountered error, so * we must use independant calls in order to guarantee * we don't miss information in a sparsly populated front-end * tree. * * \note xs_scanf() does not update variables for unmatched * fields. 
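 *
 * The optional fields therefore follow a "default, then override,
 * then validate" pattern: seed the variable with the legacy value,
 * ignore the return value of xs_scanf() for the optional key, and
 * range-check the result before using it.  For one hypothetical key
 * (LEGACY_DEFAULT and BACKEND_LIMIT are placeholders):
 *
 *	u_int value = LEGACY_DEFAULT;
 *
 *	(void)xs_scanf(XST_NIL, otherend_path, "optional-key", NULL,
 *		       "%u", &value);
 *	if (value > BACKEND_LIMIT)
 *		return (EINVAL);	// cannot connect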
*/ ring_page_order = 0; (void)xs_scanf(XST_NIL, otherend_path, "ring-page-order", NULL, "%u", &ring_page_order); xbb->ring_config.ring_pages = 1 << ring_page_order; (void)xs_scanf(XST_NIL, otherend_path, "num-ring-pages", NULL, "%u", &xbb->ring_config.ring_pages); ring_size = PAGE_SIZE * xbb->ring_config.ring_pages; xbb->max_requests = BLKIF_MAX_RING_REQUESTS(ring_size); (void)xs_scanf(XST_NIL, otherend_path, "max-requests", NULL, "%u", &xbb->max_requests); (void)xs_scanf(XST_NIL, otherend_path, "max-request-segments", NULL, "%u", &xbb->max_request_segments); (void)xs_scanf(XST_NIL, otherend_path, "max-request-size", NULL, "%u", &xbb->max_request_size); if (xbb->ring_config.ring_pages > XBB_MAX_RING_PAGES) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified ring-pages of %u " "exceeds backend limit of %zu. " "Unable to connect.", xbb->ring_config.ring_pages, XBB_MAX_RING_PAGES); return (EINVAL); } else if (xbb->max_requests > XBB_MAX_REQUESTS) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_requests of %u " "exceeds backend limit of %u. " "Unable to connect.", xbb->max_requests, XBB_MAX_REQUESTS); return (EINVAL); } else if (xbb->max_request_segments > XBB_MAX_SEGMENTS_PER_REQUEST) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_requests_segments " "of %u exceeds backend limit of %u. " "Unable to connect.", xbb->max_request_segments, XBB_MAX_SEGMENTS_PER_REQUEST); return (EINVAL); } else if (xbb->max_request_size > XBB_MAX_REQUEST_SIZE) { xenbus_dev_fatal(xbb->dev, EINVAL, "Front-end specified max_request_size " "of %u exceeds backend limit of %u. " "Unable to connect.", xbb->max_request_size, XBB_MAX_REQUEST_SIZE); return (EINVAL); } if (xbb->ring_config.ring_pages == 1) { error = xs_gather(XST_NIL, otherend_path, "ring-ref", "%" PRIu32, &xbb->ring_config.ring_ref[0], NULL); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to retrieve ring information " "from frontend %s. Unable to " "connect.", xenbus_get_otherend_path(xbb->dev)); return (error); } } else { /* Multi-page ring format. */ for (ring_idx = 0; ring_idx < xbb->ring_config.ring_pages; ring_idx++) { char ring_ref_name[]= "ring_refXX"; snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", ring_idx); error = xs_scanf(XST_NIL, otherend_path, ring_ref_name, NULL, "%" PRIu32, &xbb->ring_config.ring_ref[ring_idx]); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Failed to retriev grant " "reference for page %u of " "shared ring. Unable " "to connect.", ring_idx); return (error); } } } error = xs_gather(XST_NIL, otherend_path, "protocol", "%63s", protocol_abi, NULL); if (error != 0 || !strcmp(protocol_abi, XEN_IO_PROTO_ABI_NATIVE)) { /* * Assume native if the frontend has not * published ABI data or it has published and * matches our own ABI. */ xbb->abi = BLKIF_PROTOCOL_NATIVE; } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_32)) { xbb->abi = BLKIF_PROTOCOL_X86_32; } else if (!strcmp(protocol_abi, XEN_IO_PROTO_ABI_X86_64)) { xbb->abi = BLKIF_PROTOCOL_X86_64; } else { xenbus_dev_fatal(xbb->dev, EINVAL, "Unknown protocol ABI (%s) published by " "frontend. Unable to connect.", protocol_abi); return (EINVAL); } return (0); } /** * Allocate per-request data structures given request size and number * information negotiated with the front-end. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_alloc_requests(struct xbb_softc *xbb) { struct xbb_xen_req *req; struct xbb_xen_req *last_req; /* * Allocate request book keeping datastructures. 
*/ xbb->requests = malloc(xbb->max_requests * sizeof(*xbb->requests), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (xbb->requests == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request structures"); return (ENOMEM); } req = xbb->requests; last_req = &xbb->requests[xbb->max_requests - 1]; STAILQ_INIT(&xbb->request_free_stailq); while (req <= last_req) { STAILQ_INSERT_TAIL(&xbb->request_free_stailq, req, links); req++; } return (0); } static int xbb_alloc_request_lists(struct xbb_softc *xbb) { struct xbb_xen_reqlist *reqlist; int i; /* * If no requests can be merged, we need 1 request list per * in flight request. */ xbb->request_lists = malloc(xbb->max_requests * sizeof(*xbb->request_lists), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (xbb->request_lists == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request list structures"); return (ENOMEM); } STAILQ_INIT(&xbb->reqlist_free_stailq); STAILQ_INIT(&xbb->reqlist_pending_stailq); for (i = 0; i < xbb->max_requests; i++) { int seg; reqlist = &xbb->request_lists[i]; reqlist->xbb = xbb; #ifdef XBB_USE_BOUNCE_BUFFERS reqlist->bounce = malloc(xbb->max_reqlist_size, M_XENBLOCKBACK, M_NOWAIT); if (reqlist->bounce == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request " "bounce buffers"); return (ENOMEM); } #endif /* XBB_USE_BOUNCE_BUFFERS */ reqlist->gnt_handles = malloc(xbb->max_reqlist_segments * sizeof(*reqlist->gnt_handles), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); if (reqlist->gnt_handles == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request " "grant references"); return (ENOMEM); } for (seg = 0; seg < xbb->max_reqlist_segments; seg++) reqlist->gnt_handles[seg] = GRANT_REF_INVALID; STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); } return (0); } /** * Supply information about the physical device to the frontend * via XenBus. * * \param xbb Per-instance xbb configuration structure. */ static int xbb_publish_backend_info(struct xbb_softc *xbb) { struct xs_transaction xst; const char *our_path; const char *leaf; int error; our_path = xenbus_get_node(xbb->dev); while (1) { error = xs_transaction_start(&xst); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Error publishing backend info " "(start transaction)"); return (error); } leaf = "sectors"; error = xs_printf(xst, our_path, leaf, "%"PRIu64, xbb->media_num_sectors); if (error != 0) break; /* XXX Support all VBD attributes here. */ leaf = "info"; error = xs_printf(xst, our_path, leaf, "%u", xbb->flags & XBBF_READ_ONLY ? VDISK_READONLY : 0); if (error != 0) break; leaf = "sector-size"; error = xs_printf(xst, our_path, leaf, "%u", xbb->sector_size); if (error != 0) break; error = xs_transaction_end(xst, 0); if (error == 0) { return (0); } else if (error != EAGAIN) { xenbus_dev_fatal(xbb->dev, error, "ending transaction"); return (error); } } xenbus_dev_fatal(xbb->dev, error, "writing %s/%s", our_path, leaf); xs_transaction_end(xst, 1); return (error); } /** * Connect to our blkfront peer now that it has completed publishing * its configuration into the XenStore. * * \param xbb Per-instance xbb configuration structure. */ static void xbb_connect(struct xbb_softc *xbb) { int error; if (xenbus_get_state(xbb->dev) == XenbusStateConnected) return; if (xbb_collect_frontend_info(xbb) != 0) return; xbb->flags &= ~XBBF_SHUTDOWN; /* * We limit the maximum number of reqlist segments to the maximum * number of segments in the ring, or our absolute maximum, * whichever is smaller. 
*/ xbb->max_reqlist_segments = MIN(xbb->max_request_segments * xbb->max_requests, XBB_MAX_SEGMENTS_PER_REQLIST); /* * The maximum size is simply a function of the number of segments * we can handle. */ xbb->max_reqlist_size = xbb->max_reqlist_segments * PAGE_SIZE; /* Allocate resources whose size depends on front-end configuration. */ error = xbb_alloc_communication_mem(xbb); if (error != 0) { xenbus_dev_fatal(xbb->dev, error, "Unable to allocate communication memory"); return; } error = xbb_alloc_requests(xbb); if (error != 0) { /* Specific errors are reported by xbb_alloc_requests(). */ return; } error = xbb_alloc_request_lists(xbb); if (error != 0) { /* Specific errors are reported by xbb_alloc_request_lists(). */ return; } /* * Connect communication channel. */ error = xbb_connect_ring(xbb); if (error != 0) { /* Specific errors are reported by xbb_connect_ring(). */ return; } if (xbb_publish_backend_info(xbb) != 0) { /* * If we can't publish our data, we cannot participate * in this connection, and waiting for a front-end state * change will not help the situation. */ (void)xbb_disconnect(xbb); return; } /* Ready for I/O. */ xenbus_set_state(xbb->dev, XenbusStateConnected); } /*-------------------------- Device Teardown Support -------------------------*/ /** * Perform device shutdown functions. * * \param xbb Per-instance xbb configuration structure. * * Mark this instance as shutting down, wait for any active I/O on the * backend device/file to drain, disconnect from the front-end, and notify * any waiters (e.g. a thread invoking our detach method) that detach can * now proceed. */ static int xbb_shutdown(struct xbb_softc *xbb) { XenbusState frontState; int error; DPRINTF("\n"); /* * Due to the need to drop our mutex during some * xenbus operations, it is possible for two threads * to attempt to close out shutdown processing at * the same time. Tell the caller that hits this * race to try back later. */ if ((xbb->flags & XBBF_IN_SHUTDOWN) != 0) return (EAGAIN); xbb->flags |= XBBF_IN_SHUTDOWN; mtx_unlock(&xbb->lock); if (xenbus_get_state(xbb->dev) < XenbusStateClosing) xenbus_set_state(xbb->dev, XenbusStateClosing); frontState = xenbus_get_otherend_state(xbb->dev); mtx_lock(&xbb->lock); xbb->flags &= ~XBBF_IN_SHUTDOWN; /* The front can submit I/O until entering the closed state. */ if (frontState < XenbusStateClosed) return (EAGAIN); DPRINTF("\n"); /* Indicate shutdown is in progress. */ xbb->flags |= XBBF_SHUTDOWN; /* Disconnect from the front-end. */ error = xbb_disconnect(xbb); if (error != 0) { /* * Requests still outstanding. We'll be called again * once they complete. */ KASSERT(error == EAGAIN, ("%s: Unexpected xbb_disconnect() failure %d", __func__, error)); return (error); } DPRINTF("\n"); /* Indicate to xbb_detach() that is it safe to proceed. */ wakeup(xbb); return (0); } /** * Report an attach time error to the console and Xen, and cleanup * this instance by forcing immediate detach processing. * * \param xbb Per-instance xbb configuration structure. * \param err Errno describing the error. * \param fmt Printf style format and arguments */ static void xbb_attach_failed(struct xbb_softc *xbb, int err, const char *fmt, ...) 
{ va_list ap; va_list ap_hotplug; va_start(ap, fmt); va_copy(ap_hotplug, ap); xs_vprintf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-error", fmt, ap_hotplug); va_end(ap_hotplug); xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-status", "error"); xenbus_dev_vfatal(xbb->dev, err, fmt, ap); va_end(ap); xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "online", "0"); xbb_detach(xbb->dev); } /*---------------------------- NewBus Entrypoints ----------------------------*/ /** * Inspect a XenBus device and claim it if is of the appropriate type. * * \param dev NewBus device object representing a candidate XenBus device. * * \return 0 for success, errno codes for failure. */ static int xbb_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vbd")) { device_set_desc(dev, "Backend Virtual Block Device"); device_quiet(dev); return (0); } return (ENXIO); } /** * Setup sysctl variables to control various Block Back parameters. * * \param xbb Xen Block Back softc. * */ static void xbb_setup_sysctl(struct xbb_softc *xbb) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; sysctl_ctx = device_get_sysctl_ctx(xbb->dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xbb->dev); if (sysctl_tree == NULL) return; SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "disable_flush", CTLFLAG_RW, &xbb->disable_flush, 0, "fake the flush command"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "flush_interval", CTLFLAG_RW, &xbb->flush_interval, 0, "send a real flush for N flush requests"); SYSCTL_ADD_INT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "no_coalesce_reqs", CTLFLAG_RW, &xbb->no_coalesce_reqs,0, "Don't coalesce contiguous requests"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "reqs_received", CTLFLAG_RW, &xbb->reqs_received, "how many I/O requests we have received"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "reqs_completed", CTLFLAG_RW, &xbb->reqs_completed, "how many I/O requests have been completed"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "forced_dispatch", CTLFLAG_RW, &xbb->forced_dispatch, "how many I/O dispatches were forced"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "normal_dispatch", CTLFLAG_RW, &xbb->normal_dispatch, "how many I/O dispatches were normal"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "total_dispatch", CTLFLAG_RW, &xbb->total_dispatch, "total number of I/O dispatches"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "kva_shortages", CTLFLAG_RW, &xbb->kva_shortages, "how many times we have run out of KVA"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "request_shortages", CTLFLAG_RW, &xbb->request_shortages, "how many times we have run out of requests"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_requests", CTLFLAG_RD, &xbb->max_requests, 0, "maximum outstanding requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_request_segments", CTLFLAG_RD, &xbb->max_request_segments, 0, "maximum number of pages per requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "max_request_size", CTLFLAG_RD, &xbb->max_request_size, 0, "maximum size in bytes of a request (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "ring_pages", CTLFLAG_RD, &xbb->ring_config.ring_pages, 
0, "communication channel pages (negotiated)"); } /** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. */ static int xbb_attach(device_t dev) { struct xbb_softc *xbb; int error; u_int max_ring_page_order; DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); /* * Basic initialization. * After this block it is safe to call xbb_detach() * to clean up any allocated data for this instance. */ xbb = device_get_softc(dev); xbb->dev = dev; xbb->otherend_id = xenbus_get_otherend_id(dev); TASK_INIT(&xbb->io_task, /*priority*/0, xbb_run_queue, xbb); mtx_init(&xbb->lock, device_get_nameunit(dev), NULL, MTX_DEF); /* * Publish protocol capabilities for consumption by the * front-end. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "feature-barrier", "1"); if (error) { xbb_attach_failed(xbb, error, "writing %s/feature-barrier", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "feature-flush-cache", "1"); if (error) { xbb_attach_failed(xbb, error, "writing %s/feature-flush-cache", xenbus_get_node(xbb->dev)); return (error); } /* * Amazon EC2 client compatility. They refer to max-ring-pages * instead of to max-ring-page-order. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-ring-pages", "%zu", XBB_MAX_RING_PAGES); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-ring-pages", xenbus_get_node(xbb->dev)); return (error); } max_ring_page_order = flsl(XBB_MAX_RING_PAGES) - 1; error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-ring-page-order", "%u", max_ring_page_order); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-ring-page-order", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-requests", "%u", XBB_MAX_REQUESTS); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-requests", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-request-segments", "%u", XBB_MAX_SEGMENTS_PER_REQUEST); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-request-segments", xenbus_get_node(xbb->dev)); return (error); } error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "max-request-size", "%u", XBB_MAX_REQUEST_SIZE); if (error) { xbb_attach_failed(xbb, error, "writing %s/max-request-size", xenbus_get_node(xbb->dev)); return (error); } /* Collect physical device information. */ error = xs_gather(XST_NIL, xenbus_get_otherend_path(xbb->dev), "device-type", NULL, &xbb->dev_type, NULL); if (error != 0) xbb->dev_type = NULL; error = xs_gather(XST_NIL, xenbus_get_node(dev), "mode", NULL, &xbb->dev_mode, "params", NULL, &xbb->dev_name, NULL); if (error != 0) { xbb_attach_failed(xbb, error, "reading backend fields at %s", xenbus_get_node(dev)); return (ENXIO); } /* Parse fopen style mode flags. */ if (strchr(xbb->dev_mode, 'w') == NULL) xbb->flags |= XBBF_READ_ONLY; /* * Verify the physical device is present and can support * the desired I/O mode. */ DROP_GIANT(); error = xbb_open_backend(xbb); PICKUP_GIANT(); if (error != 0) { xbb_attach_failed(xbb, error, "Unable to open %s", xbb->dev_name); return (ENXIO); } /* Use devstat(9) for recording statistics. 
*/ xbb->xbb_stats = devstat_new_entry("xbb", device_get_unit(xbb->dev), xbb->sector_size, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); xbb->xbb_stats_in = devstat_new_entry("xbbi", device_get_unit(xbb->dev), xbb->sector_size, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT | DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_OTHER); /* * Setup sysctl variables. */ xbb_setup_sysctl(xbb); /* * Create a taskqueue for doing work that must occur from a * thread context. */ - xbb->io_taskqueue = taskqueue_create(device_get_nameunit(dev), M_NOWAIT, - taskqueue_thread_enqueue, - /*context*/&xbb->io_taskqueue); + xbb->io_taskqueue = taskqueue_create_fast(device_get_nameunit(dev), + M_NOWAIT, + taskqueue_thread_enqueue, + /*contxt*/&xbb->io_taskqueue); if (xbb->io_taskqueue == NULL) { xbb_attach_failed(xbb, error, "Unable to create taskqueue"); return (ENOMEM); } taskqueue_start_threads(&xbb->io_taskqueue, /*num threads*/1, /*priority*/PWAIT, /*thread name*/ "%s taskq", device_get_nameunit(dev)); /* Update hot-plug status to satisfy xend. */ error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev), "hotplug-status", "connected"); if (error) { xbb_attach_failed(xbb, error, "writing %s/hotplug-status", xenbus_get_node(xbb->dev)); return (error); } /* Tell the front end that we are ready to connect. */ xenbus_set_state(dev, XenbusStateInitWait); return (0); } /** * Detach from a block back device instance. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. * * \note A block back device may be detached at any time in its life-cycle, * including part way through the attach process. For this reason, * initialization order and the intialization state checks in this * routine must be carefully coupled so that attach time failures * are gracefully handled. */ static int xbb_detach(device_t dev) { struct xbb_softc *xbb; DPRINTF("\n"); xbb = device_get_softc(dev); mtx_lock(&xbb->lock); while (xbb_shutdown(xbb) == EAGAIN) { msleep(xbb, &xbb->lock, /*wakeup prio unchanged*/0, "xbb_shutdown", 0); } mtx_unlock(&xbb->lock); DPRINTF("\n"); if (xbb->io_taskqueue != NULL) taskqueue_free(xbb->io_taskqueue); if (xbb->xbb_stats != NULL) devstat_remove_entry(xbb->xbb_stats); if (xbb->xbb_stats_in != NULL) devstat_remove_entry(xbb->xbb_stats_in); xbb_close_backend(xbb); if (xbb->dev_mode != NULL) { free(xbb->dev_mode, M_XENBUS); xbb->dev_mode = NULL; } if (xbb->dev_type != NULL) { free(xbb->dev_type, M_XENBUS); xbb->dev_type = NULL; } if (xbb->dev_name != NULL) { free(xbb->dev_name, M_XENBUS); xbb->dev_name = NULL; } mtx_destroy(&xbb->lock); return (0); } /** * Prepare this block back device for suspension of this VM. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. */ static int xbb_suspend(device_t dev) { #ifdef NOT_YET struct xbb_softc *sc = device_get_softc(dev); /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xb_io_lock); sc->connected = BLKIF_STATE_SUSPENDED; mtx_unlock(&sc->xb_io_lock); #endif return (0); } /** * Perform any processing required to recover from a suspended state. * * \param dev NewBus device object representing this Xen Block Back instance. * * \return 0 for success, errno codes for failure. */ static int xbb_resume(device_t dev) { return (0); } /** * Handle state changes expressed via the XenStore by our front-end peer. 
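The hunk above moves the block back driver's deferred-work queue to taskqueue_create_fast() while keeping the usual taskqueue_thread_enqueue/taskqueue_start_threads arrangement. The following is a minimal, hedged sketch of that taskqueue(9) pattern with hypothetical names (my_softc, my_task_fn); it is not the driver's code and assumes only the standard kernel KPI.

// Hedged sketch of the taskqueue(9) usage pattern shown in the hunk above.
// Names are hypothetical; error handling is trimmed to the essentials.
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/taskqueue.h>

struct my_softc {
        struct task              io_task;
        struct taskqueue        *io_tq;
};

static void
my_task_fn(void *context, int pending)
{
        // Deferred I/O work runs here in the taskqueue's thread context.
}

static int
my_setup_taskqueue(struct my_softc *sc, const char *name)
{
        TASK_INIT(&sc->io_task, 0, my_task_fn, sc);     // priority 0
        // taskqueue_thread_enqueue wants a pointer to the queue pointer.
        sc->io_tq = taskqueue_create_fast(name, M_NOWAIT,
            taskqueue_thread_enqueue, &sc->io_tq);
        if (sc->io_tq == NULL)
                return (ENOMEM);
        taskqueue_start_threads(&sc->io_tq, 1, PWAIT, "%s taskq", name);
        return (0);
}

static void
my_kick_taskqueue(struct my_softc *sc)
{
        taskqueue_enqueue(sc->io_tq, &sc->io_task);
}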
* * \param dev NewBus device object representing this Xen * Block Back instance. * \param frontend_state The new state of the front-end. * * \return 0 for success, errno codes for failure. */ static void xbb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xbb_softc *xbb = device_get_softc(dev); DPRINTF("frontend_state=%s, xbb_state=%s\n", xenbus_strstate(frontend_state), xenbus_strstate(xenbus_get_state(xbb->dev))); switch (frontend_state) { case XenbusStateInitialising: break; case XenbusStateInitialised: case XenbusStateConnected: xbb_connect(xbb); break; case XenbusStateClosing: case XenbusStateClosed: mtx_lock(&xbb->lock); xbb_shutdown(xbb); mtx_unlock(&xbb->lock); if (frontend_state == XenbusStateClosed) xenbus_set_state(xbb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xbb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xbb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xbb_probe), DEVMETHOD(device_attach, xbb_attach), DEVMETHOD(device_detach, xbb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xbb_suspend), DEVMETHOD(device_resume, xbb_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xbb_frontend_changed), { 0, 0 } }; static driver_t xbb_driver = { "xbbd", xbb_methods, sizeof(struct xbb_softc), }; devclass_t xbb_devclass; DRIVER_MODULE(xbbd, xenbusb_back, xbb_driver, xbb_devclass, 0, 0); diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c index 969c3d8caf1a..7d70f42ce1cb 100644 --- a/sys/dev/xen/blkfront/blkfront.c +++ b/sys/dev/xen/blkfront/blkfront.c @@ -1,1560 +1,1555 @@ /* * XenBSD block device driver * * Copyright (c) 2010-2013 Spectra Logic Corporation * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include -#include -#include -#include - +#include #include #include -#include #include #include #include #include +#include +#include + #include #include #include "xenbus_if.h" /*--------------------------- Forward Declarations ---------------------------*/ static void xbd_closing(device_t); static void xbd_startio(struct xbd_softc *sc); /*---------------------------------- Macros ----------------------------------*/ #if 0 #define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) #else #define DPRINTK(fmt, args...) #endif #define XBD_SECTOR_SHFT 9 /*---------------------------- Global Static Data ----------------------------*/ static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data"); /*---------------------------- Command Processing ----------------------------*/ static void xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag) { if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) != 0) return; sc->xbd_flags |= xbd_flag; sc->xbd_qfrozen_cnt++; } static void xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag) { if (xbd_flag != XBDF_NONE && (sc->xbd_flags & xbd_flag) == 0) return; if (sc->xbd_qfrozen_cnt == 0) panic("%s: Thaw with flag 0x%x while not frozen.", __func__, xbd_flag); sc->xbd_flags &= ~xbd_flag; sc->xbd_qfrozen_cnt--; } static void xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag) { if ((cm->cm_flags & XBDCF_FROZEN) != 0) return; cm->cm_flags |= XBDCF_FROZEN|cm_flag; xbd_freeze(sc, XBDF_NONE); } static void xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm) { if ((cm->cm_flags & XBDCF_FROZEN) == 0) return; cm->cm_flags &= ~XBDCF_FROZEN; xbd_thaw(sc, XBDF_NONE); } static inline void xbd_flush_requests(struct xbd_softc *sc) { int notify; RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->xbd_ring, notify); if (notify) - notify_remote_via_irq(sc->xbd_irq); + xen_intr_signal(sc->xen_intr_handle); } static void xbd_free_command(struct xbd_command *cm) { KASSERT((cm->cm_flags & XBDCF_Q_MASK) == XBD_Q_NONE, ("Freeing command that is still on queue %d.", cm->cm_flags & XBDCF_Q_MASK)); cm->cm_flags = XBDCF_INITIALIZER; cm->cm_bp = NULL; cm->cm_complete = NULL; xbd_enqueue_cm(cm, XBD_Q_FREE); xbd_thaw(cm->cm_sc, XBDF_CM_SHORTAGE); } static void xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct xbd_softc *sc; struct xbd_command *cm; blkif_request_t *ring_req; struct blkif_request_segment *sg; struct blkif_request_segment *last_block_sg; grant_ref_t *sg_ref; vm_paddr_t buffer_ma; uint64_t fsect, lsect; int ref; int op; int block_segs; cm = arg; sc = cm->cm_sc; if (error) { printf("error %d in xbd_queue_cb\n", error); cm->cm_bp->bio_error = EIO; biodone(cm->cm_bp); xbd_free_command(cm); return; } /* Fill out a communications ring structure. 
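xbd_freeze() and xbd_thaw() above let several independent conditions (command shortage, grant shortage, single-stepped writes) each hold the request queue frozen at most once, with xbd_qfrozen_cnt counting the outstanding holds. The standalone model below, using assumed flag names, shows why a flag only contributes on its first freeze.

// Standalone model of the freeze/thaw accounting in xbd_freeze()/xbd_thaw().
// Flag values are assumed for the example; only the counting rule matters.
#include <assert.h>
#include <stdio.h>

#define F_NONE          0x0
#define F_CM_SHORTAGE   0x1
#define F_GNT_SHORTAGE  0x2

static unsigned int queue_flags;
static int qfrozen_cnt;

static void
freeze(unsigned int flag)
{
        // A named condition may only contribute one hold.
        if (flag != F_NONE && (queue_flags & flag) != 0)
                return;
        queue_flags |= flag;
        qfrozen_cnt++;
}

static void
thaw(unsigned int flag)
{
        // Thawing a condition that never froze the queue is a no-op.
        if (flag != F_NONE && (queue_flags & flag) == 0)
                return;
        assert(qfrozen_cnt > 0);
        queue_flags &= ~flag;
        qfrozen_cnt--;
}

int
main(void)
{
        freeze(F_CM_SHORTAGE);
        freeze(F_CM_SHORTAGE);          // second call does not double-count
        freeze(F_GNT_SHORTAGE);
        printf("holds after freezes: %d\n", qfrozen_cnt);       // 2
        thaw(F_CM_SHORTAGE);
        thaw(F_GNT_SHORTAGE);
        printf("holds after thaws: %d\n", qfrozen_cnt);         // 0
        return (0);
}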
*/ ring_req = RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); sc->xbd_ring.req_prod_pvt++; ring_req->id = cm->cm_id; ring_req->operation = cm->cm_operation; ring_req->sector_number = cm->cm_sector_number; ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk; ring_req->nr_segments = nsegs; cm->cm_nseg = nsegs; block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK); sg = ring_req->seg; last_block_sg = sg + block_segs; sg_ref = cm->cm_sg_refs; while (1) { while (sg < last_block_sg) { buffer_ma = segs->ds_addr; fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; lsect = fsect + (segs->ds_len >> XBD_SECTOR_SHFT) - 1; KASSERT(lsect <= 7, ("XEN disk driver data cannot " "cross a page boundary")); /* install a grant reference. */ ref = gnttab_claim_grant_reference(&cm->cm_gref_head); /* * GNTTAB_LIST_END == 0xffffffff, but it is private * to gnttab.c. */ KASSERT(ref != ~0, ("grant_reference failed")); gnttab_grant_foreign_access_ref( ref, xenbus_get_otherend_id(sc->xbd_dev), buffer_ma >> PAGE_SHIFT, ring_req->operation == BLKIF_OP_WRITE); *sg_ref = ref; *sg = (struct blkif_request_segment) { .gref = ref, .first_sect = fsect, .last_sect = lsect }; sg++; sg_ref++; segs++; nsegs--; } block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK); if (block_segs == 0) break; sg = BLKRING_GET_SEG_BLOCK(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt); sc->xbd_ring.req_prod_pvt++; last_block_sg = sg + block_segs; } if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_PREREAD; else if (cm->cm_operation == BLKIF_OP_WRITE) op = BUS_DMASYNC_PREWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); gnttab_free_grant_references(cm->cm_gref_head); xbd_enqueue_cm(cm, XBD_Q_BUSY); /* * If bus dma had to asynchronously call us back to dispatch * this command, we are no longer executing in the context of * xbd_startio(). Thus we cannot rely on xbd_startio()'s call to * xbd_flush_requests() to publish this command to the backend * along with any other commands that it could batch. */ if ((cm->cm_flags & XBDCF_ASYNC_MAPPING) != 0) xbd_flush_requests(sc); return; } static int xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm) { int error; error = bus_dmamap_load(sc->xbd_io_dmat, cm->cm_map, cm->cm_data, cm->cm_datalen, xbd_queue_cb, cm, 0); if (error == EINPROGRESS) { /* * Maintain queuing order by freezing the queue. The next * command may not require as many resources as the command * we just attempted to map, so we can't rely on bus dma * blocking for it too. 
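Each data segment handed to the backend above is described by a page grant plus the first and last 512-byte sector touched within that page; fsect and lsect fall directly out of the segment's physical address and length. A self-contained sketch of that arithmetic, assuming a 4 KiB page and arbitrary sample values, follows.

// Standalone illustration of the fsect/lsect computation in xbd_queue_cb().
// PAGE_SIZE is assumed to be 4 KiB; the sample address/length are arbitrary.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE    4096UL
#define EX_PAGE_MASK    (EX_PAGE_SIZE - 1)
#define EX_SECTOR_SHIFT 9               // 512-byte sectors

int
main(void)
{
        uint64_t seg_addr = 0x12345600; // physical address of the segment
        uint64_t seg_len  = 2048;       // bytes in this segment
        uint64_t fsect, lsect;

        fsect = (seg_addr & EX_PAGE_MASK) >> EX_SECTOR_SHIFT;
        lsect = fsect + (seg_len >> EX_SECTOR_SHIFT) - 1;

        // A segment may not cross its page, so lsect stays within the
        // eight sectors (0..7) that make up one 4 KiB page.
        assert(lsect <= 7);
        printf("page offset 0x%llx -> fsect %llu, lsect %llu\n",
            (unsigned long long)(seg_addr & EX_PAGE_MASK),
            (unsigned long long)fsect, (unsigned long long)lsect);
        return (0);
}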
*/ xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING); return (0); } return (error); } static void xbd_restart_queue_callback(void *arg) { struct xbd_softc *sc = arg; mtx_lock(&sc->xbd_io_lock); xbd_thaw(sc, XBDF_GNT_SHORTAGE); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); } static struct xbd_command * xbd_bio_command(struct xbd_softc *sc) { struct xbd_command *cm; struct bio *bp; - if (unlikely(sc->xbd_state != XBD_STATE_CONNECTED)) + if (__predict_false(sc->xbd_state != XBD_STATE_CONNECTED)) return (NULL); bp = xbd_dequeue_bio(sc); if (bp == NULL) return (NULL); if ((cm = xbd_dequeue_cm(sc, XBD_Q_FREE)) == NULL) { xbd_freeze(sc, XBDF_CM_SHORTAGE); xbd_requeue_bio(sc, bp); return (NULL); } if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, &cm->cm_gref_head) != 0) { gnttab_request_free_callback(&sc->xbd_callback, xbd_restart_queue_callback, sc, sc->xbd_max_request_segments); xbd_freeze(sc, XBDF_GNT_SHORTAGE); xbd_requeue_bio(sc, bp); xbd_enqueue_cm(cm, XBD_Q_FREE); return (NULL); } cm->cm_bp = bp; cm->cm_data = bp->bio_data; cm->cm_datalen = bp->bio_bcount; cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno; switch (bp->bio_cmd) { case BIO_READ: cm->cm_operation = BLKIF_OP_READ; break; case BIO_WRITE: cm->cm_operation = BLKIF_OP_WRITE; if ((bp->bio_flags & BIO_ORDERED) != 0) { if ((sc->xbd_flags & XBDF_BARRIER) != 0) { cm->cm_operation = BLKIF_OP_WRITE_BARRIER; } else { /* * Single step this command. */ cm->cm_flags |= XBDCF_Q_FREEZE; if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Wait for in-flight requests to * finish. */ xbd_freeze(sc, XBDF_WAIT_IDLE); xbd_requeue_cm(cm, XBD_Q_READY); return (NULL); } } } break; case BIO_FLUSH: if ((sc->xbd_flags & XBDF_FLUSH) != 0) cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE; else if ((sc->xbd_flags & XBDF_BARRIER) != 0) cm->cm_operation = BLKIF_OP_WRITE_BARRIER; else panic("flush request, but no flush support available"); break; default: panic("unknown bio command %d", bp->bio_cmd); } return (cm); } /* * Dequeue buffers and place them in the shared communication ring. * Return when no more requests can be accepted or all buffers have * been queued. * * Signal XEN once the ring has been filled out. */ static void xbd_startio(struct xbd_softc *sc) { struct xbd_command *cm; int error, queued = 0; mtx_assert(&sc->xbd_io_lock, MA_OWNED); if (sc->xbd_state != XBD_STATE_CONNECTED) return; while (RING_FREE_REQUESTS(&sc->xbd_ring) >= sc->xbd_max_request_blocks) { if (sc->xbd_qfrozen_cnt != 0) break; cm = xbd_dequeue_cm(sc, XBD_Q_READY); if (cm == NULL) cm = xbd_bio_command(sc); if (cm == NULL) break; if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) { /* * Single step command. Future work is * held off until this command completes. 
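xbd_bio_command() above chooses the blkif operation from the bio command and the negotiated backend features: ordered writes become barriers only when the backend advertises feature-barrier, and BIO_FLUSH falls back to a write barrier when a real cache flush is unsupported. The compact standalone sketch below, with assumed constants, captures just that decision.

// Standalone sketch of the bio -> blkif operation selection in
// xbd_bio_command().  Constants are assumed; only the decisions matter.
#include <stdio.h>

enum bio_cmd  { EX_BIO_READ, EX_BIO_WRITE, EX_BIO_FLUSH };
enum blkif_op { EX_OP_READ, EX_OP_WRITE, EX_OP_WRITE_BARRIER, EX_OP_FLUSH };

#define EX_FEAT_BARRIER 0x1     // backend advertised feature-barrier
#define EX_FEAT_FLUSH   0x2     // backend advertised feature-flush-cache

static int
pick_op(enum bio_cmd cmd, int ordered, unsigned feats)
{
        switch (cmd) {
        case EX_BIO_READ:
                return (EX_OP_READ);
        case EX_BIO_WRITE:
                if (ordered && (feats & EX_FEAT_BARRIER))
                        return (EX_OP_WRITE_BARRIER);
                return (EX_OP_WRITE);   // without barriers the driver
                                        // single-steps ordered writes instead
        case EX_BIO_FLUSH:
                if (feats & EX_FEAT_FLUSH)
                        return (EX_OP_FLUSH);
                if (feats & EX_FEAT_BARRIER)
                        return (EX_OP_WRITE_BARRIER);
                return (-1);            // driver panics: flush cannot be honored
        }
        return (-1);
}

int
main(void)
{
        printf("ordered write, barrier only: %d\n",
            pick_op(EX_BIO_WRITE, 1, EX_FEAT_BARRIER));
        printf("flush, no flush support:     %d\n",
            pick_op(EX_BIO_FLUSH, 0, EX_FEAT_BARRIER));
        return (0);
}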
*/ xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE); } if ((error = xbd_queue_request(sc, cm)) != 0) { printf("xbd_queue_request returned %d\n", error); break; } queued++; } if (queued != 0) xbd_flush_requests(sc); } static void xbd_bio_complete(struct xbd_softc *sc, struct xbd_command *cm) { struct bio *bp; bp = cm->cm_bp; - if (unlikely(cm->cm_status != BLKIF_RSP_OKAY)) { + if (__predict_false(cm->cm_status != BLKIF_RSP_OKAY)) { disk_err(bp, "disk error" , -1, 0); printf(" status: %x\n", cm->cm_status); bp->bio_flags |= BIO_ERROR; } if (bp->bio_flags & BIO_ERROR) bp->bio_error = EIO; else bp->bio_resid = 0; xbd_free_command(cm); biodone(bp); } static int xbd_completion(struct xbd_command *cm) { gnttab_end_foreign_access_references(cm->cm_nseg, cm->cm_sg_refs); return (BLKIF_SEGS_TO_BLOCKS(cm->cm_nseg)); } static void xbd_int(void *xsc) { struct xbd_softc *sc = xsc; struct xbd_command *cm; blkif_response_t *bret; RING_IDX i, rp; int op; mtx_lock(&sc->xbd_io_lock); - if (unlikely(sc->xbd_state == XBD_STATE_DISCONNECTED)) { + if (__predict_false(sc->xbd_state == XBD_STATE_DISCONNECTED)) { mtx_unlock(&sc->xbd_io_lock); return; } again: rp = sc->xbd_ring.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ for (i = sc->xbd_ring.rsp_cons; i != rp;) { bret = RING_GET_RESPONSE(&sc->xbd_ring, i); cm = &sc->xbd_shadow[bret->id]; xbd_remove_cm(cm, XBD_Q_BUSY); i += xbd_completion(cm); if (cm->cm_operation == BLKIF_OP_READ) op = BUS_DMASYNC_POSTREAD; else if (cm->cm_operation == BLKIF_OP_WRITE || cm->cm_operation == BLKIF_OP_WRITE_BARRIER) op = BUS_DMASYNC_POSTWRITE; else op = 0; bus_dmamap_sync(sc->xbd_io_dmat, cm->cm_map, op); bus_dmamap_unload(sc->xbd_io_dmat, cm->cm_map); /* * Release any hold this command has on future command * dispatch. */ xbd_cm_thaw(sc, cm); /* * Directly call the i/o complete routine to save an * an indirection in the common case. */ cm->cm_status = bret->status; if (cm->cm_bp) xbd_bio_complete(sc, cm); else if (cm->cm_complete != NULL) cm->cm_complete(cm); else xbd_free_command(cm); } sc->xbd_ring.rsp_cons = i; if (i != sc->xbd_ring.req_prod_pvt) { int more_to_do; RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, more_to_do); if (more_to_do) goto again; } else { sc->xbd_ring.sring->rsp_event = i + 1; } if (xbd_queue_length(sc, XBD_Q_BUSY) == 0) xbd_thaw(sc, XBDF_WAIT_IDLE); xbd_startio(sc); - if (unlikely(sc->xbd_state == XBD_STATE_SUSPENDED)) + if (__predict_false(sc->xbd_state == XBD_STATE_SUSPENDED)) wakeup(&sc->xbd_cm_q[XBD_Q_BUSY]); mtx_unlock(&sc->xbd_io_lock); } /*------------------------------- Dump Support -------------------------------*/ /** * Quiesce the disk writes for a dump file before allowing the next buffer. */ static void xbd_quiesce(struct xbd_softc *sc) { int mtd; // While there are outstanding requests while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd); if (mtd) { /* Recieved request completions, update queue. */ xbd_int(sc); } if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { /* * Still pending requests, wait for the disk i/o * to complete. 
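The interrupt handler above drains responses up to a snapshot of rsp_prod and then relies on RING_FINAL_CHECK_FOR_RESPONSES() to re-arm the event and catch any response published during the drain. The standalone model below, with a mock ring, walks through that re-check pattern; it is a simplification of the real ring macros.

// Standalone model of the response-consumption pattern in xbd_int():
// drain up to the producer index, then do a final check that re-arms the
// event and catches responses that arrived while draining.
#include <stdio.h>

struct ex_ring {
        unsigned rsp_prod;      // written by the "backend"
        unsigned rsp_cons;      // our consumer index
        unsigned rsp_event;     // backend notifies when rsp_prod reaches this
};

// Simplified RING_FINAL_CHECK_FOR_RESPONSES(): re-arm the event and report
// whether more responses slipped in before the re-arm took effect.
static int
final_check_for_responses(struct ex_ring *r)
{
        if (r->rsp_cons != r->rsp_prod)
                return (1);
        r->rsp_event = r->rsp_cons + 1;
        return (r->rsp_cons != r->rsp_prod);
}

static void
consume(struct ex_ring *r)
{
        int more;

        do {
                unsigned rp = r->rsp_prod;      // snapshot (rmb() in the driver)
                while (r->rsp_cons != rp) {
                        printf("handled response %u\n", r->rsp_cons);
                        r->rsp_cons++;
                }
                // Pretend the backend slipped one more response in here.
                if (r->rsp_prod == 3)
                        r->rsp_prod = 4;
                more = final_check_for_responses(r);
        } while (more);
}

int
main(void)
{
        struct ex_ring r = { .rsp_prod = 3, .rsp_cons = 0, .rsp_event = 1 };

        consume(&r);
        printf("rsp_event re-armed at %u\n", r.rsp_event);
        return (0);
}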
*/ HYPERVISOR_yield(); } } } /* Kernel dump function for a paravirtualized disk device */ static void xbd_dump_complete(struct xbd_command *cm) { xbd_enqueue_cm(cm, XBD_Q_COMPLETE); } static int xbd_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t length) { struct disk *dp = arg; struct xbd_softc *sc = dp->d_drv1; struct xbd_command *cm; size_t chunk; int sbp; int rc = 0; if (length <= 0) return (rc); xbd_quiesce(sc); /* All quiet on the western front. */ /* * If this lock is held, then this module is failing, and a * successful kernel dump is highly unlikely anyway. */ mtx_lock(&sc->xbd_io_lock); /* Split the 64KB block as needed */ for (sbp=0; length > 0; sbp++) { cm = xbd_dequeue_cm(sc, XBD_Q_FREE); if (cm == NULL) { mtx_unlock(&sc->xbd_io_lock); device_printf(sc->xbd_dev, "dump: no more commands?\n"); return (EBUSY); } if (gnttab_alloc_grant_references(sc->xbd_max_request_segments, &cm->cm_gref_head) != 0) { xbd_free_command(cm); mtx_unlock(&sc->xbd_io_lock); device_printf(sc->xbd_dev, "no more grant allocs?\n"); return (EBUSY); } chunk = length > sc->xbd_max_request_size ? sc->xbd_max_request_size : length; cm->cm_data = virtual; cm->cm_datalen = chunk; cm->cm_operation = BLKIF_OP_WRITE; cm->cm_sector_number = offset / dp->d_sectorsize; cm->cm_complete = xbd_dump_complete; xbd_enqueue_cm(cm, XBD_Q_READY); length -= chunk; offset += chunk; virtual = (char *) virtual + chunk; } /* Tell DOM0 to do the I/O */ xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); /* Poll for the completion. */ xbd_quiesce(sc); /* All quite on the eastern front */ /* If there were any errors, bail out... */ while ((cm = xbd_dequeue_cm(sc, XBD_Q_COMPLETE)) != NULL) { if (cm->cm_status != BLKIF_RSP_OKAY) { device_printf(sc->xbd_dev, "Dump I/O failed at sector %jd\n", cm->cm_sector_number); rc = EIO; } xbd_free_command(cm); } return (rc); } /*----------------------------- Disk Entrypoints -----------------------------*/ static int xbd_open(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) { printf("xb%d: not found", sc->xbd_unit); return (ENXIO); } sc->xbd_flags |= XBDF_OPEN; sc->xbd_users++; return (0); } static int xbd_close(struct disk *dp) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); sc->xbd_flags &= ~XBDF_OPEN; if (--(sc->xbd_users) == 0) { /* * Check whether we have been instructed to close. We will * have ignored this request initially, as the device was * still mounted. */ if (xenbus_get_otherend_state(sc->xbd_dev) == XenbusStateClosing) xbd_closing(sc->xbd_dev); } return (0); } static int xbd_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) { struct xbd_softc *sc = dp->d_drv1; if (sc == NULL) return (ENXIO); return (ENOTTY); } /* * Read/write routine for a buffer. Finds the proper unit, place it on * the sortq and kick the controller. */ static void xbd_strategy(struct bio *bp) { struct xbd_softc *sc = bp->bio_disk->d_drv1; /* bogus disk? 
*/ if (sc == NULL) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; bp->bio_resid = bp->bio_bcount; biodone(bp); return; } /* * Place it in the queue of disk activities for this disk */ mtx_lock(&sc->xbd_io_lock); xbd_enqueue_bio(sc, bp); xbd_startio(sc); mtx_unlock(&sc->xbd_io_lock); return; } /*------------------------------ Ring Management -----------------------------*/ static int xbd_alloc_ring(struct xbd_softc *sc) { blkif_sring_t *sring; uintptr_t sring_page_addr; int error; int i; sring = malloc(sc->xbd_ring_pages * PAGE_SIZE, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sring == NULL) { xenbus_dev_fatal(sc->xbd_dev, ENOMEM, "allocating shared ring"); return (ENOMEM); } SHARED_RING_INIT(sring); FRONT_RING_INIT(&sc->xbd_ring, sring, sc->xbd_ring_pages * PAGE_SIZE); for (i = 0, sring_page_addr = (uintptr_t)sring; i < sc->xbd_ring_pages; i++, sring_page_addr += PAGE_SIZE) { error = xenbus_grant_ring(sc->xbd_dev, (vtomach(sring_page_addr) >> PAGE_SHIFT), &sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "granting ring_ref(%d)", i); return (error); } } if (sc->xbd_ring_pages == 1) { error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), "ring-ref", "%u", sc->xbd_ring_ref[0]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-ref", xenbus_get_node(sc->xbd_dev)); return (error); } } else { for (i = 0; i < sc->xbd_ring_pages; i++) { char ring_ref_name[]= "ring_refXX"; snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i); error = xs_printf(XST_NIL, xenbus_get_node(sc->xbd_dev), ring_ref_name, "%u", sc->xbd_ring_ref[i]); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/%s", xenbus_get_node(sc->xbd_dev), ring_ref_name); return (error); } } } - error = bind_listening_port_to_irqhandler( - xenbus_get_otherend_id(sc->xbd_dev), - "xbd", (driver_intr_t *)xbd_int, sc, - INTR_TYPE_BIO | INTR_MPSAFE, &sc->xbd_irq); + error = xen_intr_alloc_and_bind_local_port(sc->xbd_dev, + xenbus_get_otherend_id(sc->xbd_dev), NULL, xbd_int, sc, + INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, - "bind_evtchn_to_irqhandler failed"); + "xen_intr_alloc_and_bind_local_port failed"); return (error); } return (0); } static void xbd_free_ring(struct xbd_softc *sc) { int i; if (sc->xbd_ring.sring == NULL) return; for (i = 0; i < sc->xbd_ring_pages; i++) { if (sc->xbd_ring_ref[i] != GRANT_REF_INVALID) { gnttab_end_foreign_access_ref(sc->xbd_ring_ref[i]); sc->xbd_ring_ref[i] = GRANT_REF_INVALID; } } free(sc->xbd_ring.sring, M_XENBLOCKFRONT); sc->xbd_ring.sring = NULL; } /*-------------------------- Initialization/Teardown -------------------------*/ static int xbd_feature_string(struct xbd_softc *sc, char *features, size_t len) { struct sbuf sb; int feature_cnt; sbuf_new(&sb, features, len, SBUF_FIXEDLEN); feature_cnt = 0; if ((sc->xbd_flags & XBDF_FLUSH) != 0) { sbuf_printf(&sb, "flush"); feature_cnt++; } if ((sc->xbd_flags & XBDF_BARRIER) != 0) { if (feature_cnt != 0) sbuf_printf(&sb, ", "); sbuf_printf(&sb, "write_barrier"); feature_cnt++; } (void) sbuf_finish(&sb); return (sbuf_len(&sb)); } static int xbd_sysctl_features(SYSCTL_HANDLER_ARGS) { char features[80]; struct xbd_softc *sc = arg1; int error; int len; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); len = xbd_feature_string(sc, features, sizeof(features)); /* len is -1 on error, which will make the SYSCTL_OUT a no-op. 
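xbd_feature_string() above uses sbuf(9) to build a comma-separated list of the negotiated features into a fixed buffer, and the sysctl handler exports it including the terminating NUL. A plain-libc sketch of the same construction, using snprintf() and assumed flag names, is shown here.

// Standalone equivalent of xbd_feature_string(): build a comma-separated
// feature list into a fixed-size buffer.  Flag names are assumed.
#include <stdio.h>
#include <string.h>

#define EX_FLUSH        0x1
#define EX_BARRIER      0x2

static int
feature_string(unsigned flags, char *buf, size_t len)
{
        size_t off = 0;

        buf[0] = '\0';
        if (flags & EX_FLUSH)
                off += snprintf(buf + off, len - off, "%s%s",
                    off ? ", " : "", "flush");
        if (flags & EX_BARRIER)
                off += snprintf(buf + off, len - off, "%s%s",
                    off ? ", " : "", "write_barrier");
        return ((int)off);              // length, like sbuf_len()
}

int
main(void)
{
        char features[80];
        int len;

        len = feature_string(EX_FLUSH | EX_BARRIER, features,
            sizeof(features));
        printf("features (%d bytes): %s\n", len, features);
        return (0);
}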
*/ return (SYSCTL_OUT(req, features, len + 1/*NUL*/)); } static void xbd_setup_sysctl(struct xbd_softc *xbd) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; struct sysctl_oid_list *children; sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xbd->xbd_dev); if (sysctl_tree == NULL) return; children = SYSCTL_CHILDREN(sysctl_tree); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1, "maximum outstanding requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_request_segments", CTLFLAG_RD, &xbd->xbd_max_request_segments, 0, "maximum number of pages per requests (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0, "maximum size in bytes of a request (negotiated)"); SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO, "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0, "communication channel pages (negotiated)"); SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO, "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0, xbd_sysctl_features, "A", "protocol features (negotiated)"); } /* * Translate Linux major/minor to an appropriate name and unit * number. For HVM guests, this allows us to use the same drive names * with blkfront as the emulated drives, easing transition slightly. */ static void xbd_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name) { static struct vdev_info { int major; int shift; int base; const char *name; } info[] = { {3, 6, 0, "ada"}, /* ide0 */ {22, 6, 2, "ada"}, /* ide1 */ {33, 6, 4, "ada"}, /* ide2 */ {34, 6, 6, "ada"}, /* ide3 */ {56, 6, 8, "ada"}, /* ide4 */ {57, 6, 10, "ada"}, /* ide5 */ {88, 6, 12, "ada"}, /* ide6 */ {89, 6, 14, "ada"}, /* ide7 */ {90, 6, 16, "ada"}, /* ide8 */ {91, 6, 18, "ada"}, /* ide9 */ {8, 4, 0, "da"}, /* scsi disk0 */ {65, 4, 16, "da"}, /* scsi disk1 */ {66, 4, 32, "da"}, /* scsi disk2 */ {67, 4, 48, "da"}, /* scsi disk3 */ {68, 4, 64, "da"}, /* scsi disk4 */ {69, 4, 80, "da"}, /* scsi disk5 */ {70, 4, 96, "da"}, /* scsi disk6 */ {71, 4, 112, "da"}, /* scsi disk7 */ {128, 4, 128, "da"}, /* scsi disk8 */ {129, 4, 144, "da"}, /* scsi disk9 */ {130, 4, 160, "da"}, /* scsi disk10 */ {131, 4, 176, "da"}, /* scsi disk11 */ {132, 4, 192, "da"}, /* scsi disk12 */ {133, 4, 208, "da"}, /* scsi disk13 */ {134, 4, 224, "da"}, /* scsi disk14 */ {135, 4, 240, "da"}, /* scsi disk15 */ {202, 4, 0, "xbd"}, /* xbd */ {0, 0, 0, NULL}, }; int major = vdevice >> 8; int minor = vdevice & 0xff; int i; if (vdevice & (1 << 28)) { *unit = (vdevice & ((1 << 28) - 1)) >> 8; *name = "xbd"; return; } for (i = 0; info[i].major; i++) { if (info[i].major == major) { *unit = info[i].base + (minor >> info[i].shift); *name = info[i].name; return; } } *unit = minor >> 4; *name = "xbd"; } int xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors, int vdevice, uint16_t vdisk_info, unsigned long sector_size) { char features[80]; int unit, error = 0; const char *name; xbd_vdevice_to_unit(vdevice, &unit, &name); sc->xbd_unit = unit; if (strcmp(name, "xbd") != 0) device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit); if (xbd_feature_string(sc, features, sizeof(features)) > 0) { device_printf(sc->xbd_dev, "features: %s\n", features); } sc->xbd_disk = disk_alloc(); sc->xbd_disk->d_unit = sc->xbd_unit; sc->xbd_disk->d_open = xbd_open; sc->xbd_disk->d_close = xbd_close; sc->xbd_disk->d_ioctl = xbd_ioctl; sc->xbd_disk->d_strategy = 
xbd_strategy; sc->xbd_disk->d_dump = xbd_dump; sc->xbd_disk->d_name = name; sc->xbd_disk->d_drv1 = sc; sc->xbd_disk->d_sectorsize = sector_size; sc->xbd_disk->d_mediasize = sectors * sector_size; sc->xbd_disk->d_maxsize = sc->xbd_max_request_size; sc->xbd_disk->d_flags = 0; if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) { sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE; device_printf(sc->xbd_dev, "synchronize cache commands enabled.\n"); } disk_create(sc->xbd_disk, DISK_VERSION); return error; } static void xbd_free(struct xbd_softc *sc) { int i; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_DISCONNECTED; mtx_unlock(&sc->xbd_io_lock); /* Free resources associated with old device channel. */ xbd_free_ring(sc); if (sc->xbd_shadow) { for (i = 0; i < sc->xbd_max_requests; i++) { struct xbd_command *cm; cm = &sc->xbd_shadow[i]; if (cm->cm_sg_refs != NULL) { free(cm->cm_sg_refs, M_XENBLOCKFRONT); cm->cm_sg_refs = NULL; } bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map); } free(sc->xbd_shadow, M_XENBLOCKFRONT); sc->xbd_shadow = NULL; bus_dma_tag_destroy(sc->xbd_io_dmat); xbd_initq_cm(sc, XBD_Q_FREE); xbd_initq_cm(sc, XBD_Q_READY); xbd_initq_cm(sc, XBD_Q_COMPLETE); } - if (sc->xbd_irq) { - unbind_from_irqhandler(sc->xbd_irq); - sc->xbd_irq = 0; - } + xen_intr_unbind(&sc->xen_intr_handle); + } /*--------------------------- State Change Handlers --------------------------*/ static void xbd_initialize(struct xbd_softc *sc) { const char *otherend_path; const char *node_path; uint32_t max_ring_page_order; int error; int i; if (xenbus_get_state(sc->xbd_dev) != XenbusStateInitialising) { /* Initialization has already been performed. */ return; } /* * Protocol defaults valid even if negotiation for a * setting fails. */ max_ring_page_order = 0; sc->xbd_ring_pages = 1; sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; sc->xbd_max_request_size = XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); sc->xbd_max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->xbd_max_request_segments); /* * Protocol negotiation. * * \note xs_gather() returns on the first encountered error, so * we must use independant calls in order to guarantee * we don't miss information in a sparsly populated back-end * tree. * * \note xs_scanf() does not update variables for unmatched * fields. */ otherend_path = xenbus_get_otherend_path(sc->xbd_dev); node_path = xenbus_get_node(sc->xbd_dev); /* Support both backend schemes for relaying ring page limits. 
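The xbd_vdevice_to_unit() translation a little earlier maps the Linux-style virtual-device number either through the extended scheme (bit 28 set, unit encoded directly) or through a table of legacy IDE/SCSI majors so the disk appears under a familiar ada or da name. A standalone sketch with a trimmed table makes both paths concrete.

// Standalone sketch of the xbd_vdevice_to_unit() decoding with a trimmed
// major table; the numbering follows the Linux block-device convention.
#include <stdio.h>

struct vdev_info {
        int major, shift, base;
        const char *name;
};

static const struct vdev_info table[] = {
        { 3, 6, 0, "ada" },     // ide0: hda/hdb
        { 22, 6, 2, "ada" },    // ide1: hdc/hdd
        { 8, 4, 0, "da" },      // scsi disk0
        { 202, 4, 0, "xbd" },   // xvd
        { 0, 0, 0, NULL },
};

static void
vdevice_to_unit(unsigned vdevice, int *unit, const char **name)
{
        int major = vdevice >> 8, minor = vdevice & 0xff, i;

        if (vdevice & (1u << 28)) {             // extended encoding
                *unit = (vdevice & ((1u << 28) - 1)) >> 8;
                *name = "xbd";
                return;
        }
        for (i = 0; table[i].major; i++) {
                if (table[i].major == major) {
                        *unit = table[i].base + (minor >> table[i].shift);
                        *name = table[i].name;
                        return;
                }
        }
        *unit = minor >> 4;                     // unknown major: fall back
        *name = "xbd";
}

int
main(void)
{
        const char *name;
        int unit;

        vdevice_to_unit((3u << 8) | 64, &unit, &name);  // Linux hdb
        printf("vdevice 0x%x -> %s%d\n", (3u << 8) | 64, name, unit);
        vdevice_to_unit((1u << 28) | (5u << 8), &unit, &name);
        printf("extended      -> %s%d\n", name, unit);
        return (0);
}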
*/ (void)xs_scanf(XST_NIL, otherend_path, "max-ring-page-order", NULL, "%" PRIu32, &max_ring_page_order); sc->xbd_ring_pages = 1 << max_ring_page_order; (void)xs_scanf(XST_NIL, otherend_path, "max-ring-pages", NULL, "%" PRIu32, &sc->xbd_ring_pages); if (sc->xbd_ring_pages < 1) sc->xbd_ring_pages = 1; sc->xbd_max_requests = BLKIF_MAX_RING_REQUESTS(sc->xbd_ring_pages * PAGE_SIZE); (void)xs_scanf(XST_NIL, otherend_path, "max-requests", NULL, "%" PRIu32, &sc->xbd_max_requests); (void)xs_scanf(XST_NIL, otherend_path, "max-request-segments", NULL, "%" PRIu32, &sc->xbd_max_request_segments); (void)xs_scanf(XST_NIL, otherend_path, "max-request-size", NULL, "%" PRIu32, &sc->xbd_max_request_size); if (sc->xbd_ring_pages > XBD_MAX_RING_PAGES) { device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "limited to front-end limit of %zu.\n", sc->xbd_ring_pages, XBD_MAX_RING_PAGES); sc->xbd_ring_pages = XBD_MAX_RING_PAGES; } if (powerof2(sc->xbd_ring_pages) == 0) { uint32_t new_page_limit; new_page_limit = 0x01 << (fls(sc->xbd_ring_pages) - 1); device_printf(sc->xbd_dev, "Back-end specified ring-pages of %u " "is not a power of 2. Limited to %u.\n", sc->xbd_ring_pages, new_page_limit); sc->xbd_ring_pages = new_page_limit; } if (sc->xbd_max_requests > XBD_MAX_REQUESTS) { device_printf(sc->xbd_dev, "Back-end specified max_requests of %u " "limited to front-end limit of %u.\n", sc->xbd_max_requests, XBD_MAX_REQUESTS); sc->xbd_max_requests = XBD_MAX_REQUESTS; } if (sc->xbd_max_request_segments > XBD_MAX_SEGMENTS_PER_REQUEST) { device_printf(sc->xbd_dev, "Back-end specified max_request_segments of %u " "limited to front-end limit of %u.\n", sc->xbd_max_request_segments, XBD_MAX_SEGMENTS_PER_REQUEST); sc->xbd_max_request_segments = XBD_MAX_SEGMENTS_PER_REQUEST; } if (sc->xbd_max_request_size > XBD_MAX_REQUEST_SIZE) { device_printf(sc->xbd_dev, "Back-end specified max_request_size of %u " "limited to front-end limit of %u.\n", sc->xbd_max_request_size, XBD_MAX_REQUEST_SIZE); sc->xbd_max_request_size = XBD_MAX_REQUEST_SIZE; } if (sc->xbd_max_request_size > XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments)) { device_printf(sc->xbd_dev, "Back-end specified max_request_size of %u " "limited to front-end limit of %u. (Too few segments.)\n", sc->xbd_max_request_size, XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments)); sc->xbd_max_request_size = XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments); } sc->xbd_max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->xbd_max_request_segments); /* Allocate datastructures based on negotiated values. */ error = bus_dma_tag_create( bus_get_dma_tag(sc->xbd_dev), /* parent */ 512, PAGE_SIZE, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->xbd_max_request_size, sc->xbd_max_request_segments, PAGE_SIZE, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->xbd_io_lock, /* lockarg */ &sc->xbd_io_dmat); if (error != 0) { xenbus_dev_fatal(sc->xbd_dev, error, "Cannot allocate parent DMA tag\n"); return; } /* Per-transaction data allocation. 
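The negotiation code above takes nothing from the backend on faith: ring pages, request count, segment count and request size are each clamped to the front end's compiled-in maxima, and a non-power-of-two ring-page count is rounded down using fls(). The self-contained sketch below shows the clamp and the rounding with an assumed limit.

// Standalone sketch of the ring-page sanitizing in xbd_initialize():
// clamp to the front-end limit and round down to a power of two via fls().
#include <stdio.h>

#define EX_MAX_RING_PAGES       8       // assumed front-end limit

static int
example_fls(unsigned v)
{
        int bit;

        for (bit = 0; v != 0; bit++)
                v >>= 1;
        return (bit);
}

static unsigned
sanitize_ring_pages(unsigned backend_pages)
{
        unsigned pages = backend_pages;

        if (pages < 1)
                pages = 1;
        if (pages > EX_MAX_RING_PAGES)
                pages = EX_MAX_RING_PAGES;
        if ((pages & (pages - 1)) != 0)         // not a power of two
                pages = 1u << (example_fls(pages) - 1);
        return (pages);
}

int
main(void)
{
        printf("backend 6  -> %u ring pages\n", sanitize_ring_pages(6));  // 4
        printf("backend 32 -> %u ring pages\n", sanitize_ring_pages(32)); // 8
        printf("backend 0  -> %u ring pages\n", sanitize_ring_pages(0));  // 1
        return (0);
}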
*/ sc->xbd_shadow = malloc(sizeof(*sc->xbd_shadow) * sc->xbd_max_requests, M_XENBLOCKFRONT, M_NOWAIT|M_ZERO); if (sc->xbd_shadow == NULL) { bus_dma_tag_destroy(sc->xbd_io_dmat); xenbus_dev_fatal(sc->xbd_dev, error, "Cannot allocate request structures\n"); return; } for (i = 0; i < sc->xbd_max_requests; i++) { struct xbd_command *cm; cm = &sc->xbd_shadow[i]; cm->cm_sg_refs = malloc( sizeof(grant_ref_t) * sc->xbd_max_request_segments, M_XENBLOCKFRONT, M_NOWAIT); if (cm->cm_sg_refs == NULL) break; cm->cm_id = i; cm->cm_flags = XBDCF_INITIALIZER; cm->cm_sc = sc; if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0) break; xbd_free_command(cm); } if (xbd_alloc_ring(sc) != 0) return; /* Support both backend schemes for relaying ring page limits. */ if (sc->xbd_ring_pages > 1) { error = xs_printf(XST_NIL, node_path, "num-ring-pages","%u", sc->xbd_ring_pages); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/num-ring-pages", node_path); return; } error = xs_printf(XST_NIL, node_path, "ring-page-order", "%u", fls(sc->xbd_ring_pages) - 1); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/ring-page-order", node_path); return; } } error = xs_printf(XST_NIL, node_path, "max-requests","%u", sc->xbd_max_requests); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/max-requests", node_path); return; } error = xs_printf(XST_NIL, node_path, "max-request-segments","%u", sc->xbd_max_request_segments); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/max-request-segments", node_path); return; } error = xs_printf(XST_NIL, node_path, "max-request-size","%u", sc->xbd_max_request_size); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/max-request-size", node_path); return; } error = xs_printf(XST_NIL, node_path, "event-channel", - "%u", irq_to_evtchn_port(sc->xbd_irq)); + "%u", xen_intr_port(sc->xen_intr_handle)); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/event-channel", node_path); return; } error = xs_printf(XST_NIL, node_path, "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); if (error) { xenbus_dev_fatal(sc->xbd_dev, error, "writing %s/protocol", node_path); return; } xenbus_set_state(sc->xbd_dev, XenbusStateInitialised); } /* * Invoked when the backend is finally 'ready' (and has published * the details about the physical device - #sectors, size, etc). 
*/ static void xbd_connect(struct xbd_softc *sc) { device_t dev = sc->xbd_dev; unsigned long sectors, sector_size; unsigned int binfo; int err, feature_barrier, feature_flush; if (sc->xbd_state == XBD_STATE_CONNECTED || sc->xbd_state == XBD_STATE_SUSPENDED) return; DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "sectors", "%lu", §ors, "info", "%u", &binfo, "sector-size", "%lu", §or_size, NULL); if (err) { xenbus_dev_fatal(dev, err, "reading backend fields at %s", xenbus_get_otherend_path(dev)); return; } err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-barrier", "%lu", &feature_barrier, NULL); if (err == 0 && feature_barrier != 0) sc->xbd_flags |= XBDF_BARRIER; err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev), "feature-flush-cache", "%lu", &feature_flush, NULL); if (err == 0 && feature_flush != 0) sc->xbd_flags |= XBDF_FLUSH; if (sc->xbd_disk == NULL) { device_printf(dev, "%juMB <%s> at %s", (uintmax_t) sectors / (1048576 / sector_size), device_get_desc(dev), xenbus_get_node(dev)); bus_print_child_footer(device_get_parent(dev), dev); xbd_instance_create(sc, sectors, sc->xbd_vdevice, binfo, sector_size); } (void)xenbus_set_state(dev, XenbusStateConnected); /* Kick pending requests. */ mtx_lock(&sc->xbd_io_lock); sc->xbd_state = XBD_STATE_CONNECTED; xbd_startio(sc); sc->xbd_flags |= XBDF_READY; mtx_unlock(&sc->xbd_io_lock); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once this is done, we can switch to Closed in * acknowledgement. */ static void xbd_closing(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); xenbus_set_state(dev, XenbusStateClosing); DPRINTK("xbd_closing: %s removed\n", xenbus_get_node(dev)); if (sc->xbd_disk != NULL) { disk_destroy(sc->xbd_disk); sc->xbd_disk = NULL; } xenbus_set_state(dev, XenbusStateClosed); } /*---------------------------- NewBus Entrypoints ----------------------------*/ static int xbd_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vbd")) { device_set_desc(dev, "Virtual Block Device"); device_quiet(dev); return (0); } return (ENXIO); } /* * Setup supplies the backend dir, virtual device. We place an event * channel and shared frame entries. We watch backend to wait if it's * ok. */ static int xbd_attach(device_t dev) { struct xbd_softc *sc; const char *name; uint32_t vdevice; int error; int i; int unit; /* FIXME: Use dynamic device id if this is not set. */ error = xs_scanf(XST_NIL, xenbus_get_node(dev), "virtual-device", NULL, "%" PRIu32, &vdevice); if (error) { xenbus_dev_fatal(dev, error, "reading virtual-device"); device_printf(dev, "Couldn't determine virtual device.\n"); return (error); } xbd_vdevice_to_unit(vdevice, &unit, &name); if (!strcmp(name, "xbd")) device_set_unit(dev, unit); sc = device_get_softc(dev); mtx_init(&sc->xbd_io_lock, "blkfront i/o lock", NULL, MTX_DEF); xbd_initqs(sc); for (i = 0; i < XBD_MAX_RING_PAGES; i++) sc->xbd_ring_ref[i] = GRANT_REF_INVALID; sc->xbd_dev = dev; sc->xbd_vdevice = vdevice; sc->xbd_state = XBD_STATE_DISCONNECTED; xbd_setup_sysctl(sc); /* Wait for backend device to publish its protocol capabilities. 
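xbd_connect() above reads sectors and sector-size from the backend, sizes the disk as sectors * sector_size, and reports the capacity in MB as sectors / (1048576 / sector_size). A short standalone check of that arithmetic with assumed values:

// Standalone check of the capacity arithmetic used when xbd_connect()
// prints the disk size; the sector count and size are assumed values.
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        unsigned long sectors = 20971520;       // e.g. a 10 GiB backend
        unsigned long sector_size = 512;
        uint64_t mediasize;
        uintmax_t mb;

        mediasize = (uint64_t)sectors * sector_size;
        // Same expression the driver prints: 1048576 / sector_size is the
        // number of sectors per MiB, so the division yields whole MiB.
        mb = sectors / (1048576 / sector_size);
        printf("%lu sectors x %lu bytes = %llu bytes (%juMB)\n",
            sectors, sector_size, (unsigned long long)mediasize, mb);
        return (0);
}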
*/ xenbus_set_state(dev, XenbusStateInitialising); return (0); } static int xbd_detach(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("%s: %s removed\n", __func__, xenbus_get_node(dev)); xbd_free(sc); mtx_destroy(&sc->xbd_io_lock); return 0; } static int xbd_suspend(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); int retval; int saved_state; /* Prevent new requests being issued until we fix things up. */ mtx_lock(&sc->xbd_io_lock); saved_state = sc->xbd_state; sc->xbd_state = XBD_STATE_SUSPENDED; /* Wait for outstanding I/O to drain. */ retval = 0; while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) { if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock, PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) { retval = EBUSY; break; } } mtx_unlock(&sc->xbd_io_lock); if (retval != 0) sc->xbd_state = saved_state; return (retval); } static int xbd_resume(device_t dev) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("xbd_resume: %s\n", xenbus_get_node(dev)); xbd_free(sc); xbd_initialize(sc); return (0); } /** * Callback received when the backend's state changes. */ static void xbd_backend_changed(device_t dev, XenbusState backend_state) { struct xbd_softc *sc = device_get_softc(dev); DPRINTK("backend_state=%d\n", backend_state); switch (backend_state) { case XenbusStateUnknown: case XenbusStateInitialising: case XenbusStateReconfigured: case XenbusStateReconfiguring: case XenbusStateClosed: break; case XenbusStateInitWait: case XenbusStateInitialised: xbd_initialize(sc); break; case XenbusStateConnected: xbd_initialize(sc); xbd_connect(sc); break; case XenbusStateClosing: if (sc->xbd_users > 0) xenbus_dev_error(dev, -EBUSY, "Device in use; refusing to close"); else xbd_closing(dev); break; } } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xbd_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xbd_probe), DEVMETHOD(device_attach, xbd_attach), DEVMETHOD(device_detach, xbd_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xbd_suspend), DEVMETHOD(device_resume, xbd_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xbd_backend_changed), { 0, 0 } }; static driver_t xbd_driver = { "xbd", xbd_methods, sizeof(struct xbd_softc), }; devclass_t xbd_devclass; DRIVER_MODULE(xbd, xenbusb_front, xbd_driver, xbd_devclass, 0, 0); diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h index 0f7d6cb124d8..9c803bc2eacf 100644 --- a/sys/dev/xen/blkfront/block.h +++ b/sys/dev/xen/blkfront/block.h @@ -1,349 +1,349 @@ /* * XenBSD block device driver * * Copyright (c) 2010-2013 Spectra Logic Corporation * Copyright (c) 2009 Scott Long, Yahoo! * Copyright (c) 2009 Frank Suchomel, Citrix * Copyright (c) 2009 Doug F. Rabson, Citrix * Copyright (c) 2005 Kip Macy * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. 
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef __XEN_BLKFRONT_BLOCK_H__ #define __XEN_BLKFRONT_BLOCK_H__ #include /** * Given a number of blkif segments, compute the maximum I/O size supported. * * \note This calculation assumes that all but the first and last segments * of the I/O are fully utilized. * * \note We reserve a segement from the maximum supported by the transport to * guarantee we can handle an unaligned transfer without the need to * use a bounce buffer. */ #define XBD_SEGS_TO_SIZE(segs) \ (((segs) - 1) * PAGE_SIZE) /** * Compute the maximum number of blkif segments requried to represent * an I/O of the given size. * * \note This calculation assumes that all but the first and last segments * of the I/O are fully utilized. * * \note We reserve a segement to guarantee we can handle an unaligned * transfer without the need to use a bounce buffer. */ #define XBD_SIZE_TO_SEGS(size) \ ((size / PAGE_SIZE) + 1) /** * The maximum number of outstanding requests blocks (request headers plus * additional segment blocks) we will allow in a negotiated block-front/back * communication channel. */ #define XBD_MAX_REQUESTS 256 /** * The maximum mapped region size per request we will allow in a negotiated * block-front/back communication channel. */ #define XBD_MAX_REQUEST_SIZE \ MIN(MAXPHYS, XBD_SEGS_TO_SIZE(BLKIF_MAX_SEGMENTS_PER_REQUEST)) /** * The maximum number of segments (within a request header and accompanying * segment blocks) per request we will allow in a negotiated block-front/back * communication channel. */ #define XBD_MAX_SEGMENTS_PER_REQUEST \ (MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \ XBD_SIZE_TO_SEGS(XBD_MAX_REQUEST_SIZE))) /** * The maximum number of shared memory ring pages we will allow in a * negotiated block-front/back communication channel. Allow enough * ring space for all requests to be XBD_MAX_REQUEST_SIZE'd. */ #define XBD_MAX_RING_PAGES \ BLKIF_RING_PAGES(BLKIF_SEGS_TO_BLOCKS(XBD_MAX_SEGMENTS_PER_REQUEST) \ * XBD_MAX_REQUESTS) typedef enum { XBDCF_Q_MASK = 0xFF, /* This command has contributed to xbd_qfrozen_cnt. */ XBDCF_FROZEN = 1<<8, /* Freeze the command queue on dispatch (i.e. single step command). */ XBDCF_Q_FREEZE = 1<<9, /* Bus DMA returned EINPROGRESS for this command. 
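The sizing macros above hold one segment in reserve so an unaligned transfer never needs a bounce buffer: N segments promise only (N - 1) pages of payload, and a transfer of S bytes is budgeted S / PAGE_SIZE + 1 segments. The standalone check below runs the numbers with assumed values (4 KiB pages, 128 KiB MAXPHYS, and the classic blkif limit of 11 segments per request).

// Standalone check of the XBD_SEGS_TO_SIZE()/XBD_SIZE_TO_SEGS() arithmetic.
// PAGE_SIZE, MAXPHYS and the per-request segment limit are assumed values.
#include <stdio.h>

#define EX_PAGE_SIZE    4096UL
#define EX_MAXPHYS      (128 * 1024UL)
#define EX_MAX_SEGS     11UL

// One segment is held back so an unaligned transfer never needs a bounce
// buffer: only (segs - 1) full pages of payload are promised.
#define SEGS_TO_SIZE(segs)      (((segs) - 1) * EX_PAGE_SIZE)
#define SIZE_TO_SEGS(size)      (((size) / EX_PAGE_SIZE) + 1)
#define MIN_EX(a, b)            ((a) < (b) ? (a) : (b))

int
main(void)
{
        unsigned long max_size, max_segs;

        max_size = MIN_EX(EX_MAXPHYS, SEGS_TO_SIZE(EX_MAX_SEGS));
        max_segs = MIN_EX(EX_MAX_SEGS, SIZE_TO_SEGS(max_size));

        printf("SEGS_TO_SIZE(%lu) = %lu bytes\n", EX_MAX_SEGS,
            SEGS_TO_SIZE(EX_MAX_SEGS));                 // 40960
        printf("max request size  = %lu bytes\n", max_size);
        printf("max segments      = %lu\n", max_segs);  // round-trips to 11
        return (0);
}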
*/ XBDCF_ASYNC_MAPPING = 1<<10, XBDCF_INITIALIZER = XBDCF_Q_MASK } xbdc_flag_t; struct xbd_command; typedef void xbd_cbcf_t(struct xbd_command *); struct xbd_command { TAILQ_ENTRY(xbd_command) cm_link; struct xbd_softc *cm_sc; xbdc_flag_t cm_flags; bus_dmamap_t cm_map; uint64_t cm_id; grant_ref_t *cm_sg_refs; struct bio *cm_bp; grant_ref_t cm_gref_head; void *cm_data; size_t cm_datalen; u_int cm_nseg; int cm_operation; blkif_sector_t cm_sector_number; int cm_status; xbd_cbcf_t *cm_complete; }; typedef enum { XBD_Q_FREE, XBD_Q_READY, XBD_Q_BUSY, XBD_Q_COMPLETE, XBD_Q_BIO, XBD_Q_COUNT, XBD_Q_NONE = XBDCF_Q_MASK } xbd_q_index_t; typedef struct xbd_cm_q { TAILQ_HEAD(, xbd_command) q_tailq; uint32_t q_length; uint32_t q_max; } xbd_cm_q_t; typedef enum { XBD_STATE_DISCONNECTED, XBD_STATE_CONNECTED, XBD_STATE_SUSPENDED } xbd_state_t; typedef enum { XBDF_NONE = 0, XBDF_OPEN = 1 << 0, /* drive is open (can't shut down) */ XBDF_BARRIER = 1 << 1, /* backend supports barriers */ XBDF_FLUSH = 1 << 2, /* backend supports flush */ XBDF_READY = 1 << 3, /* Is ready */ XBDF_CM_SHORTAGE = 1 << 4, /* Free cm resource shortage active. */ XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */ XBDF_WAIT_IDLE = 1 << 6 /* * No new work until oustanding work * completes. */ } xbd_flag_t; /* * We have one of these per vbd, whether ide, scsi or 'other'. */ struct xbd_softc { device_t xbd_dev; struct disk *xbd_disk; /* disk params */ struct bio_queue_head xbd_bioq; /* sort queue */ int xbd_unit; xbd_flag_t xbd_flags; int xbd_qfrozen_cnt; int xbd_vdevice; xbd_state_t xbd_state; u_int xbd_ring_pages; uint32_t xbd_max_requests; uint32_t xbd_max_request_segments; uint32_t xbd_max_request_blocks; uint32_t xbd_max_request_size; grant_ref_t xbd_ring_ref[XBD_MAX_RING_PAGES]; blkif_front_ring_t xbd_ring; - unsigned int xbd_irq; + xen_intr_handle_t xen_intr_handle; struct gnttab_free_callback xbd_callback; xbd_cm_q_t xbd_cm_q[XBD_Q_COUNT]; bus_dma_tag_t xbd_io_dmat; /** * The number of people holding this device open. We won't allow a * hot-unplug unless this is 0. 
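In the flag layout above, the low byte of cm_flags (XBDCF_Q_MASK) records which queue a command is currently on, while the higher bits carry independent state such as XBDCF_FROZEN, and XBDCF_INITIALIZER parks the queue field at "none". A small standalone demo of keeping an index in the low byte of a flags word:

// Standalone demo of storing a queue index in the low byte of a flags word,
// as the xbdc_flag_t/xbd_q_index_t encoding above does.
#include <assert.h>
#include <stdio.h>

#define Q_MASK          0xFF            // low byte: current queue index
#define F_FROZEN        (1 << 8)        // unrelated state bit
#define Q_FREE          0
#define Q_BUSY          2
#define Q_NONE          Q_MASK          // "on no queue" sentinel

static unsigned
set_queue(unsigned flags, unsigned idx)
{
        return ((flags & ~Q_MASK) | idx);
}

int
main(void)
{
        unsigned flags = Q_NONE;        // freshly initialized command

        flags = set_queue(flags, Q_BUSY) | F_FROZEN;
        assert((flags & Q_MASK) == Q_BUSY);     // queue index survives...
        assert((flags & F_FROZEN) != 0);        // ...alongside the state bit
        flags = set_queue(flags, Q_NONE);
        printf("queue field now 0x%02x, frozen bit %s\n", flags & Q_MASK,
            (flags & F_FROZEN) ? "still set" : "clear");
        return (0);
}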
*/ int xbd_users; struct mtx xbd_io_lock; struct xbd_command *xbd_shadow; }; int xbd_instance_create(struct xbd_softc *, blkif_sector_t sectors, int device, uint16_t vdisk_info, unsigned long sector_size); static inline void xbd_added_qentry(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_cm_q *cmq; cmq = &sc->xbd_cm_q[index]; cmq->q_length++; if (cmq->q_length > cmq->q_max) cmq->q_max = cmq->q_length; } static inline void xbd_removed_qentry(struct xbd_softc *sc, xbd_q_index_t index) { sc->xbd_cm_q[index].q_length--; } static inline uint32_t xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index) { return (sc->xbd_cm_q[index].q_length); } static inline void xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_cm_q *cmq; cmq = &sc->xbd_cm_q[index]; TAILQ_INIT(&cmq->q_tailq); cmq->q_length = 0; cmq->q_max = 0; } static inline void xbd_enqueue_cm(struct xbd_command *cm, xbd_q_index_t index) { KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE) panic("%s: command %p is already on queue %d.", __func__, cm, cm->cm_flags & XBDCF_Q_MASK); TAILQ_INSERT_TAIL(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= index; xbd_added_qentry(cm->cm_sc, index); } static inline void xbd_requeue_cm(struct xbd_command *cm, xbd_q_index_t index) { KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm->cm_flags & XBDCF_Q_MASK) != XBD_Q_NONE) panic("%s: command %p is already on queue %d.", __func__, cm, cm->cm_flags & XBDCF_Q_MASK); TAILQ_INSERT_HEAD(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= index; xbd_added_qentry(cm->cm_sc, index); } static inline struct xbd_command * xbd_dequeue_cm(struct xbd_softc *sc, xbd_q_index_t index) { struct xbd_command *cm; KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if ((cm = TAILQ_FIRST(&sc->xbd_cm_q[index].q_tailq)) != NULL) { if ((cm->cm_flags & XBDCF_Q_MASK) != index) { panic("%s: command %p is on queue %d, " "not specified queue %d", __func__, cm, cm->cm_flags & XBDCF_Q_MASK, index); } TAILQ_REMOVE(&sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= XBD_Q_NONE; xbd_removed_qentry(cm->cm_sc, index); } return (cm); } static inline void xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index) { xbd_q_index_t index; index = cm->cm_flags & XBDCF_Q_MASK; KASSERT(index != XBD_Q_BIO, ("%s: Commands cannot access the bio queue.", __func__)); if (index != expected_index) { panic("%s: command %p is on queue %d, not specified queue %d", __func__, cm, index, expected_index); } TAILQ_REMOVE(&cm->cm_sc->xbd_cm_q[index].q_tailq, cm, cm_link); cm->cm_flags &= ~XBDCF_Q_MASK; cm->cm_flags |= XBD_Q_NONE; xbd_removed_qentry(cm->cm_sc, index); } static inline void xbd_initq_bio(struct xbd_softc *sc) { bioq_init(&sc->xbd_bioq); } static inline void xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp) { bioq_insert_tail(&sc->xbd_bioq, bp); xbd_added_qentry(sc, XBD_Q_BIO); } static inline void xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp) { bioq_insert_head(&sc->xbd_bioq, bp); xbd_added_qentry(sc, XBD_Q_BIO); } static inline struct bio * xbd_dequeue_bio(struct xbd_softc *sc) { struct bio *bp; if ((bp = bioq_first(&sc->xbd_bioq)) != NULL) { bioq_remove(&sc->xbd_bioq, bp); xbd_removed_qentry(sc, XBD_Q_BIO); } return (bp); } static inline void xbd_initqs(struct 
xbd_softc *sc) { u_int index; for (index = 0; index < XBD_Q_COUNT; index++) xbd_initq_cm(sc, index); xbd_initq_bio(sc); } #endif /* __XEN_BLKFRONT_BLOCK_H__ */ diff --git a/sys/dev/xen/console/console.c b/sys/dev/xen/console/console.c index 6281bf2165e0..65a0e7dad961 100644 --- a/sys/dev/xen/console/console.c +++ b/sys/dev/xen/console/console.c @@ -1,444 +1,442 @@ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include "opt_ddb.h" #ifdef DDB #include #endif static char driver_name[] = "xc"; devclass_t xc_devclass; /* do not make static */ static void xcoutwakeup(struct tty *); static void xc_timeout(void *); static void __xencons_tx_flush(void); static boolean_t xcons_putc(int c); /* switch console so that shutdown can occur gracefully */ static void xc_shutdown(void *arg, int howto); static int xc_mute; static void xcons_force_flush(void); static void xencons_priv_interrupt(void *); static cn_probe_t xc_cnprobe; static cn_init_t xc_cninit; static cn_term_t xc_cnterm; static cn_getc_t xc_cngetc; static cn_putc_t xc_cnputc; static cn_grab_t xc_cngrab; static cn_ungrab_t xc_cnungrab; #define XC_POLLTIME (hz/10) CONSOLE_DRIVER(xc); static int xen_console_up; static boolean_t xc_start_needed; static struct callout xc_callout; struct mtx cn_mtx; #define RBUF_SIZE 1024 #define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) #define WBUF_SIZE 4096 #define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) static char wbuf[WBUF_SIZE]; static char rbuf[RBUF_SIZE]; static int rc, rp; static unsigned int cnsl_evt_reg; static unsigned int wc, wp; /* write_cons, write_prod */ +xen_intr_handle_t xen_intr_handle; +device_t xencons_dev; #ifdef KDB static int xc_altbrk; #endif #define CDEV_MAJOR 12 #define XCUNIT(x) (dev2unit(x)) #define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN)) #define CN_LOCK_INIT(x, _name) \ mtx_init(&x, _name, NULL, MTX_SPIN|MTX_RECURSE) #define CN_LOCK(l) \ do { \ if (panicstr == NULL) \ mtx_lock_spin(&(l)); \ } while (0) #define CN_UNLOCK(l) \ do { \ if (panicstr == NULL) \ mtx_unlock_spin(&(l)); \ } while (0) #define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED) #define CN_LOCK_DESTROY(x) mtx_destroy(&x) static struct tty *xccons; static tsw_open_t xcopen; static tsw_close_t xcclose; static struct ttydevsw xc_ttydevsw = { .tsw_flags = TF_NOPREFIX, .tsw_open = xcopen, .tsw_close = xcclose, .tsw_outwakeup = xcoutwakeup, }; static void xc_cnprobe(struct consdev *cp) { cp->cn_pri = CN_REMOTE; sprintf(cp->cn_name, "%s0", driver_name); } static void xc_cninit(struct consdev *cp) { CN_LOCK_INIT(cn_mtx,"XCONS LOCK"); } static void xc_cnterm(struct consdev *cp) { } static void xc_cngrab(struct consdev *cp) { } static void xc_cnungrab(struct consdev *cp) { } static int xc_cngetc(struct consdev *dev) { int ret; if (xencons_has_input()) xencons_handle_input(NULL); CN_LOCK(cn_mtx); if ((rp - rc) && !xc_mute) { /* we need to return only one char */ ret = (int)rbuf[RBUF_MASK(rc)]; rc++; } else ret = -1; CN_UNLOCK(cn_mtx); return(ret); } static void xc_cnputc_domu(struct consdev *dev, int c) { xcons_putc(c); } static void xc_cnputc_dom0(struct consdev *dev, int c) { HYPERVISOR_console_io(CONSOLEIO_write, 1, (char *)&c); } static void xc_cnputc(struct consdev *dev, int c) { if (xen_start_info->flags & SIF_INITDOMAIN) xc_cnputc_dom0(dev, c); else xc_cnputc_domu(dev, c); } extern int db_active; static boolean_t xcons_putc(int c) { 
int force_flush = xc_mute || #ifdef DDB db_active || #endif panicstr; /* we're not gonna recover, so force * flush */ if ((wp-wc) < (WBUF_SIZE-1)) { if ((wbuf[WBUF_MASK(wp++)] = c) == '\n') { wbuf[WBUF_MASK(wp++)] = '\r'; #ifdef notyet if (force_flush) xcons_force_flush(); #endif } } else if (force_flush) { #ifdef notyet xcons_force_flush(); #endif } if (cnsl_evt_reg) __xencons_tx_flush(); /* inform start path that we're pretty full */ return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE; } static void xc_identify(driver_t *driver, device_t parent) { device_t child; child = BUS_ADD_CHILD(parent, 0, driver_name, 0); device_set_driver(child, driver); device_set_desc(child, "Xen Console"); } static int xc_probe(device_t dev) { return (0); } static int xc_attach(device_t dev) { int error; + xencons_dev = dev; xccons = tty_alloc(&xc_ttydevsw, NULL); tty_makedev(xccons, NULL, "xc%r", 0); callout_init(&xc_callout, 0); xencons_ring_init(); cnsl_evt_reg = 1; callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons); if (xen_start_info->flags & SIF_INITDOMAIN) { - error = bind_virq_to_irqhandler( - VIRQ_CONSOLE, - 0, - "console", - NULL, - xencons_priv_interrupt, NULL, - INTR_TYPE_TTY, NULL); - - KASSERT(error >= 0, ("can't register console interrupt")); + error = xen_intr_bind_virq(dev, VIRQ_CONSOLE, 0, NULL, + xencons_priv_interrupt, NULL, + INTR_TYPE_TTY, &xen_intr_handle); + KASSERT(error >= 0, ("can't register console interrupt")); } /* register handler to flush console on shutdown */ if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown, NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) printf("xencons: shutdown event registration failed!\n"); return (0); } /* * return 0 for all console input, force flush all output. */ static void xc_shutdown(void *arg, int howto) { xc_mute = 1; xcons_force_flush(); } void xencons_rx(char *buf, unsigned len) { int i; struct tty *tp = xccons; if (xen_console_up #ifdef DDB && !kdb_active #endif ) { tty_lock(tp); for (i = 0; i < len; i++) { #ifdef KDB kdb_alt_break(buf[i], &xc_altbrk); #endif ttydisc_rint(tp, buf[i], 0); } ttydisc_rint_done(tp); tty_unlock(tp); } else { CN_LOCK(cn_mtx); for (i = 0; i < len; i++) rbuf[RBUF_MASK(rp++)] = buf[i]; CN_UNLOCK(cn_mtx); } } static void __xencons_tx_flush(void) { int sz; CN_LOCK(cn_mtx); while (wc != wp) { int sent; sz = wp - wc; if (sz > (WBUF_SIZE - WBUF_MASK(wc))) sz = WBUF_SIZE - WBUF_MASK(wc); if (xen_start_info->flags & SIF_INITDOMAIN) { HYPERVISOR_console_io(CONSOLEIO_write, sz, &wbuf[WBUF_MASK(wc)]); wc += sz; } else { sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); if (sent == 0) break; wc += sent; } } CN_UNLOCK(cn_mtx); } void xencons_tx(void) { __xencons_tx_flush(); } static void xencons_priv_interrupt(void *arg) { static char rbuf[16]; int l; while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0) xencons_rx(rbuf, l); xencons_tx(); } static int xcopen(struct tty *tp) { xen_console_up = 1; return (0); } static void xcclose(struct tty *tp) { xen_console_up = 0; } static inline int __xencons_put_char(int ch) { char _ch = (char)ch; if ((wp - wc) == WBUF_SIZE) return 0; wbuf[WBUF_MASK(wp++)] = _ch; return 1; } static void xcoutwakeup(struct tty *tp) { boolean_t cons_full = FALSE; char c; while (ttydisc_getc(tp, &c, 1) == 1 && !cons_full) cons_full = xcons_putc(c); if (cons_full) { /* let the timeout kick us in a bit */ xc_start_needed = TRUE; } } static void xc_timeout(void *v) { struct tty *tp; int c; tp = (struct tty *)v; tty_lock(tp); while ((c = xc_cngetc(NULL)) != -1) ttydisc_rint(tp, c, 0); if 
(xc_start_needed) { xc_start_needed = FALSE; xcoutwakeup(tp); } tty_unlock(tp); callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp); } static device_method_t xc_methods[] = { DEVMETHOD(device_identify, xc_identify), DEVMETHOD(device_probe, xc_probe), DEVMETHOD(device_attach, xc_attach), DEVMETHOD_END }; static driver_t xc_driver = { driver_name, xc_methods, 0, }; /*** Forcibly flush console data before dying. ***/ void xcons_force_flush(void) { int sz; if (xen_start_info->flags & SIF_INITDOMAIN) return; /* Spin until console data is flushed through to the domain controller. */ while (wc != wp) { int sent = 0; if ((sz = wp - wc) == 0) continue; sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); if (sent > 0) wc += sent; } } DRIVER_MODULE(xc, nexus, xc_driver, xc_devclass, 0, 0); diff --git a/sys/dev/xen/console/xencons_ring.c b/sys/dev/xen/console/xencons_ring.c index 077d286f3fe0..3701551ea101 100644 --- a/sys/dev/xen/console/xencons_ring.c +++ b/sys/dev/xen/console/xencons_ring.c @@ -1,167 +1,169 @@ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include + +#include #include #include #include #include #include #include #include #include #include #define console_evtchn console.domU.evtchn -static unsigned int console_irq; +xen_intr_handle_t console_handle; extern char *console_page; extern struct mtx cn_mtx; +extern device_t xencons_dev; static inline struct xencons_interface * xencons_interface(void) { return (struct xencons_interface *)console_page; } int xencons_has_input(void) { struct xencons_interface *intf; intf = xencons_interface(); return (intf->in_cons != intf->in_prod); } int xencons_ring_send(const char *data, unsigned len) { struct xencons_interface *intf; XENCONS_RING_IDX cons, prod; int sent; intf = xencons_interface(); cons = intf->out_cons; prod = intf->out_prod; sent = 0; mb(); KASSERT((prod - cons) <= sizeof(intf->out), ("console send ring inconsistent")); while ((sent < len) && ((prod - cons) < sizeof(intf->out))) intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; wmb(); intf->out_prod = prod; - notify_remote_via_evtchn(xen_start_info->console_evtchn); + xen_intr_signal(console_handle); return sent; } static xencons_receiver_func *xencons_receiver; void xencons_handle_input(void *unused) { struct xencons_interface *intf; XENCONS_RING_IDX cons, prod; CN_LOCK(cn_mtx); intf = xencons_interface(); cons = intf->in_cons; prod = intf->in_prod; CN_UNLOCK(cn_mtx); /* XXX needs locking */ while (cons != prod) { xencons_rx(intf->in + MASK_XENCONS_IDX(cons, intf->in), 1); cons++; } mb(); intf->in_cons = cons; CN_LOCK(cn_mtx); - notify_remote_via_evtchn(xen_start_info->console_evtchn); + xen_intr_signal(console_handle); xencons_tx(); CN_UNLOCK(cn_mtx); } void xencons_ring_register_receiver(xencons_receiver_func *f) { xencons_receiver = f; } int xencons_ring_init(void) { int err; if (!xen_start_info->console_evtchn) return 0; - err = bind_caller_port_to_irqhandler(xen_start_info->console_evtchn, - "xencons", xencons_handle_input, NULL, - INTR_TYPE_MISC | INTR_MPSAFE, &console_irq); + err = xen_intr_bind_local_port(xencons_dev, + xen_start_info->console_evtchn, NULL, xencons_handle_input, NULL, + INTR_TYPE_MISC | INTR_MPSAFE, &console_handle); if (err) { return err; } return 0; } extern void xencons_suspend(void); extern void xencons_resume(void); void xencons_suspend(void) { if (!xen_start_info->console_evtchn) return; - 
unbind_from_irqhandler(console_irq); + xen_intr_unbind(&console_handle); } void xencons_resume(void) { (void)xencons_ring_init(); } /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 8 * tab-width: 4 * indent-tabs-mode: t * End: */ diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index 18f42bbe653b..649f28160407 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -1,518 +1,521 @@ /*- * Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. */ /*- * PV suspend/resume support: * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christian Limpach. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * HVM suspend/resume support: * * Copyright (c) 2008 Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /** * \file control.c * * \brief Device driver to repond to control domain events that impact * this VM. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef XENHVM #include #include #endif #include #include -#include +#include #include #include #include +#include #include #include #include #include #include #include #include +#include +#include + /*--------------------------- Forward Declarations --------------------------*/ /** Function signature for shutdown event handlers. */ typedef void (xctrl_shutdown_handler_t)(void); static xctrl_shutdown_handler_t xctrl_poweroff; static xctrl_shutdown_handler_t xctrl_reboot; static xctrl_shutdown_handler_t xctrl_suspend; static xctrl_shutdown_handler_t xctrl_crash; static xctrl_shutdown_handler_t xctrl_halt; /*-------------------------- Private Data Structures -------------------------*/ /** Element type for lookup table of event name to handler. */ struct xctrl_shutdown_reason { const char *name; xctrl_shutdown_handler_t *handler; }; /** Lookup table for shutdown event name to handler. 
*/ static const struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = { { "poweroff", xctrl_poweroff }, { "reboot", xctrl_reboot }, { "suspend", xctrl_suspend }, { "crash", xctrl_crash }, { "halt", xctrl_halt }, }; struct xctrl_softc { struct xs_watch xctrl_watch; }; /*------------------------------ Event Handlers ------------------------------*/ static void xctrl_poweroff() { shutdown_nice(RB_POWEROFF|RB_HALT); } static void xctrl_reboot() { shutdown_nice(0); } #ifndef XENHVM extern void xencons_suspend(void); extern void xencons_resume(void); /* Full PV mode suspension. */ static void xctrl_suspend() { int i, j, k, fpp; unsigned long max_pfn, start_info_mfn; EVENTHANDLER_INVOKE(power_suspend); #ifdef SMP struct thread *td; cpuset_t map; u_int cpuid; /* * Bind us to CPU 0 and stop any other VCPUs. */ td = curthread; thread_lock(td); sched_bind(td, 0); thread_unlock(td); cpuid = PCPU_GET(cpuid); KASSERT(cpuid == 0, ("xen_suspend: not running on cpu 0")); map = all_cpus; CPU_CLR(cpuid, &map); CPU_NAND(&map, &stopped_cpus); if (!CPU_EMPTY(&map)) stop_cpus(map); #endif /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE * drivers need this. */ mtx_lock(&Giant); if (DEVICE_SUSPEND(root_bus) != 0) { mtx_unlock(&Giant); printf("%s: device_suspend failed\n", __func__); #ifdef SMP if (!CPU_EMPTY(&map)) restart_cpus(map); #endif return; } mtx_unlock(&Giant); local_irq_disable(); xencons_suspend(); gnttab_suspend(); + intr_suspend(); max_pfn = HYPERVISOR_shared_info->arch.max_pfn; void *shared_info = HYPERVISOR_shared_info; HYPERVISOR_shared_info = NULL; pmap_kremove((vm_offset_t) shared_info); PT_UPDATES_FLUSH(); xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn); xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn); /* * We'll stop somewhere inside this hypercall. When it returns, * we'll start resuming after the restore. */ start_info_mfn = VTOMFN(xen_start_info); pmap_suspend(); HYPERVISOR_suspend(start_info_mfn); pmap_resume(); pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info); HYPERVISOR_shared_info = shared_info; HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = VTOMFN(xen_pfn_to_mfn_frame_list_list); fpp = PAGE_SIZE/sizeof(unsigned long); for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) { if ((j % fpp) == 0) { k++; xen_pfn_to_mfn_frame_list_list[k] = VTOMFN(xen_pfn_to_mfn_frame_list[k]); j = 0; } xen_pfn_to_mfn_frame_list[k][j] = VTOMFN(&xen_phys_machine[i]); } HYPERVISOR_shared_info->arch.max_pfn = max_pfn; gnttab_resume(); - irq_resume(); + intr_resume(); local_irq_enable(); xencons_resume(); #ifdef CONFIG_SMP for_each_cpu(i) vcpu_prepare(i); #endif /* * Only resume xenbus /after/ we've prepared our VCPUs; otherwise * the VCPU hotplug callback can race with our vcpu_prepare */ mtx_lock(&Giant); DEVICE_RESUME(root_bus); mtx_unlock(&Giant); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); if (!CPU_EMPTY(&map)) restart_cpus(map); #endif EVENTHANDLER_INVOKE(power_resume); } static void xen_pv_shutdown_final(void *arg, int howto) { /* * Inform the hypervisor that shutdown is complete. * This is not necessary in HVM domains since Xen * emulates ACPI in that mode and FreeBSD's ACPI * support will request this transition. */ if (howto & (RB_HALT | RB_POWEROFF)) HYPERVISOR_shutdown(SHUTDOWN_poweroff); else HYPERVISOR_shutdown(SHUTDOWN_reboot); } #else extern void xenpci_resume(void); /* HVM mode suspension. 
*/ static void xctrl_suspend() { int suspend_cancelled; EVENTHANDLER_INVOKE(power_suspend); /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE * drivers need this. */ mtx_lock(&Giant); if (DEVICE_SUSPEND(root_bus) != 0) { mtx_unlock(&Giant); printf("%s: device_suspend failed\n", __func__); return; } mtx_unlock(&Giant); /* * Prevent any races with evtchn_interrupt() handler. */ disable_intr(); - irq_suspend(); + intr_suspend(); suspend_cancelled = HYPERVISOR_suspend(0); - if (suspend_cancelled) - irq_resume(); - else - xenpci_resume(); + + intr_resume(); /* * Re-enable interrupts and put the scheduler back to normal. */ enable_intr(); /* * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or * similar. */ mtx_lock(&Giant); if (!suspend_cancelled) DEVICE_RESUME(root_bus); mtx_unlock(&Giant); EVENTHANDLER_INVOKE(power_resume); } #endif static void xctrl_crash() { panic("Xen directed crash"); } static void xctrl_halt() { shutdown_nice(RB_HALT); } /*------------------------------ Event Reception -----------------------------*/ static void xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len) { const struct xctrl_shutdown_reason *reason; const struct xctrl_shutdown_reason *last_reason; char *result; int error; int result_len; error = xs_read(XST_NIL, "control", "shutdown", &result_len, (void **)&result); if (error != 0) return; reason = xctrl_shutdown_reasons; last_reason = reason + nitems(xctrl_shutdown_reasons); while (reason < last_reason) { if (!strcmp(result, reason->name)) { reason->handler(); break; } reason++; } free(result, M_XENSTORE); } /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Identify instances of this device type in the system. * * \param driver The driver performing this identify action. * \param parent The NewBus parent device for any devices this method adds. */ static void xctrl_identify(driver_t *driver __unused, device_t parent) { /* * A single device instance for our driver is always present * in a system operating under Xen. */ BUS_ADD_CHILD(parent, 0, driver->name, 0); } /** * \brief Probe for the existance of the Xen Control device * * \param dev NewBus device_t for this Xen control instance. * * \return Always returns 0 indicating success. */ static int xctrl_probe(device_t dev) { device_set_desc(dev, "Xen Control Device"); return (0); } /** * \brief Attach the Xen control device. * * \param dev NewBus device_t for this Xen control instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xctrl_attach(device_t dev) { struct xctrl_softc *xctrl; xctrl = device_get_softc(dev); /* Activate watch */ xctrl->xctrl_watch.node = "control/shutdown"; xctrl->xctrl_watch.callback = xctrl_on_watch_event; xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl; xs_register_watch(&xctrl->xctrl_watch); #ifndef XENHVM EVENTHANDLER_REGISTER(shutdown_final, xen_pv_shutdown_final, NULL, SHUTDOWN_PRI_LAST); #endif return (0); } /** * \brief Detach the Xen control device. * * \param dev NewBus device_t for this Xen control device instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. 
*/ static int xctrl_detach(device_t dev) { struct xctrl_softc *xctrl; xctrl = device_get_softc(dev); /* Release watch */ xs_unregister_watch(&xctrl->xctrl_watch); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xctrl_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xctrl_identify), DEVMETHOD(device_probe, xctrl_probe), DEVMETHOD(device_attach, xctrl_attach), DEVMETHOD(device_detach, xctrl_detach), DEVMETHOD_END }; DEFINE_CLASS_0(xctrl, xctrl_driver, xctrl_methods, sizeof(struct xctrl_softc)); devclass_t xctrl_devclass; DRIVER_MODULE(xctrl, xenstore, xctrl_driver, xctrl_devclass, NULL, NULL); diff --git a/sys/dev/xen/netback/netback.c b/sys/dev/xen/netback/netback.c index 9463bad7a8b1..8f0286c31f06 100644 --- a/sys/dev/xen/netback/netback.c +++ b/sys/dev/xen/netback/netback.c @@ -1,2537 +1,2535 @@ /*- * Copyright (c) 2009-2011 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * Alan Somers (Spectra Logic Corporation) * John Suykerbuyk (Spectra Logic Corporation) */ #include __FBSDID("$FreeBSD$"); /** * \file netback.c * * \brief Device driver supporting the vending of network access * from this FreeBSD domain to other domains. */ #include "opt_inet.h" #include "opt_global.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 700000 #include #endif #include #include #include #include #include #include #include #include -#include -#include -#include +#include +#include #include #include #include +#include + /*--------------------------- Compile-time Tunables --------------------------*/ /*---------------------------------- Macros ----------------------------------*/ /** * Custom malloc type for all driver allocations. 
*/ static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data"); #define XNB_SG 1 /* netback driver supports feature-sg */ #define XNB_GSO_TCPV4 1 /* netback driver supports feature-gso-tcpv4 */ #define XNB_RX_COPY 1 /* netback driver supports feature-rx-copy */ #define XNB_RX_FLIP 0 /* netback driver does not support feature-rx-flip */ #undef XNB_DEBUG #define XNB_DEBUG /* hardcode on during development */ #ifdef XNB_DEBUG #define DPRINTF(fmt, args...) \ printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) do {} while (0) #endif /* Default length for stack-allocated grant tables */ #define GNTTAB_LEN (64) /* Features supported by all backends. TSO and LRO can be negotiated */ #define XNB_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) /** * Two argument version of the standard macro. Second argument is a tentative * value of req_cons */ #define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({ \ unsigned int req = (_r)->sring->req_prod - cons; \ unsigned int rsp = RING_SIZE(_r) - \ (cons - (_r)->rsp_prod_pvt); \ req < rsp ? req : rsp; \ }) #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) #define virt_to_offset(x) ((x) & (PAGE_SIZE - 1)) /** * Predefined array type of grant table copy descriptors. Used to pass around * statically allocated memory structures. */ typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN]; /*--------------------------- Forward Declarations ---------------------------*/ struct xnb_softc; struct xnb_pkt; static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) __printflike(3,4); static int xnb_shutdown(struct xnb_softc *xnb); static int create_netdev(device_t dev); static int xnb_detach(device_t dev); static int xen_net_read_mac(device_t dev, uint8_t mac[]); static int xnb_ifmedia_upd(struct ifnet *ifp); static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); static void xnb_intr(void *arg); static int xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab); static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, struct ifnet *ifnet, gnttab_copy_table gnttab); static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start); static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error); static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp); static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id); static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries); static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space); static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id); static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring); static void xnb_add_mbuf_cksum(struct mbuf *mbufc); static void xnb_stop(struct xnb_softc*); static int xnb_ioctl(struct ifnet*, u_long, caddr_t); static void xnb_start_locked(struct ifnet*); static void xnb_start(struct ifnet*); static void xnb_ifinit_locked(struct xnb_softc*); static void xnb_ifinit(void*); 
#ifdef XNB_DEBUG static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS); static int xnb_dump_rings(SYSCTL_HANDLER_ARGS); #endif /*------------------------------ Data Structures -----------------------------*/ /** * Representation of a xennet packet. Simplified version of a packet as * stored in the Xen tx ring. Applicable to both RX and TX packets */ struct xnb_pkt{ /** * Array index of the first data-bearing (eg, not extra info) entry * for this packet */ RING_IDX car; /** * Array index of the second data-bearing entry for this packet. * Invalid if the packet has only one data-bearing entry. If the * packet has more than two data-bearing entries, then the second * through the last will be sequential modulo the ring size */ RING_IDX cdr; /** * Optional extra info. Only valid if flags contains * NETTXF_extra_info. Note that extra.type will always be * XEN_NETIF_EXTRA_TYPE_GSO. Currently, no known netfront or netback * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_* */ netif_extra_info_t extra; /** Size of entire packet in bytes. */ uint16_t size; /** The size of the first entry's data in bytes */ uint16_t car_size; /** * Either NETTXF_ or NETRXF_ flags. Note that the flag values are * not the same for TX and RX packets */ uint16_t flags; /** * The number of valid data-bearing entries (either netif_tx_request's * or netif_rx_response's) in the packet. If this is 0, it means the * entire packet is invalid. */ uint16_t list_len; /** There was an error processing the packet */ uint8_t error; }; /** xnb_pkt method: initialize it */ static inline void xnb_pkt_initialize(struct xnb_pkt *pxnb) { bzero(pxnb, sizeof(*pxnb)); } /** xnb_pkt method: mark the packet as valid */ static inline void xnb_pkt_validate(struct xnb_pkt *pxnb) { pxnb->error = 0; }; /** xnb_pkt method: mark the packet as invalid */ static inline void xnb_pkt_invalidate(struct xnb_pkt *pxnb) { pxnb->error = 1; }; /** xnb_pkt method: Check whether the packet is valid */ static inline int xnb_pkt_is_valid(const struct xnb_pkt *pxnb) { return (! pxnb->error); } #ifdef XNB_DEBUG /** xnb_pkt method: print the packet's contents in human-readable format*/ static void __unused xnb_dump_pkt(const struct xnb_pkt *pkt) { if (pkt == NULL) { DPRINTF("Was passed a null pointer.\n"); return; } DPRINTF("pkt address= %p\n", pkt); DPRINTF("pkt->size=%d\n", pkt->size); DPRINTF("pkt->car_size=%d\n", pkt->car_size); DPRINTF("pkt->flags=0x%04x\n", pkt->flags); DPRINTF("pkt->list_len=%d\n", pkt->list_len); /* DPRINTF("pkt->extra"); TODO */ DPRINTF("pkt->car=%d\n", pkt->car); DPRINTF("pkt->cdr=%d\n", pkt->cdr); DPRINTF("pkt->error=%d\n", pkt->error); } #endif /* XNB_DEBUG */ static void xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq) { if (txreq != NULL) { DPRINTF("netif_tx_request index =%u\n", idx); DPRINTF("netif_tx_request.gref =%u\n", txreq->gref); DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset); DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags); DPRINTF("netif_tx_request.id =%hu\n", txreq->id); DPRINTF("netif_tx_request.size =%hu\n", txreq->size); } } /** * \brief Configuration data for a shared memory request ring * used to communicate with the front-end client of this * this driver. */ struct xnb_ring_config { /** * Runtime structures for ring access. Unfortunately, TX and RX rings * use different data structures, and that cannot be changed since it * is part of the interdomain protocol. 
*/ union{ netif_rx_back_ring_t rx_ring; netif_tx_back_ring_t tx_ring; } back_ring; /** * The device bus address returned by the hypervisor when * mapping the ring and required to unmap it when a connection * is torn down. */ uint64_t bus_addr; /** The pseudo-physical address where ring memory is mapped.*/ uint64_t gnt_addr; /** KVA address where ring memory is mapped. */ vm_offset_t va; /** * Grant table handles, one per-ring page, returned by the * hyperpervisor upon mapping of the ring and required to * unmap it when a connection is torn down. */ grant_handle_t handle; /** The number of ring pages mapped for the current connection. */ unsigned ring_pages; /** * The grant references, one per-ring page, supplied by the * front-end, allowing us to reference the ring pages in the * front-end's domain and to map these pages into our own domain. */ grant_ref_t ring_ref; }; /** * Per-instance connection state flags. */ typedef enum { /** Communication with the front-end has been established. */ XNBF_RING_CONNECTED = 0x01, /** * Front-end requests exist in the ring and are waiting for * xnb_xen_req objects to free up. */ XNBF_RESOURCE_SHORTAGE = 0x02, /** Connection teardown has started. */ XNBF_SHUTDOWN = 0x04, /** A thread is already performing shutdown processing. */ XNBF_IN_SHUTDOWN = 0x08 } xnb_flag_t; /** * Types of rings. Used for array indices and to identify a ring's control * data structure type */ typedef enum{ XNB_RING_TYPE_TX = 0, /* ID of TX rings, used for array indices */ XNB_RING_TYPE_RX = 1, /* ID of RX rings, used for array indices */ XNB_NUM_RING_TYPES } xnb_ring_type_t; /** * Per-instance configuration data. */ struct xnb_softc { /** NewBus device corresponding to this instance. */ device_t dev; /* Media related fields */ /** Generic network media state */ struct ifmedia sc_media; /** Media carrier info */ struct ifnet *xnb_ifp; /** Our own private carrier state */ unsigned carrier; /** Device MAC Address */ uint8_t mac[ETHER_ADDR_LEN]; /* Xen related fields */ /** * \brief The netif protocol abi in effect. * * There are situations where the back and front ends can * have a different, native abi (e.g. intel x86_64 and * 32bit x86 domains on the same machine). The back-end * always accomodates the front-end's native abi. That * value is pulled from the XenStore and recorded here. */ int abi; /** * Name of the bridge to which this VIF is connected, if any * This field is dynamically allocated by xenbus and must be free()ed * when no longer needed */ char *bridge; /** The interrupt driven even channel used to signal ring events. */ evtchn_port_t evtchn; /** Xen device handle.*/ long handle; - /** IRQ mapping for the communication ring event channel. */ - int irq; + /** Handle to the communication ring event channel. */ + xen_intr_handle_t xen_intr_handle; /** * \brief Cached value of the front-end's domain id. * * This value is used at once for each mapped page in * a transaction. We cache it to avoid incuring the * cost of an ivar access every time this is needed. */ domid_t otherend_id; /** * Undocumented frontend feature. Has something to do with * scatter/gather IO */ uint8_t can_sg; /** Undocumented frontend feature */ uint8_t gso; /** Undocumented frontend feature */ uint8_t gso_prefix; /** Can checksum TCP/UDP over IPv4 */ uint8_t ip_csum; /* Implementation related fields */ /** * Preallocated grant table copy descriptor for RX operations. 
* Access must be protected by rx_lock */ gnttab_copy_table rx_gnttab; /** * Preallocated grant table copy descriptor for TX operations. * Access must be protected by tx_lock */ gnttab_copy_table tx_gnttab; #ifdef XENHVM /** * Resource representing allocated physical address space * associated with our per-instance kva region. */ struct resource *pseudo_phys_res; /** Resource id for allocated physical address space. */ int pseudo_phys_res_id; #endif /** Ring mapping and interrupt configuration data. */ struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES]; /** * Global pool of kva used for mapping remote domain ring * and I/O transaction data. */ vm_offset_t kva; /** Psuedo-physical address corresponding to kva. */ uint64_t gnt_base_addr; /** Various configuration and state bit flags. */ xnb_flag_t flags; /** Mutex protecting per-instance data in the receive path. */ struct mtx rx_lock; /** Mutex protecting per-instance data in the softc structure. */ struct mtx sc_lock; /** Mutex protecting per-instance data in the transmit path. */ struct mtx tx_lock; /** The size of the global kva pool. */ int kva_size; }; /*---------------------------- Debugging functions ---------------------------*/ #ifdef XNB_DEBUG static void __unused xnb_dump_gnttab_copy(const struct gnttab_copy *entry) { if (entry == NULL) { printf("NULL grant table pointer\n"); return; } if (entry->flags & GNTCOPY_dest_gref) printf("gnttab dest ref=\t%u\n", entry->dest.u.ref); else printf("gnttab dest gmfn=\t%lu\n", entry->dest.u.gmfn); printf("gnttab dest offset=\t%hu\n", entry->dest.offset); printf("gnttab dest domid=\t%hu\n", entry->dest.domid); if (entry->flags & GNTCOPY_source_gref) printf("gnttab source ref=\t%u\n", entry->source.u.ref); else printf("gnttab source gmfn=\t%lu\n", entry->source.u.gmfn); printf("gnttab source offset=\t%hu\n", entry->source.offset); printf("gnttab source domid=\t%hu\n", entry->source.domid); printf("gnttab len=\t%hu\n", entry->len); printf("gnttab flags=\t%hu\n", entry->flags); printf("gnttab status=\t%hd\n", entry->status); } static int xnb_dump_rings(SYSCTL_HANDLER_ARGS) { static char results[720]; struct xnb_softc const* xnb = (struct xnb_softc*)arg1; netif_rx_back_ring_t const* rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; netif_tx_back_ring_t const* txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; /* empty the result strings */ results[0] = 0; if ( !txb || !txb->sring || !rxb || !rxb->sring ) return (SYSCTL_OUT(req, results, strnlen(results, 720))); snprintf(results, 720, "\n\t%35s %18s\n" /* TX, RX */ "\t%16s %18d %18d\n" /* req_cons */ "\t%16s %18d %18d\n" /* nr_ents */ "\t%16s %18d %18d\n" /* rsp_prod_pvt */ "\t%16s %18p %18p\n" /* sring */ "\t%16s %18d %18d\n" /* req_prod */ "\t%16s %18d %18d\n" /* req_event */ "\t%16s %18d %18d\n" /* rsp_prod */ "\t%16s %18d %18d\n", /* rsp_event */ "TX", "RX", "req_cons", txb->req_cons, rxb->req_cons, "nr_ents", txb->nr_ents, rxb->nr_ents, "rsp_prod_pvt", txb->rsp_prod_pvt, rxb->rsp_prod_pvt, "sring", txb->sring, rxb->sring, "sring->req_prod", txb->sring->req_prod, rxb->sring->req_prod, "sring->req_event", txb->sring->req_event, rxb->sring->req_event, "sring->rsp_prod", txb->sring->rsp_prod, rxb->sring->rsp_prod, "sring->rsp_event", txb->sring->rsp_event, rxb->sring->rsp_event); return (SYSCTL_OUT(req, results, strnlen(results, 720))); } static void __unused xnb_dump_mbuf(const struct mbuf *m) { int len; uint8_t *d; if (m == NULL) return; printf("xnb_dump_mbuf:\n"); if (m->m_flags & M_PKTHDR) { printf(" flowid=%10d, 
csum_flags=%#8x, csum_data=%#8x, " "tso_segsz=%5hd\n", m->m_pkthdr.flowid, (int)m->m_pkthdr.csum_flags, m->m_pkthdr.csum_data, m->m_pkthdr.tso_segsz); printf(" rcvif=%16p, len=%19d\n", m->m_pkthdr.rcvif, m->m_pkthdr.len); } printf(" m_next=%16p, m_nextpk=%16p, m_data=%16p\n", m->m_next, m->m_nextpkt, m->m_data); printf(" m_len=%17d, m_flags=%#15x, m_type=%18u\n", m->m_len, m->m_flags, m->m_type); len = m->m_len; d = mtod(m, uint8_t*); while (len > 0) { int i; printf(" "); for (i = 0; (i < 16) && (len > 0); i++, len--) { printf("%02hhx ", *(d++)); } printf("\n"); } } #endif /* XNB_DEBUG */ /*------------------------ Inter-Domain Communication ------------------------*/ /** * Free dynamically allocated KVA or pseudo-physical address allocations. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_free_communication_mem(struct xnb_softc *xnb) { if (xnb->kva != 0) { #ifndef XENHVM kva_free(xnb->kva, xnb->kva_size); #else if (xnb->pseudo_phys_res != NULL) { bus_release_resource(xnb->dev, SYS_RES_MEMORY, xnb->pseudo_phys_res_id, xnb->pseudo_phys_res); xnb->pseudo_phys_res = NULL; } #endif /* XENHVM */ } xnb->kva = 0; xnb->gnt_base_addr = 0; } /** * Cleanup all inter-domain communication mechanisms. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_disconnect(struct xnb_softc *xnb) { struct gnttab_unmap_grant_ref gnts[XNB_NUM_RING_TYPES]; int error; int i; - if (xnb->irq != 0) { - unbind_from_irqhandler(xnb->irq); - xnb->irq = 0; - } + xen_intr_unbind(xnb->xen_intr_handle); /* * We may still have another thread currently processing requests. We * must acquire the rx and tx locks to make sure those threads are done, * but we can release those locks as soon as we acquire them, because no * more interrupts will be arriving. */ mtx_lock(&xnb->tx_lock); mtx_unlock(&xnb->tx_lock); mtx_lock(&xnb->rx_lock); mtx_unlock(&xnb->rx_lock); /* Free malloc'd softc member variables */ if (xnb->bridge != NULL) free(xnb->bridge, M_XENSTORE); /* All request processing has stopped, so unmap the rings */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { gnts[i].host_addr = xnb->ring_configs[i].gnt_addr; gnts[i].dev_bus_addr = xnb->ring_configs[i].bus_addr; gnts[i].handle = xnb->ring_configs[i].handle; } error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, gnts, XNB_NUM_RING_TYPES); KASSERT(error == 0, ("Grant table unmap op failed (%d)", error)); xnb_free_communication_mem(xnb); /* * Zero the ring config structs because the pointers, handles, and * grant refs contained therein are no longer valid. 
*/ bzero(&xnb->ring_configs[XNB_RING_TYPE_TX], sizeof(struct xnb_ring_config)); bzero(&xnb->ring_configs[XNB_RING_TYPE_RX], sizeof(struct xnb_ring_config)); xnb->flags &= ~XNBF_RING_CONNECTED; return (0); } /** * Map a single shared memory ring into domain local address space and * initialize its control structure * * \param xnb Per-instance xnb configuration structure * \param ring_type Array index of this ring in the xnb's array of rings * \return An errno */ static int xnb_connect_ring(struct xnb_softc *xnb, xnb_ring_type_t ring_type) { struct gnttab_map_grant_ref gnt; struct xnb_ring_config *ring = &xnb->ring_configs[ring_type]; int error; /* TX ring type = 0, RX =1 */ ring->va = xnb->kva + ring_type * PAGE_SIZE; ring->gnt_addr = xnb->gnt_base_addr + ring_type * PAGE_SIZE; gnt.host_addr = ring->gnt_addr; gnt.flags = GNTMAP_host_map; gnt.ref = ring->ring_ref; gnt.dom = xnb->otherend_id; error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &gnt, 1); if (error != 0) panic("netback: Ring page grant table op failed (%d)", error); if (gnt.status != 0) { ring->va = 0; error = EACCES; xenbus_dev_fatal(xnb->dev, error, "Ring shared page mapping failed. " "Status %d.", gnt.status); } else { ring->handle = gnt.handle; ring->bus_addr = gnt.dev_bus_addr; if (ring_type == XNB_RING_TYPE_TX) { BACK_RING_INIT(&ring->back_ring.tx_ring, (netif_tx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else if (ring_type == XNB_RING_TYPE_RX) { BACK_RING_INIT(&ring->back_ring.rx_ring, (netif_rx_sring_t*)ring->va, ring->ring_pages * PAGE_SIZE); } else { xenbus_dev_fatal(xnb->dev, error, "Unknown ring type %d", ring_type); } } return error; } /** * Setup the shared memory rings and bind an interrupt to the event channel * used to notify us of ring changes. * * \param xnb Per-instance xnb configuration structure. */ static int xnb_connect_comms(struct xnb_softc *xnb) { int error; xnb_ring_type_t i; if ((xnb->flags & XNBF_RING_CONNECTED) != 0) return (0); /* * Kva for our rings are at the tail of the region of kva allocated * by xnb_alloc_communication_mem(). */ for (i=0; i < XNB_NUM_RING_TYPES; i++) { error = xnb_connect_ring(xnb, i); if (error != 0) return error; } xnb->flags |= XNBF_RING_CONNECTED; - error = - bind_interdomain_evtchn_to_irqhandler(xnb->otherend_id, - xnb->evtchn, - device_get_nameunit(xnb->dev), - xnb_intr, /*arg*/xnb, - INTR_TYPE_BIO | INTR_MPSAFE, - &xnb->irq); + error = xen_intr_bind_remote_port(xnb->dev, + xnb->otherend_id, + xnb->evtchn, + /*filter*/NULL, + xnb_intr, /*arg*/xnb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xnb->xen_intr_handle); if (error != 0) { (void)xnb_disconnect(xnb); xenbus_dev_fatal(xnb->dev, error, "binding event channel"); return (error); } DPRINTF("rings connected!\n"); return (0); } /** * Size KVA and pseudo-physical address allocations based on negotiated * values for the size and number of I/O requests, and the size of our * communication ring. * * \param xnb Per-instance xnb configuration structure. * * These address spaces are used to dynamically map pages in the * front-end's domain into our own. */ static int xnb_alloc_communication_mem(struct xnb_softc *xnb) { xnb_ring_type_t i; xnb->kva_size = 0; for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->kva_size += xnb->ring_configs[i].ring_pages * PAGE_SIZE; } #ifndef XENHVM xnb->kva = kva_alloc(xnb->kva_size); if (xnb->kva == 0) return (ENOMEM); xnb->gnt_base_addr = xnb->kva; #else /* defined XENHVM */ /* * Reserve a range of pseudo physical memory that we can map * into kva. 
These pages will only be backed by machine * pages ("real memory") during the lifetime of front-end requests * via grant table operations. We will map the netif tx and rx rings * into this space. */ xnb->pseudo_phys_res_id = 0; xnb->pseudo_phys_res = bus_alloc_resource(xnb->dev, SYS_RES_MEMORY, &xnb->pseudo_phys_res_id, 0, ~0, xnb->kva_size, RF_ACTIVE); if (xnb->pseudo_phys_res == NULL) { xnb->kva = 0; return (ENOMEM); } xnb->kva = (vm_offset_t)rman_get_virtual(xnb->pseudo_phys_res); xnb->gnt_base_addr = rman_get_start(xnb->pseudo_phys_res); #endif /* !defined XENHVM */ return (0); } /** * Collect information from the XenStore related to our device and its frontend * * \param xnb Per-instance xnb configuration structure. */ static int xnb_collect_xenstore_info(struct xnb_softc *xnb) { /** * \todo Linux collects the following info. We should collect most * of this, too: * "feature-rx-notify" */ const char *otherend_path; const char *our_path; int err; unsigned int rx_copy, bridge_len; uint8_t no_csum_offload; otherend_path = xenbus_get_otherend_path(xnb->dev); our_path = xenbus_get_node(xnb->dev); /* Collect the critical communication parameters */ err = xs_gather(XST_NIL, otherend_path, "tx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_TX].ring_ref, "rx-ring-ref", "%l" PRIu32, &xnb->ring_configs[XNB_RING_TYPE_RX].ring_ref, "event-channel", "%" PRIu32, &xnb->evtchn, NULL); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Unable to retrieve ring information from " "frontend %s. Unable to connect.", otherend_path); return (err); } /* Collect the handle from xenstore */ err = xs_scanf(XST_NIL, our_path, "handle", NULL, "%li", &xnb->handle); if (err != 0) { xenbus_dev_fatal(xnb->dev, err, "Error reading handle from frontend %s. " "Unable to connect.", otherend_path); } /* * Collect the bridgename, if any. We do not need bridge_len; we just * throw it away */ err = xs_read(XST_NIL, our_path, "bridge", &bridge_len, (void**)&xnb->bridge); if (err != 0) xnb->bridge = NULL; /* * Does the frontend request that we use rx copy? If not, return an * error because this driver only supports rx copy. */ err = xs_scanf(XST_NIL, otherend_path, "request-rx-copy", NULL, "%" PRIu32, &rx_copy); if (err == ENOENT) { err = 0; rx_copy = 0; } if (err < 0) { xenbus_dev_fatal(xnb->dev, err, "reading %s/request-rx-copy", otherend_path); return err; } /** * \todo: figure out the exact meaning of this feature, and when * the frontend will set it to true. It should be set to true * at some point */ /* if (!rx_copy)*/ /* return EOPNOTSUPP;*/ /** \todo Collect the rx notify feature */ /* Collect the feature-sg. */ if (xs_scanf(XST_NIL, otherend_path, "feature-sg", NULL, "%hhu", &xnb->can_sg) < 0) xnb->can_sg = 0; /* Collect remaining frontend features */ if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4", NULL, "%hhu", &xnb->gso) < 0) xnb->gso = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-gso-tcpv4-prefix", NULL, "%hhu", &xnb->gso_prefix) < 0) xnb->gso_prefix = 0; if (xs_scanf(XST_NIL, otherend_path, "feature-no-csum-offload", NULL, "%hhu", &no_csum_offload) < 0) no_csum_offload = 0; xnb->ip_csum = (no_csum_offload == 0); return (0); } /** * Supply information about the physical device to the frontend * via XenBus. * * \param xnb Per-instance xnb configuration structure. 
*/ static int xnb_publish_backend_info(struct xnb_softc *xnb) { struct xs_transaction xst; const char *our_path; int error; our_path = xenbus_get_node(xnb->dev); do { error = xs_transaction_start(&xst); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Error publishing backend info " "(start transaction)"); break; } error = xs_printf(xst, our_path, "feature-sg", "%d", XNB_SG); if (error != 0) break; error = xs_printf(xst, our_path, "feature-gso-tcpv4", "%d", XNB_GSO_TCPV4); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-copy", "%d", XNB_RX_COPY); if (error != 0) break; error = xs_printf(xst, our_path, "feature-rx-flip", "%d", XNB_RX_FLIP); if (error != 0) break; error = xs_transaction_end(xst, 0); if (error != 0 && error != EAGAIN) { xenbus_dev_fatal(xnb->dev, error, "ending transaction"); break; } } while (error == EAGAIN); return (error); } /** * Connect to our netfront peer now that it has completed publishing * its configuration into the XenStore. * * \param xnb Per-instance xnb configuration structure. */ static void xnb_connect(struct xnb_softc *xnb) { int error; if (xenbus_get_state(xnb->dev) == XenbusStateConnected) return; if (xnb_collect_xenstore_info(xnb) != 0) return; xnb->flags &= ~XNBF_SHUTDOWN; /* Read front end configuration. */ /* Allocate resources whose size depends on front-end configuration. */ error = xnb_alloc_communication_mem(xnb); if (error != 0) { xenbus_dev_fatal(xnb->dev, error, "Unable to allocate communication memory"); return; } /* * Connect communication channel. */ error = xnb_connect_comms(xnb); if (error != 0) { /* Specific errors are reported by xnb_connect_comms(). */ return; } xnb->carrier = 1; /* Ready for I/O. */ xenbus_set_state(xnb->dev, XenbusStateConnected); } /*-------------------------- Device Teardown Support -------------------------*/ /** * Perform device shutdown functions. * * \param xnb Per-instance xnb configuration structure. * * Mark this instance as shutting down, wait for any active requests * to drain, disconnect from the front-end, and notify any waiters (e.g. * a thread invoking our detach method) that detach can now proceed. */ static int xnb_shutdown(struct xnb_softc *xnb) { /* * Due to the need to drop our mutex during some * xenbus operations, it is possible for two threads * to attempt to close out shutdown processing at * the same time. Tell the caller that hits this * race to try back later. */ if ((xnb->flags & XNBF_IN_SHUTDOWN) != 0) return (EAGAIN); xnb->flags |= XNBF_SHUTDOWN; xnb->flags |= XNBF_IN_SHUTDOWN; mtx_unlock(&xnb->sc_lock); /* Free the network interface */ xnb->carrier = 0; if (xnb->xnb_ifp != NULL) { ether_ifdetach(xnb->xnb_ifp); if_free(xnb->xnb_ifp); xnb->xnb_ifp = NULL; } mtx_lock(&xnb->sc_lock); xnb_disconnect(xnb); mtx_unlock(&xnb->sc_lock); if (xenbus_get_state(xnb->dev) < XenbusStateClosing) xenbus_set_state(xnb->dev, XenbusStateClosing); mtx_lock(&xnb->sc_lock); xnb->flags &= ~XNBF_IN_SHUTDOWN; /* Indicate to xnb_detach() that is it safe to proceed. */ wakeup(xnb); return (0); } /** * Report an attach time error to the console and Xen, and cleanup * this instance by forcing immediate detach processing. * * \param xnb Per-instance xnb configuration structure. * \param err Errno describing the error. * \param fmt Printf style format and arguments */ static void xnb_attach_failed(struct xnb_softc *xnb, int err, const char *fmt, ...) 
{ va_list ap; va_list ap_hotplug; va_start(ap, fmt); va_copy(ap_hotplug, ap); xs_vprintf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-error", fmt, ap_hotplug); va_end(ap_hotplug); xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "error"); xenbus_dev_vfatal(xnb->dev, err, fmt, ap); va_end(ap); xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "online", "0"); xnb_detach(xnb->dev); } /*---------------------------- NewBus Entrypoints ----------------------------*/ /** * Inspect a XenBus device and claim it if is of the appropriate type. * * \param dev NewBus device object representing a candidate XenBus device. * * \return 0 for success, errno codes for failure. */ static int xnb_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { DPRINTF("Claiming device %d, %s\n", device_get_unit(dev), devclass_get_name(device_get_devclass(dev))); device_set_desc(dev, "Backend Virtual Network Device"); device_quiet(dev); return (0); } return (ENXIO); } /** * Setup sysctl variables to control various Network Back parameters. * * \param xnb Xen Net Back softc. * */ static void xnb_setup_sysctl(struct xnb_softc *xnb) { struct sysctl_ctx_list *sysctl_ctx = NULL; struct sysctl_oid *sysctl_tree = NULL; sysctl_ctx = device_get_sysctl_ctx(xnb->dev); if (sysctl_ctx == NULL) return; sysctl_tree = device_get_sysctl_tree(xnb->dev); if (sysctl_tree == NULL) return; #ifdef XNB_DEBUG SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "unit_test_results", CTLTYPE_STRING | CTLFLAG_RD, xnb, 0, xnb_unit_test_main, "A", "Results of builtin unit tests"); SYSCTL_ADD_PROC(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "dump_rings", CTLTYPE_STRING | CTLFLAG_RD, xnb, 0, xnb_dump_rings, "A", "Xennet Back Rings"); #endif /* XNB_DEBUG */ } /** * Create a network device. * @param handle device handle */ int create_netdev(device_t dev) { struct ifnet *ifp; struct xnb_softc *xnb; int err = 0; xnb = device_get_softc(dev); mtx_init(&xnb->sc_lock, "xnb_softc", "xen netback softc lock", MTX_DEF); mtx_init(&xnb->tx_lock, "xnb_tx", "xen netback tx lock", MTX_DEF); mtx_init(&xnb->rx_lock, "xnb_rx", "xen netback rx lock", MTX_DEF); xnb->dev = dev; ifmedia_init(&xnb->sc_media, 0, xnb_ifmedia_upd, xnb_ifmedia_sts); ifmedia_add(&xnb->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&xnb->sc_media, IFM_ETHER|IFM_MANUAL); err = xen_net_read_mac(dev, xnb->mac); if (err == 0) { /* Set up ifnet structure */ ifp = xnb->xnb_ifp = if_alloc(IFT_ETHER); ifp->if_softc = xnb; if_initname(ifp, "xnb", device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xnb_ioctl; ifp->if_output = ether_output; ifp->if_start = xnb_start; #ifdef notyet ifp->if_watchdog = xnb_watchdog; #endif ifp->if_init = xnb_ifinit; ifp->if_mtu = ETHERMTU; ifp->if_snd.ifq_maxlen = NET_RX_RING_SIZE - 1; ifp->if_hwassist = XNB_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_capenable = IFCAP_HWCSUM; ether_ifattach(ifp, xnb->mac); xnb->carrier = 0; } return err; } /** * Attach to a XenBus device that has been claimed by our probe routine. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_attach(device_t dev) { struct xnb_softc *xnb; int error; xnb_ring_type_t i; error = create_netdev(dev); if (error != 0) { xenbus_dev_fatal(dev, error, "creating netdev"); return (error); } DPRINTF("Attaching to %s\n", xenbus_get_node(dev)); /* * Basic initialization. 
* After this block it is safe to call xnb_detach() * to clean up any allocated data for this instance. */ xnb = device_get_softc(dev); xnb->otherend_id = xenbus_get_otherend_id(dev); for (i=0; i < XNB_NUM_RING_TYPES; i++) { xnb->ring_configs[i].ring_pages = 1; } /* * Setup sysctl variables. */ xnb_setup_sysctl(xnb); /* Update hot-plug status to satisfy xend. */ error = xs_printf(XST_NIL, xenbus_get_node(xnb->dev), "hotplug-status", "connected"); if (error != 0) { xnb_attach_failed(xnb, error, "writing %s/hotplug-status", xenbus_get_node(xnb->dev)); return (error); } if ((error = xnb_publish_backend_info(xnb)) != 0) { /* * If we can't publish our data, we cannot participate * in this connection, and waiting for a front-end state * change will not help the situation. */ xnb_attach_failed(xnb, error, "Publishing backend status for %s", xenbus_get_node(xnb->dev)); return error; } /* Tell the front end that we are ready to connect. */ xenbus_set_state(dev, XenbusStateInitWait); return (0); } /** * Detach from a net back device instance. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. * * \note A net back device may be detached at any time in its life-cycle, * including part way through the attach process. For this reason, * initialization order and the intialization state checks in this * routine must be carefully coupled so that attach time failures * are gracefully handled. */ static int xnb_detach(device_t dev) { struct xnb_softc *xnb; DPRINTF("\n"); xnb = device_get_softc(dev); mtx_lock(&xnb->sc_lock); while (xnb_shutdown(xnb) == EAGAIN) { msleep(xnb, &xnb->sc_lock, /*wakeup prio unchanged*/0, "xnb_shutdown", 0); } mtx_unlock(&xnb->sc_lock); DPRINTF("\n"); mtx_destroy(&xnb->tx_lock); mtx_destroy(&xnb->rx_lock); mtx_destroy(&xnb->sc_lock); return (0); } /** * Prepare this net back device for suspension of this VM. * * \param dev NewBus device object representing this Xen net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_suspend(device_t dev) { return (0); } /** * Perform any processing required to recover from a suspended state. * * \param dev NewBus device object representing this Xen Net Back instance. * * \return 0 for success, errno codes for failure. */ static int xnb_resume(device_t dev) { return (0); } /** * Handle state changes expressed via the XenStore by our front-end peer. * * \param dev NewBus device object representing this Xen * Net Back instance. * \param frontend_state The new state of the front-end. * * \return 0 for success, errno codes for failure. */ static void xnb_frontend_changed(device_t dev, XenbusState frontend_state) { struct xnb_softc *xnb; xnb = device_get_softc(dev); DPRINTF("frontend_state=%s, xnb_state=%s\n", xenbus_strstate(frontend_state), xenbus_strstate(xenbus_get_state(xnb->dev))); switch (frontend_state) { case XenbusStateInitialising: break; case XenbusStateInitialised: case XenbusStateConnected: xnb_connect(xnb); break; case XenbusStateClosing: case XenbusStateClosed: mtx_lock(&xnb->sc_lock); xnb_shutdown(xnb); mtx_unlock(&xnb->sc_lock); if (frontend_state == XenbusStateClosed) xenbus_set_state(xnb->dev, XenbusStateClosed); break; default: xenbus_dev_fatal(xnb->dev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /*---------------------------- Request Processing ----------------------------*/ /** * Interrupt handler bound to the shared ring's event channel. 
* Entry point for the xennet transmit path in netback * Transfers packets from the Xen ring to the host's generic networking stack * * \param arg Callback argument registerd during event channel * binding - the xnb_softc for this instance. */ static void xnb_intr(void *arg) { struct xnb_softc *xnb; struct ifnet *ifp; netif_tx_back_ring_t *txb; RING_IDX req_prod_local; xnb = (struct xnb_softc *)arg; ifp = xnb->xnb_ifp; txb = &xnb->ring_configs[XNB_RING_TYPE_TX].back_ring.tx_ring; mtx_lock(&xnb->tx_lock); do { int notify; req_prod_local = txb->sring->req_prod; xen_rmb(); for (;;) { struct mbuf *mbufc; int err; err = xnb_recv(txb, xnb->otherend_id, &mbufc, ifp, xnb->tx_gnttab); if (err || (mbufc == NULL)) break; /* Send the packet to the generic network stack */ (*xnb->xnb_ifp->if_input)(xnb->xnb_ifp, mbufc); } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify); if (notify != 0) - notify_remote_via_irq(xnb->irq); + xen_intr_signal(xnb->xen_intr_handle); txb->sring->req_event = txb->req_cons + 1; xen_mb(); } while (txb->sring->req_prod != req_prod_local) ; mtx_unlock(&xnb->tx_lock); xnb_start(ifp); } /** * Build a struct xnb_pkt based on netif_tx_request's from a netif tx ring. * Will read exactly 0 or 1 packets from the ring; never a partial packet. * \param[out] pkt The returned packet. If there is an error building * the packet, pkt.list_len will be set to 0. * \param[in] tx_ring Pointer to the Ring that is the input to this function * \param[in] start The ring index of the first potential request * \return The number of requests consumed to build this packet */ static int xnb_ring2pkt(struct xnb_pkt *pkt, const netif_tx_back_ring_t *tx_ring, RING_IDX start) { /* * Outline: * 1) Initialize pkt * 2) Read the first request of the packet * 3) Read the extras * 4) Set cdr * 5) Loop on the remainder of the packet * 6) Finalize pkt (stuff like car_size and list_len) */ int idx = start; int discard = 0; /* whether to discard the packet */ int more_data = 0; /* there are more request past the last one */ uint16_t cdr_size = 0; /* accumulated size of requests 2 through n */ xnb_pkt_initialize(pkt); /* Read the first request */ if (RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->size = tx->size; pkt->flags = tx->flags & ~NETTXF_more_data; more_data = tx->flags & NETTXF_more_data; pkt->list_len++; pkt->car = idx; idx++; } /* Read the extra info */ if ((pkt->flags & NETTXF_extra_info) && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_extra_info_t *ext = (netif_extra_info_t*) RING_GET_REQUEST(tx_ring, idx); pkt->extra.type = ext->type; switch (pkt->extra.type) { case XEN_NETIF_EXTRA_TYPE_GSO: pkt->extra.u.gso = ext->u.gso; break; default: /* * The reference Linux netfront driver will * never set any other extra.type. So we don't * know what to do with it. Let's print an * error, then consume and discard the packet */ printf("xnb(%s:%d): Unknown extra info type %d." " Discarding packet\n", __func__, __LINE__, pkt->extra.type); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; break; } pkt->extra.flags = ext->flags; if (ext->flags & XEN_NETIF_EXTRA_FLAG_MORE) { /* * The reference linux netfront driver never sets this * flag (nor does any other known netfront). So we * will discard the packet. 
*/ printf("xnb(%s:%d): Request sets " "XEN_NETIF_EXTRA_FLAG_MORE, but we can't handle " "that\n", __func__, __LINE__); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); discard = 1; } idx++; } /* Set cdr. If there is not more data, cdr is invalid */ pkt->cdr = idx; /* Loop on remainder of packet */ while (more_data && RING_HAS_UNCONSUMED_REQUESTS_2(tx_ring, idx)) { netif_tx_request_t *tx = RING_GET_REQUEST(tx_ring, idx); pkt->list_len++; cdr_size += tx->size; if (tx->flags & ~NETTXF_more_data) { /* There should be no other flags set at this point */ printf("xnb(%s:%d): Request sets unknown flags %d " "after the 1st request in the packet.\n", __func__, __LINE__, tx->flags); xnb_dump_txreq(start, RING_GET_REQUEST(tx_ring, start)); xnb_dump_txreq(idx, RING_GET_REQUEST(tx_ring, idx)); } more_data = tx->flags & NETTXF_more_data; idx++; } /* Finalize packet */ if (more_data != 0) { /* The ring ran out of requests before finishing the packet */ xnb_pkt_invalidate(pkt); idx = start; /* tell caller that we consumed no requests */ } else { /* Calculate car_size */ pkt->car_size = pkt->size - cdr_size; } if (discard != 0) { xnb_pkt_invalidate(pkt); } return idx - start; } /** * Respond to all the requests that constituted pkt. Builds the responses and * writes them to the ring, but doesn't push them to the shared ring. * \param[in] pkt the packet that needs a response * \param[in] error true if there was an error handling the packet, such * as in the hypervisor copy op or mbuf allocation * \param[out] ring Responses go here */ static void xnb_txpkt2rsp(const struct xnb_pkt *pkt, netif_tx_back_ring_t *ring, int error) { /* * Outline: * 1) Respond to the first request * 2) Respond to the extra info reques * Loop through every remaining request in the packet, generating * responses that copy those requests' ids and sets the status * appropriately. */ netif_tx_request_t *tx; netif_tx_response_t *rsp; int i; uint16_t status; status = (xnb_pkt_is_valid(pkt) == 0) || error ? NETIF_RSP_ERROR : NETIF_RSP_OKAY; KASSERT((pkt->list_len == 0) || (ring->rsp_prod_pvt == pkt->car), ("Cannot respond to ring requests out of order")); if (pkt->list_len >= 1) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; if (pkt->flags & NETRXF_extra_info) { rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->status = NETIF_RSP_NULL; ring->rsp_prod_pvt++; } } for (i=0; i < pkt->list_len - 1; i++) { uint16_t id; tx = RING_GET_REQUEST(ring, ring->rsp_prod_pvt); id = tx->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = status; ring->rsp_prod_pvt++; } } /** * Create an mbuf chain to represent a packet. Initializes all of the headers * in the mbuf chain, but does not copy the data. The returned chain must be * free()'d when no longer needed * \param[in] pkt A packet to model the mbuf chain after * \return A newly allocated mbuf chain, possibly with clusters attached. 
* NULL on failure */ static struct mbuf* xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp) { /** * \todo consider using a memory pool for mbufs instead of * reallocating them for every packet */ /** \todo handle extra data */ struct mbuf *m; m = m_getm(NULL, pkt->size, M_NOWAIT, MT_DATA); if (m != NULL) { m->m_pkthdr.rcvif = ifp; if (pkt->flags & NETTXF_data_validated) { /* * We lie to the host OS and always tell it that the * checksums are ok, because the packet is unlikely to * get corrupted going across domains. */ m->m_pkthdr.csum_flags = ( CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR ); m->m_pkthdr.csum_data = 0xffff; } } return m; } /** * Build a gnttab_copy table that can be used to copy data from a pkt * to an mbufc. Does not actually perform the copy. Always uses gref's on * the packet side. * \param[in] pkt pkt's associated requests form the src for * the copy operation * \param[in] mbufc mbufc's storage forms the dest for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] txb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb, domid_t otherend_id) { const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into tx ring buffer */ int r_ofs = 0; /* offset of next data within tx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining = pkt->size; while (size_remaining > 0) { const netif_tx_request_t *txq = RING_GET_REQUEST(txb, r_idx); const size_t mbuf_space = M_TRAILINGSPACE(mbuf) - m_ofs; const size_t req_size = r_idx == pkt->car ? pkt->car_size : txq->size; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicking */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].source.u.ref = txq->gref; gnttab[gnt_idx].source.domid = otherend_id; gnttab[gnt_idx].source.offset = txq->offset + r_ofs; gnttab[gnt_idx].dest.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].dest.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_source_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next tx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (M_TRAILINGSPACE(mbuf) - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Check the status of the grant copy operations, and update mbufs various * non-data fields to reflect the data present. * \param[in,out] mbufc mbuf chain to update. 
The chain must be valid and of * the correct length, and data should already be present * \param[in] gnttab A grant table for a just completed copy op * \param[in] n_entries The number of valid entries in the grant table */ static void xnb_update_mbufc(struct mbuf *mbufc, const gnttab_copy_table gnttab, int n_entries) { struct mbuf *mbuf = mbufc; int i; size_t total_size = 0; for (i = 0; i < n_entries; i++) { KASSERT(gnttab[i].status == GNTST_okay, ("Some gnttab_copy entry had error status %hd\n", gnttab[i].status)); mbuf->m_len += gnttab[i].len; total_size += gnttab[i].len; if (M_TRAILINGSPACE(mbuf) <= 0) { mbuf = mbuf->m_next; } } mbufc->m_pkthdr.len = total_size; xnb_add_mbuf_cksum(mbufc); } /** * Dequeue at most one packet from the shared ring * \param[in,out] txb Netif tx ring. A packet will be removed from it, and * its private indices will be updated. But the indices * will not be pushed to the shared ring. * \param[in] ifnet Interface to which the packet will be sent * \param[in] otherend Domain ID of the other end of the ring * \param[out] mbufc The assembled mbuf chain, ready to send to the generic * networking stack * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. * \return An error code */ static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend, struct mbuf **mbufc, struct ifnet *ifnet, gnttab_copy_table gnttab) { struct xnb_pkt pkt; /* number of tx requests consumed to build the last packet */ int num_consumed; int nr_ents; *mbufc = NULL; num_consumed = xnb_ring2pkt(&pkt, txb, txb->req_cons); if (num_consumed == 0) return 0; /* Nothing to receive */ /* update statistics independent of errors */ ifnet->if_ipackets++; /* * if we got here, then 1 or more requests was consumed, but the packet * is not necessarily valid. */ if (xnb_pkt_is_valid(&pkt) == 0) { /* got a garbage packet, respond and drop it */ xnb_txpkt2rsp(&pkt, txb, 1); txb->req_cons += num_consumed; DPRINTF("xnb_intr: garbage packet, num_consumed=%d\n", num_consumed); ifnet->if_ierrors++; return EINVAL; } *mbufc = xnb_pkt2mbufc(&pkt, ifnet); if (*mbufc == NULL) { /* * Couldn't allocate mbufs. Respond and drop the packet. Do * not consume the requests */ xnb_txpkt2rsp(&pkt, txb, 1); DPRINTF("xnb_intr: Couldn't allocate mbufs, num_consumed=%d\n", num_consumed); ifnet->if_iqdrops++; return ENOMEM; } nr_ents = xnb_txpkt2gnttab(&pkt, *mbufc, gnttab, txb, otherend); if (nr_ents > 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, nr_ents); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); xnb_update_mbufc(*mbufc, gnttab, nr_ents); } xnb_txpkt2rsp(&pkt, txb, 0); txb->req_cons += num_consumed; return 0; } /** * Create an xnb_pkt based on the contents of an mbuf chain. 
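 * The packet is sized in RX ring slots: one response per PAGE_SIZE of data,
 * plus one extra-info slot when the chain is marked CSUM_TSO; the conversion
 * fails with EAGAIN if the ring cannot currently hold that many slots.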
* \param[in] mbufc mbuf chain to transform into a packet * \param[out] pkt Storage for the newly generated xnb_pkt * \param[in] start The ring index of the first available slot in the rx * ring * \param[in] space The number of free slots in the rx ring * \retval 0 Success * \retval EINVAL mbufc was corrupt or not convertible into a pkt * \retval EAGAIN There was not enough space in the ring to queue the * packet */ static int xnb_mbufc2pkt(const struct mbuf *mbufc, struct xnb_pkt *pkt, RING_IDX start, int space) { int retval = 0; if ((mbufc == NULL) || ( (mbufc->m_flags & M_PKTHDR) == 0) || (mbufc->m_pkthdr.len == 0)) { xnb_pkt_invalidate(pkt); retval = EINVAL; } else { int slots_required; xnb_pkt_validate(pkt); pkt->flags = 0; pkt->size = mbufc->m_pkthdr.len; pkt->car = start; pkt->car_size = mbufc->m_len; if (mbufc->m_pkthdr.csum_flags & CSUM_TSO) { pkt->flags |= NETRXF_extra_info; pkt->extra.u.gso.size = mbufc->m_pkthdr.tso_segsz; pkt->extra.u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; pkt->extra.u.gso.pad = 0; pkt->extra.u.gso.features = 0; pkt->extra.type = XEN_NETIF_EXTRA_TYPE_GSO; pkt->extra.flags = 0; pkt->cdr = start + 2; } else { pkt->cdr = start + 1; } if (mbufc->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_DELAY_DATA)) { pkt->flags |= (NETRXF_csum_blank | NETRXF_data_validated); } /* * Each ring response can have up to PAGE_SIZE of data. * Assume that we can defragment the mbuf chain efficiently * into responses so that each response but the last uses all * PAGE_SIZE bytes. */ pkt->list_len = (pkt->size + PAGE_SIZE - 1) / PAGE_SIZE; if (pkt->list_len > 1) { pkt->flags |= NETRXF_more_data; } slots_required = pkt->list_len + (pkt->flags & NETRXF_extra_info ? 1 : 0); if (slots_required > space) { xnb_pkt_invalidate(pkt); retval = EAGAIN; } } return retval; } /** * Build a gnttab_copy table that can be used to copy data from an mbuf chain * to the frontend's shared buffers. Does not actually perform the copy. * Always uses gref's on the other end's side. * \param[in] pkt pkt's associated responses form the dest for the copy * operatoin * \param[in] mbufc The source for the copy operation * \param[out] gnttab Storage for the returned grant table * \param[in] rxb Pointer to the backend ring structure * \param[in] otherend_id The domain ID of the other end of the copy * \return The number of gnttab entries filled */ static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc, gnttab_copy_table gnttab, const netif_rx_back_ring_t *rxb, domid_t otherend_id) { const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */ int gnt_idx = 0; /* index into grant table */ RING_IDX r_idx = pkt->car; /* index into rx ring buffer */ int r_ofs = 0; /* offset of next data within rx request's data area */ int m_ofs = 0; /* offset of next data within mbuf's data area */ /* size in bytes that still needs to be represented in the table */ uint16_t size_remaining; size_remaining = (xnb_pkt_is_valid(pkt) != 0) ? 
pkt->size : 0; while (size_remaining > 0) { const netif_rx_request_t *rxq = RING_GET_REQUEST(rxb, r_idx); const size_t mbuf_space = mbuf->m_len - m_ofs; /* Xen shared pages have an implied size of PAGE_SIZE */ const size_t req_size = PAGE_SIZE; const size_t pkt_space = req_size - r_ofs; /* * space is the largest amount of data that can be copied in the * grant table's next entry */ const size_t space = MIN(pkt_space, mbuf_space); /* TODO: handle this error condition without panicing */ KASSERT(gnt_idx < GNTTAB_LEN, ("Grant table is too short")); gnttab[gnt_idx].dest.u.ref = rxq->gref; gnttab[gnt_idx].dest.domid = otherend_id; gnttab[gnt_idx].dest.offset = r_ofs; gnttab[gnt_idx].source.u.gmfn = virt_to_mfn( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.offset = virt_to_offset( mtod(mbuf, vm_offset_t) + m_ofs); gnttab[gnt_idx].source.domid = DOMID_SELF; gnttab[gnt_idx].len = space; gnttab[gnt_idx].flags = GNTCOPY_dest_gref; gnt_idx++; r_ofs += space; m_ofs += space; size_remaining -= space; if (req_size - r_ofs <= 0) { /* Must move to the next rx request */ r_ofs = 0; r_idx = (r_idx == pkt->car) ? pkt->cdr : r_idx + 1; } if (mbuf->m_len - m_ofs <= 0) { /* Must move to the next mbuf */ m_ofs = 0; mbuf = mbuf->m_next; } } return gnt_idx; } /** * Generates responses for all the requests that constituted pkt. Builds * responses and writes them to the ring, but doesn't push the shared ring * indices. * \param[in] pkt the packet that needs a response * \param[in] gnttab The grant copy table corresponding to this packet. * Used to determine how many rsp->netif_rx_response_t's to * generate. * \param[in] n_entries Number of relevant entries in the grant table * \param[out] ring Responses go here * \return The number of RX requests that were consumed to generate * the responses */ static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt, const gnttab_copy_table gnttab, int n_entries, netif_rx_back_ring_t *ring) { /* * This code makes the following assumptions: * * All entries in gnttab set GNTCOPY_dest_gref * * The entries in gnttab are grouped by their grefs: any two * entries with the same gref must be adjacent */ int error = 0; int gnt_idx, i; int n_responses = 0; grant_ref_t last_gref = GRANT_REF_INVALID; RING_IDX r_idx; KASSERT(gnttab != NULL, ("Received a null granttable copy")); /* * In the event of an error, we only need to send one response to the * netfront. In that case, we musn't write any data to the responses * after the one we send. 
So we must loop all the way through gnttab * looking for errors before we generate any responses * * Since we're looping through the grant table anyway, we'll count the * number of different gref's in it, which will tell us how many * responses to generate */ for (gnt_idx = 0; gnt_idx < n_entries; gnt_idx++) { int16_t status = gnttab[gnt_idx].status; if (status != GNTST_okay) { DPRINTF( "Got error %d for hypervisor gnttab_copy status\n", status); error = 1; break; } if (gnttab[gnt_idx].dest.u.ref != last_gref) { n_responses++; last_gref = gnttab[gnt_idx].dest.u.ref; } } if (error != 0) { uint16_t id; netif_rx_response_t *rsp; id = RING_GET_REQUEST(ring, ring->rsp_prod_pvt)->id; rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt); rsp->id = id; rsp->status = NETIF_RSP_ERROR; n_responses = 1; } else { gnt_idx = 0; const int has_extra = pkt->flags & NETRXF_extra_info; if (has_extra != 0) n_responses++; for (i = 0; i < n_responses; i++) { netif_rx_request_t rxq; netif_rx_response_t *rsp; r_idx = ring->rsp_prod_pvt + i; /* * We copy the structure of rxq instead of making a * pointer because it shares the same memory as rsp. */ rxq = *(RING_GET_REQUEST(ring, r_idx)); rsp = RING_GET_RESPONSE(ring, r_idx); if (has_extra && (i == 1)) { netif_extra_info_t *ext = (netif_extra_info_t*)rsp; ext->type = XEN_NETIF_EXTRA_TYPE_GSO; ext->flags = 0; ext->u.gso.size = pkt->extra.u.gso.size; ext->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; ext->u.gso.pad = 0; ext->u.gso.features = 0; } else { rsp->id = rxq.id; rsp->status = GNTST_okay; rsp->offset = 0; rsp->flags = 0; if (i < pkt->list_len - 1) rsp->flags |= NETRXF_more_data; if ((i == 0) && has_extra) rsp->flags |= NETRXF_extra_info; if ((i == 0) && (pkt->flags & NETRXF_data_validated)) { rsp->flags |= NETRXF_data_validated; rsp->flags |= NETRXF_csum_blank; } rsp->status = 0; for (; gnttab[gnt_idx].dest.u.ref == rxq.gref; gnt_idx++) { rsp->status += gnttab[gnt_idx].len; } } } } ring->req_cons += n_responses; ring->rsp_prod_pvt += n_responses; return n_responses; } /** * Add IP, TCP, and/or UDP checksums to every mbuf in a chain. The first mbuf * in the chain must start with a struct ether_header. * * XXX This function will perform incorrectly on UDP packets that are split up * into multiple ethernet frames. 
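 * This is done because packets from the guest may arrive with their checksum
 * fields left uncomputed, so the IP/TCP/UDP checksums are filled in here in
 * software before the mbuf is handed to the host stack.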
*/ static void xnb_add_mbuf_cksum(struct mbuf *mbufc) { struct ether_header *eh; struct ip *iph; uint16_t ether_type; eh = mtod(mbufc, struct ether_header*); ether_type = ntohs(eh->ether_type); if (ether_type != ETHERTYPE_IP) { /* Nothing to calculate */ return; } iph = (struct ip*)(eh + 1); if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { iph->ip_sum = 0; iph->ip_sum = in_cksum_hdr(iph); } switch (iph->ip_p) { case IPPROTO_TCP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t tcplen = ntohs(iph->ip_len) - sizeof(struct ip); struct tcphdr *th = (struct tcphdr*)(iph + 1); th->th_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_TCP + tcplen)); th->th_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; case IPPROTO_UDP: if (mbufc->m_pkthdr.csum_flags & CSUM_IP_VALID) { size_t udplen = ntohs(iph->ip_len) - sizeof(struct ip); struct udphdr *uh = (struct udphdr*)(iph + 1); uh->uh_sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr, htons(IPPROTO_UDP + udplen)); uh->uh_sum = in_cksum_skip(mbufc, sizeof(struct ether_header) + ntohs(iph->ip_len), sizeof(struct ether_header) + (iph->ip_hl << 2)); } break; default: break; } } static void xnb_stop(struct xnb_softc *xnb) { struct ifnet *ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); ifp = xnb->xnb_ifp; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } static int xnb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct xnb_softc *xnb = ifp->if_softc; #ifdef INET struct ifreq *ifr = (struct ifreq*) data; struct ifaddr *ifa = (struct ifaddr*)data; #endif int error = 0; switch (cmd) { case SIOCSIFFLAGS: mtx_lock(&xnb->sc_lock); if (ifp->if_flags & IFF_UP) { xnb_ifinit_locked(xnb); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xnb_stop(xnb); } } /* * Note: netfront sets a variable named xn_if_flags * here, but that variable is never read */ mtx_unlock(&xnb->sc_lock); break; case SIOCSIFADDR: case SIOCGIFADDR: #ifdef INET mtx_lock(&xnb->sc_lock); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } arp_ifinit(ifp, ifa); mtx_unlock(&xnb->sc_lock); } else { mtx_unlock(&xnb->sc_lock); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFCAP: mtx_lock(&xnb->sc_lock); if (ifr->ifr_reqcap & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= XNB_CSUM_FEATURES; } else { ifp->if_capenable &= ~(IFCAP_TXCSUM); ifp->if_hwassist &= ~(XNB_CSUM_FEATURES); } if ((ifr->ifr_reqcap & IFCAP_RXCSUM)) { ifp->if_capenable |= IFCAP_RXCSUM; } else { ifp->if_capenable &= ~(IFCAP_RXCSUM); } /* * TODO enable TSO4 and LRO once we no longer need * to calculate checksums in software */ #if 0 if (ifr->if_reqcap |= IFCAP_TSO4) { if (IFCAP_TXCSUM & ifp->if_capenable) { printf("xnb: Xen netif requires that " "TXCSUM be enabled in order " "to use TSO4\n"); error = EINVAL; } else { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } } else { ifp->if_capenable &= ~(IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TSO); } if (ifr->ifreqcap |= IFCAP_LRO) { ifp->if_capenable |= IFCAP_LRO; } else { ifp->if_capenable &= ~(IFCAP_LRO); } #endif mtx_unlock(&xnb->sc_lock); break; case 
SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xnb_ifinit(xnb); break; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &xnb->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } static void xnb_start_locked(struct ifnet *ifp) { netif_rx_back_ring_t *rxb; struct xnb_softc *xnb; struct mbuf *mbufc; RING_IDX req_prod_local; xnb = ifp->if_softc; rxb = &xnb->ring_configs[XNB_RING_TYPE_RX].back_ring.rx_ring; if (!xnb->carrier) return; do { int out_of_space = 0; int notify; req_prod_local = rxb->sring->req_prod; xen_rmb(); for (;;) { int error; IF_DEQUEUE(&ifp->if_snd, mbufc); if (mbufc == NULL) break; error = xnb_send(rxb, xnb->otherend_id, mbufc, xnb->rx_gnttab); switch (error) { case EAGAIN: /* * Insufficient space in the ring. * Requeue pkt and send when space is * available. */ IF_PREPEND(&ifp->if_snd, mbufc); /* * Perhaps the frontend missed an IRQ * and went to sleep. Notify it to wake * it up. */ out_of_space = 1; break; case EINVAL: /* OS gave a corrupt packet. Drop it.*/ ifp->if_oerrors++; /* FALLTHROUGH */ default: /* Send succeeded, or packet had error. * Free the packet */ ifp->if_opackets++; if (mbufc) m_freem(mbufc); break; } if (out_of_space != 0) break; } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify); if ((notify != 0) || (out_of_space != 0)) - notify_remote_via_irq(xnb->irq); + xen_intr_signal(xnb->xen_intr_handle); rxb->sring->req_event = req_prod_local + 1; xen_mb(); } while (rxb->sring->req_prod != req_prod_local) ; } /** * Sends one packet to the ring. Blocks until the packet is on the ring * \param[in] mbufc Contains one packet to send. Caller must free * \param[in,out] rxb The packet will be pushed onto this ring, but the * otherend will not be notified. * \param[in] otherend The domain ID of the other end of the connection * \retval EAGAIN The ring did not have enough space for the packet. * The ring has not been modified * \param[in,out] gnttab Pointer to enough memory for a grant table. We make * this a function parameter so that we will take less * stack space. 
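 * \retval 0 Success; responses for the packet have been queued on the ring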
* \retval EINVAL mbufc was corrupt or not convertible into a pkt */ static int xnb_send(netif_rx_back_ring_t *ring, domid_t otherend, const struct mbuf *mbufc, gnttab_copy_table gnttab) { struct xnb_pkt pkt; int error, n_entries, n_reqs; RING_IDX space; space = ring->sring->req_prod - ring->req_cons; error = xnb_mbufc2pkt(mbufc, &pkt, ring->rsp_prod_pvt, space); if (error != 0) return error; n_entries = xnb_rxpkt2gnttab(&pkt, mbufc, gnttab, ring, otherend); if (n_entries != 0) { int __unused hv_ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, gnttab, n_entries); KASSERT(hv_ret == 0, ("HYPERVISOR_grant_table_op returned %d\n", hv_ret)); } n_reqs = xnb_rxpkt2rsp(&pkt, gnttab, n_entries, ring); return 0; } static void xnb_start(struct ifnet *ifp) { struct xnb_softc *xnb; xnb = ifp->if_softc; mtx_lock(&xnb->rx_lock); xnb_start_locked(ifp); mtx_unlock(&xnb->rx_lock); } /* equivalent of network_open() in Linux */ static void xnb_ifinit_locked(struct xnb_softc *xnb) { struct ifnet *ifp; ifp = xnb->xnb_ifp; mtx_assert(&xnb->sc_lock, MA_OWNED); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; xnb_stop(xnb); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } static void xnb_ifinit(void *xsc) { struct xnb_softc *xnb = xsc; mtx_lock(&xnb->sc_lock); xnb_ifinit_locked(xnb); mtx_unlock(&xnb->sc_lock); } /** * Read the 'mac' node at the given device's node in the store, and parse that * as colon-separated octets, placing result the given mac array. mac must be * a preallocated array of length ETHER_ADDR_LEN ETH_ALEN (as declared in * net/ethernet.h). * Return 0 on success, or errno on error. */ static int xen_net_read_mac(device_t dev, uint8_t mac[]) { char *s, *e, *macstr; const char *path; int error = 0; int i; path = xenbus_get_node(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); if (error != 0) { xenbus_dev_fatal(dev, error, "parsing %s/mac", path); } else { s = macstr; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac[i] = strtoul(s, &e, 16); if (s == e || (e[0] != ':' && e[0] != 0)) { error = ENOENT; break; } s = &e[1]; } free(macstr, M_XENBUS); } return error; } /** * Callback used by the generic networking code to tell us when our carrier * state has changed. Since we don't have a physical carrier, we don't care */ static int xnb_ifmedia_upd(struct ifnet *ifp) { return (0); } /** * Callback used by the generic networking code to ask us what our carrier * state is. 
Since we don't have a physical carrier, this is very simple */ static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /*---------------------------- NewBus Registration ---------------------------*/ static device_method_t xnb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xnb_probe), DEVMETHOD(device_attach, xnb_attach), DEVMETHOD(device_detach, xnb_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xnb_suspend), DEVMETHOD(device_resume, xnb_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, xnb_frontend_changed), { 0, 0 } }; static driver_t xnb_driver = { "xnb", xnb_methods, sizeof(struct xnb_softc), }; devclass_t xnb_devclass; DRIVER_MODULE(xnb, xenbusb_back, xnb_driver, xnb_devclass, 0, 0); /*-------------------------- Unit Tests -------------------------------------*/ #ifdef XNB_DEBUG #include "netback_unit_tests.c" #endif diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c index 68b3bf9be826..f9c72e6fc49a 100644 --- a/sys/dev/xen/netfront/netfront.c +++ b/sys/dev/xen/netfront/netfront.c @@ -1,2257 +1,2254 @@ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 700000 #include #include #endif #include #include #include /* for DELAY */ #include #include #include #include #include #include #include -#include -#include -#include +#include #include #include -#include #include #include #include #include +#include + #include #include "xenbus_if.h" /* Features supported by all backends. 
TSO and LRO can be negotiated */ #define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP) #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) #if __FreeBSD_version >= 700000 /* * Should the driver do LRO on the RX end * this can be toggled on the fly, but the * interface must be reset (down/up) for it * to take effect. */ static int xn_enable_lro = 1; TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); #else #define IFCAP_TSO4 0 #define CSUM_TSO 0 #endif #ifdef CONFIG_XEN static int MODPARM_rx_copy = 0; module_param_named(rx_copy, MODPARM_rx_copy, bool, 0); MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)"); static int MODPARM_rx_flip = 0; module_param_named(rx_flip, MODPARM_rx_flip, bool, 0); MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)"); #else static const int MODPARM_rx_copy = 1; static const int MODPARM_rx_flip = 0; #endif /** * \brief The maximum allowed data fragments in a single transmit * request. * * This limit is imposed by the backend driver. We assume here that * we are dealing with a Linux driver domain and have set our limit * to mirror the Linux MAX_SKB_FRAGS constant. */ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) #define NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES) #define RX_COPY_THRESHOLD 256 #define net_ratelimit() 0 struct netfront_info; struct netfront_rx_info; static void xn_txeof(struct netfront_info *); static void xn_rxeof(struct netfront_info *); static void network_alloc_rx_buffers(struct netfront_info *); static void xn_tick_locked(struct netfront_info *); static void xn_tick(void *); static void xn_intr(void *); static inline int xn_count_frags(struct mbuf *m); static int xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head); static void xn_start_locked(struct ifnet *); static void xn_start(struct ifnet *); static int xn_ioctl(struct ifnet *, u_long, caddr_t); static void xn_ifinit_locked(struct netfront_info *); static void xn_ifinit(void *); static void xn_stop(struct netfront_info *); static void xn_query_features(struct netfront_info *np); static int xn_configure_features(struct netfront_info *np); #ifdef notyet static void xn_watchdog(struct ifnet *); #endif static void show_device(struct netfront_info *sc); #ifdef notyet static void netfront_closing(device_t dev); #endif static void netif_free(struct netfront_info *info); static int netfront_detach(device_t dev); static int talk_to_backend(device_t dev, struct netfront_info *info); static int create_netdev(device_t dev); static void netif_disconnect_backend(struct netfront_info *info); static int setup_device(device_t dev, struct netfront_info *info); static void free_ring(int *ref, void *ring_ptr_ref); static int xn_ifmedia_upd(struct ifnet *ifp); static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); /* Xenolinux helper functions */ int network_connect(struct netfront_info *); static void xn_free_rx_ring(struct netfront_info *); static void xn_free_tx_ring(struct netfront_info *); static int xennet_get_responses(struct netfront_info *np, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list, int *pages_flipped_p); #define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) #define INVALID_P2M_ENTRY (~0UL) /* * Mbuf pointers. We need these to keep track of the virtual addresses * of our mbuf chains since we can only convert from virtual to physical, * not the other way around. 
The size must track the free index arrays. */ struct xn_chain_data { struct mbuf *xn_tx_chain[NET_TX_RING_SIZE+1]; int xn_tx_chain_cnt; struct mbuf *xn_rx_chain[NET_RX_RING_SIZE+1]; }; struct net_device_stats { u_long rx_packets; /* total packets received */ u_long tx_packets; /* total packets transmitted */ u_long rx_bytes; /* total bytes received */ u_long tx_bytes; /* total bytes transmitted */ u_long rx_errors; /* bad packets received */ u_long tx_errors; /* packet transmit problems */ u_long rx_dropped; /* no space in linux buffers */ u_long tx_dropped; /* no space available in linux */ u_long multicast; /* multicast packets received */ u_long collisions; /* detailed rx_errors: */ u_long rx_length_errors; u_long rx_over_errors; /* receiver ring buff overflow */ u_long rx_crc_errors; /* recved pkt with crc error */ u_long rx_frame_errors; /* recv'd frame alignment error */ u_long rx_fifo_errors; /* recv'r fifo overrun */ u_long rx_missed_errors; /* receiver missed packet */ /* detailed tx_errors */ u_long tx_aborted_errors; u_long tx_carrier_errors; u_long tx_fifo_errors; u_long tx_heartbeat_errors; u_long tx_window_errors; /* for cslip etc */ u_long rx_compressed; u_long tx_compressed; }; struct netfront_info { struct ifnet *xn_ifp; #if __FreeBSD_version >= 700000 struct lro_ctrl xn_lro; #endif struct net_device_stats stats; u_int tx_full; netif_tx_front_ring_t tx; netif_rx_front_ring_t rx; struct mtx tx_lock; struct mtx rx_lock; struct mtx sc_lock; - u_int handle; - u_int irq; + xen_intr_handle_t xen_intr_handle; u_int copying_receiver; u_int carrier; u_int maxfrags; /* Receive-ring batched refills. */ #define RX_MIN_TARGET 32 #define RX_MAX_TARGET NET_RX_RING_SIZE int rx_min_target; int rx_max_target; int rx_target; grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; device_t xbdev; int tx_ring_ref; int rx_ring_ref; uint8_t mac[ETHER_ADDR_LEN]; struct xn_chain_data xn_cdata; /* mbufs */ struct mbuf_head xn_rx_batch; /* head of the batch queue */ int xn_if_flags; struct callout xn_stat_ch; u_long rx_pfn_array[NET_RX_RING_SIZE]; multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; mmu_update_t rx_mmu[NET_RX_RING_SIZE]; struct ifmedia sc_media; }; #define rx_mbufs xn_cdata.xn_rx_chain #define tx_mbufs xn_cdata.xn_tx_chain #define XN_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \ mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF); \ mtx_init(&(_sc)->sc_lock, #_name"_sc", "netfront softc lock", MTX_DEF) #define XN_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_lock) #define XN_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_lock) #define XN_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_lock) #define XN_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_lock) #define XN_LOCK(_sc) mtx_lock(&(_sc)->sc_lock); #define XN_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_lock); #define XN_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_lock, MA_OWNED); #define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED); #define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED); #define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_lock); \ mtx_destroy(&(_sc)->tx_lock); \ mtx_destroy(&(_sc)->sc_lock); struct netfront_rx_info { struct netif_rx_response rx; struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; #define netfront_carrier_on(netif) ((netif)->carrier = 1) #define netfront_carrier_off(netif) ((netif)->carrier = 0) #define netfront_carrier_ok(netif) ((netif)->carrier) /* Access 
macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ static inline void add_id_to_freelist(struct mbuf **list, uintptr_t id) { KASSERT(id != 0, ("%s: the head item (0) must always be free.", __func__)); list[id] = list[0]; list[0] = (struct mbuf *)id; } static inline unsigned short get_id_from_freelist(struct mbuf **list) { uintptr_t id; id = (uintptr_t)list[0]; KASSERT(id != 0, ("%s: the head item (0) must always remain free.", __func__)); list[0] = list[id]; return (id); } static inline int xennet_rxidx(RING_IDX idx) { return idx & (NET_RX_RING_SIZE - 1); } static inline struct mbuf * xennet_get_rx_mbuf(struct netfront_info *np, RING_IDX ri) { int i = xennet_rxidx(ri); struct mbuf *m; m = np->rx_mbufs[i]; np->rx_mbufs[i] = NULL; return (m); } static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri) { int i = xennet_rxidx(ri); grant_ref_t ref = np->grant_rx_ref[i]; KASSERT(ref != GRANT_REF_INVALID, ("Invalid grant reference!\n")); np->grant_rx_ref[i] = GRANT_REF_INVALID; return ref; } #define IPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #ifdef INVARIANTS #define WPRINTK(fmt, args...) \ printf("[XEN] " fmt, ##args) #else #define WPRINTK(fmt, args...) #endif #ifdef DEBUG #define DPRINTK(fmt, args...) \ printf("[XEN] %s: " fmt, __func__, ##args) #else #define DPRINTK(fmt, args...) #endif /** * Read the 'mac' node at the given device's node in the store, and parse that * as colon-separated octets, placing result the given mac array. mac must be * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). * Return 0 on success, or errno on error. */ static int xen_net_read_mac(device_t dev, uint8_t mac[]) { int error, i; char *s, *e, *macstr; const char *path; path = xenbus_get_node(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); if (error == ENOENT) { /* * Deal with missing mac XenStore nodes on devices with * HVM emulation (the 'ioemu' configuration attribute) * enabled. * * The HVM emulator may execute in a stub device model * domain which lacks the permission, only given to Dom0, * to update the guest's XenStore tree. For this reason, * the HVM emulator doesn't even attempt to write the * front-side mac node, even when operating in Dom0. * However, there should always be a mac listed in the * backend tree. Fallback to this version if our query * of the front side XenStore location doesn't find * anything. */ path = xenbus_get_otherend_path(dev); error = xs_read(XST_NIL, path, "mac", NULL, (void **) &macstr); } if (error != 0) { xenbus_dev_fatal(dev, error, "parsing %s/mac", path); return (error); } s = macstr; for (i = 0; i < ETHER_ADDR_LEN; i++) { mac[i] = strtoul(s, &e, 16); if (s == e || (e[0] != ':' && e[0] != 0)) { free(macstr, M_XENBUS); return (ENOENT); } s = &e[1]; } free(macstr, M_XENBUS); return (0); } /** * Entry point to this code when a new device is created. Allocate the basic * structures and the ring buffers for communication with the backend, and * inform the backend of the appropriate details for those. Switch to * Connected state. 
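 * (The ring allocation and event-channel binding described here are performed
 * by create_netdev() and talk_to_backend()/setup_device() below.)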
*/ static int netfront_probe(device_t dev) { if (!strcmp(xenbus_get_type(dev), "vif")) { device_set_desc(dev, "Virtual Network Interface"); return (0); } return (ENXIO); } static int netfront_attach(device_t dev) { int err; err = create_netdev(dev); if (err) { xenbus_dev_fatal(dev, err, "creating netdev"); return (err); } #if __FreeBSD_version >= 700000 SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW, &xn_enable_lro, 0, "Large Receive Offload"); #endif return (0); } static int netfront_suspend(device_t dev) { struct netfront_info *info = device_get_softc(dev); XN_RX_LOCK(info); XN_TX_LOCK(info); netfront_carrier_off(info); XN_TX_UNLOCK(info); XN_RX_UNLOCK(info); return (0); } /** * We are reconnecting to the backend, due to a suspend/resume, or a backend * driver restart. We tear down our netif structure and recreate it, but * leave the device-layer structures intact so that this is transparent to the * rest of the kernel. */ static int netfront_resume(device_t dev) { struct netfront_info *info = device_get_softc(dev); netif_disconnect_backend(info); return (0); } /* Common code used when first setting up, and when resuming. */ static int talk_to_backend(device_t dev, struct netfront_info *info) { const char *message; struct xs_transaction xst; const char *node = xenbus_get_node(dev); int err; err = xen_net_read_mac(dev, info->mac); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", node); goto out; } /* Create shared ring, alloc event channel. */ err = setup_device(dev, info); if (err) goto out; again: err = xs_transaction_start(&xst); if (err) { xenbus_dev_fatal(dev, err, "starting transaction"); goto destroy_ring; } err = xs_printf(xst, node, "tx-ring-ref","%u", info->tx_ring_ref); if (err) { message = "writing tx ring-ref"; goto abort_transaction; } err = xs_printf(xst, node, "rx-ring-ref","%u", info->rx_ring_ref); if (err) { message = "writing rx ring-ref"; goto abort_transaction; } err = xs_printf(xst, node, - "event-channel", "%u", irq_to_evtchn_port(info->irq)); + "event-channel", "%u", + xen_intr_port(info->xen_intr_handle)); if (err) { message = "writing event-channel"; goto abort_transaction; } err = xs_printf(xst, node, "request-rx-copy", "%u", info->copying_receiver); if (err) { message = "writing request-rx-copy"; goto abort_transaction; } err = xs_printf(xst, node, "feature-rx-notify", "%d", 1); if (err) { message = "writing feature-rx-notify"; goto abort_transaction; } err = xs_printf(xst, node, "feature-sg", "%d", 1); if (err) { message = "writing feature-sg"; goto abort_transaction; } #if __FreeBSD_version >= 700000 err = xs_printf(xst, node, "feature-gso-tcpv4", "%d", 1); if (err) { message = "writing feature-gso-tcpv4"; goto abort_transaction; } #endif err = xs_transaction_end(xst, 0); if (err) { if (err == EAGAIN) goto again; xenbus_dev_fatal(dev, err, "completing transaction"); goto destroy_ring; } return 0; abort_transaction: xs_transaction_end(xst, 1); xenbus_dev_fatal(dev, err, "%s", message); destroy_ring: netif_free(info); out: return err; } static int setup_device(device_t dev, struct netfront_info *info) { netif_tx_sring_t *txs; netif_rx_sring_t *rxs; int error; struct ifnet *ifp; ifp = info->xn_ifp; info->tx_ring_ref = GRANT_REF_INVALID; info->rx_ring_ref = GRANT_REF_INVALID; info->rx.sring = NULL; info->tx.sring = NULL; - info->irq = 0; txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!txs) { error = ENOMEM; xenbus_dev_fatal(dev, error, 
"allocating tx ring page"); goto fail; } SHARED_RING_INIT(txs); FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref); if (error) goto fail; rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!rxs) { error = ENOMEM; xenbus_dev_fatal(dev, error, "allocating rx ring page"); goto fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref); if (error) goto fail; - error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), - "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq); + error = xen_intr_alloc_and_bind_local_port(dev, + xenbus_get_otherend_id(dev), /*filter*/NULL, xn_intr, info, + INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, &info->xen_intr_handle); if (error) { xenbus_dev_fatal(dev, error, - "bind_evtchn_to_irqhandler failed"); + "xen_intr_alloc_and_bind_local_port failed"); goto fail; } show_device(info); return (0); fail: netif_free(info); return (error); } #ifdef INET /** * If this interface has an ipv4 address, send an arp for it. This * helps to get the network going again after migrating hosts. */ static void netfront_send_fake_arp(device_t dev, struct netfront_info *info) { struct ifnet *ifp; struct ifaddr *ifa; ifp = info->xn_ifp; TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) { arp_ifinit(ifp, ifa); } } } #endif /** * Callback received when the backend's state changes. */ static void netfront_backend_changed(device_t dev, XenbusState newstate) { struct netfront_info *sc = device_get_softc(dev); DPRINTK("newstate=%d\n", newstate); switch (newstate) { case XenbusStateInitialising: case XenbusStateInitialised: case XenbusStateConnected: case XenbusStateUnknown: case XenbusStateClosed: case XenbusStateReconfigured: case XenbusStateReconfiguring: break; case XenbusStateInitWait: if (xenbus_get_state(dev) != XenbusStateInitialising) break; if (network_connect(sc) != 0) break; xenbus_set_state(dev, XenbusStateConnected); #ifdef INET netfront_send_fake_arp(dev, sc); #endif break; case XenbusStateClosing: xenbus_set_state(dev, XenbusStateClosed); break; } } static void xn_free_rx_ring(struct netfront_info *sc) { #if 0 int i; for (i = 0; i < NET_RX_RING_SIZE; i++) { if (sc->xn_cdata.rx_mbufs[i] != NULL) { m_freem(sc->rx_mbufs[i]); sc->rx_mbufs[i] = NULL; } } sc->rx.rsp_cons = 0; sc->xn_rx_if->req_prod = 0; sc->xn_rx_if->event = sc->rx.rsp_cons ; #endif } static void xn_free_tx_ring(struct netfront_info *sc) { #if 0 int i; for (i = 0; i < NET_TX_RING_SIZE; i++) { if (sc->tx_mbufs[i] != NULL) { m_freem(sc->tx_mbufs[i]); sc->xn_cdata.xn_tx_chain[i] = NULL; } } return; #endif } /** * \brief Verify that there is sufficient space in the Tx ring * buffer for a maximally sized request to be enqueued. * * A transmit request requires a transmit descriptor for each packet * fragment, plus up to 2 entries for "options" (e.g. TSO). */ static inline int xn_tx_slot_available(struct netfront_info *np) { return (RING_FREE_REQUESTS(&np->tx) > (MAX_TX_REQ_FRAGS + 2)); } static void netif_release_tx_bufs(struct netfront_info *np) { int i; for (i = 1; i <= NET_TX_RING_SIZE; i++) { struct mbuf *m; m = np->tx_mbufs[i]; /* * We assume that no kernel addresses are * less than NET_TX_RING_SIZE. Any entry * in the table that is below this number * must be an index from free-list tracking. 
*/ if (((uintptr_t)m) <= NET_TX_RING_SIZE) continue; gnttab_end_foreign_access_ref(np->grant_tx_ref[i]); gnttab_release_grant_reference(&np->gref_tx_head, np->grant_tx_ref[i]); np->grant_tx_ref[i] = GRANT_REF_INVALID; add_id_to_freelist(np->tx_mbufs, i); np->xn_cdata.xn_tx_chain_cnt--; if (np->xn_cdata.xn_tx_chain_cnt < 0) { panic("%s: tx_chain_cnt must be >= 0", __func__); } m_free(m); } } static void network_alloc_rx_buffers(struct netfront_info *sc) { int otherend_id = xenbus_get_otherend_id(sc->xbdev); unsigned short id; struct mbuf *m_new; int i, batch_target, notify; RING_IDX req_prod; struct xen_memory_reservation reservation; grant_ref_t ref; int nr_flips; netif_rx_request_t *req; vm_offset_t vaddr; u_long pfn; req_prod = sc->rx.req_prod_pvt; - if (unlikely(sc->carrier == 0)) + if (__predict_false(sc->carrier == 0)) return; /* * Allocate mbufs greedily, even though we batch updates to the * receive ring. This creates a less bursty demand on the memory * allocator, and so should reduce the chance of failed allocation * requests both for ourself and for other kernel subsystems. * * Here we attempt to maintain rx_target buffers in flight, counting * buffers that we have yet to process in the receive ring. */ batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons); for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) { MGETHDR(m_new, M_NOWAIT, MT_DATA); if (m_new == NULL) { printf("%s: MGETHDR failed\n", __func__); goto no_mbuf; } m_cljget(m_new, M_NOWAIT, MJUMPAGESIZE); if ((m_new->m_flags & M_EXT) == 0) { printf("%s: m_cljget failed\n", __func__); m_freem(m_new); no_mbuf: if (i != 0) goto refill; /* * XXX set timer */ break; } m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; /* queue the mbufs allocated */ mbufq_tail(&sc->xn_rx_batch, m_new); } /* * If we've allocated at least half of our target number of entries, * submit them to the backend - we have enough to make the overhead * of submission worthwhile. Otherwise wait for more mbufs and * request entries to become available. */ if (i < (sc->rx_target/2)) { if (req_prod >sc->rx.sring->req_prod) goto push; return; } /* * Double floating fill target if we risked having the backend * run out of empty buffers for receive traffic. We define "running * low" as having less than a fourth of our target buffers free * at the time we refilled the queue. */ if ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) { sc->rx_target *= 2; if (sc->rx_target > sc->rx_max_target) sc->rx_target = sc->rx_max_target; } refill: for (nr_flips = i = 0; ; i++) { if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL) break; m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)( vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); id = xennet_rxidx(req_prod + i); KASSERT(sc->rx_mbufs[id] == NULL, ("non-NULL xm_rx_chain")); sc->rx_mbufs[id] = m_new; ref = gnttab_claim_grant_reference(&sc->gref_rx_head); KASSERT(ref != GNTTAB_LIST_END, ("reserved grant references exhuasted")); sc->grant_rx_ref[id] = ref; vaddr = mtod(m_new, vm_offset_t); pfn = vtophys(vaddr) >> PAGE_SHIFT; req = RING_GET_REQUEST(&sc->rx, req_prod + i); if (sc->copying_receiver == 0) { gnttab_grant_foreign_transfer_ref(ref, otherend_id, pfn); sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn); if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* Remove this page before passing * back to Xen. 
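 * In flip (transfer) mode the underlying machine page will be given to the
 * backend, so its physical-to-machine mapping is invalidated and the virtual
 * mapping is torn down before the page leaves this domain.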
*/ set_phys_to_machine(pfn, INVALID_P2M_ENTRY); MULTI_update_va_mapping(&sc->rx_mcl[i], vaddr, 0, 0); } nr_flips++; } else { gnttab_grant_foreign_access_ref(ref, otherend_id, PFNTOMFN(pfn), 0); } req->id = id; req->gref = ref; sc->rx_pfn_array[i] = vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; } KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed")); /* * We may have allocated buffers which have entries outstanding * in the page * update queue -- make sure we flush those first! */ PT_UPDATES_FLUSH(); if (nr_flips != 0) { #ifdef notyet /* Tell the ballon driver what is going on. */ balloon_update_driver_allowance(i); #endif set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array); reservation.nr_extents = i; reservation.extent_order = 0; reservation.address_bits = 0; reservation.domid = DOMID_SELF; if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* After all PTEs have been zapped, flush the TLB. */ sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; /* Give away a batch of pages. */ sc->rx_mcl[i].op = __HYPERVISOR_memory_op; sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation; sc->rx_mcl[i].args[1] = (u_long)&reservation; /* Zap PTEs and give away pages in one big multicall. */ (void)HYPERVISOR_multicall(sc->rx_mcl, i+1); - if (unlikely(sc->rx_mcl[i].result != i || + if (__predict_false(sc->rx_mcl[i].result != i || HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != i)) panic("%s: unable to reduce memory " "reservation\n", __func__); } } else { wmb(); } /* Above is a suitable barrier to ensure backend will see requests. */ sc->rx.req_prod_pvt = req_prod + i; push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); } static void xn_rxeof(struct netfront_info *np) { struct ifnet *ifp; #if __FreeBSD_version >= 700000 struct lro_ctrl *lro = &np->xn_lro; struct lro_entry *queued; #endif struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; multicall_entry_t *mcl; struct mbuf *m; struct mbuf_head rxq, errq; int err, pages_flipped = 0, work_to_do; do { XN_RX_LOCK_ASSERT(np); if (!netfront_carrier_ok(np)) return; mbufq_init(&errq); mbufq_init(&rxq); ifp = np->xn_ifp; rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. */ i = np->rx.rsp_cons; while ((i != rp)) { memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); memset(extras, 0, sizeof(rinfo.extras)); m = NULL; err = xennet_get_responses(np, &rinfo, rp, &i, &m, &pages_flipped); - if (unlikely(err)) { + if (__predict_false(err)) { if (m) mbufq_tail(&errq, m); np->stats.rx_errors++; continue; } m->m_pkthdr.rcvif = ifp; if ( rx->flags & NETRXF_data_validated ) { /* Tell the stack the checksums are okay */ /* * XXX this isn't necessarily the case - need to add * check */ m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; } np->stats.rx_packets++; np->stats.rx_bytes += m->m_pkthdr.len; mbufq_tail(&rxq, m); np->rx.rsp_cons = i; } if (pages_flipped) { /* Some pages are no longer absent... */ #ifdef notyet balloon_update_driver_allowance(-pages_flipped); #endif /* Do all the remapping work, and M->P updates, in one big * hypercall. 
*/ if (!!xen_feature(XENFEAT_auto_translated_physmap)) { mcl = np->rx_mcl + pages_flipped; mcl->op = __HYPERVISOR_mmu_update; mcl->args[0] = (u_long)np->rx_mmu; mcl->args[1] = pages_flipped; mcl->args[2] = 0; mcl->args[3] = DOMID_SELF; (void)HYPERVISOR_multicall(np->rx_mcl, pages_flipped + 1); } } while ((m = mbufq_dequeue(&errq))) m_freem(m); /* * Process all the mbufs after the remapping is complete. * Break the mbuf chain first though. */ while ((m = mbufq_dequeue(&rxq)) != NULL) { ifp->if_ipackets++; /* * Do we really need to drop the rx lock? */ XN_RX_UNLOCK(np); #if __FreeBSD_version >= 700000 /* Use LRO if possible */ if ((ifp->if_capenable & IFCAP_LRO) == 0 || lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { /* * If LRO fails, pass up to the stack * directly. */ (*ifp->if_input)(ifp, m); } #else (*ifp->if_input)(ifp, m); #endif XN_RX_LOCK(np); } np->rx.rsp_cons = i; #if __FreeBSD_version >= 700000 /* * Flush any outstanding LRO work */ while (!SLIST_EMPTY(&lro->lro_active)) { queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif #if 0 /* If we get a callback with very few responses, reduce fill target. */ /* NB. Note exponential increase, linear decrease. */ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) np->rx_target = np->rx_min_target; #endif network_alloc_rx_buffers(np); RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do); } while (work_to_do); } static void xn_txeof(struct netfront_info *np) { RING_IDX i, prod; unsigned short id; struct ifnet *ifp; netif_tx_response_t *txr; struct mbuf *m; XN_TX_LOCK_ASSERT(np); if (!netfront_carrier_ok(np)) return; ifp = np->xn_ifp; do { prod = np->tx.sring->rsp_prod; rmb(); /* Ensure we see responses up to 'rp'. */ for (i = np->tx.rsp_cons; i != prod; i++) { txr = RING_GET_RESPONSE(&np->tx, i); if (txr->status == NETIF_RSP_NULL) continue; if (txr->status != NETIF_RSP_OKAY) { printf("%s: WARNING: response is %d!\n", __func__, txr->status); } id = txr->id; m = np->tx_mbufs[id]; KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); KASSERT((uintptr_t)m > NET_TX_RING_SIZE, ("mbuf already on the free list, but we're " "trying to free it again!")); M_ASSERTVALID(m); /* * Increment packet count if this is the last * mbuf of the chain. */ if (!m->m_next) ifp->if_opackets++; - if (unlikely(gnttab_query_foreign_access( + if (__predict_false(gnttab_query_foreign_access( np->grant_tx_ref[id]) != 0)) { panic("%s: grant id %u still in use by the " "backend", __func__, id); } gnttab_end_foreign_access_ref( np->grant_tx_ref[id]); gnttab_release_grant_reference( &np->gref_tx_head, np->grant_tx_ref[id]); np->grant_tx_ref[id] = GRANT_REF_INVALID; np->tx_mbufs[id] = NULL; add_id_to_freelist(np->tx_mbufs, id); np->xn_cdata.xn_tx_chain_cnt--; m_free(m); /* Only mark the queue active if we've freed up at least one slot to try */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } np->tx.rsp_cons = prod; /* * Set a new event, then check for race with update of * tx_cons. Note that it is essential to schedule a * callback, no matter how few buffers are pending. Even if * there is space in the transmit ring, higher layers may * be blocked because too much data is outstanding: in such * cases notification from Xen is likely to be the only kick * that we'll get. 
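 * The threshold below is placed roughly halfway between the current response
 * producer and the request producer, so we are interrupted again once about
 * half of the still-outstanding requests have been completed.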
*/ np->tx.sring->rsp_event = prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; mb(); } while (prod != np->tx.sring->rsp_prod); if (np->tx_full && ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { np->tx_full = 0; #if 0 if (np->user_state == UST_OPEN) netif_wake_queue(dev); #endif } } static void xn_intr(void *xsc) { struct netfront_info *np = xsc; struct ifnet *ifp = np->xn_ifp; #if 0 if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod && likely(netfront_carrier_ok(np)) && ifp->if_drv_flags & IFF_DRV_RUNNING)) return; #endif if (RING_HAS_UNCONSUMED_RESPONSES(&np->tx)) { XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); } XN_RX_LOCK(np); xn_rxeof(np); XN_RX_UNLOCK(np); if (ifp->if_drv_flags & IFF_DRV_RUNNING && !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) xn_start(ifp); } static void xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m, grant_ref_t ref) { int new = xennet_rxidx(np->rx.req_prod_pvt); KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL")); np->rx_mbufs[new] = m; np->grant_rx_ref[new] = ref; RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; np->rx.req_prod_pvt++; } static int xennet_get_extras(struct netfront_info *np, struct netif_extra_info *extras, RING_IDX rp, RING_IDX *cons) { struct netif_extra_info *extra; int err = 0; do { struct mbuf *m; grant_ref_t ref; - if (unlikely(*cons + 1 == rp)) { + if (__predict_false(*cons + 1 == rp)) { #if 0 if (net_ratelimit()) WPRINTK("Missing extra info\n"); #endif err = EINVAL; break; } extra = (struct netif_extra_info *) RING_GET_RESPONSE(&np->rx, ++(*cons)); - if (unlikely(!extra->type || + if (__predict_false(!extra->type || extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { #if 0 if (net_ratelimit()) WPRINTK("Invalid extra type: %d\n", extra->type); #endif err = EINVAL; } else { memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); } m = xennet_get_rx_mbuf(np, *cons); ref = xennet_get_rx_ref(np, *cons); xennet_move_rx_slot(np, m, ref); } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); return err; } static int xennet_get_responses(struct netfront_info *np, struct netfront_rx_info *rinfo, RING_IDX rp, RING_IDX *cons, struct mbuf **list, int *pages_flipped_p) { int pages_flipped = *pages_flipped_p; struct mmu_update *mmu; struct multicall_entry *mcl; struct netif_rx_response *rx = &rinfo->rx; struct netif_extra_info *extras = rinfo->extras; struct mbuf *m, *m0, *m_prev; grant_ref_t ref = xennet_get_rx_ref(np, *cons); RING_IDX ref_cons = *cons; int frags = 1; int err = 0; u_long ret; m0 = m = m_prev = xennet_get_rx_mbuf(np, *cons); if (rx->flags & NETRXF_extra_info) { err = xennet_get_extras(np, extras, rp, cons); } if (m0 != NULL) { m0->m_pkthdr.len = 0; m0->m_next = NULL; } for (;;) { u_long mfn; #if 0 DPRINTK("rx->status=%hd rx->offset=%hu frags=%u\n", rx->status, rx->offset, frags); #endif - if (unlikely(rx->status < 0 || + if (__predict_false(rx->status < 0 || rx->offset + rx->status > PAGE_SIZE)) { #if 0 if (net_ratelimit()) WPRINTK("rx->offset: %x, size: %u\n", rx->offset, rx->status); #endif xennet_move_rx_slot(np, m, ref); if (m0 == m) m0 = NULL; m = NULL; err = EINVAL; goto next_skip_queue; } /* * This definitely indicates a bug, either in this driver or in * the backend driver. In future this should flag the bad * situation to the system controller to reboot the backed. 
*/ if (ref == GRANT_REF_INVALID) { #if 0 if (net_ratelimit()) WPRINTK("Bad rx response id %d.\n", rx->id); #endif printf("%s: Bad rx response id %d.\n", __func__,rx->id); err = EINVAL; goto next; } if (!np->copying_receiver) { /* Memory pressure, insufficient buffer * headroom, ... */ if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n", rx->id, rx->status); xennet_move_rx_slot(np, m, ref); err = ENOMEM; goto next; } if (!xen_feature( XENFEAT_auto_translated_physmap)) { /* Remap the page. */ void *vaddr = mtod(m, void *); uint32_t pfn; mcl = np->rx_mcl + pages_flipped; mmu = np->rx_mmu + pages_flipped; MULTI_update_va_mapping(mcl, (u_long)vaddr, (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW | PG_V | PG_M | PG_A, 0); pfn = (uintptr_t)m->m_ext.ext_arg1; mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; mmu->val = pfn; set_phys_to_machine(pfn, mfn); } pages_flipped++; } else { ret = gnttab_end_foreign_access_ref(ref); KASSERT(ret, ("ret != 0")); } gnttab_release_grant_reference(&np->gref_rx_head, ref); next: if (m == NULL) break; m->m_len = rx->status; m->m_data += rx->offset; m0->m_pkthdr.len += rx->status; next_skip_queue: if (!(rx->flags & NETRXF_more_data)) break; if (*cons + frags == rp) { if (net_ratelimit()) WPRINTK("Need more frags\n"); err = ENOENT; printf("%s: cons %u frags %u rp %u, not enough frags\n", __func__, *cons, frags, rp); break; } /* * Note that m can be NULL, if rx->status < 0 or if * rx->offset + rx->status > PAGE_SIZE above. */ m_prev = m; rx = RING_GET_RESPONSE(&np->rx, *cons + frags); m = xennet_get_rx_mbuf(np, *cons + frags); /* * m_prev == NULL can happen if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. */ if (m_prev != NULL) m_prev->m_next = m; /* * m0 can be NULL if rx->status < 0 or if * rx->offset + * rx->status > PAGE_SIZE above. */ if (m0 == NULL) m0 = m; m->m_next = NULL; ref = xennet_get_rx_ref(np, *cons + frags); ref_cons = *cons + frags; frags++; } *list = m0; *cons += frags; *pages_flipped_p = pages_flipped; return (err); } static void xn_tick_locked(struct netfront_info *sc) { XN_RX_LOCK_ASSERT(sc); callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); /* XXX placeholder for printing debug information */ } static void xn_tick(void *xsc) { struct netfront_info *sc; sc = xsc; XN_RX_LOCK(sc); xn_tick_locked(sc); XN_RX_UNLOCK(sc); } /** * \brief Count the number of fragments in an mbuf chain. * * Surprisingly, there isn't an M* macro for this. */ static inline int xn_count_frags(struct mbuf *m) { int nfrags; for (nfrags = 0; m != NULL; m = m->m_next) nfrags++; return (nfrags); } /** * Given an mbuf chain, make sure we have enough room and then push * it onto the transmit ring. */ static int xn_assemble_tx_request(struct netfront_info *sc, struct mbuf *m_head) { struct ifnet *ifp; struct mbuf *m; u_int nfrags; netif_extra_info_t *extra; int otherend_id; ifp = sc->xn_ifp; /** * Defragment the mbuf if necessary. */ nfrags = xn_count_frags(m_head); /* * Check to see whether this request is longer than netback * can handle, and try to defrag it. */ /** * It is a bit lame, but the netback driver in Linux can't * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of * the Linux network stack. */ if (nfrags > sc->maxfrags) { m = m_defrag(m_head, M_NOWAIT); if (!m) { /* * Defrag failed, so free the mbuf and * therefore drop the packet. 
*/ m_freem(m_head); return (EMSGSIZE); } m_head = m; } /* Determine how many fragments now exist */ nfrags = xn_count_frags(m_head); /* * Check to see whether the defragmented packet has too many * segments for the Linux netback driver. */ /** * The FreeBSD TCP stack, with TSO enabled, can produce a chain * of mbufs longer than Linux can handle. Make sure we don't * pass a too-long chain over to the other side by dropping the * packet. It doesn't look like there is currently a way to * tell the TCP stack to generate a shorter chain of packets. */ if (nfrags > MAX_TX_REQ_FRAGS) { #ifdef DEBUG printf("%s: nfrags %d > MAX_TX_REQ_FRAGS %d, netback " "won't be able to handle it, dropping\n", __func__, nfrags, MAX_TX_REQ_FRAGS); #endif m_freem(m_head); return (EMSGSIZE); } /* * This check should be redundant. We've already verified that we * have enough slots in the ring to handle a packet of maximum * size, and that our packet is less than the maximum size. Keep * it in here as an assert for now just to make certain that * xn_tx_chain_cnt is accurate. */ KASSERT((sc->xn_cdata.xn_tx_chain_cnt + nfrags) <= NET_TX_RING_SIZE, ("%s: xn_tx_chain_cnt (%d) + nfrags (%d) > NET_TX_RING_SIZE " "(%d)!", __func__, (int) sc->xn_cdata.xn_tx_chain_cnt, (int) nfrags, (int) NET_TX_RING_SIZE)); /* * Start packing the mbufs in this chain into * the fragment pointers. Stop when we run out * of fragments or hit the end of the mbuf chain. */ m = m_head; extra = NULL; otherend_id = xenbus_get_otherend_id(sc->xbdev); for (m = m_head; m; m = m->m_next) { netif_tx_request_t *tx; uintptr_t id; grant_ref_t ref; u_long mfn; /* XXX Wrong type? */ tx = RING_GET_REQUEST(&sc->tx, sc->tx.req_prod_pvt); id = get_id_from_freelist(sc->tx_mbufs); if (id == 0) panic("%s: was allocated the freelist head!\n", __func__); sc->xn_cdata.xn_tx_chain_cnt++; if (sc->xn_cdata.xn_tx_chain_cnt > NET_TX_RING_SIZE) panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n", __func__); sc->tx_mbufs[id] = m; tx->id = id; ref = gnttab_claim_grant_reference(&sc->gref_tx_head); KASSERT((short)ref >= 0, ("Negative ref")); mfn = virt_to_mfn(mtod(m, vm_offset_t)); gnttab_grant_foreign_access_ref(ref, otherend_id, mfn, GNTMAP_readonly); tx->gref = sc->grant_tx_ref[id] = ref; tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); tx->flags = 0; if (m == m_head) { /* * The first fragment has the entire packet * size, subsequent fragments have just the * fragment size. The backend works out the * true size of the first fragment by * subtracting the sizes of the other * fragments. */ tx->size = m->m_pkthdr.len; /* * The first fragment contains the checksum flags * and is optionally followed by extra data for * TSO etc. */ /** * CSUM_TSO requires checksum offloading. * Some versions of FreeBSD fail to * set CSUM_TCP in the CSUM_TSO case, * so we have to test for CSUM_TSO * explicitly. 
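A small worked example of the fragment-size convention described in the packing loop above, using hypothetical lengths and plain C instead of an mbuf chain: the first request advertises the whole packet length, later requests carry only their own length, and the backend recovers the first fragment's true size by subtraction.

#include <stdio.h>

int
main(void)
{
	/* Hypothetical 3-fragment packet: 600 + 500 + 400 = 1500 bytes. */
	unsigned int frag_len[3] = { 600, 500, 400 };
	unsigned int pkt_len = 600 + 500 + 400;

	/*
	 * What the frontend places in tx->size: the first request carries
	 * the whole packet length, the rest carry their own length.
	 */
	unsigned int tx_size[3] = { pkt_len, frag_len[1], frag_len[2] };

	/*
	 * The backend works out the first fragment's real length by
	 * subtracting the later fragments from the first request's size.
	 */
	unsigned int first = tx_size[0] - tx_size[1] - tx_size[2];

	printf("first fragment really holds %u bytes (expect %u)\n",
	    first, frag_len[0]);
	return (0);
}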
*/ if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_TSO)) { tx->flags |= (NETTXF_csum_blank | NETTXF_data_validated); } #if __FreeBSD_version >= 700000 if (m->m_pkthdr.csum_flags & CSUM_TSO) { struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_REQUEST(&sc->tx, ++sc->tx.req_prod_pvt); tx->flags |= NETTXF_extra_info; gso->u.gso.size = m->m_pkthdr.tso_segsz; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; } #endif } else { tx->size = m->m_len; } if (m->m_next) tx->flags |= NETTXF_more_data; sc->tx.req_prod_pvt++; } BPF_MTAP(ifp, m_head); sc->stats.tx_bytes += m_head->m_pkthdr.len; sc->stats.tx_packets++; return (0); } static void xn_start_locked(struct ifnet *ifp) { struct netfront_info *sc; struct mbuf *m_head; int notify; sc = ifp->if_softc; if (!netfront_carrier_ok(sc)) return; /* * While we have enough transmit slots available for at least one * maximum-sized packet, pull mbufs off the queue and put them on * the transmit ring. */ while (xn_tx_slot_available(sc)) { IF_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (xn_assemble_tx_request(sc, m_head) != 0) break; } RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); if (RING_FULL(&sc->tx)) { sc->tx_full = 1; #if 0 netif_stop_queue(dev); #endif } } static void xn_start(struct ifnet *ifp) { struct netfront_info *sc; sc = ifp->if_softc; XN_TX_LOCK(sc); xn_start_locked(ifp); XN_TX_UNLOCK(sc); } /* equivalent of network_open() in Linux */ static void xn_ifinit_locked(struct netfront_info *sc) { struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; xn_stop(sc); network_alloc_rx_buffers(sc); sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); } static void xn_ifinit(void *xsc) { struct netfront_info *sc = xsc; XN_LOCK(sc); xn_ifinit_locked(sc); XN_UNLOCK(sc); } static int xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct netfront_info *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif int mask, error = 0; switch(cmd) { case SIOCSIFADDR: case SIOCGIFADDR: #ifdef INET XN_LOCK(sc); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) xn_ifinit_locked(sc); arp_ifinit(ifp, ifa); XN_UNLOCK(sc); } else { XN_UNLOCK(sc); #endif error = ether_ioctl(ifp, cmd, data); #ifdef INET } #endif break; case SIOCSIFMTU: /* XXX can we alter the MTU on a VN ?*/ #ifdef notyet if (ifr->ifr_mtu > XN_JUMBO_MTU) error = EINVAL; else #endif { ifp->if_mtu = ifr->ifr_mtu; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; xn_ifinit(sc); } break; case SIOCSIFFLAGS: XN_LOCK(sc); if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. 
*/ #ifdef notyet /* No promiscuous mode with Xen */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->xn_if_flags & IFF_PROMISC)) { XN_SETBIT(sc, XN_RX_MODE, XN_RXMODE_RX_PROMISC); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->xn_if_flags & IFF_PROMISC) { XN_CLRBIT(sc, XN_RX_MODE, XN_RXMODE_RX_PROMISC); } else #endif xn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { xn_stop(sc); } } sc->xn_if_flags = ifp->if_flags; XN_UNLOCK(sc); error = 0; break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); } } if (mask & IFCAP_RXCSUM) { ifp->if_capenable ^= IFCAP_RXCSUM; } #if __FreeBSD_version >= 700000 if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_TSO; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; } else { IPRINTK("Xen requires tx checksum offload" " be enabled to use TSO\n"); error = EINVAL; } } if (mask & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; } #endif error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet if (ifp->if_drv_flags & IFF_DRV_RUNNING) { XN_LOCK(sc); xn_setmulti(sc); XN_UNLOCK(sc); error = 0; } #endif /* FALLTHROUGH */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); } return (error); } static void xn_stop(struct netfront_info *sc) { struct ifnet *ifp; XN_LOCK_ASSERT(sc); ifp = sc->xn_ifp; callout_stop(&sc->xn_stat_ch); xn_free_rx_ring(sc); xn_free_tx_ring(sc); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); } /* START of Xenolinux helper functions adapted to FreeBSD */ int network_connect(struct netfront_info *np) { int i, requeue_idx, error; grant_ref_t ref; netif_rx_request_t *req; u_int feature_rx_copy, feature_rx_flip; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-copy", NULL, "%u", &feature_rx_copy); if (error) feature_rx_copy = 0; error = xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-rx-flip", NULL, "%u", &feature_rx_flip); if (error) feature_rx_flip = 1; /* * Copy packets on receive path if: * (a) This was requested by user, and the backend supports it; or * (b) Flipping was requested, but this is unsupported by the backend. */ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) || (MODPARM_rx_flip && !feature_rx_flip)); /* Recovery procedure: */ error = talk_to_backend(np->xbdev, np); if (error) return (error); /* Step 1: Reinitialise variables. */ xn_query_features(np); xn_configure_features(np); netif_release_tx_bufs(np); /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. 
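The receive-path decision made earlier in network_connect() amounts to a small truth table over the backend's feature-rx-copy/feature-rx-flip announcements. The sketch below evaluates the same boolean for every combination; the module-parameter values are placeholders chosen for illustration, not the driver's defaults.

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical stand-ins for the MODPARM_rx_copy/MODPARM_rx_flip knobs. */
static const bool modparm_rx_copy = true;
static const bool modparm_rx_flip = true;

/*
 * Same expression as network_connect(): copy if copying was requested and
 * the backend supports it, or if flipping was requested but the backend
 * cannot flip.
 */
static bool
use_copying_receiver(bool feature_rx_copy, bool feature_rx_flip)
{
	return ((modparm_rx_copy && feature_rx_copy) ||
	    (modparm_rx_flip && !feature_rx_flip));
}

int
main(void)
{
	int copy, flip;

	for (copy = 0; copy <= 1; copy++)
		for (flip = 0; flip <= 1; flip++)
			printf("feature-rx-copy=%d feature-rx-flip=%d -> %s\n",
			    copy, flip,
			    use_copying_receiver(copy, flip) ?
			    "copy" : "flip/transfer");
	return (0);
}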
*/ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { struct mbuf *m; u_long pfn; if (np->rx_mbufs[i] == NULL) continue; m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i); ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); req = RING_GET_REQUEST(&np->rx, requeue_idx); pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; if (!np->copying_receiver) { gnttab_grant_foreign_transfer_ref(ref, xenbus_get_otherend_id(np->xbdev), pfn); } else { gnttab_grant_foreign_access_ref(ref, xenbus_get_otherend_id(np->xbdev), PFNTOMFN(pfn), 0); } req->gref = ref; req->id = requeue_idx; requeue_idx++; } np->rx.req_prod_pvt = requeue_idx; /* Step 3: All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver * domain a kick because we've probably just requeued some * packets. */ netfront_carrier_on(np); - notify_remote_via_irq(np->irq); + xen_intr_signal(np->xen_intr_handle); XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); network_alloc_rx_buffers(np); return (0); } static void show_device(struct netfront_info *sc) { #ifdef DEBUG if (sc) { IPRINTK("\n", sc->xn_ifno, be_state_name[sc->xn_backend_state], sc->xn_user_state ? "open" : "closed", sc->xn_evtchn, sc->xn_irq, sc->xn_tx_if, sc->xn_rx_if); } else { IPRINTK("\n"); } #endif } static void xn_query_features(struct netfront_info *np) { int val; device_printf(np->xbdev, "backend features:"); if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-sg", NULL, "%d", &val) < 0) val = 0; np->maxfrags = 1; if (val) { np->maxfrags = MAX_TX_REQ_FRAGS; printf(" feature-sg"); } if (xs_scanf(XST_NIL, xenbus_get_otherend_path(np->xbdev), "feature-gso-tcpv4", NULL, "%d", &val) < 0) val = 0; np->xn_ifp->if_capabilities &= ~(IFCAP_TSO4|IFCAP_LRO); if (val) { np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; printf(" feature-gso-tcp4"); } printf("\n"); } static int xn_configure_features(struct netfront_info *np) { int err; err = 0; #if __FreeBSD_version >= 700000 if ((np->xn_ifp->if_capenable & IFCAP_LRO) != 0) tcp_lro_free(&np->xn_lro); #endif np->xn_ifp->if_capenable = np->xn_ifp->if_capabilities & ~(IFCAP_LRO|IFCAP_TSO4); np->xn_ifp->if_hwassist &= ~CSUM_TSO; #if __FreeBSD_version >= 700000 if (xn_enable_lro && (np->xn_ifp->if_capabilities & IFCAP_LRO) != 0) { err = tcp_lro_init(&np->xn_lro); if (err) { device_printf(np->xbdev, "LRO initialization failed\n"); } else { np->xn_lro.ifp = np->xn_ifp; np->xn_ifp->if_capenable |= IFCAP_LRO; } } if ((np->xn_ifp->if_capabilities & IFCAP_TSO4) != 0) { np->xn_ifp->if_capenable |= IFCAP_TSO4; np->xn_ifp->if_hwassist |= CSUM_TSO; } #endif return (err); } -/** Create a network device. - * @param handle device handle +/** + * Create a network device. + * @param dev Newbus device representing this virtual NIC. */ int create_netdev(device_t dev) { int i; struct netfront_info *np; int err; struct ifnet *ifp; np = device_get_softc(dev); np->xbdev = dev; XN_LOCK_INIT(np, xennetif); ifmedia_init(&np->sc_media, 0, xn_ifmedia_upd, xn_ifmedia_sts); ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); np->rx_target = RX_MIN_TARGET; np->rx_min_target = RX_MIN_TARGET; np->rx_max_target = RX_MAX_TARGET; /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
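A self-contained illustration of the id freelist that the initialisation loop below sets up and that get_id_from_freelist()/add_id_to_freelist() consume elsewhere in the driver: each free slot stores the index of the next free slot (the driver keeps it cast to a pointer inside tx_mbufs[]), entry 0 is the head of the list, and id 0 terminates the chain, which is why a returned id of 0 means exhaustion. The helpers here are standalone stand-ins, not the driver's code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8

/* Free slots hold the index of the next free slot; plain integers here. */
static uintptr_t id_chain[RING_SIZE + 1];

static void
freelist_init(void)
{
	unsigned int i;

	for (i = 0; i <= RING_SIZE; i++)
		id_chain[i] = i + 1;
	id_chain[RING_SIZE] = 0;	/* terminate the chain */
}

static uintptr_t
get_id(void)
{
	uintptr_t id = id_chain[0];

	assert(id != 0);		/* list exhausted otherwise */
	id_chain[0] = id_chain[id];
	return (id);
}

static void
put_id(uintptr_t id)
{
	id_chain[id] = id_chain[0];
	id_chain[0] = id;
}

int
main(void)
{
	freelist_init();
	printf("got %ju\n", (uintmax_t)get_id());	/* 1 */
	printf("got %ju\n", (uintmax_t)get_id());	/* 2 */
	put_id(1);
	printf("got %ju\n", (uintmax_t)get_id());	/* 1 again */
	return (0);
}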
*/ for (i = 0; i <= NET_TX_RING_SIZE; i++) { np->tx_mbufs[i] = (void *) ((u_long) i+1); np->grant_tx_ref[i] = GRANT_REF_INVALID; } np->tx_mbufs[NET_TX_RING_SIZE] = (void *)0; for (i = 0; i <= NET_RX_RING_SIZE; i++) { np->rx_mbufs[i] = NULL; np->grant_rx_ref[i] = GRANT_REF_INVALID; } /* A grant for every tx ring slot */ if (gnttab_alloc_grant_references(NET_TX_RING_SIZE, &np->gref_tx_head) != 0) { IPRINTK("#### netfront can't alloc tx grant refs\n"); err = ENOMEM; goto exit; } /* A grant for every rx ring slot */ if (gnttab_alloc_grant_references(RX_MAX_TARGET, &np->gref_rx_head) != 0) { WPRINTK("#### netfront can't alloc rx grant refs\n"); gnttab_free_grant_references(np->gref_tx_head); err = ENOMEM; goto exit; } err = xen_net_read_mac(dev, np->mac); if (err) goto out; /* Set up ifnet structure */ ifp = np->xn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = np; if_initname(ifp, "xn", device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = xn_ioctl; ifp->if_output = ether_output; ifp->if_start = xn_start; #ifdef notyet ifp->if_watchdog = xn_watchdog; #endif ifp->if_init = xn_ifinit; ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; ifp->if_hwassist = XN_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_hw_tsomax = NF_TSO_MAXBURST; ether_ifattach(ifp, np->mac); callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); netfront_carrier_off(np); return (0); exit: gnttab_free_grant_references(np->gref_tx_head); out: return (err); } /** * Handle the change of state of the backend to Closing. We must delete our * device-layer structures now, to ensure that writes are flushed through to * the backend. Once is this done, we can switch to Closed in * acknowledgement. */ #if 0 static void netfront_closing(device_t dev) { #if 0 struct netfront_info *info = dev->dev_driver_data; DPRINTK("netfront_closing: %s removed\n", dev->nodename); close_netdev(info); #endif xenbus_switch_state(dev, XenbusStateClosed); } #endif static int netfront_detach(device_t dev) { struct netfront_info *info = device_get_softc(dev); DPRINTK("%s\n", xenbus_get_node(dev)); netif_free(info); return 0; } static void netif_free(struct netfront_info *info) { XN_LOCK(info); xn_stop(info); XN_UNLOCK(info); callout_drain(&info->xn_stat_ch); netif_disconnect_backend(info); if (info->xn_ifp != NULL) { ether_ifdetach(info->xn_ifp); if_free(info->xn_ifp); info->xn_ifp = NULL; } ifmedia_removeall(&info->sc_media); } static void netif_disconnect_backend(struct netfront_info *info) { XN_RX_LOCK(info); XN_TX_LOCK(info); netfront_carrier_off(info); XN_TX_UNLOCK(info); XN_RX_UNLOCK(info); free_ring(&info->tx_ring_ref, &info->tx.sring); free_ring(&info->rx_ring_ref, &info->rx.sring); - if (info->irq) - unbind_from_irqhandler(info->irq); - - info->irq = 0; + xen_intr_unbind(&info->xen_intr_handle); } static void free_ring(int *ref, void *ring_ptr_ref) { void **ring_ptr_ptr = ring_ptr_ref; if (*ref != GRANT_REF_INVALID) { /* This API frees the associated storage. 
*/ gnttab_end_foreign_access(*ref, *ring_ptr_ptr); *ref = GRANT_REF_INVALID; } *ring_ptr_ptr = NULL; } static int xn_ifmedia_upd(struct ifnet *ifp) { return (0); } static void xn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { ifmr->ifm_status = IFM_AVALID|IFM_ACTIVE; ifmr->ifm_active = IFM_ETHER|IFM_MANUAL; } /* ** Driver registration ** */ static device_method_t netfront_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netfront_probe), DEVMETHOD(device_attach, netfront_attach), DEVMETHOD(device_detach, netfront_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, netfront_suspend), DEVMETHOD(device_resume, netfront_resume), /* Xenbus interface */ DEVMETHOD(xenbus_otherend_changed, netfront_backend_changed), DEVMETHOD_END }; static driver_t netfront_driver = { "xn", netfront_methods, sizeof(struct netfront_info), }; devclass_t netfront_devclass; DRIVER_MODULE(xe, xenbusb_front, netfront_driver, netfront_devclass, NULL, NULL); diff --git a/sys/dev/xen/xenpci/evtchn.c b/sys/dev/xen/xenpci/evtchn.c deleted file mode 100644 index 2d9dd6d664d1..000000000000 --- a/sys/dev/xen/xenpci/evtchn.c +++ /dev/null @@ -1,467 +0,0 @@ -/****************************************************************************** - * evtchn.c - * - * A simplified event channel for para-drivers in unmodified linux - * - * Copyright (c) 2002-2005, K A Fraser - * Copyright (c) 2005, Intel Corporation - * - * This file may be distributed separately from the Linux kernel, or - * incorporated into other software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -#if defined(__i386__) -#define __ffs(word) (ffs(word) - 1) -#elif defined(__amd64__) -static inline unsigned long __ffs(unsigned long word) -{ - __asm__("bsfq %1,%0" - :"=r" (word) - :"rm" (word)); /* XXXRW: why no "cc"? */ - return word; -} -#else -#error "evtchn: unsupported architecture" -#endif - -#define is_valid_evtchn(x) ((x) != 0) -#define evtchn_from_irq(x) (irq_evtchn[irq].evtchn) - -static struct { - struct mtx lock; - driver_intr_t *handler; - void *arg; - int evtchn; - int close:1; /* close on unbind_from_irqhandler()? */ - int inuse:1; - int in_handler:1; - int mpsafe:1; -} irq_evtchn[256]; -static int evtchn_to_irq[NR_EVENT_CHANNELS] = { - [0 ... 
NR_EVENT_CHANNELS-1] = -1 }; - -static struct mtx irq_alloc_lock; -static device_t xenpci_device; - -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - -static unsigned int -alloc_xen_irq(void) -{ - static int warned; - unsigned int irq; - - mtx_lock(&irq_alloc_lock); - - for (irq = 1; irq < ARRAY_SIZE(irq_evtchn); irq++) { - if (irq_evtchn[irq].inuse) - continue; - irq_evtchn[irq].inuse = 1; - mtx_unlock(&irq_alloc_lock); - return irq; - } - - if (!warned) { - warned = 1; - printf("alloc_xen_irq: No available IRQ to bind to: " - "increase irq_evtchn[] size in evtchn.c.\n"); - } - - mtx_unlock(&irq_alloc_lock); - - return -ENOSPC; -} - -static void -free_xen_irq(int irq) -{ - - mtx_lock(&irq_alloc_lock); - irq_evtchn[irq].inuse = 0; - mtx_unlock(&irq_alloc_lock); -} - -int -irq_to_evtchn_port(int irq) -{ - - return irq_evtchn[irq].evtchn; -} - -void -mask_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - - synch_set_bit(port, &s->evtchn_mask[0]); -} - -void -unmask_evtchn(int port) -{ - evtchn_unmask_t op = { .port = port }; - - HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op); -} - -int -bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) -{ - struct evtchn_alloc_unbound alloc_unbound; - unsigned int irq; - int error; - - irq = alloc_xen_irq(); - if (irq < 0) - return irq; - - mtx_lock(&irq_evtchn[irq].lock); - - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = remote_domain; - error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (error) { - mtx_unlock(&irq_evtchn[irq].lock); - free_xen_irq(irq); - return (-error); - } - - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = alloc_unbound.port; - irq_evtchn[irq].close = 1; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; - - evtchn_to_irq[alloc_unbound.port] = irq; - - unmask_evtchn(alloc_unbound.port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; - return (0); -} - -int -bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp) -{ - struct evtchn_bind_interdomain bind_interdomain; - unsigned int irq; - int error; - - irq = alloc_xen_irq(); - if (irq < 0) - return irq; - - mtx_lock(&irq_evtchn[irq].lock); - - bind_interdomain.remote_dom = remote_domain; - bind_interdomain.remote_port = remote_port; - error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (error) { - mtx_unlock(&irq_evtchn[irq].lock); - free_xen_irq(irq); - return (-error); - } - - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = bind_interdomain.local_port; - irq_evtchn[irq].close = 1; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; - - evtchn_to_irq[bind_interdomain.local_port] = irq; - - unmask_evtchn(bind_interdomain.local_port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; - return (0); -} - - -int -bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - - irq = alloc_xen_irq(); - if (irq < 0) - return irq; - - mtx_lock(&irq_evtchn[irq].lock); - - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = caller_port; - 
irq_evtchn[irq].close = 0; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; - - evtchn_to_irq[caller_port] = irq; - - unmask_evtchn(caller_port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; - return (0); -} - -void -unbind_from_irqhandler(unsigned int irq) -{ - int evtchn; - - mtx_lock(&irq_evtchn[irq].lock); - - evtchn = evtchn_from_irq(irq); - - if (is_valid_evtchn(evtchn)) { - evtchn_to_irq[evtchn] = -1; - mask_evtchn(evtchn); - if (irq_evtchn[irq].close) { - struct evtchn_close close = { .port = evtchn }; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) - panic("EVTCHNOP_close failed"); - } - } - - irq_evtchn[irq].handler = NULL; - irq_evtchn[irq].evtchn = 0; - - mtx_unlock(&irq_evtchn[irq].lock); - - while (irq_evtchn[irq].in_handler) - cpu_relax(); - - free_xen_irq(irq); -} - -void notify_remote_via_irq(int irq) -{ - int evtchn; - - evtchn = evtchn_from_irq(irq); - if (is_valid_evtchn(evtchn)) - notify_remote_via_evtchn(evtchn); -} - -static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh, - unsigned int idx) -{ - return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]); -} - -static void -evtchn_interrupt(void *arg) -{ - unsigned int l1i, l2i, port; - unsigned long masked_l1, masked_l2; - /* XXX: All events are bound to vcpu0 but irq may be redirected. */ - int cpu = 0; /*smp_processor_id();*/ - driver_intr_t *handler; - void *handler_arg; - int irq, handler_mpsafe; - shared_info_t *s = HYPERVISOR_shared_info; - vcpu_info_t *v = &s->vcpu_info[cpu]; - struct pcpu *pc = pcpu_find(cpu); - unsigned long l1, l2; - - v->evtchn_upcall_pending = 0; - -#if 0 -#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ - /* Clear master flag /before/ clearing selector flag. 
*/ - wmb(); -#endif -#endif - - l1 = atomic_readandclear_long(&v->evtchn_pending_sel); - - l1i = pc->pc_last_processed_l1i; - l2i = pc->pc_last_processed_l2i; - - while (l1 != 0) { - - l1i = (l1i + 1) % LONG_BIT; - masked_l1 = l1 & ((~0UL) << l1i); - - if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */ - l1i = LONG_BIT - 1; - l2i = LONG_BIT - 1; - continue; - } - l1i = __ffs(masked_l1); - - do { - l2 = active_evtchns(cpu, s, l1i); - - l2i = (l2i + 1) % LONG_BIT; - masked_l2 = l2 & ((~0UL) << l2i); - - if (masked_l2 == 0) { /* if we masked out all events, move on */ - l2i = LONG_BIT - 1; - break; - } - l2i = __ffs(masked_l2); - - /* process port */ - port = (l1i * LONG_BIT) + l2i; - synch_clear_bit(port, &s->evtchn_pending[0]); - - irq = evtchn_to_irq[port]; - if (irq < 0) - continue; - - mtx_lock(&irq_evtchn[irq].lock); - handler = irq_evtchn[irq].handler; - handler_arg = irq_evtchn[irq].arg; - handler_mpsafe = irq_evtchn[irq].mpsafe; - if (unlikely(handler == NULL)) { - printf("Xen IRQ%d (port %d) has no handler!\n", - irq, port); - mtx_unlock(&irq_evtchn[irq].lock); - continue; - } - irq_evtchn[irq].in_handler = 1; - mtx_unlock(&irq_evtchn[irq].lock); - - //local_irq_enable(); - if (!handler_mpsafe) - mtx_lock(&Giant); - handler(handler_arg); - if (!handler_mpsafe) - mtx_unlock(&Giant); - //local_irq_disable(); - - mtx_lock(&irq_evtchn[irq].lock); - irq_evtchn[irq].in_handler = 0; - mtx_unlock(&irq_evtchn[irq].lock); - - /* if this is the final port processed, we'll pick up here+1 next time */ - pc->pc_last_processed_l1i = l1i; - pc->pc_last_processed_l2i = l2i; - - } while (l2i != LONG_BIT - 1); - - l2 = active_evtchns(cpu, s, l1i); - if (l2 == 0) /* we handled all ports, so we can clear the selector bit */ - l1 &= ~(1UL << l1i); - } -} - -void -irq_suspend(void) -{ - struct xenpci_softc *scp = device_get_softc(xenpci_device); - - /* - * Take our interrupt handler out of the list of handlers - * that can handle this irq. - */ - if (scp->intr_cookie != NULL) { - if (BUS_TEARDOWN_INTR(device_get_parent(xenpci_device), - xenpci_device, scp->res_irq, scp->intr_cookie) != 0) - printf("intr teardown failed.. 
continuing\n"); - scp->intr_cookie = NULL; - } -} - -void -irq_resume(void) -{ - struct xenpci_softc *scp = device_get_softc(xenpci_device); - int evtchn, irq; - - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) { - mask_evtchn(evtchn); - evtchn_to_irq[evtchn] = -1; - } - - for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) - irq_evtchn[irq].evtchn = 0; - - BUS_SETUP_INTR(device_get_parent(xenpci_device), - xenpci_device, scp->res_irq, INTR_TYPE_MISC, - NULL, evtchn_interrupt, NULL, &scp->intr_cookie); -} - -int -xenpci_irq_init(device_t device, struct xenpci_softc *scp) -{ - int irq, cpu; - int error; - - mtx_init(&irq_alloc_lock, "xen-irq-lock", NULL, MTX_DEF); - - for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) - mtx_init(&irq_evtchn[irq].lock, "irq-evtchn", NULL, MTX_DEF); - - for (cpu = 0; cpu < mp_ncpus; cpu++) { - pcpu_find(cpu)->pc_last_processed_l1i = LONG_BIT - 1; - pcpu_find(cpu)->pc_last_processed_l2i = LONG_BIT - 1; - } - - error = BUS_SETUP_INTR(device_get_parent(device), device, - scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, NULL, evtchn_interrupt, - NULL, &scp->intr_cookie); - if (error) - return (error); - - xenpci_device = device; - - return (0); -} diff --git a/sys/dev/xen/xenpci/xenpci.c b/sys/dev/xen/xenpci/xenpci.c index 2d7467691577..0b1762df03b4 100644 --- a/sys/dev/xen/xenpci/xenpci.c +++ b/sys/dev/xen/xenpci/xenpci.c @@ -1,458 +1,362 @@ /* * Copyright (c) 2008 Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include -#include -#include -#include #include #include #include #include -#include + +#include #include #include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include - #include -/* - * These variables are used by the rest of the kernel to access the - * hypervisor. - */ -char *hypercall_stubs; -shared_info_t *HYPERVISOR_shared_info; -static vm_paddr_t shared_info_pa; +extern void xen_intr_handle_upcall(struct trapframe *trap_frame); + static device_t nexus; /* * This is used to find our platform device instance. */ static devclass_t xenpci_devclass; -/* - * Return the CPUID base address for Xen functions. 
- */ -static uint32_t -xenpci_cpuid_base(void) +static int +xenpci_intr_filter(void *trap_frame) { - uint32_t base, regs[4]; - - for (base = 0x40000000; base < 0x40010000; base += 0x100) { - do_cpuid(base, regs); - if (!memcmp("XenVMMXenVMM", ®s[1], 12) - && (regs[0] - base) >= 2) - return (base); - } - return (0); + xen_intr_handle_upcall(trap_frame); + return (FILTER_HANDLED); } -/* - * Allocate and fill in the hypcall page. - */ static int -xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp) +xenpci_irq_init(device_t device, struct xenpci_softc *scp) { - uint32_t base, regs[4]; - int i; - - base = xenpci_cpuid_base(); - if (!base) { - device_printf(dev, "Xen platform device but not Xen VMM\n"); - return (EINVAL); - } + int error; - if (bootverbose) { - do_cpuid(base + 1, regs); - device_printf(dev, "Xen version %d.%d.\n", - regs[0] >> 16, regs[0] & 0xffff); - } + error = BUS_SETUP_INTR(device_get_parent(device), device, + scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, + xenpci_intr_filter, NULL, /*trap_frame*/NULL, + &scp->intr_cookie); + if (error) + return error; /* - * Find the hypercall pages. + * When using the PCI event delivery callback we cannot assign + * events to specific vCPUs, so all events are delivered to vCPU#0 by + * Xen. Since the PCI interrupt can fire on any CPU by default, we + * need to bind it to vCPU#0 in order to ensure that + * xen_intr_handle_upcall always gets called on vCPU#0. */ - do_cpuid(base + 2, regs); - - hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK); - - for (i = 0; i < regs[0]; i++) { - wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); - } + error = BUS_BIND_INTR(device_get_parent(device), device, + scp->res_irq, 0); + if (error) + return error; + xen_hvm_set_callback(device); return (0); } -/* - * After a resume, re-initialise the hypercall page. - */ -static void -xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp) -{ - uint32_t base, regs[4]; - int i; - - base = xenpci_cpuid_base(); - - do_cpuid(base + 2, regs); - for (i = 0; i < regs[0]; i++) { - wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); - } -} - -/* - * Tell the hypervisor how to contact us for event channel callbacks. - */ -static void -xenpci_set_callback(device_t dev) -{ - int irq; - uint64_t callback; - struct xen_hvm_param xhp; - - irq = pci_get_irq(dev); - if (irq < 16) { - callback = irq; - } else { - callback = (pci_get_intpin(dev) - 1) & 3; - callback |= pci_get_slot(dev) << 11; - callback |= 1ull << 56; - } - - xhp.domid = DOMID_SELF; - xhp.index = HVM_PARAM_CALLBACK_IRQ; - xhp.value = callback; - if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp)) - panic("Can't set evtchn callback"); -} - - /* * Deallocate anything allocated by xenpci_allocate_resources. */ static int xenpci_deallocate_resources(device_t dev) { struct xenpci_softc *scp = device_get_softc(dev); if (scp->res_irq != 0) { bus_deactivate_resource(dev, SYS_RES_IRQ, scp->rid_irq, scp->res_irq); bus_release_resource(dev, SYS_RES_IRQ, scp->rid_irq, scp->res_irq); scp->res_irq = 0; } if (scp->res_memory != 0) { bus_deactivate_resource(dev, SYS_RES_MEMORY, scp->rid_memory, scp->res_memory); bus_release_resource(dev, SYS_RES_MEMORY, scp->rid_memory, scp->res_memory); scp->res_memory = 0; } return (0); } /* * Allocate irq and memory resources. 
*/ static int xenpci_allocate_resources(device_t dev) { struct xenpci_softc *scp = device_get_softc(dev); scp->res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &scp->rid_irq, RF_SHAREABLE|RF_ACTIVE); if (scp->res_irq == NULL) { printf("xenpci Could not allocate irq.\n"); goto errexit; } scp->rid_memory = PCIR_BAR(1); scp->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &scp->rid_memory, RF_ACTIVE); if (scp->res_memory == NULL) { printf("xenpci Could not allocate memory bar.\n"); goto errexit; } scp->phys_next = rman_get_start(scp->res_memory); return (0); errexit: /* Cleanup anything we may have assigned. */ xenpci_deallocate_resources(dev); return (ENXIO); /* For want of a better idea. */ } /* * Allocate a physical address range from our mmio region. */ static int xenpci_alloc_space_int(struct xenpci_softc *scp, size_t sz, vm_paddr_t *pa) { if (scp->phys_next + sz > rman_get_end(scp->res_memory)) { return (ENOMEM); } *pa = scp->phys_next; scp->phys_next += sz; return (0); } /* * Allocate a physical address range from our mmio region. */ int xenpci_alloc_space(size_t sz, vm_paddr_t *pa) { device_t dev = devclass_get_device(xenpci_devclass, 0); if (dev) { return (xenpci_alloc_space_int(device_get_softc(dev), sz, pa)); } else { return (ENOMEM); } } static struct resource * xenpci_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { return (BUS_ALLOC_RESOURCE(nexus, child, type, rid, start, end, count, flags)); } static int xenpci_release_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { return (BUS_RELEASE_RESOURCE(nexus, child, type, rid, r)); } static int xenpci_activate_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { return (BUS_ACTIVATE_RESOURCE(nexus, child, type, rid, r)); } static int xenpci_deactivate_resource(device_t dev, device_t child, int type, int rid, struct resource *r) { return (BUS_DEACTIVATE_RESOURCE(nexus, child, type, rid, r)); } -/* - * Called very early in the resume sequence - reinitialise the various - * bits of Xen machinery including the hypercall page and the shared - * info page. - */ -void -xenpci_resume() -{ - device_t dev = devclass_get_device(xenpci_devclass, 0); - struct xenpci_softc *scp = device_get_softc(dev); - struct xen_add_to_physmap xatp; - - xenpci_resume_hypercall_stubs(dev, scp); - - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = shared_info_pa >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - panic("HYPERVISOR_memory_op failed"); - - pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa); - - xenpci_set_callback(dev); - - gnttab_resume(); - irq_resume(); -} - /* * Probe - just check device ID. */ static int xenpci_probe(device_t dev) { if (pci_get_devid(dev) != 0x00015853) return (ENXIO); device_set_desc(dev, "Xen Platform Device"); return (bus_generic_probe(dev)); } /* * Attach - find resources and talk to Xen. */ static int xenpci_attach(device_t dev) { - int error; struct xenpci_softc *scp = device_get_softc(dev); - struct xen_add_to_physmap xatp; - vm_offset_t shared_va; devclass_t dc; + int error; /* * Find and record nexus0. Since we are not really on the * PCI bus, all resource operations are directed to nexus * instead of through our parent. 
*/ if ((dc = devclass_find("nexus")) == 0 || (nexus = devclass_get_device(dc, 0)) == 0) { device_printf(dev, "unable to find nexus."); return (ENOENT); } error = xenpci_allocate_resources(dev); if (error) { device_printf(dev, "xenpci_allocate_resources failed(%d).\n", error); goto errexit; } - error = xenpci_init_hypercall_stubs(dev, scp); - if (error) { - device_printf(dev, "xenpci_init_hypercall_stubs failed(%d).\n", - error); - goto errexit; - } - - setup_xen_features(); - - xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa); - - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = shared_info_pa >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - panic("HYPERVISOR_memory_op failed"); - - shared_va = kva_alloc(PAGE_SIZE); - pmap_kenter(shared_va, shared_info_pa); - HYPERVISOR_shared_info = (void *) shared_va; - /* * Hook the irq up to evtchn */ - xenpci_irq_init(dev, scp); - xenpci_set_callback(dev); + error = xenpci_irq_init(dev, scp); + if (error) { + device_printf(dev, "xenpci_irq_init failed(%d).\n", + error); + goto errexit; + } return (bus_generic_attach(dev)); errexit: /* * Undo anything we may have done. */ xenpci_deallocate_resources(dev); return (error); } /* * Detach - reverse anything done by attach. */ static int xenpci_detach(device_t dev) { struct xenpci_softc *scp = device_get_softc(dev); device_t parent = device_get_parent(dev); /* * Take our interrupt handler out of the list of handlers * that can handle this irq. */ if (scp->intr_cookie != NULL) { if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq, scp->intr_cookie) != 0) device_printf(dev, "intr teardown failed.. continuing\n"); scp->intr_cookie = NULL; } /* * Deallocate any system resources we may have * allocated on behalf of this driver. */ return (xenpci_deallocate_resources(dev)); } +static int +xenpci_suspend(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + if (scp->intr_cookie != NULL) { + if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq, + scp->intr_cookie) != 0) + printf("intr teardown failed.. 
continuing\n"); + scp->intr_cookie = NULL; + } + + return (bus_generic_suspend(dev)); +} + +static int +xenpci_resume(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + BUS_SETUP_INTR(parent, dev, scp->res_irq, + INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL, + /*trap_frame*/NULL, &scp->intr_cookie); + xen_hvm_set_callback(dev); + return (bus_generic_resume(dev)); +} + static device_method_t xenpci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xenpci_probe), DEVMETHOD(device_attach, xenpci_attach), DEVMETHOD(device_detach, xenpci_detach), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), + DEVMETHOD(device_suspend, xenpci_suspend), + DEVMETHOD(device_resume, xenpci_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, xenpci_alloc_resource), DEVMETHOD(bus_release_resource, xenpci_release_resource), DEVMETHOD(bus_activate_resource, xenpci_activate_resource), DEVMETHOD(bus_deactivate_resource, xenpci_deactivate_resource), { 0, 0 } }; static driver_t xenpci_driver = { "xenpci", xenpci_methods, sizeof(struct xenpci_softc), }; DRIVER_MODULE(xenpci, pci, xenpci_driver, xenpci_devclass, 0, 0); diff --git a/sys/dev/xen/xenpci/xenpcivar.h b/sys/dev/xen/xenpci/xenpcivar.h index a57c080b31d3..527a291c8801 100644 --- a/sys/dev/xen/xenpci/xenpcivar.h +++ b/sys/dev/xen/xenpci/xenpcivar.h @@ -1,44 +1,43 @@ /* * Copyright (c) 2008 Citrix Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $FreeBSD$ */ /* * One of these per allocated device. */ struct xenpci_softc { int rid_ioport; int rid_memory; int rid_irq; struct resource* res_memory; /* Resource for mem range. */ struct resource* res_irq; /* Resource for irq range. 
*/ void *intr_cookie; vm_paddr_t phys_next; /* next page from mem range */ }; -extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp); extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa); -extern void xenpci_resume(void); -extern void xen_suspend(void); diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index 618436d830ac..adedbc4999f6 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -1,396 +1,415 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: vector.s, 386BSD 0.1 unknown origin * $FreeBSD$ */ /* * Interrupt entry points for external interrupts triggered by I/O APICs * as well as IPI handlers. */ #include "opt_smp.h" #include #include #include "assym.s" /* * I/O Interrupt Entry Point. Rather than having one entry point for * each interrupt source, we use one entry point for each 32-bit word * in the ISR. The handler determines the highest bit set in the ISR, * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. */ #define ISR_VEC(index, vec_name) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ PUSH_FRAME ; \ SET_KERNEL_SREGS ; \ cld ; \ FAKE_MCOUNT(TF_EIP(%esp)) ; \ movl lapic, %edx ; /* pointer to local APIC */ \ movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ jz 1f ; \ addl $(32 * index),%eax ; \ pushl %esp ; \ pushl %eax ; /* pass the IRQ */ \ call lapic_handle_intr ; \ addl $8, %esp ; /* discard parameter */ \ 1: ; \ MEXITCOUNT ; \ jmp doreti /* * Handle "spurious INTerrupts". * Notes: * This is different than the "spurious INTerrupt" generated by an * 8259 PIC for missing INTs. See the APIC documentation for details. * This routine should NOT do an 'EOI' cycle. 
*/ .text SUPERALIGN_TEXT IDTVEC(spuriousint) /* No EOI cycle used here */ iret ISR_VEC(1, apic_isr1) ISR_VEC(2, apic_isr2) ISR_VEC(3, apic_isr3) ISR_VEC(4, apic_isr4) ISR_VEC(5, apic_isr5) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) /* * Local APIC periodic timer handler. */ .text SUPERALIGN_TEXT IDTVEC(timerint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call lapic_handle_timer add $4, %esp MEXITCOUNT jmp doreti /* * Local APIC CMCI handler. */ .text SUPERALIGN_TEXT IDTVEC(cmcint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_cmc MEXITCOUNT jmp doreti /* * Local APIC error interrupt handler. */ .text SUPERALIGN_TEXT IDTVEC(errorint) PUSH_FRAME SET_KERNEL_SREGS cld FAKE_MCOUNT(TF_EIP(%esp)) call lapic_handle_error MEXITCOUNT jmp doreti +#ifdef XENHVM +/* + * Xen event channel upcall interrupt handler. + * Only used when the hypervisor supports direct vector callbacks. + */ + .text + SUPERALIGN_TEXT +IDTVEC(xen_intr_upcall) + PUSH_FRAME + SET_KERNEL_SREGS + cld + FAKE_MCOUNT(TF_EIP(%esp)) + pushl %esp + call xen_intr_handle_upcall + add $4, %esp + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invltlb) pushl %eax pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs movl $KPSEL, %eax /* Private space selector */ movl %eax, %fs movl PCPU(CPUID), %eax popl %fs #ifdef COUNT_XINVLTLB_HITS incl xhits_gbl(,%eax,4) #endif #ifdef COUNT_IPIS movl ipi_invltlb_counts(,%eax,4),%eax incl (%eax) #endif #endif movl %cr3, %eax /* invalidate the TLB */ movl %eax, %cr3 movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popl %ds popl %eax iret /* * Single page TLB shootdown */ .text SUPERALIGN_TEXT IDTVEC(invlpg) pushl %eax pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs movl $KPSEL, %eax /* Private space selector */ movl %eax, %fs movl PCPU(CPUID), %eax popl %fs #ifdef COUNT_XINVLTLB_HITS incl xhits_pg(,%eax,4) #endif #ifdef COUNT_IPIS movl ipi_invlpg_counts(,%eax,4),%eax incl (%eax) #endif #endif movl smp_tlb_addr1, %eax invlpg (%eax) /* invalidate single page */ movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popl %ds popl %eax iret /* * Page range TLB shootdown. */ .text SUPERALIGN_TEXT IDTVEC(invlrng) pushl %eax pushl %edx pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds #if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) pushl %fs movl $KPSEL, %eax /* Private space selector */ movl %eax, %fs movl PCPU(CPUID), %eax popl %fs #ifdef COUNT_XINVLTLB_HITS incl xhits_rng(,%eax,4) #endif #ifdef COUNT_IPIS movl ipi_invlrng_counts(,%eax,4),%eax incl (%eax) #endif #endif movl smp_tlb_addr1, %edx movl smp_tlb_addr2, %eax 1: invlpg (%edx) /* invalidate single page */ addl $PAGE_SIZE, %edx cmpl %eax, %edx jb 1b movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popl %ds popl %edx popl %eax iret /* * Invalidate cache. 
*/ .text SUPERALIGN_TEXT IDTVEC(invlcache) pushl %eax pushl %ds movl $KDSEL, %eax /* Kernel data selector */ movl %eax, %ds #ifdef COUNT_IPIS pushl %fs movl $KPSEL, %eax /* Private space selector */ movl %eax, %fs movl PCPU(CPUID), %eax popl %fs movl ipi_invlcache_counts(,%eax,4),%eax incl (%eax) #endif wbinvd movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ lock incl smp_tlb_wait popl %ds popl %eax iret /* * Handler for IPIs sent via the per-cpu IPI bitmap. */ #ifndef XEN .text SUPERALIGN_TEXT IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME SET_KERNEL_SREGS cld movl lapic, %edx movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ FAKE_MCOUNT(TF_EIP(%esp)) call ipi_bitmap_handler MEXITCOUNT jmp doreti #endif /* * Executed by a CPU when it receives an IPI_STOP from another CPU. */ .text SUPERALIGN_TEXT IDTVEC(cpustop) PUSH_FRAME SET_KERNEL_SREGS cld movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ call cpustop_handler POP_FRAME iret /* * Executed by a CPU when it receives an IPI_SUSPEND from another CPU. */ #ifndef XEN .text SUPERALIGN_TEXT IDTVEC(cpususpend) PUSH_FRAME SET_KERNEL_SREGS cld movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ call cpususpend_handler POP_FRAME jmp doreti_iret #endif /* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. * * - Calls the generic rendezvous action function. */ .text SUPERALIGN_TEXT IDTVEC(rendezvous) PUSH_FRAME SET_KERNEL_SREGS cld #ifdef COUNT_IPIS movl PCPU(CPUID), %eax movl ipi_rendezvous_counts(,%eax,4), %eax incl (%eax) #endif call smp_rendezvous_action movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ POP_FRAME iret /* * Clean up when we lose out on the lazy context switch optimization. * ie: when we are about to release a PTD but a cpu is still borrowing it. */ SUPERALIGN_TEXT IDTVEC(lazypmap) PUSH_FRAME SET_KERNEL_SREGS cld call pmap_lazyfix_action movl lapic, %eax movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ POP_FRAME iret #endif /* SMP */ diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 9a710e57f969..c43031644f8b 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1,3730 +1,3743 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atalk.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_xbox.h" #include "opt_kdtrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif #ifdef XEN /* XEN includes */ -#include +#include #include -#include #include #include #include void Xhypervisor_callback(void); void failsafe_callback(void); extern trap_info_t trap_table[]; struct proc_ldt default_proc_ldt; extern int init_first; int running_xen = 1; extern unsigned long physfree; #endif /* XEN */ /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 
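The two macros just defined, CS_SECURE() and EFL_SECURE(), are what the sigreturn paths later in this file use to reject a forged signal context: a code selector is accepted only if its privilege level is ring 3, and a proposed %eflags is accepted only if every bit it changes lies inside PSL_USERCHANGE. The standalone sketch below shows the same checks; DEMO_USERCHANGE and DEMO_SEL_UPL are illustrative values, the real ones come from machine/psl.h and machine/segments.h.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for PSL_USERCHANGE and SEL_UPL. */
#define DEMO_USERCHANGE	0x00000cd5u	/* arithmetic flags, DF, OF */
#define DEMO_SEL_UPL	3u		/* requested privilege level 3 */

/* Accept a new eflags only if every changed bit is user-changeable. */
static int
efl_secure(uint32_t new_efl, uint32_t old_efl)
{
	return (((new_efl ^ old_efl) & ~DEMO_USERCHANGE) == 0);
}

/* Accept only a ring-3 code selector (the low two bits are the RPL). */
static int
cs_secure(uint32_t cs)
{
	return ((cs & 3) == DEMO_SEL_UPL);
}

int
main(void)
{
	uint32_t live = 0x00000202;	/* a typical user-mode eflags value */

	printf("toggle carry flag:  %d\n", efl_secure(live ^ 0x1, live));
	printf("raise IOPL to 3:    %d\n", efl_secure(live | 0x3000, live));
	printf("user %%cs (RPL 3):   %d\n", cs_secure(0x1b));
	printf("kernel %%cs (RPL 0): %d\n", cs_secure(0x08));
	return (0);
}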
int _udatasel, _ucodesel; u_int basemem; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif realmem = Maxmem; /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifndef XEN cpu_setregs(); #endif } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. 
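The comment above summarizes what the sendsig() family below does with the user stack. Concretely, each variant places the signal frame either at the top of the alternate signal stack (when one is active for the signal) or immediately below the interrupted %esp, and the native sendsig() additionally rounds the address down to a 16-byte boundary. A standalone sketch of that placement arithmetic; struct demo_sigframe and its size are illustrative, not the real struct sigframe layout.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_sigframe {		/* illustrative stand-in for struct sigframe */
	uint32_t sf_signum;
	uint32_t sf_siginfo;
	uint32_t sf_ucontext;
	uint8_t	 sf_uc[512];	/* saved context; size chosen for the demo */
};

/* Pick the frame address: alternate stack if active, else below user %esp. */
static uintptr_t
place_frame(uintptr_t user_esp, uintptr_t altstack_base, size_t altstack_size,
    int on_altstack)
{
	uintptr_t sp;

	if (on_altstack)
		sp = altstack_base + altstack_size - sizeof(struct demo_sigframe);
	else
		sp = user_esp - sizeof(struct demo_sigframe);
	return (sp & ~(uintptr_t)0xF);	/* 16-byte alignment, as sendsig() does */
}

int
main(void)
{
	printf("on the user stack: %#lx\n",
	    (unsigned long)place_frame(0xbfbfe123, 0, 0, 0));
	printf("on the alt stack:  %#lx\n",
	    (unsigned long)place_frame(0xbfbfe123, 0x20000000, 8192, 1));
	return (0);
}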
*/ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. 
*/ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. 
This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sf.sf_uc.uc_mcontext.mc_flags = 0; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. 
*/ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. 
Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. 
* Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct trapframe *regs; ucontext_t *ucp; int cs, eflags, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
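One detail of the vm86 branches above that is easy to miss: the user-proposed %eflags is never taken wholesale. Bits inside VM_USERCHANGE (or VME_USERCHANGE when VME is available) come from the user context, everything else is kept from the live trapframe, and PSL_VM is forced back on. The bit-merge idiom in isolation, with made-up values standing in for the real masks:

#include <stdint.h>
#include <stdio.h>

/* Take the masked bits from 'proposed' and keep the rest from 'live'. */
static uint32_t
merge_flags(uint32_t live, uint32_t proposed, uint32_t mask)
{
	return ((live & ~mask) | (proposed & mask));
}

int
main(void)
{
	/* Illustrative values; the kernel uses VM_USERCHANGE/VME_USERCHANGE. */
	uint32_t live = 0x00023202, proposed = 0x00000a97, mask = 0x00000cd5;

	printf("merged eflags: %#010x\n",
	    (unsigned)merge_flags(live, proposed, mask));
	return (0);
}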
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* Not applicable */ } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { uint64_t tsc1, tsc2; uint64_t acnt, mcnt, perf; register_t reg; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if ((cpu_feature & CPUID_TSC) == 0) return (EOPNOTSUPP); /* * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, * DELAY(9) based logic fails. */ if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); } #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); if (tsc_is_invariant) { wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); tsc1 = rdtsc(); DELAY(1000); mcnt = rdmsr(MSR_MPERF); acnt = rdmsr(MSR_APERF); tsc2 = rdtsc(); intr_restore(reg); perf = 1000 * acnt / mcnt; *rate = (tsc2 - tsc1) * perf; } else { tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); *rate = (tsc2 - tsc1) * 1000; } #ifdef SMP if (smp_cpus > 1) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } #endif return (0); } #ifdef XEN +static void +idle_block(void) +{ + + HYPERVISOR_sched_op(SCHEDOP_block, 0); +} + void cpu_halt(void) { HYPERVISOR_shutdown(SHUTDOWN_poweroff); } int scheduler_running; static void cpu_idle_hlt(sbintime_t sbt) { scheduler_running = 1; enable_intr(); idle_block(); } #else /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) halt(); } #endif void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ TUNABLE_INT("machdep.idle_mwait", &idle_mwait); SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, 0, "Use MONITOR/MWAIT for short idle"); #define STATE_RUNNING 0x0 #define STATE_MWAIT 0x1 #define STATE_SLEEPING 0x2 static void cpu_idle_acpi(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* See comments in cpu_idle_hlt(). 
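cpu_est_clockrate() above brackets a 1000-microsecond DELAY() with TSC reads, so multiplying the observed delta by 1000 converts ticks-per-millisecond into Hz; on invariant-TSC CPUs the APERF/MPERF ratio is folded in so the reported rate reflects the effective (possibly throttled) frequency rather than the reference one. A worked example of just the arithmetic, with made-up counter readings:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* Pretend readings taken around a 1000us delay (values are made up). */
	uint64_t tsc1 = 1000000, tsc2 = 3400000;	/* 2.4M TSC ticks per ms */
	uint64_t aperf = 1800000, mperf = 2400000;	/* ran at 75% of reference */
	uint64_t perf, rate;

	/* Plain path: ticks per millisecond times 1000 gives Hz. */
	rate = (tsc2 - tsc1) * 1000;
	printf("plain estimate:      %llu Hz\n", (unsigned long long)rate);

	/* Invariant-TSC path: scale by APERF/MPERF (the 1000 is folded in). */
	perf = 1000 * aperf / mperf;
	rate = (tsc2 - tsc1) * perf;
	printf("APERF/MPERF scaled:  %llu Hz\n", (unsigned long long)rate);
	return (0);
}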
*/ disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(sbt); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } #ifndef XEN static void cpu_idle_hlt(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_SLEEPING; /* * Since we may be in a critical section from cpu_idle(), if * an interrupt fires during that critical section we may have * a pending preemption. If the CPU halts, then that thread * may not execute until a later interrupt awakens the CPU. * To handle this race, check for a runnable thread after * disabling interrupts and immediately return if one is * found. Also, we must absolutely guarentee that hlt is * the next instruction after sti. This ensures that any * interrupt that fires after the call to disable_intr() will * immediately awaken the CPU from hlt. Finally, please note * that on x86 this works fine because of interrupts enabled only * after the instruction following sti takes place, while IF is set * to 1 immediately, allowing hlt instruction to acknowledge the * interrupt. */ disable_intr(); if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); *state = STATE_RUNNING; } #endif /* * MWAIT cpu power states. Lower 4 bits are sub-states. */ #define MWAIT_C0 0xf0 #define MWAIT_C1 0x00 #define MWAIT_C2 0x10 #define MWAIT_C3 0x20 #define MWAIT_C4 0x30 static void cpu_idle_mwait(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_MWAIT; /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) { enable_intr(); *state = STATE_RUNNING; return; } cpu_monitor(state, 0, 0); if (*state == STATE_MWAIT) __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else enable_intr(); *state = STATE_RUNNING; } static void cpu_idle_spin(sbintime_t sbt) { int *state; int i; state = (int *)PCPU_PTR(monitorbuf); *state = STATE_RUNNING; /* * The sched_runnable() call is racy but as long as there is * a loop missing it one time will have just a little impact if any * (and it is much better than missing the check at all). */ for (i = 0; i < 1000; i++) { if (sched_runnable()) return; cpu_spinwait(); } } /* * C1E renders the local APIC timer dead, so we disable it by * reading the Interrupt Pending Message register and clearing * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). * * Reference: * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" * #32559 revision 3.00+ */ #define MSR_AMDK8_IPM 0xc0010055 #define AMDK8_SMIONCMPHALT (1ULL << 27) #define AMDK8_C1EONCMPHALT (1ULL << 28) #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) static void cpu_probe_amdc1e(void) { /* * Detect the presence of C1E capability mostly on latest * dual-cores (or future) k8 family. */ if (cpu_vendor_id == CPU_VENDOR_AMD && (cpu_id & 0x00000f00) == 0x00000f00 && (cpu_id & 0x0fff0000) >= 0x00040000) { cpu_ident_amdc1e = 1; } } #ifdef XEN void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt; #else void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; #endif void cpu_idle(int busy) { #ifndef XEN uint64_t msr; #endif sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); #if defined(MP_WATCHDOG) && !defined(XEN) ap_watchdog(PCPU_GET(cpuid)); #endif #ifndef XEN /* If we are busy - try to use fast methods. */ if (busy) { if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { cpu_idle_mwait(busy); goto out; } } #endif /* If we have time - switch timers into idle mode. 
*/ if (!busy) { critical_enter(); sbt = cpu_idleclock(); } #ifndef XEN /* Apply AMD APIC timer C1E workaround. */ if (cpu_ident_amdc1e && cpu_disable_deep_sleep) { msr = rdmsr(MSR_AMDK8_IPM); if (msr & AMDK8_CMPHALT) wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } #endif /* Call main idle method. */ cpu_idle_fn(sbt); /* Switch timers mack into active mode. */ if (!busy) { cpu_activeclock(); critical_exit(); } #ifndef XEN out: #endif CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; int *state; pcpu = pcpu_find(cpu); state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ if (*state == STATE_SLEEPING) return (0); if (*state == STATE_MWAIT) *state = STATE_RUNNING; return (1); } /* * Ordered by speed/power consumption. */ struct { void *id_fn; char *id_name; } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, { cpu_idle_hlt, "hlt" }, { cpu_idle_acpi, "acpi" }, { NULL, NULL } }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; p += sprintf(p, "%s%s", p != avail ? ", " : "", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); static int idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; char *p; int i; p = "unknown"; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; if (strcmp(idle_tbl[i].id_name, buf)) continue; cpu_idle_fn = idle_tbl[i].id_fn; return (0); } return (EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = imgp->ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. 
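The machdep.idle sysctl above is a small dispatch table: the handler walks idle_tbl[], skips entries whose prerequisite is missing (MONITOR/MWAIT support for "mwait", a registered cpu_idle_hook for "acpi"), and installs the matching function in cpu_idle_fn, returning EINVAL otherwise. A stripped-down sketch of that pattern; the id_available flag is an illustrative stand-in for the CPUID and hook checks.

#include <stdio.h>
#include <string.h>

static void idle_spin(void) { }
static void idle_hlt(void) { }
static void idle_acpi(void) { }

static struct {
	void	(*id_fn)(void);
	const char *id_name;
	int	id_available;	/* stand-in for the CPUID2_MON / hook checks */
} demo_idle_tbl[] = {
	{ idle_spin, "spin", 1 },
	{ idle_hlt,  "hlt",  1 },
	{ idle_acpi, "acpi", 0 },	/* e.g. no ACPI idle hook registered */
	{ NULL, NULL, 0 }
};

static void (*demo_idle_fn)(void) = idle_hlt;

/* Mimic the sysctl write path: select an idle routine by name. */
static int
select_idle(const char *name)
{
	int i;

	for (i = 0; demo_idle_tbl[i].id_name != NULL; i++) {
		if (!demo_idle_tbl[i].id_available)
			continue;
		if (strcmp(demo_idle_tbl[i].id_name, name) != 0)
			continue;
		demo_idle_fn = demo_idle_tbl[i].id_fn;
		return (0);
	}
	return (-1);		/* the kernel handler returns EINVAL here */
}

int
main(void)
{
	printf("select \"spin\": %d\n", select_idle("spin"));
	printf("spin installed: %d\n", demo_idle_fn == idle_spin);
	printf("select \"acpi\": %d (unavailable in this demo)\n",
	    select_idle("acpi"));
	return (0);
}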
*/ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; #ifdef XEN union descriptor *gdt; union descriptor *ldt; #else union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. 
* * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, #ifndef XEN /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 
BIOS 16-bit interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, #endif /* !XEN */ }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), +#endif +#ifdef XENHVM + IDTVEC(xen_intr_upcall), #endif IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. 
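setidt() above packs the 32-bit handler address into the gate's two 16-bit offset fields (gd_looffset and gd_hioffset), and db_show_idt() reassembles it with the reverse shift-and-or when dumping the IDT. A tiny standalone sketch of that split/reassemble round trip with a made-up address:

#include <stdint.h>
#include <stdio.h>

struct demo_gate {		/* only the two offset fields, for illustration */
	uint16_t gd_looffset;
	uint16_t gd_hioffset;
};

int
main(void)
{
	uint32_t handler = 0xc0a1b2c3;	/* made-up kernel virtual address */
	struct demo_gate g;
	uint32_t reassembled;

	g.gd_looffset = (uint16_t)handler;		/* low 16 bits */
	g.gd_hioffset = (uint16_t)(handler >> 16);	/* high 16 bits */
	reassembled = ((uint32_t)g.gd_hioffset << 16) | g.gd_looffset;

	printf("%#010x -> lo %#06x hi %#06x -> %#010x\n",
	    (unsigned)handler, (unsigned)g.gd_looffset,
	    (unsigned)g.gd_hioffset, (unsigned)reassembled);
	return (0);
}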
*/ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #ifndef XEN static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); if (smap->length == 0) return (1); #ifndef PAE if (smap->base > 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); return (1); } #endif /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (smap->base + smap->length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && smap->base + smap->length == physmap[insert_idx]) { physmap[insert_idx] = smap->base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && smap->base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += smap->length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = smap->base; physmap[insert_idx + 1] = smap->base + smap->length; return (1); } static void basemem_setup(void) { vm_paddr_t pa; pt_entry_t *pte; int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) 
* * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } #endif /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long physmem_tunable, memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size; #ifndef XEN int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase, *smapend; u_int32_t smapsize; caddr_t kmdp; #endif has_smap = 0; #if defined(XEN) Maxmem = xen_start_info->nr_pages - init_first; physmem = Maxmem; basemem = 0; physmap[0] = init_first << PAGE_SHIFT; physmap[1] = ptoa(Maxmem) - round_page(msgbufsize); physmap_idx = 0; #else #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this. */ physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. */ physmap_idx = 0; smapbase = NULL; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); if (kmdp != NULL) smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { /* * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); has_smap = 1; for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, &physmap_idx)) break; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. 
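The comment above introduces the INT 15h/E820 probe that follows: each call returns one address-range descriptor plus a continuation value in %ebx, and a returned continuation of zero means the map is complete, which is exactly the do/while condition below. A standalone sketch of that loop shape, with a canned table standing in for the BIOS call:

#include <stdint.h>
#include <stdio.h>

struct demo_smap {		/* shape of one E820/SMAP descriptor */
	uint64_t base;
	uint64_t length;
	uint32_t type;		/* 1 == usable RAM */
};

/* Canned "BIOS": hand back one entry per call plus the next continuation. */
static uint32_t
fake_e820(uint32_t cont, struct demo_smap *out)
{
	static const struct demo_smap map[] = {
		{ 0x00000000, 0x0009fc00, 1 },
		{ 0x000f0000, 0x00010000, 2 },
		{ 0x00100000, 0x3fef0000, 1 },
	};

	*out = map[cont];
	return (cont + 1 < 3 ? cont + 1 : 0);	/* zero terminates, like %ebx */
}

int
main(void)
{
	struct demo_smap e;
	uint32_t cont = 0;

	do {
		cont = fake_e820(cont, &e);
		printf("base=%#011llx len=%#011llx type=%u\n",
		    (unsigned long long)e.base, (unsigned long long)e.length,
		    (unsigned)e.type);
	} while (cont != 0);
	return (0);
}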
*/ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: #endif /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 
0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; #ifndef XEN /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. 
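/*
 * Illustrative sketch (not part of this patch): the per-page probe above
 * reduces to writing a few bit patterns through a volatile pointer,
 * checking that each one reads back, and restoring the original word.
 * The demo_* name is a hypothetical stand-in.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
demo_word_tests_good(volatile uint32_t *p)
{
	static const uint32_t patterns[] = {
		0xaaaaaaaa,	/* alternating 1's and 0's */
		0x55555555,	/* alternating 0's and 1's */
		0xffffffff,	/* all 1's */
		0x00000000	/* all 0's */
	};
	uint32_t saved = *p;
	bool good = true;
	unsigned i;

	for (i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++) {
		*p = patterns[i];
		if (*p != patterns[i])
			good = false;
	}
	*p = saved;		/* restore the original contents */
	return (good);
}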
*/ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); #else phys_avail[0] = physfree; phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; dump_avail[0] = 0; dump_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; #endif /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); PT_UPDATES_FLUSH(); } #ifdef XEN #define MTOPSIZE (1<<(14 + PAGE_SHIFT)) void init386(first) int first; { unsigned long gdtmachpfn; int error, gsel_tss, metadata_missing, x, pa; size_t kstack0_sz; struct pcpu *pc; struct callback_register event = { .type = CALLBACKTYPE_event, .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback }, }; struct callback_register failsafe = { .type = CALLBACKTYPE_failsafe, .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback }, }; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = KSTACK_PAGES; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. 
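/*
 * Illustrative sketch (not part of this patch): the phys_avail[] updates
 * above accumulate good pages into half-open [start, end) pairs.  While
 * pages stay contiguous the current end slot is simply bumped by one
 * page; a gap opens a new pair.  Standalone version with a demo_ page
 * size; the caller seeds the first pair and handles array overflow, as
 * getmemsize() does.
 */
#include <stdint.h>

#define DEMO_PAGE_SIZE	4096u

static uint64_t demo_avail[16];		/* start/end pairs, end slot at odd index */

static int
demo_add_good_page(int end_idx, uint64_t pa)
{
	if (demo_avail[end_idx] == pa) {
		/* Continuation of the current chunk: extend its end. */
		demo_avail[end_idx] += DEMO_PAGE_SIZE;
	} else {
		/* Gap: open a new [start, end) pair. */
		end_idx++;
		demo_avail[end_idx++] = pa;			/* start */
		demo_avail[end_idx] = pa + DEMO_PAGE_SIZE;	/* end */
	}
	return (end_idx);
}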
*/ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (xen_start_info->mod_start) { preload_metadata = (caddr_t)xen_start_info->mod_start; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if ((caddr_t)xen_start_info->cmd_line) kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); boothowto |= xen_boothowto(kern_envp); /* Init basic tunables, hz etc */ init_param1(); /* * XEN occupies a portion of the upper virtual address space * At its base it manages an array mapping machine page frames * to physical page frames - hence we need to be able to * access 4GB - (64MB - 4MB + 64k) */ gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW); bzero(gdt, PAGE_SIZE); for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V); PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); lgdt(&r_gdt); gdtset = 1; if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { panic("set_trap_table failed - error %d\n", error); } error = HYPERVISOR_callback_op(CALLBACKOP_register, &event); if (error == 0) error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); #if CONFIG_XEN_COMPAT <= 0x030002 if (error == -ENOXENSYS) HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback, GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); #endif pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; physfree += DPCPU_SIZE; init_first += DPCPU_SIZE / PAGE_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW); bzero(ldt, PAGE_SIZE); ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); default_proc_ldt.ldt_base = (caddr_t)ldt; default_proc_ldt.ldt_len = 6; _default_ldt = (int)&default_proc_ldt; PCPU_SET(currentldt, _default_ldt); PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); #if defined(XEN_PRIVILEGED) /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). 
*/ i8254_init(); #endif /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + kstack0_sz - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), PCPU_GET(common_tss.tss_esp0)); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, msgbufsize); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #ifdef PAE thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0]; thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; cpu_probe_amdc1e(); } #else void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x, pa; size_t kstack0_sz; struct pcpu *pc; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = KSTACK_PAGES; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. 
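/*
 * Illustrative sketch (not part of this patch): the GSEL() values used
 * throughout init386() are x86 segment selectors, i.e. a descriptor table
 * index shifted left by 3, a table-indicator bit (0 for the GDT) and a
 * requested privilege level in the two low bits.  The demo_* helpers are
 * hypothetical stand-ins that only encode and decode that layout.
 */
#include <stdint.h>

#define DEMO_KPL	0	/* kernel privilege level */
#define DEMO_UPL	3	/* user privilege level */

static inline uint16_t
demo_gdt_selector(unsigned index, unsigned rpl)
{
	return ((uint16_t)((index << 3) | (rpl & 3)));	/* TI bit left 0 for GDT */
}

static inline unsigned
demo_selector_index(uint16_t sel)
{
	return (sel >> 3);
}

static inline unsigned
demo_selector_rpl(uint16_t sel)
{
	return (sel & 3);
}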
*/ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, 
&IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif +#ifdef XENHVM + setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL, + GSEL(GCODE_SEL, SEL_KPL)); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. */ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
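/*
 * Illustrative sketch (not part of this patch): the 0x80000000 and
 * 0x80000084 values the XBOX probe above writes to port 0xcf8 are PCI
 * configuration mechanism #1 addresses: an enable bit plus
 * bus/device/function/register fields, with the data then read from port
 * 0xcfc.  The demo_* encoder below is a hypothetical stand-in showing how
 * those address words are built.
 */
#include <stdint.h>

static inline uint32_t
demo_pci_cfg_addr(unsigned bus, unsigned dev, unsigned func, unsigned reg)
{
	return (0x80000000u |			/* enable bit */
	    ((bus & 0xffu) << 16) |
	    ((dev & 0x1fu) << 11) |
	    ((func & 0x7u) << 8) |
	    (reg & 0xfcu));			/* dword-aligned register offset */
}

/*
 * demo_pci_cfg_addr(0, 0, 0, 0)    == 0x80000000, bus 0 dev 0 func 0 reg 0
 * demo_pci_cfg_addr(0, 0, 0, 0x84) == 0x80000084, as used in the probe above
 */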
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + kstack0_sz - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, msgbufsize); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #ifdef PAE thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif } #endif void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. 
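/*
 * Illustrative sketch (not part of this patch): the spinlock_enter()/
 * spinlock_exit() pair above saves the interrupt state and disables
 * interrupts only on the outermost acquisition, and restores it only when
 * the nesting count drops back to zero.  A standalone model of that
 * counting scheme; demo_intr_disable()/demo_intr_restore() are
 * hypothetical stand-ins for the real primitives, and the critical
 * section bookkeeping is left out.
 */
#include <stdint.h>

struct demo_thread {
	int		spinlock_count;
	uintptr_t	saved_flags;
};

static uintptr_t demo_intr_disable(void) { return (0x200); }	/* stand-in */
static void demo_intr_restore(uintptr_t flags) { (void)flags; }	/* stand-in */

static void
demo_spinlock_enter(struct demo_thread *td)
{
	if (td->spinlock_count == 0) {
		/* Outermost entry: remember the state to restore later. */
		td->saved_flags = demo_intr_disable();
	}
	td->spinlock_count++;
}

static void
demo_spinlock_exit(struct demo_thread *td)
{
	td->spinlock_count--;
	if (td->spinlock_count == 0)
		demo_intr_restore(td->saved_flags);
}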
*/ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = 
penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); #ifdef DEV_NPX npxgetregs(td); #else bzero(fpregs, sizeof(*fpregs)); #endif #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm, (struct save87 *)fpregs); else #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_user_save.sv_xmm); else #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, sizeof(*fpregs)); #ifdef DEV_NPX npxuserinited(td); #endif return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. 
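/*
 * Illustrative sketch (not part of this patch): the mc_fsbase/mc_gsbase
 * reconstruction in get_mcontext() above works because the descriptor
 * structure keeps a 32-bit segment base as a 24-bit low field plus an
 * 8-bit high field.  The demo_* helpers are hypothetical stand-ins that
 * just split and rejoin such a base.
 */
#include <stdint.h>

struct demo_seg_base {
	uint32_t lobase : 24;	/* base address bits 0..23 */
	uint32_t hibase : 8;	/* base address bits 24..31 */
};

static inline struct demo_seg_base
demo_split_base(uint32_t base)
{
	struct demo_seg_base d;

	d.lobase = base & 0xffffff;
	d.hibase = base >> 24;
	return (d);
}

static inline uint32_t
demo_join_base(struct demo_seg_base d)
{
	return ((uint32_t)d.hibase << 24 | d.lobase);
}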
*/ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); #else mcp->mc_ownedfp = npxgetregs(td); bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { #ifdef DEV_NPX #ifdef CPU_ENABLE_SSE if (cpu_fxsr) ((union savefpu *)&mcp->mc_fpstate)->sv_xmm.sv_env. en_mxcsr &= cpu_mxcsr_mask; #endif npxsetregs(td, (union savefpu *)&mcp->mc_fpstate); #endif } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. 
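/*
 * Illustrative sketch (not part of this patch): what the DBREG_DR7_*()
 * checks in the loop just below are looking at.  Following the IA-32
 * debug register layout, breakpoint i has an enable pair in DR7 bits
 * 2i/2i+1, a 2-bit access type at bit 16 + 4i and a 2-bit length at
 * bit 18 + 4i; the value 0x2 in either field is the encoding the code
 * refuses to program.  The demo_* names are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>

static inline bool
demo_dr7_enabled(uint32_t dr7, int i)
{
	return ((dr7 & (0x3u << (i * 2))) != 0);	/* local or global enable */
}

static inline unsigned
demo_dr7_access(uint32_t dr7, int i)
{
	return ((dr7 >> (16 + i * 4)) & 0x3);
}

static inline unsigned
demo_dr7_len(uint32_t dr7, int i)
{
	return ((dr7 >> (18 + i * 4)) & 0x3);
}

static bool
demo_dr7_is_acceptable(uint32_t dr7)
{
	int i;

	for (i = 0; i < 4; i++)
		if (demo_dr7_access(dr7, i) == 0x2 ||
		    demo_dr7_len(dr7, i) == 0x2)
			return (false);
	return (true);
}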
*/ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 19e67cf48815..479168bbbea9 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -1,1587 +1,1596 @@ /*- * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" #if !defined(lint) #if !defined(SMP) #error How did you get here? #endif #ifndef DEV_APIC #error The apic device is required for SMP, add "device apic" to your config file. #endif #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) #error SMP not supported with CPU_DISABLE_CMPXCHG #endif #endif /* not lint */ #include #include #include #include /* cngetc() */ #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#ifdef XENHVM +#include +#endif + #define WARMBOOT_TARGET 0 #define WARMBOOT_OFF (KERNBASE + 0x0467) #define WARMBOOT_SEG (KERNBASE + 0x0469) #define CMOS_REG (0x70) #define CMOS_DATA (0x71) #define BIOS_RESET (0x0f) #define BIOS_WARM (0x0a) /* * this code MUST be enabled here and in mpboot.s. * it follows the very early stages of AP boot by placing values in CMOS ram. * it NORMALLY will never be needed and thus the primitive method for enabling. * #define CHECK_POINTS */ #if defined(CHECK_POINTS) && !defined(PC98) #define CHECK_READ(A) (outb(CMOS_REG, (A)), inb(CMOS_DATA)) #define CHECK_WRITE(A,D) (outb(CMOS_REG, (A)), outb(CMOS_DATA, (D))) #define CHECK_INIT(D); \ CHECK_WRITE(0x34, (D)); \ CHECK_WRITE(0x35, (D)); \ CHECK_WRITE(0x36, (D)); \ CHECK_WRITE(0x37, (D)); \ CHECK_WRITE(0x38, (D)); \ CHECK_WRITE(0x39, (D)); #define CHECK_PRINT(S); \ printf("%s: %d, %d, %d, %d, %d, %d\n", \ (S), \ CHECK_READ(0x34), \ CHECK_READ(0x35), \ CHECK_READ(0x36), \ CHECK_READ(0x37), \ CHECK_READ(0x38), \ CHECK_READ(0x39)); #else /* CHECK_POINTS */ #define CHECK_INIT(D) #define CHECK_PRINT(S) #define CHECK_WRITE(A, D) #endif /* CHECK_POINTS */ /* lock region used by kernel profiling */ int mcount_lock; int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern struct pcpu __pcpu[]; /* AP uses this during bootstrap. Do not staticize. */ char *bootSTK; static int bootAP; /* Free these after use */ void *bootstacks[MAXCPU]; static void *dpcpu; struct pcb stoppcbs[MAXCPU]; struct pcb **susppcbs = NULL; /* Variables needed for SMP tlb shootdown. 
*/ vm_offset_t smp_tlb_addr1; vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; #ifdef COUNT_IPIS /* Interrupt counts. */ static u_long *ipi_preempt_counts[MAXCPU]; static u_long *ipi_ast_counts[MAXCPU]; u_long *ipi_invltlb_counts[MAXCPU]; u_long *ipi_invlrng_counts[MAXCPU]; u_long *ipi_invlpg_counts[MAXCPU]; u_long *ipi_invlcache_counts[MAXCPU]; u_long *ipi_rendezvous_counts[MAXCPU]; u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif /* * Local data and functions. */ static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready = 0; /* * Store data from cpu_add() until later in the boot when we actually setup * the APs. */ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; int cpu_hyperthread:1; } static cpu_info[MAX_APIC_ID + 1]; int cpu_apic_ids[MAXCPU]; int apic_cpuids[MAX_APIC_ID + 1]; /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; static int cpu_logical; /* logical cpus per core */ static int cpu_cores; /* cores per package */ static void assign_cpu_ids(void); static void install_ap_tramp(void); static void set_interrupt_apic_ids(void); static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ static int hyperthreading_allowed = 1; static void mem_range_AP_init(void) { if (mem_range_softc.mr_op && mem_range_softc.mr_op->initAP) mem_range_softc.mr_op->initAP(&mem_range_softc); } static void topo_probe_amd(void) { int core_id_bits; int id; /* AMD processors do not support HTT. */ cpu_logical = 1; if ((amd_feature2 & AMDID2_CMP) == 0) { cpu_cores = 1; return; } core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >> AMDID_COREID_SIZE_SHIFT; if (core_id_bits == 0) { cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; return; } /* Fam 10h and newer should get here. */ for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. */ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. */ if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits)) continue; cpu_cores++; } } /* * Round up to the next power of two, if necessary, and then * take log2. * Returns -1 if argument is zero. */ static __inline int mask_width(u_int x) { return (fls(x << (1 - powerof2(x))) - 1); } static void topo_probe_0x4(void) { u_int p[4]; int pkg_id_bits; int core_id_bits; int max_cores; int max_logical; int id; /* Both zero and one here mean one logical processor per package. */ max_logical = (cpu_feature & CPUID_HTT) != 0 ? (cpu_procinfo & CPUID_HTT_CORES) >> 16 : 1; if (max_logical <= 1) return; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. Further, we count number of * logical processors that belong to the same core * as BSP thus deducing number of threads per core. */ if (cpu_high >= 0x4) { cpuid_count(0x04, 0, p); max_cores = ((p[0] >> 26) & 0x3f) + 1; } else max_cores = 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; pkg_id_bits = core_id_bits + mask_width(max_cores); for (id = 0; id <= MAX_APIC_ID; id++) { /* Check logical CPU availability. 
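/*
 * Illustrative sketch (not part of this patch): mask_width() above
 * computes ceil(log2(x)), the number of low APIC-ID bits a field of x
 * items occupies, by shifting non-powers-of-two left once before taking
 * fls().  The demo_* version below is an equivalent, more explicit
 * formulation with the same -1 result for zero.
 */
static int
demo_mask_width(unsigned x)
{
	int width = 0;

	if (x == 0)
		return (-1);
	/* Count how many doublings of 1 are needed to reach or pass x. */
	while ((1ULL << width) < x)
		width++;
	return (width);
}

/*
 * demo_mask_width(1) == 0, demo_mask_width(2) == 1, demo_mask_width(3) == 2,
 * demo_mask_width(6) == 3: a 6-way field needs 3 APIC-ID bits.
 */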
*/ if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled) continue; /* Check if logical CPU has the same package ID. */ if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits)) continue; cpu_cores++; /* Check if logical CPU has the same package and core IDs. */ if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits)) cpu_logical++; } KASSERT(cpu_cores >= 1 && cpu_logical >= 1, ("topo_probe_0x4 couldn't find BSP")); cpu_cores /= cpu_logical; hyperthreading_cpus = cpu_logical; } static void topo_probe_0xb(void) { u_int p[4]; int bits; int cnt; int i; int logical; int type; int x; /* We only support three levels for now. */ for (i = 0; i < 3; i++) { cpuid_count(0x0b, i, p); /* Fall back if CPU leaf 11 doesn't really exist. */ if (i == 0 && p[1] == 0) { topo_probe_0x4(); return; } bits = p[0] & 0x1f; logical = p[1] &= 0xffff; type = (p[2] >> 8) & 0xff; if (type == 0 || logical == 0) break; /* * Because of uniformity assumption we examine only * those logical processors that belong to the same * package as BSP. */ for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_disabled) continue; if (x >> bits == boot_cpu_id >> bits) cnt++; } if (type == CPUID_TYPE_SMT) cpu_logical = cnt; else if (type == CPUID_TYPE_CORE) cpu_cores = cnt; } if (cpu_logical == 0) cpu_logical = 1; cpu_cores /= cpu_logical; } /* * Both topology discovery code and code that consumes topology * information assume top-down uniformity of the topology. * That is, all physical packages must be identical and each * core in a package must have the same number of threads. * Topology information is queried only on BSP, on which this * code runs and for which it can query CPUID information. * Then topology is extrapolated on all packages using the * uniformity assumption. */ static void topo_probe(void) { static int cpu_topo_probed = 0; if (cpu_topo_probed) return; CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) topo_probe_amd(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) { /* * See Intel(R) 64 Architecture Processor * Topology Enumeration article for details. * * Note that 0x1 <= cpu_high < 4 case should be * compatible with topo_probe_0x4() logic when * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) * or it should trigger the fallback otherwise. */ if (cpu_high >= 0xb) topo_probe_0xb(); else if (cpu_high >= 0x1) topo_probe_0x4(); } /* * Fallback: assume each logical CPU is in separate * physical package. That is, no multi-core, no SMT. */ if (cpu_cores == 0 || cpu_logical == 0) cpu_cores = cpu_logical = 1; cpu_topo_probed = 1; } struct cpu_group * cpu_topo(void) { int cg_flags; /* * Determine whether any threading flags are * necessry. */ topo_probe(); if (cpu_logical > 1 && hyperthreading_cpus) cg_flags = CG_FLAG_HTT; else if (cpu_logical > 1) cg_flags = CG_FLAG_SMT; else cg_flags = 0; if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { printf("WARNING: Non-uniform processors.\n"); printf("WARNING: Using suboptimal topology.\n"); return (smp_topo_none()); } /* * No multi-core or hyper-threaded. */ if (cpu_logical * cpu_cores == 1) return (smp_topo_none()); /* * Only HTT no multi-core. */ if (cpu_logical > 1 && cpu_cores == 1) return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags)); /* * Only multi-core no HTT. */ if (cpu_cores > 1 && cpu_logical == 1) return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags)); /* * Both HTT and multi-core. 
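/*
 * Illustrative sketch (not part of this patch): the topology probes above
 * rely on the layered APIC-ID layout, with the SMT ID in the lowest bits,
 * the core ID above that and the package ID above that, so two logical
 * CPUs share a package (or a core) exactly when their IDs agree above a
 * given bit position.  The demo_* helper is a hypothetical stand-in for
 * that comparison.
 */
#include <stdbool.h>

static inline bool
demo_same_domain(unsigned apic_id_a, unsigned apic_id_b, int low_bits)
{
	/* Ignore the low 'low_bits' bits and compare what is left. */
	return ((apic_id_a >> low_bits) == (apic_id_b >> low_bits));
}

/*
 * demo_same_domain(id, boot_cpu_id, pkg_id_bits)  -> same package as BSP
 * demo_same_domain(id, boot_cpu_id, core_id_bits) -> same core as BSP
 */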
*/ return (smp_topo_2level(CG_SHARE_L2, cpu_cores, CG_SHARE_L1, cpu_logical, cg_flags)); } /* * Calculate usable address in base memory for AP trampoline code. */ u_int mp_bootaddress(u_int basemem) { boot_address = trunc_page(basemem); /* round down to 4k boundary */ if ((basemem - boot_address) < bootMP_size) boot_address -= PAGE_SIZE; /* not enough, lower by 4k */ return boot_address; } void cpu_add(u_int apic_id, char boot_cpu) { if (apic_id > MAX_APIC_ID) { panic("SMP: APIC ID %d too high", apic_id); return; } KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", apic_id)); cpu_info[apic_id].cpu_present = 1; if (boot_cpu) { KASSERT(boot_cpu_id == -1, ("CPU %d claims to be BSP, but CPU %d already is", apic_id, boot_cpu_id)); boot_cpu_id = apic_id; cpu_info[apic_id].cpu_bsp = 1; } if (mp_ncpus < MAXCPU) { mp_ncpus++; mp_maxid = mp_ncpus - 1; } if (bootverbose) printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : "AP"); } void cpu_mp_setmaxid(void) { /* * mp_maxid should be already set by calls to cpu_add(). * Just sanity check its value here. */ if (mp_ncpus == 0) KASSERT(mp_maxid == 0, ("%s: mp_ncpus is zero, but mp_maxid is not", __func__)); else if (mp_ncpus == 1) mp_maxid = 0; else KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } int cpu_mp_probe(void) { /* * Always record BSP in CPU map so that the mbuf init code works * correctly. */ CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup * the variables to represent a system with a single CPU * with an id of 0. */ mp_ncpus = 1; return (0); } /* At least one CPU was found. */ if (mp_ncpus == 1) { /* * One CPU was found, so this must be a UP system with * an I/O APIC. */ mp_maxid = 0; return (0); } /* At least two CPUs were found. */ return (1); } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Install an inter-CPU IPI for TLB invalidation */ setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for cache invalidation. */ setidt(IPI_INVLCACHE, IDTVEC(invlcache), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for lazy pmap release */ setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for all-CPU rendezvous */ setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install generic inter-CPU IPI handler */ setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Install an inter-CPU IPI for CPU suspend/resume */ setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); /* Probe logical/physical core configuration. 
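/*
 * Illustrative sketch (not part of this patch): mp_bootaddress() above
 * carves the AP trampoline out of the top of base memory by rounding the
 * limit down to a page boundary and stepping back one more page when the
 * leftover tail is too small to hold the boot code.  Standalone version
 * with an explicit demo_ page size.
 */
#include <stdint.h>

#define DEMO_PAGE_SIZE	4096u

static uint32_t
demo_trampoline_addr(uint32_t basemem_top, uint32_t tramp_size)
{
	uint32_t addr = basemem_top & ~(DEMO_PAGE_SIZE - 1);	/* trunc_page */

	if (basemem_top - addr < tramp_size)
		addr -= DEMO_PAGE_SIZE;		/* tail too small, drop a page */
	return (addr);
}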
*/ topo_probe(); assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); set_interrupt_apic_ids(); } /* * Print various information about the SMP system hardware and setup. */ void cpu_mp_announce(void) { const char *hyperthread; int i; printf("FreeBSD/SMP: %d package(s) x %d core(s)", mp_ncpus / (cpu_cores * cpu_logical), cpu_cores); if (hyperthreading_cpus > 1) printf(" x %d HTT threads", cpu_logical); else if (cpu_logical > 1) printf(" x %d SMT threads", cpu_logical); printf("\n"); /* List active CPUs first. */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1; i < mp_ncpus; i++) { if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread, cpu_apic_ids[i]); } /* List disabled CPUs last. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled) continue; if (cpu_info[i].cpu_hyperthread) hyperthread = "/HT"; else hyperthread = ""; printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread, i); } } /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { struct pcpu *pc; vm_offset_t addr; int gsel_tss; int x, myid; u_int cpuid, cr0; /* bootAP is set in start_ap() to our ID. */ myid = bootAP; /* Get per-cpu data */ pc = &__pcpu[myid]; /* prime data page for it to use */ pcpu_init(pc, myid, sizeof(struct pcpu)); dpcpu_init(dpcpu, myid); pc->pc_apic_id = cpu_apic_ids[myid]; pc->pc_prvspace = pc; pc->pc_curthread = 0; gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) { ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd); } r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) &gdt[myid * NGDT]; lgdt(&r_gdt); /* does magic intra-segment return */ lidt(&r_idt); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); ltr(gsel_tss); PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ cr0 = rcr0(); cr0 &= ~(CR0_CD | CR0_NW | CR0_EM); load_cr0(cr0); CHECK_WRITE(0x38, 5); /* Disable local APIC just to be sure. */ lapic_disable(); /* signal our startup to the BSP. */ mp_naps++; CHECK_WRITE(0x39, 6); /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); /* BSP may have changed PTD while we were waiting */ invltlb(); for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) invlpg(addr); #if defined(I586_CPU) && !defined(NO_F00F_HACK) lidt(&r_idt); #endif /* Initialize the PAT MSR if present. */ pmap_init_pat(); /* set up CPU registers and state */ cpu_setregs(); /* set up FPU state on the AP */ npxinit(); /* set up SSE registers */ enable_sse(); +#ifdef XENHVM + /* register vcpu_info area */ + xen_hvm_init_cpu(); +#endif + #ifdef PAE /* Enable the PTE no-execute bit. 
*/ if ((amd_feature & AMDID_NX) != 0) { uint64_t msr; msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); } #endif /* A quick check from sanity claus */ cpuid = PCPU_GET(cpuid); if (PCPU_GET(apic_id) != lapic_id()) { printf("SMP: cpuid = %d\n", cpuid); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); panic("cpuid mismatch! boom!!"); } /* Initialize curthread. */ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); mca_init(); mtx_lock_spin(&ap_boot_mtx); /* Init local apic for irq's */ lapic_setup(1); /* Set memory range attributes for this CPU to match the BSP */ mem_range_AP_init(); smp_cpus++; CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); printf("SMP: AP CPU #%d Launched!\n", cpuid); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) CPU_SET(cpuid, &logical_cpus_mask); if (bootverbose) lapic_dump("AP"); if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); smp_active = 1; /* historic */ } mtx_unlock_spin(&ap_boot_mtx); /* Wait until all the AP's are up. */ while (smp_started == 0) ia32_pause(); /* Start per-CPU event timers. */ cpu_initclocks_ap(); /* Enter the scheduler. */ sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } /******************************************************************* * local functions and data */ /* * We tell the I/O APIC code about all the CPUs we want to receive * interrupts. If we don't want certain CPUs to receive IRQs we * can simply not tell the I/O APIC code about them in this function. * We also do not tell it about the BSP since it tells itself about * the BSP internally to work with UP kernels and on UP machines. */ static void set_interrupt_apic_ids(void) { u_int i, apic_id; for (i = 0; i < MAXCPU; i++) { apic_id = cpu_apic_ids[i]; if (apic_id == -1) continue; if (cpu_info[apic_id].cpu_bsp) continue; if (cpu_info[apic_id].cpu_disabled) continue; /* Don't let hyperthreads service interrupts. */ if (hyperthreading_cpus > 1 && apic_id % hyperthreading_cpus != 0) continue; intr_add_cpu(i); } } /* * Assign logical CPU IDs to local APICs. */ static void assign_cpu_ids(void) { u_int i; TUNABLE_INT_FETCH("machdep.hyperthreading_allowed", &hyperthreading_allowed); /* Check for explicitly disabled CPUs. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) continue; if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; /* * Don't use HT CPU if it has been disabled by a * tunable. */ if (hyperthreading_allowed == 0) { cpu_info[i].cpu_disabled = 1; continue; } } /* Don't use this CPU if it has been disabled by a tunable. */ if (resource_disabled("lapic", i)) { cpu_info[i].cpu_disabled = 1; continue; } } if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { hyperthreading_cpus = 0; cpu_logical = 1; } /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. * * To minimize confusion for userland, we attempt to number * CPUs such that all threads and cores in a package are * grouped together. For now we assume that the BSP is always * the first thread in a package and just start adding APs * starting with the BSP's APIC ID. 
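/*
 * Illustrative sketch (not part of this patch): the assignment loop that
 * follows walks the APIC-ID space circularly, starting just after the
 * BSP's ID and wrapping at the top, which is what groups the threads and
 * cores of a package together as the comment above describes.  The scan
 * order in isolation, with a hypothetical small ID space:
 */
#include <stdio.h>

#define DEMO_MAX_APIC_ID	7	/* arbitrary, for the demo only */

static void
demo_scan_order(int bsp_id)
{
	int i;

	/* Visit every ID except the BSP's, wrapping past DEMO_MAX_APIC_ID. */
	for (i = bsp_id + 1; i != bsp_id;
	    i == DEMO_MAX_APIC_ID ? (i = 0) : i++)
		printf("%d ", i);
	printf("\n");		/* bsp_id = 2 prints: 3 4 5 6 7 0 1 */
}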
*/ mp_ncpus = 1; cpu_apic_ids[0] = boot_cpu_id; apic_cpuids[boot_cpu_id] = 0; for (i = boot_cpu_id + 1; i != boot_cpu_id; i == MAX_APIC_ID ? i = 0 : i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || cpu_info[i].cpu_disabled) continue; if (mp_ncpus < MAXCPU) { cpu_apic_ids[mp_ncpus] = i; apic_cpuids[i] = mp_ncpus; mp_ncpus++; } else cpu_info[i].cpu_disabled = 1; } KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } /* * start each AP in our list */ /* Lowest 1MB is already mapped: don't touch*/ #define TMPMAP_START 1 static int start_all_aps(void) { #ifndef PC98 u_char mpbiosreason; #endif u_int32_t mpbioswarmvec; int apic_id, cpu, i; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* install the AP 1st level boot code */ install_ap_tramp(); /* save the current value of the warm-start vector */ mpbioswarmvec = *((u_int32_t *) WARMBOOT_OFF); #ifndef PC98 outb(CMOS_REG, BIOS_RESET); mpbiosreason = inb(CMOS_DATA); #endif /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ for (i = TMPMAP_START; i < NKPT; i++) PTD[i] = PTD[KPTDI + i]; invltlb(); /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; /* allocate and set up a boot stack data page */ bootstacks[cpu] = (char *)kmem_malloc(kernel_arena, KSTACK_PAGES * PAGE_SIZE, M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); /* setup a vector to our boot code */ *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); #ifndef PC98 outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ #endif bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 4; bootAP = cpu; /* attempt to start the Application Processor */ CHECK_INIT(99); /* setup checkpoints */ if (!start_ap(apic_id)) { printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); CHECK_PRINT("trace"); /* show checkpoints */ /* better panic as the AP may be running loose */ printf("panic y/n? [y] "); if (cngetc() != 'n') panic("bye-bye"); } CHECK_PRINT("trace"); /* show checkpoints */ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; #ifndef PC98 outb(CMOS_REG, BIOS_RESET); outb(CMOS_DATA, mpbiosreason); #endif /* Undo V==P hack from above */ for (i = TMPMAP_START; i < NKPT; i++) PTD[i] = 0; pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); /* number of APs actually started */ return mp_naps; } /* * load the 1st level AP boot code into base memory. */ /* targets for relocation */ extern void bigJump(void); extern void bootCodeSeg(void); extern void bootDataSeg(void); extern void MPentry(void); extern u_int MP_GDT; extern u_int mp_gdtbase; static void install_ap_tramp(void) { int x; int size = *(int *) ((u_long) & bootMP_size); vm_offset_t va = boot_address + KERNBASE; u_char *src = (u_char *) ((u_long) bootMP); u_char *dst = (u_char *) va; u_int boot_base = (u_int) bootMP; u_int8_t *dst8; u_int16_t *dst16; u_int32_t *dst32; KASSERT (size <= PAGE_SIZE, ("'size' do not fit into PAGE_SIZE, as expected.")); pmap_kenter(va, boot_address); pmap_invalidate_page (kernel_pmap, va); for (x = 0; x < size; ++x) *dst++ = *src++; /* * modify addresses in code we just moved to basemem. unfortunately we * need fairly detailed info about mpboot.s for this to work. changes * to mpboot.s might require changes here. 
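/*
 * Illustrative sketch (not part of this patch): the warm-boot vector
 * poked at WARMBOOT_OFF/WARMBOOT_SEG in start_all_aps() above is a
 * real-mode offset:segment pair, so a page-aligned trampoline below 1MB
 * is expressed as segment = address >> 4 with offset 0 (the physical
 * address being segment * 16 + offset).  The demo_* helpers are
 * hypothetical stand-ins for that conversion.
 */
#include <stdint.h>

struct demo_rm_ptr {
	uint16_t offset;	/* stored at the WARMBOOT_OFF slot */
	uint16_t segment;	/* stored at the WARMBOOT_SEG slot */
};

static inline struct demo_rm_ptr
demo_to_realmode(uint32_t phys)
{
	struct demo_rm_ptr p;

	p.segment = (uint16_t)(phys >> 4);	/* 16-byte paragraphs */
	p.offset = (uint16_t)(phys & 0xf);
	return (p);
}

static inline uint32_t
demo_to_physical(struct demo_rm_ptr p)
{
	return ((uint32_t)p.segment * 16 + p.offset);
}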
*/ /* boot code is located in KERNEL space */ dst = (u_char *) va; /* modify the lgdt arg */ dst32 = (u_int32_t *) (dst + ((u_int) & mp_gdtbase - boot_base)); *dst32 = boot_address + ((u_int) & MP_GDT - boot_base); /* modify the ljmp target for MPentry() */ dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1); *dst32 = ((u_int) MPentry - KERNBASE); /* modify the target for boot code segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; /* modify the target for boot data segment */ dst16 = (u_int16_t *) (dst + ((u_int) bootDataSeg - boot_base)); dst8 = (u_int8_t *) (dst16 + 1); *dst16 = (u_int) boot_address & 0xffff; *dst8 = ((u_int) boot_address >> 16) & 0xff; } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. */ static int start_ap(int apic_id) { int vector, ms; int cpus; /* calculate the vector */ vector = (boot_address >> 12) & 0xff; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; ipi_startup(apic_id, vector); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } #ifdef COUNT_XINVLTLB_HITS u_int xhits_gbl[MAXCPU]; u_int xhits_pg[MAXCPU]; u_int xhits_rng[MAXCPU]; static SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, sizeof(xhits_gbl), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, sizeof(xhits_pg), "IU", ""); SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, sizeof(xhits_rng), "IU", ""); u_int ipi_global; u_int ipi_page; u_int ipi_range; u_int ipi_range_size; SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, 0, ""); u_int ipi_masked_global; u_int ipi_masked_page; u_int ipi_masked_range; u_int ipi_masked_range_size; SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, &ipi_masked_global, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, &ipi_masked_page, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, &ipi_masked_range, 0, ""); SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ /* * Init and startup IPI. */ void ipi_startup(int apic_id, int vector) { /* * first we do an INIT IPI: this INIT IPI might be run, resetting * and running the target CPU. OR this INIT IPI might be latched (P5 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be * ignored. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); lapic_ipi_wait(-1); DELAY(10000); /* wait ~10mS */ /* * next we do a STARTUP IPI: the previous INIT IPI might still be * latched, (P5 bug) this 1st STARTUP would then terminate * immediately, and the previously started INIT IPI would continue. 
OR * the previous INIT IPI has already run. and this STARTUP IPI will * run. OR the previous INIT IPI was ignored. and this STARTUP IPI * will run. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ /* * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is * recognized after hardware RESET or INIT IPI. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); lapic_ipi_wait(-1); DELAY(200); /* wait ~200uS */ } /* * Send an IPI to specified CPU handling the bitmap logic. */ static void ipi_send_cpu(int cpu, u_int ipi) { u_int bitmap, old_pending, new_pending; KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; do { old_pending = cpu_ipi_pending[cpu]; new_pending = old_pending | bitmap; } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending)); if (old_pending) return; } lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); } /* * Flush the TLB on all other CPU's */ static void smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) return; /* no other cpus */ if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); ipi_all_but_self(vector); while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } static void smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; if (CPU_ISFULLSET(&mask)) { if (othercpus < 1) return; } else { CPU_CLR(PCPU_GET(cpuid), &mask); if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); if (CPU_ISFULLSET(&mask)) { ncpu = othercpus; ipi_all_but_self(vector); } else { ncpu = 0; while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, vector); ipi_send_cpu(cpu, vector); ncpu++; } } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } void smp_cache_flush(void) { if (smp_started) smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); } void smp_invltlb(void) { if (smp_started) { smp_tlb_shootdown(IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_global++; #endif } } void smp_invlpg(vm_offset_t addr) { if (smp_started) { smp_tlb_shootdown(IPI_INVLPG, addr, 0); #ifdef COUNT_XINVLTLB_HITS ipi_page++; #endif } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_range++; ipi_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void smp_masked_invltlb(cpuset_t mask) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); #ifdef COUNT_XINVLTLB_HITS ipi_masked_global++; #endif } } void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); #ifdef 
COUNT_XINVLTLB_HITS ipi_masked_page++; #endif } } void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); #ifdef COUNT_XINVLTLB_HITS ipi_masked_range++; ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } void ipi_bitmap_handler(struct trapframe frame) { struct trapframe *oldframe; struct thread *td; int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; critical_enter(); td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = &frame; ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); if (ipi_bitmap & (1 << IPI_PREEMPT)) { #ifdef COUNT_IPIS (*ipi_preempt_counts[cpu])++; #endif sched_preempt(td); } if (ipi_bitmap & (1 << IPI_AST)) { #ifdef COUNT_IPIS (*ipi_ast_counts[cpu])++; #endif /* Nothing to do for AST */ } if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { #ifdef COUNT_IPIS (*ipi_hardclock_counts[cpu])++; #endif hardclockintr(); } td->td_intr_frame = oldframe; td->td_intr_nesting_level--; critical_exit(); } /* * send an IPI to a set of cpus. */ void ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } /* * send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } /* * send an IPI to all CPUs EXCEPT myself */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(other_cpus, ipi); return; } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int ipi_nmi_handler() { u_int cpuid; /* * As long as there is not a simple way to know about a NMI's * source, if the bitmask for the current CPU is present in * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) return (1); CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); cpustop_handler(); return (0); } /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. */ void cpustop_handler(void) { u_int cpu; cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); cpustop_restartfunc = NULL; } } /* * Handle an IPI_SUSPEND by saving our current context and spinning until we * are resumed. 
*/ void cpususpend_handler(void) { u_int cpu; cpu = PCPU_GET(cpuid); if (savectx(susppcbs[cpu])) { wbinvd(); CPU_SET_ATOMIC(cpu, &suspended_cpus); } else { pmap_init_pat(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); /* Indicate that we are resumed */ CPU_CLR_ATOMIC(cpu, &suspended_cpus); } /* Wait for resume */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); /* Resume MCA and local APIC */ mca_resume(); lapic_setup(0); CPU_CLR_ATOMIC(cpu, &started_cpus); } /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. */ static void release_aps(void *dummy __unused) { if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); while (smp_started == 0) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. */ static void mp_ipi_intrcnt(void *dummy) { char buf[64]; int i; CPU_FOREACH(i) { snprintf(buf, sizeof(buf), "cpu%d:invltlb", i); intrcnt_add(buf, &ipi_invltlb_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlrng", i); intrcnt_add(buf, &ipi_invlrng_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlpg", i); intrcnt_add(buf, &ipi_invlpg_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:invlcache", i); intrcnt_add(buf, &ipi_invlcache_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:preempt", i); intrcnt_add(buf, &ipi_preempt_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:ast", i); intrcnt_add(buf, &ipi_ast_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:rendezvous", i); intrcnt_add(buf, &ipi_rendezvous_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:lazypmap", i); intrcnt_add(buf, &ipi_lazypmap_counts[i]); snprintf(buf, sizeof(buf), "cpu%d:hardclock", i); intrcnt_add(buf, &ipi_hardclock_counts[i]); } } SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL); #endif diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h index a0e622e8785e..5d1f52249de5 100644 --- a/sys/i386/include/apicvar.h +++ b/sys/i386/include/apicvar.h @@ -1,231 +1,232 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_APICVAR_H_ #define _MACHINE_APICVAR_H_ /* * Local && I/O APIC variable definitions. */ /* * Layout of local APIC interrupt vectors: * * 0xff (255) +-------------+ * | | 15 (Spurious / IPIs / Local Interrupts) * 0xf0 (240) +-------------+ * | | 14 (I/O Interrupts / Timer) * 0xe0 (224) +-------------+ * | | 13 (I/O Interrupts) * 0xd0 (208) +-------------+ * | | 12 (I/O Interrupts) * 0xc0 (192) +-------------+ * | | 11 (I/O Interrupts) * 0xb0 (176) +-------------+ * | | 10 (I/O Interrupts) * 0xa0 (160) +-------------+ * | | 9 (I/O Interrupts) * 0x90 (144) +-------------+ * | | 8 (I/O Interrupts / System Calls) * 0x80 (128) +-------------+ * | | 7 (I/O Interrupts) * 0x70 (112) +-------------+ * | | 6 (I/O Interrupts) * 0x60 (96) +-------------+ * | | 5 (I/O Interrupts) * 0x50 (80) +-------------+ * | | 4 (I/O Interrupts) * 0x40 (64) +-------------+ * | | 3 (I/O Interrupts) * 0x30 (48) +-------------+ * | | 2 (ATPIC Interrupts) * 0x20 (32) +-------------+ * | | 1 (Exceptions, traps, faults, etc.) * 0x10 (16) +-------------+ * | | 0 (Exceptions, traps, faults, etc.) * 0x00 (0) +-------------+ * * Note: 0x80 needs to be handled specially and not allocated to an * I/O device! */ #define MAX_APIC_ID 0xfe #define APIC_ID_ALL 0xff /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */ #define APIC_IO_INTS (IDT_IO_INTS + 16) #define APIC_NUM_IOINTS 191 /* The timer interrupt is used for clock handling and drives hardclock, etc. */ #define APIC_TIMER_INT (APIC_IO_INTS + APIC_NUM_IOINTS) /* ********************* !!! WARNING !!! ****************************** * Each local apic has an interrupt receive fifo that is two entries deep * for each interrupt priority class (higher 4 bits of interrupt vector). * Once the fifo is full the APIC can no longer receive interrupts for this * class and sending IPIs from other CPUs will be blocked. * To avoid deadlocks there should be no more than two IPI interrupts * pending at the same time. * Currently this is guaranteed by dividing the IPIs in two groups that have * each at most one IPI interrupt pending. The first group is protected by the * smp_ipi_mtx and waits for the completion of the IPI (Only one IPI user * at a time) The second group uses a single interrupt and a bitmap to avoid * redundant IPI interrupts. */ /* Interrupts for local APIC LVT entries other than the timer. */ #define APIC_LOCAL_INTS 240 #define APIC_ERROR_INT APIC_LOCAL_INTS #define APIC_THERMAL_INT (APIC_LOCAL_INTS + 1) #define APIC_CMC_INT (APIC_LOCAL_INTS + 2) #define APIC_IPI_INTS (APIC_LOCAL_INTS + 3) #define IPI_RENDEZVOUS (APIC_IPI_INTS) /* Inter-CPU rendezvous. */ #define IPI_INVLTLB (APIC_IPI_INTS + 1) /* TLB Shootdown IPIs */ #define IPI_INVLPG (APIC_IPI_INTS + 2) #define IPI_INVLRNG (APIC_IPI_INTS + 3) #define IPI_INVLCACHE (APIC_IPI_INTS + 4) #define IPI_LAZYPMAP (APIC_IPI_INTS + 5) /* Lazy pmap release. 
*/ /* Vector to handle bitmap based IPIs */ #define IPI_BITMAP_VECTOR (APIC_IPI_INTS + 6) /* IPIs handled by IPI_BITMAPED_VECTOR (XXX ups is there a better place?) */ #define IPI_AST 0 /* Generate software trap. */ #define IPI_PREEMPT 1 #define IPI_HARDCLOCK 2 #define IPI_BITMAP_LAST IPI_HARDCLOCK #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ #define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */ #define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */ /* * The spurious interrupt can share the priority class with the IPIs since * it is not a normal interrupt. (Does not use the APIC's interrupt fifo) */ #define APIC_SPURIOUS_INT 255 #define LVT_LINT0 0 #define LVT_LINT1 1 #define LVT_TIMER 2 #define LVT_ERROR 3 #define LVT_PMC 4 #define LVT_THERMAL 5 #define LVT_CMCI 6 #define LVT_MAX LVT_CMCI #ifndef LOCORE #define APIC_IPI_DEST_SELF -1 #define APIC_IPI_DEST_ALL -2 #define APIC_IPI_DEST_OTHERS -3 #define APIC_BUS_UNKNOWN -1 #define APIC_BUS_ISA 0 #define APIC_BUS_EISA 1 #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI /* * An APIC enumerator is a psuedo bus driver that enumerates APIC's including * CPU's and I/O APIC's. */ struct apic_enumerator { const char *apic_name; int (*apic_probe)(void); int (*apic_probe_cpus)(void); int (*apic_setup_local)(void); int (*apic_setup_io)(void); SLIST_ENTRY(apic_enumerator) apic_next; }; inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), IDTVEC(apic_isr7), IDTVEC(cmcint), IDTVEC(errorint), IDTVEC(spuriousint), IDTVEC(timerint); extern vm_paddr_t lapic_paddr; extern int apic_cpuids[]; u_int apic_alloc_vector(u_int apic_id, u_int irq); u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align); void apic_disable_vector(u_int apic_id, u_int vector); void apic_enable_vector(u_int apic_id, u_int vector); void apic_free_vector(u_int apic_id, u_int vector, u_int irq); u_int apic_idt_to_irq(u_int apic_id, u_int vector); void apic_register_enumerator(struct apic_enumerator *enumerator); u_int apic_cpuid(u_int apic_id); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); void ioapic_register(void *cookie); int ioapic_remap_vector(void *cookie, u_int pin, int vector); int ioapic_set_bus(void *cookie, u_int pin, int bus_type); int ioapic_set_extint(void *cookie, u_int pin); int ioapic_set_nmi(void *cookie, u_int pin); int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol); int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger); int ioapic_set_smi(void *cookie, u_int pin); void lapic_create(u_int apic_id, int boot_cpu); void lapic_disable(void); void lapic_disable_pmc(void); void lapic_dump(const char *str); void lapic_enable_cmc(void); int lapic_enable_pmc(void); void lapic_eoi(void); int lapic_id(void); void lapic_init(vm_paddr_t addr); int lapic_intr_pending(u_int vector); void lapic_ipi_raw(register_t icrlo, u_int dest); void lapic_ipi_vectored(u_int vector, int dest); int lapic_ipi_wait(int delay); void lapic_handle_cmc(void); void lapic_handle_error(void); void lapic_handle_intr(int vector, struct trapframe *frame); void lapic_handle_timer(struct trapframe *frame); void lapic_reenable_pmc(void); void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); int lapic_set_lvt_mask(u_int 
apic_id, u_int lvt, u_char masked); int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode); int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol); int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(int boot); +void xen_intr_handle_upcall(struct trapframe *frame); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h index b3dd122301e5..6bbe378a9e71 100644 --- a/sys/i386/include/intr_machdep.h +++ b/sys/i386/include/intr_machdep.h @@ -1,164 +1,182 @@ /*- * Copyright (c) 2003 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __MACHINE_INTR_MACHDEP_H__ #define __MACHINE_INTR_MACHDEP_H__ #ifdef _KERNEL /* * The maximum number of I/O interrupts we allow. This number is rather * arbitrary as it is just the maximum IRQ resource value. The interrupt * source for a given IRQ maps that I/O interrupt to device interrupt * source whether it be a pin on an interrupt controller or an MSI interrupt. * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device * interrupts allocate IDT vectors on demand. Currently we have 191 IDT * vectors available for device interrupts. On many systems with I/O APICs, * a lot of the IRQs are not used, so this number can be much larger than * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid - * confusion since 255 is used in PCI to indicate an invalid IRQ. + * IRQ values from 256 to 767 are used by MSI. When running under the Xen + * Hypervisor, IRQ values from 768 to 4863 are available for binding to + * event channel events. We leave 255 unused to avoid confusion since 255 is + * used in PCI to indicate an invalid IRQ. 
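 *
 * For illustration only (a sketch, not part of this interface), assuming
 * a XENHVM kernel, an IRQ number could be classified with the macros
 * defined below roughly as follows:
 *
 *	static __inline int
 *	irq_is_msi(int irq)
 *	{
 *		return (irq >= FIRST_MSI_INT &&
 *		    irq < FIRST_MSI_INT + NUM_MSI_INTS);
 *	}
 *
 *	static __inline int
 *	irq_is_evtchn(int irq)
 *	{
 *		return (irq >= FIRST_EVTCHN_INT && irq <= LAST_EVTCHN_INT);
 *	}
 *
 * i.e. 0-254 map to ISA and PCI interrupt pins, 256-767 to MSI slots,
 * and 768-4863 to Xen event channel ports.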
*/ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) +#ifdef XENHVM +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT \ + (FIRST_MSI_INT + NUM_MSI_INTS) +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#elif defined(XEN) +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT 0 +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#else /* !XEN && !XENHVM */ +#define NUM_EVTCHN_INTS 0 +#endif +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. * - 2 counters for each I/O interrupt. * - 1 counter for each CPU for lapic timer. * - 9 counters for each CPU for IPI counters for SMP. */ #ifdef SMP #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 9) * MAXCPU) #else #define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) #endif #ifndef LOCORE typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); #define IDTVEC(name) __CONCAT(X,name) struct intsrc; /* * Methods that a PIC provides to mask/unmask a given interrupt source, * "turn on" the interrupt on the CPU side by setting up an IDT entry, and * return the vector associated with this source. */ struct pic { void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); void (*pic_enable_intr)(struct intsrc *); void (*pic_disable_intr)(struct intsrc *); int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); void (*pic_resume)(struct pic *); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); TAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ enum { PIC_EOI, PIC_NO_EOI, }; /* * An interrupt source. The upper-layer code uses the PIC methods to * control a given source. The lower-layer PIC drivers can store additional * private data in a given interrupt source such as an interrupt pin number * or an I/O APIC pointer. */ struct intsrc { struct pic *is_pic; struct intr_event *is_event; u_long *is_count; u_long *is_straycount; u_int is_index; u_int is_handlers; }; struct trapframe; extern struct mtx icu_lock; extern int elcr_found; #ifndef DEV_ATPIC void atpic_reset(void); #endif /* XXX: The elcr_* prototypes probably belong somewhere else. 
*/ int elcr_probe(void); enum intr_trigger elcr_read_trigger(u_int irq); void elcr_resume(void); void elcr_write_trigger(u_int irq, enum intr_trigger trigger); #ifdef SMP void intr_add_cpu(u_int cpu); #endif int intr_add_handler(const char *name, int vector, driver_filter_t filter, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); #ifdef SMP int intr_bind(u_int vector, u_char cpu); #endif int intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol); int intr_describe(u_int vector, void *ih, const char *descr); void intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame); u_int intr_next_cpu(void); struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); int msi_alloc(device_t dev, int count, int maxcount, int *irqs); void msi_init(void); int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int* irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ #endif /* !__MACHINE_INTR_MACHDEP_H__ */ diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index 3606d12dd1be..3a684bfde245 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -1,281 +1,269 @@ /*- * Copyright (c) Peter Wemm * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PCPU_H_ #define _MACHINE_PCPU_H_ #ifndef _SYS_CDEFS_H_ #error "sys/cdefs.h is a prerequisite for this file" #endif #include #include /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. * The reason for doing it via a struct is so that an array of pointers * to each CPU's data can be set up for things like "check curproc on all * other processors" */ #if defined(XEN) || defined(XENHVM) #ifndef NR_VIRQS #define NR_VIRQS 24 #endif #ifndef NR_IPIS #define NR_IPIS 2 #endif #endif #if defined(XEN) /* These are peridically updated in shared_info, and then copied here. 
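 * For illustration only (this is the usual Xen pvclock convention and is
 * not shown in this change): a consumer of these fields typically
 * reconstructs system time along the lines of
 *
 *	delta = rdtsc() - tsc_timestamp;
 *	delta = (tsc_shift >= 0) ? delta << tsc_shift : delta >> -tsc_shift;
 *	ns    = system_timestamp + ((delta * tsc_to_nsec_mul) >> 32);
 *
 * where the multiply is a widening 64x32 multiply in real implementations
 * so the intermediate product cannot overflow, and "version" is checked
 * before and after the read to detect a concurrent update.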
*/ struct shadow_time_info { uint64_t tsc_timestamp; /* TSC at last update of time vals. */ uint64_t system_timestamp; /* Time, in nanosecs, since boot. */ uint32_t tsc_to_nsec_mul; uint32_t tsc_to_usec_mul; int tsc_shift; uint32_t version; }; #define PCPU_XEN_FIELDS \ ; \ u_int pc_cr3; /* track cr3 for R1/R3*/ \ vm_paddr_t *pc_pdir_shadow; \ uint64_t pc_processed_system_time; \ struct shadow_time_info pc_shadow_time; \ - int pc_resched_irq; \ - int pc_callfunc_irq; \ - int pc_virq_to_irq[NR_VIRQS]; \ - int pc_ipi_to_irq[NR_IPIS]; \ - char __pad[77] + char __pad[189] -#elif defined(XENHVM) - -#define PCPU_XEN_FIELDS \ - ; \ - unsigned int pc_last_processed_l1i; \ - unsigned int pc_last_processed_l2i; \ - char __pad[229] - -#else /* !XEN && !XENHVM */ +#else /* !XEN */ #define PCPU_XEN_FIELDS \ ; \ char __pad[237] #endif #define PCPU_MD_FIELDS \ char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct i386tss pc_common_tss; \ struct segment_descriptor pc_common_tssd; \ struct segment_descriptor *pc_tss_gdt; \ struct segment_descriptor *pc_fsgs_gdt; \ int pc_currentldt; \ u_int pc_acpi_id; /* ACPI CPU id */ \ u_int pc_apic_id; \ int pc_private_tss; /* Flag indicating private tss*/\ u_int pc_cmci_mask /* MCx banks for CMCI */ \ PCPU_XEN_FIELDS #ifdef _KERNEL #ifdef lint extern struct pcpu *pcpup; #define PCPU_GET(member) (pcpup->pc_ ## member) #define PCPU_ADD(member, val) (pcpup->pc_ ## member += (val)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&pcpup->pc_ ## member) #define PCPU_SET(member, val) (pcpup->pc_ ## member = (val)) #elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) /* * Evaluates to the byte offset of the per-cpu variable name. */ #define __pcpu_offset(name) \ __offsetof(struct pcpu, name) /* * Evaluates to the type of the per-cpu variable name. */ #define __pcpu_type(name) \ __typeof(((struct pcpu *)0)->name) /* * Evaluates to the address of the per-cpu variable name. */ #define __PCPU_PTR(name) __extension__ ({ \ __pcpu_type(name) *__p; \ \ __asm __volatile("movl %%fs:%1,%0; addl %2,%0" \ : "=r" (__p) \ : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))), \ "i" (__pcpu_offset(name))); \ \ __p; \ }) /* * Evaluates to the value of the per-cpu variable name. */ #define __PCPU_GET(name) __extension__ ({ \ __pcpu_type(name) __res; \ struct __s { \ u_char __b[MIN(sizeof(__res), 4)]; \ } __s; \ \ if (sizeof(__res) == 1 || sizeof(__res) == 2 || \ sizeof(__res) == 4) { \ __asm __volatile("mov %%fs:%1,%0" \ : "=r" (__s) \ : "m" (*(struct __s *)(__pcpu_offset(name)))); \ *(struct __s *)(void *)&__res = __s; \ } else { \ __res = *__PCPU_PTR(name); \ } \ __res; \ }) /* * Adds a value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. */ #define __PCPU_ADD(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__val), 4)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("add %1,%%fs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else \ *__PCPU_PTR(name) += __val; \ } while (0) /* * Increments the value of the per-cpu counter name. The implementation * must be atomic with respect to interrupts. 
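 *
 * For illustration (not part of this change), callers normally go through
 * the PCPU_INC() wrapper defined further below, e.g. PCPU_INC(cnt.v_intr)
 * in an interrupt path, which bumps this CPU's counter with a single
 * %fs-relative incl instruction.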
*/ #define __PCPU_INC(name) do { \ CTASSERT(sizeof(__pcpu_type(name)) == 1 || \ sizeof(__pcpu_type(name)) == 2 || \ sizeof(__pcpu_type(name)) == 4); \ if (sizeof(__pcpu_type(name)) == 1) { \ __asm __volatile("incb %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 2) { \ __asm __volatile("incw %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } else if (sizeof(__pcpu_type(name)) == 4) { \ __asm __volatile("incl %%fs:%0" \ : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\ : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\ } \ } while (0) /* * Sets the value of the per-cpu variable name to value val. */ #define __PCPU_SET(name, val) do { \ __pcpu_type(name) __val; \ struct __s { \ u_char __b[MIN(sizeof(__val), 4)]; \ } __s; \ \ __val = (val); \ if (sizeof(__val) == 1 || sizeof(__val) == 2 || \ sizeof(__val) == 4) { \ __s = *(struct __s *)(void *)&__val; \ __asm __volatile("mov %1,%%fs:%0" \ : "=m" (*(struct __s *)(__pcpu_offset(name))) \ : "r" (__s)); \ } else { \ *__PCPU_PTR(name) = __val; \ } \ } while (0) #define PCPU_GET(member) __PCPU_GET(pc_ ## member) #define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val) #define PCPU_INC(member) __PCPU_INC(pc_ ## member) #define PCPU_PTR(member) __PCPU_PTR(pc_ ## member) #define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val) #define OFFSETOF_CURTHREAD 0 #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wnull-dereference" #endif static __inline __pure2 struct thread * __curthread(void) { struct thread *td; __asm("movl %%fs:%1,%0" : "=r" (td) : "m" (*(char *)OFFSETOF_CURTHREAD)); return (td); } #ifdef __clang__ #pragma clang diagnostic pop #endif #define curthread (__curthread()) #define OFFSETOF_CURPCB 16 static __inline __pure2 struct pcb * __curpcb(void) { struct pcb *pcb; __asm("movl %%fs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB)); return (pcb); } #define curpcb (__curpcb()) #else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */ #error "this file needs to be ported to your compiler" #endif /* lint, etc. */ #endif /* _KERNEL */ #endif /* !_MACHINE_PCPU_H_ */ diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index 94b63ca14bb2..0303c604b52b 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -1,465 +1,467 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ /* * Page-directory and page-table entries follow this format, with a few * of the fields not present here and there, depending on a lot of things. */ /* ---- Intel Nomenclature ---- */ #define PG_V 0x001 /* P Valid */ #define PG_RW 0x002 /* R/W Read/Write */ #define PG_U 0x004 /* U/S User/Supervisor */ #define PG_NC_PWT 0x008 /* PWT Write through */ #define PG_NC_PCD 0x010 /* PCD Cache disable */ #define PG_A 0x020 /* A Accessed */ #define PG_M 0x040 /* D Dirty */ #define PG_PS 0x080 /* PS Page size (0=4k,1=4M) */ #define PG_PTE_PAT 0x080 /* PAT PAT index */ #define PG_G 0x100 /* G Global */ #define PG_AVAIL1 0x200 /* / Available for system */ #define PG_AVAIL2 0x400 /* < programmers use */ #define PG_AVAIL3 0x800 /* \ */ #define PG_PDE_PAT 0x1000 /* PAT PAT index */ #ifdef PAE #define PG_NX (1ull<<63) /* No-execute */ #endif /* Our various interpretations of the above */ #define PG_W PG_AVAIL1 /* "Wired" pseudoflag */ #define PG_MANAGED PG_AVAIL2 #ifdef PAE #define PG_FRAME (0x000ffffffffff000ull) #define PG_PS_FRAME (0x000fffffffe00000ull) #else #define PG_FRAME (~PAGE_MASK) #define PG_PS_FRAME (0xffc00000) #endif #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ /* Page level cache control fields used to determine the PAT type */ #define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD) #define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD) /* * Promotion to a 2 or 4MB (PDE) page mapping requires that the corresponding * 4KB (PTE) page mappings have identical settings for the following fields: */ #define PG_PTE_PROMOTE (PG_MANAGED | PG_W | PG_G | PG_PTE_PAT | \ PG_M | PG_A | PG_NC_PCD | PG_NC_PWT | PG_U | PG_RW | PG_V) /* * Page Protection Exception bits */ #define PGEX_P 0x01 /* Protection violation vs. not present */ #define PGEX_W 0x02 /* during a Write cycle */ #define PGEX_U 0x04 /* access from User mode (UPL) */ #define PGEX_RSV 0x08 /* reserved PTE field is non-zero */ #define PGEX_I 0x10 /* during an instruction fetch */ /* * Size of Kernel address space. This is the number of page table pages * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). * For PAE, the page table page unit size is 2MB. 
This means that 512 pages * is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE. */ #ifndef KVA_PAGES #ifdef PAE #define KVA_PAGES 512 #else #define KVA_PAGES 256 #endif #endif /* * Pte related macros */ #define VADDR(pdi, pti) ((vm_offset_t)(((pdi)< #include #include #include #include #ifdef PAE typedef uint64_t pdpt_entry_t; typedef uint64_t pd_entry_t; typedef uint64_t pt_entry_t; #define PTESHIFT (3) #define PDESHIFT (3) #else typedef uint32_t pd_entry_t; typedef uint32_t pt_entry_t; #define PTESHIFT (2) #define PDESHIFT (2) #endif /* * Address of current address space page table maps and directories. */ #ifdef _KERNEL extern pt_entry_t PTmap[]; extern pd_entry_t PTD[]; extern pd_entry_t PTDpde[]; #ifdef PAE extern pdpt_entry_t *IdlePDPT; #endif extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ /* * Translate a virtual address to the kernel virtual address of its page table * entry (PTE). This can be used recursively. If the address of a PTE as * previously returned by this macro is itself given as the argument, then the * address of the page directory entry (PDE) that maps the PTE will be * returned. * * This macro may be used before pmap_bootstrap() is called. */ #define vtopte(va) (PTmap + i386_btop(va)) /* * Translate a virtual address to its physical address. * * This macro may be used before pmap_bootstrap() is called. */ #define vtophys(va) pmap_kextract((vm_offset_t)(va)) #if defined(XEN) #include -#include + +#include + #include #include extern pt_entry_t pg_nx; #define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M) #define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma))) #define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m))) #define VTOM(va) xpmap_ptom(VTOP(va)) static __inline vm_paddr_t pmap_kextract_ma(vm_offset_t va) { vm_paddr_t ma; if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) { ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1)); } else { ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK); } return ma; } static __inline vm_paddr_t pmap_kextract(vm_offset_t va) { return xpmap_mtop(pmap_kextract_ma(va)); } #define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va))) vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va); void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa); void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len); void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len); static __inline pt_entry_t pte_load_store(pt_entry_t *ptep, pt_entry_t v) { pt_entry_t r; r = *ptep; PT_SET_VA(ptep, v, TRUE); return (r); } static __inline pt_entry_t pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v) { pt_entry_t r; r = *ptep; PT_SET_VA_MA(ptep, v, TRUE); return (r); } #define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL) #define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte) #define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) #define pde_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) #elif !defined(XEN) /* * KPTmap is a linear mapping of the kernel page table. It differs from the * recursive mapping in two ways: (1) it only provides access to kernel page * table pages, and not user page table pages, and (2) it provides access to * a kernel page table page after the corresponding virtual addresses have * been promoted to a 2/4MB page mapping. * * KPTmap is first initialized by locore to support just NPKT page table * pages. Later, it is reinitialized by pmap_bootstrap() to allow for * expansion of the kernel page table. 
*/ extern pt_entry_t *KPTmap; /* * Extract from the kernel page table the physical address that is mapped by * the given virtual address "va". * * This function may be used before pmap_bootstrap() is called. */ static __inline vm_paddr_t pmap_kextract(vm_offset_t va) { vm_paddr_t pa; if ((pa = PTD[va >> PDRSHIFT]) & PG_PS) { pa = (pa & PG_PS_FRAME) | (va & PDRMASK); } else { /* * Beware of a concurrent promotion that changes the PDE at * this point! For example, vtopte() must not be used to * access the PTE because it would use the new PDE. It is, * however, safe to use the old PDE because the page table * page is preserved by the promotion. */ pa = KPTmap[i386_btop(va)]; pa = (pa & PG_FRAME) | (va & PAGE_MASK); } return (pa); } #endif #if !defined(XEN) #define PT_UPDATES_FLUSH() #endif #if defined(PAE) && !defined(XEN) #define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new) #define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte) #define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0) #define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte) extern pt_entry_t pg_nx; #elif !defined(PAE) && !defined(XEN) #define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new) #define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte) #define pte_load_clear(ptep) atomic_swap_int(ptep, 0) #define pte_store(ptep, pte) do { \ *(u_int *)(ptep) = (u_int)(pte); \ } while (0) #endif /* PAE */ #define pte_clear(ptep) pte_store(ptep, 0) #define pde_store(pdep, pde) pte_store(pdep, pde) #endif /* _KERNEL */ /* * Pmap stuff */ struct pv_entry; struct pv_chunk; struct md_page { TAILQ_HEAD(,pv_entry) pv_list; int pat_mode; }; struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ #ifdef PAE pdpt_entry_t *pm_pdpt; /* KVA of page director pointer table */ #endif struct vm_radix pm_root; /* spare page table pages */ }; typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) #define PMAP_LOCK_ASSERT(pmap, type) \ mtx_assert(&(pmap)->pm_mtx, (type)) #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) #define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ NULL, MTX_DEF | MTX_DUPOK) #define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx) #define PMAP_MTX(pmap) (&(pmap)->pm_mtx) #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) #endif /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_list. */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_next; } *pv_entry_t; /* * pv_entries are allocated in chunks per-process. This avoids the * need to track per-pmap assignments. */ #define _NPCM 11 #define _NPCPV 336 struct pv_chunk { pmap_t pc_pmap; TAILQ_ENTRY(pv_chunk) pc_list; uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */ TAILQ_ENTRY(pv_chunk) pc_lru; struct pv_entry pc_pventry[_NPCPV]; }; #ifdef _KERNEL extern caddr_t CADDR1; extern pt_entry_t *CMAP1; extern vm_paddr_t phys_avail[]; extern vm_paddr_t dump_avail[]; extern int pseflag; extern int pgeflag; extern char *ptvmmap; /* poor name! 
*/ extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; #define pmap_page_get_memattr(m) ((vm_memattr_t)(m)->md.pat_mode) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) #define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz)) /* * Only the following functions or macros may be used before pmap_bootstrap() * is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and * vtopte(). */ void pmap_bootstrap(vm_paddr_t); int pmap_cache_bits(int mode, boolean_t is_pde); int pmap_change_attr(vm_offset_t, vm_size_t, int); void pmap_init_pat(void); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void *pmap_kenter_temporary(vm_paddr_t pa, int i); void pmap_kremove(vm_offset_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); void *pmap_mapdev(vm_paddr_t, vm_size_t); void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); boolean_t pmap_page_is_mapped(vm_page_t m); void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); void pmap_unmapdev(vm_offset_t, vm_size_t); pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2; void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); void pmap_invalidate_cache(void); void pmap_invalidate_cache_pages(vm_page_t *pages, int count); void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ diff --git a/sys/i386/include/xen/xen-os.h b/sys/i386/include/xen/xen-os.h index 257202ec9f94..e15d66870062 100644 --- a/sys/i386/include/xen/xen-os.h +++ b/sys/i386/include/xen/xen-os.h @@ -1,367 +1,291 @@ -/****************************************************************************** - * os.h +/***************************************************************************** + * i386/xen/xen-os.h * - * random collection of macros and definition + * Random collection of macros and definition + * + * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * $FreeBSD$ */ -#ifndef _XEN_OS_H_ -#define _XEN_OS_H_ -#include +#ifndef _MACHINE_XEN_XEN_OS_H_ +#define _MACHINE_XEN_XEN_OS_H_ #ifdef PAE #define CONFIG_X86_PAE #endif -#ifdef LOCORE -#define __ASSEMBLY__ -#endif - -#if !defined(__XEN_INTERFACE_VERSION__) -#define __XEN_INTERFACE_VERSION__ 0x00030208 -#endif - -#define GRANT_REF_INVALID 0xffffffff - -#include - /* Everything below this point is not included by assembler (.S) files. */ #ifndef __ASSEMBLY__ /* Force a proper event-channel callback from Xen. */ void force_evtchn_callback(void); -#define likely(x) __builtin_expect((x),1) -#define unlikely(x) __builtin_expect((x),0) +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__ ( "rep;nop" : : : "memory" ); +} +#define cpu_relax() rep_nop() -#ifndef vtophys -#include -#include -#include -#endif +#ifndef XENHVM +void xc_printf(const char *fmt, ...); -extern int gdtset; #ifdef SMP +extern int gdtset; + #include /* XXX for pcpu.h */ #include /* XXX for PCPU_GET */ static inline int smp_processor_id(void) { - if (likely(gdtset)) + if (__predict_true(gdtset)) return PCPU_GET(cpuid); return 0; } #else #define smp_processor_id() 0 #endif -#ifndef NULL -#define NULL (void *)0 -#endif - #ifndef PANIC_IF -#define PANIC_IF(exp) if (unlikely(exp)) {printk("panic - %s: %s:%d\n",#exp, __FILE__, __LINE__); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} -#endif - -extern shared_info_t *HYPERVISOR_shared_info; - -/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented - a mechanism by which the user can annotate likely branch directions and - expect the blocks to be reordered appropriately. Define __builtin_expect - to nothing for earlier compilers. */ - -/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ -static inline void rep_nop(void) -{ - __asm__ __volatile__ ( "rep;nop" : : : "memory" ); -} -#define cpu_relax() rep_nop() - - -#if __GNUC__ == 2 && __GNUC_MINOR__ < 96 -#define __builtin_expect(x, expected_value) (x) +#define PANIC_IF(exp) if (__predict_false(exp)) {printf("panic - %s: %s:%d\n",#exp, __FILE__, __LINE__); panic("%s: %s:%d", #exp, __FILE__, __LINE__);} #endif -#define per_cpu(var, cpu) (pcpu_find((cpu))->pc_ ## var) - -/* crude memory allocator for memory allocation early in - * boot +/* + * Crude memory allocator for memory allocation early in boot. */ void *bootmem_alloc(unsigned int size); void bootmem_free(void *ptr, unsigned int size); -#include - -void printk(const char *fmt, ...); - -/* some function prototypes */ -void trap_init(void); - -#ifndef XENHVM - /* * STI/CLI equivalents. These basically set and clear the virtual * event_enable flag in the shared_info structure. Note that when * the enable bit is set, there may be pending events to be handled. * We may therefore call into do_hypervisor_callback() directly. 
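 *
 * For illustration only (not part of this change), a typical caller
 * brackets a short critical section with the save/restore pair defined
 * below:
 *
 *	unsigned long flags;
 *
 *	local_irq_save(flags);		(mask upcalls, remember old mask)
 *	... manipulate shared or per-CPU state ...
 *	local_irq_restore(flags);	(restore mask; if events became
 *					 pending, force_evtchn_callback()
 *					 delivers them now)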
*/ - #define __cli() \ do { \ vcpu_info_t *_vcpu; \ _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ _vcpu->evtchn_upcall_mask = 1; \ barrier(); \ } while (0) #define __sti() \ do { \ vcpu_info_t *_vcpu; \ barrier(); \ _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ _vcpu->evtchn_upcall_mask = 0; \ barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + if (__predict_false(_vcpu->evtchn_upcall_pending)) \ force_evtchn_callback(); \ } while (0) #define __restore_flags(x) \ do { \ vcpu_info_t *_vcpu; \ barrier(); \ _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ barrier(); /* unmask then check (avoid races) */ \ - if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + if (__predict_false(_vcpu->evtchn_upcall_pending)) \ force_evtchn_callback(); \ } \ } while (0) /* * Add critical_{enter, exit}? * */ #define __save_and_cli(x) \ do { \ vcpu_info_t *_vcpu; \ _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \ (x) = _vcpu->evtchn_upcall_mask; \ _vcpu->evtchn_upcall_mask = 1; \ barrier(); \ } while (0) #define cli() __cli() #define sti() __sti() #define save_flags(x) __save_flags(x) #define restore_flags(x) __restore_flags(x) #define save_and_cli(x) __save_and_cli(x) #define local_irq_save(x) __save_and_cli(x) #define local_irq_restore(x) __restore_flags(x) #define local_irq_disable() __cli() #define local_irq_enable() __sti() #define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));} #define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } #define spin_lock_irqsave mtx_lock_irqsave #define spin_unlock_irqrestore mtx_unlock_irqrestore -#endif - -#ifndef xen_mb -#define xen_mb() mb() -#endif -#ifndef xen_rmb -#define xen_rmb() rmb() -#endif -#ifndef xen_wmb -#define xen_wmb() wmb() -#endif -#ifdef SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - +#endif /* !XENHVM */ /* This is a barrier for the compiler only, NOT the processor! */ #define barrier() __asm__ __volatile__("": : :"memory") #define LOCK_PREFIX "" #define LOCK "" #define ADDR (*(volatile long *) addr) /* * Make sure gcc doesn't try to be clever and move things around * on us. We need to use _exactly_ the address the user gave us, * not some alias that contains the same information. 
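 *
 * For illustration only (hypothetical call, not part of this change):
 * the bit operations below all go through ADDR, so a call such as
 *
 *	if (test_and_clear_bit(port,
 *	    &HYPERVISOR_shared_info->evtchn_pending[0]))
 *		... handle event channel "port" ...
 *
 * operates on the word that actually holds bit "port" rather than on a
 * compiler-introduced alias.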
*/ typedef struct { volatile int counter; } atomic_t; - - #define xen_xchg(ptr,v) \ ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((volatile struct __xchg_dummy *)(x)) static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) { switch (size) { case 1: __asm__ __volatile__("xchgb %b0,%1" :"=q" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; case 2: __asm__ __volatile__("xchgw %w0,%1" :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; case 4: __asm__ __volatile__("xchgl %0,%1" :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory"); break; } return x; } /** * test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to set * @addr: Address to count from * * This operation is atomic and cannot be reordered. * It also implies a memory barrier. */ static __inline int test_and_clear_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( LOCK_PREFIX "btrl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit),"=m" (ADDR) :"Ir" (nr) : "memory"); return oldbit; } static __inline int constant_test_bit(int nr, const volatile void * addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } static __inline int variable_test_bit(int nr, volatile void * addr) { int oldbit; __asm__ __volatile__( "btl %2,%1\n\tsbbl %0,%0" :"=r" (oldbit) :"m" (ADDR),"Ir" (nr)); return oldbit; } #define test_bit(nr,addr) \ (__builtin_constant_p(nr) ? \ constant_test_bit((nr),(addr)) : \ variable_test_bit((nr),(addr))) /** * set_bit - Atomically set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * * This function is atomic and may not be reordered. See __set_bit() * if you do not require the atomic guarantees. * Note that @nr may be almost arbitrarily large; this function is not * restricted to acting on a single-word quantity. */ static __inline__ void set_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btsl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } /** * clear_bit - Clears a bit in memory * @nr: Bit to clear * @addr: Address to start counting from * * clear_bit() is atomic and may not be reordered. However, it does * not contain a memory barrier, so if it is used for locking purposes, * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() * in order to ensure changes are visible on other processors. */ static __inline__ void clear_bit(int nr, volatile void * addr) { __asm__ __volatile__( LOCK_PREFIX "btrl %1,%0" :"=m" (ADDR) :"Ir" (nr)); } /** * atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. Note that the guaranteed * useful range of an atomic_t is only 24 bits. */ static __inline__ void atomic_inc(atomic_t *v) { __asm__ __volatile__( LOCK "incl %0" :"=m" (v->counter) :"m" (v->counter)); } #define rdtscll(val) \ __asm__ __volatile__("rdtsc" : "=A" (val)) - - -/* - * Kernel pointers have redundant information, so we can use a - * scheme where we can return either an error code or a dentry - * pointer with the same return value. - * - * This should be a per-architecture thing, to allow different - * error and pointer decisions. 
- */ -#define IS_ERR_VALUE(x) unlikely((x) > (unsigned long)-1000L) - -static inline void *ERR_PTR(long error) -{ - return (void *) error; -} - -static inline long PTR_ERR(const void *ptr) -{ - return (long) ptr; -} - -static inline long IS_ERR(const void *ptr) -{ - return IS_ERR_VALUE((unsigned long)ptr); -} - #endif /* !__ASSEMBLY__ */ -#endif /* _OS_H_ */ +#endif /* _MACHINE_XEN_XEN_OS_H_ */ diff --git a/sys/i386/include/xen/xenfunc.h b/sys/i386/include/xen/xenfunc.h index 47f04057aa68..f02ee1212e32 100644 --- a/sys/i386/include/xen/xenfunc.h +++ b/sys/i386/include/xen/xenfunc.h @@ -1,78 +1,82 @@ /*- * Copyright (c) 2004, 2005 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _XEN_XENFUNC_H_ #define _XEN_XENFUNC_H_ -#include +#include #include + +#include + #include #include + #include #define BKPT __asm__("int3"); #define XPQ_CALL_DEPTH 5 #define XPQ_CALL_COUNT 2 #define PG_PRIV PG_AVAIL3 typedef struct { unsigned long pt_ref; unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH]; } pteinfo_t; extern pteinfo_t *pteinfo_list; #ifdef XENDEBUG_LOW #define __PRINTK(x) printk x #else #define __PRINTK(x) #endif char *xen_setbootenv(char *cmd_line); int xen_boothowto(char *envp); void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line); #ifdef INVARIANTS #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__) #else #define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0) #endif void xen_update_descriptor(union descriptor *, union descriptor *); extern struct mtx balloon_lock; #if 0 #define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags) #define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags) #else #define balloon_lock(__flags) __flags = 1 #define balloon_unlock(__flags) __flags = 0 #endif #endif /* _XEN_XENFUNC_H_ */ diff --git a/sys/i386/include/xen/xenvar.h b/sys/i386/include/xen/xenvar.h index b1a0a4d8191b..2742613249df 100644 --- a/sys/i386/include/xen/xenvar.h +++ b/sys/i386/include/xen/xenvar.h @@ -1,120 +1,121 @@ /*- * Copyright (c) 2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef XENVAR_H_ #define XENVAR_H_ #include #if defined(XEN) #define XBOOTUP 0x1 #define XPMAP 0x2 extern int xendebug_flags; #ifndef NOXENDEBUG -#define XENPRINTF printk +/* Print directly to the Xen console during debugging. */ +#define XENPRINTF xc_printf #else #define XENPRINTF printf #endif extern xen_pfn_t *xen_phys_machine; extern xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; extern xen_pfn_t *xen_pfn_to_mfn_frame_list_list; #if 0 #define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__) #define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__) #define TRACE_DEBUG(argflags, _f, _a...) \ if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a); #else #define TRACE_ENTER #define TRACE_EXIT #define TRACE_DEBUG(argflags, _f, _a...) 
#endif extern xen_pfn_t *xen_machine_phys; /* Xen starts physical pages after the 4MB ISA hole - * FreeBSD doesn't */ #undef ADD_ISA_HOLE /* XXX */ #ifdef ADD_ISA_HOLE #define ISA_INDEX_OFFSET 1024 #define ISA_PDR_OFFSET 1 #else #define ISA_INDEX_OFFSET 0 #define ISA_PDR_OFFSET 0 #endif #define PFNTOMFN(i) (xen_phys_machine[(i)]) #define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)]) #define VTOP(x) ((((uintptr_t)(x))) - KERNBASE) #define PTOV(x) (((uintptr_t)(x)) + KERNBASE) #define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT) #define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT) #define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT) #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) #define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT) #define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT) void xpq_init(void); #define BITS_PER_LONG 32 #define NR_CPUS XEN_LEGACY_MAX_VCPUS #define BITS_TO_LONGS(bits) \ (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] int xen_create_contiguous_region(vm_page_t pages, int npages); void xen_destroy_contiguous_region(void * addr, int npages); #elif defined(XENHVM) #if !defined(PAE) #define vtomach(va) pmap_kextract((vm_offset_t) (va)) #endif #define PFNTOMFN(pa) (pa) #define MFNTOPFN(ma) (ma) #define set_phys_to_machine(pfn, mfn) ((void)0) #define phys_to_machine_mapping_valid(pfn) (TRUE) #endif /* !XEN && !XENHVM */ #endif diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 3b86d8418666..622e5b7f15bb 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -1,1118 +1,1118 @@ /*- * Copyright (c) 1990 William Jolitz. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_isa.h" #include "opt_npx.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NPX_DEBUG #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #ifdef XEN -#include +#include #include #endif #ifdef DEV_ISA #include #endif #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif /* * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. */ #if defined(__GNUCLIKE_ASM) && !defined(lint) #define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) #define fnclex() __asm __volatile("fnclex") #define fninit() __asm __volatile("fninit") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) #define fp_divide_by_0() __asm __volatile( \ "fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) #ifdef CPU_ENABLE_SSE #define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr))) #endif #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(u_short cw); void fnclex(void); void fninit(void); void fnsave(caddr_t addr); void fnstcw(caddr_t addr); void fnstsw(caddr_t addr); void fp_divide_by_0(void); void frstor(caddr_t addr); #ifdef CPU_ENABLE_SSE void fxsave(caddr_t addr); void fxrstor(caddr_t addr); void stmxcsr(u_int *csr); #endif #endif /* __GNUCLIKE_ASM && !lint */ #ifdef XEN #define start_emulating() (HYPERVISOR_fpu_taskswitch(1)) #define stop_emulating() (HYPERVISOR_fpu_taskswitch(0)) #else #define start_emulating() load_cr0(rcr0() | CR0_TS) #define stop_emulating() clts() #endif #ifdef CPU_ENABLE_SSE #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? 
\ (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ else \ (savefpu)->sv_87.sv_env.en_cw = (value); \ } while (0) #else /* CPU_ENABLE_SSE */ #define GET_FPU_CW(thread) \ (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) \ (savefpu)->sv_87.sv_env.en_cw = (value) #endif /* CPU_ENABLE_SSE */ typedef u_char bool_t; #ifdef CPU_ENABLE_SSE static void fpu_clean_state(void); #endif static void fpusave(union savefpu *); static void fpurstor(union savefpu *); static int npx_attach(device_t dev); static void npx_identify(driver_t *driver, device_t parent); static int npx_probe(device_t dev); int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); static volatile u_int npx_traps_while_probing; static union savefpu npx_initialstate; alias_for_inthand_t probetrap; __asm(" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(probetrap)) ",@function \n\ " __XSTRING(CNAME(probetrap)) ": \n\ ss \n\ incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ fnclex \n\ iret \n\ "); /* * Identify routine. Create a connection point on our parent for probing. */ static void npx_identify(driver, parent) driver_t *driver; device_t parent; { device_t child; child = BUS_ADD_CHILD(parent, 0, "npx", 0); if (child == NULL) panic("npx_identify"); } /* * Probe routine. Set flags to tell npxattach() what to do. Set up an * interrupt handler if npx needs to use interrupts. */ static int npx_probe(device_t dev) { struct gate_descriptor save_idt_npxtrap; u_short control, status; device_set_desc(dev, "math processor"); /* * Modern CPUs all have an FPU that uses the INT16 interface * and provide a simple way to verify that, so handle the * common case right away. */ if (cpu_feature & CPUID_FPU) { hw_float = 1; device_quiet(dev); return (0); } save_idt_npxtrap = idt[IDT_MF]; setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Don't trap while we're probing. */ stop_emulating(); /* * Finish resetting the coprocessor, if any. If there is an error * pending, then we may get a bogus IRQ13, but npx_intr() will handle * it OK. Bogus halts have never been observed, but we enabled * IRQ13 and cleared the BUSY# latch early to handle them anyway. */ fninit(); /* * Don't use fwait here because it might hang. * Don't use fnop here because it usually hangs if there is no FPU. */ DELAY(1000); /* wait for any IRQ13 */ #ifdef DIAGNOSTIC if (npx_traps_while_probing != 0) printf("fninit caused %u bogus npx trap(s)\n", npx_traps_while_probing); #endif /* * Check for a status of mostly zero. */ status = 0x5a5a; fnstsw(&status); if ((status & 0xb8ff) == 0) { /* * Good, now check for a proper control word. */ control = 0x5a5a; fnstcw(&control); if ((control & 0x1f3f) == 0x033f) { /* * We have an npx, now divide by 0 to see if exception * 16 works. */ control &= ~(1 << 2); /* enable divide by 0 trap */ fldcw(control); #ifdef FPU_ERROR_BROKEN /* * FPU error signal doesn't work on some CPU * accelerator board. */ hw_float = 1; return (0); #endif npx_traps_while_probing = 0; fp_divide_by_0(); if (npx_traps_while_probing != 0) { /* * Good, exception 16 works. 
*/ hw_float = 1; goto cleanup; } device_printf(dev, "FPU does not use exception 16 for error reporting\n"); goto cleanup; } } /* * Probe failed. Floating point simply won't work. * Notify user and disable FPU/MMX/SSE instruction execution. */ device_printf(dev, "WARNING: no FPU!\n"); __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : "n" (CR0_EM | CR0_MP) : "ax"); cleanup: idt[IDT_MF] = save_idt_npxtrap; return (hw_float ? 0 : ENXIO); } /* * Attach routine - announce which it is, and wire into system */ static int npx_attach(device_t dev) { npxinit(); critical_enter(); stop_emulating(); fpusave(&npx_initialstate); start_emulating(); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask) cpu_mxcsr_mask = npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask; else cpu_mxcsr_mask = 0xFFBF; bzero(npx_initialstate.sv_xmm.sv_fp, sizeof(npx_initialstate.sv_xmm.sv_fp)); bzero(npx_initialstate.sv_xmm.sv_xmm, sizeof(npx_initialstate.sv_xmm.sv_xmm)); /* XXX might need even more zeroing. */ } else #endif bzero(npx_initialstate.sv_87.sv_ac, sizeof(npx_initialstate.sv_87.sv_ac)); critical_exit(); return (0); } /* * Initialize floating point unit. */ void npxinit(void) { static union savefpu dummy; register_t saveintr; u_short control; if (!hw_float) return; /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. npxsave() initializes * the fpu and sets fpcurthread = NULL as important side effects. * * It is too early for critical_enter() to work on AP. */ saveintr = intr_disable(); npxsave(&dummy); stop_emulating(); #ifdef CPU_ENABLE_SSE /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ if (cpu_fxsr) fninit(); #endif control = __INITIAL_NPXCW__; fldcw(control); start_emulating(); intr_restore(saveintr); } /* * Free coprocessor (if we have it). */ void npxexit(td) struct thread *td; { critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxsave(curpcb->pcb_save); critical_exit(); #ifdef NPX_DEBUG if (hw_float) { u_int masked_exceptions; masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). */ if (masked_exceptions & 0x0d) log(LOG_ERR, "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", td->td_proc->p_pid, td->td_proc->p_comm, masked_exceptions); } #endif } int npxformat() { if (!hw_float) return (_MC_FPFMT_NODEV); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) return (_MC_FPFMT_XMM); #endif return (_MC_FPFMT_387); } /* * The following mechanism is used to ensure that the FPE_... value * that is passed as a trapcode to the signal handler of the user * process does not have more than one bit set. * * Multiple bits may be set if the user process modifies the control * word while a status word bit is already set. While this is a sign * of bad coding, we have no choise than to narrow them down to one * bit, since we must not send a trapcode that is not exactly one of * the FPE_ macros. * * The mechanism has a static table with 127 entries. Each combination * of the 7 FPU status word exception bits directly translates to a * position in this table, where a single FPE_... value is stored. * This FPE_... value stored there is considered the "most important" * of the exception bits and will be sent as the signal code. The * precedence of the bits is based upon Intel Document "Numerical * Applications", Chapter "Special Computational Situations". 
* * The macro to choose one of these values does these steps: 1) Throw * away status word bits that cannot be masked. 2) Throw away the bits * currently masked in the control word, assuming the user isn't * interested in them anymore. 3) Reinsert status word bit 7 (stack * fault) if it is set, which cannot be masked but must be presered. * 4) Use the remaining bits to point into the trapcode table. * * The 6 maskable bits in order of their preference, as stated in the * above referenced Intel manual: * 1 Invalid operation (FP_X_INV) * 1a Stack underflow * 1b Stack overflow * 1c Operand of unsupported format * 1d SNaN operand. * 2 QNaN operand (not an exception, irrelavant here) * 3 Any other invalid-operation not mentioned above or zero divide * (FP_X_INV, FP_X_DZ) * 4 Denormal operand (FP_X_DNML) * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) * 6 Inexact result (FP_X_IMP) */ static char fpetable[128] = { 0, FPE_FLTINV, /* 1 - INV */ FPE_FLTUND, /* 2 - DNML */ FPE_FLTINV, /* 3 - INV | DNML */ FPE_FLTDIV, /* 4 - DZ */ FPE_FLTINV, /* 5 - INV | DZ */ FPE_FLTDIV, /* 6 - DNML | DZ */ FPE_FLTINV, /* 7 - INV | DNML | DZ */ FPE_FLTOVF, /* 8 - OFL */ FPE_FLTINV, /* 9 - INV | OFL */ FPE_FLTUND, /* A - DNML | OFL */ FPE_FLTINV, /* B - INV | DNML | OFL */ FPE_FLTDIV, /* C - DZ | OFL */ FPE_FLTINV, /* D - INV | DZ | OFL */ FPE_FLTDIV, /* E - DNML | DZ | OFL */ FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ FPE_FLTUND, /* 10 - UFL */ FPE_FLTINV, /* 11 - INV | UFL */ FPE_FLTUND, /* 12 - DNML | UFL */ FPE_FLTINV, /* 13 - INV | DNML | UFL */ FPE_FLTDIV, /* 14 - DZ | UFL */ FPE_FLTINV, /* 15 - INV | DZ | UFL */ FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ FPE_FLTOVF, /* 18 - OFL | UFL */ FPE_FLTINV, /* 19 - INV | OFL | UFL */ FPE_FLTUND, /* 1A - DNML | OFL | UFL */ FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ FPE_FLTRES, /* 20 - IMP */ FPE_FLTINV, /* 21 - INV | IMP */ FPE_FLTUND, /* 22 - DNML | IMP */ FPE_FLTINV, /* 23 - INV | DNML | IMP */ FPE_FLTDIV, /* 24 - DZ | IMP */ FPE_FLTINV, /* 25 - INV | DZ | IMP */ FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ FPE_FLTOVF, /* 28 - OFL | IMP */ FPE_FLTINV, /* 29 - INV | OFL | IMP */ FPE_FLTUND, /* 2A - DNML | OFL | IMP */ FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ FPE_FLTUND, /* 30 - UFL | IMP */ FPE_FLTINV, /* 31 - INV | UFL | IMP */ FPE_FLTUND, /* 32 - DNML | UFL | IMP */ FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ FPE_FLTSUB, /* 40 - STK */ FPE_FLTSUB, /* 41 - INV | STK */ FPE_FLTUND, /* 42 - DNML | STK */ FPE_FLTSUB, /* 43 - INV | DNML | STK */ FPE_FLTDIV, /* 44 - DZ | STK */ 
FPE_FLTSUB, /* 45 - INV | DZ | STK */ FPE_FLTDIV, /* 46 - DNML | DZ | STK */ FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ FPE_FLTOVF, /* 48 - OFL | STK */ FPE_FLTSUB, /* 49 - INV | OFL | STK */ FPE_FLTUND, /* 4A - DNML | OFL | STK */ FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ FPE_FLTDIV, /* 4C - DZ | OFL | STK */ FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ FPE_FLTUND, /* 50 - UFL | STK */ FPE_FLTSUB, /* 51 - INV | UFL | STK */ FPE_FLTUND, /* 52 - DNML | UFL | STK */ FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ FPE_FLTDIV, /* 54 - DZ | UFL | STK */ FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ FPE_FLTOVF, /* 58 - OFL | UFL | STK */ FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ FPE_FLTRES, /* 60 - IMP | STK */ FPE_FLTSUB, /* 61 - INV | IMP | STK */ FPE_FLTUND, /* 62 - DNML | IMP | STK */ FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ FPE_FLTDIV, /* 64 - DZ | IMP | STK */ FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ FPE_FLTOVF, /* 68 - OFL | IMP | STK */ FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ FPE_FLTUND, /* 70 - UFL | IMP | STK */ FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ }; /* * Read the FP status and control words, then generate si_code value * for SIGFPE. The error code chosen will be one of the * FPE_... macros. It will be sent as the second argument to old * BSD-style signal handlers and as "siginfo_t->si_code" (second * argument) to SA_SIGINFO signal handlers. * * Some time ago, we cleared the x87 exceptions with FNCLEX there. * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The * usermode code which understands the FPU hardware enough to enable * the exceptions, can also handle clearing the exception state in the * handler. The only consequence of not clearing the exception is the * rethrow of the SIGFPE on return from the signal handler and * reexecution of the corresponding instruction. * * For XMM traps, the exceptions were never cleared. 
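 *
 * Worked example (hypothetical register values, for illustration only):
 * with a control word of 0x127b, only the zero-divide exception is
 * unmasked, so (~control & 0x3f) | 0x40 = 0x44.  If the status word is
 * 0x0024 (DZ and IMP latched), the table index computed below is
 * 0x24 & 0x44 = 0x04, and fpetable[0x04] is FPE_FLTDIV: the unmasked
 * zero-divide bit wins and the masked inexact-result bit is ignored.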
*/ int npxtrap_x87(void) { u_short control, status; if (!hw_float) { printf( "npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); /* * Interrupt handling (for another interrupt) may have pushed the * state to memory. Fetch the relevant parts of the state from * wherever they are. */ if (PCPU_GET(fpcurthread) != curthread) { control = GET_FPU_CW(curthread); status = GET_FPU_SW(curthread); } else { fnstcw(&control); fnstsw(&status); } critical_exit(); return (fpetable[status & ((~control & 0x3f) | 0x40)]); } #ifdef CPU_ENABLE_SSE int npxtrap_sse(void) { u_int mxcsr; if (!hw_float) { printf( "npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n", PCPU_GET(fpcurthread), curthread, hw_float); panic("npxtrap from nowhere"); } critical_enter(); if (PCPU_GET(fpcurthread) != curthread) mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr; else stmxcsr(&mxcsr); critical_exit(); return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } #endif /* * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curthread != fpcurthread) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ static int err_count = 0; int npxdna(void) { if (!hw_float) return (0); critical_enter(); if (PCPU_GET(fpcurthread) == curthread) { printf("npxdna: fpcurthread == curthread %d times\n", ++err_count); stop_emulating(); critical_exit(); return (1); } if (PCPU_GET(fpcurthread) != NULL) { printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_proc->p_pid, curthread, curthread->td_proc->p_pid); panic("npxdna"); } stop_emulating(); /* * Record new context early in case frstor causes an IRQ13. */ PCPU_SET(fpcurthread, curthread); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpu_clean_state(); #endif if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. */ fpurstor(&npx_initialstate); if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(curpcb->pcb_initial_npxcw); curpcb->pcb_flags |= PCB_NPXINITDONE; if (PCB_USER_FPU(curpcb)) curpcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { /* * The following fpurstor() may cause an IRQ13 when the * state being restored has a pending error. The error will * appear to have been triggered by the current (npx) user * instruction even when that instruction is a no-wait * instruction that should not trigger an error (e.g., * fnclex). On at least one 486 system all of the no-wait * instructions are broken the same as frstor, so our * treatment does not amplify the breakage. On at least * one 386/Cyrix 387 system, fnclex works correctly while * frstor and fnsave are broken, so our treatment breaks * fnclex if it is the first FPU instruction after a context * switch. */ fpurstor(curpcb->pcb_save); } critical_exit(); return (1); } /* * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by * no-wait npx instructions. See the Intel application note AP-578 for * details. This doesn't cause any additional complications here. IRQ13's * are inherently asynchronous unless the CPU is frozen to deliver them -- * one that started in userland may be delivered many instructions later, * after the process has entered the kernel. 
It may even be delivered after * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in * the same way as a very-late-arriving non-spurious IRQ13 from user mode: * it is normally ignored at first because we set fpcurthread to NULL; it is * normally retriggered in npxdna() after return to user mode. * * npxsave() must be called with interrupts disabled, so that it clears * fpcurthread atomically with saving the state. We require callers to do the * disabling, since most callers need to disable interrupts anyway to call * npxsave() atomically with checking fpcurthread. * * A previous version of npxsave() went to great lengths to excecute fnsave * with interrupts enabled in case executing it froze the CPU. This case * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply * spurious freezes. */ void npxsave(addr) union savefpu *addr; { stop_emulating(); fpusave(addr); start_emulating(); PCPU_SET(fpcurthread, NULL); } void npxdrop() { struct thread *td; /* * Discard pending exceptions in the !cpu_fxsr case so that unmasked * ones don't cause a panic on the next frstor. */ #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif fnclex(); td = PCPU_GET(fpcurthread); KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); CRITICAL_ASSERT(td); PCPU_SET(fpcurthread, NULL); td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; start_emulating(); } /* * Get the user state of the FPU into pcb->pcb_user_save without * dropping ownership (if possible). It returns the FPU ownership * status. */ int npxgetregs(struct thread *td) { struct pcb *pcb; if (!hw_float) return (_MC_FPOWNED_NONE); pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(&npx_initialstate, &pcb->pcb_user_save, sizeof(npx_initialstate)); SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw); npxuserinited(td); return (_MC_FPOWNED_PCB); } critical_enter(); if (td == PCPU_GET(fpcurthread)) { fpusave(&pcb->pcb_user_save); #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif /* * fnsave initializes the FPU and destroys whatever * context it contains. Make sure the FPU owner * starts with a clean state next time. */ npxdrop(); critical_exit(); return (_MC_FPOWNED_FPU); } else { critical_exit(); return (_MC_FPOWNED_PCB); } } void npxuserinited(struct thread *td) { struct pcb *pcb; pcb = td->td_pcb; if (PCB_USER_FPU(pcb)) pcb->pcb_flags |= PCB_NPXINITDONE; pcb->pcb_flags |= PCB_NPXUSERINITDONE; } void npxsetregs(struct thread *td, union savefpu *addr) { struct pcb *pcb; if (!hw_float) return; pcb = td->td_pcb; critical_enter(); if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { #ifdef CPU_ENABLE_SSE if (!cpu_fxsr) #endif fnclex(); /* As in npxdrop(). */ if (((uintptr_t)addr & 0xf) != 0) { bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); fpurstor(&pcb->pcb_user_save); } else fpurstor(addr); critical_exit(); pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; } else { critical_exit(); bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); npxuserinited(td); } } static void fpusave(addr) union savefpu *addr; { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fxsave(addr); else #endif fnsave(addr); } #ifdef CPU_ENABLE_SSE /* * On AuthenticAMD processors, the fxrstor instruction does not restore * the x87's stored last instruction pointer, last data pointer, and last * opcode values, except in the rare case in which the exception summary * (ES) bit in the x87 status word is set to 1. * * In order to avoid leaking this information across processes, we clean * these values by performing a dummy load before executing fxrstor(). 
*/ static void fpu_clean_state(void) { static float dummy_variable = 0.0; u_short status; /* * Clear the ES bit in the x87 status word if it is currently * set, in order to avoid causing a fault in the upcoming load. */ fnstsw(&status); if (status & 0x80) fnclex(); /* * Load the dummy variable into the x87 stack. This mangles * the x87 stack, but we don't care since we're about to call * fxrstor() anyway. */ __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); } #endif /* CPU_ENABLE_SSE */ static void fpurstor(addr) union savefpu *addr; { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fxrstor(addr); else #endif frstor(addr); } static device_method_t npx_methods[] = { /* Device interface */ DEVMETHOD(device_identify, npx_identify), DEVMETHOD(device_probe, npx_probe), DEVMETHOD(device_attach, npx_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npx_driver = { "npx", npx_methods, 1, /* no softc */ }; static devclass_t npx_devclass; /* * We prefer to attach to the root nexus so that the usual case (exception 16) * doesn't describe the processor as being `on isa'. */ DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); #ifdef DEV_ISA /* * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. */ static struct isa_pnp_id npxisa_ids[] = { { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ { 0 } }; static int npxisa_probe(device_t dev) { int result; if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { device_quiet(dev); } return(result); } static int npxisa_attach(device_t dev) { return (0); } static device_method_t npxisa_methods[] = { /* Device interface */ DEVMETHOD(device_probe, npxisa_probe), DEVMETHOD(device_attach, npxisa_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), { 0, 0 } }; static driver_t npxisa_driver = { "npxisa", npxisa_methods, 1, /* no softc */ }; static devclass_t npxisa_devclass; DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); #ifndef PC98 DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", "Kernel contexts for FPU state"); #define XSAVE_AREA_ALIGN 64 #define FPU_KERN_CTX_NPXINITDONE 0x01 struct fpu_kern_ctx { union savefpu *prev; uint32_t flags; char hwstate1[]; }; struct fpu_kern_ctx * fpu_kern_alloc_ctx(u_int flags) { struct fpu_kern_ctx *res; size_t sz; sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + sizeof(union savefpu); res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO); return (res); } void fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) { /* XXXKIB clear the memory ? 
*/ free(ctx, M_FPUKERN_CTX); } static union savefpu * fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) { vm_offset_t p; p = (vm_offset_t)&ctx->hwstate1; p = roundup2(p, XSAVE_AREA_ALIGN); return ((union savefpu *)p); } int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); ctx->flags = 0; if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) ctx->flags |= FPU_KERN_CTX_NPXINITDONE; npxexit(td); ctx->prev = pcb->pcb_save; pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); pcb->pcb_flags |= PCB_KERNNPX; pcb->pcb_flags &= ~PCB_NPXINITDONE; return (0); } int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) npxdrop(); critical_exit(); pcb->pcb_save = ctx->prev; if (pcb->pcb_save == &pcb->pcb_user_save) { if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; pcb->pcb_flags &= ~PCB_KERNNPX; } else { if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) pcb->pcb_flags |= PCB_NPXINITDONE; else pcb->pcb_flags &= ~PCB_NPXINITDONE; KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); } return (0); } int fpu_kern_thread(u_int flags) { struct pcb *pcb; pcb = curpcb; KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, ("Only kthread may use fpu_kern_thread")); KASSERT(curpcb->pcb_save == &curpcb->pcb_user_save, ("mangled pcb_save")); KASSERT(PCB_USER_FPU(curpcb), ("recursive call")); curpcb->pcb_flags |= PCB_KERNNPX; return (0); } int is_fpu_kern_thread(u_int flags) { if ((curthread->td_pflags & TDP_KTHREAD) == 0) return (0); return ((curpcb->pcb_flags & PCB_KERNNPX) != 0); } diff --git a/sys/i386/xen/clock.c b/sys/i386/xen/clock.c index a10b5462b7a1..524fa149207f 100644 --- a/sys/i386/xen/clock.c +++ b/sys/i386/xen/clock.c @@ -1,905 +1,917 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz and Don Ahn. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 */ #include __FBSDID("$FreeBSD$"); /* #define DELAYDEBUG */ /* * Routines to handle clock hardware. */ #include "opt_ddb.h" #include "opt_clock.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) #include #endif #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we * can use a simple formula for leap years. */ #define LEAPYEAR(y) (!((y) % 4)) #define DAYSPERYEAR (28+30*4+31*7) #ifndef TIMER_FREQ #define TIMER_FREQ 1193182 #endif #ifdef CYC2NS_SCALE_FACTOR #undef CYC2NS_SCALE_FACTOR #endif #define CYC2NS_SCALE_FACTOR 10 /* Values for timerX_state: */ #define RELEASED 0 #define RELEASE_PENDING 1 #define ACQUIRED 2 #define ACQUIRE_PENDING 3 struct mtx clock_lock; #define RTC_LOCK_INIT \ mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_NOPROFILE) #define RTC_LOCK mtx_lock_spin(&clock_lock) #define RTC_UNLOCK mtx_unlock_spin(&clock_lock) int adjkerntz; /* local offset from GMT in seconds */ int clkintr_pending; int pscnt = 1; int psdiv = 1; int wall_cmos_clock; u_int timer_freq = TIMER_FREQ; static int independent_wallclock; static int xen_disable_rtc_set; static u_long cyc2ns_scale; static struct timespec shadow_tv; static uint32_t shadow_tv_version; /* XXX: lazy locking */ static uint64_t processed_system_time; /* stime (ns) at last processing. */ static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; +int ap_cpu_initclocks(int cpu); + SYSCTL_INT(_machdep, OID_AUTO, independent_wallclock, CTLFLAG_RW, &independent_wallclock, 0, ""); SYSCTL_INT(_machdep, OID_AUTO, xen_disable_rtc_set, CTLFLAG_RW, &xen_disable_rtc_set, 1, ""); #define do_div(n,base) ({ \ unsigned long __upper, __low, __high, __mod, __base; \ __base = (base); \ __asm("":"=a" (__low), "=d" (__high):"A" (n)); \ __upper = __high; \ if (__high) { \ __upper = __high % (__base); \ __high = __high / (__base); \ } \ __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \ __asm("":"=A" (n):"a" (__low),"d" (__high)); \ __mod; \ }) #define NS_PER_TICK (1000000000ULL/hz) #define rdtscll(val) \ __asm__ __volatile__("rdtsc" : "=A" (val)) /* convert from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) * ns = cycles * (10^9 / (cpu_mhz * 10^6)) * ns = cycles * (10^3 / cpu_mhz) * * Then we use scaling math (suggested by george@mvista.com) to get: * ns = cycles * (10^3 * SC / cpu_mhz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div * into a shift. * -johnstul@us.ibm.com "math is hard, lets go shopping!" 
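 *
 * Worked example (illustrative numbers only): with CYC2NS_SCALE_FACTOR = 10
 * and a hypothetical 2000 MHz TSC, cyc2ns_scale = (1000 << 10) / 2000 = 512,
 * so cycles_2_ns(cyc) = (cyc * 512) >> 10 = cyc / 2, i.e. 0.5 ns per cycle,
 * as expected for a 2 GHz clock.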
*/ static inline void set_cyc2ns_scale(unsigned long cpu_mhz) { cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) { return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; } /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. */ static inline uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int shift) { uint64_t product; uint32_t tmp1, tmp2; if ( shift < 0 ) delta >>= -shift; else delta <<= shift; __asm__ ( "mul %5 ; " "mov %4,%%eax ; " "mov %%edx,%4 ; " "mul %5 ; " "xor %5,%5 ; " "add %4,%%eax ; " "adc %5,%%edx ; " : "=A" (product), "=r" (tmp1), "=r" (tmp2) : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" (mul_frac) ); return product; } static uint64_t get_nsec_offset(struct shadow_time_info *shadow) { uint64_t now, delta; rdtscll(now); delta = now - shadow->tsc_timestamp; return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); } static void update_wallclock(void) { shared_info_t *s = HYPERVISOR_shared_info; do { shadow_tv_version = s->wc_version; rmb(); shadow_tv.tv_sec = s->wc_sec; shadow_tv.tv_nsec = s->wc_nsec; rmb(); } while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version)); } static void add_uptime_to_wallclock(void) { struct timespec ut; xen_fetch_uptime(&ut); timespecadd(&shadow_tv, &ut); } /* * Reads a consistent set of time-base values from Xen, into a shadow data * area. Must be called with the xtime_lock held for writing. */ static void __get_time_values_from_xen(void) { shared_info_t *s = HYPERVISOR_shared_info; struct vcpu_time_info *src; struct shadow_time_info *dst; uint32_t pre_version, post_version; + struct pcpu *pc; + pc = pcpu_find(smp_processor_id()); src = &s->vcpu_info[smp_processor_id()].time; - dst = &per_cpu(shadow_time, smp_processor_id()); + dst = &pc->pc_shadow_time; spinlock_enter(); do { pre_version = dst->version = src->version; rmb(); dst->tsc_timestamp = src->tsc_timestamp; dst->system_timestamp = src->system_time; dst->tsc_to_nsec_mul = src->tsc_to_system_mul; dst->tsc_shift = src->tsc_shift; rmb(); post_version = src->version; } while ((pre_version & 1) | (pre_version ^ post_version)); dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000; spinlock_exit(); } static inline int time_values_up_to_date(int cpu) { struct vcpu_time_info *src; struct shadow_time_info *dst; + struct pcpu *pc; src = &HYPERVISOR_shared_info->vcpu_info[cpu].time; - dst = &per_cpu(shadow_time, cpu); + pc = pcpu_find(cpu); + dst = &pc->pc_shadow_time; rmb(); return (dst->version == src->version); } static unsigned xen_get_timecount(struct timecounter *tc); static struct timecounter xen_timecounter = { xen_get_timecount, /* get_timecount */ 0, /* no poll_pps */ ~0u, /* counter_mask */ 0, /* frequency */ "ixen", /* name */ 0 /* quality */ }; static struct eventtimer xen_et; struct xen_et_state { int mode; #define MODE_STOP 0 #define MODE_PERIODIC 1 #define MODE_ONESHOT 2 int64_t period; int64_t next; }; static DPCPU_DEFINE(struct xen_et_state, et_state); static int clkintr(void *arg) { int64_t now; int cpu = smp_processor_id(); - struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); + struct pcpu *pc = pcpu_find(cpu); + struct shadow_time_info *shadow = &pc->pc_shadow_time; struct xen_et_state *state = DPCPU_PTR(et_state); do { __get_time_values_from_xen(); now = shadow->system_timestamp + get_nsec_offset(shadow); } while (!time_values_up_to_date(cpu)); /* Process elapsed ticks since last call. 
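 * (Illustrative numbers, not from the source: in periodic mode with a
 * 10 ms period, if the timer upcall arrives 35 ms after state->next, the
 * loop below fires the event-timer callback four times and leaves
 * state->next 5 ms in the future before re-arming the hypervisor timer.)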
*/ processed_system_time = now; if (state->mode == MODE_PERIODIC) { while (now >= state->next) { state->next += state->period; if (xen_et.et_active) xen_et.et_event_cb(&xen_et, xen_et.et_arg); } HYPERVISOR_set_timer_op(state->next + 50000); } else if (state->mode == MODE_ONESHOT) { if (xen_et.et_active) xen_et.et_event_cb(&xen_et, xen_et.et_arg); } /* * Take synchronised time from Xen once a minute if we're not * synchronised ourselves, and we haven't chosen to keep an independent * time base. */ if (shadow_tv_version != HYPERVISOR_shared_info->wc_version && !independent_wallclock) { printf("[XEN] hypervisor wallclock nudged; nudging TOD.\n"); update_wallclock(); add_uptime_to_wallclock(); tc_setclock(&shadow_tv); } /* XXX TODO */ return (FILTER_HANDLED); } static uint32_t getit(void) { struct shadow_time_info *shadow; uint64_t time; uint32_t local_time_version; + struct pcpu *pc; - shadow = &per_cpu(shadow_time, smp_processor_id()); + pc = pcpu_find(smp_processor_id()); + shadow = &pc->pc_shadow_time; do { local_time_version = shadow->version; barrier(); time = shadow->system_timestamp + get_nsec_offset(shadow); if (!time_values_up_to_date(smp_processor_id())) __get_time_values_from_xen(/*cpu */); barrier(); } while (local_time_version != shadow->version); return (time); } /* * XXX: timer needs more SMP work. */ void i8254_init(void) { RTC_LOCK_INIT; } /* * Wait "n" microseconds. * Relies on timer 1 counting down from (timer_freq / hz) * Note: timer had better have been programmed before this is first used! */ void DELAY(int n) { int delta, ticks_left; uint32_t tick, prev_tick; #ifdef DELAYDEBUG int getit_calls = 1; int n1; static int state = 0; if (state == 0) { state = 1; for (n1 = 1; n1 <= 10000000; n1 *= 10) DELAY(n1); state = 2; } if (state == 1) printf("DELAY(%d)...", n); #endif /* * Read the counter first, so that the rest of the setup overhead is * counted. Guess the initial overhead is 20 usec (on most systems it * takes about 1.5 usec for each of the i/o's in getit(). The loop * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The * multiplications and divisions to scale the count take a while). * * However, if ddb is active then use a fake counter since reading * the i8254 counter involves acquiring a lock. ddb must not go * locking for many reasons, but it calls here for at least atkbd * input. */ prev_tick = getit(); n -= 0; /* XXX actually guess no initial overhead */ /* * Calculate (n * (timer_freq / 1e6)) without using floating point * and without any avoidable overflows. */ if (n <= 0) ticks_left = 0; else if (n < 256) /* * Use fixed point to avoid a slow division by 1000000. * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. * 2^15 is the first power of 2 that gives exact results * for n between 0 and 256. */ ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; else /* * Don't bother using fixed point, although gcc-2.7.2 * generates particularly poor code for the long long * division, since even the slow way will complete long * before the delay is up (unless we're interrupted). */ ticks_left = ((u_int)n * (long long)timer_freq + 999999) / 1000000; while (ticks_left > 0) { tick = getit(); #ifdef DELAYDEBUG ++getit_calls; #endif delta = tick - prev_tick; prev_tick = tick; if (delta < 0) { /* * Guard against timer0_max_count being wrong. * This shouldn't happen in normal operation, * but it may happen if set_timer_freq() is * traced. */ /* delta += timer0_max_count; ??? 
*/ if (delta < 0) delta = 0; } ticks_left -= delta; } #ifdef DELAYDEBUG if (state == 1) printf(" %d calls to getit() at %d usec each\n", getit_calls, (n + 5) / getit_calls); #endif } /* * Restore all the timers non-atomically (XXX: should be atomically). * * This function is called from pmtimer_resume() to restore all the timers. * This should not be necessary, but there are broken laptops that do not * restore all the timers on resume. */ void timer_restore(void) { struct xen_et_state *state = DPCPU_PTR(et_state); + struct pcpu *pc; /* Get timebases for new environment. */ __get_time_values_from_xen(); /* Reset our own concept of passage of system time. */ - processed_system_time = per_cpu(shadow_time, 0).system_timestamp; + pc = pcpu_find(0); + processed_system_time = pc->pc_shadow_time.system_timestamp; state->next = processed_system_time; } void startrtclock() { unsigned long long alarm; uint64_t __cpu_khz; uint32_t cpu_khz; struct vcpu_time_info *info; + struct pcpu *pc; + + pc = pcpu_find(0); /* initialize xen values */ __get_time_values_from_xen(); - processed_system_time = per_cpu(shadow_time, 0).system_timestamp; + processed_system_time = pc->pc_shadow_time.system_timestamp; __cpu_khz = 1000000ULL << 32; info = &HYPERVISOR_shared_info->vcpu_info[0].time; (void)do_div(__cpu_khz, info->tsc_to_system_mul); if ( info->tsc_shift < 0 ) cpu_khz = __cpu_khz << -info->tsc_shift; else cpu_khz = __cpu_khz >> info->tsc_shift; printf("Xen reported: %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = (2^32 * 1 / (clocks/us)) */ set_cyc2ns_scale(cpu_khz/1000); tsc_freq = cpu_khz * 1000; timer_freq = 1000000000LL; xen_timecounter.tc_frequency = timer_freq >> 9; tc_init(&xen_timecounter); rdtscll(alarm); } /* * RTC support routines */ static __inline int readrtc(int port) { return(bcd2bin(rtcin(port))); } #ifdef XEN_PRIVILEGED_GUEST /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ static void domu_inittodr(time_t base) { unsigned long sec; int s, y; struct timespec ts; update_wallclock(); add_uptime_to_wallclock(); RTC_LOCK; if (base) { ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); } sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - shadow_tv.tv_sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ tc_setclock(&shadow_tv); } RTC_UNLOCK; } /* * Write system time back to RTC. */ static void domu_resettodr(void) { unsigned long tm; int s; dom0_op_t op; struct shadow_time_info *shadow; + struct pcpu *pc; - shadow = &per_cpu(shadow_time, smp_processor_id()); + pc = pcpu_find(smp_processor_id()); + shadow = &pc->pc_shadow_time; if (xen_disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); if ((xen_start_info->flags & SIF_INITDOMAIN) && !independent_wallclock) { op.cmd = DOM0_SETTIME; op.u.settime.secs = tm; op.u.settime.nsecs = 0; op.u.settime.system_time = shadow->system_timestamp; HYPERVISOR_dom0_op(&op); update_wallclock(); add_uptime_to_wallclock(); } else if (independent_wallclock) { /* notyet */ ; } } /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. 
*/ void inittodr(time_t base) { unsigned long sec, days; int year, month; int y, m, s; struct timespec ts; if (!(xen_start_info->flags & SIF_INITDOMAIN)) { domu_inittodr(base); return; } if (base) { s = splclock(); ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); splx(s); } /* Look if we have a RTC present and the time is valid */ if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) goto wrong_time; /* wait for time update to complete */ /* If RTCSA_TUP is zero, we have at least 244us before next update */ s = splhigh(); while (rtcin(RTC_STATUSA) & RTCSA_TUP) { splx(s); s = splhigh(); } days = 0; #ifdef USE_RTC_CENTURY year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; #else year = readrtc(RTC_YEAR) + 1900; if (year < 1970) year += 100; #endif if (year < 1970) { splx(s); goto wrong_time; } month = readrtc(RTC_MONTH); for (m = 1; m < month; m++) days += daysinmonth[m-1]; if ((month > 2) && LEAPYEAR(year)) days ++; days += readrtc(RTC_DAY) - 1; for (y = 1970; y < year; y++) days += DAYSPERYEAR + LEAPYEAR(y); sec = ((( days * 24 + readrtc(RTC_HRS)) * 60 + readrtc(RTC_MIN)) * 60 + readrtc(RTC_SEC)); /* sec now contains the number of seconds, since Jan 1 1970, in the local time zone */ sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); y = time_second - sec; if (y <= -2 || y >= 2) { /* badly off, adjust it */ ts.tv_sec = sec; ts.tv_nsec = 0; tc_setclock(&ts); } splx(s); return; wrong_time: printf("Invalid time in real time clock.\n"); printf("Check and reset the date immediately!\n"); } /* * Write system time back to RTC */ void resettodr() { unsigned long tm; int y, m, s; if (!(xen_start_info->flags & SIF_INITDOMAIN)) { domu_resettodr(); return; } if (xen_disable_rtc_set) return; s = splclock(); tm = time_second; splx(s); /* Disable RTC updates and interrupts. */ writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); /* Calculate local time to put in RTC */ tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ /* We have now the days since 01-01-1970 in tm */ writertc(RTC_WDAY, (tm + 4) % 7 + 1); /* Write back Weekday */ for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); tm >= m; y++, m = DAYSPERYEAR + LEAPYEAR(y)) tm -= m; /* Now we have the years in y and the day-of-the-year in tm */ writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ #ifdef USE_RTC_CENTURY writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ #endif for (m = 0; ; m++) { int ml; ml = daysinmonth[m]; if (m == 1 && LEAPYEAR(y)) ml++; if (tm < ml) break; tm -= ml; } writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ /* Reenable RTC updates and interrupts. 
*/ writertc(RTC_STATUSB, RTCSB_24HR); rtcin(RTC_INTR); } #endif static int xen_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct xen_et_state *state = DPCPU_PTR(et_state); struct shadow_time_info *shadow; int64_t fperiod; + struct pcpu *pc; __get_time_values_from_xen(); if (period != 0) { state->mode = MODE_PERIODIC; state->period = (1000000000LLU * period) >> 32; } else { state->mode = MODE_ONESHOT; state->period = 0; } if (first != 0) fperiod = (1000000000LLU * first) >> 32; else fperiod = state->period; - shadow = &per_cpu(shadow_time, smp_processor_id()); + pc = pcpu_find(smp_processor_id()); + shadow = &pc->pc_shadow_time; state->next = shadow->system_timestamp + get_nsec_offset(shadow); state->next += fperiod; HYPERVISOR_set_timer_op(state->next + 50000); return (0); } static int xen_et_stop(struct eventtimer *et) { struct xen_et_state *state = DPCPU_PTR(et_state); state->mode = MODE_STOP; HYPERVISOR_set_timer_op(0); return (0); } /* * Start clocks running. */ void cpu_initclocks(void) { - unsigned int time_irq; + xen_intr_handle_t time_irq; int error; HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 0, NULL); - error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "cpu0:timer", + error = xen_intr_bind_virq(root_bus, VIRQ_TIMER, 0, clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq); if (error) panic("failed to register clock interrupt\n"); /* should fast clock be enabled ? */ bzero(&xen_et, sizeof(xen_et)); xen_et.et_name = "ixen"; xen_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; xen_et.et_quality = 600; xen_et.et_frequency = 1000000000; xen_et.et_min_period = 0x00400000LL; xen_et.et_max_period = (0xfffffffeLLU << 32) / xen_et.et_frequency; xen_et.et_start = xen_et_start; xen_et.et_stop = xen_et_stop; xen_et.et_priv = NULL; et_register(&xen_et); cpu_initclocks_bsp(); } int ap_cpu_initclocks(int cpu) { - char buf[MAXCOMLEN + 1]; - unsigned int time_irq; + xen_intr_handle_t time_irq; int error; HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL); - snprintf(buf, sizeof(buf), "cpu%d:timer", cpu); - error = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, buf, + error = xen_intr_bind_virq(root_bus, VIRQ_TIMER, cpu, clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq); if (error) panic("failed to register clock interrupt\n"); return (0); } static uint32_t xen_get_timecount(struct timecounter *tc) { uint64_t clk; struct shadow_time_info *shadow; - shadow = &per_cpu(shadow_time, smp_processor_id()); + struct pcpu *pc; + + pc = pcpu_find(smp_processor_id()); + shadow = &pc->pc_shadow_time; __get_time_values_from_xen(); clk = shadow->system_timestamp + get_nsec_offset(shadow); return (uint32_t)(clk >> 9); } /* Return system time offset by ticks */ uint64_t get_system_time(int ticks) { return processed_system_time + (ticks * NS_PER_TICK); } -void -idle_block(void) -{ - - HYPERVISOR_sched_op(SCHEDOP_block, 0); -} - int timer_spkr_acquire(void) { return (0); } int timer_spkr_release(void) { return (0); } void timer_spkr_setfreq(int freq) { } diff --git a/sys/i386/xen/exception.s b/sys/i386/xen/exception.s index e965ffd026d3..95f1c0e6703f 100644 --- a/sys/i386/xen/exception.s +++ b/sys/i386/xen/exception.s @@ -1,494 +1,494 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_apic.h" #include "opt_npx.h" #include #include #include #include "assym.s" #define SEL_RPL_MASK 0x0002 #define __HYPERVISOR_iret 23 /* Offsets into shared_info_t. */ #define evtchn_upcall_pending /* 0 */ #define evtchn_upcall_mask 1 #define sizeof_vcpu_shift 6 #ifdef SMP #define GET_VCPU_INFO(reg) movl PCPU(CPUID),reg ; \ shl $sizeof_vcpu_shift,reg ; \ addl HYPERVISOR_shared_info,reg #else #define GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg #endif #define __DISABLE_INTERRUPTS(reg) movb $1,evtchn_upcall_mask(reg) #define __ENABLE_INTERRUPTS(reg) movb $0,evtchn_upcall_mask(reg) #define DISABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \ __DISABLE_INTERRUPTS(reg) #define ENABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \ __ENABLE_INTERRUPTS(reg) #define __TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) #define POPA \ popl %edi; \ popl %esi; \ popl %ebp; \ popl %ebx; \ popl %ebx; \ popl %edx; \ popl %ecx; \ popl %eax; .text /*****************************************************************************/ /* Trap handling */ /*****************************************************************************/ /* * Trap and fault vector routines. * * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on * the stack that mostly looks like an interrupt, but does not disable * interrupts. A few of the traps we are use are interrupt gates, * SDT_SYS386IGT, which are nearly the same thing except interrupts are * disabled on entry. * * The cpu will push a certain amount of state onto the kernel stack for * the current process. The amount of state depends on the type of trap * and whether the trap crossed rings or not. See i386/include/frame.h. * At the very least the current EFLAGS (status register, which includes * the interrupt disable state prior to the trap), the code segment register, * and the return instruction pointer are pushed by the cpu. The cpu * will also push an 'error' code for certain traps. We push a dummy * error code for those traps where the cpu doesn't in order to maintain * a consistent frame. We also push a contrived 'trap number'. * * The cpu does not push the general registers, we must do that, and we * must restore them prior to calling 'iret'. 
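Put together, the CPU pushes and the software pushes described here build one fixed record per trap; a simplified sketch of its shape, lowest address (most recently pushed) first. The authoritative layout is struct trapframe in i386/include/frame.h, so the struct below is illustrative only:

	/* Illustrative layout; see <machine/frame.h> for the real definition. */
	struct trapframe_sketch {
		int	tf_fs, tf_es, tf_ds;		/* pushed by the stubs      */
		int	tf_edi, tf_esi, tf_ebp, tf_isp;	/* pushal                   */
		int	tf_ebx, tf_edx, tf_ecx, tf_eax;
		int	tf_trapno;			/* contrived trap number    */
		int	tf_err;				/* real or dummy error code */
		int	tf_eip, tf_cs, tf_eflags;	/* pushed by the cpu        */
		int	tf_esp, tf_ss;			/* only on a ring crossing  */
	};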
The cpu adjusts the %cs and * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we * must load them with appropriate values for supervisor mode operation. */ MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) #define TRAP(a) pushl $(a) ; jmp alltraps IDTVEC(div) pushl $0; TRAP(T_DIVIDE) IDTVEC(dbg) pushl $0; TRAP(T_TRCTRAP) IDTVEC(nmi) pushl $0; TRAP(T_NMI) IDTVEC(bpt) pushl $0; TRAP(T_BPTFLT) IDTVEC(ofl) pushl $0; TRAP(T_OFLOW) IDTVEC(bnd) pushl $0; TRAP(T_BOUND) IDTVEC(ill) pushl $0; TRAP(T_PRIVINFLT) IDTVEC(dna) pushl $0; TRAP(T_DNA) IDTVEC(fpusegm) pushl $0; TRAP(T_FPOPFLT) IDTVEC(tss) TRAP(T_TSSFLT) IDTVEC(missing) TRAP(T_SEGNPFLT) IDTVEC(stk) TRAP(T_STKFLT) IDTVEC(prot) TRAP(T_PROTFLT) IDTVEC(page) TRAP(T_PAGEFLT) IDTVEC(mchk) pushl $0; TRAP(T_MCHK) IDTVEC(rsvd) pushl $0; TRAP(T_RESERVED) IDTVEC(fpu) pushl $0; TRAP(T_ARITHTRAP) IDTVEC(align) TRAP(T_ALIGNFLT) IDTVEC(xmm) pushl $0; TRAP(T_XMMFLT) IDTVEC(hypervisor_callback) pushl $0; pushl $0; pushal pushl %ds pushl %es pushl %fs upcall_with_regs_pushed: SET_KERNEL_SREGS FAKE_MCOUNT(TF_EIP(%esp)) call_evtchn_upcall: movl TF_EIP(%esp),%eax cmpl $scrit,%eax jb 10f cmpl $ecrit,%eax jb critical_region_fixup 10: pushl %esp - call evtchn_do_upcall + call xen_intr_handle_upcall addl $4,%esp /* * Return via doreti to handle ASTs. */ MEXITCOUNT jmp doreti hypervisor_callback_pending: DISABLE_INTERRUPTS(%esi) /* cli */ jmp 10b /* * alltraps entry point. Interrupts are enabled if this was a trap * gate (TGT), else disabled if this was an interrupt gate (IGT). * Note that int0x80_syscall is a trap gate. Only page faults * use an interrupt gate. */ SUPERALIGN_TEXT .globl alltraps .type alltraps,@function alltraps: pushal pushl %ds pushl %es pushl %fs alltraps_with_regs_pushed: SET_KERNEL_SREGS FAKE_MCOUNT(TF_EIP(%esp)) calltrap: push %esp call trap add $4, %esp /* * Return via doreti to handle ASTs. */ MEXITCOUNT jmp doreti /* * SYSCALL CALL GATE (old entry point for a.out binaries) * * The intersegment call has been set up to specify one dummy parameter. * * This leaves a place to put eflags so that the call frame can be * converted to a trap frame. Note that the eflags is (semi-)bogusly * pushed into (what will be) tf_err and then copied later into the * final spot. It has to be done this way because esp can't be just * temporarily altered for the pushfl - an interrupt might come in * and clobber the saved cs/eip. */ SUPERALIGN_TEXT IDTVEC(lcall_syscall) pushfl /* save eflags */ popl 8(%esp) /* shuffle into tf_eflags */ pushl $7 /* sizeof "lcall 7,0" */ subl $4,%esp /* skip over tf_trapno */ pushal pushl %ds pushl %es pushl %fs SET_KERNEL_SREGS FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call syscall add $4, %esp MEXITCOUNT jmp doreti /* * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) * * Even though the name says 'int0x80', this is actually a TGT (trap gate) * rather then an IGT (interrupt gate). Thus interrupts are enabled on * entry just as they are for a normal syscall. */ SUPERALIGN_TEXT IDTVEC(int0x80_syscall) pushl $2 /* sizeof "int 0x80" */ pushl $0xBEEF /* for debug */ pushal pushl %ds pushl %es pushl %fs SET_KERNEL_SREGS FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call syscall add $4, %esp MEXITCOUNT jmp doreti ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ call fork_exit addl $12,%esp /* cut from syscall */ /* * Return via doreti to handle ASTs. 
*/ MEXITCOUNT jmp doreti /* * To efficiently implement classification of trap and interrupt handlers * for profiling, there must be only trap handlers between the labels btrap * and bintr, and only interrupt handlers between the labels bintr and * eintr. This is implemented (partly) by including files that contain * some of the handlers. Before including the files, set up a normal asm * environment so that the included files doen't need to know that they are * included. */ .data .p2align 4 .text SUPERALIGN_TEXT MCOUNT_LABEL(bintr) #ifdef DEV_APIC .data .p2align 4 .text SUPERALIGN_TEXT #include #endif .data .p2align 4 .text SUPERALIGN_TEXT #include .text MCOUNT_LABEL(eintr) /* * void doreti(struct trapframe) * * Handle return from interrupts, traps and syscalls. */ .text SUPERALIGN_TEXT .type doreti,@function doreti: FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */ doreti_next: #ifdef notyet /* * Check if ASTs can be handled now. PSL_VM must be checked first * since segment registers only have an RPL in non-VM86 mode. */ testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */ jz doreti_notvm86 movl PCPU(CURPCB),%ecx testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */ jz doreti_ast /* can handle ASTS now if not */ jmp doreti_exit doreti_notvm86: #endif testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ jz doreti_exit /* can't handle ASTs now if not */ doreti_ast: /* * Check for ASTs atomically with returning. Disabling CPU * interrupts provides sufficient locking even in the SMP case, * since we will be informed of any new ASTs by an IPI. */ DISABLE_INTERRUPTS(%esi) /* cli */ movl PCPU(CURTHREAD),%eax testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) je doreti_exit ENABLE_INTERRUPTS(%esi) /* sti */ pushl %esp /* pass a pointer to the trapframe */ call ast add $4,%esp jmp doreti_ast /* * doreti_exit: pop registers, iret. * * The segment register pop is a special case, since it may * fault if (for example) a sigreturn specifies bad segment * registers. The fault is handled in trap.c. */ doreti_exit: ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti) .globl scrit scrit: __TEST_PENDING(%esi) jnz hypervisor_callback_pending /* More to go */ MEXITCOUNT .globl doreti_popl_fs doreti_popl_fs: popl %fs .globl doreti_popl_es doreti_popl_es: popl %es .globl doreti_popl_ds doreti_popl_ds: popl %ds /* * This is important: as nothing is atomic over here (we can get * interrupted any time), we use the critical_region_fixup() in * order to figure out where out stack is. Therefore, do NOT use * 'popal' here without fixing up the table! */ POPA addl $8,%esp .globl doreti_iret doreti_iret: jmp hypercall_page + (__HYPERVISOR_iret * 32) .globl ecrit ecrit: /* * doreti_iret_fault and friends. Alternative return code for * the case where we get a fault in the doreti_exit code * above. trap() (i386/i386/trap.c) catches this specific * case, sends the process a signal and continues in the * corresponding place in the code below. */ ALIGN_TEXT .globl doreti_iret_fault doreti_iret_fault: subl $8,%esp pushal pushl %ds .globl doreti_popl_ds_fault doreti_popl_ds_fault: pushl %es .globl doreti_popl_es_fault doreti_popl_es_fault: pushl %fs .globl doreti_popl_fs_fault doreti_popl_fs_fault: movl $0,TF_ERR(%esp) /* XXX should be the error code */ movl $T_PROTFLT,TF_TRAPNO(%esp) jmp alltraps_with_regs_pushed /* # [How we do the fixup]. We want to merge the current stack frame with the # just-interrupted frame. 
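In C terms the fixup below is one backwards copy: the n register slots that doreti_exit had already popped still exist, freshly re-saved, at the bottom of the new upcall frame, so they are copied up to sit directly beneath the unpopped remainder of the interrupted frame and %esp is pointed at the result, leaving a single complete frame to be popped from the start. A sketch, assuming n comes from critical_fixup_table and 0x40 is the size of a fully pushed register area:

	#include <stdint.h>
	#include <string.h>

	/* Illustrative C equivalent of critical_region_fixup; 'sp' is the
	 * stack pointer at upcall time, 'n' the table entry (bytes already
	 * popped from the interrupted frame). */
	static uintptr_t
	merge_frames_sketch(uintptr_t sp, size_t n)
	{
		char *dst = (char *)(sp + 0x40 - n);

		memmove(dst, (const void *)sp, n);	/* slide newest n bytes up */
		return ((uintptr_t)dst);		/* merged stack pointer    */
	}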
How we do this depends on where in the critical # region the interrupted handler was executing, and so how many saved # registers are in each frame. We do this quickly using the lookup table # 'critical_fixup_table'. For each byte offset in the critical region, it # provides the number of bytes which have already been popped from the # interrupted stack frame. */ .globl critical_region_fixup critical_region_fixup: addl $critical_fixup_table-scrit,%eax movzbl (%eax),%eax # %eax contains num bytes popped movl %esp,%esi add %eax,%esi # %esi points at end of src region movl %esp,%edi add $0x40,%edi # %edi points at end of dst region movl %eax,%ecx shr $2,%ecx # convert bytes to words je 16f # skip loop if nothing to copy 15: subl $4,%esi # pre-decrementing copy loop subl $4,%edi movl (%esi),%eax movl %eax,(%edi) loop 15b 16: movl %edi,%esp # final %edi is top of merged stack jmp hypervisor_callback_pending critical_fixup_table: .byte 0x0,0x0,0x0 #testb $0x1,(%esi) .byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea .byte 0x0,0x0 #pop %fs .byte 0x04 #pop %es .byte 0x08 #pop %ds .byte 0x0c #pop %edi .byte 0x10 #pop %esi .byte 0x14 #pop %ebp .byte 0x18 #pop %ebx .byte 0x1c #pop %ebx .byte 0x20 #pop %edx .byte 0x24 #pop %ecx .byte 0x28 #pop %eax .byte 0x2c,0x2c,0x2c #add $0x8,%esp #if 0 .byte 0x34 #iret #endif .byte 0x34,0x34,0x34,0x34,0x34 #HYPERVISOR_iret /* # Hypervisor uses this for application faults while it executes.*/ ENTRY(failsafe_callback) pushal call xen_failsafe_handler /*# call install_safe_pf_handler */ movl 28(%esp),%ebx 1: movl %ebx,%ds movl 32(%esp),%ebx 2: movl %ebx,%es movl 36(%esp),%ebx 3: movl %ebx,%fs movl 40(%esp),%ebx 4: movl %ebx,%gs /*# call install_normal_pf_handler */ popal addl $12,%esp iret diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 05531cbdca02..407731a7eada 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -1,1250 +1,1269 @@ /*- * Copyright (c) 1996, by Steve Passe * Copyright (c) 2008, by Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" #include "opt_mp_watchdog.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" #if !defined(lint) #if !defined(SMP) #error How did you get here? 
#endif #ifndef DEV_APIC #error The apic device is required for SMP, add "device apic" to your config file. #endif #if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) #error SMP not supported with CPU_DISABLE_CMPXCHG #endif #endif /* not lint */ #include #include #include #include /* cngetc() */ #include #ifdef GPROF #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include int mp_naps; /* # of Applications processors */ int boot_cpu_id = -1; /* designated BSP */ extern struct pcpu __pcpu[]; static int bootAP; static union descriptor *bootAPgdt; -static char resched_name[NR_CPUS][15]; -static char callfunc_name[NR_CPUS][15]; - /* Free these after use */ void *bootstacks[MAXCPU]; struct pcb stoppcbs[MAXCPU]; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; vm_offset_t smp_tlb_addr2; volatile int smp_tlb_wait; typedef void call_data_func_t(uintptr_t , uintptr_t); static u_int logical_cpus; static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; /* Set to 1 once we're ready to let the APs out of the pen. */ static volatile int aps_ready = 0; /* * Store data from cpu_add() until later in the boot when we actually setup * the APs. */ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; } static cpu_info[MAX_APIC_ID + 1]; int cpu_apic_ids[MAXCPU]; int apic_cpuids[MAX_APIC_ID + 1]; /* Holds pending bitmap based IPIs per CPU */ static volatile u_int cpu_ipi_pending[MAXCPU]; static int cpu_logical; static int cpu_cores; static void assign_cpu_ids(void); static void set_interrupt_apic_ids(void); int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; static cpuset_t hyperthreading_cpus_mask; extern void Xhypervisor_callback(void); extern void failsafe_callback(void); extern void pmap_lazyfix_action(void); +extern int ap_cpu_initclocks(int cpu); + +DPCPU_DEFINE(xen_intr_handle_t, ipi_port[NR_IPIS]); +DPCPU_DEFINE(struct vcpu_info *, vcpu_info); struct cpu_group * cpu_topo(void) { if (cpu_cores == 0) cpu_cores = 1; if (cpu_logical == 0) cpu_logical = 1; if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { printf("WARNING: Non-uniform processors.\n"); printf("WARNING: Using suboptimal topology.\n"); return (smp_topo_none()); } /* * No multi-core or hyper-threaded. */ if (cpu_logical * cpu_cores == 1) return (smp_topo_none()); /* * Only HTT no multi-core. */ if (cpu_logical > 1 && cpu_cores == 1) return (smp_topo_1level(CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); /* * Only multi-core no HTT. */ if (cpu_cores > 1 && cpu_logical == 1) return (smp_topo_1level(CG_SHARE_NONE, cpu_cores, 0)); /* * Both HTT and multi-core. */ return (smp_topo_2level(CG_SHARE_NONE, cpu_cores, CG_SHARE_L1, cpu_logical, CG_FLAG_HTT)); } /* * Calculate usable address in base memory for AP trampoline code. 
*/ u_int mp_bootaddress(u_int basemem) { return (basemem); } void cpu_add(u_int apic_id, char boot_cpu) { if (apic_id > MAX_APIC_ID) { panic("SMP: APIC ID %d too high", apic_id); return; } KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", apic_id)); cpu_info[apic_id].cpu_present = 1; if (boot_cpu) { KASSERT(boot_cpu_id == -1, ("CPU %d claims to be BSP, but CPU %d already is", apic_id, boot_cpu_id)); boot_cpu_id = apic_id; cpu_info[apic_id].cpu_bsp = 1; } if (mp_ncpus < MAXCPU) mp_ncpus++; if (bootverbose) printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : "AP"); } void cpu_mp_setmaxid(void) { mp_maxid = MAXCPU - 1; } int cpu_mp_probe(void) { /* * Always record BSP in CPU map so that the mbuf init code works * correctly. */ CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup * the variables to represent a system with a single CPU * with an id of 0. */ mp_ncpus = 1; return (0); } /* At least one CPU was found. */ if (mp_ncpus == 1) { /* * One CPU was found, so this must be a UP system with * an I/O APIC. */ return (0); } /* At least two CPUs were found. */ return (1); } /* * Initialize the IPI handlers and start up the AP's. */ void cpu_mp_start(void) { int i; /* Initialize the logical ID to APIC ID table. */ for (i = 0; i < MAXCPU; i++) { cpu_apic_ids[i] = -1; cpu_ipi_pending[i] = 0; } /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); cpu_info[boot_cpu_id].cpu_bsp = 1; } else KASSERT(boot_cpu_id == PCPU_GET(apic_id), ("BSP's APIC ID doesn't match boot_cpu_id")); cpu_apic_ids[0] = boot_cpu_id; apic_cpuids[boot_cpu_id] = 0; assign_cpu_ids(); /* Start each Application Processor */ start_all_aps(); /* Setup the initial logical CPUs info. */ logical_cpus = 0; CPU_ZERO(&logical_cpus_mask); if (cpu_feature & CPUID_HTT) logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; set_interrupt_apic_ids(); } static void iv_rendezvous(uintptr_t a, uintptr_t b) { smp_rendezvous_action(); } static void iv_invltlb(uintptr_t a, uintptr_t b) { xen_tlb_flush(); } static void iv_invlpg(uintptr_t a, uintptr_t b) { xen_invlpg(a); } static void iv_invlrng(uintptr_t a, uintptr_t b) { vm_offset_t start = (vm_offset_t)a; vm_offset_t end = (vm_offset_t)b; while (start < end) { xen_invlpg(start); start += PAGE_SIZE; } } static void iv_invlcache(uintptr_t a, uintptr_t b) { wbinvd(); atomic_add_int(&smp_tlb_wait, 1); } static void iv_lazypmap(uintptr_t a, uintptr_t b) { pmap_lazyfix_action(); atomic_add_int(&smp_tlb_wait, 1); } /* * These start from "IPI offset" APIC_IPI_INTS */ static call_data_func_t *ipi_vectors[6] = { iv_rendezvous, iv_invltlb, iv_invlpg, iv_invlrng, iv_invlcache, iv_lazypmap, }; /* * Reschedule call back. Nothing to do, * all the work is done automatically when * we return from the interrupt. 
*/ static int smp_reschedule_interrupt(void *unused) { int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); if (ipi_bitmap & (1 << IPI_PREEMPT)) { #ifdef COUNT_IPIS (*ipi_preempt_counts[cpu])++; #endif sched_preempt(curthread); } if (ipi_bitmap & (1 << IPI_AST)) { #ifdef COUNT_IPIS (*ipi_ast_counts[cpu])++; #endif /* Nothing to do for AST */ } return (FILTER_HANDLED); } struct _call_data { uint16_t func_id; uint16_t wait; uintptr_t arg1; uintptr_t arg2; atomic_t started; atomic_t finished; }; static struct _call_data *call_data; static int smp_call_function_interrupt(void *unused) { call_data_func_t *func; uintptr_t arg1 = call_data->arg1; uintptr_t arg2 = call_data->arg2; int wait = call_data->wait; atomic_t *started = &call_data->started; atomic_t *finished = &call_data->finished; /* We only handle function IPIs, not bitmap IPIs */ if (call_data->func_id < APIC_IPI_INTS || call_data->func_id > IPI_BITMAP_VECTOR) panic("invalid function id %u", call_data->func_id); func = ipi_vectors[call_data->func_id - APIC_IPI_INTS]; /* * Notify initiating CPU that I've grabbed the data and am * about to execute the function */ mb(); atomic_inc(started); /* * At this point the info structure may be out of scope unless wait==1 */ (*func)(arg1, arg2); if (wait) { mb(); atomic_inc(finished); } atomic_add_int(&smp_tlb_wait, 1); return (FILTER_HANDLED); } /* * Print various information about the SMP system hardware and setup. */ void cpu_mp_announce(void) { int i, x; /* List CPUs */ printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) continue; if (cpu_info[x].cpu_disabled) printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); else { KASSERT(i < mp_ncpus, ("mp_ncpus and actual cpus are out of whack")); printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); } } } static int -xen_smp_intr_init(unsigned int cpu) +xen_smp_cpu_init(unsigned int cpu) { int rc; - unsigned int irq; - - per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1; + xen_intr_handle_t irq_handle; - sprintf(resched_name[cpu], "resched%u", cpu); - rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, - cpu, - resched_name[cpu], - smp_reschedule_interrupt, - INTR_TYPE_TTY, &irq); + DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], NULL); + DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], NULL); - printf("[XEN] IPI cpu=%d irq=%d vector=RESCHEDULE_VECTOR (%d)\n", - cpu, irq, RESCHEDULE_VECTOR); - - per_cpu(resched_irq, cpu) = irq; - - sprintf(callfunc_name[cpu], "callfunc%u", cpu); - rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, - cpu, - callfunc_name[cpu], - smp_call_function_interrupt, - INTR_TYPE_TTY, &irq); + /* + * The PCPU variable pc_device is not initialized on i386 PV, + * so we have to use the root_bus device in order to setup + * the IPIs. 
+ */ + rc = xen_intr_bind_ipi(root_bus, RESCHEDULE_VECTOR, + cpu, smp_reschedule_interrupt, INTR_TYPE_TTY, &irq_handle); if (rc < 0) goto fail; - per_cpu(callfunc_irq, cpu) = irq; + xen_intr_describe(irq_handle, "resched%u", cpu); + DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], irq_handle); - printf("[XEN] IPI cpu=%d irq=%d vector=CALL_FUNCTION_VECTOR (%d)\n", - cpu, irq, CALL_FUNCTION_VECTOR); + printf("[XEN] IPI cpu=%d port=%d vector=RESCHEDULE_VECTOR (%d)\n", + cpu, xen_intr_port(irq_handle), RESCHEDULE_VECTOR); + + rc = xen_intr_bind_ipi(root_bus, CALL_FUNCTION_VECTOR, + cpu, smp_call_function_interrupt, INTR_TYPE_TTY, &irq_handle); + if (rc < 0) + goto fail; + xen_intr_describe(irq_handle, "callfunc%u", cpu); + DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], irq_handle); + + printf("[XEN] IPI cpu=%d port=%d vector=CALL_FUNCTION_VECTOR (%d)\n", + cpu, xen_intr_port(irq_handle), CALL_FUNCTION_VECTOR); - if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0)) goto fail; return 0; fail: - if (per_cpu(resched_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(resched_irq, cpu)); - if (per_cpu(callfunc_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(callfunc_irq, cpu)); + xen_intr_unbind(DPCPU_ID_GET(cpu, ipi_port[RESCHEDULE_VECTOR])); + DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], NULL); + xen_intr_unbind(DPCPU_ID_GET(cpu, ipi_port[CALL_FUNCTION_VECTOR])); + DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], NULL); return rc; } static void xen_smp_intr_init_cpus(void *unused) { int i; for (i = 0; i < mp_ncpus; i++) - xen_smp_intr_init(i); + xen_smp_cpu_init(i); +} + +static void +xen_smp_intr_setup_cpus(void *unused) +{ + int i; + + for (i = 0; i < mp_ncpus; i++) + DPCPU_ID_SET(i, vcpu_info, + &HYPERVISOR_shared_info->vcpu_info[i]); } #define MTOPSIZE (1<<(14 + PAGE_SHIFT)) /* * AP CPU's call this to initialize themselves. */ void init_secondary(void) { vm_offset_t addr; u_int cpuid; int gsel_tss; /* bootAP is set in start_ap() to our ID. */ PCPU_SET(currentldt, _default_ldt); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); #if 0 gdt[bootAP * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; #endif PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); #if 0 PCPU_SET(tss_gdt, &gdt[bootAP * NGDT + GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); #endif PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); /* * Set to a known state: * Set by mpboot.s: CR0_PG, CR0_PE * Set by cpu_setregs: CR0_NE, CR0_MP, CR0_TS, CR0_WP, CR0_AM */ /* * signal our startup to the BSP. */ mp_naps++; /* Spin until the BSP releases the AP's. */ while (!aps_ready) ia32_pause(); /* BSP may have changed PTD while we were waiting */ invltlb(); for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) invlpg(addr); /* set up FPU state on the AP */ npxinit(); #if 0 /* set up SSE registers */ enable_sse(); #endif #if 0 && defined(PAE) /* Enable the PTE no-execute bit. */ if ((amd_feature & AMDID_NX) != 0) { uint64_t msr; msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); } #endif #if 0 /* A quick check from sanity claus */ if (PCPU_GET(apic_id) != lapic_id()) { printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); printf("SMP: actual apic_id = %d\n", lapic_id()); printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); panic("cpuid mismatch! boom!!"); } #endif /* Initialize curthread. 
*/ KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); mtx_lock_spin(&ap_boot_mtx); #if 0 /* Init local apic for irq's */ lapic_setup(1); #endif smp_cpus++; cpuid = PCPU_GET(cpuid); CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", cpuid); printf("SMP: AP CPU #%d Launched!\n", cpuid); /* Determine if we are a logical CPU. */ if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) CPU_SET(cpuid, &logical_cpus_mask); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) CPU_SET(cpuid, &hyperthreading_cpus_mask); #if 0 if (bootverbose) lapic_dump("AP"); #endif if (smp_cpus == mp_ncpus) { /* enable IPI's, tlb shootdown, freezes etc */ atomic_store_rel_int(&smp_started, 1); smp_active = 1; /* historic */ } mtx_unlock_spin(&ap_boot_mtx); /* wait until all the AP's are up */ while (smp_started == 0) ia32_pause(); PCPU_SET(curthread, PCPU_GET(idlethread)); /* Start per-CPU event timers. */ cpu_initclocks_ap(); /* enter the scheduler */ sched_throw(NULL); panic("scheduler returned us to %s", __func__); /* NOTREACHED */ } /******************************************************************* * local functions and data */ /* * We tell the I/O APIC code about all the CPUs we want to receive * interrupts. If we don't want certain CPUs to receive IRQs we * can simply not tell the I/O APIC code about them in this function. * We also do not tell it about the BSP since it tells itself about * the BSP internally to work with UP kernels and on UP machines. */ static void set_interrupt_apic_ids(void) { u_int i, apic_id; for (i = 0; i < MAXCPU; i++) { apic_id = cpu_apic_ids[i]; if (apic_id == -1) continue; if (cpu_info[apic_id].cpu_bsp) continue; if (cpu_info[apic_id].cpu_disabled) continue; /* Don't let hyperthreads service interrupts. */ if (hyperthreading_cpus > 1 && apic_id % hyperthreading_cpus != 0) continue; intr_add_cpu(i); } } /* * Assign logical CPU IDs to local APICs. */ static void assign_cpu_ids(void) { u_int i; /* Check for explicitly disabled CPUs. */ for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) continue; /* Don't use this CPU if it has been disabled by a tunable. */ if (resource_disabled("lapic", i)) { cpu_info[i].cpu_disabled = 1; continue; } } /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 has already been assigned to the BSP, * so we only have to assign IDs for APs. 
*/ mp_ncpus = 1; for (i = 0; i <= MAX_APIC_ID; i++) { if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || cpu_info[i].cpu_disabled) continue; if (mp_ncpus < MAXCPU) { cpu_apic_ids[mp_ncpus] = i; apic_cpuids[i] = mp_ncpus; mp_ncpus++; } else cpu_info[i].cpu_disabled = 1; } KASSERT(mp_maxid >= mp_ncpus - 1, ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, mp_ncpus)); } /* * start each AP in our list */ /* Lowest 1MB is already mapped: don't touch*/ #define TMPMAP_START 1 int start_all_aps(void) { int x,apic_id, cpu; struct pcpu *pc; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; bootAP = cpu; bootAPgdt = gdt + (512*cpu); /* Get per-cpu data */ pc = &__pcpu[bootAP]; pcpu_init(pc, bootAP, sizeof(struct pcpu)); dpcpu_init((void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO), bootAP); pc->pc_apic_id = cpu_apic_ids[bootAP]; pc->pc_prvspace = pc; pc->pc_curthread = 0; gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; PT_SET_MA(bootAPgdt, VTOM(bootAPgdt) | PG_V | PG_RW); bzero(bootAPgdt, PAGE_SIZE); for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &bootAPgdt[x].sd); PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); #ifdef notyet if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); #ifdef CONFIG_ACPI if (acpiid != 0xff) x86_acpiid_to_apicid[acpiid] = apicid; #endif } #endif /* attempt to start the Application Processor */ if (!start_ap(cpu)) { printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); /* better panic as the AP may be running loose */ printf("panic y/n? [y] "); if (cngetc() != 'n') panic("bye-bye"); } CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); /* number of APs actually started */ return mp_naps; } extern uint8_t *pcpu_boot_stack; extern trap_info_t trap_table[]; static void smp_trap_init(trap_info_t *trap_ctxt) { const trap_info_t *t = trap_table; for (t = trap_table; t->address; t++) { trap_ctxt[t->vector].flags = t->flags; trap_ctxt[t->vector].cs = t->cs; trap_ctxt[t->vector].address = t->address; } } extern struct rwlock pvh_global_lock; extern int nkpt; static void cpu_initialize_context(unsigned int cpu) { /* vcpu_guest_context_t is too large to allocate on the stack. 
* Hence we allocate statically and protect it with a lock */ vm_page_t m[NPGPTD + 2]; static vcpu_guest_context_t ctxt; vm_offset_t boot_stack; vm_offset_t newPTD; vm_paddr_t ma[NPGPTD]; int i; /* * Page 0,[0-3] PTD * Page 1, [4] boot stack * Page [5] PDPT * */ for (i = 0; i < NPGPTD + 2; i++) { m[i] = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); pmap_zero_page(m[i]); } boot_stack = kva_alloc(PAGE_SIZE); newPTD = kva_alloc(NPGPTD * PAGE_SIZE); ma[0] = VM_PAGE_TO_MACH(m[0])|PG_V; #ifdef PAE pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); for (i = 0; i < NPGPTD; i++) { ((vm_paddr_t *)boot_stack)[i] = ma[i] = VM_PAGE_TO_MACH(m[i])|PG_V; } #endif /* * Copy cpu0 IdlePTD to new IdlePTD - copying only * kernel mappings */ pmap_qenter(newPTD, m, 4); memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), nkpt*sizeof(vm_paddr_t)); pmap_qremove(newPTD, 4); kva_free(newPTD, 4 * PAGE_SIZE); /* * map actual idle stack to boot_stack */ pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1])); rw_wlock(&pvh_global_lock); for (i = 0; i < 4; i++) { int pdir = (PTDPTDI + i) / NPDEPG; int curoffset = (PTDPTDI + i) % NPDEPG; xen_queue_pt_update((vm_paddr_t) ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), ma[i]); } PT_UPDATES_FLUSH(); rw_wunlock(&pvh_global_lock); memset(&ctxt, 0, sizeof(ctxt)); ctxt.flags = VGCF_IN_KERNEL; ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.user_regs.eip = (unsigned long)init_secondary; ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); smp_trap_init(ctxt.trap_ctxt); ctxt.ldt_ents = 0; ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); ctxt.gdt_ents = 512; #ifdef __i386__ ctxt.user_regs.esp = boot_stack + PAGE_SIZE; ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.kernel_sp = boot_stack + PAGE_SIZE; ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]); #else /* __x86_64__ */ ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); ctxt.kernel_sp = idle->thread.rsp0; ctxt.event_callback_eip = (unsigned long)hypervisor_callback; ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; ctxt.syscall_callback_eip = (unsigned long)system_call; ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); #endif printf("gdtpfn=%lx pdptpfn=%lx\n", ctxt.gdt_frames[0], ctxt.ctrlreg[3] >> PAGE_SHIFT); PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt)); DELAY(3000); PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)); } /* * This function starts the AP (application processor) identified * by the APIC ID 'physicalCpu'. It does quite a "song and dance" * to accomplish this. This is necessary because of the nuances * of the different hardware we might encounter. It isn't pretty, * but it seems to work. 
*/ int cpus; static int start_ap(int apic_id) { int ms; /* used as a watchpoint to signal AP startup */ cpus = mp_naps; cpu_initialize_context(apic_id); /* Wait up to 5 seconds for it to start. */ for (ms = 0; ms < 5000; ms++) { if (mp_naps > cpus) return 1; /* return SUCCESS */ DELAY(1000); } return 0; /* return FAILURE */ } +static void +ipi_pcpu(int cpu, u_int ipi) +{ + KASSERT((ipi <= NR_IPIS), ("invalid IPI")); + xen_intr_signal(DPCPU_ID_GET(cpu, ipi_port[ipi])); +} + /* * send an IPI to a specific CPU. */ static void ipi_send_cpu(int cpu, u_int ipi) { u_int bitmap, old_pending, new_pending; if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; do { old_pending = cpu_ipi_pending[cpu]; new_pending = old_pending | bitmap; } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old_pending, new_pending)); if (!old_pending) ipi_pcpu(cpu, RESCHEDULE_VECTOR); } else { KASSERT(call_data != NULL, ("call_data not set")); ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); } } /* * Flush the TLB on all other CPU's */ static void smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; struct _call_data data; ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) return; /* no other cpus */ if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); KASSERT(call_data == NULL, ("call_data isn't null?!")); call_data = &data; call_data->func_id = vector; call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); ipi_all_but_self(vector); while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; mtx_unlock_spin(&smp_ipi_mtx); } static void smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { int cpu, ncpu, othercpus; struct _call_data data; othercpus = mp_ncpus - 1; if (CPU_ISFULLSET(&mask)) { if (othercpus < 1) return; } else { CPU_CLR(PCPU_GET(cpuid), &mask); if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); KASSERT(call_data == NULL, ("call_data isn't null?!")); call_data = &data; call_data->func_id = vector; call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); if (CPU_ISFULLSET(&mask)) { ncpu = othercpus; ipi_all_but_self(vector); } else { ncpu = 0; while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, vector); ipi_send_cpu(cpu, vector); ncpu++; } } while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; mtx_unlock_spin(&smp_ipi_mtx); } void smp_cache_flush(void) { if (smp_started) smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); } void smp_invltlb(void) { if (smp_started) { smp_tlb_shootdown(IPI_INVLTLB, 0, 0); } } void smp_invlpg(vm_offset_t addr) { if (smp_started) { smp_tlb_shootdown(IPI_INVLPG, addr, 0); } } void smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); } } void smp_masked_invltlb(cpuset_t mask) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); } } void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); } } void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); } } /* * send an IPI to a set of cpus. 
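The bitmap path above coalesces the cheap IPIs: a sender only pays for an event-channel signal when it sets the first bit in an otherwise empty word, and the receiver drains every pending bit in one pass. A condensed sketch pairing the sender (ipi_send_cpu() above) with the receiver (smp_reschedule_interrupt() earlier in this file); the local variables are assumed to be in scope:

	/* Sender: publish the bit, kick the vCPU only on the 0 -> non-zero edge. */
	do {
		old = cpu_ipi_pending[cpu];
		new = old | (1u << ipi);
	} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], old, new));
	if (old == 0)
		ipi_pcpu(cpu, RESCHEDULE_VECTOR);

	/* Receiver: claim the whole word atomically, then act on each bit. */
	pending = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
	if (pending & (1u << IPI_PREEMPT))
		sched_preempt(curthread);
	if (pending & (1u << IPI_AST))
		;	/* nothing to do; the AST is noticed on the way out */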
*/ void ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } /* * send an IPI to a specific CPU. */ void ipi_cpu(int cpu, u_int ipi) { /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } /* * send an IPI to all CPUs EXCEPT myself */ void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); ipi_selected(other_cpus, ipi); } int ipi_nmi_handler() { u_int cpuid; /* * As long as there is not a simple way to know about a NMI's * source, if the bitmask for the current CPU is present in * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &ipi_nmi_pending)) return (1); CPU_CLR_ATOMIC(cpuid, &ipi_nmi_pending); cpustop_handler(); return (0); } /* * Handle an IPI_STOP by saving our current context and spinning until we * are resumed. */ void cpustop_handler(void) { int cpu; cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); cpustop_restartfunc = NULL; } } /* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. */ static void release_aps(void *dummy __unused) { if (mp_ncpus == 1) return; atomic_store_rel_int(&aps_ready, 1); while (smp_started == 0) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); -SYSINIT(start_ipis, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL); +SYSINIT(start_ipis, SI_SUB_SMP, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL); +SYSINIT(start_cpu, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_setup_cpus, NULL); diff --git a/sys/i386/xen/mptable.c b/sys/i386/xen/mptable.c index 0c1efe849b4e..74cb9ab1a512 100644 --- a/sys/i386/xen/mptable.c +++ b/sys/i386/xen/mptable.c @@ -1,107 +1,107 @@ /*- * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include static int mptable_probe(void); static int mptable_probe_cpus(void); static void mptable_register(void *dummy); static int mptable_setup_local(void); static int mptable_setup_io(void); static struct apic_enumerator mptable_enumerator = { "MPTable", mptable_probe, mptable_probe_cpus, mptable_setup_local, mptable_setup_io }; static int mptable_probe(void) { return (-100); } static int mptable_probe_cpus(void) { int i, rc; for (i = 0; i < MAXCPU; i++) { rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); if (rc >= 0) cpu_add(i, (i == 0)); } return (0); } /* * Initialize the local APIC on the BSP. */ static int mptable_setup_local(void) { return (0); } static int mptable_setup_io(void) { return (0); } static void mptable_register(void *dummy __unused) { apic_register_enumerator(&mptable_enumerator); } SYSINIT(mptable_register, SI_SUB_TUNABLES - 1, SI_ORDER_FIRST, mptable_register, NULL); diff --git a/sys/i386/xen/xen_clock_util.c b/sys/i386/xen/xen_clock_util.c index c14a627a122a..c12451551648 100644 --- a/sys/i386/xen/xen_clock_util.c +++ b/sys/i386/xen/xen_clock_util.c @@ -1,101 +1,102 @@ /*- * Copyright (c) 2009 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include #include + #include #include #include #include -#include #include #include #include #include #include /* * Read the current hypervisor start time (wall clock) from Xen. */ void xen_fetch_wallclock(struct timespec *ts) { shared_info_t *s = HYPERVISOR_shared_info; uint32_t ts_version; do { ts_version = s->wc_version; rmb(); ts->tv_sec = s->wc_sec; ts->tv_nsec = s->wc_nsec; rmb(); } while ((s->wc_version & 1) | (ts_version ^ s->wc_version)); } /* * Read the current hypervisor system uptime value from Xen. */ void xen_fetch_uptime(struct timespec *ts) { shared_info_t *s = HYPERVISOR_shared_info; struct vcpu_time_info *src; struct shadow_time_info dst; uint32_t pre_version, post_version; src = &s->vcpu_info[smp_processor_id()].time; spinlock_enter(); do { pre_version = dst.version = src->version; rmb(); dst.system_timestamp = src->system_time; rmb(); post_version = src->version; } while ((pre_version & 1) | (pre_version ^ post_version)); spinlock_exit(); ts->tv_sec = dst.system_timestamp / 1000000000; ts->tv_nsec = dst.system_timestamp % 1000000000; } diff --git a/sys/i386/xen/xen_machdep.c b/sys/i386/xen/xen_machdep.c index 9b5edd384cf9..7049be6d561a 100644 --- a/sys/i386/xen/xen_machdep.c +++ b/sys/i386/xen/xen_machdep.c @@ -1,1260 +1,1270 @@ /* * * Copyright (c) 2004 Christian Limpach. * Copyright (c) 2004-2006,2008 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christian Limpach. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
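The retry loops in xen_fetch_wallclock() and xen_fetch_uptime() above are lock-free reads against a hypervisor writer: the version field is bumped to an odd value while an update is in progress and to the next even value when it completes, so the reader keeps retrying until it sees a stable, even version on both sides of the copy. The general shape, sketched with stand-in field names:

	/* Version-counter (seqlock-style) read; 'src->version' and
	 * 'src->payload' are illustrative stand-ins for the shared-info
	 * time fields read above. */
	do {
		ver = src->version;
		rmb();			/* version before payload      */
		snapshot = src->payload;
		rmb();			/* payload before the re-check */
	} while ((src->version & 1) != 0 || ver != src->version);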
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #define IDTVEC(name) __CONCAT(X,name) extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); int xendebug_flags; start_info_t *xen_start_info; shared_info_t *HYPERVISOR_shared_info; xen_pfn_t *xen_machine_phys = machine_to_phys_mapping; xen_pfn_t *xen_phys_machine; xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; xen_pfn_t *xen_pfn_to_mfn_frame_list_list; int preemptable, init_first; extern unsigned int avail_space; +int xen_vector_callback_enabled = 0; +enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN; void ni_cli(void); void ni_sti(void); void ni_cli(void) { CTR0(KTR_SPARE2, "ni_cli disabling interrupts"); __asm__("pushl %edx;" "pushl %eax;" ); __cli(); __asm__("popl %eax;" "popl %edx;" ); } void ni_sti(void) { __asm__("pushl %edx;" "pushl %esi;" "pushl %eax;" ); __sti(); __asm__("popl %eax;" "popl %esi;" "popl %edx;" ); } +void +force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} + /* * Modify the cmd_line by converting ',' to NULLs so that it is in a format * suitable for the static env vars. */ char * xen_setbootenv(char *cmd_line) { char *cmd_line_next; /* Skip leading spaces */ for (; *cmd_line == ' '; cmd_line++); - printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line); + xc_printf("xen_setbootenv(): cmd_line='%s'\n", cmd_line); for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;); return cmd_line; } static struct { const char *ev; int mask; } howto_names[] = { {"boot_askname", RB_ASKNAME}, {"boot_single", RB_SINGLE}, {"boot_nosync", RB_NOSYNC}, {"boot_halt", RB_ASKNAME}, {"boot_serial", RB_SERIAL}, {"boot_cdrom", RB_CDROM}, {"boot_gdb", RB_GDB}, {"boot_gdb_pause", RB_RESERVED1}, {"boot_verbose", RB_VERBOSE}, {"boot_multicons", RB_MULTIPLE}, {NULL, 0} }; int xen_boothowto(char *envp) { int i, howto = 0; /* get equivalents from the environment */ for (i = 0; howto_names[i].ev != NULL; i++) if (getenv(howto_names[i].ev) != NULL) howto |= howto_names[i].mask; return howto; } -#define PRINTK_BUFSIZE 1024 +#define XC_PRINTF_BUFSIZE 1024 void -printk(const char *fmt, ...) +xc_printf(const char *fmt, ...) 
{ __va_list ap; int retval; - static char buf[PRINTK_BUFSIZE]; + static char buf[XC_PRINTF_BUFSIZE]; va_start(ap, fmt); - retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap); + retval = vsnprintf(buf, XC_PRINTF_BUFSIZE - 1, fmt, ap); va_end(ap); buf[retval] = 0; (void)HYPERVISOR_console_write(buf, retval); } #define XPQUEUE_SIZE 128 struct mmu_log { char *file; int line; }; #ifdef SMP /* per-cpu queues and indices */ #ifdef INVARIANTS static struct mmu_log xpq_queue_log[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE]; #endif static int xpq_idx[XEN_LEGACY_MAX_VCPUS]; static mmu_update_t xpq_queue[XEN_LEGACY_MAX_VCPUS][XPQUEUE_SIZE]; #define XPQ_QUEUE_LOG xpq_queue_log[vcpu] #define XPQ_QUEUE xpq_queue[vcpu] #define XPQ_IDX xpq_idx[vcpu] #define SET_VCPU() int vcpu = smp_processor_id() #else static mmu_update_t xpq_queue[XPQUEUE_SIZE]; #ifdef INVARIANTS static struct mmu_log xpq_queue_log[XPQUEUE_SIZE]; #endif static int xpq_idx = 0; #define XPQ_QUEUE_LOG xpq_queue_log #define XPQ_QUEUE xpq_queue #define XPQ_IDX xpq_idx #define SET_VCPU() #endif /* !SMP */ #define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1); #if 0 static void xen_dump_queue(void) { int _xpq_idx = XPQ_IDX; int i; if (_xpq_idx <= 1) return; - printk("xen_dump_queue(): %u entries\n", _xpq_idx); + xc_printf("xen_dump_queue(): %u entries\n", _xpq_idx); for (i = 0; i < _xpq_idx; i++) { - printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); + xc_printf(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, + XPQ_QUEUE[i].ptr); } } #endif static __inline void _xen_flush_queue(void) { SET_VCPU(); int _xpq_idx = XPQ_IDX; int error, i; #ifdef INVARIANTS if (__predict_true(gdtset)) CRITICAL_ASSERT(curthread); #endif XPQ_IDX = 0; /* Make sure index is cleared first to avoid double updates. */ error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE, _xpq_idx, NULL, DOMID_SELF); #if 0 if (__predict_true(gdtset)) for (i = _xpq_idx; i > 0;) { if (i >= 3) { CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx " "ptr: %lx val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff), (XPQ_QUEUE[i-2].val & 0xffffffff), (XPQ_QUEUE[i-2].ptr & 0xffffffff), (XPQ_QUEUE[i-3].val & 0xffffffff), (XPQ_QUEUE[i-3].ptr & 0xffffffff)); i -= 3; } else if (i == 2) { CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff), (XPQ_QUEUE[i-2].val & 0xffffffff), (XPQ_QUEUE[i-2].ptr & 0xffffffff)); i = 0; } else { CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", (XPQ_QUEUE[i-1].val & 0xffffffff), (XPQ_QUEUE[i-1].ptr & 0xffffffff)); i = 0; } } #endif if (__predict_false(error < 0)) { for (i = 0; i < _xpq_idx; i++) printf("val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); panic("Failed to execute MMU updates: %d", error); } } void xen_flush_queue(void) { SET_VCPU(); if (__predict_true(gdtset)) critical_enter(); if (XPQ_IDX != 0) _xen_flush_queue(); if (__predict_true(gdtset)) critical_exit(); } static __inline void xen_increment_idx(void) { SET_VCPU(); XPQ_IDX++; if (__predict_false(XPQ_IDX == XPQUEUE_SIZE)) xen_flush_queue(); } void xen_check_queue(void) { #ifdef INVARIANTS SET_VCPU(); KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX)); #endif } void xen_invlpg(vm_offset_t va) { struct mmuext_op op; op.cmd = MMUEXT_INVLPG_ALL; op.arg1.linear_addr = va & ~PAGE_MASK; PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_load_cr3(u_int val) { struct mmuext_op op; #ifdef INVARIANTS SET_VCPU(); KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", 
XPQ_IDX)); #endif op.cmd = MMUEXT_NEW_BASEPTR; op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } #ifdef KTR static __inline u_int rebp(void) { u_int data; __asm __volatile("movl 4(%%ebp),%0" : "=r" (data)); return (data); } #endif u_int read_eflags(void) { vcpu_info_t *_vcpu; u_int eflags; eflags = _read_eflags(); _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; if (_vcpu->evtchn_upcall_mask) eflags &= ~PSL_I; return (eflags); } void write_eflags(u_int eflags) { u_int intr; CTR2(KTR_SPARE2, "%x xen_restore_flags eflags %x", rebp(), eflags); intr = ((eflags & PSL_I) == 0); __restore_flags(intr); _write_eflags(eflags); } void xen_cli(void) { CTR1(KTR_SPARE2, "%x xen_cli disabling interrupts", rebp()); __cli(); } void xen_sti(void) { CTR1(KTR_SPARE2, "%x xen_sti enabling interrupts", rebp()); __sti(); } u_int xen_rcr2(void) { return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2); } void _xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line) { SET_VCPU(); if (__predict_true(gdtset)) critical_enter(); XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; XPQ_QUEUE[XPQ_IDX].val = pfn; #ifdef INVARIANTS XPQ_QUEUE_LOG[XPQ_IDX].file = file; XPQ_QUEUE_LOG[XPQ_IDX].line = line; #endif xen_increment_idx(); if (__predict_true(gdtset)) critical_exit(); } extern struct rwlock pvh_global_lock; void _xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line) { SET_VCPU(); if (__predict_true(gdtset)) rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((ptr & 7) == 0, ("misaligned update")); if (__predict_true(gdtset)) critical_enter(); XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE; XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val; #ifdef INVARIANTS XPQ_QUEUE_LOG[XPQ_IDX].file = file; XPQ_QUEUE_LOG[XPQ_IDX].line = line; #endif xen_increment_idx(); if (__predict_true(gdtset)) critical_exit(); } void xen_pgdpt_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L3_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pgd_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L2_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pgd_unpin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_UNPIN_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pt_pin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_PIN_L1_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_pt_unpin(vm_paddr_t ma) { struct mmuext_op op; op.cmd = MMUEXT_UNPIN_TABLE; op.arg1.mfn = ma >> PAGE_SHIFT; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_set_ldt(vm_paddr_t ptr, unsigned long len) { struct mmuext_op op; op.cmd = MMUEXT_SET_LDT; op.arg1.linear_addr = ptr; op.arg2.nr_ents = len; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_tlb_flush(void) { struct mmuext_op op; op.cmd = MMUEXT_TLB_FLUSH_LOCAL; xen_flush_queue(); PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0); } void xen_update_descriptor(union descriptor *table, union descriptor *entry) { vm_paddr_t pa; pt_entry_t *ptp; ptp = vtopte((vm_offset_t)table); pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK); if 
(HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry)) panic("HYPERVISOR_update_descriptor failed\n"); } #if 0 /* * Bitmap is indexed by page number. If bit is set, the page is part of a * xen_create_contiguous_region() area of memory. */ unsigned long *contiguous_bitmap; static void contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages) { unsigned long start_off, end_off, curr_idx, end_idx; curr_idx = first_page / BITS_PER_LONG; start_off = first_page & (BITS_PER_LONG-1); end_idx = (first_page + nr_pages) / BITS_PER_LONG; end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); if (curr_idx == end_idx) { contiguous_bitmap[curr_idx] |= ((1UL<> PAGE_SHIFT; mfn = PFNTOMFN(pfn); PFNTOMFN(pfn) = INVALID_P2M_ENTRY; PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Get a new contiguous memory extent. */ reservation.extent_order = order; /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not * running with a broxen driver XXXEN */ reservation.address_bits = 31; if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1) goto fail; /* 3. Map the new extent in place of old pages. */ for (i = 0; i < (1 << order); i++) { int pfn; pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; xen_machphys_update(mfn+i, pfn); PFNTOMFN(pfn) = mfn+i; } xen_tlb_flush(); #if 0 contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order); #endif balloon_unlock(flags); return 0; fail: reservation.extent_order = 0; reservation.address_bits = 0; for (i = 0; i < (1 << order); i++) { int pfn; pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_memory_op( XENMEM_increase_reservation, &reservation) != 1); xen_machphys_update(mfn, pfn); PFNTOMFN(pfn) = mfn; } xen_tlb_flush(); balloon_unlock(flags); return ENOMEM; } void xen_destroy_contiguous_region(void *addr, int npages) { unsigned long mfn, i, flags, order, pfn0; struct xen_memory_reservation reservation = { .nr_extents = 1, .extent_order = 0, .domid = DOMID_SELF }; set_xen_guest_handle(reservation.extent_start, &mfn); pfn0 = vtophys(addr) >> PAGE_SHIFT; #if 0 scrub_pages(vstart, 1 << order); #endif /* can currently only handle power of two allocation */ PANIC_IF(ffs(npages) != fls(npages)); /* 0. determine order */ order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages); balloon_lock(flags); #if 0 contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order); #endif /* 1. Zap current PTEs, giving away the underlying pages. */ for (i = 0; i < (1 << order); i++) { int pfn; uint64_t new_val = 0; pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT; PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0)); PFNTOMFN(pfn) = INVALID_P2M_ENTRY; PANIC_IF(HYPERVISOR_memory_op( XENMEM_decrease_reservation, &reservation) != 1); } /* 2. Map new pages in place of old pages. 
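xen_create_contiguous_region() above only accepts power-of-two requests: it hands the npages scattered machine frames back to Xen one at a time, then asks for a single machine-contiguous extent of the same size below the 31-bit limit the code requests (reservation.address_bits = 31) for legacy DMA. For npages = 8 the arithmetic is:

        order  = fls(npages) - 1 = fls(8) - 1 = 3
        extent = 1 << order = 8 pages = 32 KiB with 4 KiB pages

On failure it walks the same range again, re-populating the PFNs it gave away one page at a time before returning ENOMEM.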
*/ for (i = 0; i < (1 << order); i++) { int pfn; uint64_t new_val; pfn = pfn0 + i; PANIC_IF(HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1); new_val = mfn << PAGE_SHIFT; PANIC_IF(HYPERVISOR_update_va_mapping((vm_offset_t)addr + (i * PAGE_SIZE), new_val, PG_KERNEL)); xen_machphys_update(mfn, pfn); PFNTOMFN(pfn) = mfn; } xen_tlb_flush(); balloon_unlock(flags); } extern vm_offset_t proc0kstack; extern int vm86paddr, vm86phystk; char *bootmem_start, *bootmem_current, *bootmem_end; pteinfo_t *pteinfo_list; void initvalues(start_info_t *startinfo); struct xenstore_domain_interface; extern struct xenstore_domain_interface *xen_store; char *console_page; void * bootmem_alloc(unsigned int size) { char *retptr; retptr = bootmem_current; PANIC_IF(retptr + size > bootmem_end); bootmem_current += size; return retptr; } void bootmem_free(void *ptr, unsigned int size) { char *tptr; tptr = ptr; PANIC_IF(tptr != bootmem_current - size || bootmem_current - size < bootmem_start); bootmem_current -= size; } #if 0 static vm_paddr_t xpmap_mtop2(vm_paddr_t mpa) { return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT) ) | (mpa & ~PG_FRAME); } static pd_entry_t xpmap_get_bootpde(vm_paddr_t va) { return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22]; } static pd_entry_t xpmap_get_vbootpde(vm_paddr_t va) { pd_entry_t pde; pde = xpmap_get_bootpde(va); if ((pde & PG_V) == 0) return (pde & ~PG_FRAME); return (pde & ~PG_FRAME) | (xpmap_mtop2(pde & PG_FRAME) + KERNBASE); } static pt_entry_t 8* xpmap_get_bootptep(vm_paddr_t va) { pd_entry_t pde; pde = xpmap_get_vbootpde(va); if ((pde & PG_V) == 0) return (void *)-1; #define PT_MASK 0x003ff000 /* page table address bits */ return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]); } static pt_entry_t xpmap_get_bootpte(vm_paddr_t va) { return xpmap_get_bootptep(va)[0]; } #endif #ifdef ADD_ISA_HOLE static void shift_phys_machine(unsigned long *phys_machine, int nr_pages) { unsigned long *tmp_page, *current_page, *next_page; int i; tmp_page = bootmem_alloc(PAGE_SIZE); current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long)); next_page = current_page - (PAGE_SIZE/sizeof(unsigned long)); bcopy(phys_machine, tmp_page, PAGE_SIZE); while (current_page > phys_machine) { /* save next page */ bcopy(next_page, tmp_page, PAGE_SIZE); /* shift down page */ bcopy(current_page, next_page, PAGE_SIZE); /* finish swap */ bcopy(tmp_page, current_page, PAGE_SIZE); current_page -= (PAGE_SIZE/sizeof(unsigned long)); next_page -= (PAGE_SIZE/sizeof(unsigned long)); } bootmem_free(tmp_page, PAGE_SIZE); for (i = 0; i < nr_pages; i++) { xen_machphys_update(phys_machine[i], i); } memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE); } #endif /* ADD_ISA_HOLE */ /* * Build a directory of the pages that make up our Physical to Machine * mapping table. The Xen suspend/restore code uses this to find our * mapping table. 
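bootmem_alloc() and bootmem_free() above form a minimal bump allocator over the fixed window carved out later in initvalues(); there is no free list, so releases must come back in strict LIFO order or the PANIC_IF in bootmem_free() fires. A hypothetical use:

        char *p;

        p = bootmem_alloc(PAGE_SIZE);   /* bootmem_current advances by PAGE_SIZE */
        /* ... early-boot use of p ... */
        bootmem_free(p, PAGE_SIZE);     /* legal only while p is the newest allocation */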
*/ static void init_frame_list_list(void *arg) { unsigned long nr_pages = xen_start_info->nr_pages; #define FPP (PAGE_SIZE/sizeof(xen_pfn_t)) int i, j, k; xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); for (i = 0, j = 0, k = -1; i < nr_pages; i += FPP, j++) { if ((j & (FPP - 1)) == 0) { k++; xen_pfn_to_mfn_frame_list[k] = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); xen_pfn_to_mfn_frame_list_list[k] = VTOMFN(xen_pfn_to_mfn_frame_list[k]); j = 0; } xen_pfn_to_mfn_frame_list[k][j] = VTOMFN(&xen_phys_machine[i]); } HYPERVISOR_shared_info->arch.max_pfn = nr_pages; HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = VTOMFN(xen_pfn_to_mfn_frame_list_list); } SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL); extern unsigned long physfree; int pdir, curoffset; extern int nkpt; extern uint32_t kernbase; void initvalues(start_info_t *startinfo) { vm_offset_t cur_space, cur_space_pt; struct physdev_set_iopl set_iopl; int l3_pages, l2_pages, l1_pages, offset; vm_paddr_t console_page_ma, xen_store_ma; vm_offset_t tmpva; vm_paddr_t shinfo; #ifdef PAE vm_paddr_t IdlePDPTma, IdlePDPTnewma; vm_paddr_t IdlePTDnewma[4]; pd_entry_t *IdlePDPTnew, *IdlePTDnew; vm_paddr_t IdlePTDma[4]; #else vm_paddr_t IdlePTDma[1]; #endif unsigned long i; int ncpus = MAXCPU; nkpt = min( min( max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt), NPGPTD*NPDEPG - KPTDI), (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT); HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); #ifdef notyet /* * need to install handler */ HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify); #endif xen_start_info = startinfo; xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list; IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE); l1_pages = 0; #ifdef PAE l3_pages = 1; l2_pages = 0; IdlePDPT = (pd_entry_t *)startinfo->pt_base; IdlePDPTma = VTOM(startinfo->pt_base); for (i = (KERNBASE >> 30); (i < 4) && (IdlePDPT[i] != 0); i++) l2_pages++; /* * Note that only one page directory has been allocated at this point. * Thus, if KERNBASE */ for (i = 0; i < l2_pages; i++) IdlePTDma[i] = VTOM(IdlePTD + i*PAGE_SIZE); l2_pages = (l2_pages == 0) ? 
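init_frame_list_list() above publishes the PFN-to-MFN table to the hypervisor as a two-level directory that the suspend/restore code can walk. Assuming 4 KiB pages and the 32-bit xen_pfn_t used here, the fan-out is:

        FPP                  = PAGE_SIZE / sizeof(xen_pfn_t) = 4096 / 4 = 1024
        one frame-list page  = FPP * FPP = 1,048,576 guest pages (4 GiB of RAM)
        frame_list_list page = up to FPP frame-list pages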
1 : l2_pages; #else l3_pages = 0; l2_pages = 1; #endif for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT); (i>PDRSHIFT)); i++) { if (IdlePTD[i] == 0) break; l1_pages++; } /* number of pages allocated after the pts + 1*/; cur_space = xen_start_info->pt_base + (l3_pages + l2_pages + l1_pages + 1)*PAGE_SIZE; - printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space); + xc_printf("initvalues(): wooh - availmem=%x,%x\n", avail_space, + cur_space); - printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n", + xc_printf("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n", KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base), xen_start_info->nr_pt_frames); xendebug_flags = 0; /* 0xffffffff; */ #ifdef ADD_ISA_HOLE shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages); #endif XENPRINTF("IdlePTD %p\n", IdlePTD); XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx " "mod_start: 0x%lx mod_len: 0x%lx\n", xen_start_info->nr_pages, xen_start_info->shared_info, xen_start_info->flags, xen_start_info->pt_base, xen_start_info->mod_start, xen_start_info->mod_len); #ifdef PAE IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE; bzero(IdlePDPTnew, PAGE_SIZE); IdlePDPTnewma = VTOM(IdlePDPTnew); IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE; bzero(IdlePTDnew, 4*PAGE_SIZE); for (i = 0; i < 4; i++) IdlePTDnewma[i] = VTOM((uint8_t *)IdlePTDnew + i*PAGE_SIZE); /* * L3 * * Copy the 4 machine addresses of the new PTDs in to the PDPT * */ for (i = 0; i < 4; i++) IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V; __asm__("nop;"); /* * * re-map the new PDPT read-only */ PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V); /* * * Unpin the current PDPT */ xen_pt_unpin(IdlePDPTma); #endif /* PAE */ /* Map proc0's KSTACK */ proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE); - printk("proc0kstack=%u\n", proc0kstack); + xc_printf("proc0kstack=%u\n", proc0kstack); /* vm86/bios stack */ cur_space += PAGE_SIZE; /* Map space for the vm86 region */ vm86paddr = (vm_offset_t)cur_space; cur_space += (PAGE_SIZE * 3); /* allocate 4 pages for bootmem allocator */ bootmem_start = bootmem_current = (char *)cur_space; cur_space += (4 * PAGE_SIZE); bootmem_end = (char *)cur_space; /* allocate pages for gdt */ gdt = (union descriptor *)cur_space; cur_space += PAGE_SIZE*ncpus; /* allocate page for ldt */ ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE; cur_space += PAGE_SIZE; /* unmap remaining pages from initial chunk * */ for (tmpva = cur_space; tmpva < (((uint32_t)&kernbase) + (l1_pages<> 18)), ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK), l1_pages*sizeof(pt_entry_t)); for (i = 0; i < 4; i++) { PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE, IdlePTDnewma[i] | PG_V); } xen_load_cr3(VTOP(IdlePDPTnew)); xen_pgdpt_pin(VTOM(IdlePDPTnew)); /* allocate remainder of nkpt pages */ cur_space_pt = cur_space; for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt; i++, cur_space += PAGE_SIZE) { pdir = (offset + i) / NPDEPG; curoffset = ((offset + i) % NPDEPG); if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS) break; /* * make sure that all the initial page table pages * have been zeroed */ PT_SET_MA(cur_space, VTOM(cur_space) | PG_V | PG_RW); bzero((char *)cur_space, PAGE_SIZE); PT_SET_MA(cur_space, (vm_paddr_t)0); xen_pt_pin(VTOM(cur_space)); xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + curoffset*sizeof(vm_paddr_t)), VTOM(cur_space) | PG_KERNEL); PT_UPDATES_FLUSH(); } for (i = 0; i < 4; i++) { pdir = (PTDPTDI 
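The rebuild above follows the ordering Xen imposes on PV page tables: a page may only be pinned as a page table once the guest holds no writable mapping of it, and all later modifications go through mmu_update hypercalls (the queued updates used here). The per-page sequence from the nkpt loop, reduced to its essentials (va and pde_ma are placeholder names):

        PT_SET_MA(va, VTOM(va) | PG_V | PG_RW); /* writable just long enough to zero it */
        bzero((void *)va, PAGE_SIZE);
        PT_SET_MA(va, (vm_paddr_t)0);           /* drop the writable mapping ... */
        xen_pt_pin(VTOM(va));                   /* ... so Xen accepts it as an L1 table */
        xen_queue_pt_update(pde_ma, VTOM(va) | PG_KERNEL);
        PT_UPDATES_FLUSH();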
+ i) / NPDEPG; curoffset = (PTDPTDI + i) % NPDEPG; xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + curoffset*sizeof(vm_paddr_t)), IdlePTDnewma[i] | PG_V); } PT_UPDATES_FLUSH(); IdlePTD = IdlePTDnew; IdlePDPT = IdlePDPTnew; IdlePDPTma = IdlePDPTnewma; HYPERVISOR_shared_info = (shared_info_t *)cur_space; cur_space += PAGE_SIZE; xen_store = (struct xenstore_domain_interface *)cur_space; cur_space += PAGE_SIZE; console_page = (char *)cur_space; cur_space += PAGE_SIZE; /* * shared_info is an unsigned long so this will randomly break if * it is allocated above 4GB - I guess people are used to that * sort of thing with Xen ... sigh */ shinfo = xen_start_info->shared_info; PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL); - printk("#4\n"); + xc_printf("#4\n"); xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT); PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL); console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT); PT_SET_MA(console_page, console_page_ma | PG_KERNEL); - printk("#5\n"); + xc_printf("#5\n"); set_iopl.iopl = 1; PANIC_IF(HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl)); - printk("#6\n"); + xc_printf("#6\n"); #if 0 /* add page table for KERNBASE */ xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_KERNEL); xen_flush_queue(); #ifdef PAE xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_V | PG_A); #else xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), VTOM(cur_space) | PG_V | PG_A); #endif xen_flush_queue(); cur_space += PAGE_SIZE; - printk("#6\n"); + xc_printf("#6\n"); #endif /* 0 */ #ifdef notyet if (xen_start_info->flags & SIF_INITDOMAIN) { /* Map first megabyte */ for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD); xen_flush_queue(); } #endif /* * re-map kernel text read-only * */ for (i = (((vm_offset_t)&btext) & ~PAGE_MASK); i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE) PT_SET_MA(i, VTOM(i) | PG_V | PG_A); - printk("#7\n"); + xc_printf("#7\n"); physfree = VTOP(cur_space); init_first = physfree >> PAGE_SHIFT; IdlePTD = (pd_entry_t *)VTOP(IdlePTD); IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT); setup_xen_features(); - printk("#8, proc0kstack=%u\n", proc0kstack); + xc_printf("#8, proc0kstack=%u\n", proc0kstack); } trap_info_t trap_table[] = { { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, { 1, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, { 3, 3|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, /* This is UPL on Linux and KPL on BSD */ { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, { 7, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, /* * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, * no handler for double fault */ { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, {14, 0|4, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, {17, 0, GSEL(GCODE_SEL, 
SEL_KPL), (unsigned long) &IDTVEC(align)}, {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, { 0, 0, 0, 0 } }; /* Perform a multicall and check that individual calls succeeded. */ int HYPERVISOR_multicall(struct multicall_entry * call_list, int nr_calls) { int ret = 0; int i; /* Perform the multicall. */ PANIC_IF(_HYPERVISOR_multicall(call_list, nr_calls)); /* Check the results of individual hypercalls. */ for (i = 0; i < nr_calls; i++) - if (unlikely(call_list[i].result < 0)) + if (__predict_false(call_list[i].result < 0)) ret++; - if (unlikely(ret > 0)) + if (__predict_false(ret > 0)) panic("%d multicall(s) failed: cpu %d\n", ret, smp_processor_id()); /* If we didn't panic already, everything succeeded. */ return (0); } /********** CODE WORTH KEEPING ABOVE HERE *****************/ void xen_failsafe_handler(void); void xen_failsafe_handler(void) { panic("xen_failsafe_handler called!\n"); } void xen_handle_thread_switch(struct pcb *pcb); /* This is called by cpu_switch() when switching threads. */ /* The pcb arg refers to the process control block of the */ /* next thread which is to run */ void xen_handle_thread_switch(struct pcb *pcb) { uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0]; uint32_t *b = (uint32_t *)&pcb->pcb_fsd; multicall_entry_t mcl[3]; int i = 0; /* Notify Xen of task switch */ mcl[i].op = __HYPERVISOR_stack_switch; mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL); mcl[i++].args[1] = (unsigned long)pcb; /* Check for update of fsd */ if (*a != *b || *(a+1) != *(b+1)) { mcl[i].op = __HYPERVISOR_update_descriptor; *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; } a += 2; b += 2; /* Check for update of gsd */ if (*a != *b || *(a+1) != *(b+1)) { mcl[i].op = __HYPERVISOR_update_descriptor; *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; } (void)HYPERVISOR_multicall(mcl, i); } diff --git a/sys/i386/xen/xen_rtc.c b/sys/i386/xen/xen_rtc.c index 8e1e0175959f..8dc3ecbe96fe 100644 --- a/sys/i386/xen/xen_rtc.c +++ b/sys/i386/xen/xen_rtc.c @@ -1,144 +1,146 @@ /*- * Copyright (c) 2009 Adrian Chadd * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
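xen_handle_thread_switch() above batches the kernel stack switch and any changed %fs/%gs descriptors into one multicall, so a context switch costs at most a single trap into the hypervisor, and HYPERVISOR_multicall() panics if the batch or any individual entry failed. The filling pattern, reduced to the one unconditional entry (values mirror the code above):

        multicall_entry_t mcl[1];
        int n = 0;

        mcl[n].op = __HYPERVISOR_stack_switch;
        mcl[n].args[0] = GSEL(GDATA_SEL, SEL_KPL);      /* kernel stack segment */
        mcl[n++].args[1] = (unsigned long)pcb;          /* kernel stack pointer */

        (void)HYPERVISOR_multicall(mcl, n);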
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include +#include #include +#include +#include +#include + #include #include + #include -#include -#include #include -#include -#include #include #include #include "clock_if.h" static int xen_rtc_probe(device_t dev) { device_set_desc(dev, "Xen Hypervisor Clock"); printf("[XEN] xen_rtc_probe: probing Hypervisor RTC clock\n"); if (! HYPERVISOR_shared_info) { device_printf(dev, "No hypervisor shared page found; RTC can not start.\n"); return (EINVAL); } return (0); } static int xen_rtc_attach(device_t dev) { printf("[XEN] xen_rtc_attach: attaching Hypervisor RTC clock\n"); clock_register(dev, 1000000); return(0); } static int xen_rtc_settime(device_t dev __unused, struct timespec *ts) { device_printf(dev, "[XEN] xen_rtc_settime\n"); /* * Don't return EINVAL here; just silently fail if the domain isn't privileged enough * to set the TOD. */ return(0); } /* * The Xen time structures document the hypervisor start time and the * uptime-since-hypervisor-start (in nsec.) They need to be combined * in order to calculate a TOD clock. */ static int xen_rtc_gettime(device_t dev, struct timespec *ts) { struct timespec w_ts, u_ts; device_printf(dev, "[XEN] xen_rtc_gettime\n"); xen_fetch_wallclock(&w_ts); device_printf(dev, "[XEN] xen_rtc_gettime: wallclock %ld sec; %ld nsec\n", (long int) w_ts.tv_sec, (long int) w_ts.tv_nsec); xen_fetch_uptime(&u_ts); device_printf(dev, "[XEN] xen_rtc_gettime: uptime %ld sec; %ld nsec\n", (long int) u_ts.tv_sec, (long int) u_ts.tv_nsec); timespecclear(ts); timespecadd(ts, &w_ts); timespecadd(ts, &u_ts); device_printf(dev, "[XEN] xen_rtc_gettime: TOD %ld sec; %ld nsec\n", (long int) ts->tv_sec, (long int) ts->tv_nsec); return(0); } static void xen_rtc_identify(driver_t *drv, device_t parent) { BUS_ADD_CHILD(parent, 0, "rtc", 0); } static device_method_t xen_rtc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xen_rtc_probe), DEVMETHOD(device_attach, xen_rtc_attach), DEVMETHOD(device_identify, xen_rtc_identify), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), /* clock interface */ DEVMETHOD(clock_gettime, xen_rtc_gettime), DEVMETHOD(clock_settime, xen_rtc_settime), { 0, 0 } }; static driver_t xen_rtc_driver = { "rtc", xen_rtc_methods, 0 }; static devclass_t xen_rtc_devclass; DRIVER_MODULE(rtc, nexus, xen_rtc_driver, xen_rtc_devclass, 0, 0); diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h index 687b42caf09c..68e0858ac611 100644 --- a/sys/sys/kernel.h +++ b/sys/sys/kernel.h @@ -1,375 +1,380 @@ /*- * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. 
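xen_rtc_gettime() above reconstructs time-of-day from the two quantities the hypervisor exports: the wall-clock value at hypervisor start and the nanosecond-resolution uptime since then; timespecclear() plus the two timespecadd() calls perform the addition, carry included. For example (illustrative numbers):

        wallclock   1357000000 s  500000000 ns
        uptime    +       3600 s  250000000 ns
        TOD         1357003600 s  750000000 ns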
and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernel.h 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS_KERNEL_H_ #define _SYS_KERNEL_H_ #include #ifdef _KERNEL /* for intrhook below */ #include /* Global variables for the kernel. */ /* 1.1 */ extern char kernelname[MAXPATHLEN]; extern int tick; /* usec per tick (1000000 / hz) */ extern int hz; /* system clock's frequency */ extern int psratio; /* ratio: prof / stat */ extern int stathz; /* statistics clock's frequency */ extern int profhz; /* profiling clock's frequency */ extern int profprocs; /* number of process's profiling */ extern volatile int ticks; #endif /* _KERNEL */ /* * Enumerated types for known system startup interfaces. * * Startup occurs in ascending numeric order; the list entries are * sorted prior to attempting startup to guarantee order. Items * of the same level are arbitrated for order based on the 'order' * element. * * These numbers are arbitrary and are chosen ONLY for ordering; the * enumeration values are explicit rather than implicit to provide * for binary compatibility with inserted elements. * * The SI_SUB_LAST value must have the highest lexical value. * * The SI_SUB_SWAP values represent a value used by * the BSD 4.4Lite but not by FreeBSD; it is maintained in dependent * order to support porting. 
*/ enum sysinit_sub_id { SI_SUB_DUMMY = 0x0000000, /* not executed; for linker*/ SI_SUB_DONE = 0x0000001, /* processed*/ SI_SUB_TUNABLES = 0x0700000, /* establish tunable values */ SI_SUB_COPYRIGHT = 0x0800001, /* first use of console*/ SI_SUB_SETTINGS = 0x0880000, /* check and recheck settings */ SI_SUB_MTX_POOL_STATIC = 0x0900000, /* static mutex pool */ SI_SUB_LOCKMGR = 0x0980000, /* lockmgr locks */ SI_SUB_VM = 0x1000000, /* virtual memory system init*/ SI_SUB_KMEM = 0x1800000, /* kernel memory*/ SI_SUB_KVM_RSRC = 0x1A00000, /* kvm operational limits*/ + SI_SUB_HYPERVISOR = 0x1A40000, /* + * Hypervisor detection and + * virtualization support + * setup. + */ SI_SUB_WITNESS = 0x1A80000, /* witness initialization */ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */ SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ SI_SUB_RACCT = 0x2110000, /* resource accounting */ SI_SUB_RANDOM = 0x2120000, /* random number generator */ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */ SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */ SI_SUB_VNET = 0x21E0000, /* vnet 0 */ SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/ SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/ SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */ SI_SUB_RUN_QUEUE = 0x2400000, /* set up run queue*/ SI_SUB_KTRACE = 0x2480000, /* ktrace */ SI_SUB_OPENSOLARIS = 0x2490000, /* OpenSolaris compatibility */ SI_SUB_CYCLIC = 0x24A0000, /* Cyclic timers */ SI_SUB_AUDIT = 0x24C0000, /* audit */ SI_SUB_CREATE_INIT = 0x2500000, /* create init process*/ SI_SUB_SCHED_IDLE = 0x2600000, /* required idle procs */ SI_SUB_MBUF = 0x2700000, /* mbuf subsystem */ SI_SUB_INTR = 0x2800000, /* interrupt threads */ SI_SUB_SOFTINTR = 0x2800001, /* start soft interrupt thread */ SI_SUB_ACL = 0x2900000, /* start for filesystem ACLs */ SI_SUB_DEVFS = 0x2F00000, /* devfs ready for devices */ SI_SUB_INIT_IF = 0x3000000, /* prep for net interfaces */ SI_SUB_NETGRAPH = 0x3010000, /* Let Netgraph initialize */ SI_SUB_DTRACE = 0x3020000, /* DTrace subsystem */ SI_SUB_DTRACE_PROVIDER = 0x3048000, /* DTrace providers */ SI_SUB_DTRACE_ANON = 0x308C000, /* DTrace anon enabling */ SI_SUB_DRIVERS = 0x3100000, /* Let Drivers initialize */ SI_SUB_CONFIGURE = 0x3800000, /* Configure devices */ SI_SUB_VFS = 0x4000000, /* virtual filesystem*/ SI_SUB_CLOCKS = 0x4800000, /* real time and stat clocks*/ SI_SUB_CLIST = 0x5800000, /* clists*/ SI_SUB_SYSV_SHM = 0x6400000, /* System V shared memory*/ SI_SUB_SYSV_SEM = 0x6800000, /* System V semaphores*/ SI_SUB_SYSV_MSG = 0x6C00000, /* System V message queues*/ SI_SUB_P1003_1B = 0x6E00000, /* P1003.1B realtime */ SI_SUB_PSEUDO = 0x7000000, /* pseudo devices*/ SI_SUB_EXEC = 0x7400000, /* execve() handlers */ SI_SUB_PROTO_BEGIN = 0x8000000, /* VNET initialization */ SI_SUB_PROTO_IF = 0x8400000, /* interfaces*/ SI_SUB_PROTO_DOMAININIT = 0x8600000, /* domain registration system */ SI_SUB_PROTO_DOMAIN = 0x8800000, /* domains (address families?)*/ SI_SUB_PROTO_IFATTACHDOMAIN = 0x8800001, /* domain dependent data init*/ SI_SUB_PROTO_END = 0x8ffffff, /* VNET helper functions */ SI_SUB_KPROF = 0x9000000, /* kernel profiling*/ SI_SUB_KICK_SCHEDULER = 0xa000000, /* start the timeout 
events*/ SI_SUB_INT_CONFIG_HOOKS = 0xa800000, /* Interrupts enabled config */ SI_SUB_ROOT_CONF = 0xb000000, /* Find root devices */ SI_SUB_DUMP_CONF = 0xb200000, /* Find dump devices */ SI_SUB_RAID = 0xb380000, /* Configure GEOM classes */ SI_SUB_SWAP = 0xc000000, /* swap */ SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/ SI_SUB_SYSCALLS = 0xd800000, /* register system calls */ SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */ SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/ SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/ SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/ SI_SUB_KTHREAD_BUF = 0xea00000, /* buffer daemon*/ SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/ SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/ SI_SUB_SMP = 0xf000000, /* start the APs*/ SI_SUB_RACCTD = 0xf100000, /* start raccd*/ SI_SUB_LAST = 0xfffffff /* final initialization */ }; /* * Some enumerated orders; "ANY" sorts last. */ enum sysinit_elem_order { SI_ORDER_FIRST = 0x0000000, /* first*/ SI_ORDER_SECOND = 0x0000001, /* second*/ SI_ORDER_THIRD = 0x0000002, /* third*/ SI_ORDER_FOURTH = 0x0000003, /* fourth*/ SI_ORDER_MIDDLE = 0x1000000, /* somewhere in the middle */ SI_ORDER_ANY = 0xfffffff /* last*/ }; /* * A system initialization call instance * * At the moment there is one instance of sysinit. We probably do not * want two which is why this code is if'd out, but we definitely want * to discern SYSINIT's which take non-constant data pointers and * SYSINIT's which take constant data pointers, * * The C_* macros take functions expecting const void * arguments * while the non-C_* macros take functions expecting just void * arguments. * * With -Wcast-qual on, the compiler issues warnings: * - if we pass non-const data or functions taking non-const data * to a C_* macro. * * - if we pass const data to the normal macros * * However, no warning is issued if we pass a function taking const data * through a normal non-const macro. This is ok because the function is * saying it won't modify the data so we don't care whether the data is * modifiable or not. */ typedef void (*sysinit_nfunc_t)(void *); typedef void (*sysinit_cfunc_t)(const void *); struct sysinit { enum sysinit_sub_id subsystem; /* subsystem identifier*/ enum sysinit_elem_order order; /* init order within subsystem*/ sysinit_cfunc_t func; /* function */ const void *udata; /* multiplexer/argument */ }; /* * Default: no special processing * * The C_ version of SYSINIT is for data pointers to const * data ( and functions taking data pointers to const data ). * At the moment it is no different from SYSINIT and thus * still results in warnings. * * The casts are necessary to have the compiler produce the * correct warnings when -Wcast-qual is used. 
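Boot runs the sysinit_sub_id levels in ascending numeric order, with SI_ORDER_* breaking ties within a level, so the new SI_SUB_HYPERVISOR slot (0x1A40000) executes after SI_SUB_KVM_RSRC and before SI_SUB_WITNESS. A hypothetical consumer registering work at that stage (the function and its body are illustrative, not part of this change):

        static void
        hypervisor_probe(void *arg __unused)
        {
                /* e.g. probe CPUID hypervisor leaves, set vm_guest. */
        }
        SYSINIT(hypervisor_probe, SI_SUB_HYPERVISOR, SI_ORDER_FIRST,
            hypervisor_probe, NULL);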
* */ #define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_init = { \ subsystem, \ order, \ func, \ (ident) \ }; \ DATA_SET(sysinit_set,uniquifier ## _sys_init) #define SYSINIT(uniquifier, subsystem, order, func, ident) \ C_SYSINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) /* * Called on module unload: no special processing */ #define C_SYSUNINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_uninit = { \ subsystem, \ order, \ func, \ (ident) \ }; \ DATA_SET(sysuninit_set,uniquifier ## _sys_uninit) #define SYSUNINIT(uniquifier, subsystem, order, func, ident) \ C_SYSUNINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) void sysinit_add(struct sysinit **set, struct sysinit **set_end); /* * Infrastructure for tunable 'constants'. Value may be specified at compile * time or kernel load time. Rules relating tunables together can be placed * in a SYSINIT function at SI_SUB_TUNABLES with SI_ORDER_ANY. * * WARNING: developers should never use the reserved suffixes specified in * loader.conf(5) for any tunables or conflicts will result. */ /* * int * please avoid using for new tunables! */ extern void tunable_int_init(void *); struct tunable_int { const char *path; int *var; }; #define TUNABLE_INT(path, var) \ static struct tunable_int __CONCAT(__tunable_int_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int_init, \ &__CONCAT(__tunable_int_, __LINE__)) #define TUNABLE_INT_FETCH(path, var) getenv_int((path), (var)) /* * long */ extern void tunable_long_init(void *); struct tunable_long { const char *path; long *var; }; #define TUNABLE_LONG(path, var) \ static struct tunable_long __CONCAT(__tunable_long_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_long_init,\ &__CONCAT(__tunable_long_, __LINE__)) #define TUNABLE_LONG_FETCH(path, var) getenv_long((path), (var)) /* * unsigned long */ extern void tunable_ulong_init(void *); struct tunable_ulong { const char *path; unsigned long *var; }; #define TUNABLE_ULONG(path, var) \ static struct tunable_ulong __CONCAT(__tunable_ulong_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_ulong_init, \ &__CONCAT(__tunable_ulong_, __LINE__)) #define TUNABLE_ULONG_FETCH(path, var) getenv_ulong((path), (var)) /* * quad */ extern void tunable_quad_init(void *); struct tunable_quad { const char *path; quad_t *var; }; #define TUNABLE_QUAD(path, var) \ static struct tunable_quad __CONCAT(__tunable_quad_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_quad_init, \ &__CONCAT(__tunable_quad_, __LINE__)) #define TUNABLE_QUAD_FETCH(path, var) getenv_quad((path), (var)) extern void tunable_str_init(void *); struct tunable_str { const char *path; char *var; int size; }; #define TUNABLE_STR(path, var, size) \ static struct tunable_str __CONCAT(__tunable_str_, __LINE__) = { \ (path), \ (var), \ (size), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_str_init, \ &__CONCAT(__tunable_str_, __LINE__)) #define TUNABLE_STR_FETCH(path, var, size) \ getenv_string((path), (var), (size)) struct intr_config_hook { TAILQ_ENTRY(intr_config_hook) 
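The TUNABLE_* macros above come in two forms: the declarative one registers a SYSINIT at SI_SUB_TUNABLES that copies a loader environment variable into a kernel variable once at boot, while the *_FETCH form reads it imperatively at the call site. A hypothetical use (the knob name is made up for illustration):

        static int xen_debug = 0;
        TUNABLE_INT("debug.xen.level", &xen_debug);     /* fetched at SI_SUB_TUNABLES */

        /* ... or, from any later initialization code: */
        TUNABLE_INT_FETCH("debug.xen.level", &xen_debug);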
ich_links; void (*ich_func)(void *arg); void *ich_arg; }; int config_intrhook_establish(struct intr_config_hook *hook); void config_intrhook_disestablish(struct intr_config_hook *hook); #endif /* !_SYS_KERNEL_H_*/ diff --git a/sys/x86/include/segments.h b/sys/x86/include/segments.h index 74066ef48e79..0d6a282c3c8f 100644 --- a/sys/x86/include/segments.h +++ b/sys/x86/include/segments.h @@ -1,286 +1,287 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)segments.h 7.1 (Berkeley) 5/9/91 * $FreeBSD$ */ #ifndef _X86_SEGMENTS_H_ #define _X86_SEGMENTS_H_ /* * X86 Segmentation Data Structures and definitions */ /* * Selectors */ #define SEL_RPL_MASK 3 /* requester priv level */ #define ISPL(s) ((s)&3) /* priority level of a selector */ #ifdef XEN #define SEL_KPL 1 /* kernel priority level */ #else #define SEL_KPL 0 /* kernel priority level */ #endif #define SEL_UPL 3 /* user priority level */ #define ISLDT(s) ((s)&SEL_LDT) /* is it local or global */ #define SEL_LDT 4 /* local descriptor table */ #define IDXSEL(s) (((s)>>3) & 0x1fff) /* index of selector */ #define LSEL(s,r) (((s)<<3) | SEL_LDT | r) /* a local selector */ #define GSEL(s,r) (((s)<<3) | r) /* a global selector */ /* * User segment descriptors (%cs, %ds etc for i386 apps. 64 bit wide) * For long-mode apps, %cs only has the conforming bit in sd_type, the sd_dpl, * sd_p, sd_l and sd_def32 which must be zero). %ds only has sd_p. 
*/ struct segment_descriptor { unsigned sd_lolimit:16; /* segment extent (lsb) */ unsigned sd_lobase:24; /* segment base address (lsb) */ unsigned sd_type:5; /* segment type */ unsigned sd_dpl:2; /* segment descriptor priority level */ unsigned sd_p:1; /* segment descriptor present */ unsigned sd_hilimit:4; /* segment extent (msb) */ unsigned sd_xx:2; /* unused */ unsigned sd_def32:1; /* default 32 vs 16 bit size */ unsigned sd_gran:1; /* limit granularity (byte/page units)*/ unsigned sd_hibase:8; /* segment base address (msb) */ } __packed; struct user_segment_descriptor { unsigned sd_lolimit:16; /* segment extent (lsb) */ unsigned sd_lobase:24; /* segment base address (lsb) */ unsigned sd_type:5; /* segment type */ unsigned sd_dpl:2; /* segment descriptor priority level */ unsigned sd_p:1; /* segment descriptor present */ unsigned sd_hilimit:4; /* segment extent (msb) */ unsigned sd_xx:1; /* unused */ unsigned sd_long:1; /* long mode (cs only) */ unsigned sd_def32:1; /* default 32 vs 16 bit size */ unsigned sd_gran:1; /* limit granularity (byte/page units)*/ unsigned sd_hibase:8; /* segment base address (msb) */ } __packed; #define USD_GETBASE(sd) (((sd)->sd_lobase) | (sd)->sd_hibase << 24) #define USD_SETBASE(sd, b) (sd)->sd_lobase = (b); \ (sd)->sd_hibase = ((b) >> 24); #define USD_GETLIMIT(sd) (((sd)->sd_lolimit) | (sd)->sd_hilimit << 16) #define USD_SETLIMIT(sd, l) (sd)->sd_lolimit = (l); \ (sd)->sd_hilimit = ((l) >> 16); #ifdef __i386__ /* * Gate descriptors (e.g. indirect descriptors) */ struct gate_descriptor { unsigned gd_looffset:16; /* gate offset (lsb) */ unsigned gd_selector:16; /* gate segment selector */ unsigned gd_stkcpy:5; /* number of stack wds to cpy */ unsigned gd_xx:3; /* unused */ unsigned gd_type:5; /* segment type */ unsigned gd_dpl:2; /* segment descriptor priority level */ unsigned gd_p:1; /* segment descriptor present */ unsigned gd_hioffset:16; /* gate offset (msb) */ } __packed; /* * Generic descriptor */ union descriptor { struct segment_descriptor sd; struct gate_descriptor gd; }; #else /* * Gate descriptors (e.g. indirect descriptors, trap, interrupt etc. 128 bit) * Only interrupt and trap gates have gd_ist. 
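Because the descriptor layout splits the 32-bit base across the 24-bit sd_lobase and 8-bit sd_hibase fields (and the 20-bit limit across 16 + 4 bits), the USD_* accessors above shift and mask accordingly. Worked example for base 0x12345678:

        USD_SETBASE:  sd_lobase = 0x345678 (low 24 bits), sd_hibase = 0x12 (high 8 bits)
        USD_GETBASE:  0x345678 | (0x12 << 24) = 0x12345678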
*/ struct gate_descriptor { uint64_t gd_looffset:16; /* gate offset (lsb) */ uint64_t gd_selector:16; /* gate segment selector */ uint64_t gd_ist:3; /* IST table index */ uint64_t gd_xx:5; /* unused */ uint64_t gd_type:5; /* segment type */ uint64_t gd_dpl:2; /* segment descriptor priority level */ uint64_t gd_p:1; /* segment descriptor present */ uint64_t gd_hioffset:48; /* gate offset (msb) */ uint64_t sd_xx1:32; } __packed; /* * Generic descriptor */ union descriptor { struct user_segment_descriptor sd; struct gate_descriptor gd; }; #endif /* system segments and gate types */ #define SDT_SYSNULL 0 /* system null */ #define SDT_SYS286TSS 1 /* system 286 TSS available */ #define SDT_SYSLDT 2 /* system local descriptor table */ #define SDT_SYS286BSY 3 /* system 286 TSS busy */ #define SDT_SYS286CGT 4 /* system 286 call gate */ #define SDT_SYSTASKGT 5 /* system task gate */ #define SDT_SYS286IGT 6 /* system 286 interrupt gate */ #define SDT_SYS286TGT 7 /* system 286 trap gate */ #define SDT_SYSNULL2 8 /* system null again */ #define SDT_SYS386TSS 9 /* system 386 TSS available */ #define SDT_SYSTSS 9 /* system available 64 bit TSS */ #define SDT_SYSNULL3 10 /* system null again */ #define SDT_SYS386BSY 11 /* system 386 TSS busy */ #define SDT_SYSBSY 11 /* system busy 64 bit TSS */ #define SDT_SYS386CGT 12 /* system 386 call gate */ #define SDT_SYSCGT 12 /* system 64 bit call gate */ #define SDT_SYSNULL4 13 /* system null again */ #define SDT_SYS386IGT 14 /* system 386 interrupt gate */ #define SDT_SYSIGT 14 /* system 64 bit interrupt gate */ #define SDT_SYS386TGT 15 /* system 386 trap gate */ #define SDT_SYSTGT 15 /* system 64 bit trap gate */ /* memory segment types */ #define SDT_MEMRO 16 /* memory read only */ #define SDT_MEMROA 17 /* memory read only accessed */ #define SDT_MEMRW 18 /* memory read write */ #define SDT_MEMRWA 19 /* memory read write accessed */ #define SDT_MEMROD 20 /* memory read only expand dwn limit */ #define SDT_MEMRODA 21 /* memory read only expand dwn limit accessed */ #define SDT_MEMRWD 22 /* memory read write expand dwn limit */ #define SDT_MEMRWDA 23 /* memory read write expand dwn limit accessed*/ #define SDT_MEME 24 /* memory execute only */ #define SDT_MEMEA 25 /* memory execute only accessed */ #define SDT_MEMER 26 /* memory execute read */ #define SDT_MEMERA 27 /* memory execute read accessed */ #define SDT_MEMEC 28 /* memory execute only conforming */ #define SDT_MEMEAC 29 /* memory execute only accessed conforming */ #define SDT_MEMERC 30 /* memory execute read conforming */ #define SDT_MEMERAC 31 /* memory execute read accessed conforming */ /* * Size of IDT table */ #define NIDT 256 /* 32 reserved, 0x80 syscall, most are h/w */ #define NRSVIDT 32 /* reserved entries for cpu exceptions */ /* * Entries in the Interrupt Descriptor Table (IDT) */ #define IDT_DE 0 /* #DE: Divide Error */ #define IDT_DB 1 /* #DB: Debug */ #define IDT_NMI 2 /* Nonmaskable External Interrupt */ #define IDT_BP 3 /* #BP: Breakpoint */ #define IDT_OF 4 /* #OF: Overflow */ #define IDT_BR 5 /* #BR: Bound Range Exceeded */ #define IDT_UD 6 /* #UD: Undefined/Invalid Opcode */ #define IDT_NM 7 /* #NM: No Math Coprocessor */ #define IDT_DF 8 /* #DF: Double Fault */ #define IDT_FPUGP 9 /* Coprocessor Segment Overrun */ #define IDT_TS 10 /* #TS: Invalid TSS */ #define IDT_NP 11 /* #NP: Segment Not Present */ #define IDT_SS 12 /* #SS: Stack Segment Fault */ #define IDT_GP 13 /* #GP: General Protection Fault */ #define IDT_PF 14 /* #PF: Page Fault */ #define IDT_MF 16 /* #MF: FPU 
Floating-Point Error */ #define IDT_AC 17 /* #AC: Alignment Check */ #define IDT_MC 18 /* #MC: Machine Check */ #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ #define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ +#define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */ #if defined(__i386__) || defined(__ia64__) /* * Entries in the Global Descriptor Table (GDT) * Note that each 4 entries share a single 32 byte L1 cache line. * Some of the fast syscall instructions require a specific order here. */ #define GNULL_SEL 0 /* Null Descriptor */ #define GPRIV_SEL 1 /* SMP Per-Processor Private Data */ #define GUFS_SEL 2 /* User %fs Descriptor (order critical: 1) */ #define GUGS_SEL 3 /* User %gs Descriptor (order critical: 2) */ #define GCODE_SEL 4 /* Kernel Code Descriptor (order critical: 1) */ #define GDATA_SEL 5 /* Kernel Data Descriptor (order critical: 2) */ #define GUCODE_SEL 6 /* User Code Descriptor (order critical: 3) */ #define GUDATA_SEL 7 /* User Data Descriptor (order critical: 4) */ #define GBIOSLOWMEM_SEL 8 /* BIOS low memory access (must be entry 8) */ #define GPROC0_SEL 9 /* Task state process slot zero and up */ #define GLDT_SEL 10 /* Default User LDT */ #define GUSERLDT_SEL 11 /* User LDT */ #define GPANIC_SEL 12 /* Task state to consider panic from */ #define GBIOSCODE32_SEL 13 /* BIOS interface (32bit Code) */ #define GBIOSCODE16_SEL 14 /* BIOS interface (16bit Code) */ #define GBIOSDATA_SEL 15 /* BIOS interface (Data) */ #define GBIOSUTIL_SEL 16 /* BIOS interface (Utility) */ #define GBIOSARGS_SEL 17 /* BIOS interface (Arguments) */ #define GNDIS_SEL 18 /* For the NDIS layer */ #ifdef XEN #define NGDT 9 #else #define NGDT 19 #endif /* * Entries in the Local Descriptor Table (LDT) */ #define LSYS5CALLS_SEL 0 /* forced by intel BCS */ #define LSYS5SIGR_SEL 1 #define L43BSDCALLS_SEL 2 /* notyet */ #define LUCODE_SEL 3 #define LSOL26CALLS_SEL 4 /* Solaris >= 2.6 system call gate */ #define LUDATA_SEL 5 /* separate stack, es,fs,gs sels ? */ /* #define LPOSIXCALLS_SEL 5*/ /* notyet */ #define LBSDICALLS_SEL 16 /* BSDI system call gate */ #define NLDT (LBSDICALLS_SEL + 1) #else /* !__i386__ && !__ia64__ */ /* * Entries in the Global Descriptor Table (GDT) */ #define GNULL_SEL 0 /* Null Descriptor */ #define GNULL2_SEL 1 /* Null Descriptor */ #define GUFS32_SEL 2 /* User 32 bit %fs Descriptor */ #define GUGS32_SEL 3 /* User 32 bit %gs Descriptor */ #define GCODE_SEL 4 /* Kernel Code Descriptor */ #define GDATA_SEL 5 /* Kernel Data Descriptor */ #define GUCODE32_SEL 6 /* User 32 bit code Descriptor */ #define GUDATA_SEL 7 /* User 32/64 bit Data Descriptor */ #define GUCODE_SEL 8 /* User 64 bit Code Descriptor */ #define GPROC0_SEL 9 /* TSS for entering kernel etc */ /* slot 10 is second half of GPROC0_SEL */ #define GUSERLDT_SEL 11 /* LDT */ /* slot 12 is second half of GUSERLDT_SEL */ #define NGDT 13 #endif /* __i386__ || __ia64__ */ #endif /* !_X86_SEGMENTS_H_ */ diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 42ffa48f7365..ac651cdcd64d 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -1,1516 +1,1524 @@ /*- * Copyright (c) 2003 John Baldwin * Copyright (c) 1996, by Steve Passe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the developer may NOT be used to endorse or promote products * derived from this software without specific prior written permission. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Local APIC support on Pentium and later processors. */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef __amd64__ #define SDT_APIC SDT_SYSIGT #define SDT_APICT SDT_SYSIGT #define GSEL_APIC 0 #else #define SDT_APIC SDT_SYS386IGT #define SDT_APICT SDT_SYS386TGT #define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL) #endif /* Sanity checks on IDT vectors. */ CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT); CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); /* Magic IRQ values for the timer and syscalls. */ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) +#define IRQ_EVTCHN (NUM_IO_INTS + 4) /* * Support for local APICs. Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts * from I/O devices and then forward them on to the local APICs. * * Local APICs can also send interrupts to each other thus providing the * mechanism for IPIs. */ struct lvt { u_int lvt_edgetrigger:1; u_int lvt_activehi:1; u_int lvt_masked:1; u_int lvt_active:1; u_int lvt_mode:16; u_int lvt_vector:8; }; struct lapic { struct lvt la_lvts[LVT_MAX + 1]; u_int la_id:8; u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; u_long *la_timer_count; u_long la_timer_period; u_int la_timer_mode; uint32_t lvt_timer_cache; /* Include IDT_SYSCALL to make indexing easier. */ int la_ioint_irqs[APIC_NUM_IOINTS + 1]; } static lapics[MAX_APIC_ID + 1]; /* Global defaults for local APIC LVT entries. 
*/ static struct lvt lvts[LVT_MAX + 1] = { { 1, 1, 1, 1, APIC_LVT_DM_EXTINT, 0 }, /* LINT0: masked ExtINT */ { 1, 1, 0, 1, APIC_LVT_DM_NMI, 0 }, /* LINT1: NMI */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_TIMER_INT }, /* Timer */ { 1, 1, 0, 1, APIC_LVT_DM_FIXED, APIC_ERROR_INT }, /* Error */ { 1, 1, 1, 1, APIC_LVT_DM_NMI, 0 }, /* PMC */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_THERMAL_INT }, /* Thermal */ { 1, 1, 1, 1, APIC_LVT_DM_FIXED, APIC_CMC_INT }, /* CMCI */ }; static inthand_t *ioint_handlers[] = { NULL, /* 0 - 31 */ IDTVEC(apic_isr1), /* 32 - 63 */ IDTVEC(apic_isr2), /* 64 - 95 */ IDTVEC(apic_isr3), /* 96 - 127 */ IDTVEC(apic_isr4), /* 128 - 159 */ IDTVEC(apic_isr5), /* 160 - 191 */ IDTVEC(apic_isr6), /* 192 - 223 */ IDTVEC(apic_isr7), /* 224 - 255 */ }; static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; extern inthand_t IDTVEC(rsvd); volatile lapic_t *lapic; vm_paddr_t lapic_paddr; static u_long lapic_timer_divisor; static struct eventtimer lapic_et; static void lapic_enable(void); static void lapic_resume(struct pic *pic); static void lapic_timer_oneshot(struct lapic *, u_int count, int enable_int); static void lapic_timer_periodic(struct lapic *, u_int count, int enable_int); static void lapic_timer_stop(struct lapic *); static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period); static int lapic_et_stop(struct eventtimer *et); struct pic lapic_pic = { .pic_resume = lapic_resume }; static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value) { struct lvt *lvt; KASSERT(pin <= LVT_MAX, ("%s: pin %u out of range", __func__, pin)); if (la->la_lvts[pin].lvt_active) lvt = &la->la_lvts[pin]; else lvt = &lvts[pin]; value &= ~(APIC_LVT_M | APIC_LVT_TM | APIC_LVT_IIPP | APIC_LVT_DM | APIC_LVT_VECTOR); if (lvt->lvt_edgetrigger == 0) value |= APIC_LVT_TM; if (lvt->lvt_activehi == 0) value |= APIC_LVT_IIPP_INTALO; if (lvt->lvt_masked) value |= APIC_LVT_M; value |= lvt->lvt_mode; switch (lvt->lvt_mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: if (!lvt->lvt_edgetrigger) { printf("lapic%u: Forcing LINT%u to edge trigger\n", la->la_id, pin); value |= APIC_LVT_TM; } /* Use a vector of 0. */ break; case APIC_LVT_DM_FIXED: value |= lvt->lvt_vector; break; default: panic("bad APIC LVT delivery mode: %#x\n", value); } return (value); } /* * Map the local APIC and setup necessary interrupt vectors. */ void lapic_init(vm_paddr_t addr) { u_int regs[4]; int i, arat; /* Map the local APIC and setup the spurious interrupt handler. */ KASSERT(trunc_page(addr) == addr, ("local APIC not aligned on a page boundary")); lapic_paddr = addr; lapic = pmap_mapdev(addr, sizeof(lapic_t)); setidt(APIC_SPURIOUS_INT, IDTVEC(spuriousint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Perform basic initialization of the BSP's local APIC. */ lapic_enable(); /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); /* Local APIC timer interrupt. */ setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_APIC, SEL_KPL, GSEL_APIC); /* Local APIC error interrupt. */ setidt(APIC_ERROR_INT, IDTVEC(errorint), SDT_APIC, SEL_KPL, GSEL_APIC); /* XXX: Thermal interrupt */ /* Local APIC CMCI. 
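The ioint_handlers[] table above assigns one apic_isrN entry point to each 32-vector block of the in-service register, and the entry point reports vectors back to lapic_handle_intr() as 32 * block + bit. For example, vector 77:

        block = 77 / 32 = 2        ->  apic_isr2 (vectors 64-95)
        bit   = 77 % 32 = 13
        vector passed on = 32 * 2 + 13 = 77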
*/ setidt(APIC_CMC_INT, IDTVEC(cmcint), SDT_APICT, SEL_KPL, GSEL_APIC); if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) { arat = 0; /* Intel CPUID 0x06 EAX[2] set if APIC timer runs in C3. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_high >= 6) { do_cpuid(0x06, regs); if ((regs[0] & CPUTPM1_ARAT) != 0) arat = 1; } bzero(&lapic_et, sizeof(lapic_et)); lapic_et.et_name = "LAPIC"; lapic_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; lapic_et.et_quality = 600; if (!arat) { lapic_et.et_flags |= ET_FLAGS_C3STOP; lapic_et.et_quality -= 200; } lapic_et.et_frequency = 0; /* We don't know frequency yet, so trying to guess. */ lapic_et.et_min_period = 0x00001000LL; lapic_et.et_max_period = SBT_1S; lapic_et.et_start = lapic_et_start; lapic_et.et_stop = lapic_et_stop; lapic_et.et_priv = NULL; et_register(&lapic_et); } } /* * Create a local APIC instance. */ void lapic_create(u_int apic_id, int boot_cpu) { int i; if (apic_id > MAX_APIC_ID) { printf("APIC: Ignoring local APIC with ID %d\n", apic_id); if (boot_cpu) panic("Can't ignore BSP"); return; } KASSERT(!lapics[apic_id].la_present, ("duplicate local APIC %u", apic_id)); /* * Assume no local LVT overrides and a cluster of 0 and * intra-cluster ID of 0. */ lapics[apic_id].la_present = 1; lapics[apic_id].la_id = apic_id; for (i = 0; i <= LVT_MAX; i++) { lapics[apic_id].la_lvts[i] = lvts[i]; lapics[apic_id].la_lvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) lapics[apic_id].la_ioint_irqs[i] = -1; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif +#ifdef XENHVM + lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; +#endif #ifdef SMP cpu_add(apic_id, boot_cpu); #endif } /* * Dump contents of local APIC registers */ void lapic_dump(const char* str) { uint32_t maxlvt; maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; printf("cpu%d %s:\n", PCPU_GET(cpuid), str); printf(" ID: 0x%08x VER: 0x%08x LDR: 0x%08x DFR: 0x%08x\n", lapic->id, lapic->version, lapic->ldr, lapic->dfr); printf(" lint0: 0x%08x lint1: 0x%08x TPR: 0x%08x SVR: 0x%08x\n", lapic->lvt_lint0, lapic->lvt_lint1, lapic->tpr, lapic->svr); printf(" timer: 0x%08x therm: 0x%08x err: 0x%08x", lapic->lvt_timer, lapic->lvt_thermal, lapic->lvt_error); if (maxlvt >= LVT_PMC) printf(" pmc: 0x%08x", lapic->lvt_pcint); printf("\n"); if (maxlvt >= LVT_CMCI) printf(" cmci: 0x%08x\n", lapic->lvt_cmci); } void lapic_setup(int boot) { struct lapic *la; u_int32_t maxlvt; register_t saveintr; char buf[MAXCOMLEN + 1]; la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); saveintr = intr_disable(); maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; /* Initialize the TPR to allow all interrupts. */ lapic_set_tpr(0); /* Setup spurious vector and enable the local APIC. */ lapic_enable(); /* Program LINT[01] LVT entries. */ lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0); lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1); /* Program the PMC LVT entry if present. */ if (maxlvt >= LVT_PMC) lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint); /* Program timer LVT and setup handler. 
*/ la->lvt_timer_cache = lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer); if (boot) { snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); intrcnt_add(buf, &la->la_timer_count); } /* Setup the timer if configured. */ if (la->la_timer_mode != 0) { KASSERT(la->la_timer_period != 0, ("lapic%u: zero divisor", lapic_id())); lapic_timer_set_divisor(lapic_timer_divisor); if (la->la_timer_mode == 1) lapic_timer_periodic(la, la->la_timer_period, 1); else lapic_timer_oneshot(la, la->la_timer_period, 1); } /* Program error LVT and clear any existing errors. */ lapic->lvt_error = lvt_mode(la, LVT_ERROR, lapic->lvt_error); lapic->esr = 0; /* XXX: Thermal LVT */ /* Program the CMCI LVT entry if present. */ if (maxlvt >= LVT_CMCI) lapic->lvt_cmci = lvt_mode(la, LVT_CMCI, lapic->lvt_cmci); intr_restore(saveintr); } void lapic_reenable_pmc(void) { #ifdef HWPMC_HOOKS uint32_t value; value = lapic->lvt_pcint; value &= ~APIC_LVT_M; lapic->lvt_pcint = value; #endif } #ifdef HWPMC_HOOKS static void lapic_update_pmc(void *dummy) { struct lapic *la; la = &lapics[lapic_id()]; lapic->lvt_pcint = lvt_mode(la, LVT_PMC, lapic->lvt_pcint); } #endif int lapic_enable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (lapic == NULL) return (0); /* Fail if the PMC LVT is not present. */ maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < LVT_PMC) return (0); lvts[LVT_PMC].lvt_masked = 0; #ifdef SMP /* * If hwpmc was loaded at boot time then the APs may not be * started yet. In that case, don't forward the request to * them as they will program the lvt when they start. */ if (smp_started) smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); else #endif lapic_update_pmc(NULL); return (1); #else return (0); #endif } void lapic_disable_pmc(void) { #ifdef HWPMC_HOOKS u_int32_t maxlvt; /* Fail if the local APIC is not present. */ if (lapic == NULL) return; /* Fail if the PMC LVT is not present. */ maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT; if (maxlvt < LVT_PMC) return; lvts[LVT_PMC].lvt_masked = 1; #ifdef SMP /* The APs should always be started when hwpmc is unloaded. */ KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early")); #endif smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL); #endif } static int lapic_et_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct lapic *la; u_long value; la = &lapics[PCPU_GET(apic_id)]; if (et->et_frequency == 0) { /* Start off with a divisor of 2 (power on reset default). */ lapic_timer_divisor = 2; /* Try to calibrate the local APIC timer. 
*/ do { lapic_timer_set_divisor(lapic_timer_divisor); lapic_timer_oneshot(la, APIC_TIMER_MAX_COUNT, 0); DELAY(1000000); value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer; if (value != APIC_TIMER_MAX_COUNT) break; lapic_timer_divisor <<= 1; } while (lapic_timer_divisor <= 128); if (lapic_timer_divisor > 128) panic("lapic: Divisor too big"); if (bootverbose) printf("lapic: Divisor %lu, Frequency %lu Hz\n", lapic_timer_divisor, value); et->et_frequency = value; et->et_min_period = (0x00000002LLU << 32) / et->et_frequency; et->et_max_period = (0xfffffffeLLU << 32) / et->et_frequency; } if (la->la_timer_mode == 0) lapic_timer_set_divisor(lapic_timer_divisor); if (period != 0) { la->la_timer_mode = 1; la->la_timer_period = ((uint32_t)et->et_frequency * period) >> 32; lapic_timer_periodic(la, la->la_timer_period, 1); } else { la->la_timer_mode = 2; la->la_timer_period = ((uint32_t)et->et_frequency * first) >> 32; lapic_timer_oneshot(la, la->la_timer_period, 1); } return (0); } static int lapic_et_stop(struct eventtimer *et) { struct lapic *la = &lapics[PCPU_GET(apic_id)]; la->la_timer_mode = 0; lapic_timer_stop(la); return (0); } void lapic_disable(void) { uint32_t value; /* Software disable the local APIC. */ value = lapic->svr; value &= ~APIC_SVR_SWEN; lapic->svr = value; } static void lapic_enable(void) { u_int32_t value; /* Program the spurious vector to enable the local APIC. */ value = lapic->svr; value &= ~(APIC_SVR_VECTOR | APIC_SVR_FOCUS); value |= (APIC_SVR_FEN | APIC_SVR_SWEN | APIC_SPURIOUS_INT); lapic->svr = value; } /* Reset the local APIC on the BSP during resume. */ static void lapic_resume(struct pic *pic) { lapic_setup(0); } int lapic_id(void) { KASSERT(lapic != NULL, ("local APIC is not mapped")); return (lapic->id >> APIC_ID_SHIFT); } int lapic_intr_pending(u_int vector) { volatile u_int32_t *irr; /* * The IRR registers are an array of 128-bit registers each of * which only describes 32 interrupts in the low 32 bits.. Thus, * we divide the vector by 32 to get the 128-bit index. We then * multiply that index by 4 to get the equivalent index from * treating the IRR as an array of 32-bit registers. Finally, we * modulus the vector by 32 to determine the individual bit to * test. */ irr = &lapic->irr0; return (irr[(vector / 32) * 4] & 1 << (vector % 32)); } void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id) { struct lapic *la; KASSERT(lapics[apic_id].la_present, ("%s: APIC %u doesn't exist", __func__, apic_id)); KASSERT(cluster <= APIC_MAX_CLUSTER, ("%s: cluster %u too big", __func__, cluster)); KASSERT(cluster_id <= APIC_MAX_INTRACLUSTER_ID, ("%s: intra cluster id %u too big", __func__, cluster_id)); la = &lapics[apic_id]; la->la_cluster = cluster; la->la_cluster_id = cluster_id; } int lapic_set_lvt_mask(u_int apic_id, u_int pin, u_char masked) { if (pin > LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_masked = masked; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_masked = masked; lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u %s\n", pin, masked ? 
"masked" : "unmasked"); return (0); } int lapic_set_lvt_mode(u_int apic_id, u_int pin, u_int32_t mode) { struct lvt *lvt; if (pin > LVT_MAX) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvt = &lvts[pin]; if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lvt = &lapics[apic_id].la_lvts[pin]; lvt->lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } lvt->lvt_mode = mode; switch (mode) { case APIC_LVT_DM_NMI: case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: lvt->lvt_edgetrigger = 1; lvt->lvt_activehi = 1; if (mode == APIC_LVT_DM_EXTINT) lvt->lvt_masked = 1; else lvt->lvt_masked = 0; break; default: panic("Unsupported delivery mode: 0x%x\n", mode); } if (bootverbose) { printf(" Routing "); switch (mode) { case APIC_LVT_DM_NMI: printf("NMI"); break; case APIC_LVT_DM_SMI: printf("SMI"); break; case APIC_LVT_DM_INIT: printf("INIT"); break; case APIC_LVT_DM_EXTINT: printf("ExtINT"); break; } printf(" -> LINT%u\n", pin); } return (0); } int lapic_set_lvt_polarity(u_int apic_id, u_int pin, enum intr_polarity pol) { if (pin > LVT_MAX || pol == INTR_POLARITY_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_active = 1; lapics[apic_id].la_lvts[pin].lvt_activehi = (pol == INTR_POLARITY_HIGH); if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u polarity: %s\n", pin, pol == INTR_POLARITY_HIGH ? "high" : "low"); return (0); } int lapic_set_lvt_triggermode(u_int apic_id, u_int pin, enum intr_trigger trigger) { if (pin > LVT_MAX || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (apic_id == APIC_ID_ALL) { lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (bootverbose) printf("lapic:"); } else { KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[pin].lvt_edgetrigger = (trigger == INTR_TRIGGER_EDGE); lapics[apic_id].la_lvts[pin].lvt_active = 1; if (bootverbose) printf("lapic%u:", apic_id); } if (bootverbose) printf(" LINT%u trigger: %s\n", pin, trigger == INTR_TRIGGER_EDGE ? "edge" : "level"); return (0); } /* * Adjust the TPR of the current CPU so that it blocks all interrupts below * the passed in vector. */ void lapic_set_tpr(u_int vector) { #ifdef CHEAP_TPR lapic->tpr = vector; #else u_int32_t tpr; tpr = lapic->tpr & ~APIC_TPR_PRIO; tpr |= vector; lapic->tpr = tpr; #endif } void lapic_eoi(void) { lapic->eoi = 0; } void lapic_handle_intr(int vector, struct trapframe *frame) { struct intsrc *isrc; isrc = intr_lookup_source(apic_idt_to_irq(PCPU_GET(apic_id), vector)); intr_execute_handlers(isrc, frame); } void lapic_handle_timer(struct trapframe *frame) { struct lapic *la; struct trapframe *oldframe; struct thread *td; /* Send EOI first thing. */ lapic_eoi(); #if defined(SMP) && !defined(SCHED_ULE) /* * Don't do any accounting for the disabled HTT cores, since it * will provide misleading numbers for the userland. * * No locking is necessary here, since even if we lose the race * when hlt_cpus_mask changes it is not a big deal, really. * * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask * and unlike other schedulers it actually schedules threads to * those CPUs. 
*/ if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif /* Look up our local APIC structure for the tick counters. */ la = &lapics[PCPU_GET(apic_id)]; (*la->la_timer_count)++; critical_enter(); if (lapic_et.et_active) { td = curthread; td->td_intr_nesting_level++; oldframe = td->td_intr_frame; td->td_intr_frame = frame; lapic_et.et_event_cb(&lapic_et, lapic_et.et_arg); td->td_intr_frame = oldframe; td->td_intr_nesting_level--; } critical_exit(); } static void lapic_timer_set_divisor(u_int divisor) { KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) / sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor)); lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1]; } static void lapic_timer_oneshot(struct lapic *la, u_int count, int enable_int) { u_int32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_ONE_SHOT; if (enable_int) value &= ~APIC_LVT_M; lapic->lvt_timer = value; lapic->icr_timer = count; } static void lapic_timer_periodic(struct lapic *la, u_int count, int enable_int) { u_int32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVTT_TM_PERIODIC; if (enable_int) value &= ~APIC_LVT_M; lapic->lvt_timer = value; lapic->icr_timer = count; } static void lapic_timer_stop(struct lapic *la) { u_int32_t value; value = la->lvt_timer_cache; value &= ~APIC_LVTT_TM; value |= APIC_LVT_M; lapic->lvt_timer = value; } void lapic_handle_cmc(void) { lapic_eoi(); cmc_intr(); } /* * Called from the mca_init() to activate the CMC interrupt if this CPU is * responsible for monitoring any MC banks for CMC events. Since mca_init() * is called prior to lapic_setup() during boot, this just needs to unmask * this CPU's LVT_CMCI entry. */ void lapic_enable_cmc(void) { u_int apic_id; #ifdef DEV_ATPIC if (lapic == NULL) return; #endif apic_id = PCPU_GET(apic_id); KASSERT(lapics[apic_id].la_present, ("%s: missing APIC %u", __func__, apic_id)); lapics[apic_id].la_lvts[LVT_CMCI].lvt_masked = 0; lapics[apic_id].la_lvts[LVT_CMCI].lvt_active = 1; if (bootverbose) printf("lapic%u: CMCI unmasked\n", apic_id); } void lapic_handle_error(void) { u_int32_t esr; /* * Read the contents of the error status register. Write to * the register first before reading from it to force the APIC * to update its value to indicate any errors that have * occurred since the previous write to the register. */ lapic->esr = 0; esr = lapic->esr; printf("CPU%d: local APIC error 0x%x\n", PCPU_GET(cpuid), esr); lapic_eoi(); } u_int apic_cpuid(u_int apic_id) { #ifdef SMP return apic_cpuids[apic_id]; #else return 0; #endif } /* Request a free IDT vector to be used by the specified IRQ. */ u_int apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple * algorithm to find the first free vector. */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { if (lapics[apic_id].la_ioint_irqs[vector] != -1) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); return (0); } /* * Request 'count' free contiguous IDT vectors to be used by 'count' * IRQs. 'count' must be a power of two and the vectors will be * aligned on a boundary of 'align'. If the request cannot be * satisfied, 0 is returned. 
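 *
 * Editor's example (illustrative, hypothetical IRQ numbers): a device
 * asking for a 4-message MSI group needs count = 4 and align = 4.  If
 * the first three offsets in the I/O vector range are already taken, the
 * first-fit scan below skips offset 3 (not aligned) and hands back the
 * aligned run starting at offset 4:
 *
 *	u_int irqs[4] = { 256, 257, 258, 259 };	// example MSI IRQ numbers
 *	u_int vector;
 *
 *	vector = apic_alloc_vectors(apic_id, irqs, 4, 4);
 *	if (vector == 0)
 *		return (ENOSPC);		// no aligned run available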
*/ u_int apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) { u_int first, run, vector; KASSERT(powerof2(count), ("bad count")); KASSERT(powerof2(align), ("bad align")); KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif /* * Search for 'count' free vectors. As with apic_alloc_vector(), * this just uses a simple first fit algorithm. */ run = 0; first = 0; mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ if (lapics[apic_id].la_ioint_irqs[vector] != -1) { run = 0; first = 0; continue; } /* Start a new run if run == 0 and vector is aligned. */ if (run == 0) { if ((vector & (align - 1)) != 0) continue; first = vector; } run++; /* Keep looping if the run isn't long enough yet. */ if (run < count) continue; /* Found a run, assign IRQs and return the first vector. */ for (vector = 0; vector < count; vector++) lapics[apic_id].la_ioint_irqs[first + vector] = irqs[vector]; mtx_unlock_spin(&icu_lock); return (first + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); return (0); } /* * Enable a vector for a particular apic_id. Since all lapics share idt * entries and ioint_handlers this enables the vector on all lapics. lapics * which do not have the vector configured would report spurious interrupts * should it fire. */ void apic_enable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif setidt(vector, ioint_handlers[vector / 32], SDT_APIC, SEL_KPL, GSEL_APIC); } void apic_disable_vector(u_int apic_id, u_int vector) { KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); #ifdef notyet /* * We can not currently clear the idt entry because other cpus * may have a valid vector at this offset. */ setidt(vector, &IDTVEC(rsvd), SDT_APICT, SEL_KPL, GSEL_APIC); #endif } /* Release an APIC vector when it's no longer in use. */ void apic_free_vector(u_int apic_id, u_int vector, u_int irq) { struct thread *td; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif /* * Bind us to the cpu that owned the vector before freeing it so * we don't lose an interrupt delivery race. */ td = curthread; if (!rebooting) { thread_lock(td); if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); thread_unlock(td); } mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); sched_unbind(td); thread_unlock(td); } } /* Map an IDT vector (APIC) to an IRQ (interrupt source). 
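 *
 * Editor's note: this is the inverse of the vector allocators above.  The
 * lookup performed on behalf of lapic_handle_intr() is a plain index into
 * the owning CPU's table, with free slots (-1) clamped to IRQ 0:
 *
 *	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
 *	if (irq < 0)
 *		irq = 0;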
*/ u_int apic_idt_to_irq(u_int apic_id, u_int vector) { int irq; KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); #ifdef KDTRACE_HOOKS KASSERT(vector != IDT_DTRACE_RET, ("Attempt to overwrite DTrace entry")); #endif irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]; if (irq < 0) irq = 0; return (irq); } #ifdef DDB /* * Dump data about APIC IDT vector mappings. */ DB_SHOW_COMMAND(apic, db_show_apic) { struct intsrc *isrc; int i, verbose; u_int apic_id; u_int irq; if (strcmp(modif, "vv") == 0) verbose = 2; else if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; for (apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { if (lapics[apic_id].la_present == 0) continue; db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; if (irq == -1 || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) continue; +#endif +#ifdef XENHVM + if (irq == IRQ_EVTCHN) + continue; #endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); else if (irq < NUM_IO_INTS) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); else db_dump_intr_event(isrc->is_event, verbose == 2); } else db_printf("IRQ %u ???\n", irq); } } } static void dump_mask(const char *prefix, uint32_t v, int base) { int i, first; first = 1; for (i = 0; i < 32; i++) if (v & (1 << i)) { if (first) { db_printf("%s:", prefix); first = 0; } db_printf(" %02x", base + i); } if (!first) db_printf("\n"); } /* Show info from the lapic regs for this CPU. */ DB_SHOW_COMMAND(lapic, db_show_lapic) { uint32_t v; db_printf("lapic ID = %d\n", lapic_id()); v = lapic->version; db_printf("version = %d.%d\n", (v & APIC_VER_VERSION) >> 4, v & 0xf); db_printf("max LVT = %d\n", (v & APIC_VER_MAXLVT) >> MAXLVTSHIFT); v = lapic->svr; db_printf("SVR = %02x (%s)\n", v & APIC_SVR_VECTOR, v & APIC_SVR_ENABLE ? "enabled" : "disabled"); db_printf("TPR = %02x\n", lapic->tpr); #define dump_field(prefix, index) \ dump_mask(__XSTRING(prefix ## index), lapic->prefix ## index, \ index * 32) db_printf("In-service Interrupts:\n"); dump_field(isr, 0); dump_field(isr, 1); dump_field(isr, 2); dump_field(isr, 3); dump_field(isr, 4); dump_field(isr, 5); dump_field(isr, 6); dump_field(isr, 7); db_printf("TMR Interrupts:\n"); dump_field(tmr, 0); dump_field(tmr, 1); dump_field(tmr, 2); dump_field(tmr, 3); dump_field(tmr, 4); dump_field(tmr, 5); dump_field(tmr, 6); dump_field(tmr, 7); db_printf("IRR Interrupts:\n"); dump_field(irr, 0); dump_field(irr, 1); dump_field(irr, 2); dump_field(irr, 3); dump_field(irr, 4); dump_field(irr, 5); dump_field(irr, 6); dump_field(irr, 7); #undef dump_field } #endif /* * APIC probing support code. This includes code to manage enumerators. 
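 *
 * Editor's sketch (hypothetical enumerator, illustrative only): the MP
 * Table and ACPI MADT code register themselves here from early SYSINITs,
 * and apic_init() keeps the enumerator whose probe value is closest to
 * zero; a positive return drops it from consideration.  A new enumerator
 * would look roughly like:
 *
 *	static int foo_probe(void)       { return (-100); }  // low priority
 *	static int foo_probe_cpus(void)  { return (0); }
 *	static int foo_setup_local(void) { return (0); }
 *	static int foo_setup_io(void)    { return (0); }
 *
 *	static struct apic_enumerator foo_enumerator = {
 *		.apic_name = "foo",
 *		.apic_probe = foo_probe,
 *		.apic_probe_cpus = foo_probe_cpus,
 *		.apic_setup_local = foo_setup_local,
 *		.apic_setup_io = foo_setup_io,
 *	};
 *
 *	apic_register_enumerator(&foo_enumerator);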
*/ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; void apic_register_enumerator(struct apic_enumerator *enumerator) { #ifdef INVARIANTS struct apic_enumerator *apic_enum; SLIST_FOREACH(apic_enum, &enumerators, apic_next) { if (apic_enum == enumerator) panic("%s: Duplicate register of %s", __func__, enumerator->apic_name); } #endif SLIST_INSERT_HEAD(&enumerators, enumerator, apic_next); } /* * We have to look for CPU's very, very early because certain subsystems * want to know how many CPU's we have extremely early on in the boot * process. */ static void apic_init(void *dummy __unused) { struct apic_enumerator *enumerator; #ifndef __amd64__ uint64_t apic_base; #endif int retval, best; /* We only support built in local APICs. */ if (!(cpu_feature & CPUID_APIC)) return; /* Don't probe if APIC mode is disabled. */ if (resource_disabled("apic", 0)) return; /* Probe all the enumerators to find the best match. */ best_enum = NULL; best = 0; SLIST_FOREACH(enumerator, &enumerators, apic_next) { retval = enumerator->apic_probe(); if (retval > 0) continue; if (best_enum == NULL || best < retval) { best_enum = enumerator; best = retval; } } if (best_enum == NULL) { if (bootverbose) printf("APIC: Could not find any APICs.\n"); #ifndef DEV_ATPIC panic("running without device atpic requires a local APIC"); #endif return; } if (bootverbose) printf("APIC: Using the %s enumerator.\n", best_enum->apic_name); #ifndef __amd64__ /* * To work around an errata, we disable the local APIC on some * CPUs during early startup. We need to turn the local APIC back * on on such CPUs now. */ if (cpu == CPU_686 && cpu_vendor_id == CPU_VENDOR_INTEL && (cpu_id & 0xff0) == 0x610) { apic_base = rdmsr(MSR_APICBASE); apic_base |= APICBASE_ENABLED; wrmsr(MSR_APICBASE, apic_base); } #endif /* Probe the CPU's in the system. */ retval = best_enum->apic_probe_cpus(); if (retval != 0) printf("%s: Failed to probe CPUs: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_init, SI_SUB_TUNABLES - 1, SI_ORDER_SECOND, apic_init, NULL); /* * Setup the local APIC. We have to do this prior to starting up the APs * in the SMP case. */ static void apic_setup_local(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* Initialize the local APIC. */ retval = best_enum->apic_setup_local(); if (retval != 0) printf("%s: Failed to setup the local APIC: returned %d\n", best_enum->apic_name, retval); } SYSINIT(apic_setup_local, SI_SUB_CPU, SI_ORDER_SECOND, apic_setup_local, NULL); /* * Setup the I/O APICs. */ static void apic_setup_io(void *dummy __unused) { int retval; if (best_enum == NULL) return; /* * Local APIC must be registered before other PICs and pseudo PICs * for proper suspend/resume order. */ #ifndef XEN intr_register_pic(&lapic_pic); #endif retval = best_enum->apic_setup_io(); if (retval != 0) printf("%s: Failed to setup I/O APICs: returned %d\n", best_enum->apic_name, retval); #ifdef XEN return; #endif /* * Finish setting up the local APIC on the BSP once we know how to * properly program the LINT pins. */ lapic_setup(1); if (bootverbose) lapic_dump("BSP"); /* Enable the MSI "pic". */ msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL); #ifdef SMP /* * Inter Processor Interrupt functions. The lapic_ipi_*() functions are * private to the MD code. The public interface for the rest of the * kernel is defined in mp_machdep.c. 
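 *
 * Editor's sketch (illustrative): the usual consumer is the MI SMP code,
 * which funnels into lapic_ipi_vectored().  Poking one CPU with an AST,
 * for example, reduces to:
 *
 *	// dest is a raw APIC ID, or APIC_IPI_DEST_SELF/_ALL/_OTHERS
 *	lapic_ipi_vectored(IPI_AST, cpu_apic_ids[cpu]);
 *
 * lapic_ipi_wait() below spins until the previous command has left
 * APIC_DELSTAT_PEND, and lapic_ipi_raw() writes ICR_HI (destination)
 * before ICR_LO (vector, delivery mode, trigger) so the command is
 * dispatched with the intended target in place.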
*/ int lapic_ipi_wait(int delay) { int x, incr; /* * Wait delay loops for IPI to be sent. This is highly bogus * since this is sensitive to CPU clock speed. If delay is * -1, we wait forever. */ if (delay == -1) { incr = 0; delay = 1; } else incr = 1; for (x = 0; x < delay; x += incr) { if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); ia32_pause(); } return (0); } void lapic_ipi_raw(register_t icrlo, u_int dest) { register_t value, saveintr; /* XXX: Need more sanity checking of icrlo? */ KASSERT(lapic != NULL, ("%s called too early", __func__)); KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid dest field", __func__)); KASSERT((icrlo & APIC_ICRLO_RESV_MASK) == 0, ("%s: reserved bits set in ICR LO register", __func__)); /* Set destination in ICR HI register if it is being used. */ saveintr = intr_disable(); if ((icrlo & APIC_DEST_MASK) == APIC_DEST_DESTFLD) { value = lapic->icr_hi; value &= ~APIC_ID_MASK; value |= dest << APIC_ID_SHIFT; lapic->icr_hi = value; } /* Program the contents of the IPI and dispatch it. */ value = lapic->icr_lo; value &= APIC_ICRLO_RESV_MASK; value |= icrlo; lapic->icr_lo = value; intr_restore(saveintr); } #define BEFORE_SPIN 1000000 #ifdef DETECT_DEADLOCK #define AFTER_SPIN 1000 #endif void lapic_ipi_vectored(u_int vector, int dest) { register_t icrlo, destfield; KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE; /* * IPI_STOP_HARD is just a "fake" vector used to send a NMI. * Use special rules regard NMI if passed, otherwise specify * the vector. */ if (vector == IPI_STOP_HARD) icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT; else icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: icrlo |= APIC_DEST_SELF; break; case APIC_IPI_DEST_ALL: icrlo |= APIC_DEST_ALLISELF; break; case APIC_IPI_DEST_OTHERS: icrlo |= APIC_DEST_ALLESELF; break; default: KASSERT((dest & ~(APIC_ID_MASK >> APIC_ID_SHIFT)) == 0, ("%s: invalid destination 0x%x", __func__, dest)); destfield = dest; } /* Wait for an earlier IPI to finish. */ if (!lapic_ipi_wait(BEFORE_SPIN)) { if (panicstr != NULL) return; else panic("APIC: Previous IPI is stuck"); } lapic_ipi_raw(icrlo, destfield); #ifdef DETECT_DEADLOCK /* Wait for IPI to be delivered. */ if (!lapic_ipi_wait(AFTER_SPIN)) { #ifdef needsattention /* * XXX FIXME: * * The above function waits for the message to actually be * delivered. It breaks out after an arbitrary timeout * since the message should eventually be delivered (at * least in theory) and that if it wasn't we would catch * the failure with the check above when the next IPI is * sent. * * We could skip this wait entirely, EXCEPT it probably * protects us from other routines that assume that the * message was delivered and acted upon when this function * returns. */ printf("APIC: IPI might be stuck\n"); #else /* !needsattention */ /* Wait until mesage is sent without a timeout. */ while (lapic->icr_lo & APIC_DELSTAT_PEND) ia32_pause(); #endif /* needsattention */ } #endif /* DETECT_DEADLOCK */ } #endif /* SMP */ diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c new file mode 100644 index 000000000000..0730d941afd9 --- /dev/null +++ b/sys/x86/xen/hvm.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2008 Citrix Systems, Inc. + * Copyright (c) 2012 Spectra Logic Corporation + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); + +DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); +DPCPU_DEFINE(struct vcpu_info *, vcpu_info); + +/*-------------------------------- Global Data -------------------------------*/ +/** + * If non-zero, the hypervisor has been configured to use a direct + * IDT event callback for interrupt injection. + */ +int xen_vector_callback_enabled; + +/*------------------ Hypervisor Access Shared Memory Regions -----------------*/ +/** Hypercall table accessed via HYPERVISOR_*_op() methods. */ +char *hypercall_stubs; +shared_info_t *HYPERVISOR_shared_info; +enum xen_domain_type xen_domain_type = XEN_NATIVE; + +static uint32_t +xen_hvm_cpuid_base(void) +{ + uint32_t base, regs[4]; + + for (base = 0x40000000; base < 0x40010000; base += 0x100) { + do_cpuid(base, regs); + if (!memcmp("XenVMMXenVMM", ®s[1], 12) + && (regs[0] - base) >= 2) + return (base); + } + return (0); +} + +/* + * Allocate and fill in the hypcall page. + */ +static int +xen_hvm_init_hypercall_stubs(void) +{ + uint32_t base, regs[4]; + int i; + + base = xen_hvm_cpuid_base(); + if (!base) + return (ENXIO); + + if (hypercall_stubs == NULL) { + do_cpuid(base + 1, regs); + printf("XEN: Hypervisor version %d.%d detected.\n", + regs[0] >> 16, regs[0] & 0xffff); + } + + /* + * Find the hypercall pages. 
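 *
 * Editor's note (illustrative): for the Xen CPUID leaves located above,
 * leaf base+2 returns the number of hypercall pages in EAX and the MSR
 * used to install them in EBX, so the loop below amounts to:
 *
 *	do_cpuid(base + 2, regs);
 *	npages = regs[0];		// CPUID.(base+2):EAX, page count
 *	msr    = regs[1];		// CPUID.(base+2):EBX, install MSR
 *	for (i = 0; i < npages; i++)
 *		wrmsr(msr, vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
 *
 * Each write hands the hypervisor the physical address of one page with
 * that page's index packed into the low bits.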
+ */ + do_cpuid(base + 2, regs); + + if (hypercall_stubs == NULL) { + size_t call_region_size; + + call_region_size = regs[0] * PAGE_SIZE; + hypercall_stubs = malloc(call_region_size, M_XENHVM, M_NOWAIT); + if (hypercall_stubs == NULL) + panic("Unable to allocate Xen hypercall region"); + } + + for (i = 0; i < regs[0]; i++) + wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); + + return (0); +} + +static void +xen_hvm_init_shared_info_page(void) +{ + struct xen_add_to_physmap xatp; + + if (HYPERVISOR_shared_info == NULL) { + HYPERVISOR_shared_info = malloc(PAGE_SIZE, M_XENHVM, M_NOWAIT); + if (HYPERVISOR_shared_info == NULL) + panic("Unable to allocate Xen shared info page"); + } + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = vtophys(HYPERVISOR_shared_info) >> PAGE_SHIFT; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + panic("HYPERVISOR_memory_op failed"); +} + +/* + * Tell the hypervisor how to contact us for event channel callbacks. + */ +void +xen_hvm_set_callback(device_t dev) +{ + struct xen_hvm_param xhp; + int irq; + + xhp.domid = DOMID_SELF; + xhp.index = HVM_PARAM_CALLBACK_IRQ; + if (xen_feature(XENFEAT_hvm_callback_vector)) { + int error; + + xhp.value = HVM_CALLBACK_VECTOR(IDT_EVTCHN); + error = HYPERVISOR_hvm_op(HVMOP_set_param, &xhp); + if (error == 0) { + xen_vector_callback_enabled = 1; + return; + } + printf("Xen HVM callback vector registration failed (%d). " + "Falling back to emulated device interrupt\n", + error); + } + xen_vector_callback_enabled = 0; + if (dev == NULL) { + /* + * Called from early boot or resume. + * xenpci will invoke us again later. + */ + return; + } + + irq = pci_get_irq(dev); + if (irq < 16) { + xhp.value = HVM_CALLBACK_GSI(irq); + } else { + u_int slot; + u_int pin; + + slot = pci_get_slot(dev); + pin = pci_get_intpin(dev) - 1; + xhp.value = HVM_CALLBACK_PCI_INTX(slot, pin); + } + + if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp)) + panic("Can't set evtchn callback"); +} + +#define XEN_MAGIC_IOPORT 0x10 +enum { + XMI_MAGIC = 0x49d2, + XMI_UNPLUG_IDE_DISKS = 0x01, + XMI_UNPLUG_NICS = 0x02, + XMI_UNPLUG_IDE_EXCEPT_PRI_MASTER = 0x04 +}; + +static void +xen_hvm_disable_emulated_devices(void) +{ + if (inw(XEN_MAGIC_IOPORT) != XMI_MAGIC) + return; + + if (bootverbose) + printf("XEN: Disabling emulated block and network devices\n"); + outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS); +} + +void +xen_hvm_suspend(void) +{ +} + +void +xen_hvm_resume(void) +{ + xen_hvm_init_hypercall_stubs(); + xen_hvm_init_shared_info_page(); +} + +static void +xen_hvm_init(void *dummy __unused) +{ + if (xen_hvm_init_hypercall_stubs() != 0) + return; + + xen_domain_type = XEN_HVM_DOMAIN; + setup_xen_features(); + xen_hvm_init_shared_info_page(); + xen_hvm_set_callback(NULL); + xen_hvm_disable_emulated_devices(); +} + +void xen_hvm_init_cpu(void) +{ + int cpu = PCPU_GET(acpi_id); + struct vcpu_info *vcpu_info; + struct vcpu_register_vcpu_info info; + int rc; + + vcpu_info = DPCPU_PTR(vcpu_local_info); + info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; + info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); + + rc = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + if (rc) { + DPCPU_SET(vcpu_info, &HYPERVISOR_shared_info->vcpu_info[cpu]); + } else { + DPCPU_SET(vcpu_info, vcpu_info); + } +} + +SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL); +SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL); diff --git 
a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c new file mode 100644 index 000000000000..83bf4873f97d --- /dev/null +++ b/sys/x86/xen/xen_intr.c @@ -0,0 +1,1126 @@ +/****************************************************************************** + * xen_intr.c + * + * Xen event and interrupt services for x86 PV and HVM guests. + * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005, Intel Corporation + * Copyright (c) 2012, Spectra Logic Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services"); + +/** + * Per-cpu event channel processing state. + */ +struct xen_intr_pcpu_data { + /** + * The last event channel bitmap section (level one bit) processed. + * This is used to ensure we scan all ports before + * servicing an already servied port again. + */ + u_int last_processed_l1i; + + /** + * The last event channel processed within the event channel + * bitmap being scanned. + */ + u_int last_processed_l2i; + + /** Pointer to this CPU's interrupt statistic counter. */ + u_long *evtchn_intrcnt; + + /** + * A bitmap of ports that can be serviced from this CPU. + * A set bit means interrupt handling is enabled. + */ + u_long evtchn_enabled[sizeof(u_long) * 8]; +}; + +/* + * Start the scan at port 0 by initializing the last scanned + * location as the highest numbered event channel port. + */ +DPCPU_DEFINE(struct xen_intr_pcpu_data, xen_intr_pcpu) = { + .last_processed_l1i = LONG_BIT - 1, + .last_processed_l2i = LONG_BIT - 1 +}; + +DPCPU_DECLARE(struct vcpu_info *, vcpu_info); + +#define is_valid_evtchn(x) ((x) != 0) + +struct xenisrc { + struct intsrc xi_intsrc; + enum evtchn_type xi_type; + int xi_cpu; /* VCPU for delivery. */ + int xi_vector; /* Global isrc vector number. */ + evtchn_port_t xi_port; + int xi_pirq; + int xi_virq; + u_int xi_close:1; /* close on unbind? */ + u_int xi_needs_eoi:1; + u_int xi_shared:1; /* Shared with other domains. 
*/ +}; + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +static void xen_intr_suspend(struct pic *); +static void xen_intr_resume(struct pic *); +static void xen_intr_enable_source(struct intsrc *isrc); +static void xen_intr_disable_source(struct intsrc *isrc, int eoi); +static void xen_intr_eoi_source(struct intsrc *isrc); +static void xen_intr_enable_intr(struct intsrc *isrc); +static void xen_intr_disable_intr(struct intsrc *isrc); +static int xen_intr_vector(struct intsrc *isrc); +static int xen_intr_source_pending(struct intsrc *isrc); +static int xen_intr_config_intr(struct intsrc *isrc, + enum intr_trigger trig, enum intr_polarity pol); +static int xen_intr_assign_cpu(struct intsrc *isrc, u_int apic_id); + +static void xen_intr_pirq_enable_source(struct intsrc *isrc); +static void xen_intr_pirq_disable_source(struct intsrc *isrc, int eoi); +static void xen_intr_pirq_eoi_source(struct intsrc *isrc); +static void xen_intr_pirq_enable_intr(struct intsrc *isrc); + +/** + * PIC interface for all event channel port types except physical IRQs. + */ +struct pic xen_intr_pic = { + .pic_enable_source = xen_intr_enable_source, + .pic_disable_source = xen_intr_disable_source, + .pic_eoi_source = xen_intr_eoi_source, + .pic_enable_intr = xen_intr_enable_intr, + .pic_disable_intr = xen_intr_disable_intr, + .pic_vector = xen_intr_vector, + .pic_source_pending = xen_intr_source_pending, + .pic_suspend = xen_intr_suspend, + .pic_resume = xen_intr_resume, + .pic_config_intr = xen_intr_config_intr, + .pic_assign_cpu = xen_intr_assign_cpu +}; + +/** + * PIC interface for all event channel representing + * physical interrupt sources. + */ +struct pic xen_intr_pirq_pic = { + .pic_enable_source = xen_intr_pirq_enable_source, + .pic_disable_source = xen_intr_pirq_disable_source, + .pic_eoi_source = xen_intr_pirq_eoi_source, + .pic_enable_intr = xen_intr_pirq_enable_intr, + .pic_disable_intr = xen_intr_disable_intr, + .pic_vector = xen_intr_vector, + .pic_source_pending = xen_intr_source_pending, + .pic_suspend = xen_intr_suspend, + .pic_resume = xen_intr_resume, + .pic_config_intr = xen_intr_config_intr, + .pic_assign_cpu = xen_intr_assign_cpu +}; + +static struct mtx xen_intr_isrc_lock; +static int xen_intr_isrc_count; +static struct xenisrc *xen_intr_port_to_isrc[NR_EVENT_CHANNELS]; + +/*------------------------- Private Functions --------------------------------*/ +/** + * Disable signal delivery for an event channel port on the + * specified CPU. + * + * \param port The event channel port to mask. + * + * This API is used to manage the port<=>CPU binding of event + * channel handlers. + * + * \note This operation does not preclude reception of an event + * for this event channel on another CPU. To mask the + * event channel globally, use evtchn_mask(). + */ +static inline void +evtchn_cpu_mask_port(u_int cpu, evtchn_port_t port) +{ + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + clear_bit(port, pcpu->evtchn_enabled); +} + +/** + * Enable signal delivery for an event channel port on the + * specified CPU. + * + * \param port The event channel port to unmask. + * + * This API is used to manage the port<=>CPU binding of event + * channel handlers. + * + * \note This operation does not guarantee that event delivery + * is enabled for this event channel port. The port must + * also be globally enabled. See evtchn_unmask(). 
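 *
 * Editor's note: combined with the global mask kept in the shared info
 * page, a port is actually serviced on a CPU only when all three bits
 * agree, which is the per-word test xen_intr_active_ports() applies
 * later in this file:
 *
 *	active = sh->evtchn_pending[idx]	// Xen has raised the event
 *	    & ~sh->evtchn_mask[idx]		// not globally masked
 *	    & pcpu->evtchn_enabled[idx];	// routed to this CPU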
+ */ +static inline void +evtchn_cpu_unmask_port(u_int cpu, evtchn_port_t port) +{ + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + set_bit(port, pcpu->evtchn_enabled); +} + +/** + * Allocate and register a per-cpu Xen upcall interrupt counter. + * + * \param cpu The cpu for which to register this interrupt count. + */ +static void +xen_intr_intrcnt_add(u_int cpu) +{ + char buf[MAXCOMLEN + 1]; + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + if (pcpu->evtchn_intrcnt != NULL) + return; + + snprintf(buf, sizeof(buf), "cpu%d:xen", cpu); + intrcnt_add(buf, &pcpu->evtchn_intrcnt); +} + +/** + * Search for an already allocated but currently unused Xen interrupt + * source object. + * + * \param type Restrict the search to interrupt sources of the given + * type. + * + * \return A pointer to a free Xen interrupt source object or NULL. + */ +static struct xenisrc * +xen_intr_find_unused_isrc(enum evtchn_type type) +{ + int isrc_idx; + + KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn isrc lock not held")); + + for (isrc_idx = 0; isrc_idx < xen_intr_isrc_count; isrc_idx ++) { + struct xenisrc *isrc; + u_int vector; + + vector = FIRST_EVTCHN_INT + isrc_idx; + isrc = (struct xenisrc *)intr_lookup_source(vector); + if (isrc != NULL + && isrc->xi_type == EVTCHN_TYPE_UNBOUND) { + KASSERT(isrc->xi_intsrc.is_handlers == 0, + ("Free evtchn still has handlers")); + isrc->xi_type = type; + return (isrc); + } + } + return (NULL); +} + +/** + * Allocate a Xen interrupt source object. + * + * \param type The type of interrupt source to create. + * + * \return A pointer to a newly allocated Xen interrupt source + * object or NULL. + */ +static struct xenisrc * +xen_intr_alloc_isrc(enum evtchn_type type) +{ + static int warned; + struct xenisrc *isrc; + int vector; + + KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn alloc lock not held")); + + if (xen_intr_isrc_count > NR_EVENT_CHANNELS) { + if (!warned) { + warned = 1; + printf("xen_intr_alloc: Event channels exhausted.\n"); + } + return (NULL); + } + vector = FIRST_EVTCHN_INT + xen_intr_isrc_count; + xen_intr_isrc_count++; + + mtx_unlock(&xen_intr_isrc_lock); + isrc = malloc(sizeof(*isrc), M_XENINTR, M_WAITOK | M_ZERO); + isrc->xi_intsrc.is_pic = &xen_intr_pic; + isrc->xi_vector = vector; + isrc->xi_type = type; + intr_register_source(&isrc->xi_intsrc); + mtx_lock(&xen_intr_isrc_lock); + + return (isrc); +} + +/** + * Attempt to free an active Xen interrupt source object. + * + * \param isrc The interrupt source object to release. + * + * \returns EBUSY if the source is still in use, otherwise 0. + */ +static int +xen_intr_release_isrc(struct xenisrc *isrc) +{ + + mtx_lock(&xen_intr_isrc_lock); + if (isrc->xi_intsrc.is_handlers != 0) { + mtx_unlock(&xen_intr_isrc_lock); + return (EBUSY); + } + evtchn_mask_port(isrc->xi_port); + evtchn_clear_port(isrc->xi_port); + + /* Rebind port to CPU 0. */ + evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); + evtchn_cpu_unmask_port(0, isrc->xi_port); + + if (isrc->xi_close != 0) { + struct evtchn_close close = { .port = isrc->xi_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + } + + xen_intr_port_to_isrc[isrc->xi_port] = NULL; + isrc->xi_cpu = 0; + isrc->xi_type = EVTCHN_TYPE_UNBOUND; + isrc->xi_port = 0; + mtx_unlock(&xen_intr_isrc_lock); + return (0); +} + +/** + * Associate an interrupt handler with an already allocated local Xen + * event channel port. 
+ * + * \param isrcp The returned Xen interrupt object associated with + * the specified local port. + * \param local_port The event channel to bind. + * \param type The event channel type of local_port. + * \param intr_owner The device making this bind request. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. + */ +static int +xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port, + enum evtchn_type type, device_t intr_owner, driver_filter_t filter, + driver_intr_t handler, void *arg, enum intr_type flags, + xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + int error; + + *isrcp = NULL; + if (port_handlep == NULL) { + device_printf(intr_owner, + "xen_intr_bind_isrc: Bad event handle\n"); + return (EINVAL); + } + + mtx_lock(&xen_intr_isrc_lock); + isrc = xen_intr_find_unused_isrc(type); + if (isrc == NULL) { + isrc = xen_intr_alloc_isrc(type); + if (isrc == NULL) { + mtx_unlock(&xen_intr_isrc_lock); + return (ENOSPC); + } + } + isrc->xi_port = local_port; + xen_intr_port_to_isrc[local_port] = isrc; + mtx_unlock(&xen_intr_isrc_lock); + + error = intr_add_handler(device_get_nameunit(intr_owner), + isrc->xi_vector, filter, handler, arg, + flags|INTR_EXCL, port_handlep); + if (error != 0) { + device_printf(intr_owner, + "xen_intr_bind_irq: intr_add_handler failed\n"); + xen_intr_release_isrc(isrc); + return (error); + } + *isrcp = isrc; + return (0); +} + +/** + * Lookup a Xen interrupt source object given an interrupt binding handle. + * + * \param handle A handle initialized by a previous call to + * xen_intr_bind_isrc(). + * + * \returns A pointer to the Xen interrupt source object associated + * with the given interrupt handle. NULL if no association + * currently exists. + */ +static struct xenisrc * +xen_intr_isrc(xen_intr_handle_t handle) +{ + struct intr_handler *ih; + + ih = handle; + if (ih == NULL || ih->ih_event == NULL) + return (NULL); + + return (ih->ih_event->ie_source); +} + +/** + * Determine the event channel ports at the given section of the + * event port bitmap which have pending events for the given cpu. + * + * \param pcpu The Xen interrupt pcpu data for the cpu being querried. + * \param sh The Xen shared info area. + * \param idx The index of the section of the event channel bitmap to + * inspect. + * + * \returns A u_long with bits set for every event channel with pending + * events. + */ +static inline u_long +xen_intr_active_ports(struct xen_intr_pcpu_data *pcpu, shared_info_t *sh, + u_int idx) +{ + return (sh->evtchn_pending[idx] + & ~sh->evtchn_mask[idx] + & pcpu->evtchn_enabled[idx]); +} + +/** + * Interrupt handler for processing all Xen event channel events. + * + * \param trap_frame The trap frame context for the current interrupt. 
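 *
 * Editor's note: pending events are tracked as a two-level bitmap.  The
 * vcpu's evtchn_pending_sel word selects which word of evtchn_pending[]
 * to scan, and the bit within that word names the port, so the body
 * below recovers the port number as:
 *
 *	port = (l1i * LONG_BIT) + l2i;
 *
 * e.g. on amd64 (LONG_BIT == 64) port 130 is bit 2 of word 2.  The
 * last_processed_l1i/l2i cursors make each scan resume just past the
 * last port serviced, so a chatty low-numbered port cannot starve the
 * higher ones.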
+ */ +void +xen_intr_handle_upcall(struct trapframe *trap_frame) +{ + u_int l1i, l2i, port, cpu; + u_long masked_l1, masked_l2; + struct xenisrc *isrc; + shared_info_t *s; + vcpu_info_t *v; + struct xen_intr_pcpu_data *pc; + u_long l1, l2; + + /* + * Disable preemption in order to always check and fire events + * on the right vCPU + */ + critical_enter(); + + cpu = PCPU_GET(cpuid); + pc = DPCPU_PTR(xen_intr_pcpu); + s = HYPERVISOR_shared_info; + v = DPCPU_GET(vcpu_info); + + if (xen_hvm_domain() && !xen_vector_callback_enabled) { + KASSERT((cpu == 0), ("Fired PCI event callback on wrong CPU")); + } + + v->evtchn_upcall_pending = 0; + +#if 0 +#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ + /* Clear master flag /before/ clearing selector flag. */ + wmb(); +#endif +#endif + + l1 = atomic_readandclear_long(&v->evtchn_pending_sel); + + l1i = pc->last_processed_l1i; + l2i = pc->last_processed_l2i; + (*pc->evtchn_intrcnt)++; + + while (l1 != 0) { + + l1i = (l1i + 1) % LONG_BIT; + masked_l1 = l1 & ((~0UL) << l1i); + + if (masked_l1 == 0) { + /* + * if we masked out all events, wrap around + * to the beginning. + */ + l1i = LONG_BIT - 1; + l2i = LONG_BIT - 1; + continue; + } + l1i = ffsl(masked_l1) - 1; + + do { + l2 = xen_intr_active_ports(pc, s, l1i); + + l2i = (l2i + 1) % LONG_BIT; + masked_l2 = l2 & ((~0UL) << l2i); + + if (masked_l2 == 0) { + /* if we masked out all events, move on */ + l2i = LONG_BIT - 1; + break; + } + l2i = ffsl(masked_l2) - 1; + + /* process port */ + port = (l1i * LONG_BIT) + l2i; + synch_clear_bit(port, &s->evtchn_pending[0]); + + isrc = xen_intr_port_to_isrc[port]; + if (__predict_false(isrc == NULL)) + continue; + + /* Make sure we are firing on the right vCPU */ + KASSERT((isrc->xi_cpu == PCPU_GET(cpuid)), + ("Received unexpected event on vCPU#%d, event bound to vCPU#%d", + PCPU_GET(cpuid), isrc->xi_cpu)); + + intr_execute_handlers(&isrc->xi_intsrc, trap_frame); + + /* + * If this is the final port processed, + * we'll pick up here+1 next time. + */ + pc->last_processed_l1i = l1i; + pc->last_processed_l2i = l2i; + + } while (l2i != LONG_BIT - 1); + + l2 = xen_intr_active_ports(pc, s, l1i); + if (l2 == 0) { + /* + * We handled all ports, so we can clear the + * selector bit. + */ + l1 &= ~(1UL << l1i); + } + } + critical_exit(); +} + +static int +xen_intr_init(void *dummy __unused) +{ + struct xen_intr_pcpu_data *pcpu; + int i; + + mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); + + /* + * Register interrupt count manually as we aren't + * guaranteed to see a call to xen_intr_assign_cpu() + * before our first interrupt. Also set the per-cpu + * mask of CPU#0 to enable all, since by default + * all event channels are bound to CPU#0. + */ + CPU_FOREACH(i) { + pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); + memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0, + sizeof(pcpu->evtchn_enabled)); + xen_intr_intrcnt_add(i); + } + + intr_register_pic(&xen_intr_pic); + + return (0); +} +SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_MIDDLE, xen_intr_init, NULL); + +/*--------------------------- Common PIC Functions ---------------------------*/ +/** + * Prepare this PIC for system suspension. + */ +static void +xen_intr_suspend(struct pic *unused) +{ +} + +/** + * Return this PIC to service after being suspended. + */ +static void +xen_intr_resume(struct pic *unused) +{ + u_int port; + + /* + * Mask events for all ports. They will be unmasked after + * drivers have re-registered their handlers. 
+ */ + for (port = 0; port < NR_EVENT_CHANNELS; port++) + evtchn_mask_port(port); +} + +/** + * Disable a Xen interrupt source. + * + * \param isrc The interrupt source to disable. + */ +static void +xen_intr_disable_intr(struct intsrc *base_isrc) +{ + struct xenisrc *isrc = (struct xenisrc *)base_isrc; + + evtchn_mask_port(isrc->xi_port); +} + +/** + * Determine the global interrupt vector number for + * a Xen interrupt source. + * + * \param isrc The interrupt source to query. + * + * \return The vector number corresponding to the given interrupt source. + */ +static int +xen_intr_vector(struct intsrc *base_isrc) +{ + struct xenisrc *isrc = (struct xenisrc *)base_isrc; + + return (isrc->xi_vector); +} + +/** + * Determine whether or not interrupt events are pending on the + * the given interrupt source. + * + * \param isrc The interrupt source to query. + * + * \returns 0 if no events are pending, otherwise non-zero. + */ +static int +xen_intr_source_pending(struct intsrc *isrc) +{ + /* + * EventChannels are edge triggered and never masked. + * There can be no pending events. + */ + return (0); +} + +/** + * Perform configuration of an interrupt source. + * + * \param isrc The interrupt source to configure. + * \param trig Edge or level. + * \param pol Active high or low. + * + * \returns 0 if no events are pending, otherwise non-zero. + */ +static int +xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol) +{ + /* Configuration is only possible via the evtchn apis. */ + return (ENODEV); +} + +/** + * Configure CPU affinity for interrupt source event delivery. + * + * \param isrc The interrupt source to configure. + * \param apic_id The apic id of the CPU for handling future events. + * + * \returns 0 if successful, otherwise an errno. + */ +static int +xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) +{ + struct evtchn_bind_vcpu bind_vcpu; + struct xenisrc *isrc; + u_int to_cpu, acpi_id; + int error; + +#ifdef XENHVM + if (xen_vector_callback_enabled == 0) + return (EOPNOTSUPP); +#endif + + to_cpu = apic_cpuid(apic_id); + acpi_id = pcpu_find(to_cpu)->pc_acpi_id; + xen_intr_intrcnt_add(to_cpu); + + mtx_lock(&xen_intr_isrc_lock); + isrc = (struct xenisrc *)base_isrc; + if (!is_valid_evtchn(isrc->xi_port)) { + mtx_unlock(&xen_intr_isrc_lock); + return (EINVAL); + } + + if ((isrc->xi_type == EVTCHN_TYPE_VIRQ) || + (isrc->xi_type == EVTCHN_TYPE_IPI)) { + /* + * Virtual IRQs are associated with a cpu by + * the Hypervisor at evtchn_bind_virq time, so + * all we need to do is update the per-CPU masks. + */ + evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); + isrc->xi_cpu = to_cpu; + evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port); + mtx_unlock(&xen_intr_isrc_lock); + return (0); + } + + bind_vcpu.port = isrc->xi_port; + bind_vcpu.vcpu = acpi_id; + + /* + * Allow interrupts to be fielded on the new VCPU before + * we ask the hypervisor to deliver them there. + */ + evtchn_cpu_unmask_port(to_cpu, isrc->xi_port); + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu); + if (isrc->xi_cpu != to_cpu) { + if (error == 0) { + /* Commit to new binding by removing the old one. */ + evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); + isrc->xi_cpu = to_cpu; + } else { + /* Roll-back to previous binding. 
*/ + evtchn_cpu_mask_port(to_cpu, isrc->xi_port); + } + } + mtx_unlock(&xen_intr_isrc_lock); + return (0); +} + +/*------------------- Virtual Interrupt Source PIC Functions -----------------*/ +/* + * Mask a level triggered interrupt source. + * + * \param isrc The interrupt source to mask (if necessary). + * \param eoi If non-zero, perform any necessary end-of-interrupt + * acknowledgements. + */ +static void +xen_intr_disable_source(struct intsrc *isrc, int eoi) +{ +} + +/* + * Unmask a level triggered interrupt source. + * + * \param isrc The interrupt source to unmask (if necessary). + */ +static void +xen_intr_enable_source(struct intsrc *isrc) +{ +} + +/* + * Perform any necessary end-of-interrupt acknowledgements. + * + * \param isrc The interrupt source to EOI. + */ +static void +xen_intr_eoi_source(struct intsrc *isrc) +{ +} + +/* + * Enable and unmask the interrupt source. + * + * \param isrc The interrupt source to enable. + */ +static void +xen_intr_enable_intr(struct intsrc *base_isrc) +{ + struct xenisrc *isrc = (struct xenisrc *)base_isrc; + + evtchn_unmask_port(isrc->xi_port); +} + +/*------------------ Physical Interrupt Source PIC Functions -----------------*/ +/* + * Mask a level triggered interrupt source. + * + * \param isrc The interrupt source to mask (if necessary). + * \param eoi If non-zero, perform any necessary end-of-interrupt + * acknowledgements. + */ +static void +xen_intr_pirq_disable_source(struct intsrc *base_isrc, int eoi) +{ + struct xenisrc *isrc; + + isrc = (struct xenisrc *)base_isrc; + evtchn_mask_port(isrc->xi_port); +} + +/* + * Unmask a level triggered interrupt source. + * + * \param isrc The interrupt source to unmask (if necessary). + */ +static void +xen_intr_pirq_enable_source(struct intsrc *base_isrc) +{ + struct xenisrc *isrc; + + isrc = (struct xenisrc *)base_isrc; + evtchn_unmask_port(isrc->xi_port); +} + +/* + * Perform any necessary end-of-interrupt acknowledgements. + * + * \param isrc The interrupt source to EOI. + */ +static void +xen_intr_pirq_eoi_source(struct intsrc *base_isrc) +{ + struct xenisrc *isrc; + + /* XXX Use shared page of flags for this. */ + isrc = (struct xenisrc *)base_isrc; + if (isrc->xi_needs_eoi != 0) { + struct physdev_eoi eoi = { .irq = isrc->xi_pirq }; + + (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + } +} + +/* + * Enable and unmask the interrupt source. + * + * \param isrc The interrupt source to enable. + */ +static void +xen_intr_pirq_enable_intr(struct intsrc *isrc) +{ +} + +/*--------------------------- Public Functions -------------------------------*/ +/*------- API comments for these methods can be found in xen/xenintr.h -------*/ +int +xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + int error; + + error = xen_intr_bind_isrc(&isrc, local_port, EVTCHN_TYPE_PORT, dev, + filter, handler, arg, flags, port_handlep); + if (error != 0) + return (error); + + /* + * The Event Channel API didn't open this port, so it is not + * responsible for closing it automatically on unbind. 
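 *
 * Editor's sketch of a caller (hypothetical driver names and softc,
 * illustrative only):
 *
 *	static xen_intr_handle_t sc_handle;
 *
 *	static int
 *	mydev_filter(void *arg)
 *	{
 *		// runs at interrupt time when the bound port fires
 *		return (FILTER_HANDLED);
 *	}
 *
 *	error = xen_intr_bind_local_port(dev, port_from_xenstore,
 *	    mydev_filter, NULL, sc, INTR_TYPE_BIO, &sc_handle);
 *	if (error != 0)
 *		return (error);
 *	...
 *	xen_intr_unbind(&sc_handle);	// releases the isrc; port stays open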
+ */ + isrc->xi_close = 0; + return (0); +} + +int +xen_intr_alloc_and_bind_local_port(device_t dev, u_int remote_domain, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + struct evtchn_alloc_unbound alloc_unbound; + int error; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, alloc_unbound.port, EVTCHN_TYPE_PORT, + dev, filter, handler, arg, flags, + port_handlep); + if (error != 0) { + evtchn_close_t close = { .port = alloc_unbound.port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + isrc->xi_close = 1; + return (0); +} + +int +xen_intr_bind_remote_port(device_t dev, u_int remote_domain, + u_int remote_port, driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + struct evtchn_bind_interdomain bind_interdomain; + int error; + + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, bind_interdomain.local_port, + EVTCHN_TYPE_PORT, dev, filter, handler, + arg, flags, port_handlep); + if (error) { + evtchn_close_t close = { .port = bind_interdomain.local_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + /* + * The Event Channel API opened this port, so it is + * responsible for closing it automatically on unbind. + */ + isrc->xi_close = 1; + return (0); +} + +int +xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + int acpi_id = pcpu_find(cpu)->pc_acpi_id; + struct xenisrc *isrc; + struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = acpi_id }; + int error; + + /* Ensure the target CPU is ready to handle evtchn interrupts. */ + xen_intr_intrcnt_add(cpu); + + isrc = NULL; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, dev, + filter, handler, arg, flags, port_handlep); + if (error == 0) + error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); + + if (error != 0) { + evtchn_close_t close = { .port = bind_virq.port }; + + xen_intr_unbind(*port_handlep); + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + if (isrc->xi_cpu != cpu) { + /* + * Too early in the boot process for the generic interrupt + * code to perform the binding. Update our event channel + * masks manually so events can't fire on the wrong cpu + * during AP startup. 
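 *
 * Editor's sketch of how this function is typically driven (VIRQ_DEBUG
 * and the filter name are illustrative):
 *
 *	static xen_intr_handle_t debug_handle;
 *
 *	error = xen_intr_bind_virq(dev, VIRQ_DEBUG, 0, xen_debug_filter,
 *	    NULL, NULL, INTR_TYPE_MISC, &debug_handle);
 *
 * The cpu argument (0 here) is a FreeBSD CPU id; it is converted to the
 * ACPI id Xen expects via pcpu_find(cpu)->pc_acpi_id above, and the port
 * is closed automatically on xen_intr_unbind() because xi_close is set
 * below.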
+ */ + xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); + } + + /* + * The Event Channel API opened this port, so it is + * responsible for closing it automatically on unbind. + */ + isrc->xi_close = 1; + return (0); +} + +int +xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu, + driver_filter_t filter, enum intr_type flags, + xen_intr_handle_t *port_handlep) +{ + int acpi_id = pcpu_find(cpu)->pc_acpi_id; + struct xenisrc *isrc; + struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id }; + int error; + + /* Ensure the target CPU is ready to handle evtchn interrupts. */ + xen_intr_intrcnt_add(cpu); + + isrc = NULL; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, bind_ipi.port, EVTCHN_TYPE_IPI, + dev, filter, NULL, NULL, flags, + port_handlep); + if (error == 0) + error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); + + if (error != 0) { + evtchn_close_t close = { .port = bind_ipi.port }; + + xen_intr_unbind(*port_handlep); + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + if (isrc->xi_cpu != cpu) { + /* + * Too early in the boot process for the generic interrupt + * code to perform the binding. Update our event channel + * masks manually so events can't fire on the wrong cpu + * during AP startup. + */ + xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); + } + + /* + * The Event Channel API opened this port, so it is + * responsible for closing it automatically on unbind. + */ + isrc->xi_close = 1; + return (0); +} + +int +xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) +{ + char descr[MAXCOMLEN + 1]; + struct xenisrc *isrc; + va_list ap; + + isrc = xen_intr_isrc(port_handle); + if (isrc == NULL) + return (EINVAL); + + va_start(ap, fmt); + vsnprintf(descr, sizeof(descr), fmt, ap); + va_end(ap); + return (intr_describe(isrc->xi_vector, port_handle, descr)); +} + +void +xen_intr_unbind(xen_intr_handle_t *port_handlep) +{ + struct intr_handler *handler; + struct xenisrc *isrc; + + handler = *port_handlep; + *port_handlep = NULL; + isrc = xen_intr_isrc(handler); + if (isrc == NULL) + return; + + intr_remove_handler(handler); + xen_intr_release_isrc(isrc); +} + +void +xen_intr_signal(xen_intr_handle_t handle) +{ + struct xenisrc *isrc; + + isrc = xen_intr_isrc(handle); + if (isrc != NULL) { + KASSERT(isrc->xi_type == EVTCHN_TYPE_PORT || + isrc->xi_type == EVTCHN_TYPE_IPI, + ("evtchn_signal on something other than a local port")); + struct evtchn_send send = { .port = isrc->xi_port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); + } +} + +evtchn_port_t +xen_intr_port(xen_intr_handle_t handle) +{ + struct xenisrc *isrc; + + isrc = xen_intr_isrc(handle); + if (isrc == NULL) + return (0); + + return (isrc->xi_port); +} diff --git a/sys/xen/evtchn.h b/sys/xen/evtchn.h index 721742f6de40..00fa67e0a4d1 100644 --- a/sys/xen/evtchn.h +++ b/sys/xen/evtchn.h @@ -1,94 +1,87 @@ /****************************************************************************** * evtchn.h * - * Communication via Xen event channels. - * Also definitions for the device that demuxes notifications to userspace. + * Interface to /dev/xen/evtchn. 
* - * Copyright (c) 2004, K A Fraser + * Copyright (c) 2003-2005, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. * * $FreeBSD$ */ -#ifndef __ASM_EVTCHN_H__ -#define __ASM_EVTCHN_H__ -#include -#include -#include -#include +#ifndef __XEN_EVTCHN_H__ +#define __XEN_EVTCHN_H__ /* - * LOW-LEVEL DEFINITIONS + * Bind a fresh port to VIRQ @virq. */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOWR('E', 4, struct ioctl_evtchn_bind_virq) +struct ioctl_evtchn_bind_virq { + unsigned int virq; + unsigned int port; +}; /* - * Unlike notify_remote_via_evtchn(), this is safe to use across - * save/restore. Notifications on a broken connection are silently dropped. + * Bind a fresh port to remote <@remote_domain, @remote_port>. */ -void notify_remote_via_irq(int irq); - - -/* Entry point for notifications into Linux subsystems. */ -void evtchn_do_upcall(struct trapframe *frame); - -/* Entry point for notifications into the userland character device. */ -void evtchn_device_upcall(int port); - -void mask_evtchn(int port); - -void unmask_evtchn(int port); - -#ifdef SMP -void rebind_evtchn_to_cpu(int port, unsigned int cpu); -#else -#define rebind_evtchn_to_cpu(port, cpu) ((void)0) -#endif - -static inline -int test_and_set_evtchn_mask(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - return synch_test_and_set_bit(port, s->evtchn_mask); -} - -static inline void -clear_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - synch_clear_bit(port, &s->evtchn_pending[0]); -} - -static inline void -notify_remote_via_evtchn(int port) -{ - struct evtchn_send send = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); -} +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOWR('E', 5, struct ioctl_evtchn_bind_interdomain) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; + unsigned int port; +}; /* - * Use these to access the event channel underlying the IRQ handle returned - * by bind_*_to_irqhandler(). + * Allocate a fresh port for binding to @remote_domain. 
*/ -int irq_to_evtchn_port(int irq); - -void ipi_pcpu(unsigned int cpu, int vector); +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOWR('E', 6, struct ioctl_evtchn_bind_unbound_port) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; + unsigned int port; +}; /* - * CHARACTER-DEVICE DEFINITIONS + * Unbind previously allocated @port. */ +#define IOCTL_EVTCHN_UNBIND \ + _IOW('E', 7, struct ioctl_evtchn_unbind) +struct ioctl_evtchn_unbind { + unsigned int port; +}; -#define PORT_NORMAL 0x0000 -#define PORT_EXCEPTION 0x8000 -#define PORTIDX_MASK 0x7fff - -/* /dev/xen/evtchn resides at device number major=10, minor=200 */ -#define EVTCHN_MINOR 200 +/* + * Send event to previously allocated @port. + */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOW('E', 8, struct ioctl_evtchn_notify) +struct ioctl_evtchn_notify { + unsigned int port; +}; -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ -#define EVTCHN_RESET _IO('E', 1) -/* EVTCHN_BIND: Bind to the specified event-channel port. */ -#define EVTCHN_BIND _IO('E', 2) -/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ -#define EVTCHN_UNBIND _IO('E', 3) +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IO('E', 9) -#endif /* __ASM_EVTCHN_H__ */ +#endif /* __XEN_EVTCHN_H__ */ diff --git a/sys/xen/evtchn/evtchn.c b/sys/xen/evtchn/evtchn.c deleted file mode 100644 index baff9aaa2c80..000000000000 --- a/sys/xen/evtchn/evtchn.c +++ /dev/null @@ -1,1141 +0,0 @@ -/****************************************************************************** - * evtchn.c - * - * Communication via Xen event channels. - * - * Copyright (c) 2002-2005, K A Fraser - * Copyright (c) 2005-2006 Kip Macy - */ - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static inline unsigned long __ffs(unsigned long word) -{ - __asm__("bsfl %1,%0" - :"=r" (word) - :"rm" (word)); - return word; -} - -/* - * irq_mapping_update_lock: in order to allow an interrupt to occur in a critical - * section, to set pcpu->ipending (etc...) properly, we - * must be able to get the icu lock, so it can't be - * under witness. - */ -static struct mtx irq_mapping_update_lock; -MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_SPIN); - -static struct xenpic *xp; -struct xenpic_intsrc { - struct intsrc xp_intsrc; - void *xp_cookie; - uint8_t xp_vector; - boolean_t xp_masked; -}; - -struct xenpic { - struct pic *xp_dynirq_pic; - struct pic *xp_pirq_pic; - uint16_t xp_numintr; - struct xenpic_intsrc xp_pins[0]; -}; - -#define TODO printf("%s: not implemented!\n", __func__) - -/* IRQ <-> event-channel mappings. */ -static int evtchn_to_irq[NR_EVENT_CHANNELS]; - -/* Packed IRQ information: binding type, sub-type index, and event channel. */ -static uint32_t irq_info[NR_IRQS]; -/* Binding types. */ -enum { - IRQT_UNBOUND, - IRQT_PIRQ, - IRQT_VIRQ, - IRQT_IPI, - IRQT_LOCAL_PORT, - IRQT_CALLER_PORT, - _IRQT_COUNT - -}; - - -#define _IRQT_BITS 4 -#define _EVTCHN_BITS 12 -#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS) - -/* Constructor for packed IRQ information. 
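/*
 * A sketch of how a user program could drive the binding ioctls defined
 * above: allocate an unbound port a peer domain may connect to, poke it, and
 * unbind.  Note that in this change the in-kernel evtchn_ioctl handler in
 * evtchn_dev.c is still guarded by NOTYET, so this only illustrates the
 * interface the header describes; the installed header path is assumed.
 */
#include <sys/ioctl.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#include <xen/evtchn.h>

int
main(void)
{
	struct ioctl_evtchn_bind_unbound_port bind;
	struct ioctl_evtchn_notify notify;
	struct ioctl_evtchn_unbind unbind;
	int fd;

	fd = open("/dev/xen/evtchn", O_RDWR);
	if (fd < 0)
		err(1, "open /dev/xen/evtchn");

	bind.remote_domain = 0;		/* e.g. let the control domain connect */
	if (ioctl(fd, IOCTL_EVTCHN_BIND_UNBOUND_PORT, &bind) != 0)
		err(1, "IOCTL_EVTCHN_BIND_UNBOUND_PORT");
	printf("allocated local port %u\n", bind.port);

	notify.port = bind.port;
	if (ioctl(fd, IOCTL_EVTCHN_NOTIFY, &notify) != 0)
		err(1, "IOCTL_EVTCHN_NOTIFY");

	unbind.port = bind.port;
	if (ioctl(fd, IOCTL_EVTCHN_UNBIND, &unbind) != 0)
		err(1, "IOCTL_EVTCHN_UNBIND");

	close(fd);
	return (0);
}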
*/ -static inline uint32_t -mk_irq_info(uint32_t type, uint32_t index, uint32_t evtchn) -{ - - return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn); -} - -/* Constructor for packed IRQ information. */ - -/* Convenient shorthand for packed representation of an unbound IRQ. */ -#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) - -/* - * Accessors for packed IRQ information. - */ - -static inline unsigned int evtchn_from_irq(int irq) -{ - return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1); -} - -static inline unsigned int index_from_irq(int irq) -{ - return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1); -} - -static inline unsigned int type_from_irq(int irq) -{ - return irq_info[irq] >> (32 - _IRQT_BITS); -} - - -/* IRQ <-> VIRQ mapping. */ - -/* IRQ <-> IPI mapping. */ -#ifndef NR_IPIS -#ifdef SMP -#error "NR_IPIS not defined" -#endif -#define NR_IPIS 1 -#endif - -/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */ -static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)]; - -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - -#define VALID_EVTCHN(_chn) ((_chn) != 0) - -#ifdef SMP - -static uint8_t cpu_evtchn[NR_EVENT_CHANNELS]; -static unsigned long cpu_evtchn_mask[XEN_LEGACY_MAX_VCPUS][NR_EVENT_CHANNELS/LONG_BIT]; - -#define active_evtchns(cpu,sh,idx) \ - ((sh)->evtchn_pending[idx] & \ - cpu_evtchn_mask[cpu][idx] & \ - ~(sh)->evtchn_mask[idx]) - -static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) -{ - clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); - set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); - cpu_evtchn[chn] = cpu; -} - -static void init_evtchn_cpu_bindings(void) -{ - /* By default all event channels notify CPU#0. */ - memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); - memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); -} - -#define cpu_from_evtchn(evtchn) (cpu_evtchn[evtchn]) - -#else - -#define active_evtchns(cpu,sh,idx) \ - ((sh)->evtchn_pending[idx] & \ - ~(sh)->evtchn_mask[idx]) -#define bind_evtchn_to_cpu(chn,cpu) ((void)0) -#define init_evtchn_cpu_bindings() ((void)0) -#define cpu_from_evtchn(evtchn) (0) - -#endif - - -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} - -void -evtchn_do_upcall(struct trapframe *frame) -{ - unsigned long l1, l2; - unsigned int l1i, l2i, port; - int irq, cpu; - shared_info_t *s; - vcpu_info_t *vcpu_info; - - cpu = PCPU_GET(cpuid); - s = HYPERVISOR_shared_info; - vcpu_info = &s->vcpu_info[cpu]; - - vcpu_info->evtchn_upcall_pending = 0; - - /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ - l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0); - - while (l1 != 0) { - l1i = __ffs(l1); - l1 &= ~(1 << l1i); - - while ((l2 = active_evtchns(cpu, s, l1i)) != 0) { - l2i = __ffs(l2); - - port = (l1i * LONG_BIT) + l2i; - if ((irq = evtchn_to_irq[port]) != -1) { - struct intsrc *isrc = intr_lookup_source(irq); - /* - * ack - */ - mask_evtchn(port); - clear_evtchn(port); - - intr_execute_handlers(isrc, frame); - } else { - evtchn_device_upcall(port); - } - } - } -} - -/* - * Send an IPI from the current CPU to the destination CPU. 
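/*
 * For reference, the legacy evtchn.c being removed here multiplexed all
 * binding state into one 32-bit irq_info word per IRQ: bits 31-28 hold the
 * binding type, bits 27-12 the type-specific index (e.g. the VIRQ or IPI
 * number), and bits 11-0 the event channel port.  A standalone round trip
 * of that packing, using the same shifts as the deleted helpers:
 */
#include <assert.h>
#include <stdint.h>

#define IRQT_VIRQ	2U			/* enum value in the removed code */
#define IRQT_BITS	4
#define EVTCHN_BITS	12
#define INDEX_BITS	(32 - IRQT_BITS - EVTCHN_BITS)

int
main(void)
{
	uint32_t info;

	/* mk_irq_info(IRQT_VIRQ, index 0, event channel 7) */
	info = (IRQT_VIRQ << (32 - IRQT_BITS)) | (0U << EVTCHN_BITS) | 7U;
	assert(info == 0x20000007);
	/* evtchn_from_irq(), index_from_irq(), type_from_irq() equivalents */
	assert((info & ((1U << EVTCHN_BITS) - 1)) == 7);
	assert(((info >> EVTCHN_BITS) & ((1U << INDEX_BITS) - 1)) == 0);
	assert((info >> (32 - IRQT_BITS)) == IRQT_VIRQ);
	return (0);
}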
- */ -void -ipi_pcpu(unsigned int cpu, int vector) -{ - int irq; - - irq = pcpu_find(cpu)->pc_ipi_to_irq[vector]; - - notify_remote_via_irq(irq); -} - -static int -find_unbound_irq(void) -{ - int dynirq, irq; - - for (dynirq = 0; dynirq < NR_IRQS; dynirq++) { - irq = dynirq_to_irq(dynirq); - if (irq_bindcount[irq] == 0) - break; - } - - if (irq == NR_IRQS) - panic("No available IRQ to bind to: increase NR_IRQS!\n"); - - return (irq); -} - -static int -bind_caller_port_to_irq(unsigned int caller_port, int * port) -{ - int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - - if ((irq = evtchn_to_irq[caller_port]) == -1) { - if ((irq = find_unbound_irq()) < 0) - goto out; - - evtchn_to_irq[caller_port] = irq; - irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port); - } - - irq_bindcount[irq]++; - *port = caller_port; - - out: - mtx_unlock_spin(&irq_mapping_update_lock); - return irq; -} - -static int -bind_local_port_to_irq(unsigned int local_port, int * port) -{ - int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - - KASSERT(evtchn_to_irq[local_port] == -1, - ("evtchn_to_irq inconsistent")); - - if ((irq = find_unbound_irq()) < 0) { - struct evtchn_close close = { .port = local_port }; - HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); - - goto out; - } - - evtchn_to_irq[local_port] = irq; - irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); - irq_bindcount[irq]++; - *port = local_port; - - out: - mtx_unlock_spin(&irq_mapping_update_lock); - return irq; -} - -static int -bind_listening_port_to_irq(unsigned int remote_domain, int * port) -{ - struct evtchn_alloc_unbound alloc_unbound; - int err; - - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = remote_domain; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - - return err ? : bind_local_port_to_irq(alloc_unbound.port, port); -} - -static int -bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port, int * port) -{ - struct evtchn_bind_interdomain bind_interdomain; - int err; - - bind_interdomain.remote_dom = remote_domain; - bind_interdomain.remote_port = remote_port; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - - return err ? 
: bind_local_port_to_irq(bind_interdomain.local_port, port); -} - -static int -bind_virq_to_irq(unsigned int virq, unsigned int cpu, int * port) -{ - struct evtchn_bind_virq bind_virq; - int evtchn = 0, irq; - - mtx_lock_spin(&irq_mapping_update_lock); - - if ((irq = pcpu_find(cpu)->pc_virq_to_irq[virq]) == -1) { - if ((irq = find_unbound_irq()) < 0) - goto out; - - bind_virq.virq = virq; - bind_virq.vcpu = cpu; - HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); - - evtchn = bind_virq.port; - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); - - pcpu_find(cpu)->pc_virq_to_irq[virq] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - - irq_bindcount[irq]++; - *port = evtchn; -out: - mtx_unlock_spin(&irq_mapping_update_lock); - - return irq; -} - - -static int -bind_ipi_to_irq(unsigned int ipi, unsigned int cpu, int * port) -{ - struct evtchn_bind_ipi bind_ipi; - int irq; - int evtchn = 0; - - mtx_lock_spin(&irq_mapping_update_lock); - - if ((irq = pcpu_find(cpu)->pc_ipi_to_irq[ipi]) == -1) { - if ((irq = find_unbound_irq()) < 0) - goto out; - - bind_ipi.vcpu = cpu; - HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); - evtchn = bind_ipi.port; - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); - - pcpu_find(cpu)->pc_ipi_to_irq[ipi] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - irq_bindcount[irq]++; - *port = evtchn; -out: - - mtx_unlock_spin(&irq_mapping_update_lock); - - return irq; -} - - -static void -unbind_from_irq(int irq) -{ - struct evtchn_close close; - int evtchn = evtchn_from_irq(irq); - int cpu; - - mtx_lock_spin(&irq_mapping_update_lock); - - if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { - close.port = evtchn; - HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); - - switch (type_from_irq(irq)) { - case IRQT_VIRQ: - cpu = cpu_from_evtchn(evtchn); - pcpu_find(cpu)->pc_virq_to_irq[index_from_irq(irq)] = -1; - break; - case IRQT_IPI: - cpu = cpu_from_evtchn(evtchn); - pcpu_find(cpu)->pc_ipi_to_irq[index_from_irq(irq)] = -1; - break; - default: - break; - } - - /* Closed ports are implicitly re-bound to VCPU0. 
*/ - bind_evtchn_to_cpu(evtchn, 0); - - evtchn_to_irq[evtchn] = -1; - irq_info[irq] = IRQ_UNBOUND; - } - - mtx_unlock_spin(&irq_mapping_update_lock); -} - -int -bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_caller_port_to_irq(caller_port, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, NULL, handler, arg, irqflags, - &xp->xp_pins[irq].xp_cookie); - - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - - if (irqp) - *irqp = irq; - - return (0); -} - -int -bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_listening_port_to_irq(remote_domain, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, NULL, handler, arg, irqflags, - &xp->xp_pins[irq].xp_cookie); - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - if (irqp) - *irqp = irq; - - return (0); -} - -int -bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, - driver_intr_t handler, void *arg, unsigned long irqflags, - unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, NULL, handler, arg, - irqflags, &xp->xp_pins[irq].xp_cookie); - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - - if (irqp) - *irqp = irq; - return (0); -} - -int -bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - const char *devname, driver_filter_t filter, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_virq_to_irq(virq, cpu, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, filter, handler, - arg, irqflags, &xp->xp_pins[irq].xp_cookie); - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - - if (irqp) - *irqp = irq; - return (0); -} - -int -bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, - const char *devname, driver_filter_t filter, - unsigned long irqflags, unsigned int *irqp) -{ - unsigned int irq; - int port = -1; - int error; - - irq = bind_ipi_to_irq(ipi, cpu, &port); - intr_register_source(&xp->xp_pins[irq].xp_intsrc); - error = intr_add_handler(devname, irq, filter, NULL, - NULL, irqflags, &xp->xp_pins[irq].xp_cookie); - if (error) { - unbind_from_irq(irq); - return (error); - } - if (port != -1) - unmask_evtchn(port); - - if (irqp) - *irqp = irq; - return (0); -} - -void -unbind_from_irqhandler(unsigned int irq) -{ - intr_remove_handler(xp->xp_pins[irq].xp_cookie); - unbind_from_irq(irq); -} - -#if 0 -/* Rebind an evtchn so that it gets delivered to a specific cpu */ -static void -rebind_irq_to_cpu(unsigned irq, unsigned tcpu) -{ - evtchn_op_t op = { .cmd = EVTCHNOP_bind_vcpu }; - int evtchn; - - mtx_lock_spin(&irq_mapping_update_lock); - - evtchn = evtchn_from_irq(irq); - if 
(!VALID_EVTCHN(evtchn)) { - mtx_unlock_spin(&irq_mapping_update_lock); - return; - } - - /* Send future instances of this interrupt to other vcpu. */ - bind_vcpu.port = evtchn; - bind_vcpu.vcpu = tcpu; - - /* - * If this fails, it usually just indicates that we're dealing with a - * virq or IPI channel, which don't actually need to be rebound. Ignore - * it, but don't do the xenlinux-level rebind in that case. - */ - if (HYPERVISOR_event_channel_op(&op) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu); - - mtx_unlock_spin(&irq_mapping_update_lock); - -} - -static void set_affinity_irq(unsigned irq, cpumask_t dest) -{ - unsigned tcpu = ffs(dest) - 1; - rebind_irq_to_cpu(irq, tcpu); -} -#endif - -/* - * Interface to generic handling in intr_machdep.c - */ - - -/*------------ interrupt handling --------------------------------------*/ -#define TODO printf("%s: not implemented!\n", __func__) - - -static void xenpic_dynirq_enable_source(struct intsrc *isrc); -static void xenpic_dynirq_disable_source(struct intsrc *isrc, int); -static void xenpic_dynirq_eoi_source(struct intsrc *isrc); -static void xenpic_dynirq_enable_intr(struct intsrc *isrc); -static void xenpic_dynirq_disable_intr(struct intsrc *isrc); - -static void xenpic_pirq_enable_source(struct intsrc *isrc); -static void xenpic_pirq_disable_source(struct intsrc *isrc, int); -static void xenpic_pirq_eoi_source(struct intsrc *isrc); -static void xenpic_pirq_enable_intr(struct intsrc *isrc); - - -static int xenpic_vector(struct intsrc *isrc); -static int xenpic_source_pending(struct intsrc *isrc); -static void xenpic_suspend(struct pic* pic); -static void xenpic_resume(struct pic* pic); -static int xenpic_assign_cpu(struct intsrc *, u_int apic_id); - - -struct pic xenpic_dynirq_template = { - .pic_enable_source = xenpic_dynirq_enable_source, - .pic_disable_source = xenpic_dynirq_disable_source, - .pic_eoi_source = xenpic_dynirq_eoi_source, - .pic_enable_intr = xenpic_dynirq_enable_intr, - .pic_disable_intr = xenpic_dynirq_disable_intr, - .pic_vector = xenpic_vector, - .pic_source_pending = xenpic_source_pending, - .pic_suspend = xenpic_suspend, - .pic_resume = xenpic_resume -}; - -struct pic xenpic_pirq_template = { - .pic_enable_source = xenpic_pirq_enable_source, - .pic_disable_source = xenpic_pirq_disable_source, - .pic_eoi_source = xenpic_pirq_eoi_source, - .pic_enable_intr = xenpic_pirq_enable_intr, - .pic_vector = xenpic_vector, - .pic_source_pending = xenpic_source_pending, - .pic_suspend = xenpic_suspend, - .pic_resume = xenpic_resume, - .pic_assign_cpu = xenpic_assign_cpu -}; - - - -void -xenpic_dynirq_enable_source(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - - mtx_lock_spin(&irq_mapping_update_lock); - if (xp->xp_masked) { - irq = xenpic_vector(isrc); - unmask_evtchn(evtchn_from_irq(irq)); - xp->xp_masked = FALSE; - } - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_dynirq_disable_source(struct intsrc *isrc, int foo) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - - mtx_lock_spin(&irq_mapping_update_lock); - if (!xp->xp_masked) { - irq = xenpic_vector(isrc); - mask_evtchn(evtchn_from_irq(irq)); - xp->xp_masked = TRUE; - } - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_dynirq_enable_intr(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - mtx_lock_spin(&irq_mapping_update_lock); - xp->xp_masked = 0; - irq = 
xenpic_vector(isrc); - unmask_evtchn(evtchn_from_irq(irq)); - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_dynirq_disable_intr(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - mask_evtchn(evtchn_from_irq(irq)); - xp->xp_masked = 1; - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_dynirq_eoi_source(struct intsrc *isrc) -{ - unsigned int irq; - struct xenpic_intsrc *xp; - - xp = (struct xenpic_intsrc *)isrc; - mtx_lock_spin(&irq_mapping_update_lock); - xp->xp_masked = 0; - irq = xenpic_vector(isrc); - unmask_evtchn(evtchn_from_irq(irq)); - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static int -xenpic_vector(struct intsrc *isrc) -{ - struct xenpic_intsrc *pin; - - pin = (struct xenpic_intsrc *)isrc; - //printf("xenpic_vector(): isrc=%p,vector=%u\n", pin, pin->xp_vector); - - return (pin->xp_vector); -} - -static int -xenpic_source_pending(struct intsrc *isrc) -{ - struct xenpic_intsrc *pin = (struct xenpic_intsrc *)isrc; - - /* XXXEN: TODO */ - printf("xenpic_source_pending(): vector=%x,masked=%x\n", - pin->xp_vector, pin->xp_masked); - -/* notify_remote_via_evtchn(pin->xp_vector); // XXX RS: Is this correct? */ - return 0; -} - -static void -xenpic_suspend(struct pic* pic) -{ - TODO; -} - -static void -xenpic_resume(struct pic* pic) -{ - TODO; -} - -static int -xenpic_assign_cpu(struct intsrc *isrc, u_int apic_id) -{ - TODO; - return (EOPNOTSUPP); -} - -void -notify_remote_via_irq(int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - notify_remote_via_evtchn(evtchn); - else - panic("invalid evtchn %d", irq); -} - -/* required for support of physical devices */ -static inline void -pirq_unmask_notify(int pirq) -{ - struct physdev_eoi eoi = { .irq = pirq }; - - if (unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0]))) { - (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); - } -} - -static inline void -pirq_query_unmask(int pirq) -{ - struct physdev_irq_status_query irq_status_query; - - irq_status_query.irq = pirq; - (void)HYPERVISOR_physdev_op(PHYSDEVOP_IRQ_STATUS_QUERY, &irq_status_query); - clear_bit(pirq, &pirq_needs_unmask_notify[0]); - if ( irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY ) - set_bit(pirq, &pirq_needs_unmask_notify[0]); -} - -/* - * On startup, if there is no action associated with the IRQ then we are - * probing. In this case we should not share with others as it will confuse us. - */ -#define probing_irq(_irq) (intr_lookup_source(irq) == NULL) - -static void -xenpic_pirq_enable_intr(struct intsrc *isrc) -{ - struct evtchn_bind_pirq bind_pirq; - int evtchn; - unsigned int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - goto out; - - bind_pirq.pirq = irq; - /* NB. We are happy to share unless we are probing. */ - bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; - - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { -#ifndef XEN_PRIVILEGED_GUEST - panic("unexpected pirq call"); -#endif - if (!probing_irq(irq)) /* Some failures are expected when probing. 
*/ - printf("Failed to obtain physical IRQ %d\n", irq); - mtx_unlock_spin(&irq_mapping_update_lock); - return; - } - evtchn = bind_pirq.port; - - pirq_query_unmask(irq_to_pirq(irq)); - - bind_evtchn_to_cpu(evtchn, 0); - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, evtchn); - - out: - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_pirq_enable_source(struct intsrc *isrc) -{ - int evtchn; - unsigned int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - goto done; - - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); - done: - mtx_unlock_spin(&irq_mapping_update_lock); -} - -static void -xenpic_pirq_disable_source(struct intsrc *isrc, int eoi) -{ - int evtchn; - unsigned int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - goto done; - - mask_evtchn(evtchn); - done: - mtx_unlock_spin(&irq_mapping_update_lock); -} - - -static void -xenpic_pirq_eoi_source(struct intsrc *isrc) -{ - int evtchn; - unsigned int irq; - - mtx_lock_spin(&irq_mapping_update_lock); - irq = xenpic_vector(isrc); - evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - goto done; - - unmask_evtchn(evtchn); - pirq_unmask_notify(irq_to_pirq(irq)); - done: - mtx_unlock_spin(&irq_mapping_update_lock); -} - -int -irq_to_evtchn_port(int irq) -{ - return evtchn_from_irq(irq); -} - -void -mask_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - synch_set_bit(port, &s->evtchn_mask[0]); -} - -void -unmask_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - unsigned int cpu = PCPU_GET(cpuid); - vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; - - /* Slow path (hypercall) if this is a non-local port. */ - if (unlikely(cpu != cpu_from_evtchn(port))) { - struct evtchn_unmask unmask = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); - return; - } - - synch_clear_bit(port, &s->evtchn_mask); - - /* - * The following is basically the equivalent of 'hw_resend_irq'. Just - * like a real IO-APIC we 'lose the interrupt edge' if the channel is - * masked. - */ - if (synch_test_bit(port, &s->evtchn_pending) && - !synch_test_and_set_bit(port / LONG_BIT, - &vcpu_info->evtchn_pending_sel)) { - vcpu_info->evtchn_upcall_pending = 1; - if (!vcpu_info->evtchn_upcall_mask) - force_evtchn_callback(); - } -} - -void irq_resume(void) -{ - evtchn_op_t op; - int cpu, pirq, virq, ipi, irq, evtchn; - - struct evtchn_bind_virq bind_virq; - struct evtchn_bind_ipi bind_ipi; - - init_evtchn_cpu_bindings(); - - /* New event-channel space is not 'live' yet. */ - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) - mask_evtchn(evtchn); - - /* Check that no PIRQs are still bound. */ - for (pirq = 0; pirq < NR_PIRQS; pirq++) { - KASSERT(irq_info[pirq_to_irq(pirq)] == IRQ_UNBOUND, - ("pirq_to_irq inconsistent")); - } - - /* Secondary CPUs must have no VIRQ or IPI bindings. */ - for (cpu = 1; cpu < XEN_LEGACY_MAX_VCPUS; cpu++) { - for (virq = 0; virq < NR_VIRQS; virq++) { - KASSERT(pcpu_find(cpu)->pc_virq_to_irq[virq] == -1, - ("virq_to_irq inconsistent")); - } - for (ipi = 0; ipi < NR_IPIS; ipi++) { - KASSERT(pcpu_find(cpu)->pc_ipi_to_irq[ipi] == -1, - ("ipi_to_irq inconsistent")); - } - } - - /* No IRQ <-> event-channel mappings. 
*/ - for (irq = 0; irq < NR_IRQS; irq++) - irq_info[irq] &= ~0xFFFF; /* zap event-channel binding */ - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) - evtchn_to_irq[evtchn] = -1; - - /* Primary CPU: rebind VIRQs automatically. */ - for (virq = 0; virq < NR_VIRQS; virq++) { - if ((irq = pcpu_find(0)->pc_virq_to_irq[virq]) == -1) - continue; - - KASSERT(irq_info[irq] == mk_irq_info(IRQT_VIRQ, virq, 0), - ("irq_info inconsistent")); - - /* Get a new binding from Xen. */ - bind_virq.virq = virq; - bind_virq.vcpu = 0; - HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); - evtchn = bind_virq.port; - - /* Record the new mapping. */ - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); - - /* Ready for use. */ - unmask_evtchn(evtchn); - } - - /* Primary CPU: rebind IPIs automatically. */ - for (ipi = 0; ipi < NR_IPIS; ipi++) { - if ((irq = pcpu_find(0)->pc_ipi_to_irq[ipi]) == -1) - continue; - - KASSERT(irq_info[irq] == mk_irq_info(IRQT_IPI, ipi, 0), - ("irq_info inconsistent")); - - /* Get a new binding from Xen. */ - memset(&op, 0, sizeof(op)); - bind_ipi.vcpu = 0; - HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); - evtchn = bind_ipi.port; - - /* Record the new mapping. */ - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); - - /* Ready for use. */ - unmask_evtchn(evtchn); - } -} - -static void -evtchn_init(void *dummy __unused) -{ - int i, cpu; - struct xenpic_intsrc *pin, *tpin; - - - init_evtchn_cpu_bindings(); - - /* No VIRQ or IPI bindings. */ - for (cpu = 0; cpu < mp_ncpus; cpu++) { - for (i = 0; i < NR_VIRQS; i++) - pcpu_find(cpu)->pc_virq_to_irq[i] = -1; - for (i = 0; i < NR_IPIS; i++) - pcpu_find(cpu)->pc_ipi_to_irq[i] = -1; - } - - /* No event-channel -> IRQ mappings. */ - for (i = 0; i < NR_EVENT_CHANNELS; i++) { - evtchn_to_irq[i] = -1; - mask_evtchn(i); /* No event channels are 'live' right now. */ - } - - /* No IRQ -> event-channel mappings. */ - for (i = 0; i < NR_IRQS; i++) - irq_info[i] = IRQ_UNBOUND; - - xp = malloc(sizeof(struct xenpic) + NR_IRQS*sizeof(struct xenpic_intsrc), - M_DEVBUF, M_WAITOK); - - xp->xp_dynirq_pic = &xenpic_dynirq_template; - xp->xp_pirq_pic = &xenpic_pirq_template; - xp->xp_numintr = NR_IRQS; - bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_IRQS); - - - /* We need to register our PIC's beforehand */ - if (intr_register_pic(&xenpic_pirq_template)) - panic("XEN: intr_register_pic() failure"); - if (intr_register_pic(&xenpic_dynirq_template)) - panic("XEN: intr_register_pic() failure"); - - /* - * Initialize the dynamic IRQ's - we initialize the structures, but - * we do not bind them (bind_evtchn_to_irqhandle() does this) - */ - pin = xp->xp_pins; - for (i = 0; i < NR_DYNIRQS; i++) { - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - irq_bindcount[dynirq_to_irq(i)] = 0; - - tpin = &pin[dynirq_to_irq(i)]; - tpin->xp_intsrc.is_pic = xp->xp_dynirq_pic; - tpin->xp_vector = dynirq_to_irq(i); - - } - /* - * Now, we go ahead and claim every PIRQ there is. - */ - pin = xp->xp_pins; - for (i = 0; i < NR_PIRQS; i++) { - /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - irq_bindcount[pirq_to_irq(i)] = 0; - -#ifdef RTC_IRQ - /* If not domain 0, force our RTC driver to fail its probe. 
*/ - if ((i == RTC_IRQ) && - !(xen_start_info->flags & SIF_INITDOMAIN)) - continue; -#endif - tpin = &pin[pirq_to_irq(i)]; - tpin->xp_intsrc.is_pic = xp->xp_pirq_pic; - tpin->xp_vector = pirq_to_irq(i); - - } -} - -SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_MIDDLE, evtchn_init, NULL); - diff --git a/sys/xen/evtchn/evtchn_dev.c b/sys/xen/evtchn/evtchn_dev.c index ab3bd173304f..9da26421e520 100644 --- a/sys/xen/evtchn/evtchn_dev.c +++ b/sys/xen/evtchn/evtchn_dev.c @@ -1,391 +1,358 @@ /****************************************************************************** * evtchn.c * * Xenolinux driver for receiving and demuxing event-channel signals. * * Copyright (c) 2004, K A Fraser */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include +#include -#include +#include +#include #include + #include -#include #include #include -#include -#include +#include typedef struct evtchn_sotfc { struct selinfo ev_rsel; } evtchn_softc_t; - -#ifdef linuxcrap -/* NB. This must be shared amongst drivers if more things go in /dev/xen */ -static devfs_handle_t xen_dev_dir; -#endif - /* Only one process may open /dev/xen/evtchn at any time. */ static unsigned long evtchn_dev_inuse; /* Notification ring, accessed via /dev/xen/evtchn. */ #define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */ #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) static uint16_t *ring; static unsigned int ring_cons, ring_prod, ring_overflow; /* Which ports is user-space bound to? */ static uint32_t bound_ports[32]; /* Unique address for processes to sleep on */ static void *evtchn_waddr = ˚ static struct mtx lock, upcall_lock; static d_read_t evtchn_read; static d_write_t evtchn_write; static d_ioctl_t evtchn_ioctl; static d_poll_t evtchn_poll; static d_open_t evtchn_open; static d_close_t evtchn_close; void -evtchn_device_upcall(int port) +evtchn_device_upcall(evtchn_port_t port) { mtx_lock(&upcall_lock); - mask_evtchn(port); - clear_evtchn(port); + evtchn_mask_port(port); + evtchn_clear_port(port); if ( ring != NULL ) { if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port; if ( ring_cons == ring_prod++ ) { wakeup(evtchn_waddr); } } else { ring_overflow = 1; } } mtx_unlock(&upcall_lock); } static void __evtchn_reset_buffer_ring(void) { /* Initialise the ring to empty. Clear errors. */ ring_cons = ring_prod = ring_overflow = 0; } static int evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) { int rc; unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0; count = uio->uio_resid; count &= ~1; /* even number of bytes */ if ( count == 0 ) { rc = 0; goto out; } if ( count > PAGE_SIZE ) count = PAGE_SIZE; for ( ; ; ) { if ( (c = ring_cons) != (p = ring_prod) ) break; if ( ring_overflow ) { rc = EFBIG; goto out; } if (sst != 0) { rc = EINTR; goto out; } /* PCATCH == check for signals before and after sleeping * PWAIT == priority of waiting on resource */ sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10); } /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) { bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t); bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t); } else { bytes1 = (p - c) * sizeof(uint16_t); bytes2 = 0; } /* Truncate chunks according to caller's maximum byte count. 
*/ if ( bytes1 > count ) { bytes1 = count; bytes2 = 0; } else if ( (bytes1 + bytes2) > count ) { bytes2 = count - bytes1; } if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) || ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) /* keeping this around as its replacement is not equivalent * copyout(&ring[0], &buf[bytes1], bytes2) */ { rc = EFAULT; goto out; } ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); rc = bytes1 + bytes2; out: return rc; } static int evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) { int rc, i, count; count = uio->uio_resid; uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); if ( kbuf == NULL ) return ENOMEM; count &= ~1; /* even number of bytes */ if ( count == 0 ) { rc = 0; goto out; } if ( count > PAGE_SIZE ) count = PAGE_SIZE; if ( uiomove(kbuf, count, uio) != 0 ) { rc = EFAULT; goto out; } mtx_lock_spin(&lock); for ( i = 0; i < (count/2); i++ ) if ( test_bit(kbuf[i], &bound_ports[0]) ) - unmask_evtchn(kbuf[i]); + evtchn_unmask_port(kbuf[i]); mtx_unlock_spin(&lock); rc = count; out: free(kbuf, M_DEVBUF); return rc; } static int evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int mode, struct thread *td __unused) { int rc = 0; +#ifdef NOTYET mtx_lock_spin(&lock); switch ( cmd ) { case EVTCHN_RESET: __evtchn_reset_buffer_ring(); break; case EVTCHN_BIND: if ( !synch_test_and_set_bit((uintptr_t)arg, &bound_ports[0]) ) unmask_evtchn((uintptr_t)arg); else rc = EINVAL; break; case EVTCHN_UNBIND: if ( synch_test_and_clear_bit((uintptr_t)arg, &bound_ports[0]) ) mask_evtchn((uintptr_t)arg); else rc = EINVAL; break; default: rc = ENOSYS; break; } mtx_unlock_spin(&lock); +#endif return rc; } static int evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) { evtchn_softc_t *sc; unsigned int mask = POLLOUT | POLLWRNORM; sc = dev->si_drv1; if ( ring_cons != ring_prod ) mask |= POLLIN | POLLRDNORM; else if ( ring_overflow ) mask = POLLERR; else selrecord(td, &sc->ev_rsel); return mask; } static int evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) { uint16_t *_ring; if (flag & O_NONBLOCK) return EBUSY; if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) return EBUSY; if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) return ENOMEM; mtx_lock_spin(&lock); ring = _ring; __evtchn_reset_buffer_ring(); mtx_unlock_spin(&lock); return 0; } static int evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) { int i; if (ring != NULL) { free(ring, M_DEVBUF); ring = NULL; } mtx_lock_spin(&lock); for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) - mask_evtchn(i); + evtchn_mask_port(i); mtx_unlock_spin(&lock); evtchn_dev_inuse = 0; return 0; } static struct cdevsw evtchn_devsw = { .d_version = D_VERSION, .d_open = evtchn_open, .d_close = evtchn_close, .d_read = evtchn_read, .d_write = evtchn_write, .d_ioctl = evtchn_ioctl, .d_poll = evtchn_poll, .d_name = "evtchn", }; /* XXX - if this device is ever supposed to support use by more than one process * this global static will have to go away */ static struct cdev *evtchn_dev; static int evtchn_dev_init(void *dummy __unused) { /* XXX I believe we don't need these leaving them here for now until we * have some semblance of it working */ mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); /* (DEVFS) create '/dev/misc/evtchn'. 
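/*
 * The character device above hands pending ports to userland as an array of
 * 16-bit port numbers: read(2) drains the 2048-entry notification ring
 * (failing with EFBIG once the ring has overflowed), and write(2)-ing the
 * same port numbers back unmasks them so further events can be delivered.
 * A sketch of a poll(2)-driven consumer loop; opening the device and binding
 * ports (see the ioctl definitions earlier) is assumed to have happened.
 */
#include <err.h>
#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

void
evtchn_consume(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	uint16_t ports[64];
	ssize_t n;
	int i;

	for (;;) {
		if (poll(&pfd, 1, -1) < 0)
			err(1, "poll");

		/* Reads always return an even number of bytes. */
		n = read(fd, ports, sizeof(ports));
		if (n < 0)
			err(1, "read");	/* EFBIG: ring overflowed */

		for (i = 0; i < n / 2; i++)
			printf("event pending on port %u\n", ports[i]);

		/* Re-enable delivery on everything just handled. */
		if (write(fd, ports, n) != n)
			err(1, "write");
	}
}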
*/ evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); - /* XXX I don't think we need any of this rubbish */ -#if 0 - if ( err != 0 ) - { - printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); - return err; - } - - /* (DEVFS) create directory '/dev/xen'. */ - xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); - - /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ - pos = devfs_generate_path(evtchn_miscdev.devfs_handle, - &link_dest[3], - sizeof(link_dest) - 3); - if ( pos >= 0 ) - strncpy(&link_dest[pos], "../", 3); - /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ - (void)devfs_mk_symlink(xen_dev_dir, - "evtchn", - DEVFS_FL_DEFAULT, - &link_dest[pos], - &symlink_handle, - NULL); - - /* (DEVFS) automatically destroy the symlink with its destination. */ - devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); -#endif if (bootverbose) printf("Event-channel device installed.\n"); return 0; } SYSINIT(evtchn_dev_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_dev_init, NULL); - - diff --git a/sys/xen/evtchn/evtchnvar.h b/sys/xen/evtchn/evtchnvar.h new file mode 100644 index 000000000000..8008d23297d3 --- /dev/null +++ b/sys/xen/evtchn/evtchnvar.h @@ -0,0 +1,105 @@ +/****************************************************************************** + * evtchn.h + * + * Data structures and definitions private to the FreeBSD implementation + * of the Xen event channel API. + * + * Copyright (c) 2004, K A Fraser + * Copyright (c) 2012, Spectra Logic Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef __XEN_EVTCHN_EVTCHNVAR_H__ +#define __XEN_EVTCHN_EVTCHNVAR_H__ + +#include +#include + +enum evtchn_type { + EVTCHN_TYPE_UNBOUND, + EVTCHN_TYPE_PIRQ, + EVTCHN_TYPE_VIRQ, + EVTCHN_TYPE_IPI, + EVTCHN_TYPE_PORT, + EVTCHN_TYPE_COUNT +}; + +/** Submit a port notification for delivery to a userland evtchn consumer */ +void evtchn_device_upcall(evtchn_port_t port); + +/** + * Disable signal delivery for an event channel port, returning its + * previous mask state. + * + * \param port The event channel port to query and mask. + * + * \returns 1 if event delivery was previously disabled. 
Otherwise 0. + */ +static inline int +evtchn_test_and_set_mask(evtchn_port_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + return synch_test_and_set_bit(port, s->evtchn_mask); +} + +/** + * Clear any pending event for the given event channel port. + * + * \param port The event channel port to clear. + */ +static inline void +evtchn_clear_port(evtchn_port_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_clear_bit(port, &s->evtchn_pending[0]); +} + +/** + * Disable signal delivery for an event channel port. + * + * \param port The event channel port to mask. + */ +static inline void +evtchn_mask_port(evtchn_port_t port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + synch_set_bit(port, &s->evtchn_mask[0]); +} + +/** + * Enable signal delivery for an event channel port. + * + * \param port The event channel port to enable. + */ +static inline void +evtchn_unmask_port(evtchn_port_t port) +{ + evtchn_unmask_t op = { .port = port }; + + HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op); +} + +#endif /* __XEN_EVTCHN_EVTCHNVAR_H__ */ diff --git a/sys/xen/features.c b/sys/xen/features.c index f28fe049177c..bbb28968935a 100644 --- a/sys/xen/features.c +++ b/sys/xen/features.c @@ -1,26 +1,26 @@ #include __FBSDID("$FreeBSD$"); #include #include -#include +#include #include #include uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32] /* __read_mostly */; void setup_xen_features(void) { xen_feature_info_t fi; int i, j; for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { fi.submap_idx = i; if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) break; for (j = 0; j < 32; j++) xen_features[i*32 + j] = !!(fi.submap & 1< __FBSDID("$FreeBSD$"); #include "opt_global.h" #include "opt_pmap.h" #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include #define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c)) /* External tools reserve first few grant table entries. 
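/*
 * The inline port helpers above capture the acknowledge/complete sequence
 * used elsewhere in this change: mask and clear a port before handing it
 * off (as evtchn_device_upcall() does), and unmask it once the consumer is
 * finished.  A condensed sketch of that pattern; the function names are
 * hypothetical and the evtchnvar.h environment is assumed.
 */
#include <xen/evtchn/evtchnvar.h>

static void
example_port_ack(evtchn_port_t port)
{
	/* Stop further upcalls for this port and retire the pending bit. */
	evtchn_mask_port(port);
	evtchn_clear_port(port);

	/* ... queue the port or run its handler ... */
}

static void
example_port_complete(evtchn_port_t port)
{
	/*
	 * Re-enable delivery; the EVTCHNOP_unmask hypercall also re-raises
	 * the event if it became pending again while masked.
	 */
	evtchn_unmask_port(port);
}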
*/ #define NR_RESERVED_ENTRIES 8 #define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; static unsigned int boot_max_nr_grant_frames; static int gnttab_free_count; static grant_ref_t gnttab_free_head; static struct mtx gnttab_list_lock; static grant_entry_t *shared; static struct gnttab_free_callback *gnttab_free_callback_list = NULL; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) #define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) static int get_free_entries(int count, int *entries) { int ref, error; grant_ref_t head; mtx_lock(&gnttab_list_lock); if ((gnttab_free_count < count) && ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { mtx_unlock(&gnttab_list_lock); return (error); } ref = head = gnttab_free_head; gnttab_free_count -= count; while (count-- > 1) head = gnttab_entry(head); gnttab_free_head = gnttab_entry(head); gnttab_entry(head) = GNTTAB_LIST_END; mtx_unlock(&gnttab_list_lock); *entries = ref; return (0); } static void do_free_callbacks(void) { struct gnttab_free_callback *callback, *next; callback = gnttab_free_callback_list; gnttab_free_callback_list = NULL; while (callback != NULL) { next = callback->next; if (gnttab_free_count >= callback->count) { callback->next = NULL; callback->fn(callback->arg); } else { callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; } callback = next; } } static inline void check_free_callbacks(void) { - if (unlikely(gnttab_free_callback_list != NULL)) + if (__predict_false(gnttab_free_callback_list != NULL)) do_free_callbacks(); } static void put_free_entry(grant_ref_t ref) { mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = ref; gnttab_free_count++; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } /* * Public grant-issuing interface functions */ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); - if (unlikely(error)) + if (__predict_false(error)) return (error); shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); if (result) *result = ref; return (0); } void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) { shared[ref].frame = frame; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); } int gnttab_query_foreign_access(grant_ref_t ref) { uint16_t nflags; nflags = shared[ref].flags; return (nflags & (GTF_reading|GTF_writing)); } int gnttab_end_foreign_access_ref(grant_ref_t ref) { uint16_t flags, nflags; nflags = shared[ref].flags; do { if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { printf("%s: WARNING: g.e. still in use!\n", __func__); return (0); } } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != flags); return (1); } void gnttab_end_foreign_access(grant_ref_t ref, void *page) { if (gnttab_end_foreign_access_ref(ref)) { put_free_entry(ref); if (page != NULL) { free(page, M_DEVBUF); } } else { /* XXX This needs to be fixed so that the ref and page are placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. 
and page still in use!\n", __func__); } } void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs) { grant_ref_t *last_ref; grant_ref_t head; grant_ref_t tail; head = GNTTAB_LIST_END; tail = *refs; last_ref = refs + count; while (refs != last_ref) { if (gnttab_end_foreign_access_ref(*refs)) { gnttab_entry(*refs) = head; head = *refs; } else { /* * XXX This needs to be fixed so that the ref * is placed on a list to be freed up later. */ printf("%s: WARNING: leaking g.e. still in use!\n", __func__); count--; } refs++; } if (count != 0) { mtx_lock(&gnttab_list_lock); gnttab_free_count += count; gnttab_entry(tail) = gnttab_free_head; gnttab_free_head = head; mtx_unlock(&gnttab_list_lock); } } int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result) { int error, ref; error = get_free_entries(1, &ref); - if (unlikely(error)) + if (__predict_false(error)) return (error); gnttab_grant_foreign_transfer_ref(ref, domid, pfn); *result = ref; return (0); } void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { shared[ref].frame = pfn; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_accept_transfer; } unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { unsigned long frame; uint16_t flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) return (0); cpu_relax(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { flags = shared[ref].flags; cpu_relax(); } /* Read the frame number /after/ reading completion status. 
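/*
 * A sketch of the foreign-access lifecycle implemented above, as a frontend
 * driver might use it: share one page read-only with a backend domain, then
 * revoke the grant when the protocol is torn down.  The backend domid and
 * the buffer come from a hypothetical negotiation (typically via xenstore);
 * deriving the frame with vtophys() is adequate for an HVM guest, and
 * <xen/gnttab.h> is the header updated by this change.
 */
#include <sys/param.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <xen/gnttab.h>

static int
share_page_with_backend(domid_t backend, void *page, grant_ref_t *refp)
{
	unsigned long frame;

	frame = vtophys(page) >> PAGE_SHIFT;
	return (gnttab_grant_foreign_access(backend, frame, /*readonly*/1, refp));
}

static void
unshare_page(grant_ref_t ref, void *page)
{
	/*
	 * Frees the reference (and the page) only once the remote domain has
	 * dropped its mapping; otherwise it warns and leaks, as the code
	 * above notes.
	 */
	gnttab_end_foreign_access(ref, page);
}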
*/ rmb(); frame = shared[ref].frame; KASSERT(frame != 0, ("grant table inconsistent")); return (frame); } unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) { unsigned long frame = gnttab_end_foreign_transfer_ref(ref); put_free_entry(ref); return (frame); } void gnttab_free_grant_reference(grant_ref_t ref) { put_free_entry(ref); } void gnttab_free_grant_references(grant_ref_t head) { grant_ref_t ref; int count = 1; if (head == GNTTAB_LIST_END) return; ref = head; while (gnttab_entry(ref) != GNTTAB_LIST_END) { ref = gnttab_entry(ref); count++; } mtx_lock(&gnttab_list_lock); gnttab_entry(ref) = gnttab_free_head; gnttab_free_head = head; gnttab_free_count += count; check_free_callbacks(); mtx_unlock(&gnttab_list_lock); } int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) { int ref, error; error = get_free_entries(count, &ref); - if (unlikely(error)) + if (__predict_false(error)) return (error); *head = ref; return (0); } int gnttab_empty_grant_references(const grant_ref_t *private_head) { return (*private_head == GNTTAB_LIST_END); } int gnttab_claim_grant_reference(grant_ref_t *private_head) { grant_ref_t g = *private_head; - if (unlikely(g == GNTTAB_LIST_END)) + if (__predict_false(g == GNTTAB_LIST_END)) return (g); *private_head = gnttab_entry(g); return (g); } void gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) { gnttab_entry(release) = *private_head; *private_head = release; } void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count) { mtx_lock(&gnttab_list_lock); if (callback->next) goto out; callback->fn = fn; callback->arg = arg; callback->count = count; callback->next = gnttab_free_callback_list; gnttab_free_callback_list = callback; check_free_callbacks(); out: mtx_unlock(&gnttab_list_lock); } void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) { struct gnttab_free_callback **pcb; mtx_lock(&gnttab_list_lock); for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { if (*pcb == callback) { *pcb = callback->next; break; } } mtx_unlock(&gnttab_list_lock); } static int grow_gnttab_list(unsigned int more_frames) { unsigned int new_nr_grant_frames, extra_entries, i; new_nr_grant_frames = nr_grant_frames + more_frames; extra_entries = more_frames * GREFS_PER_GRANT_FRAME; for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (!gnttab_list[i]) goto grow_nomem; } for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(i) = gnttab_free_head; gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; gnttab_free_count += extra_entries; nr_grant_frames = new_nr_grant_frames; check_free_callbacks(); return (0); grow_nomem: for ( ; i >= nr_grant_frames; i--) free(gnttab_list[i], M_DEVBUF); return (ENOMEM); } static unsigned int __max_nr_grant_frames(void) { struct gnttab_query_size query; int rc; query.dom = DOMID_SELF; rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); if ((rc < 0) || (query.status != GNTST_okay)) return (4); /* Legacy max supported number of frames */ return (query.max_nr_frames); } static inline unsigned int max_nr_grant_frames(void) { unsigned int xen_max = __max_nr_grant_frames(); if (xen_max > boot_max_nr_grant_frames) return (boot_max_nr_grant_frames); return (xen_max); } #ifdef notyet /* * XXX needed for backend support * */ static int 
map_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { unsigned long **frames = (unsigned long **)data; set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); (*frames)++; return 0; } static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } #endif #ifndef XENHVM static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; u_long *frames; unsigned int nr_gframes = end_idx + 1; int i, rc; frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT); if (!frames) return (ENOMEM); setup.dom = DOMID_SELF; setup.nr_frames = nr_gframes; set_xen_guest_handle(setup.frame_list, frames); rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); if (rc == -ENOSYS) { free(frames, M_DEVBUF); return (ENOSYS); } KASSERT(!(rc || setup.status), ("unexpected result from grant_table_op")); if (shared == NULL) { vm_offset_t area; area = kva_alloc(PAGE_SIZE * max_nr_grant_frames()); KASSERT(area, ("can't allocate VM space for grant table")); shared = (grant_entry_t *)area; } for (i = 0; i < nr_gframes; i++) PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE, ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V); free(frames, M_DEVBUF); return (0); } int gnttab_resume(void) { if (max_nr_grant_frames() < nr_grant_frames) return (ENOSYS); return (gnttab_map(0, nr_grant_frames - 1)); } int gnttab_suspend(void) { int i; for (i = 0; i < nr_grant_frames; i++) pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE); return (0); } #else /* XENHVM */ #include static vm_paddr_t resume_frames; static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct xen_add_to_physmap xatp; unsigned int i = end_idx; /* * Loop backwards, so that the first hypercall has the largest index, * ensuring that the table will grow only once. 
*/ do { xatp.domid = DOMID_SELF; xatp.idx = i; xatp.space = XENMAPSPACE_grant_table; xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) panic("HYPERVISOR_memory_op failed to map gnttab"); } while (i-- > start_idx); if (shared == NULL) { vm_offset_t area; area = kva_alloc(PAGE_SIZE * max_nr_grant_frames()); KASSERT(area, ("can't allocate VM space for grant table")); shared = (grant_entry_t *)area; } for (i = start_idx; i <= end_idx; i++) { pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE, resume_frames + i * PAGE_SIZE); } return (0); } int gnttab_resume(void) { int error; unsigned int max_nr_gframes, nr_gframes; nr_gframes = nr_grant_frames; max_nr_gframes = max_nr_grant_frames(); if (max_nr_gframes < nr_gframes) return (ENOSYS); if (!resume_frames) { error = xenpci_alloc_space(PAGE_SIZE * max_nr_gframes, &resume_frames); if (error) { printf("error mapping gnttab share frames\n"); return (error); } } return (gnttab_map(0, nr_gframes - 1)); } #endif static int gnttab_expand(unsigned int req_entries) { int error; unsigned int cur, extra; cur = nr_grant_frames; extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / GREFS_PER_GRANT_FRAME); if (cur + extra > max_nr_grant_frames()) return (ENOSPC); error = gnttab_map(cur, cur + extra - 1); if (!error) error = grow_gnttab_list(extra); return (error); } int gnttab_init() { int i; unsigned int max_nr_glist_frames; unsigned int nr_init_grefs; if (!is_running_on_xen()) return (ENODEV); nr_grant_frames = 1; boot_max_nr_grant_frames = __max_nr_grant_frames(); /* Determine the maximum number of frames required for the * grant reference free list on the current hypervisor. */ max_nr_glist_frames = (boot_max_nr_grant_frames * GREFS_PER_GRANT_FRAME / (PAGE_SIZE / sizeof(grant_ref_t))); gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), M_DEVBUF, M_NOWAIT); if (gnttab_list == NULL) return (ENOMEM); for (i = 0; i < nr_grant_frames; i++) { gnttab_list[i] = (grant_ref_t *) malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (gnttab_list[i] == NULL) goto ini_nomem; } if (gnttab_resume()) return (ENODEV); nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) gnttab_entry(i) = i + 1; gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; gnttab_free_head = NR_RESERVED_ENTRIES; if (bootverbose) printf("Grant table initialized\n"); return (0); ini_nomem: for (i--; i >= 0; i--) free(gnttab_list[i], M_DEVBUF); free(gnttab_list, M_DEVBUF); return (ENOMEM); } MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF); diff --git a/sys/xen/gnttab.h b/sys/xen/gnttab.h index 1741ec338720..d81e965fc43a 100644 --- a/sys/xen/gnttab.h +++ b/sys/xen/gnttab.h @@ -1,173 +1,172 @@ /****************************************************************************** * gnttab.h * * Two sets of functionality: * 1. Granting foreign access to our memory reservation. * 2. Accessing others' memory reservations via grant references. 
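/*
 * gnttab.c above also exports a batched allocation scheme, so a driver can
 * reserve a private pool of references up front (say one per ring slot) and
 * claim them from its hot path without touching the global free list lock.
 * A sketch of that usage; the pool size, domid handling, and names are
 * hypothetical.
 */
#include <sys/param.h>
#include <sys/errno.h>

#include <xen/gnttab.h>

#define MY_POOL_SIZE	32

static grant_ref_t gref_head;

static int
my_pool_init(void)
{
	/* Carve MY_POOL_SIZE references out of the global free list. */
	return (gnttab_alloc_grant_references(MY_POOL_SIZE, &gref_head));
}

static int
my_pool_grant(domid_t domid, unsigned long frame, grant_ref_t *refp)
{
	grant_ref_t ref;

	ref = gnttab_claim_grant_reference(&gref_head);
	if (ref == GNTTAB_LIST_END)
		return (ENOSPC);		/* private pool exhausted */
	gnttab_grant_foreign_access_ref(ref, domid, frame, /*readonly*/0);
	*refp = ref;
	return (0);
}

static void
my_pool_put(grant_ref_t ref)
{
	/*
	 * Only recycle the reference if the peer no longer has it mapped;
	 * a production driver would park still-busy references for later.
	 */
	if (gnttab_end_foreign_access_ref(ref) != 0)
		gnttab_release_grant_reference(&gref_head, ref);
}

static void
my_pool_fini(void)
{
	gnttab_free_grant_references(gref_head);
}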
* (i.e., mechanisms for both sender and recipient of grant references) * * Copyright (c) 2004-2005, K A Fraser * Copyright (c) 2005, Christopher Clark * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 * as published by the Free Software Foundation; or, when distributed * separately from the Linux kernel or incorporated into other * software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #ifndef __ASM_GNTTAB_H__ -#include - +#include #include -#include -#include #include +#include + #define GNTTAB_LIST_END GRANT_REF_INVALID struct gnttab_free_callback { struct gnttab_free_callback *next; void (*fn)(void *); void *arg; uint16_t count; }; int gnttab_init(void); /* * Allocate a grant table reference and return it in *result. Returns * zero on success or errno on error. */ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int flags, grant_ref_t *result); /* * End access through the given grant reference, iff the grant entry is no * longer in use. Return 1 if the grant entry was freed, 0 if it is still in * use. */ int gnttab_end_foreign_access_ref(grant_ref_t ref); /* * Eventually end access through the given grant reference, and once that * access has been ended, free the given page too. Access will be ended * immediately iff the grant entry is not in use, otherwise it will happen * some time later. page may be 0, in which case no freeing will occur. */ void gnttab_end_foreign_access(grant_ref_t ref, void *page); /* * Eventually end access through the given array of grant references. 
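As a usage illustration of gnttab_grant_foreign_access() and gnttab_end_foreign_access() as documented above (this is not code from the change; the peer id, frame, and page are placeholders, and on PV the frame must be a machine frame number):

static int
example_grant_page(domid_t peer, unsigned long frame, grant_ref_t *refp)
{

        /* flags == 0 requests a read/write grant. */
        return (gnttab_grant_foreign_access(peer, frame, 0, refp));
}

static void
example_ungrant_page(grant_ref_t ref, void *page)
{

        /*
         * Access ends immediately if the peer no longer maps the page,
         * otherwise some time later; 'page' is freed once access has
         * actually ended.
         */
        gnttab_end_foreign_access(ref, page);
}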
* Access will be ended immediately iff the grant entry is not in use, * otherwise it will happen some time later */ void gnttab_end_foreign_access_references(u_int count, grant_ref_t *refs); int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); int gnttab_query_foreign_access(grant_ref_t ref); /* * operations on reserved batches of grant references */ int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *pprivate_head); void gnttab_free_grant_reference(grant_ref_t ref); void gnttab_free_grant_references(grant_ref_t head); int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); void gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release); void gnttab_request_free_callback(struct gnttab_free_callback *callback, void (*fn)(void *), void *arg, uint16_t count); void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int flags); void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, unsigned long pfn); int gnttab_suspend(void); int gnttab_resume(void); #if 0 #include static inline void gnttab_set_map_op(struct gnttab_map_grant_ref *map, vm_paddr_t addr, uint32_t flags, grant_ref_t ref, domid_t domid) { if (flags & GNTMAP_contains_pte) map->host_addr = addr; else if (xen_feature(XENFEAT_auto_translated_physmap)) map->host_addr = vtophys(addr); else map->host_addr = addr; map->flags = flags; map->ref = ref; map->dom = domid; } static inline void gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, vm_paddr_t addr, uint32_t flags, grant_handle_t handle) { if (flags & GNTMAP_contains_pte) unmap->host_addr = addr; else if (xen_feature(XENFEAT_auto_translated_physmap)) unmap->host_addr = vtophys(addr); else unmap->host_addr = addr; unmap->handle = handle; unmap->dev_bus_addr = 0; } static inline void gnttab_set_replace_op(struct gnttab_unmap_and_replace *unmap, vm_paddr_t addr, vm_paddr_t new_addr, grant_handle_t handle) { if (xen_feature(XENFEAT_auto_translated_physmap)) { unmap->host_addr = vtophys(addr); unmap->new_addr = vtophys(new_addr); } else { unmap->host_addr = addr; unmap->new_addr = new_addr; } unmap->handle = handle; } #endif #endif /* __ASM_GNTTAB_H__ */ diff --git a/sys/xen/hvm.h b/sys/xen/hvm.h index 338a107ab84d..562aaf93ab05 100644 --- a/sys/xen/hvm.h +++ b/sys/xen/hvm.h @@ -1,94 +1,98 @@ /* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef __XEN_HVM_H__ #define __XEN_HVM_H__ +#include +#include + #include /** * \brief Wrapper function to obtain a HVM parameter value. * * \param index HVM parameter index; see . * * \returns 0 on failure; the value of the parameter otherwise. */ static inline unsigned long hvm_get_parameter(int index) { struct xen_hvm_param xhv; int error; xhv.domid = DOMID_SELF; xhv.index = index; error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); if (error) { printf("%s: error %d trying to get %d\n", __func__, error, index); return (0); } return (xhv.value); } /** The callback method types for Hypervisor event delivery to our domain. */ enum { HVM_CB_TYPE_GSI, HVM_CB_TYPE_PCI_INTX, HVM_CB_TYPE_VECTOR, HVM_CB_TYPE_MASK = 0xFF, HVM_CB_TYPE_SHIFT = 56 }; /** Format for specifying a GSI type callback. */ enum { HVM_CB_GSI_GSI_MASK = 0xFFFFFFFF, HVM_CB_GSI_GSI_SHIFT = 0 }; #define HVM_CALLBACK_GSI(gsi) \ (((uint64_t)HVM_CB_TYPE_GSI << HVM_CB_TYPE_SHIFT) \ | ((gsi) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT) /** Format for specifying a virtual PCI interrupt line GSI style callback. */ enum { HVM_CB_PCI_INTX_INTPIN_MASK = 0x3, HVM_CB_PCI_INTX_INTPIN_SHIFT = 0, HVM_CB_PCI_INTX_SLOT_MASK = 0x1F, HVM_CB_PCI_INTX_SLOT_SHIFT = 11, }; #define HVM_CALLBACK_PCI_INTX(slot, pin) \ (((uint64_t)HVM_CB_TYPE_PCI_INTX << HVM_CB_TYPE_SHIFT) \ | (((slot) & HVM_CB_PCI_INTX_SLOT_MASK) << HVM_CB_PCI_INTX_SLOT_SHIFT) \ | (((pin) & HVM_CB_PCI_INTX_INTPIN_MASK) << HVM_CB_PCI_INTX_INTPIN_SHIFT)) /** Format for specifying a direct IDT vector injection style callback. */ enum { HVM_CB_VECTOR_VECTOR_MASK = 0xFFFFFFFF, HVM_CB_VECTOR_VECTOR_SHIFT = 0 }; #define HVM_CALLBACK_VECTOR(vector) \ (((uint64_t)HVM_CB_TYPE_VECTOR << HVM_CB_TYPE_SHIFT) \ | (((vector) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT)) void xen_hvm_set_callback(device_t); void xen_hvm_suspend(void); void xen_hvm_resume(void); +void xen_hvm_init_cpu(void); #endif /* __XEN_HVM_H__ */ diff --git a/sys/xen/interface/event_channel.h b/sys/xen/interface/event_channel.h index 07ff32194b40..65d19113fc23 100644 --- a/sys/xen/interface/event_channel.h +++ b/sys/xen/interface/event_channel.h @@ -1,294 +1,297 @@ /****************************************************************************** * event_channel.h * * Event channels between domains. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
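The HVM_CALLBACK_* macros above only build the encoded 64-bit value; installing it means writing the HVM_PARAM_CALLBACK_IRQ parameter. A minimal sketch mirroring hvm_get_parameter(), assuming the standard HVMOP_set_param and HVM_PARAM_CALLBACK_IRQ definitions from the Xen public headers (neither is shown in this diff):

static int
hvm_set_callback_vector(int vector)
{
        struct xen_hvm_param xhv;

        xhv.domid = DOMID_SELF;
        xhv.index = HVM_PARAM_CALLBACK_IRQ;
        xhv.value = HVM_CALLBACK_VECTOR(vector);
        return (HYPERVISOR_hvm_op(HVMOP_set_param, &xhv));
}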
IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2003-2004, K A Fraser. */ #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ #include "xen.h" /* * `incontents 150 evtchn Event Channels * * Event channels are the basic primitive provided by Xen for event * notifications. An event is the Xen equivalent of a hardware * interrupt. They essentially store one bit of information, the event * of interest is signalled by transitioning this bit from 0 to 1. * * Notifications are received by a guest via an upcall from Xen, * indicating when an event arrives (setting the bit). Further * notifications are masked until the bit is cleared again (therefore, * guests must check the value of the bit after re-enabling event * delivery to ensure no missed notifications). * * Event notifications can be masked by setting a flag; this is * equivalent to disabling interrupts and can be used to ensure * atomicity of certain operations in the guest kernel. * * Event channels are represented by the evtchn_* fields in * struct shared_info and struct vcpu_info. */ /* * ` enum neg_errnoval * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) * ` * @cmd == EVTCHNOP_* (event-channel operation). * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). */ /* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ #define EVTCHNOP_bind_interdomain 0 #define EVTCHNOP_bind_virq 1 #define EVTCHNOP_bind_pirq 2 #define EVTCHNOP_close 3 #define EVTCHNOP_send 4 #define EVTCHNOP_status 5 #define EVTCHNOP_alloc_unbound 6 #define EVTCHNOP_bind_ipi 7 #define EVTCHNOP_bind_vcpu 8 #define EVTCHNOP_unmask 9 #define EVTCHNOP_reset 10 /* ` } */ +#ifndef __XEN_EVTCHN_PORT_DEFINED__ typedef uint32_t evtchn_port_t; DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); +#define __XEN_EVTCHN_PORT_DEFINED__ 1 +#endif /* * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as * accepting interdomain bindings from domain . A fresh port * is allocated in and returned as . * NOTES: * 1. If the caller is unprivileged then must be DOMID_SELF. * 2. may be DOMID_SELF, allowing loopback connections. */ struct evtchn_alloc_unbound { /* IN parameters */ domid_t dom, remote_dom; /* OUT parameters */ evtchn_port_t port; }; typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; /* * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between * the calling domain and . must identify * a port that is unbound and marked as accepting bindings from the calling * domain. A fresh port is allocated in the calling domain and returned as * . * NOTES: * 1. may be DOMID_SELF, allowing loopback connections. */ struct evtchn_bind_interdomain { /* IN parameters. */ domid_t remote_dom; evtchn_port_t remote_port; /* OUT parameters. */ evtchn_port_t local_port; }; typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; /* * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ on specified * vcpu. * NOTES: * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list * in xen.h for the classification of each VIRQ. * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be * re-bound via EVTCHNOP_bind_vcpu. * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. 
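A short usage sketch of EVTCHNOP_alloc_unbound, equivalent to the xenbus_alloc_evtchn() helper this patch removes from xenbus.c further down; error handling is simplified and the function name is hypothetical:

static int
example_alloc_unbound(domid_t remote_domain, evtchn_port_t *port)
{
        struct evtchn_alloc_unbound alloc_unbound;
        int err;

        alloc_unbound.dom = DOMID_SELF;
        alloc_unbound.remote_dom = remote_domain;
        err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
            &alloc_unbound);
        if (err != 0)
                return (-err);  /* hypercall failures are negative errno */
        *port = alloc_unbound.port;
        return (0);
}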
* The allocated event channel is bound to the specified vcpu and the * binding cannot be changed. */ struct evtchn_bind_virq { /* IN parameters. */ uint32_t virq; /* enum virq */ uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; }; typedef struct evtchn_bind_virq evtchn_bind_virq_t; /* * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ ). * NOTES: * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ struct evtchn_bind_pirq { /* IN parameters. */ uint32_t pirq; #define BIND_PIRQ__WILL_SHARE 1 uint32_t flags; /* BIND_PIRQ__* */ /* OUT parameters. */ evtchn_port_t port; }; typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; /* * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. * NOTES: * 1. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ struct evtchn_bind_ipi { uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; }; typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; /* * EVTCHNOP_close: Close a local event channel . If the channel is * interdomain then the remote end is placed in the unbound state * (EVTCHNSTAT_unbound), awaiting a new connection. */ struct evtchn_close { /* IN parameters. */ evtchn_port_t port; }; typedef struct evtchn_close evtchn_close_t; /* * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is . */ struct evtchn_send { /* IN parameters. */ evtchn_port_t port; }; typedef struct evtchn_send evtchn_send_t; /* * EVTCHNOP_status: Get the current status of the communication channel which * has an endpoint at . * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may obtain the status of an event * channel for which is not DOMID_SELF. */ struct evtchn_status { /* IN parameters */ domid_t dom; evtchn_port_t port; /* OUT parameters */ #define EVTCHNSTAT_closed 0 /* Channel is not in use. */ #define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ #define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ #define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ #define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ #define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ uint32_t status; uint32_t vcpu; /* VCPU to which this channel is bound. */ union { struct { domid_t dom; } unbound; /* EVTCHNSTAT_unbound */ struct { domid_t dom; evtchn_port_t port; } interdomain; /* EVTCHNSTAT_interdomain */ uint32_t pirq; /* EVTCHNSTAT_pirq */ uint32_t virq; /* EVTCHNSTAT_virq */ } u; }; typedef struct evtchn_status evtchn_status_t; /* * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an * event is pending. * NOTES: * 1. IPI-bound channels always notify the vcpu specified at bind time. * This binding cannot be changed. * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. * This binding cannot be changed. * 3. All other channels notify vcpu0 by default. This default is set when * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ struct evtchn_bind_vcpu { /* IN parameters. */ evtchn_port_t port; uint32_t vcpu; }; typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; /* * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ struct evtchn_unmask { /* IN parameters. 
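Likewise, signalling the remote end of a bound channel is a single hypercall on the evtchn_send structure defined above; a minimal, hypothetical helper:

static int
example_notify_remote(evtchn_port_t port)
{
        struct evtchn_send send;

        send.port = port;
        return (HYPERVISOR_event_channel_op(EVTCHNOP_send, &send));
}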
*/ evtchn_port_t port; }; typedef struct evtchn_unmask evtchn_unmask_t; /* * EVTCHNOP_reset: Close all event channels associated with specified domain. * NOTES: * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. */ struct evtchn_reset { /* IN parameters. */ domid_t dom; }; typedef struct evtchn_reset evtchn_reset_t; /* * ` enum neg_errnoval * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) * ` * Superceded by new event_channel_op() hypercall since 0x00030202. */ struct evtchn_op { uint32_t cmd; /* enum event_channel_op */ union { struct evtchn_alloc_unbound alloc_unbound; struct evtchn_bind_interdomain bind_interdomain; struct evtchn_bind_virq bind_virq; struct evtchn_bind_pirq bind_pirq; struct evtchn_bind_ipi bind_ipi; struct evtchn_close close; struct evtchn_send send; struct evtchn_status status; struct evtchn_bind_vcpu bind_vcpu; struct evtchn_unmask unmask; } u; }; typedef struct evtchn_op evtchn_op_t; DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */ diff --git a/sys/xen/xen-os.h b/sys/xen/xen-os.h new file mode 100644 index 000000000000..95e8c6a3a4b7 --- /dev/null +++ b/sys/xen/xen-os.h @@ -0,0 +1,95 @@ +/****************************************************************************** + * xen/xen-os.h + * + * Random collection of macros and definition + * + * Copyright (c) 2003, 2004 Keir Fraser (on behalf of the Xen team) + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef _XEN_XEN_OS_H_ +#define _XEN_XEN_OS_H_ + +#if !defined(__XEN_INTERFACE_VERSION__) +#define __XEN_INTERFACE_VERSION__ 0x00030208 +#endif + +#define GRANT_REF_INVALID 0xffffffff + +#ifdef LOCORE +#define __ASSEMBLY__ +#endif + +#include + +#include + +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ + +/* Force a proper event-channel callback from Xen. 
*/ +void force_evtchn_callback(void); + +extern int gdtset; + +extern shared_info_t *HYPERVISOR_shared_info; + +enum xen_domain_type { + XEN_NATIVE, /* running on bare hardware */ + XEN_PV_DOMAIN, /* running in a PV domain */ + XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ +}; + +extern enum xen_domain_type xen_domain_type; + +static inline int +xen_domain(void) +{ + return (xen_domain_type != XEN_NATIVE); +} + +static inline int +xen_pv_domain(void) +{ + return (xen_domain_type == XEN_PV_DOMAIN); +} + +static inline int +xen_hvm_domain(void) +{ + return (xen_domain_type == XEN_HVM_DOMAIN); +} + +#ifndef xen_mb +#define xen_mb() mb() +#endif +#ifndef xen_rmb +#define xen_rmb() rmb() +#endif +#ifndef xen_wmb +#define xen_wmb() wmb() +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif /* _XEN_XEN_OS_H_ */ diff --git a/sys/xen/xen_intr.h b/sys/xen/xen_intr.h index 2e753e65ecb3..109608ffa81f 100644 --- a/sys/xen/xen_intr.h +++ b/sys/xen/xen_intr.h @@ -1,103 +1,216 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */ +/****************************************************************************** + * xen_intr.h + * + * APIs for managing Xen event channel, virtual IRQ, and physical IRQ + * notifications. + * + * Copyright (c) 2004, K A Fraser + * Copyright (c) 2012, Spectra Logic Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ #ifndef _XEN_INTR_H_ #define _XEN_INTR_H_ -/* -* The flat IRQ space is divided into two regions: -* 1. A one-to-one mapping of real physical IRQs. This space is only used -* if we have physical device-access privilege. This region is at the -* start of the IRQ space so that existing device drivers do not need -* to be modified to translate physical IRQ numbers into our IRQ space. -* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These -* are bound using the provided bind/unbind functions. -* -* -* $FreeBSD$ -*/ - -#define PIRQ_BASE 0 -#define NR_PIRQS 128 - -#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) -#define NR_DYNIRQS 128 - -#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) - -#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) -#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) - -#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) -#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) - -/* - * Dynamic binding of event channels and VIRQ sources to guest IRQ space. 
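The xen_domain()/xen_pv_domain()/xen_hvm_domain() predicates added in xen-os.h above are meant for gating mode-specific code paths; a hedged sketch of typical use (the probe routine itself is hypothetical):

static int
xen_example_probe(void)
{

        if (!xen_domain())
                return (ENXIO);         /* not running under Xen at all */
        if (xen_hvm_domain())
                printf("Xen HVM guest\n");
        else if (xen_pv_domain())
                printf("Xen PV guest\n");
        return (0);
}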
+#ifndef __XEN_EVTCHN_PORT_DEFINED__ +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); +#define __XEN_EVTCHN_PORT_DEFINED__ 1 +#endif + +/** Registered Xen interrupt callback handle. */ +typedef void * xen_intr_handle_t; + +/** If non-zero, the hypervisor has been configured to use a direct vector */ +extern int xen_vector_callback_enabled; + +/** + * Associate an already allocated local event channel port an interrupt + * handler. + * + * \param dev The device making this bind request. + * \param local_port The event channel to bind. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ - -/* - * Bind a caller port event channel to an interrupt handler. If - * successful, the guest IRQ number is returned in *irqp. Return zero - * on success or errno otherwise. +int xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type irqflags, xen_intr_handle_t *handlep); + +/** + * Allocate a local event channel port, accessible by the specified + * remote/foreign domain and, if successful, associate the port with + * the specified interrupt handler. + * + * \param dev The device making this bind request. + * \param remote_domain Remote domain grant permission to signal the + * newly allocated local port. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); - -/* - * Bind a listening port to an interrupt handler. If successful, the - * guest IRQ number is returned in *irqp. Return zero on success or - * errno otherwise. +int xen_intr_alloc_and_bind_local_port(device_t dev, + u_int remote_domain, driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); + +/** + * Associate the specified interrupt handler with the remote event + * channel port specified by remote_domain and remote_port. + * + * \param dev The device making this bind request. + * \param remote_domain The domain peer for this event channel connection. + * \param remote_port Remote domain's local port number for this event + * channel port. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. 
+ * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); - -/* - * Bind a VIRQ to an interrupt handler. If successful, the guest IRQ - * number is returned in *irqp. Return zero on success or errno - * otherwise. +int xen_intr_bind_remote_port(device_t dev, u_int remote_domain, + evtchn_port_t remote_port, driver_filter_t filter, + driver_intr_t handler, void *arg, enum intr_type irqflags, + xen_intr_handle_t *handlep); + +/** + * Associate the specified interrupt handler with the specified Xen + * virtual interrupt source. + * + * \param dev The device making this bind request. + * \param virq The Xen virtual IRQ number for the Xen interrupt + * source being hooked. + * \param cpu The cpu on which interrupt events should be delivered. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - const char *devname, driver_filter_t filter, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp); - -/* - * Bind an IPI to an interrupt handler. If successful, the guest - * IRQ number is returned in *irqp. Return zero on success or errno - * otherwise. +int xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, + driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); + +/** + * Associate an interprocessor interrupt vector with an interrupt handler. + * + * \param dev The device making this bind request. + * \param ipi The interprocessor interrupt vector number of the + * interrupt source being hooked. + * \param cpu The cpu receiving the IPI. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, - const char *devname, driver_filter_t filter, - unsigned long irqflags, unsigned int *irqp); - -/* - * Bind an interdomain event channel to an interrupt handler. If - * successful, the guest IRQ number is returned in *irqp. Return zero - * on success or errno otherwise. +int xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu, + driver_filter_t filter, enum intr_type irqflags, + xen_intr_handle_t *handlep); + +/** + * Unbind an interrupt handler from its interrupt source. + * + * \param handlep A pointer to the opaque handle that was initialized + * at the time the interrupt source was bound. + * + * \returns 0 on success, otherwise an errno. + * + * \note The event channel, if any, that was allocated at bind time is + * closed upon successful return of this method. 
+ * + * \note It is always safe to call xen_intr_unbind() on a handle that + * has been initilized to NULL. */ -extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, - driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); - -/* - * Unbind an interrupt handler using the guest IRQ number returned - * when it was bound. +void xen_intr_unbind(xen_intr_handle_t *handle); + +/** + * Add a description to an interrupt handler. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \param fmt The sprintf compatible format string for the description, + * followed by optional sprintf arguments. + * + * \returns 0 on success, otherwise an errno. */ -extern void unbind_from_irqhandler(unsigned int irq); - -static __inline__ int irq_cannonicalize(unsigned int irq) -{ - return (irq == 2) ? 9 : irq; -} - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -extern void irq_suspend(void); -extern void irq_resume(void); - -extern void idle_block(void); -extern int ap_cpu_initclocks(int cpu); +int +xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +/** + * Signal the remote peer of an interrupt source associated with an + * event channel port. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \note For xen interrupt sources other than event channel ports, + * this method takes no action. + */ +void xen_intr_signal(xen_intr_handle_t handle); + +/** + * Get the local event channel port number associated with this interrupt + * source. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \returns 0 if the handle is invalid, otherwise positive port number. + */ +evtchn_port_t xen_intr_port(xen_intr_handle_t handle); #endif /* _XEN_INTR_H_ */ diff --git a/sys/xen/xenbus/xenbus.c b/sys/xen/xenbus/xenbus.c index 8887066c7e4b..c59d4aec4532 100644 --- a/sys/xen/xenbus/xenbus.c +++ b/sys/xen/xenbus/xenbus.c @@ -1,294 +1,259 @@ /****************************************************************************** * Copyright (C) 2005 XenSource Ltd * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
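Putting the new xen_intr interface together, a hedged sketch of how a consumer might use it; the softc layout, the filter, and the INTR_TYPE_NET choice are placeholders, not part of this change:

struct xen_example_softc {
        xen_intr_handle_t       xen_intr_handle;
};

static int
xen_example_filter(void *arg)
{

        /* Acknowledge the event and schedule any deferred work here. */
        return (FILTER_HANDLED);
}

static int
xen_example_bind(device_t dev, struct xen_example_softc *sc,
    evtchn_port_t local_port)
{
        int error;

        error = xen_intr_bind_local_port(dev, local_port, xen_example_filter,
            /*handler*/NULL, sc, INTR_TYPE_NET, &sc->xen_intr_handle);
        if (error != 0)
                return (error);
        /* Kick the peer on the other end of the channel. */
        xen_intr_signal(sc->xen_intr_handle);
        return (0);
}

static void
xen_example_unbind(struct xen_example_softc *sc)
{

        /*
         * Safe even on a NULL-initialized handle; also closes any event
         * channel that was allocated at bind time.
         */
        xen_intr_unbind(&sc->xen_intr_handle);
}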
*/ /** * \file xenbus.c * * \brief Client-facing interface for the Xenbus driver. * * In other words, the interface between the Xenbus and the device-specific * code, be it the frontend or the backend of that driver. */ #if 0 #define DPRINTK(fmt, args...) \ printk("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) #else #define DPRINTK(fmt, args...) ((void)0) #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include -#include +#include #include #include #include #include + #include MALLOC_DEFINE(M_XENBUS, "xenbus", "XenBus Support"); /*------------------------- Private Functions --------------------------------*/ /** * \brief Construct the error path corresponding to the given XenBus * device. * * \param dev The XenBus device for which we are constructing an error path. * * \return On success, the contructed error path. Otherwise NULL. * * It is the caller's responsibility to free any returned error path * node using the M_XENBUS malloc type. */ static char * error_path(device_t dev) { char *path_buffer = malloc(strlen("error/") + strlen(xenbus_get_node(dev)) + 1,M_XENBUS, M_WAITOK); strcpy(path_buffer, "error/"); strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev)); return (path_buffer); } /*--------------------------- Public Functions -------------------------------*/ /*-------- API comments for these methods can be found in xenbusvar.h --------*/ const char * xenbus_strstate(XenbusState state) { static const char *const name[] = { [ XenbusStateUnknown ] = "Unknown", [ XenbusStateInitialising ] = "Initialising", [ XenbusStateInitWait ] = "InitWait", [ XenbusStateInitialised ] = "Initialised", [ XenbusStateConnected ] = "Connected", [ XenbusStateClosing ] = "Closing", [ XenbusStateClosed ] = "Closed", }; return ((state < (XenbusStateClosed + 1)) ? 
name[state] : "INVALID"); } int xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch, xs_watch_cb_t *callback, uintptr_t callback_data) { int error; watch->node = path; watch->callback = callback; watch->callback_data = callback_data; error = xs_register_watch(watch); if (error) { watch->node = NULL; watch->callback = NULL; xenbus_dev_fatal(dev, error, "adding watch on %s", path); } return (error); } int xenbus_watch_path2(device_t dev, const char *path, const char *path2, struct xs_watch *watch, xs_watch_cb_t *callback, uintptr_t callback_data) { int error; char *state = malloc(strlen(path) + 1 + strlen(path2) + 1, M_XENBUS, M_WAITOK); strcpy(state, path); strcat(state, "/"); strcat(state, path2); error = xenbus_watch_path(dev, state, watch, callback, callback_data); if (error) { free(state,M_XENBUS); } return (error); } void xenbus_dev_verror(device_t dev, int err, const char *fmt, va_list ap) { int ret; unsigned int len; char *printf_buffer = NULL, *path_buffer = NULL; #define PRINTF_BUFFER_SIZE 4096 printf_buffer = malloc(PRINTF_BUFFER_SIZE,M_XENBUS, M_WAITOK); len = sprintf(printf_buffer, "%i ", err); ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); KASSERT(len + ret <= PRINTF_BUFFER_SIZE-1, ("xenbus error message too big")); device_printf(dev, "Error %s\n", printf_buffer); path_buffer = error_path(dev); if (path_buffer == NULL) { printf("xenbus: failed to write error node for %s (%s)\n", xenbus_get_node(dev), printf_buffer); goto fail; } if (xs_write(XST_NIL, path_buffer, "error", printf_buffer) != 0) { printf("xenbus: failed to write error node for %s (%s)\n", xenbus_get_node(dev), printf_buffer); goto fail; } fail: if (printf_buffer) free(printf_buffer,M_XENBUS); if (path_buffer) free(path_buffer,M_XENBUS); } void xenbus_dev_error(device_t dev, int err, const char *fmt, ...) { va_list ap; va_start(ap, fmt); xenbus_dev_verror(dev, err, fmt, ap); va_end(ap); } void xenbus_dev_vfatal(device_t dev, int err, const char *fmt, va_list ap) { xenbus_dev_verror(dev, err, fmt, ap); device_printf(dev, "Fatal error. Transitioning to Closing State\n"); xenbus_set_state(dev, XenbusStateClosing); } void xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); xenbus_dev_vfatal(dev, err, fmt, ap); va_end(ap); } int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp) { int error; error = gnttab_grant_foreign_access( xenbus_get_otherend_id(dev), ring_mfn, 0, refp); if (error) { xenbus_dev_fatal(dev, error, "granting access to ring page"); return (error); } return (0); } -int -xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port) -{ - struct evtchn_alloc_unbound alloc_unbound; - int err; - - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = xenbus_get_otherend_id(dev); - - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - - if (err) { - xenbus_dev_fatal(dev, -err, "allocating event channel"); - return (-err); - } - *port = alloc_unbound.port; - return (0); -} - -int -xenbus_free_evtchn(device_t dev, evtchn_port_t port) -{ - struct evtchn_close close; - int err; - - close.port = port; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); - if (err) { - xenbus_dev_error(dev, -err, "freeing event channel %d", port); - return (-err); - } - return (0); -} - XenbusState xenbus_read_driver_state(const char *path) { XenbusState result; int error; error = xs_gather(XST_NIL, path, "state", "%d", &result, NULL); if (error) result = XenbusStateClosed; return (result); } int xenbus_dev_is_online(device_t dev) { const char *path; int error; int value; path = xenbus_get_node(dev); error = xs_gather(XST_NIL, path, "online", "%d", &value, NULL); if (error != 0) { /* Default to not online. */ value = 0; } return (value); } void xenbus_localend_changed(device_t dev, const char *path) { } diff --git a/sys/xen/xenbus/xenbus_if.m b/sys/xen/xenbus/xenbus_if.m index 87d7c7fef9d2..fd9ae51ce1cf 100644 --- a/sys/xen/xenbus/xenbus_if.m +++ b/sys/xen/xenbus/xenbus_if.m @@ -1,63 +1,64 @@ #- # Copyright (c) 2008 Doug Rabson # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # $FreeBSD$ # #include #include -#include + +#include #include #include INTERFACE xenbus; /** * \brief Callback triggered when the state of the otherend * of a split device changes. * * \param _dev NewBus device_t for this XenBus device whose otherend's * state has changed.. * \param _newstate The new state of the otherend device. 
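Note on the removals above: with xenbus_alloc_evtchn() and xenbus_free_evtchn() gone, the equivalent lifecycle presumably moves to the xen_intr API declared earlier in this patch, where port allocation, binding, and (on unbind) closing the channel all go through one handle. A hedged fragment with placeholder names:

/* Attach path: allocate a local port the peer may bind to and hook it up. */
error = xen_intr_alloc_and_bind_local_port(dev, xenbus_get_otherend_id(dev),
    example_filter, /*handler*/NULL, sc, INTR_TYPE_BIO, &sc->xen_intr_handle);

/* Detach path: unbinding also closes the channel allocated at bind time. */
xen_intr_unbind(&sc->xen_intr_handle);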
*/ METHOD void otherend_changed { device_t _dev; enum xenbus_state _newstate; }; /** * \brief Callback triggered when the XenStore tree of the local end * of a split device changes. * * \param _dev NewBus device_t for this XenBus device whose otherend's * state has changed.. * \param _path The tree relative sub-path to the modified node. The empty * string indicates the root of the tree was destroyed. */ METHOD void localend_changed { device_t _dev; const char * _path; } DEFAULT xenbus_localend_changed; diff --git a/sys/xen/xenbus/xenbusb_front.c b/sys/xen/xenbus/xenbusb_front.c index 818e7f0a2cc6..145d52718868 100644 --- a/sys/xen/xenbus/xenbusb_front.c +++ b/sys/xen/xenbus/xenbusb_front.c @@ -1,196 +1,196 @@ /****************************************************************************** * Talks to Xen Store to figure out what devices we have. * * Copyright (C) 2009, 2010 Spectra Logic Corporation * Copyright (C) 2008 Doug Rabson * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Mike Wray, Hewlett-Packard * Copyright (C) 2005 XenSource Ltd * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ /** * \file xenbusb_front.c * * XenBus management of the NewBus bus containing the frontend instances of * Xen split devices. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include -#include #include +#include #include #include #include /*------------------ Private Device Attachment Functions --------------------*/ /** * \brief Probe for the existance of the XenBus front bus. * * \param dev NewBus device_t for this XenBus front bus instance. * * \return Always returns 0 indicating success. */ static int xenbusb_front_probe(device_t dev) { device_set_desc(dev, "Xen Frontend Devices"); return (0); } /** * \brief Attach the XenBus front bus. * * \param dev NewBus device_t for this XenBus front bus instance. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenbusb_front_attach(device_t dev) { return (xenbusb_attach(dev, "device", /*id_components*/1)); } /** * \brief Enumerate all devices of the given type on this bus. * * \param dev NewBus device_t for this XenBus front bus instance. * \param type String indicating the device sub-tree (e.g. "vfb", "vif") * to enumerate. * * \return On success, 0. 
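For reference, a split-device driver consumes the interface above by listing the generated method in its device_method_t table; the driver names here are hypothetical:

static void
xen_example_otherend_changed(device_t dev, enum xenbus_state newstate)
{

        if (newstate == XenbusStateConnected)
                device_printf(dev, "backend is now connected\n");
}

static device_method_t xen_example_methods[] = {
        /* XenBus interface */
        DEVMETHOD(xenbus_otherend_changed, xen_example_otherend_changed),

        { 0, 0 }
};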
Otherwise an errno value indicating the * type of failure. * * Devices that are found are entered into the NewBus hierarchy via * xenbusb_add_device(). xenbusb_add_device() ignores duplicate detects * and ignores duplicate devices, so it can be called unconditionally * for any device found in the XenStore. */ static int xenbusb_front_enumerate_type(device_t dev, const char *type) { struct xenbusb_softc *xbs; const char **dir; unsigned int i, count; int error; xbs = device_get_softc(dev); error = xs_directory(XST_NIL, xbs->xbs_node, type, &count, &dir); if (error) return (error); for (i = 0; i < count; i++) xenbusb_add_device(dev, type, dir[i]); free(dir, M_XENSTORE); return (0); } /** * \brief Determine and store the XenStore path for the other end of * a split device whose local end is represented by ivars. * * If successful, the xd_otherend_path field of the child's instance * variables will be updated. * * \param dev NewBus device_t for this XenBus front bus instance. * \param ivars Instance variables from the XenBus child device for * which to perform this function. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xenbusb_front_get_otherend_node(device_t dev, struct xenbus_device_ivars *ivars) { char *otherend_path; int error; if (ivars->xd_otherend_path != NULL) { free(ivars->xd_otherend_path, M_XENBUS); ivars->xd_otherend_path = NULL; } error = xs_gather(XST_NIL, ivars->xd_node, "backend-id", "%i", &ivars->xd_otherend_id, "backend", NULL, &otherend_path, NULL); if (error == 0) { ivars->xd_otherend_path = strdup(otherend_path, M_XENBUS); ivars->xd_otherend_path_len = strlen(otherend_path); free(otherend_path, M_XENSTORE); } return (error); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenbusb_front_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xenbusb_identify), DEVMETHOD(device_probe, xenbusb_front_probe), DEVMETHOD(device_attach, xenbusb_front_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, xenbusb_resume), /* Bus Interface */ DEVMETHOD(bus_print_child, xenbusb_print_child), DEVMETHOD(bus_read_ivar, xenbusb_read_ivar), DEVMETHOD(bus_write_ivar, xenbusb_write_ivar), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), /* XenBus Bus Interface */ DEVMETHOD(xenbusb_enumerate_type, xenbusb_front_enumerate_type), DEVMETHOD(xenbusb_get_otherend_node, xenbusb_front_get_otherend_node), { 0, 0 } }; DEFINE_CLASS_0(xenbusb_front, xenbusb_front_driver, xenbusb_front_methods, sizeof(struct xenbusb_softc)); devclass_t xenbusb_front_devclass; DRIVER_MODULE(xenbusb_front, xenstore, xenbusb_front_driver, xenbusb_front_devclass, 0, 0); diff --git a/sys/xen/xenbus/xenbusvar.h b/sys/xen/xenbus/xenbusvar.h index 1c730fb900a5..ab5d01fa6b64 100644 --- a/sys/xen/xenbus/xenbusvar.h +++ b/sys/xen/xenbus/xenbusvar.h @@ -1,308 +1,275 @@ /****************************************************************************** * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 XenSource Ltd. 
* * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * $FreeBSD$ */ /** * \file xenbusvar.h * * \brief Datastructures and function declarations for usedby device * drivers operating on the XenBus. */ #ifndef _XEN_XENBUS_XENBUSVAR_H #define _XEN_XENBUS_XENBUSVAR_H #include #include #include #include #include #include -#include +#include #include #include #include #include /* XenBus allocations including XenStore data returned to clients. */ MALLOC_DECLARE(M_XENBUS); enum { /** * Path of this device node. */ XENBUS_IVAR_NODE, /** * The device type (e.g. vif, vbd). */ XENBUS_IVAR_TYPE, /** * The state of this device (not the otherend's state). */ XENBUS_IVAR_STATE, /** * Domain ID of the other end device. */ XENBUS_IVAR_OTHEREND_ID, /** * Path of the other end device. */ XENBUS_IVAR_OTHEREND_PATH }; /** * Simplified accessors for xenbus devices */ #define XENBUS_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(xenbus, var, XENBUS, ivar, type) XENBUS_ACCESSOR(node, NODE, const char *) XENBUS_ACCESSOR(type, TYPE, const char *) XENBUS_ACCESSOR(state, STATE, enum xenbus_state) XENBUS_ACCESSOR(otherend_id, OTHEREND_ID, int) XENBUS_ACCESSOR(otherend_path, OTHEREND_PATH, const char *) /** * Return the state of a XenBus device. * * \param path The root XenStore path for the device. * * \return The current state of the device or XenbusStateClosed if no * state can be read. */ XenbusState xenbus_read_driver_state(const char *path); /** * Return the state of the "other end" (peer) of a XenBus device. * * \param dev The XenBus device whose peer to query. * * \return The current state of the peer device or XenbusStateClosed if no * state can be read. */ static inline XenbusState xenbus_get_otherend_state(device_t dev) { return (xenbus_read_driver_state(xenbus_get_otherend_path(dev))); } /** * Initialize and register a watch on the given path (client suplied storage). * * \param dev The XenBus device requesting the watch service. * \param path The XenStore path of the object to be watched. The * storage for this string must be stable for the lifetime * of the watch. * \param watch The watch object to use for this request. This object * must be stable for the lifetime of the watch. * \param callback The function to call when XenStore objects at or below * path are modified. * \param cb_data Client data that can be retrieved from the watch object * during the callback. * * \return On success, 0. 
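The XENBUS_ACCESSOR-generated getters declared above combine naturally with xs_gather() for reading per-device XenStore properties; a hedged sketch reading a made-up "feature-example" node under the device's own directory:

static int
xen_example_readprop(device_t dev, int *value)
{

        /* "feature-example" is an illustrative node name, not a real one. */
        return (xs_gather(XST_NIL, xenbus_get_node(dev),
            "feature-example", "%d", value, NULL));
}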
Otherwise an errno value indicating the * type of failure. * * \note On error, the device 'dev' will be switched to the XenbusStateClosing * state and the returned error is saved in the per-device error node * for dev in the XenStore. */ int xenbus_watch_path(device_t dev, char *path, struct xs_watch *watch, xs_watch_cb_t *callback, uintptr_t cb_data); /** * Initialize and register a watch at path/path2 in the XenStore. * * \param dev The XenBus device requesting the watch service. * \param path The base XenStore path of the object to be watched. * \param path2 The tail XenStore path of the object to be watched. * \param watch The watch object to use for this request. This object * must be stable for the lifetime of the watch. * \param callback The function to call when XenStore objects at or below * path are modified. * \param cb_data Client data that can be retrieved from the watch object * during the callback. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * \note On error, \a dev will be switched to the XenbusStateClosing * state and the returned error is saved in the per-device error node * for \a dev in the XenStore. * * Similar to xenbus_watch_path, however the storage for the path to the * watched object is allocated from the heap and filled with "path '/' path2". * Should a call to this function succeed, it is the callers responsibility * to free watch->node using the M_XENBUS malloc type. */ int xenbus_watch_path2(device_t dev, const char *path, const char *path2, struct xs_watch *watch, xs_watch_cb_t *callback, uintptr_t cb_data); /** * Grant access to the given ring_mfn to the peer of the given device. * * \param dev The device granting access to the ring page. * \param ring_mfn The guest machine page number of the page to grant * peer access rights. * \param refp[out] The grant reference for the page. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * A successful call to xenbus_grant_ring should be paired with a call * to gnttab_end_foreign_access() when foregn access to this page is no * longer requried. * * \note On error, \a dev will be switched to the XenbusStateClosing * state and the returned error is saved in the per-device error node * for \a dev in the XenStore. */ int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp); -/** - * Allocate an event channel for the given XenBus device. - * - * \param dev The device for which to allocate the event channel. - * \param port[out] The port identifier for the allocated event channel. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * A successfully allocated event channel should be free'd using - * xenbus_free_evtchn(). - * - * \note On error, \a dev will be switched to the XenbusStateClosing - * state and the returned error is saved in the per-device error node - * for \a dev in the XenStore. - */ -int xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port); - -/** - * Free an existing event channel. - * - * \param dev The device which allocated this event channel. - * \param port The port identifier for the event channel to free. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * \note On error, \a dev will be switched to the XenbusStateClosing - * state and the returned error is saved in the per-device error node - * for \a dev in the XenStore. 
- */ -int xenbus_free_evtchn(device_t dev, evtchn_port_t port); - /** * Record the given errno, along with the given, printf-style, formatted * message in dev's device specific error node in the XenStore. * * \param dev The device which encountered the error. * \param err The errno value corresponding to the error. * \param fmt Printf format string followed by a variable number of * printf arguments. */ void xenbus_dev_error(device_t dev, int err, const char *fmt, ...) __attribute__((format(printf, 3, 4))); /** * va_list version of xenbus_dev_error(). * * \param dev The device which encountered the error. * \param err The errno value corresponding to the error. * \param fmt Printf format string. * \param ap Va_list of printf arguments. */ void xenbus_dev_verror(device_t dev, int err, const char *fmt, va_list ap) __attribute__((format(printf, 3, 0))); /** * Equivalent to xenbus_dev_error(), followed by * xenbus_set_state(dev, XenbusStateClosing). * * \param dev The device which encountered the error. * \param err The errno value corresponding to the error. * \param fmt Printf format string followed by a variable number of * printf arguments. */ void xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) __attribute__((format(printf, 3, 4))); /** * va_list version of xenbus_dev_fatal(). * * \param dev The device which encountered the error. * \param err The errno value corresponding to the error. * \param fmt Printf format string. * \param ap Va_list of printf arguments. */ void xenbus_dev_vfatal(device_t dev, int err, const char *fmt, va_list) __attribute__((format(printf, 3, 0))); /** * Convert a member of the xenbus_state enum into an ASCII string. * * /param state The XenBus state to lookup. * * /return A string representing state or, for unrecognized states, * the string "Unknown". */ const char *xenbus_strstate(enum xenbus_state state); /** * Return the value of a XenBus device's "online" node within the XenStore. * * \param dev The XenBus device to query. * * \return The value of the "online" node for the device. If the node * does not exist, 0 (offline) is returned. */ int xenbus_dev_is_online(device_t dev); /** * Default callback invoked when a change to the local XenStore sub-tree * for a device is modified. * * \param dev The XenBus device whose tree was modified. * \param path The tree relative sub-path to the modified node. The empty * string indicates the root of the tree was destroyed. */ void xenbus_localend_changed(device_t dev, const char *path); #include "xenbus_if.h" #endif /* _XEN_XENBUS_XENBUSVAR_H */ diff --git a/sys/xen/xenstore/xenstore.c b/sys/xen/xenstore/xenstore.c index a07a26266e43..d404862ab55d 100644 --- a/sys/xen/xenstore/xenstore.c +++ b/sys/xen/xenstore/xenstore.c @@ -1,1663 +1,1659 @@ /****************************************************************************** * xenstore.c * * Low-level kernel interface to the XenStore. 
* * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2009,2010 Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include -#include +#include #include #include #include #include #include #include #include #include #include /** * \file xenstore.c * \brief XenStore interface * * The XenStore interface is a simple storage system that is a means of * communicating state and configuration data between the Xen Domain 0 * and the various guest domains. All configuration data other than * a small amount of essential information required during the early * boot process of launching a Xen aware guest, is managed using the * XenStore. * * The XenStore is ASCII string based, and has a structure and semantics * similar to a filesystem. There are files and directories, the directories * able to contain files or other directories. The depth of the hierachy * is only limited by the XenStore's maximum path length. * * The communication channel between the XenStore service and other * domains is via two, guest specific, ring buffers in a shared memory * area. One ring buffer is used for communicating in each direction. * The grant table references for this shared memory are given to the * guest either via the xen_start_info structure for a fully para- * virtualized guest, or via HVM hypercalls for a hardware virtualized * guest. * * The XenStore communication relies on an event channel and thus * interrupts. For this reason, the attachment of the XenStore * relies on an interrupt driven configuration hook to hold off * boot processing until communication with the XenStore service * can be established. * * Several Xen services depend on the XenStore, most notably the * XenBus used to discover and manage Xen devices. These services * are implemented as NewBus child attachments to a bus exported * by this XenStore driver. */ static struct xs_watch *find_watch(const char *token); MALLOC_DEFINE(M_XENSTORE, "xenstore", "XenStore data and results"); /** * Pointer to shared memory communication structures allowing us * to communicate with the XenStore service. * * When operating in full PV mode, this pointer is set early in kernel * startup from within xen_machdep.c. 
In HVM mode, we use hypercalls * to get the guest frame number for the shared page and then map it * into kva. See xs_init() for details. */ struct xenstore_domain_interface *xen_store; /*-------------------------- Private Data Structures ------------------------*/ /** * Structure capturing messages received from the XenStore service. */ struct xs_stored_msg { TAILQ_ENTRY(xs_stored_msg) list; struct xsd_sockmsg hdr; union { /* Queued replies. */ struct { char *body; } reply; /* Queued watch events. */ struct { struct xs_watch *handle; const char **vec; u_int vec_size; } watch; } u; }; TAILQ_HEAD(xs_stored_msg_list, xs_stored_msg); /** * Container for all XenStore related state. */ struct xs_softc { /** Newbus device for the XenStore. */ device_t xs_dev; /** * Lock serializing access to ring producer/consumer * indexes. Use of this lock guarantees that wakeups * of blocking readers/writers are not missed due to * races with the XenStore service. */ struct mtx ring_lock; /* * Mutex used to insure exclusive access to the outgoing * communication ring. We use a lock type that can be * held while sleeping so that xs_write() can block waiting * for space in the ring to free up, without allowing another * writer to come in and corrupt a partial message write. */ struct sx request_mutex; /** * A list of replies to our requests. * * The reply list is filled by xs_rcv_thread(). It * is consumed by the context that issued the request * to which a reply is made. The requester blocks in * xs_read_reply(). * * /note Only one requesting context can be active at a time. * This is guaranteed by the request_mutex and insures * that the requester sees replies matching the order * of its requests. */ struct xs_stored_msg_list reply_list; /** Lock protecting the reply list. */ struct mtx reply_lock; /** * List of registered watches. */ struct xs_watch_list registered_watches; /** Lock protecting the registered watches list. */ struct mtx registered_watches_lock; /** * List of pending watch callback events. */ struct xs_stored_msg_list watch_events; /** Lock protecting the watch calback list. */ struct mtx watch_events_lock; /** * Sleepable lock used to prevent VM suspension while a * xenstore transaction is outstanding. * * Each active transaction holds a shared lock on the * suspend mutex. Our suspend method blocks waiting * to acquire an exclusive lock. This guarantees that * suspend processing will only proceed once all active * transactions have been retired. */ struct sx suspend_mutex; /** * The processid of the xenwatch thread. */ pid_t xenwatch_pid; /** * Sleepable mutex used to gate the execution of XenStore * watch event callbacks. * * xenwatch_thread holds an exclusive lock on this mutex * while delivering event callbacks, and xenstore_unregister_watch() * uses an exclusive lock of this mutex to guarantee that no * callbacks of the just unregistered watch are pending * before returning to its caller. */ struct sx xenwatch_mutex; #ifdef XENHVM /** * The HVM guest pseudo-physical frame number. This is Xen's mapping * of the true machine frame number into our "physical address space". */ unsigned long gpfn; #endif /** * The event channel for communicating with the * XenStore service. */ int evtchn; - /** Interrupt number for our event channel. */ - u_int irq; + /** Handle for XenStore interrupts. */ + xen_intr_handle_t xen_intr_handle; /** * Interrupt driven config hook allowing us to defer * attaching children until interrupts (and thus communication * with the XenStore service) are available. 
*/ struct intr_config_hook xs_attachcb; }; /*-------------------------------- Global Data ------------------------------*/ static struct xs_softc xs; /*------------------------- Private Utility Functions -----------------------*/ /** * Count and optionally record pointers to a number of NUL terminated * strings in a buffer. * * \param strings A pointer to a contiguous buffer of NUL terminated strings. * \param dest An array to store pointers to each string found in strings. * \param len The length of the buffer pointed to by strings. * * \return A count of the number of strings found. */ static u_int extract_strings(const char *strings, const char **dest, u_int len) { u_int num; const char *p; for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) { if (dest != NULL) *dest++ = p; num++; } return (num); } /** * Convert a contiguous buffer containing a series of NUL terminated * strings into an array of pointers to strings. * * The returned pointer references the array of string pointers which * is followed by the storage for the string data. It is the client's * responsibility to free this storage. * * The storage addressed by strings is free'd prior to split returning. * * \param strings A pointer to a contiguous buffer of NUL terminated strings. * \param len The length of the buffer pointed to by strings. * \param num The number of strings found and returned in the strings * array. * * \return An array of pointers to the strings found in the input buffer. */ static const char ** split(char *strings, u_int len, u_int *num) { const char **ret; /* Protect against unterminated buffers. */ if (len > 0) strings[len - 1] = '\0'; /* Count the strings. */ *num = extract_strings(strings, /*dest*/NULL, len); /* Transfer to one big alloc for easy freeing by the caller. */ ret = malloc(*num * sizeof(char *) + len, M_XENSTORE, M_WAITOK); memcpy(&ret[*num], strings, len); free(strings, M_XENSTORE); /* Extract pointers to newly allocated array. */ strings = (char *)&ret[*num]; (void)extract_strings(strings, /*dest*/ret, len); return (ret); } /*------------------------- Public Utility Functions -------------------------*/ /*------- API comments for these methods can be found in xenstorevar.h -------*/ struct sbuf * xs_join(const char *dir, const char *name) { struct sbuf *sb; sb = sbuf_new_auto(); sbuf_cat(sb, dir); if (name[0] != '\0') { sbuf_putc(sb, '/'); sbuf_cat(sb, name); } sbuf_finish(sb); return (sb); } /*-------------------- Low Level Communication Management --------------------*/ /** * Interrupt handler for the XenStore event channel. * * XenStore reads and writes block on "xen_store" for buffer * space. Wakeup any blocking operations when the XenStore * service has modified the queues. */ static void xs_intr(void * arg __unused /*__attribute__((unused))*/) { /* * Hold ring lock across wakeup so that clients * cannot miss a wakeup. */ mtx_lock(&xs.ring_lock); wakeup(xen_store); mtx_unlock(&xs.ring_lock); } /** * Verify that the indexes for a ring are valid. * * The difference between the producer and consumer cannot * exceed the size of the ring. * * \param cons The consumer index for the ring to test. * \param prod The producer index for the ring to test. * * \retval 1 If indexes are in range. * \retval 0 If the indexes are out of range. */ static int xs_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) { return ((prod - cons) <= XENSTORE_RING_SIZE); } /** * Return a pointer to, and the length of, the contiguous * free region available for output in a ring buffer. 
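As a worked illustration of extract_strings() and split() above, consider a two-entry, XS_DIRECTORY-style reply; the buffer contents below are made up and split_example() is only a sketch of the calling convention.

/* Illustrative only: unpacking a two-entry reply buffer. */
static void
split_example(void)
{
	const char **ret;
	char *strings;
	u_int num;

	strings = malloc(8, M_XENSTORE, M_WAITOK);
	memcpy(strings, "vif\0vbd\0", 8);
	ret = split(strings, 8, &num);	/* split() frees 'strings' itself */
	/*
	 * num == 2; ret[0] points at "vif" and ret[1] at "vbd".  The
	 * pointer array and the copied string data share one allocation,
	 * so a single free() releases everything.
	 */
	free(ret, M_XENSTORE);
}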
* * \param cons The consumer index for the ring. * \param prod The producer index for the ring. * \param buf The base address of the ring's storage. * \param len The amount of contiguous storage available. * * \return A pointer to the start location of the free region. */ static void * xs_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, char *buf, uint32_t *len) { *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) *len = XENSTORE_RING_SIZE - (prod - cons); return (buf + MASK_XENSTORE_IDX(prod)); } /** * Return a pointer to, and the length of, the contiguous * data available to read from a ring buffer. * * \param cons The consumer index for the ring. * \param prod The producer index for the ring. * \param buf The base address of the ring's storage. * \param len The amount of contiguous data available to read. * * \return A pointer to the start location of the available data. */ static const void * xs_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, const char *buf, uint32_t *len) { *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); if ((prod - cons) < *len) *len = prod - cons; return (buf + MASK_XENSTORE_IDX(cons)); } /** * Transmit data to the XenStore service. * * \param tdata A pointer to the contiguous data to send. * \param len The amount of data to send. * * \return On success 0, otherwise an errno value indicating the * cause of failure. * * \invariant Called from thread context. * \invariant The buffer pointed to by tdata is at least len bytes * in length. * \invariant xs.request_mutex exclusively locked. */ static int xs_write_store(const void *tdata, unsigned len) { XENSTORE_RING_IDX cons, prod; const char *data = (const char *)tdata; int error; sx_assert(&xs.request_mutex, SX_XLOCKED); while (len != 0) { void *dst; u_int avail; /* Hold lock so we can't miss wakeups should we block. */ mtx_lock(&xs.ring_lock); cons = xen_store->req_cons; prod = xen_store->req_prod; if ((prod - cons) == XENSTORE_RING_SIZE) { /* * Output ring is full. Wait for a ring event. * * Note that the events from both queues * are combined, so being woken does not * guarantee that data exist in the read * ring. * * To simplify error recovery and the retry, * we specify PDROP so our lock is *not* held * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, "xbwrite", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); /* Try again. */ continue; } mtx_unlock(&xs.ring_lock); /* Verify queue sanity. */ if (!xs_check_indexes(cons, prod)) { xen_store->req_cons = xen_store->req_prod = 0; return (EIO); } dst = xs_get_output_chunk(cons, prod, xen_store->req, &avail); if (avail > len) avail = len; memcpy(dst, data, avail); data += avail; len -= avail; /* * The store to the producer index, which indicates * to the other side that new data has arrived, must * be visible only after our copy of the data into the * ring has completed. */ wmb(); xen_store->req_prod += avail; /* - * notify_remote_via_evtchn implies mb(). The other side - * will see the change to req_prod at the time of the - * interrupt. + * xen_intr_signal() implies mb(). The other side will see + * the change to req_prod at the time of the interrupt. */ - notify_remote_via_evtchn(xs.evtchn); + xen_intr_signal(xs.xen_intr_handle); } return (0); } /** * Receive data from the XenStore service. * * \param tdata A pointer to the contiguous buffer to receive the data. * \param len The amount of data to receive. 
* * \return On success 0, otherwise an errno value indicating the * cause of failure. * * \invariant Called from thread context. * \invariant The buffer pointed to by tdata is at least len bytes * in length. * * \note xs_read does not perform any internal locking to guarantee * serial access to the incoming ring buffer. However, there * is only one context processing reads: xs_rcv_thread(). */ static int xs_read_store(void *tdata, unsigned len) { XENSTORE_RING_IDX cons, prod; char *data = (char *)tdata; int error; while (len != 0) { u_int avail; const char *src; /* Hold lock so we can't miss wakeups should we block. */ mtx_lock(&xs.ring_lock); cons = xen_store->rsp_cons; prod = xen_store->rsp_prod; if (cons == prod) { /* * Nothing to read. Wait for a ring event. * * Note that the events from both queues * are combined, so being woken does not * guarantee that data exist in the read * ring. * * To simplify error recovery and the retry, * we specify PDROP so our lock is *not* held * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, "xbread", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); continue; } mtx_unlock(&xs.ring_lock); /* Verify queue sanity. */ if (!xs_check_indexes(cons, prod)) { xen_store->rsp_cons = xen_store->rsp_prod = 0; return (EIO); } src = xs_get_input_chunk(cons, prod, xen_store->rsp, &avail); if (avail > len) avail = len; /* * Insure the data we read is related to the indexes * we read above. */ rmb(); memcpy(data, src, avail); data += avail; len -= avail; /* * Insure that the producer of this ring does not see * the ring space as free until after we have copied it * out. */ mb(); xen_store->rsp_cons += avail; /* - * notify_remote_via_evtchn implies mb(). The producer - * will see the updated consumer index when the event - * is delivered. + * xen_intr_signal() implies mb(). The producer will see + * the updated consumer index when the event is delivered. */ - notify_remote_via_evtchn(xs.evtchn); + xen_intr_signal(xs.xen_intr_handle); } return (0); } /*----------------------- Received Message Processing ------------------------*/ /** * Block reading the next message from the XenStore service and * process the result. * * \param type The returned type of the XenStore message received. * * \return 0 on success. Otherwise an errno value indicating the * type of failure encountered. 
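To make the chunk arithmetic above concrete, here is a sketch with illustrative index values; XENSTORE_RING_SIZE is assumed to be 1024 and the indexes are free-running, only masked when used as offsets into the ring storage.

static void
output_chunk_example(void)
{
	/* Illustrative values: a nearly full request ring that wraps. */
	XENSTORE_RING_IDX cons = 2040, prod = 3056;
	uint32_t len;
	char *dst;

	KASSERT(xs_check_indexes(cons, prod), ("indexes out of range"));
	dst = xs_get_output_chunk(cons, prod, xen_store->req, &len);
	/*
	 * MASK_XENSTORE_IDX(prod) == 1008, so dst == xen_store->req + 1008
	 * and len == min(1024 - 1008, 1024 - (prod - cons)) == 8: only 8
	 * bytes can be copied before req_prod is advanced (after a wmb())
	 * and the peer is notified with xen_intr_signal().
	 */
	(void)dst;
}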
*/ static int xs_process_msg(enum xsd_sockmsg_type *type) { struct xs_stored_msg *msg; char *body; int error; msg = malloc(sizeof(*msg), M_XENSTORE, M_WAITOK); error = xs_read_store(&msg->hdr, sizeof(msg->hdr)); if (error) { free(msg, M_XENSTORE); return (error); } body = malloc(msg->hdr.len + 1, M_XENSTORE, M_WAITOK); error = xs_read_store(body, msg->hdr.len); if (error) { free(body, M_XENSTORE); free(msg, M_XENSTORE); return (error); } body[msg->hdr.len] = '\0'; *type = msg->hdr.type; if (msg->hdr.type == XS_WATCH_EVENT) { msg->u.watch.vec = split(body, msg->hdr.len, &msg->u.watch.vec_size); mtx_lock(&xs.registered_watches_lock); msg->u.watch.handle = find_watch( msg->u.watch.vec[XS_WATCH_TOKEN]); if (msg->u.watch.handle != NULL) { mtx_lock(&xs.watch_events_lock); TAILQ_INSERT_TAIL(&xs.watch_events, msg, list); wakeup(&xs.watch_events); mtx_unlock(&xs.watch_events_lock); } else { free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } mtx_unlock(&xs.registered_watches_lock); } else { msg->u.reply.body = body; mtx_lock(&xs.reply_lock); TAILQ_INSERT_TAIL(&xs.reply_list, msg, list); wakeup(&xs.reply_list); mtx_unlock(&xs.reply_lock); } return (0); } /** * Thread body of the XenStore receive thread. * * This thread blocks waiting for data from the XenStore service * and processes and received messages. */ static void xs_rcv_thread(void *arg __unused) { int error; enum xsd_sockmsg_type type; for (;;) { error = xs_process_msg(&type); if (error) printf("XENSTORE error %d while reading message\n", error); } } /*---------------- XenStore Message Request/Reply Processing -----------------*/ /** * Filter invoked before transmitting any message to the XenStore service. * * The role of the filter may expand, but currently serves to manage * the interactions of messages with transaction state. * * \param request_msg_type The message type for the request. */ static inline void xs_request_filter(uint32_t request_msg_type) { if (request_msg_type == XS_TRANSACTION_START) sx_slock(&xs.suspend_mutex); } /** * Filter invoked after transmitting any message to the XenStore service. * * The role of the filter may expand, but currently serves to manage * the interactions of messages with transaction state. * * \param request_msg_type The message type for the original request. * \param reply_msg_type The message type for any received reply. * \param request_reply_error The error status from the attempt to send * the request or retrieve the reply. */ static inline void xs_reply_filter(uint32_t request_msg_type, uint32_t reply_msg_type, int request_reply_error) { /* * The count of transactions drops if we attempted * to end a transaction (even if that attempt fails * in error), we receive a transaction end acknowledgement, * or if our attempt to begin a transaction fails. */ if (request_msg_type == XS_TRANSACTION_END || (request_reply_error == 0 && reply_msg_type == XS_TRANSACTION_END) || (request_msg_type == XS_TRANSACTION_START && (request_reply_error != 0 || reply_msg_type == XS_ERROR))) sx_sunlock(&xs.suspend_mutex); } #define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0])) /** * Convert a XenStore error string into an errno number. * * \param errorstring The error string to convert. * * \return The errno best matching the input string. * * \note Unknown error strings are converted to EINVAL. 
*/ static int xs_get_error(const char *errorstring) { u_int i; for (i = 0; i < xsd_error_count; i++) { if (!strcmp(errorstring, xsd_errors[i].errstring)) return (xsd_errors[i].errnum); } log(LOG_WARNING, "XENSTORE xen store gave: unknown error %s", errorstring); return (EINVAL); } /** * Block waiting for a reply to a message request. * * \param type The returned type of the reply. * \param len The returned body length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_read_reply(enum xsd_sockmsg_type *type, u_int *len, void **result) { struct xs_stored_msg *msg; char *body; int error; mtx_lock(&xs.reply_lock); while (TAILQ_EMPTY(&xs.reply_list)) { error = mtx_sleep(&xs.reply_list, &xs.reply_lock, PCATCH, "xswait", hz/10); if (error && error != EWOULDBLOCK) { mtx_unlock(&xs.reply_lock); return (error); } } msg = TAILQ_FIRST(&xs.reply_list); TAILQ_REMOVE(&xs.reply_list, msg, list); mtx_unlock(&xs.reply_lock); *type = msg->hdr.type; if (len) *len = msg->hdr.len; body = msg->u.reply.body; free(msg, M_XENSTORE); *result = body; return (0); } /** * Pass-thru interface for XenStore access by userland processes * via the XenStore device. * * Reply type and length data are returned by overwriting these * fields in the passed in request message. * * \param msg A properly formatted message to transmit to * the XenStore service. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating the cause * of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(result, M_XENSTORE); */ int xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result) { uint32_t request_type; int error; request_type = msg->type; xs_request_filter(request_type); sx_xlock(&xs.request_mutex); if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0) error = xs_read_reply(&msg->type, &msg->len, result); sx_xunlock(&xs.request_mutex); xs_reply_filter(request_type, msg->type, error); return (error); } /** * Send a message with an optionally muti-part body to the XenStore service. * * \param t The transaction to use for this request. * \param request_type The type of message to send. * \param iovec Pointers to the body sections of the request. * \param num_vecs The number of body sections in the request. * \param len The returned length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating * the cause of failure. 
* * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(*result, M_XENSTORE); */ static int xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type, const struct iovec *iovec, u_int num_vecs, u_int *len, void **result) { struct xsd_sockmsg msg; void *ret = NULL; u_int i; int error; msg.tx_id = t.id; msg.req_id = 0; msg.type = request_type; msg.len = 0; for (i = 0; i < num_vecs; i++) msg.len += iovec[i].iov_len; xs_request_filter(request_type); sx_xlock(&xs.request_mutex); error = xs_write_store(&msg, sizeof(msg)); if (error) { printf("xs_talkv failed %d\n", error); goto error_lock_held; } for (i = 0; i < num_vecs; i++) { error = xs_write_store(iovec[i].iov_base, iovec[i].iov_len); if (error) { printf("xs_talkv failed %d\n", error); goto error_lock_held; } } error = xs_read_reply(&msg.type, len, &ret); error_lock_held: sx_xunlock(&xs.request_mutex); xs_reply_filter(request_type, msg.type, error); if (error) return (error); if (msg.type == XS_ERROR) { error = xs_get_error(ret); free(ret, M_XENSTORE); return (error); } /* Reply is either error or an echo of our request message type. */ KASSERT(msg.type == request_type, ("bad xenstore message type")); if (result) *result = ret; else free(ret, M_XENSTORE); return (0); } /** * Wrapper for xs_talkv allowing easy transmission of a message with * a single, contiguous, message body. * * \param t The transaction to use for this request. * \param request_type The type of message to send. * \param body The body of the request. * \param len The returned length of the reply. * \param result The returned body of the reply. * * \return 0 on success. Otherwise an errno indicating * the cause of failure. * * \note The returned result is provided in malloced storage and thus * must be free'd by the caller with 'free(*result, M_XENSTORE); */ static int xs_single(struct xs_transaction t, enum xsd_sockmsg_type request_type, const char *body, u_int *len, void **result) { struct iovec iovec; iovec.iov_base = (void *)(uintptr_t)body; iovec.iov_len = strlen(body) + 1; return (xs_talkv(t, request_type, &iovec, 1, len, result)); } /*------------------------- XenStore Watch Support ---------------------------*/ /** * Transmit a watch request to the XenStore service. * * \param path The path in the XenStore to watch. * \param tocken A unique identifier for this watch. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_watch(const char *path, const char *token) { struct iovec iov[2]; iov[0].iov_base = (void *)(uintptr_t) path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)(uintptr_t) token; iov[1].iov_len = strlen(token) + 1; return (xs_talkv(XST_NIL, XS_WATCH, iov, 2, NULL, NULL)); } /** * Transmit an uwatch request to the XenStore service. * * \param path The path in the XenStore to watch. * \param tocken A unique identifier for this watch. * * \return 0 on success. Otherwise an errno indicating the * cause of failure. */ static int xs_unwatch(const char *path, const char *token) { struct iovec iov[2]; iov[0].iov_base = (void *)(uintptr_t) path; iov[0].iov_len = strlen(path) + 1; iov[1].iov_base = (void *)(uintptr_t) token; iov[1].iov_len = strlen(token) + 1; return (xs_talkv(XST_NIL, XS_UNWATCH, iov, 2, NULL, NULL)); } /** * Convert from watch token (unique identifier) to the associated * internal tracking structure for this watch. * * \param tocken The unique identifier for the watch to find. 
* * \return A pointer to the found watch structure or NULL. */ static struct xs_watch * find_watch(const char *token) { struct xs_watch *i, *cmp; cmp = (void *)strtoul(token, NULL, 16); LIST_FOREACH(i, &xs.registered_watches, list) if (i == cmp) return (i); return (NULL); } /** * Thread body of the XenStore watch event dispatch thread. */ static void xenwatch_thread(void *unused) { struct xs_stored_msg *msg; for (;;) { mtx_lock(&xs.watch_events_lock); while (TAILQ_EMPTY(&xs.watch_events)) mtx_sleep(&xs.watch_events, &xs.watch_events_lock, PWAIT | PCATCH, "waitev", hz/10); mtx_unlock(&xs.watch_events_lock); sx_xlock(&xs.xenwatch_mutex); mtx_lock(&xs.watch_events_lock); msg = TAILQ_FIRST(&xs.watch_events); if (msg) TAILQ_REMOVE(&xs.watch_events, msg, list); mtx_unlock(&xs.watch_events_lock); if (msg != NULL) { /* * XXX There are messages coming in with a NULL * XXX callback. This deserves further investigation; * XXX the workaround here simply prevents the kernel * XXX from panic'ing on startup. */ if (msg->u.watch.handle->callback != NULL) msg->u.watch.handle->callback( msg->u.watch.handle, (const char **)msg->u.watch.vec, msg->u.watch.vec_size); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } sx_xunlock(&xs.xenwatch_mutex); } } /*----------- XenStore Configuration, Initialization, and Control ------------*/ /** * Setup communication channels with the XenStore service. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ static int xs_init_comms(void) { int error; if (xen_store->rsp_prod != xen_store->rsp_cons) { log(LOG_WARNING, "XENSTORE response ring is not quiescent " "(%08x:%08x): fixing up\n", xen_store->rsp_cons, xen_store->rsp_prod); xen_store->rsp_cons = xen_store->rsp_prod; } - if (xs.irq) - unbind_from_irqhandler(xs.irq); + xen_intr_unbind(&xs.xen_intr_handle); - error = bind_caller_port_to_irqhandler(xs.evtchn, "xenstore", - xs_intr, NULL, INTR_TYPE_NET, &xs.irq); + error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn, + /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE, + &xs.xen_intr_handle); if (error) { log(LOG_WARNING, "XENSTORE request irq failed %i\n", error); return (error); } return (0); } /*------------------ Private Device Attachment Functions --------------------*/ static void xs_identify(driver_t *driver, device_t parent) { BUS_ADD_CHILD(parent, 0, "xenstore", 0); } /** * Probe for the existance of the XenStore. * * \param dev */ static int xs_probe(device_t dev) { /* * We are either operating within a PV kernel or being probed * as the child of the successfully attached xenpci device. * Thus we are in a Xen environment and there will be a XenStore. * Unconditionally return success. */ device_set_desc(dev, "XenStore"); return (0); } static void xs_attach_deferred(void *arg) { xs_dev_init(); bus_generic_probe(xs.xs_dev); bus_generic_attach(xs.xs_dev); config_intrhook_disestablish(&xs.xs_attachcb); } /** * Attach to the XenStore. * * This routine also prepares for the probe/attach of drivers that rely * on the XenStore. */ static int xs_attach(device_t dev) { int error; /* Allow us to get device_t from softc and vice-versa. */ xs.xs_dev = dev; device_set_softc(dev, &xs); /* * This seems to be a layering violation. The XenStore is just * one of many clients of the Grant Table facility. It happens * to be the first and a gating consumer to all other devices, * so this does work. A better place would be in the PV support * code for fully PV kernels and the xenpci driver for HVM kernels. 
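The interrupt plumbing introduced in this hunk reduces to three operations on the new xen_intr_handle_t; a condensed sketch of the lifecycle as this file uses it, with dev and evtchn standing in for xs.xs_dev and xs.evtchn and error handling elided.

/* Condensed from xs_init_comms() and the ring I/O paths above. */
static void
xen_intr_lifecycle(device_t dev, evtchn_port_t evtchn)
{
	xen_intr_handle_t handle;
	int error;

	error = xen_intr_bind_local_port(dev, evtchn, /*filter*/NULL, xs_intr,
	    /*arg*/NULL, INTR_TYPE_NET | INTR_MPSAFE, &handle);
	if (error == 0) {
		xen_intr_signal(handle);	/* notify the peer after index updates */
		xen_intr_unbind(&handle);	/* drop the binding, e.g. before re-binding */
	}
}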
*/ error = gnttab_init(); if (error != 0) { log(LOG_WARNING, "XENSTORE: Error initializing grant tables: %d\n", error); return (ENXIO); } /* Initialize the interface to xenstore. */ struct proc *p; #ifdef XENHVM xs.evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); xs.gpfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); xen_store = pmap_mapdev(xs.gpfn * PAGE_SIZE, PAGE_SIZE); #else xs.evtchn = xen_start_info->store_evtchn; #endif TAILQ_INIT(&xs.reply_list); TAILQ_INIT(&xs.watch_events); mtx_init(&xs.ring_lock, "ring lock", NULL, MTX_DEF); mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF); sx_init(&xs.xenwatch_mutex, "xenwatch"); sx_init(&xs.request_mutex, "xenstore request"); sx_init(&xs.suspend_mutex, "xenstore suspend"); mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF); mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF); - xs.irq = 0; /* Initialize the shared memory rings to talk to xenstored */ error = xs_init_comms(); if (error) return (error); error = kproc_create(xenwatch_thread, NULL, &p, RFHIGHPID, 0, "xenwatch"); if (error) return (error); xs.xenwatch_pid = p->p_pid; error = kproc_create(xs_rcv_thread, NULL, NULL, RFHIGHPID, 0, "xenstore_rcv"); xs.xs_attachcb.ich_func = xs_attach_deferred; xs.xs_attachcb.ich_arg = NULL; config_intrhook_establish(&xs.xs_attachcb); return (error); } /** * Prepare for suspension of this VM by halting XenStore access after * all transactions and individual requests have completed. */ static int xs_suspend(device_t dev) { int error; /* Suspend child Xen devices. */ error = bus_generic_suspend(dev); if (error != 0) return (error); sx_xlock(&xs.suspend_mutex); sx_xlock(&xs.request_mutex); return (0); } /** * Resume XenStore operations after this VM is resumed. */ static int xs_resume(device_t dev __unused) { struct xs_watch *watch; char token[sizeof(watch) * 2 + 1]; xs_init_comms(); sx_xunlock(&xs.request_mutex); /* * No need for registered_watches_lock: the suspend_mutex * is sufficient. */ LIST_FOREACH(watch, &xs.registered_watches, list) { sprintf(token, "%lX", (long)watch); xs_watch(watch->node, token); } sx_xunlock(&xs.suspend_mutex); /* Resume child Xen devices. */ bus_generic_resume(dev); return (0); } /*-------------------- Private Device Attachment Data -----------------------*/ static device_method_t xenstore_methods[] = { /* Device interface */ DEVMETHOD(device_identify, xs_identify), DEVMETHOD(device_probe, xs_probe), DEVMETHOD(device_attach, xs_attach), DEVMETHOD(device_detach, bus_generic_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, xs_suspend), DEVMETHOD(device_resume, xs_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), DEVMETHOD(bus_release_resource, bus_generic_release_resource), DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), DEVMETHOD_END }; DEFINE_CLASS_0(xenstore, xenstore_driver, xenstore_methods, 0); static devclass_t xenstore_devclass; #ifdef XENHVM DRIVER_MODULE(xenstore, xenpci, xenstore_driver, xenstore_devclass, 0, 0); #else DRIVER_MODULE(xenstore, nexus, xenstore_driver, xenstore_devclass, 0, 0); #endif /*------------------------------- Sysctl Data --------------------------------*/ /* XXX Shouldn't the node be somewhere else? 
*/ SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xs.evtchn, 0, ""); SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); /*-------------------------------- Public API --------------------------------*/ /*------- API comments for these methods can be found in xenstorevar.h -------*/ int xs_directory(struct xs_transaction t, const char *dir, const char *node, u_int *num, const char ***result) { struct sbuf *path; char *strings; u_int len = 0; int error; path = xs_join(dir, node); error = xs_single(t, XS_DIRECTORY, sbuf_data(path), &len, (void **)&strings); sbuf_delete(path); if (error) return (error); *result = split(strings, len, num); return (0); } int xs_exists(struct xs_transaction t, const char *dir, const char *node) { const char **d; int error, dir_n; error = xs_directory(t, dir, node, &dir_n, &d); if (error) return (0); free(d, M_XENSTORE); return (1); } int xs_read(struct xs_transaction t, const char *dir, const char *node, u_int *len, void **result) { struct sbuf *path; void *ret; int error; path = xs_join(dir, node); error = xs_single(t, XS_READ, sbuf_data(path), len, &ret); sbuf_delete(path); if (error) return (error); *result = ret; return (0); } int xs_write(struct xs_transaction t, const char *dir, const char *node, const char *string) { struct sbuf *path; struct iovec iovec[2]; int error; path = xs_join(dir, node); iovec[0].iov_base = (void *)(uintptr_t) sbuf_data(path); iovec[0].iov_len = sbuf_len(path) + 1; iovec[1].iov_base = (void *)(uintptr_t) string; iovec[1].iov_len = strlen(string); error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL); sbuf_delete(path); return (error); } int xs_mkdir(struct xs_transaction t, const char *dir, const char *node) { struct sbuf *path; int ret; path = xs_join(dir, node); ret = xs_single(t, XS_MKDIR, sbuf_data(path), NULL, NULL); sbuf_delete(path); return (ret); } int xs_rm(struct xs_transaction t, const char *dir, const char *node) { struct sbuf *path; int ret; path = xs_join(dir, node); ret = xs_single(t, XS_RM, sbuf_data(path), NULL, NULL); sbuf_delete(path); return (ret); } int xs_rm_tree(struct xs_transaction xbt, const char *base, const char *node) { struct xs_transaction local_xbt; struct sbuf *root_path_sbuf; struct sbuf *cur_path_sbuf; char *root_path; char *cur_path; const char **dir; int error; int empty; retry: root_path_sbuf = xs_join(base, node); cur_path_sbuf = xs_join(base, node); root_path = sbuf_data(root_path_sbuf); cur_path = sbuf_data(cur_path_sbuf); dir = NULL; local_xbt.id = 0; if (xbt.id == 0) { error = xs_transaction_start(&local_xbt); if (error != 0) goto out; xbt = local_xbt; } empty = 0; while (1) { u_int count; u_int i; error = xs_directory(xbt, cur_path, "", &count, &dir); if (error) goto out; for (i = 0; i < count; i++) { error = xs_rm(xbt, cur_path, dir[i]); if (error == ENOTEMPTY) { struct sbuf *push_dir; /* * Descend to clear out this sub directory. * We'll return to cur_dir once push_dir * is empty. */ push_dir = xs_join(cur_path, dir[i]); sbuf_delete(cur_path_sbuf); cur_path_sbuf = push_dir; cur_path = sbuf_data(cur_path_sbuf); break; } else if (error != 0) { goto out; } } free(dir, M_XENSTORE); dir = NULL; if (i == count) { char *last_slash; /* Directory is empty. It is now safe to remove. */ error = xs_rm(xbt, cur_path, ""); if (error != 0) goto out; if (!strcmp(cur_path, root_path)) break; /* Return to processing the parent directory. 
*/ last_slash = strrchr(cur_path, '/'); KASSERT(last_slash != NULL, ("xs_rm_tree: mangled path %s", cur_path)); *last_slash = '\0'; } } out: sbuf_delete(cur_path_sbuf); sbuf_delete(root_path_sbuf); if (dir != NULL) free(dir, M_XENSTORE); if (local_xbt.id != 0) { int terror; terror = xs_transaction_end(local_xbt, /*abort*/error != 0); xbt.id = 0; if (terror == EAGAIN && error == 0) goto retry; } return (error); } int xs_transaction_start(struct xs_transaction *t) { char *id_str; int error; error = xs_single(XST_NIL, XS_TRANSACTION_START, "", NULL, (void **)&id_str); if (error == 0) { t->id = strtoul(id_str, NULL, 0); free(id_str, M_XENSTORE); } return (error); } int xs_transaction_end(struct xs_transaction t, int abort) { char abortstr[2]; if (abort) strcpy(abortstr, "F"); else strcpy(abortstr, "T"); return (xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL)); } int xs_scanf(struct xs_transaction t, const char *dir, const char *node, int *scancountp, const char *fmt, ...) { va_list ap; int error, ns; char *val; error = xs_read(t, dir, node, NULL, (void **) &val); if (error) return (error); va_start(ap, fmt); ns = vsscanf(val, fmt, ap); va_end(ap); free(val, M_XENSTORE); /* Distinctive errno. */ if (ns == 0) return (ERANGE); if (scancountp) *scancountp = ns; return (0); } int xs_vprintf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, va_list ap) { struct sbuf *sb; int error; sb = sbuf_new_auto(); sbuf_vprintf(sb, fmt, ap); sbuf_finish(sb); error = xs_write(t, dir, node, sbuf_data(sb)); sbuf_delete(sb); return (error); } int xs_printf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, ...) { va_list ap; int error; va_start(ap, fmt); error = xs_vprintf(t, dir, node, fmt, ap); va_end(ap); return (error); } int xs_gather(struct xs_transaction t, const char *dir, ...) { va_list ap; const char *name; int error; va_start(ap, dir); error = 0; while (error == 0 && (name = va_arg(ap, char *)) != NULL) { const char *fmt = va_arg(ap, char *); void *result = va_arg(ap, void *); char *p; error = xs_read(t, dir, name, NULL, (void **) &p); if (error) break; if (fmt) { if (sscanf(p, fmt, result) == 0) error = EINVAL; free(p, M_XENSTORE); } else *(char **)result = p; } va_end(ap); return (error); } int xs_register_watch(struct xs_watch *watch) { /* Pointer in ascii is the token. */ char token[sizeof(watch) * 2 + 1]; int error; sprintf(token, "%lX", (long)watch); sx_slock(&xs.suspend_mutex); mtx_lock(&xs.registered_watches_lock); KASSERT(find_watch(token) == NULL, ("watch already registered")); LIST_INSERT_HEAD(&xs.registered_watches, watch, list); mtx_unlock(&xs.registered_watches_lock); error = xs_watch(watch->node, token); /* Ignore errors due to multiple registration. 
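The watch token built by xs_register_watch() here, and parsed by find_watch() earlier, is nothing more than the xs_watch pointer printed in hex. A small illustrative check of that round trip; token_roundtrip() is hypothetical and exists only to show the encoding.

/* Illustrative only: how the ASCII token maps back to the watch. */
static void
token_roundtrip(struct xs_watch *watch)
{
	char token[sizeof(watch) * 2 + 1];

	sprintf(token, "%lX", (long)watch);	/* e.g. "FFFFF80002C3D400" */
	/* find_watch() recovers the pointer with strtoul(token, NULL, 16). */
	KASSERT((struct xs_watch *)strtoul(token, NULL, 16) == watch,
	    ("watch token failed to round trip"));
}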
*/ if (error == EEXIST) error = 0; if (error != 0) { mtx_lock(&xs.registered_watches_lock); LIST_REMOVE(watch, list); mtx_unlock(&xs.registered_watches_lock); } sx_sunlock(&xs.suspend_mutex); return (error); } void xs_unregister_watch(struct xs_watch *watch) { struct xs_stored_msg *msg, *tmp; char token[sizeof(watch) * 2 + 1]; int error; sprintf(token, "%lX", (long)watch); sx_slock(&xs.suspend_mutex); mtx_lock(&xs.registered_watches_lock); if (find_watch(token) == NULL) { mtx_unlock(&xs.registered_watches_lock); sx_sunlock(&xs.suspend_mutex); return; } LIST_REMOVE(watch, list); mtx_unlock(&xs.registered_watches_lock); error = xs_unwatch(watch->node, token); if (error) log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n", watch->node, error); sx_sunlock(&xs.suspend_mutex); /* Cancel pending watch events. */ mtx_lock(&xs.watch_events_lock); TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) { if (msg->u.watch.handle != watch) continue; TAILQ_REMOVE(&xs.watch_events, msg, list); free(msg->u.watch.vec, M_XENSTORE); free(msg, M_XENSTORE); } mtx_unlock(&xs.watch_events_lock); /* Flush any currently-executing callback, unless we are it. :-) */ if (curproc->p_pid != xs.xenwatch_pid) { sx_xlock(&xs.xenwatch_mutex); sx_xunlock(&xs.xenwatch_mutex); } } diff --git a/sys/xen/xenstore/xenstore_dev.c b/sys/xen/xenstore/xenstore_dev.c index 1fa419795ed1..e1b4091447ab 100644 --- a/sys/xen/xenstore/xenstore_dev.c +++ b/sys/xen/xenstore/xenstore_dev.c @@ -1,224 +1,224 @@ /* * xenstore_dev.c * * Driver giving user-space access to the kernel's connection to the * XenStore service. * * Copyright (c) 2005, Christian Limpach * Copyright (c) 2005, Rusty Russell, IBM Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include struct xs_dev_transaction { LIST_ENTRY(xs_dev_transaction) list; struct xs_transaction handle; }; struct xs_dev_data { /* In-progress transaction. */ LIST_HEAD(xdd_list_head, xs_dev_transaction) transactions; /* Partial request. */ unsigned int len; union { struct xsd_sockmsg msg; char buffer[PAGE_SIZE]; } u; /* Response queue. 
*/ #define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1)) char read_buffer[PAGE_SIZE]; unsigned int read_cons, read_prod; }; static int xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag) { int error; struct xs_dev_data *u = dev->si_drv1; while (u->read_prod == u->read_cons) { error = tsleep(u, PCATCH, "xsdread", hz/10); if (error && error != EWOULDBLOCK) return (error); } while (uio->uio_resid > 0) { if (u->read_cons == u->read_prod) break; error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)], 1, uio); if (error) return (error); u->read_cons++; } return (0); } static void xs_queue_reply(struct xs_dev_data *u, char *data, unsigned int len) { int i; for (i = 0; i < len; i++, u->read_prod++) u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i]; KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer), ("xenstore reply too big")); wakeup(u); } static int xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag) { int error; struct xs_dev_data *u = dev->si_drv1; struct xs_dev_transaction *trans; void *reply; int len = uio->uio_resid; if ((len + u->len) > sizeof(u->u.buffer)) return (EINVAL); error = uiomove(u->u.buffer + u->len, len, uio); if (error) return (error); u->len += len; if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) return (0); switch (u->u.msg.type) { case XS_TRANSACTION_START: case XS_TRANSACTION_END: case XS_DIRECTORY: case XS_READ: case XS_GET_PERMS: case XS_RELEASE: case XS_GET_DOMAIN_PATH: case XS_WRITE: case XS_MKDIR: case XS_RM: case XS_SET_PERMS: error = xs_dev_request_and_reply(&u->u.msg, &reply); if (!error) { if (u->u.msg.type == XS_TRANSACTION_START) { trans = malloc(sizeof(*trans), M_XENSTORE, M_WAITOK); trans->handle.id = strtoul(reply, NULL, 0); LIST_INSERT_HEAD(&u->transactions, trans, list); } else if (u->u.msg.type == XS_TRANSACTION_END) { LIST_FOREACH(trans, &u->transactions, list) if (trans->handle.id == u->u.msg.tx_id) break; #if 0 /* XXX does this mean the list is empty? 
*/ BUG_ON(&trans->list == &u->transactions); #endif LIST_REMOVE(trans, list); free(trans, M_XENSTORE); } xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); xs_queue_reply(u, (char *)reply, u->u.msg.len); free(reply, M_XENSTORE); } break; default: error = EINVAL; break; } if (error == 0) u->len = 0; return (error); } static int xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct xs_dev_data *u; #if 0 /* XXX figure out if equiv needed */ nonseekable_open(inode, filp); #endif u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO); LIST_INIT(&u->transactions); dev->si_drv1 = u; return (0); } static int xs_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { struct xs_dev_data *u = dev->si_drv1; struct xs_dev_transaction *trans, *tmp; LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) { xs_transaction_end(trans->handle, 1); LIST_REMOVE(trans, list); free(trans, M_XENSTORE); } free(u, M_XENSTORE); return (0); } static struct cdevsw xs_dev_cdevsw = { .d_version = D_VERSION, .d_read = xs_dev_read, .d_write = xs_dev_write, .d_open = xs_dev_open, .d_close = xs_dev_close, .d_name = "xs_dev", }; void xs_dev_init() { make_dev(&xs_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, "xen/xenstore"); } diff --git a/sys/xen/xenstore/xenstorevar.h b/sys/xen/xenstore/xenstorevar.h index 4a1382d8f746..208e5bf09b8e 100644 --- a/sys/xen/xenstore/xenstorevar.h +++ b/sys/xen/xenstore/xenstorevar.h @@ -1,341 +1,341 @@ /****************************************************************************** * xenstorevar.h * * Method declarations and structures for accessing the XenStore.h * * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 XenSource Ltd. * Copyright (C) 2009,2010 Spectra Logic Corporation * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this source file (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * $FreeBSD$ */ #ifndef _XEN_XENSTORE_XENSTOREVAR_H #define _XEN_XENSTORE_XENSTOREVAR_H #include #include #include #include #include #include -#include +#include #include #include #include #include "xenbus_if.h" /* XenStore allocations including XenStore data returned to clients. */ MALLOC_DECLARE(M_XENSTORE); struct xenstore_domain_interface; struct xs_watch; extern struct xenstore_domain_interface *xen_store; typedef void (xs_watch_cb_t)(struct xs_watch *, const char **vec, unsigned int len); /* Register callback to watch subtree (node) in the XenStore. 
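The framing implemented by xs_dev_write() and xs_dev_read() above can be exercised from user space roughly as follows: the client writes a struct xsd_sockmsg header followed by the body (the driver buffers partial writes until header plus msg.len bytes have arrived), then reads back the reply header followed by its body. The fragment is hypothetical; the required user-space headers are omitted and the "domid" node is only an example.

/* Hypothetical user-space client of /dev/xen/xenstore (headers omitted). */
static void
xs_dev_read_domid(void)
{
	struct xsd_sockmsg msg;
	char buf[sizeof(msg) + 64];
	const char *path = "domid";
	int fd;

	fd = open("/dev/xen/xenstore", O_RDWR);
	msg.type = XS_READ;
	msg.req_id = 0;
	msg.tx_id = 0;				/* no transaction */
	msg.len = strlen(path) + 1;
	memcpy(buf, &msg, sizeof(msg));
	memcpy(buf + sizeof(msg), path, msg.len);
	/* xs_dev_write() buffers until header plus msg.len bytes arrive. */
	write(fd, buf, sizeof(msg) + msg.len);
	/* xs_dev_read() returns the queued reply header followed by its body. */
	read(fd, buf, sizeof(buf));
	close(fd);
}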
*/ struct xs_watch { LIST_ENTRY(xs_watch) list; /* Path being watched. */ char *node; /* Callback (executed in a process context with no locks held). */ xs_watch_cb_t *callback; /* Callback client data untouched by the XenStore watch mechanism. */ uintptr_t callback_data; }; LIST_HEAD(xs_watch_list, xs_watch); typedef int (*xs_event_handler_t)(void *); struct xs_transaction { uint32_t id; }; #define XST_NIL ((struct xs_transaction) { 0 }) /** * Fetch the contents of a directory in the XenStore. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param num The returned number of directory entries. * \param result An array of directory entry strings. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * \note The results buffer is malloced and should be free'd by the * caller with 'free(*result, M_XENSTORE)'. */ int xs_directory(struct xs_transaction t, const char *dir, const char *node, unsigned int *num, const char ***result); /** * Determine if a path exists in the XenStore. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * * \retval 1 The path exists. * \retval 0 The path does not exist or an error occurred attempting * to make that determination. */ int xs_exists(struct xs_transaction t, const char *dir, const char *node); /** * Get the contents of a single "file". Returns the contents in * *result which should be freed with free(*result, M_XENSTORE) after * use. The length of the value in bytes is returned in *len. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the file to read. * \param node The basename of the file to read. * \param len The amount of data read. * \param result The returned contents from this file. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. * * \note The results buffer is malloced and should be free'd by the * caller with 'free(*result, M_XENSTORE)'. */ int xs_read(struct xs_transaction t, const char *dir, const char *node, unsigned int *len, void **result); /** * Write to a single file. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the file to write. * \param node The basename of the file to write. * \param string The NUL terminated string of data to write. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_write(struct xs_transaction t, const char *dir, const char *node, const char *string); /** * Create a new directory. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the directory to create. * \param node The basename of the directory to create. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_mkdir(struct xs_transaction t, const char *dir, const char *node); /** * Remove a file or directory (directories must be empty). * * \param t The XenStore transaction covering this request. * \param dir The dirname of the directory to remove. * \param node The basename of the directory to remove. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_rm(struct xs_transaction t, const char *dir, const char *node); /** * Destroy a tree of files rooted at dir/node. * * \param t The XenStore transaction covering this request. 
* \param dir The dirname of the directory to remove. * \param node The basename of the directory to remove. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_rm_tree(struct xs_transaction t, const char *dir, const char *node); /** * Start a transaction. * * Changes by others will not be seen during the lifetime of this * transaction, and changes will not be visible to others until it * is committed (xs_transaction_end). * * \param t The returned transaction. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_transaction_start(struct xs_transaction *t); /** * End a transaction. * * \param t The transaction to end/commit. * \param abort If non-zero, the transaction is discarded * instead of committed. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_transaction_end(struct xs_transaction t, int abort); /* * Single file read and scanf parsing of the result. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param scancountp The number of input values assigned (i.e. the result * of scanf). * \param fmt Scanf format string followed by a variable number of * scanf input arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of failure. */ int xs_scanf(struct xs_transaction t, const char *dir, const char *node, int *scancountp, const char *fmt, ...) __attribute__((format(scanf, 5, 6))); /** * Printf formatted write to a XenStore file. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param fmt Printf format string followed by a variable number of * printf arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. */ int xs_printf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, ...) __attribute__((format(printf, 4, 5))); /** * va_list version of xenbus_printf(). * * \param t The XenStore transaction covering this request. * \param dir The dirname of the path to read. * \param node The basename of the path to read. * \param fmt Printf format string. * \param ap Va_list of printf arguments. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. */ int xs_vprintf(struct xs_transaction t, const char *dir, const char *node, const char *fmt, va_list ap); /** * Multi-file read within a single directory and scanf parsing of * the results. * * \param t The XenStore transaction covering this request. * \param dir The dirname of the paths to read. * \param ... A variable number of argument triples specifying * the file name, scanf-style format string, and * output variable (pointer to storage of the results). * The last triple in the call must be terminated * will a final NULL argument. A NULL format string * will cause the entire contents of the given file * to be assigned as a NUL terminated, M_XENSTORE heap * backed, string to the output parameter of that tuple. * * \return On success, 0. Otherwise an errno value indicating the * type of read failure. * * Example: * char protocol_abi[64]; * uint32_t ring_ref; * char *dev_type; * int error; * * error = xenbus_gather(XBT_NIL, xenbus_get_node(dev), * "ring-ref", "%" PRIu32, &ring_ref, * "protocol", "%63s", protocol_abi, * "device-type", NULL, &dev_type, * NULL); * * ... 
* * free(dev_type, M_XENSTORE); */ int xs_gather(struct xs_transaction t, const char *dir, ...); /** * Register a XenStore watch. * * XenStore watches allow a client to be notified via a callback (embedded * within the watch object) of changes to an object in the XenStore. * * \param watch An xs_watch struct with it's node and callback fields * properly initialized. * * \return On success, 0. Otherwise an errno value indicating the * type of write failure. EEXIST errors from the XenStore * are supressed, allowing multiple, physically different, * xenbus_watch objects, to watch the same path in the XenStore. */ int xs_register_watch(struct xs_watch *watch); /** * Unregister a XenStore watch. * * \param watch An xs_watch object previously used in a successful call * to xs_register_watch(). * * The xs_watch object's node field is not altered by this call. * It is the caller's responsibility to properly dispose of both the * watch object and the data pointed to by watch->node. */ void xs_unregister_watch(struct xs_watch *watch); /** * Allocate and return an sbuf containing the XenStore path string * /. If name is the NUL string, the returned sbuf contains * the path string . * * \param dir The NUL terminated directory prefix for new path. * \param name The NUL terminated basename for the new path. * * \return A buffer containing the joined path. */ struct sbuf *xs_join(const char *, const char *); #endif /* _XEN_XENSTORE_XENSTOREVAR_H */
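Finally, a short hypothetical consumer of the transaction interfaces declared above, using the same retry convention as xs_rm_tree() in this patch: xs_transaction_end() reports EAGAIN when the transaction raced with another writer and must be replayed. publish_ring_info() and its node names are illustrative only.

/* Hypothetical: atomically publish two nodes, retrying on EAGAIN. */
static int
publish_ring_info(const char *dir, uint32_t ring_ref, uint32_t evtchn)
{
	struct xs_transaction xbt;
	int error;

again:
	error = xs_transaction_start(&xbt);
	if (error != 0)
		return (error);
	error = xs_printf(xbt, dir, "ring-ref", "%u", ring_ref);
	if (error == 0)
		error = xs_printf(xbt, dir, "event-channel", "%u", evtchn);
	/* Abort on error; replay the whole transaction if it raced. */
	if (xs_transaction_end(xbt, /*abort*/error != 0) == EAGAIN && error == 0)
		goto again;
	return (error);
}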