diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 8b6933a2c1ef..4dfb4fe1fe8d 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -1,366 +1,365 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _VMM_H_
 #define	_VMM_H_
 
 #ifdef _KERNEL
 
 #define	VM_MAX_NAMELEN	32
 
 struct vm;
 struct vm_memory_segment;
 struct seg_desc;
 struct vm_exit;
 struct vm_run;
 struct vhpet;
 struct vioapic;
 struct vlapic;
 struct vmspace;
 struct vm_object;
 struct pmap;
 
 enum x2apic_state;
 
 typedef int	(*vmm_init_func_t)(int ipinum);
 typedef int	(*vmm_cleanup_func_t)(void);
 typedef void	(*vmm_resume_func_t)(void);
 typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
 typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
 				  struct pmap *pmap, void *rendezvous_cookie);
 typedef void	(*vmi_cleanup_func_t)(void *vmi);
 typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
 				      uint64_t *retval);
 typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
 				      uint64_t val);
 typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
 				  struct seg_desc *desc);
 typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
 				  struct seg_desc *desc);
 typedef int	(*vmi_inject_event_t)(void *vmi, int vcpu,
 				      int type, int vector,
 				      uint32_t code, int code_valid);
 typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
 typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
 typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
 typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
 typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
 typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
 
 /*
  * Table of entry points supplied by a hardware-specific backend.  Two
  * instances, 'vmm_ops_intel' and 'vmm_ops_amd', are declared right after
  * this structure.  Each member's signature is given by the corresponding
  * typedef above.
  */
 struct vmm_ops {
 	vmm_init_func_t		init;		/* module wide initialization */
 	vmm_cleanup_func_t	cleanup;
 	vmm_resume_func_t	resume;
 
 	vmi_init_func_t		vminit;		/* vm-specific initialization */
 	vmi_run_func_t		vmrun;
 	vmi_cleanup_func_t	vmcleanup;
 	vmi_get_register_t	vmgetreg;
 	vmi_set_register_t	vmsetreg;
 	vmi_get_desc_t		vmgetdesc;
 	vmi_set_desc_t		vmsetdesc;
 	vmi_inject_event_t	vminject;
 	vmi_get_cap_t		vmgetcap;
 	vmi_set_cap_t		vmsetcap;
 	vmi_vmspace_alloc	vmspace_alloc;
 	vmi_vmspace_free	vmspace_free;
 	vmi_vlapic_init		vlapic_init;
 	vmi_vlapic_cleanup	vlapic_cleanup;
 };
 
 extern struct vmm_ops vmm_ops_intel;
 extern struct vmm_ops vmm_ops_amd;
 
 int vm_create(const char *name, struct vm **retvm);
 void vm_destroy(struct vm *vm);
 const char *vm_name(struct vm *vm);
 int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
 int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
 void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
 		  void **cookie);
 void vm_gpa_release(void *cookie);
 int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
 	      struct vm_memory_segment *seg);
 int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
 		  vm_offset_t *offset, struct vm_object **object);
 boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
 int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
 int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
 int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *ret_desc);
 int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *desc);
 int vm_run(struct vm *vm, struct vm_run *vmrun);
 int vm_inject_event(struct vm *vm, int vcpu, int type,
 		    int vector, uint32_t error_code, int error_code_valid);
 int vm_inject_nmi(struct vm *vm, int vcpu);
 int vm_nmi_pending(struct vm *vm, int vcpuid);
 void vm_nmi_clear(struct vm *vm, int vcpuid);
 uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
 struct vlapic *vm_lapic(struct vm *vm, int cpu);
 struct vioapic *vm_ioapic(struct vm *vm);
 struct vhpet *vm_hpet(struct vm *vm);
 int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
 int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
 int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
 int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
 int vm_apicid2vcpuid(struct vm *vm, int apicid);
 void vm_activate_cpu(struct vm *vm, int vcpu);
 cpuset_t vm_active_cpus(struct vm *vm);
 struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
 
 /*
  * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
  * The rendezvous 'func(arg)' is not allowed to do anything that will
  * cause the thread to be put to sleep.
  *
  * If the rendezvous is being initiated from a vcpu context then the
  * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
  *
  * The caller cannot hold any locks when initiating the rendezvous.
  *
  * The implementation of this API may cause vcpus other than those specified
  * by 'dest' to be stalled. The caller should not rely on any vcpus making
  * forward progress when the rendezvous is in progress.
  */
 typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
 void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
     vm_rendezvous_func_t func, void *arg);
 
 /*
  * Return 1 if the word that 'rendezvous_cookie' points to is non-zero,
  * 0 otherwise.  The cookie is read as a uintptr_t.
  */
 static __inline int
 vcpu_rendezvous_pending(void *rendezvous_cookie)
 {
 	const uintptr_t *pending = rendezvous_cookie;
 
 	return (*pending != 0);
 }
 
 /*
  * Return 1 if device indicated by bus/slot/func is supposed to be a
  * pci passthrough device.
  *
  * Return 0 otherwise.
  */
 int vmm_is_pptdev(int bus, int slot, int func);
 
 void *vm_iommu_domain(struct vm *vm);
 
 /*
  * Per-vcpu state values, as passed to vcpu_set_state() and returned by
  * vcpu_get_state().
  */
 enum vcpu_state {
 	VCPU_IDLE,
 	VCPU_FROZEN,
 	VCPU_RUNNING,
 	VCPU_SLEEPING,
 };
 
 int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
     bool from_idle);
 enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
 
 /*
  * Return non-zero if vcpu_get_state() reports VCPU_RUNNING for 'vcpu'.
  * 'hostcpu' is handed through unchanged to vcpu_get_state().
  */
 static __inline int
 vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
 {
 	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
 }
 
 void *vcpu_stats(struct vm *vm, int vcpu);
 void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
 struct vmspace *vm_get_vmspace(struct vm *vm);
 int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
 int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
 #endif	/* _KERNEL */
 
 #include <machine/vmm_instruction_emul.h>
 
 #define	VM_MAXCPU	16			/* maximum virtual cpus */
 
 /*
  * Identifiers for events that can be injected into the VM
  */
 enum vm_event_type {
 	VM_EVENT_NONE,
 	VM_HW_INTR,
 	VM_NMI,
 	VM_HW_EXCEPTION,
 	VM_SW_INTR,
 	VM_PRIV_SW_EXCEPTION,
 	VM_SW_EXCEPTION,
 	VM_EVENT_MAX
 };
 
 /*
  * Identifiers for architecturally defined registers.
  */
 enum vm_reg_name {
 	VM_REG_GUEST_RAX,
 	VM_REG_GUEST_RBX,
 	VM_REG_GUEST_RCX,
 	VM_REG_GUEST_RDX,
 	VM_REG_GUEST_RSI,
 	VM_REG_GUEST_RDI,
 	VM_REG_GUEST_RBP,
 	VM_REG_GUEST_R8,
 	VM_REG_GUEST_R9,
 	VM_REG_GUEST_R10,
 	VM_REG_GUEST_R11,
 	VM_REG_GUEST_R12,
 	VM_REG_GUEST_R13,
 	VM_REG_GUEST_R14,
 	VM_REG_GUEST_R15,
 	VM_REG_GUEST_CR0,
 	VM_REG_GUEST_CR3,
 	VM_REG_GUEST_CR4,
 	VM_REG_GUEST_DR7,
 	VM_REG_GUEST_RSP,
 	VM_REG_GUEST_RIP,
 	VM_REG_GUEST_RFLAGS,
 	VM_REG_GUEST_ES,
 	VM_REG_GUEST_CS,
 	VM_REG_GUEST_SS,
 	VM_REG_GUEST_DS,
 	VM_REG_GUEST_FS,
 	VM_REG_GUEST_GS,
 	VM_REG_GUEST_LDTR,
 	VM_REG_GUEST_TR,
 	VM_REG_GUEST_IDTR,
 	VM_REG_GUEST_GDTR,
 	VM_REG_GUEST_EFER,
 	VM_REG_LAST
 };
 
 /*
  * Identifiers for optional vmm capabilities
  */
 enum vm_cap_type {
 	VM_CAP_HALT_EXIT,
 	VM_CAP_MTRAP_EXIT,
 	VM_CAP_PAUSE_EXIT,
 	VM_CAP_UNRESTRICTED_GUEST,
 	VM_CAP_ENABLE_INVPCID,
 	VM_CAP_MAX
 };
 
 /*
  * x2APIC state of a vcpu, as exchanged through vm_get_x2apic_state() and
  * vm_set_x2apic_state().  X2APIC_STATE_LAST is the final enumerator.
  * NOTE(review): presumably an upper bound for validation rather than a
  * real state - confirm against the callers.
  */
 enum x2apic_state {
-	X2APIC_ENABLED,
-	X2APIC_AVAILABLE,
 	X2APIC_DISABLED,
+	X2APIC_ENABLED,
 	X2APIC_STATE_LAST
 };
 
 /*
  * The 'access' field has the format specified in Table 21-2 of the Intel
  * Architecture Manual vol 3b.
  *
  * XXX The contents of the 'access' field are architecturally defined except
  * bit 16 - Segment Unusable.
  */
 struct seg_desc {
 	uint64_t	base;
 	uint32_t	limit;
 	uint32_t	access;
 };
 
 /*
  * Values stored in the 'exitcode' member of 'struct vm_exit'.
  * VM_EXITCODE_MAX is the final enumerator.
  */
 enum vm_exitcode {
 	VM_EXITCODE_INOUT,
 	VM_EXITCODE_VMX,
 	VM_EXITCODE_BOGUS,
 	VM_EXITCODE_RDMSR,
 	VM_EXITCODE_WRMSR,
 	VM_EXITCODE_HLT,
 	VM_EXITCODE_MTRAP,
 	VM_EXITCODE_PAUSE,
 	VM_EXITCODE_PAGING,
 	VM_EXITCODE_INST_EMUL,
 	VM_EXITCODE_SPINUP_AP,
 	VM_EXITCODE_SPINDOWN_CPU,
 	VM_EXITCODE_RENDEZVOUS,
 	VM_EXITCODE_IOAPIC_EOI,
 	VM_EXITCODE_MAX
 };
 
 struct vm_exit {
 	enum vm_exitcode	exitcode;
 	int			inst_length;	/* 0 means unknown */
 	uint64_t		rip;
 	union {
 		struct {
 			uint16_t	bytes:3;	/* 1 or 2 or 4 */
 			uint16_t	in:1;		/* out is 0, in is 1 */
 			uint16_t	string:1;
 			uint16_t	rep:1;
 			uint16_t	port;
 			uint32_t	eax;		/* valid for out */
 		} inout;
 		struct {
 			uint64_t	gpa;
 			int		fault_type;
 		} paging;
 		struct {
 			uint64_t	gpa;
 			uint64_t	gla;
 			uint64_t	cr3;
 			enum vie_cpu_mode cpu_mode;
 			enum vie_paging_mode paging_mode;
 			struct vie	vie;
 		} inst_emul;
 		/*
 		 * VMX specific payload. Used when there is no "better"
 		 * exitcode to represent the VM-exit.
 		 */
 		struct {
 			int		status;		/* vmx inst status */
 			/*
 			 * 'exit_reason' and 'exit_qualification' are valid
 			 * only if 'status' is zero.
 			 */
 			uint32_t	exit_reason;
 			uint64_t	exit_qualification;
 			/*
 			 * 'inst_error' and 'inst_type' are valid
 			 * only if 'status' is non-zero.
 			 */
 			int		inst_type;
 			int		inst_error;
 		} vmx;
 		struct {
 			uint32_t	code;		/* ecx value */
 			uint64_t	wval;
 		} msr;
 		struct {
 			int		vcpu;
 			uint64_t	rip;
 		} spinup_ap;
 		struct {
 			uint64_t	rflags;
 		} hlt;
 		struct {
 			int		vector;
 		} ioapic_eoi;
 	} u;
 };
 
 #endif	/* _VMM_H_ */
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index f855f730c49c..d1f72341a909 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -1,1544 +1,1577 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/systm.h>
 #include <sys/smp.h>
 
 #include <x86/specialreg.h>
 #include <x86/apicreg.h>
 
 #include <machine/clock.h>
 #include <machine/smp.h>
 
 #include <machine/vmm.h>
 
 #include "vmm_ipi.h"
 #include "vmm_lapic.h"
 #include "vmm_ktr.h"
 #include "vmm_stat.h"
 
 #include "vlapic.h"
 #include "vlapic_priv.h"
 #include "vioapic.h"
 
 /* Priority class of a vector or TPR value: everything above the low 4 bits. */
 #define	PRIO(x)			((x) >> 4)
 
 /* NOTE(review): presumably the value reported in the version register - confirm at the user. */
 #define VLAPIC_VERSION		(16)
 
 /* Evaluates to 1 if APICBASE_X2APIC is set in the vlapic's msr_apicbase, else 0. */
 #define	x2apic(vlapic)	(((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
 
 /*
  * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
  * vlapic_callout_handler() and vcpu accesses to:
  * - timer_freq_bt, timer_period_bt, timer_fire_bt
  * - timer LVT register
  *
  * It is a spin mutex: the macros below use the mtx_*_spin() primitives.
  */
 #define	VLAPIC_TIMER_LOCK(vlapic)	mtx_lock_spin(&((vlapic)->timer_mtx))
 #define	VLAPIC_TIMER_UNLOCK(vlapic)	mtx_unlock_spin(&((vlapic)->timer_mtx))
 #define	VLAPIC_TIMER_LOCKED(vlapic)	mtx_owned(&((vlapic)->timer_mtx))
 
 /* Frequency that vlapic_dcr_write_handler() divides to obtain the timer rate. */
 #define VLAPIC_BUS_FREQ	tsc_freq
 
 /*
  * Return the value of the APIC ID register for this vlapic: the vcpuid
  * itself in x2APIC mode, otherwise the vcpuid shifted left by 24 bits.
  */
 static __inline uint32_t
 vlapic_get_id(struct vlapic *vlapic)
 {
 
 	return (x2apic(vlapic) ? vlapic->vcpuid : vlapic->vcpuid << 24);
 }
 
 /*
  * Derive the x2APIC logical destination register from the APIC ID:
  * bits 3:0 of the ID select one bit in the low 16 bits of the result and
  * bits 19:4 of the ID are placed in bits 31:16.
  *
  * The arithmetic is done on unsigned values so that shifting into bit 31
  * is well defined.
  */
 static uint32_t
 x2apic_ldr(struct vlapic *vlapic)
 {
 	uint32_t apicid, ldr;
 
 	apicid = vlapic_get_id(vlapic);
 	ldr = 1U << (apicid & 0xf);
 	ldr |= (apicid & 0xffff0) << 12;
 	return (ldr);
 }
 
 /*
  * Post-process a guest write to the DFR held in the APIC page.
  *
  * In x2APIC mode the write is discarded and the register is forced to 0.
  * Otherwise only the model field is retained, the reserved bits are set,
  * and the resulting model is traced.
  */
 void
 vlapic_dfr_write_handler(struct vlapic *vlapic)
 {
 	struct LAPIC *lapic = vlapic->apic_page;
 	uint32_t model;
 
 	if (x2apic(vlapic)) {
 		VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x",
 		    lapic->dfr);
 		lapic->dfr = 0;
 		return;
 	}
 
 	lapic->dfr &= APIC_DFR_MODEL_MASK;
 	lapic->dfr |= APIC_DFR_RESERVED;
 
 	model = lapic->dfr & APIC_DFR_MODEL_MASK;
 	if (model == APIC_DFR_MODEL_FLAT) {
 		VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
 	} else if (model == APIC_DFR_MODEL_CLUSTER) {
 		VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model");
 	} else {
 		VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr);
 	}
 }
 
 /*
  * Post-process a guest write to the LDR held in the APIC page.
  *
  * In x2APIC mode the LDR is read-only, so the value is replaced with the
  * one computed by x2apic_ldr().  Otherwise the reserved bits are cleared.
  */
 void
 vlapic_ldr_write_handler(struct vlapic *vlapic)
 {
 	struct LAPIC *lapic = vlapic->apic_page;
 
 	if (!x2apic(vlapic)) {
 		lapic->ldr &= ~APIC_LDR_RESERVED;
 		VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
 		return;
 	}
 
 	VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x",
 	    lapic->ldr);
 	lapic->ldr = x2apic_ldr(vlapic);
 }
 
 /*
  * Post-process a guest write to the ID register.  The guest is not allowed
  * to change its APIC ID, so the register is simply restored to the value
  * returned by vlapic_get_id().
  */
 void
 vlapic_id_write_handler(struct vlapic *vlapic)
 {
 
 	vlapic->apic_page->id = vlapic_get_id(vlapic);
 }
 
 /*
  * Translate the divide configuration register value into the divisor it
  * encodes.  Only the bits selected by the mask 0xB take part in the decode;
  * a value that matches none of the APIC_TDCR_* encodings panics.
  */
 static int
 vlapic_timer_divisor(uint32_t dcr)
 {
 	int divisor;
 
 	switch (dcr & 0xB) {
 	case APIC_TDCR_1:
 		divisor = 1;
 		break;
 	case APIC_TDCR_2:
 		divisor = 2;
 		break;
 	case APIC_TDCR_4:
 		divisor = 4;
 		break;
 	case APIC_TDCR_8:
 		divisor = 8;
 		break;
 	case APIC_TDCR_16:
 		divisor = 16;
 		break;
 	case APIC_TDCR_32:
 		divisor = 32;
 		break;
 	case APIC_TDCR_64:
 		divisor = 64;
 		break;
 	case APIC_TDCR_128:
 		divisor = 128;
 		break;
 	default:
 		panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr);
 	}
 	return (divisor);
 }
 
 #if 0
 /*
  * Debug helper (compiled out): print an LVT register together with its
  * vector, delivery-status and mask fields.
  */
 static inline void
 vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
 {
 	printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset,
 	    *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS,
 	    *lvt & APIC_LVTT_M);
 }
 #endif
 
 /*
  * Compute the timer's current-count value.
  *
  * Returns 0 when the timer callout is not active or its fire time is not
  * in the future.  Otherwise the time remaining until 'timer_fire_bt' is
  * converted to a count using 'timer_freq_bt'.  The computation runs under
  * the timer lock.
  */
 static uint32_t
 vlapic_get_ccr(struct vlapic *vlapic)
 {
 	struct bintime bt_now, bt_rem;
 	struct LAPIC *lapic;
 	uint32_t ccr;
 	
 	ccr = 0;
 	lapic = vlapic->apic_page;
 
 	VLAPIC_TIMER_LOCK(vlapic);
 	if (callout_active(&vlapic->callout)) {
 		/*
 		 * If the timer is scheduled to expire in the future then
 		 * compute the value of 'ccr' based on the remaining time.
 		 */
 		binuptime(&bt_now);
 		if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
 			bt_rem = vlapic->timer_fire_bt;
 			bintime_sub(&bt_rem, &bt_now);
 			/* Whole seconds first, then the fractional part. */
 			ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
 			ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
 		}
 	}
 	/* The remaining count can never exceed the programmed initial count. */
 	KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
 	    "icr_timer is %#x", ccr, lapic->icr_timer));
 	VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
 	    ccr, lapic->icr_timer);
 	VLAPIC_TIMER_UNLOCK(vlapic);
 	return (ccr);
 }
 
 /*
  * Post-process a guest write to the timer divide configuration register.
  *
  * Under the timer lock, recompute 'timer_freq_bt' from VLAPIC_BUS_FREQ and
  * the newly selected divisor, then recompute 'timer_period_bt' from it and
  * the current initial count.
  */
 void
 vlapic_dcr_write_handler(struct vlapic *vlapic)
 {
 	struct LAPIC *lapic = vlapic->apic_page;
 	int div;
 
 	VLAPIC_TIMER_LOCK(vlapic);
 
 	div = vlapic_timer_divisor(lapic->dcr_timer);
 	VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d",
 	    lapic->dcr_timer, div);
 
 	/*
 	 * XXX a new frequency divider only takes effect the next time
 	 * the timer is reloaded.
 	 */
 	FREQ2BT(VLAPIC_BUS_FREQ / div, &vlapic->timer_freq_bt);
 	vlapic->timer_period_bt = vlapic->timer_freq_bt;
 	bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
 
 	VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
 /*
  * Post-process a guest write to the ESR: publish the accumulated pending
  * error bits in the register and clear the pending set.
  */
 void
 vlapic_esr_write_handler(struct vlapic *vlapic)
 {
 
 	vlapic->apic_page->esr = vlapic->esr_pending;
 	vlapic->esr_pending = 0;
 }
 
 /*
  * Mark 'vector' as pending in the vlapic.
  *
  * Returns 0 if the vlapic is software disabled (the interrupt is dropped).
  * For vectors below 16 a "receive illegal vector" error is recorded and 1
  * is returned.  If the backend supplied a 'set_intr_ready' op, its return
  * value is passed through.  Otherwise the matching IRR bit is set
  * atomically and 1 is returned.
  *
  * 'level' is only used for tracing a mismatch with the TMR on the
  * software path; it is forwarded to the backend op when one exists.
  */
 int
 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
 {
 	struct LAPIC *lapic;
 	uint32_t *irrptr, *tmrptr, mask;
 	int idx;
 
 	KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector));
 
 	lapic = vlapic->apic_page;
 	if (!(lapic->svr & APIC_SVR_ENABLE)) {
 		VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring "
 		    "interrupt %d", vector);
 		return (0);
 	}
 
 	if (vector < 16) {
 		vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR);
 		VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d",
 		    vector);
 		return (1);
 	}
 
 	if (vlapic->ops.set_intr_ready)
 		return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
 
 	/*
 	 * Each 32-bit IRR/TMR register is followed by padding, so register
 	 * N lives at uint32_t index N * 4.  The mask is built from an
 	 * unsigned constant because bit 31 is a valid position.
 	 */
 	idx = (vector / 32) * 4;
 	mask = 1U << (vector % 32);
 
 	irrptr = &lapic->irr0;
 	atomic_set_int(&irrptr[idx], mask);
 
 	/*
 	 * Verify that the trigger-mode of the interrupt matches with
 	 * the vlapic TMR registers.
 	 */
 	tmrptr = &lapic->tmr0;
 	if ((tmrptr[idx] & mask) != (level ? mask : 0)) {
 		VLAPIC_CTR3(vlapic, "vlapic TMR[%d] is 0x%08x but "
 		    "interrupt is %s-triggered", idx / 4, tmrptr[idx],
 		    level ? "level" : "edge");
 	}
 
 	VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
 	return (1);
 }
 
 /*
  * Return a pointer to the LVT register in the APIC page that corresponds
  * to the register 'offset'.  The CMCI LVT is looked up by name; the LVTs
  * from TIMER through ERROR are located relative to 'lvt_timer'.  Any other
  * offset is a programming error and panics.
  */
 static __inline uint32_t *
 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
 {
 	struct LAPIC	*lapic = vlapic->apic_page;
 	int 		 i;
 
 	switch (offset) {
 	case APIC_OFFSET_CMCI_LVT:
 		return (&lapic->lvt_cmci);
 	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
 		/* Convert the byte distance from the timer LVT to a uint32_t index. */
 		i = (offset - APIC_OFFSET_TIMER_LVT) >> 2;
 		return ((&lapic->lvt_timer) + i);
 	default:
 		panic("vlapic_get_lvtptr: invalid LVT offset %#x", offset);
 	}
 }
 
 /*
  * Map an LVT register offset to its APIC_LVT_* index.  An offset that is
  * not one of the known LVT registers leaves the index at -1, which trips
  * the assertion below.
  */
 static __inline int
 lvt_off_to_idx(uint32_t offset)
 {
 	int index = -1;
 
 	switch (offset) {
 	case APIC_OFFSET_CMCI_LVT:
 		index = APIC_LVT_CMCI;
 		break;
 	case APIC_OFFSET_TIMER_LVT:
 		index = APIC_LVT_TIMER;
 		break;
 	case APIC_OFFSET_THERM_LVT:
 		index = APIC_LVT_THERMAL;
 		break;
 	case APIC_OFFSET_PERF_LVT:
 		index = APIC_LVT_PMC;
 		break;
 	case APIC_OFFSET_LINT0_LVT:
 		index = APIC_LVT_LINT0;
 		break;
 	case APIC_OFFSET_LINT1_LVT:
 		index = APIC_LVT_LINT1;
 		break;
 	case APIC_OFFSET_ERROR_LVT:
 		index = APIC_LVT_ERROR;
 		break;
 	}
 	KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: "
 	    "invalid lvt index %d for offset %#x", index, offset));
 
 	return (index);
 }
 
 /*
  * Return the last value recorded for the LVT register at 'offset'.  The
  * value is read from the 'lvt_last' cache with acquire semantics rather
  * than from the APIC page.
  */
 static __inline uint32_t
 vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
 {
 	const int slot = lvt_off_to_idx(offset);
 
 	return (atomic_load_acq_32(&vlapic->lvt_last[slot]));
 }
 
 /*
  * Post-process a guest write to the LVT register at 'offset'.
  *
  * The value is forced masked while the vlapic is software disabled, then
  * trimmed to the bits that are defined for that particular LVT.  The
  * result is written back to the APIC page and published in 'lvt_last'
  * with release semantics.
  */
 void
 vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset)
 {
 	struct LAPIC *lapic = vlapic->apic_page;
 	uint32_t *reg, keep, newval;
 	int slot;
 
 	reg = vlapic_get_lvtptr(vlapic, offset);
 	newval = *reg;
 	slot = lvt_off_to_idx(offset);
 
 	if ((lapic->svr & APIC_SVR_ENABLE) == 0)
 		newval |= APIC_LVT_M;
 
 	/* Bits common to every LVT, plus the per-register extras. */
 	keep = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR;
 	if (offset == APIC_OFFSET_TIMER_LVT) {
 		keep |= APIC_LVTT_TM;
 	} else if (offset != APIC_OFFSET_ERROR_LVT) {
 		if (offset == APIC_OFFSET_LINT0_LVT ||
 		    offset == APIC_OFFSET_LINT1_LVT)
 			keep |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP;
 		keep |= APIC_LVT_DM;
 	}
 
 	newval &= keep;
 	*reg = newval;
 	atomic_store_rel_32(&vlapic->lvt_last[slot], newval);
 }
 
 /*
  * Set the mask bit in every LVT register (CMCI, timer, thermal, perf,
  * LINT0, LINT1, error) and run the LVT write handler on each so the
  * change is processed like a guest write.
  */
 static void
 vlapic_mask_lvts(struct vlapic *vlapic)
 {
 	struct LAPIC *lapic = vlapic->apic_page;
 
 #define	MASK_ONE_LVT(reg, off) do {					\
 	lapic->reg |= APIC_LVT_M;					\
 	vlapic_lvt_write_handler(vlapic, (off));			\
 } while (0)
 
 	MASK_ONE_LVT(lvt_cmci, APIC_OFFSET_CMCI_LVT);
 	MASK_ONE_LVT(lvt_timer, APIC_OFFSET_TIMER_LVT);
 	MASK_ONE_LVT(lvt_thermal, APIC_OFFSET_THERM_LVT);
 	MASK_ONE_LVT(lvt_pcint, APIC_OFFSET_PERF_LVT);
 	MASK_ONE_LVT(lvt_lint0, APIC_OFFSET_LINT0_LVT);
 	MASK_ONE_LVT(lvt_lint1, APIC_OFFSET_LINT1_LVT);
 	MASK_ONE_LVT(lvt_error, APIC_OFFSET_ERROR_LVT);
 
 #undef	MASK_ONE_LVT
 }
 
 /*
  * Deliver the interrupt described by the LVT value 'lvt'.
  *
  * Returns 1 if an interrupt was generated (fixed or NMI delivery mode)
  * and 0 if nothing was delivered: the entry is masked, the delivery mode
  * is not handled, or a fixed-mode vector is below 16 (in which case a
  * "send illegal vector" error is recorded).
  */
 static int
 vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt)
 {
 	uint32_t mode, vec;
 
 	if ((lvt & APIC_LVT_M) != 0)
 		return (0);
 
 	vec = lvt & APIC_LVT_VECTOR;
 	mode = lvt & APIC_LVT_DM;
 
 	if (mode == APIC_LVT_DM_NMI) {
 		vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
 		return (1);
 	}
 
 	/* Every delivery mode other than fixed and NMI is ignored. */
 	if (mode != APIC_LVT_DM_FIXED)
 		return (0);
 
 	if (vec < 16) {
 		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
 		return (0);
 	}
 	if (vlapic_set_intr_ready(vlapic, vec, false))
 		vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
 	return (1);
 }
 
 #if 1
 /*
  * Diagnostic helper: print the eight ISR registers followed by the
  * contents of the ISR vector stack up to and including its top entry.
  */
 static void
 dump_isrvec_stk(struct vlapic *vlapic)
 {
 	const uint32_t *isr = &vlapic->apic_page->isr0;
 	int n;
 
 	/* ISR registers are spaced four uint32_t slots apart. */
 	for (n = 0; n < 8; n++)
 		printf("ISR%d 0x%08x\n", n, isr[n * 4]);
 
 	n = 0;
 	while (n <= vlapic->isrvec_stk_top) {
 		printf("isrvec_stk[%d] = %d\n", n, vlapic->isrvec_stk[n]);
 		n++;
 	}
 }
 #endif
 
 /*
  * Recompute the processor priority register from the task priority
  * register and the highest in-service vector (the top of 'isrvec_stk').
  *
  * Algorithm adopted from section "Interrupt, Task and Processor Priority"
  * in Intel Architecture Manual Vol 3a.
  *
  * The always-enabled sanity block panics if the ISR vector stack is
  * inconsistent with itself or with the ISR registers.
  */
 static void
 vlapic_update_ppr(struct vlapic *vlapic)
 {
 	int isrvec, tpr, ppr;
 
 	/*
 	 * Note that the value on the stack at index 0 is always 0.
 	 *
 	 * This is a placeholder for the value of ISRV when none of the
 	 * bits is set in the ISRx registers.
 	 */
 	isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
 	tpr = vlapic->apic_page->tpr;
 
 #if 1
 	{
 		int i, lastprio, curprio, vector, idx;
 		uint32_t *isrptr;
 
 		if (vlapic->isrvec_stk_top == 0 && isrvec != 0)
 			panic("isrvec_stk is corrupted: %d", isrvec);
 
 		/*
 		 * Make sure that the priority of the nested interrupts is
 		 * always increasing.
 		 */
 		lastprio = -1;
 		for (i = 1; i <= vlapic->isrvec_stk_top; i++) {
 			curprio = PRIO(vlapic->isrvec_stk[i]);
 			if (curprio <= lastprio) {
 				dump_isrvec_stk(vlapic);
 				panic("isrvec_stk does not satisfy invariant");
 			}
 			lastprio = curprio;
 		}
 
 		/*
 		 * Make sure that each bit set in the ISRx registers has a
 		 * corresponding entry on the isrvec stack.
 		 *
 		 * The bit mask uses an unsigned constant because bit 31 is
 		 * a valid position within each register.
 		 */
 		i = 1;
 		isrptr = &vlapic->apic_page->isr0;
 		for (vector = 0; vector < 256; vector++) {
 			idx = (vector / 32) * 4;
 			if (isrptr[idx] & (1U << (vector % 32))) {
 				if (i > vlapic->isrvec_stk_top ||
 				    vlapic->isrvec_stk[i] != vector) {
 					dump_isrvec_stk(vlapic);
 					panic("ISR and isrvec_stk out of sync");
 				}
 				i++;
 			}
 		}
 	}
 #endif
 
 	/* PPR is the TPR unless an in-service vector has a higher class. */
 	if (PRIO(tpr) >= PRIO(isrvec))
 		ppr = tpr;
 	else
 		ppr = isrvec & 0xf0;
 
 	vlapic->apic_page->ppr = ppr;
 	VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
 }
 
 /*
  * Handle an end-of-interrupt: clear the highest bit that is set in the ISR
  * registers, pop the ISR vector stack, recompute the PPR and, if the TMR
  * bit for that vector is set, forward the EOI to the virtual I/O APIC.
  *
  * Does nothing if no ISR bit is set in registers 1 through 7.
  */
 static void
 vlapic_process_eoi(struct vlapic *vlapic)
 {
 	struct LAPIC	*lapic = vlapic->apic_page;
 	uint32_t	*isrptr, *tmrptr;
 	int		i, idx, bitpos, vector;
 
 	isrptr = &lapic->isr0;
 	tmrptr = &lapic->tmr0;
 
 	/*
 	 * The x86 architecture reserves the first 32 vectors for use
 	 * by the processor, so ISR register 0 is not scanned.
 	 */
 	for (i = 7; i > 0; i--) {
 		idx = i * 4;
 		/* fls() is 1-based; 0 means no bit is set in this register. */
 		bitpos = fls(isrptr[idx]);
 		if (bitpos-- != 0) {
 			if (vlapic->isrvec_stk_top <= 0) {
 				panic("invalid vlapic isrvec_stk_top %d",
 				      vlapic->isrvec_stk_top);
 			}
 			/* Unsigned constant: 'bitpos' may be 31. */
 			isrptr[idx] &= ~(1U << bitpos);
 			VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi");
 			vlapic->isrvec_stk_top--;
 			vlapic_update_ppr(vlapic);
 			if ((tmrptr[idx] & (1U << bitpos)) != 0) {
 				vector = i * 32 + bitpos;
 				vioapic_process_eoi(vlapic->vm, vlapic->vcpuid,
 				    vector);
 			}
 			return;
 		}
 	}
 }
 
 /*
  * Extract the bits of 'lvt' selected by 'mask'.  The bits are returned in
  * place (not shifted down).
  */
 static __inline int
 vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
 {
 	const uint32_t field = lvt & mask;
 
 	return (field);
 }
 
 /*
  * Return non-zero if the periodic bit (APIC_LVTT_TM_PERIODIC) is set in
  * the last recorded timer LVT value.
  */
 static __inline int
 vlapic_periodic_timer(struct vlapic *vlapic)
 {
 
 	return (vlapic_get_lvt_field(
 	    vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT),
 	    APIC_LVTT_TM_PERIODIC));
 }
 
 static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic");
 
 /*
  * Record the error bits in 'mask' as pending and raise the error LVT
  * interrupt.  The 'esr_firing' flag guards against re-entry: if an error
  * interrupt is already being delivered, the bits are only accumulated.
  */
 void
 vlapic_set_error(struct vlapic *vlapic, uint32_t mask)
 {
 	uint32_t errlvt;
 
 	vlapic->esr_pending |= mask;
 	if (!vlapic->esr_firing) {
 		vlapic->esr_firing = 1;
 
 		/* The error LVT always uses the fixed delivery mode. */
 		errlvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT) |
 		    APIC_LVT_DM_FIXED;
 		if (vlapic_fire_lvt(vlapic, errlvt)) {
 			vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
 			    VLAPIC_INTR_ERROR, 1);
 		}
 		vlapic->esr_firing = 0;
 	}
 }
 
 static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic");
 
 /*
  * Raise the timer LVT interrupt and, if one was generated, bump the timer
  * interrupt statistic.  The caller must hold the timer lock.
  */
 static void
 vlapic_fire_timer(struct vlapic *vlapic)
 {
 	uint32_t timer_lvt;
 
 	KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));
 
 	/* The timer LVT always uses the fixed delivery mode. */
 	timer_lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT) |
 	    APIC_LVT_DM_FIXED;
 	if (vlapic_fire_lvt(vlapic, timer_lvt))
 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
 }
 
 static VMM_STAT(VLAPIC_INTR_CMC,
     "corrected machine check interrupts generated by vlapic");
 
 /*
  * Raise the CMCI LVT interrupt using the delivery mode programmed in the
  * LVT and, if one was generated, bump the CMC interrupt statistic.
  */
 void
 vlapic_fire_cmci(struct vlapic *vlapic)
 {
 	const uint32_t cmci_lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT);
 
 	if (vlapic_fire_lvt(vlapic, cmci_lvt))
 		vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1);
 }
 
 static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1,
     "lvts triggered");
 
 /*
  * Trigger the local interrupt identified by 'vector', which despite its
  * name is one of the APIC_LVT_* indices.
  *
  * Returns EINVAL for an unknown index and 0 otherwise.  The timer and
  * error LVTs are always delivered in fixed mode.  When an interrupt was
  * actually generated the per-LVT statistic is incremented.
  */
 int
 vlapic_trigger_lvt(struct vlapic *vlapic, int vector)
 {
 	uint32_t lvt, offset;
 	bool force_fixed;
 
 	force_fixed = false;
 	switch (vector) {
 	case APIC_LVT_LINT0:
 		offset = APIC_OFFSET_LINT0_LVT;
 		break;
 	case APIC_LVT_LINT1:
 		offset = APIC_OFFSET_LINT1_LVT;
 		break;
 	case APIC_LVT_TIMER:
 		offset = APIC_OFFSET_TIMER_LVT;
 		force_fixed = true;
 		break;
 	case APIC_LVT_ERROR:
 		offset = APIC_OFFSET_ERROR_LVT;
 		force_fixed = true;
 		break;
 	case APIC_LVT_PMC:
 		offset = APIC_OFFSET_PERF_LVT;
 		break;
 	case APIC_LVT_THERMAL:
 		offset = APIC_OFFSET_THERM_LVT;
 		break;
 	case APIC_LVT_CMCI:
 		offset = APIC_OFFSET_CMCI_LVT;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	lvt = vlapic_get_lvt(vlapic, offset);
 	if (force_fixed)
 		lvt |= APIC_LVT_DM_FIXED;
 
 	if (vlapic_fire_lvt(vlapic, lvt)) {
 		vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
 		    LVTS_TRIGGERRED, vector, 1);
 	}
 	return (0);
 }
 
 /*
  * Callout handler for the virtual APIC timer; 'arg' is the vlapic.
  *
  * Under the timer lock: bail out if the callout was rescheduled or stopped
  * in the meantime, otherwise fire the timer interrupt.  In periodic mode
  * the callout is re-armed, shortening the next interval by however late
  * this invocation ran; if it ran late by a full period or more the time
  * base is reset to now instead.
  */
 static void
 vlapic_callout_handler(void *arg)
 {
 	struct vlapic *vlapic;
 	struct bintime bt, btnow;
 	sbintime_t rem_sbt;
 
 	vlapic = arg;
 
 	VLAPIC_TIMER_LOCK(vlapic);
 	if (callout_pending(&vlapic->callout))	/* callout was reset */
 		goto done;
 
 	if (!callout_active(&vlapic->callout))	/* callout was stopped */
 		goto done;
 
 	callout_deactivate(&vlapic->callout);
 
 	vlapic_fire_timer(vlapic);
 
 	if (vlapic_periodic_timer(vlapic)) {
 		binuptime(&btnow);
 		KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
 		    ("vlapic callout at %#lx.%#lx, expected at %#lx.%#lx",
 		    btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
 		    vlapic->timer_fire_bt.frac));
 
 		/*
 		 * Compute the delta between when the timer was supposed to
 		 * fire and the present time.
 		 */
 		bt = btnow;
 		bintime_sub(&bt, &vlapic->timer_fire_bt);
 
 		rem_sbt = bttosbt(vlapic->timer_period_bt);
 		if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
 			/*
 			 * Adjust the time until the next countdown downward
 			 * to account for the lost time.
 			 */
 			rem_sbt -= bttosbt(bt);
 		} else {
 			/*
 			 * If the delta is greater than the timer period then
 			 * just reset our time base instead of trying to catch
 			 * up.
 			 */
 			vlapic->timer_fire_bt = btnow;
 			VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
 			    "usecs, period is %lu usecs - resetting time base",
 			    bttosbt(bt) / SBT_1US,
 			    bttosbt(vlapic->timer_period_bt) / SBT_1US);
 		}
 
 		/* Advance the expected fire time by one period and re-arm. */
 		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
 		callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
 		    vlapic_callout_handler, vlapic, 0);
 	}
 done:
 	VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
 /*
  * Post-process a guest write to the timer initial-count register.
  *
  * Under the timer lock the timer period is recomputed from the new count.
  * A non-zero count (re)arms the callout one period from now; a zero count
  * stops it.
  */
 void
 vlapic_icrtmr_write_handler(struct vlapic *vlapic)
 {
 	struct LAPIC *lapic;
 	sbintime_t period_sbt;
 	uint32_t count;
 
 	VLAPIC_TIMER_LOCK(vlapic);
 
 	lapic = vlapic->apic_page;
 	count = lapic->icr_timer;
 
 	vlapic->timer_period_bt = vlapic->timer_freq_bt;
 	bintime_mul(&vlapic->timer_period_bt, count);
 
 	if (count == 0) {
 		callout_stop(&vlapic->callout);
 	} else {
 		binuptime(&vlapic->timer_fire_bt);
 		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
 
 		period_sbt = bttosbt(vlapic->timer_period_bt);
 		callout_reset_sbt(&vlapic->callout, period_sbt, 0,
 		    vlapic_callout_handler, vlapic, 0);
 	}
 
 	VLAPIC_TIMER_UNLOCK(vlapic);
 }
 
 /*
  * This function populates 'dmask' with the set of vcpus that match the
  * addressing specified by the (dest, phys, lowprio) tuple.
  *
  * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit)
  * or xAPIC (8-bit) destination field.
  *
  * When 'lowprio' is set in logical mode, the scan stops at the first
  * matching vcpu, so 'dmask' holds at most one vcpu in that case.
  */
 static void
 vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys,
     bool lowprio, bool x2apic_dest)
 {
 	struct vlapic *vlapic;
 	uint32_t dfr, ldr, ldest, cluster;
 	uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id;
 	cpuset_t amask;
 	int vcpuid;
 
 	if ((x2apic_dest && dest == 0xffffffff) ||
 	    (!x2apic_dest && dest == 0xff)) {
 		/*
 		 * Broadcast in both logical and physical modes.
 		 */
 		*dmask = vm_active_cpus(vm);
 		return;
 	}
 
 	if (phys) {
 		/*
 		 * Physical mode: destination is APIC ID.
 		 */
 		CPU_ZERO(dmask);
 		vcpuid = vm_apicid2vcpuid(vm, dest);
 		if (vcpuid < VM_MAXCPU)
 			CPU_SET(vcpuid, dmask);
 	} else {
 		/*
 		 * In the "Flat Model" the MDA is interpreted as an 8-bit wide
 		 * bitmask. This model is only available in the xAPIC mode.
 		 */
 		mda_flat_ldest = dest & 0xff;
 
 		/*
 		 * In the "Cluster Model" the MDA is used to identify a
 		 * specific cluster and a set of APICs in that cluster.
 		 */
 		if (x2apic_dest) {
 			mda_cluster_id = dest >> 16;
 			mda_cluster_ldest = dest & 0xffff;
 		} else {
 			mda_cluster_id = (dest >> 4) & 0xf;
 			mda_cluster_ldest = dest & 0xf;
 		}
 
 		/*
 		 * Logical mode: match each APIC that has a bit set
 		 * in its LDR that matches a bit in the ldest.
 		 */
 		CPU_ZERO(dmask);
 		amask = vm_active_cpus(vm);
 		/* CPU_FFS() is 1-based; 0 means the set is empty. */
 		while ((vcpuid = CPU_FFS(&amask)) != 0) {
 			vcpuid--;
 			CPU_CLR(vcpuid, &amask);
 
 			vlapic = vm_lapic(vm, vcpuid);
 			dfr = vlapic->apic_page->dfr;
 			ldr = vlapic->apic_page->ldr;
 
 			if ((dfr & APIC_DFR_MODEL_MASK) ==
 			    APIC_DFR_MODEL_FLAT) {
 				ldest = ldr >> 24;
 				mda_ldest = mda_flat_ldest;
 			} else if ((dfr & APIC_DFR_MODEL_MASK) ==
 			    APIC_DFR_MODEL_CLUSTER) {
 				if (x2apic(vlapic)) {
 					cluster = ldr >> 16;
 					ldest = ldr & 0xffff;
 				} else {
 					cluster = ldr >> 28;
 					ldest = (ldr >> 24) & 0xf;
 				}
 				if (cluster != mda_cluster_id)
 					continue;
 				mda_ldest = mda_cluster_ldest;
 			} else {
 				/*
 				 * Guest has configured a bad logical
 				 * model for this vcpu - skip it.
 				 */
 				VLAPIC_CTR1(vlapic, "vlapic has bad logical "
 				    "model %x - cannot deliver interrupt", dfr);
 				continue;
 			}
 
 			if ((mda_ldest & ldest) != 0) {
 				CPU_SET(vcpuid, dmask);
 				if (lowprio)
 					break;
 			}
 		}
 	}
 }
 
 /*
  * Array statistic bumped once per IPI sent; the array index passed to
  * vmm_stat_array_incr() is the destination vcpuid.
  */
 static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
 
 /*
  * Handle a guest write to the low half of the Interrupt Command Register.
  *
  * Fixed and NMI IPIs are delivered to the computed destination set here.
  * INIT and STARTUP IPIs sent by vcpu 0 to another vcpu advance that vcpu's
  * 'boot_state'; an accepted STARTUP IPI additionally fills in a
  * VM_EXITCODE_SPINUP_AP exit record and sets '*retu' to true.
  *
  * Returns 0 when the write was handled here and 1 otherwise, which (per the
  * comment at the end of the function) causes a return to userland.
  */
 int
 vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu)
 {
 	int i;
 	bool phys;
 	cpuset_t dmask;
 	uint64_t icrval;
 	uint32_t dest, vec, mode;
 	struct vlapic *vlapic2;
 	struct vm_exit *vmexit;
 	struct LAPIC *lapic;
 
 	lapic = vlapic->apic_page;
 	/* The delivery-status bit is always cleared before the ICR is used. */
 	lapic->icr_lo &= ~APIC_DELSTAT_PEND;
 	icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
 
 	/*
 	 * x2APIC mode uses all of ICR bits 63:32 as the destination whereas
 	 * xAPIC mode uses only bits 63:56.
 	 */
 	if (x2apic(vlapic))
 		dest = icrval >> 32;
 	else
 		dest = icrval >> (32 + 24);
 	vec = icrval & APIC_VECTOR_MASK;
 	mode = icrval & APIC_DELMODE_MASK;
 
 	/* Fixed IPIs with a vector below 16 are flagged in the ESR and dropped. */
 	if (mode == APIC_DELMODE_FIXED && vec < 16) {
 		vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR);
 		VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
 		return (0);
 	}
 
 	VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
 
 	if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
 		/* Build the destination set from the shorthand field. */
 		switch (icrval & APIC_DEST_MASK) {
 		case APIC_DEST_DESTFLD:
 			phys = ((icrval & APIC_DESTMODE_LOG) == 0);
 			vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
 			    x2apic(vlapic));
 			break;
 		case APIC_DEST_SELF:
 			CPU_SETOF(vlapic->vcpuid, &dmask);
 			break;
 		case APIC_DEST_ALLISELF:
 			dmask = vm_active_cpus(vlapic->vm);
 			break;
 		case APIC_DEST_ALLESELF:
 			dmask = vm_active_cpus(vlapic->vm);
 			CPU_CLR(vlapic->vcpuid, &dmask);
 			break;
 		default:
 			CPU_ZERO(&dmask);	/* satisfy gcc */
 			break;
 		}
 
 		/*
 		 * Deliver to each destination in turn. CPU_FFS() yields a
 		 * 1-based index, or 0 once the set is empty.
 		 */
 		while ((i = CPU_FFS(&dmask)) != 0) {
 			i--;
 			CPU_CLR(i, &dmask);
 			if (mode == APIC_DELMODE_FIXED) {
 				lapic_intr_edge(vlapic->vm, i, vec);
 				vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid,
 						    IPIS_SENT, i, 1);
 				VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
 				    "to vcpuid %d", vec, i);
 			} else {
 				vm_inject_nmi(vlapic->vm, i);
 				VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
 				    "to vcpuid %d", i);
 			}
 		}
 
 		return (0);	/* handled completely in the kernel */
 	}
 
 	if (mode == APIC_DELMODE_INIT) {
 		/* INIT level de-assert is accepted and ignored. */
 		if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
 			return (0);
 
 		/*
 		 * Only an INIT sent by vcpu 0 to another valid vcpu is
 		 * handled here; 'dest' is used directly as the vcpuid.
 		 * Anything else falls through to the final return (1).
 		 */
 		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
 			vlapic2 = vm_lapic(vlapic->vm, dest);
 
 			/* move from INIT to waiting-for-SIPI state */
 			if (vlapic2->boot_state == BS_INIT) {
 				vlapic2->boot_state = BS_SIPI;
 			}
 
 			return (0);
 		}
 	}
 
 	if (mode == APIC_DELMODE_STARTUP) {
 		/* Same sender/destination restrictions as for INIT above. */
 		if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) {
 			vlapic2 = vm_lapic(vlapic->vm, dest);
 
 			/*
 			 * Ignore SIPIs in any state other than wait-for-SIPI
 			 */
 			if (vlapic2->boot_state != BS_SIPI)
 				return (0);
 
 			/*
 			 * XXX this assumes that the startup IPI always succeeds
 			 */
 			vlapic2->boot_state = BS_RUNNING;
 			vm_activate_cpu(vlapic2->vm, dest);
 
 			/*
 			 * Record which vcpu to spin up and its starting rip
 			 * (the SIPI vector shifted by PAGE_SHIFT) in the
 			 * sender's exit information.
 			 */
 			*retu = true;
 			vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
 			vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
 			vmexit->u.spinup_ap.vcpu = dest;
 			vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
 
 			return (0);
 		}
 	}
 
 	/*
 	 * This will cause a return to userland.
 	 */
 	return (1);
 }
 
 /*
  * Handle a write to the SELF IPI register: the low byte of 'val' is the
  * vector, which is posted via lapic_intr_edge() to the vcpu that owns
  * 'vlapic'. The IPI is also counted in the IPIS_SENT statistic.
  */
 static void
 vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
 {
 	struct vm *vm = vlapic->vm;
 	int self = vlapic->vcpuid;
 	int vec = val & 0xff;
 
 	lapic_intr_edge(vm, self, vec);
 	vmm_stat_array_incr(vm, self, IPIS_SENT, self, 1);
 	VLAPIC_CTR1(vlapic, "vlapic self-ipi %d", vec);
 }
 
 /*
  * Report whether an interrupt is pending that can be delivered now.
  *
  * Returns 1 and stores the vector in '*vecptr' (when 'vecptr' is not NULL)
  * if the highest vector set in the IRR has a priority class above that of
  * the PPR; returns 0 otherwise. If the backend supplies a 'pending_intr'
  * op, the decision is delegated to it.
  */
 int
 vlapic_pending_intr(struct vlapic *vlapic, int *vecptr)
 {
 	struct LAPIC	*lapic = vlapic->apic_page;
 	int		 idx, i, bitpos, vector;
 	uint32_t	*irrptr, val;
 
 	if (vlapic->ops.pending_intr)
 		return ((*vlapic->ops.pending_intr)(vlapic, vecptr));
 
 	irrptr = &lapic->irr0;
 
 	/*
 	 * Scan the IRR from the highest word down to word 0.
 	 *
 	 * The x86 architecture reserves the first 32 vectors for use by the
 	 * processor, but the ICR write handler rejects only fixed vectors
 	 * below 16. Vectors 16-31 can therefore be posted and live in IRR
 	 * word 0, so that word has to be scanned as well or they would
 	 * never be reported as pending.
 	 *
 	 * Successive IRR words are addressed 4 uint32_t slots apart.
 	 */
 	for (i = 7; i >= 0; i--) {
 		idx = i * 4;
 		val = atomic_load_acq_int(&irrptr[idx]);
 		bitpos = fls(val);
 		if (bitpos != 0) {
 			vector = i * 32 + (bitpos - 1);
 			if (PRIO(vector) > PRIO(lapic->ppr)) {
 				VLAPIC_CTR1(vlapic, "pending intr %d", vector);
 				if (vecptr != NULL)
 					*vecptr = vector;
 				return (1);
 			} else {
 				/*
 				 * The highest pending vector is masked by
 				 * the PPR, so every lower one is too.
 				 */
 				break;
 			}
 		}
 	}
 	return (0);
 }
 
 /*
  * Transition 'vector' from pending to in-service: clear it in the IRR, set
  * it in the ISR, push it on the in-service vector stack and recompute the
  * PPR. If the backend supplies an 'intr_accepted' op, the work is delegated
  * to it instead.
  *
  * Panics if the in-service vector stack would overflow.
  */
 void
 vlapic_intr_accepted(struct vlapic *vlapic, int vector)
 {
 	struct LAPIC	*lapic = vlapic->apic_page;
 	uint32_t	*irrptr, *isrptr, mask;
 	int		idx, stk_top;
 
 	/*
 	 * ISO C does not allow a void function to return an expression, so
 	 * the op is invoked and the return is issued separately.
 	 */
 	if (vlapic->ops.intr_accepted) {
 		(*vlapic->ops.intr_accepted)(vlapic, vector);
 		return;
 	}
 
 	/*
 	 * clear the ready bit for vector being accepted in irr
 	 * and set the vector as in service in isr.
 	 *
 	 * The mask is built from an unsigned constant so that bit 31 can be
 	 * selected without shifting into the sign bit of an int.
 	 */
 	idx = (vector / 32) * 4;
 	mask = 1U << (vector % 32);
 
 	irrptr = &lapic->irr0;
 	atomic_clear_int(&irrptr[idx], mask);
 	VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted");
 
 	isrptr = &lapic->isr0;
 	isrptr[idx] |= mask;
 	VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted");
 
 	/*
 	 * Update the PPR
 	 */
 	vlapic->isrvec_stk_top++;
 
 	stk_top = vlapic->isrvec_stk_top;
 	if (stk_top >= ISRVEC_STK_SIZE)
 		panic("isrvec_stk_top overflow %d", stk_top);
 
 	vlapic->isrvec_stk[stk_top] = vector;
 	vlapic_update_ppr(vlapic);
 }
 
 /*
  * Handle a guest write to the Spurious Interrupt Vector Register.
  *
  * Only a change of the software-enable bit has side effects: disabling the
  * APIC stops the timer callout and masks the LVT entries, while enabling it
  * restarts the timer if it is configured in periodic mode.
  */
 void
 vlapic_svr_write_handler(struct vlapic *vlapic)
 {
 	uint32_t cur, prev;
 
 	cur = vlapic->apic_page->svr;
 	prev = vlapic->svr_last;
 	vlapic->svr_last = cur;
 
 	/* Nothing to do unless the software-enable bit was toggled. */
 	if (((prev ^ cur) & APIC_SVR_ENABLE) == 0)
 		return;
 
 	if ((cur & APIC_SVR_ENABLE) != 0) {
 		/*
 		 * The apic has just been enabled: restart the apic timer
 		 * when it is configured in periodic mode.
 		 */
 		VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
 		if (vlapic_periodic_timer(vlapic))
 			vlapic_icrtmr_write_handler(vlapic);
 		return;
 	}
 
 	/*
 	 * The apic has just been disabled: stop the apic timer and mask
 	 * all the LVT entries.
 	 */
 	VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
 	VLAPIC_TIMER_LOCK(vlapic);
 	callout_stop(&vlapic->callout);
 	VLAPIC_TIMER_UNLOCK(vlapic);
 	vlapic_mask_lvts(vlapic);
 }
 
 /*
  * Read the local APIC register at 'offset' into '*data'.
  *
  * 'mmio_access' is non-zero for memory-mapped accesses and zero for x2APIC
  * MSR accesses. An access type that does not match the current APIC mode
  * reads as 0, as do unknown offsets and offsets rejected by the bounds
  * check.
  *
  * Always returns 0. 'retu' is not used by this function.
  */
 int
-vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu)
+vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
+    uint64_t *data, bool *retu)
 {
 	struct LAPIC	*lapic = vlapic->apic_page;
 	uint32_t	*reg;
 	int		 i;
 
+	/* Ignore MMIO accesses in x2APIC mode */
+	if (x2apic(vlapic) && mmio_access) {
+		VLAPIC_CTR1(vlapic, "MMIO read from offset %#lx in x2APIC mode",
+		    offset);
+		*data = 0;
+		goto done;
+	}
+
+	if (!x2apic(vlapic) && !mmio_access) {
+		/*
+		 * XXX Generate GP fault for MSR accesses in xAPIC mode
+		 */
+		VLAPIC_CTR1(vlapic, "x2APIC MSR read from offset %#lx in "
+		    "xAPIC mode", offset);
+		*data = 0;
+		goto done;
+	}
+
 	/*
 	 * NOTE(review): this check lets offset == sizeof(*lapic) through;
 	 * confirm whether '>=' was intended.
 	 */
 	if (offset > sizeof(*lapic)) {
 		*data = 0;
 		goto done;
 	}
 	
 	/* Round the offset down to a 4-byte boundary before decoding it. */
 	offset &= ~3;
 	switch(offset)
 	{
 		case APIC_OFFSET_ID:
 			*data = lapic->id;
 			break;
 		case APIC_OFFSET_VER:
 			*data = lapic->version;
 			break;
 		case APIC_OFFSET_TPR:
 			*data = lapic->tpr;
 			break;
 		case APIC_OFFSET_APR:
 			*data = lapic->apr;
 			break;
 		case APIC_OFFSET_PPR:
 			*data = lapic->ppr;
 			break;
 		case APIC_OFFSET_EOI:
 			*data = lapic->eoi;
 			break;
 		case APIC_OFFSET_LDR:
 			*data = lapic->ldr;
 			break;
 		case APIC_OFFSET_DFR:
 			*data = lapic->dfr;
 			break;
 		case APIC_OFFSET_SVR:
 			*data = lapic->svr;
 			break;
 		/*
 		 * For the ISR/TMR/IRR banks the byte offset from the first
 		 * register of the bank is converted to a uint32_t index.
 		 */
 		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
 			i = (offset - APIC_OFFSET_ISR0) >> 2;
 			reg = &lapic->isr0;
 			*data = *(reg + i);
 			break;
 		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
 			i = (offset - APIC_OFFSET_TMR0) >> 2;
 			reg = &lapic->tmr0;
 			*data = *(reg + i);
 			break;
 		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
 			i = (offset - APIC_OFFSET_IRR0) >> 2;
 			reg = &lapic->irr0;
 			*data = atomic_load_acq_int(reg + i);
 			break;
 		case APIC_OFFSET_ESR:
 			*data = lapic->esr;
 			break;
 		case APIC_OFFSET_ICR_LOW: 
 			/* In x2APIC mode the ICR reads as one 64-bit value. */
 			*data = lapic->icr_lo;
 			if (x2apic(vlapic))
 				*data |= (uint64_t)lapic->icr_hi << 32;
 			break;
 		case APIC_OFFSET_ICR_HI: 
 			*data = lapic->icr_hi;
 			break;
 		case APIC_OFFSET_CMCI_LVT:
 		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
 			*data = vlapic_get_lvt(vlapic, offset);	
 #ifdef INVARIANTS
 			/* Cross-check the value against the register page. */
 			reg = vlapic_get_lvtptr(vlapic, offset);
 			KASSERT(*data == *reg, ("inconsistent lvt value at "
 			    "offset %#lx: %#lx/%#x", offset, *data, *reg));
 #endif
 			break;
 		case APIC_OFFSET_TIMER_ICR:
 			*data = lapic->icr_timer;
 			break;
 		case APIC_OFFSET_TIMER_CCR:
 			*data = vlapic_get_ccr(vlapic);
 			break;
 		case APIC_OFFSET_TIMER_DCR:
 			*data = lapic->dcr_timer;
 			break;
 		case APIC_OFFSET_SELF_IPI:
 			/*
 			 * XXX generate a GP fault if vlapic is in x2apic mode
 			 */
 			*data = 0;
 			break;
 		case APIC_OFFSET_RRR:
 		default:
 			*data = 0;
 			break;
 	}
 done:
 	/*
 	 * NOTE(review): 'offset' is a uint64_t but is traced with %#x here;
 	 * the write path uses %#lx.
 	 */
 	VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data);
 	return 0;
 }
 
 /*
  * Write 'data' to the local APIC register at 'offset' and run the matching
  * write handler.
  *
  * 'mmio_access' is non-zero for memory-mapped accesses and zero for x2APIC
  * MSR accesses. An access type that does not match the current APIC mode is
  * ignored, as are writes to read-only or unknown registers.
  *
  * Returns the result of vlapic_icrlo_write_handler() for a write to the low
  * half of the ICR and 0 in every other case. 'retu' is passed through to
  * that handler only.
  */
 int
-vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu)
+vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
+    uint64_t data, bool *retu)
 {
 	struct LAPIC	*lapic = vlapic->apic_page;
 	uint32_t	*regptr;
 	int		retval;
 
 	KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE,
 	    ("vlapic_write: invalid offset %#lx", offset));
 
-	VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data);
+	VLAPIC_CTR2(vlapic, "vlapic write offset %#lx, data %#lx",
+	    offset, data);
 
-	if (offset > sizeof(*lapic)) {
-		return 0;
+	if (offset > sizeof(*lapic))
+		return (0);
+
+	/* Ignore MMIO accesses in x2APIC mode */
+	if (x2apic(vlapic) && mmio_access) {
+		VLAPIC_CTR2(vlapic, "MMIO write of %#lx to offset %#lx "
+		    "in x2APIC mode", data, offset);
+		return (0);
+	}
+
+	/*
+	 * XXX Generate GP fault for MSR accesses in xAPIC mode
+	 */
+	if (!x2apic(vlapic) && !mmio_access) {
+		VLAPIC_CTR2(vlapic, "x2APIC MSR write of %#lx to offset %#lx "
+		    "in xAPIC mode", data, offset);
+		return (0);
 	}
 
 	/*
 	 * Each writable register is first updated in the APIC page and then
 	 * the corresponding write handler is invoked.
 	 */
 	retval = 0;
 	switch(offset)
 	{
 		case APIC_OFFSET_ID:
 			lapic->id = data;
 			vlapic_id_write_handler(vlapic);
 			break;
 		case APIC_OFFSET_TPR:
 			lapic->tpr = data & 0xff;
 			vlapic_update_ppr(vlapic);
 			break;
 		case APIC_OFFSET_EOI:
 			/* The value written to EOI is not used. */
 			vlapic_process_eoi(vlapic);
 			break;
 		case APIC_OFFSET_LDR:
 			lapic->ldr = data;
 			vlapic_ldr_write_handler(vlapic);
 			break;
 		case APIC_OFFSET_DFR:
 			lapic->dfr = data;
 			vlapic_dfr_write_handler(vlapic);
 			break;
 		case APIC_OFFSET_SVR:
 			lapic->svr = data;
 			vlapic_svr_write_handler(vlapic);
 			break;
 		case APIC_OFFSET_ICR_LOW: 
 			/*
 			 * In x2APIC mode a single 64-bit write carries both
 			 * halves of the ICR.
 			 */
 			lapic->icr_lo = data;
 			if (x2apic(vlapic))
 				lapic->icr_hi = data >> 32;
 			retval = vlapic_icrlo_write_handler(vlapic, retu);
 			break;
 		case APIC_OFFSET_ICR_HI:
 			lapic->icr_hi = data;
 			break;
 		case APIC_OFFSET_CMCI_LVT:
 		case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
 			regptr = vlapic_get_lvtptr(vlapic, offset);
 			*regptr = data;
 			vlapic_lvt_write_handler(vlapic, offset);
 			break;
 		case APIC_OFFSET_TIMER_ICR:
 			lapic->icr_timer = data;
 			vlapic_icrtmr_write_handler(vlapic);
 			break;
 
 		case APIC_OFFSET_TIMER_DCR:
 			lapic->dcr_timer = data;
 			vlapic_dcr_write_handler(vlapic);
 			break;
 
 		case APIC_OFFSET_ESR:
 			/* The value written to ESR is not used. */
 			vlapic_esr_write_handler(vlapic);
 			break;
 
 		case APIC_OFFSET_SELF_IPI:
 			/* SELF IPI writes are honoured only in x2APIC mode. */
 			if (x2apic(vlapic))
 				vlapic_self_ipi_handler(vlapic, data);
 			break;
 
 		case APIC_OFFSET_VER:
 		case APIC_OFFSET_APR:
 		case APIC_OFFSET_PPR:
 		case APIC_OFFSET_RRR:
 		case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
 		case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
 		case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
 		case APIC_OFFSET_TIMER_CCR:
 		default:
 			// Read only.
 			break;
 	}
 
 	return (retval);
 }
 
 /*
  * Bring the local APIC register page and the associated software state
  * back to their reset values.
  */
 static void
 vlapic_reset(struct vlapic *vlapic)
 {
 	struct LAPIC *page = vlapic->apic_page;
 
 	/* Start from an all-zero register page. */
 	bzero(page, sizeof(*page));
 
 	page->id = vlapic_get_id(vlapic);
 	page->version = VLAPIC_VERSION | (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
 	page->dfr = 0xffffffff;
 	page->svr = APIC_SVR_VECTOR;
 	vlapic_mask_lvts(vlapic);
 	vlapic_reset_tmr(vlapic);
 
 	/* Reset the divide configuration and let its handler take effect. */
 	page->dcr_timer = 0;
 	vlapic_dcr_write_handler(vlapic);
 
 	/* vcpu 0 is the BSP and starts out running; APs start in INIT. */
 	vlapic->boot_state = (vlapic->vcpuid == 0) ? BS_RUNNING : BS_INIT;
 
 	vlapic->svr_last = page->svr;
 }
 
 /*
  * One-time initialization of a vlapic whose 'vm', 'vcpuid' and 'apic_page'
  * fields have already been filled in by the caller: set up the timer lock
  * and callout, establish the initial APIC_BASE MSR value and reset the
  * register state.
  */
 void
 vlapic_init(struct vlapic *vlapic)
 {
 	uint64_t apicbase;
 
 	KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
 	KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
 	    ("vlapic_init: vcpuid is not initialized"));
 	KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
 	    "initialized"));
 
 	/*
 	 * The timer mutex has to be a spin mutex: in x2apic mode the vlapic
 	 * is accessed from the MSR emulation code, which runs in a critical
 	 * section where blockable mutexes cannot be acquired.
 	 */
 	mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
 	callout_init(&vlapic->callout, 1);
 
 	/* Only vcpu 0 is flagged as the bootstrap processor. */
 	apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
 	if (vlapic->vcpuid == 0)
 		apicbase |= APICBASE_BSP;
 	vlapic->msr_apicbase = apicbase;
 
 	vlapic_reset(vlapic);
 }
 
 /*
  * Tear down the vlapic's timer by draining its callout.
  */
 void
 vlapic_cleanup(struct vlapic *vlapic)
 {
 
 	callout_drain(&vlapic->callout);
 }
 
 /*
  * Return the current value of the emulated APIC_BASE MSR.
  */
 uint64_t
 vlapic_get_apicbase(struct vlapic *vlapic)
 {
 
 	return (vlapic->msr_apicbase);
 }
 
-void
+int
 vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new)
 {
-	struct LAPIC *lapic;
-	enum x2apic_state state;
-	uint64_t old;
-	int err;
-
-	err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state);
-	if (err)
-		panic("vlapic_set_apicbase: err %d fetching x2apic state", err);
 
-	if (state == X2APIC_DISABLED)
-		new &= ~APICBASE_X2APIC;
-
-	old = vlapic->msr_apicbase;
-	vlapic->msr_apicbase = new;
-
-	/*
-	 * If the vlapic is switching between xAPIC and x2APIC modes then
-	 * reset the mode-dependent registers.
-	 */
-	if ((old ^ new) & APICBASE_X2APIC) {
-		lapic = vlapic->apic_page;
-		lapic->id = vlapic_get_id(vlapic);
-		if (x2apic(vlapic)) {
-			lapic->ldr = x2apic_ldr(vlapic);
-			lapic->dfr = 0;
-		} else {
-			lapic->ldr = 0;
-			lapic->dfr = 0xffffffff;
-		}
 	/*
 	 * The APIC_BASE MSR cannot be changed through this function: a write
 	 * of the current value returns 0 and any other value is rejected
 	 * with -1, leaving the MSR untouched.
 	 */
+	if (vlapic->msr_apicbase != new) {
+		VLAPIC_CTR2(vlapic, "Changing APIC_BASE MSR from %#lx to %#lx "
+		    "not supported", vlapic->msr_apicbase, new);
+		return (-1);
 	}
+
+	return (0);
 }
 
 /*
  * Apply 'state' to the vlapic of vcpu 'vcpuid': X2APIC_DISABLED clears the
  * x2APIC bit in the APIC_BASE MSR and any other state sets it. The ID, LDR
  * and DFR registers are then reloaded with the values for the new mode.
  */
 void
 vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
 {
 	struct vlapic *vlapic;
+	struct LAPIC *lapic;
 
 	vlapic = vm_lapic(vm, vcpuid);
 
 	if (state == X2APIC_DISABLED)
 		vlapic->msr_apicbase &= ~APICBASE_X2APIC;
+	else
+		vlapic->msr_apicbase |= APICBASE_X2APIC;
+
+	/*
+	 * Reset the local APIC registers whose values are mode-dependent.
+	 *
+	 * XXX this works because the APIC mode can be changed only at vcpu
+	 * initialization time.
+	 */
+	lapic = vlapic->apic_page;
+	lapic->id = vlapic_get_id(vlapic);
+	if (x2apic(vlapic)) {
+		lapic->ldr = x2apic_ldr(vlapic);
+		lapic->dfr = 0;
+	} else {
+		lapic->ldr = 0;
+		lapic->dfr = 0xffffffff;
+	}
 }
 
 /*
  * Deliver interrupt 'vec' to every vcpu addressed by the (dest, phys,
  * delmode) tuple. Delivery modes other than fixed and lowest-priority are
  * traced and ignored.
  */
 void
 vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
     int delmode, int vec)
 {
 	cpuset_t targets;
 	int cpu;
 
 	if (!(delmode == APIC_DELMODE_FIXED ||
 	    delmode == APIC_DELMODE_LOWPRIO)) {
 		VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode);
 		return;
 	}
 
 	/*
 	 * There is no virtual interrupt redirection hardware, so every
 	 * interrupt coming from the ioapic or from MSI carries its 'dest'
 	 * in the legacy xAPIC format.
 	 */
 	vlapic_calcdest(vm, &targets, dest, phys,
 	    delmode == APIC_DELMODE_LOWPRIO, false);
 
 	/* CPU_FFS() is 1-based and returns 0 once the set is empty. */
 	for (;;) {
 		cpu = CPU_FFS(&targets);
 		if (cpu == 0)
 			break;
 		cpu--;
 		CPU_CLR(cpu, &targets);
 		lapic_set_intr(vm, cpu, vec, level);
 	}
 }
 
 /*
  * Post an interrupt to the vcpu currently running on 'hostcpu'.
  *
  * When the backend provides a 'post_intr' op (features such as Posted
  * Interrupts on Intel or the Doorbell MSR of AMD AVIC, which avoid a VM
  * exit) it is used. Otherwise fall back to sending IPI 'ipinum' to
  * 'hostcpu'.
  */
 void
 vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum)
 {
 	if (!vlapic->ops.post_intr) {
 		ipi_cpu(hostcpu, ipinum);
 		return;
 	}
 
 	(*vlapic->ops.post_intr)(vlapic, hostcpu);
 }
 
 /*
  * Return true only when the APIC is enabled both in the APIC_BASE MSR and
  * through the software-enable bit of the SVR.
  */
 bool
 vlapic_enabled(struct vlapic *vlapic)
 {
 	if ((vlapic->msr_apicbase & APICBASE_ENABLED) == 0)
 		return (false);
 
 	return ((vlapic->apic_page->svr & APIC_SVR_ENABLE) != 0);
 }
 
 /*
  * Set (level) or clear (edge) the trigger-mode bit for 'vector' in the TMR
  * and, if the backend provides a 'set_tmr' op, notify it of the change.
  */
 static void
 vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level)
 {
 	struct LAPIC *lapic;
 	uint32_t *tmrptr, mask;
 	int idx;
 
 	lapic = vlapic->apic_page;
 	tmrptr = &lapic->tmr0;
 	/* Successive TMR words are addressed 4 uint32_t slots apart. */
 	idx = (vector / 32) * 4;
 	/*
 	 * Use an unsigned constant so that bit 31 can be selected without
 	 * shifting into the sign bit of an int.
 	 */
 	mask = 1U << (vector % 32);
 	if (level)
 		tmrptr[idx] |= mask;
 	else
 		tmrptr[idx] &= ~mask;
 
 	if (vlapic->ops.set_tmr != NULL)
 		(*vlapic->ops.set_tmr)(vlapic, vector, level);
 }
 
 /*
  * Mark every vector (0 through 255) as edge-triggered in the TMR.
  */
 void
 vlapic_reset_tmr(struct vlapic *vlapic)
 {
 	int vec = 0;
 
 	VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered");
 
 	while (vec < 256) {
 		vlapic_set_tmr(vlapic, vec, false);
 		vec++;
 	}
 }
 
 /*
  * Mark 'vector' as level-triggered in this vlapic's TMR if the (dest, phys,
  * delmode) tuple, interpreted in xAPIC format, selects this vcpu.
  */
 void
 vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
     int delmode, int vector)
 {
 	cpuset_t targets;
 	bool is_fixed, is_lowprio;
 
 	KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector));
 
 	/*
 	 * Level triggering only makes sense for the fixed and lowprio
 	 * delivery modes; anything else is traced and ignored.
 	 */
 	is_fixed = (delmode == APIC_DELMODE_FIXED);
 	is_lowprio = (delmode == APIC_DELMODE_LOWPRIO);
 	if (!is_fixed && !is_lowprio) {
 		VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for "
 		    "delivery-mode %d", delmode);
 		return;
 	}
 
 	vlapic_calcdest(vlapic->vm, &targets, dest, phys, is_lowprio, false);
 
 	/* Only act when this vcpu is one of the destinations. */
 	if (CPU_ISSET(vlapic->vcpuid, &targets)) {
 		VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
 		vlapic_set_tmr(vlapic, vector, true);
 	}
 }
diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
index d2fc6d9a6188..b215e57a9711 100644
--- a/sys/amd64/vmm/io/vlapic.h
+++ b/sys/amd64/vmm/io/vlapic.h
@@ -1,105 +1,105 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _VLAPIC_H_
 #define	_VLAPIC_H_
 
 struct vm;
 enum x2apic_state;
 
-int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data,
-    bool *retu);
-int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data,
-    bool *retu);
 /*
  * Emulate a write to, or a read from, the local APIC register at 'offset'.
  * 'mmio_access' is non-zero for memory-mapped accesses and zero for x2APIC
  * MSR accesses.
  */
+int vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
+    uint64_t data, bool *retu);
+int vlapic_read(struct vlapic *vlapic, int mmio_access, uint64_t offset,
+    uint64_t *data, bool *retu);
 
 /*
  * Returns 0 if there is no eligible vector that can be delivered to the
  * guest at this time and non-zero otherwise.
  *
  * If an eligible vector number is found and 'vecptr' is not NULL then it will
  * be stored in the location pointed to by 'vecptr'.
  *
  * Note that the vector does not automatically transition to the ISR as a
  * result of calling this function.
  */
 int vlapic_pending_intr(struct vlapic *vlapic, int *vecptr);
 
 /*
  * Transition 'vector' from IRR to ISR. This function is called with the
  * vector returned by 'vlapic_pending_intr()' when the guest is able to
  * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that
  * block interrupt delivery).
  */
 void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
 
 /*
  * Returns 1 if the vcpu needs to be notified of the interrupt and 0 otherwise.
  */
 int vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);
 
 /*
  * Post an interrupt to the vcpu running on 'hostcpu'. This will use a
  * hardware assist if available (e.g. Posted Interrupt) or fall back to
  * sending an 'ipinum' to interrupt the 'hostcpu'.
  */
 void vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum);
 
 void vlapic_set_error(struct vlapic *vlapic, uint32_t mask);
 void vlapic_fire_cmci(struct vlapic *vlapic);
 int vlapic_trigger_lvt(struct vlapic *vlapic, int vector);
 
 /*
  * APIC_BASE MSR and APIC mode management:
  * - vlapic_get_apicbase() returns the current APIC_BASE MSR value.
  * - vlapic_set_apicbase() returns 0 if 'val' equals the current value and
  *   -1 otherwise; the MSR itself is never modified.
  * - vlapic_set_x2apic_state() sets or clears the x2APIC bit of the MSR for
  *   the given vcpu and reloads the mode-dependent registers.
  * - vlapic_enabled() returns true only if the APIC is enabled both in the
  *   APIC_BASE MSR and in the SVR.
  */
 uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
-void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
+int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
 void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s);
 bool vlapic_enabled(struct vlapic *vlapic);
 
 /*
  * Deliver interrupt 'vec' to every vcpu selected by the (dest, phys,
  * delmode) tuple, with 'dest' in xAPIC format. Only the fixed and
  * lowest-priority delivery modes are accepted; others are ignored.
  */
 void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
     int delmode, int vec);
 
 /* Reset the trigger-mode bits for all vectors to be edge-triggered */
 void vlapic_reset_tmr(struct vlapic *vlapic);
 
 /*
  * Set the trigger-mode bit associated with 'vector' to level-triggered if
  * the (dest,phys,delmode) tuple resolves to an interrupt being delivered to
  * this 'vlapic'.
  */
 void vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys,
     int delmode, int vector);
 
 /* APIC write handlers */
 void vlapic_id_write_handler(struct vlapic *vlapic);
 void vlapic_ldr_write_handler(struct vlapic *vlapic);
 void vlapic_dfr_write_handler(struct vlapic *vlapic);
 void vlapic_svr_write_handler(struct vlapic *vlapic);
 void vlapic_esr_write_handler(struct vlapic *vlapic);
 int vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu);
 void vlapic_icrtmr_write_handler(struct vlapic *vlapic);
 void vlapic_dcr_write_handler(struct vlapic *vlapic);
 void vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset);
 #endif	/* _VLAPIC_H_ */
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 4ae691563fec..80ff5be96691 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1,1547 +1,1547 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/pcpu.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/systm.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/vm.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 #include <x86/psl.h>
 #include <x86/apicreg.h>
 #include <machine/vmparam.h>
 
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 
 #include "vmm_ktr.h"
 #include "vmm_host.h"
 #include "vmm_mem.h"
 #include "vmm_util.h"
 #include "vhpet.h"
 #include "vioapic.h"
 #include "vlapic.h"
 #include "vmm_msr.h"
 #include "vmm_ipi.h"
 #include "vmm_stat.h"
 #include "vmm_lapic.h"
 
 #include "io/ppt.h"
 #include "io/iommu.h"
 
 struct vlapic;
 
 /*
  * Per-vcpu bookkeeping kept by the generic vmm layer. One instance lives in
  * each slot of the 'vcpu' array of 'struct vm'.
  */
 struct vcpu {
 	int		flags;
 	enum vcpu_state	state;		/* changed via vcpu_set_state_locked() */
 	struct mtx	mtx;		/* spin mutex, see vcpu_lock_init() */
 	int		hostcpu;	/* host cpu while in VMRUN(), else NOCPU */
 	uint64_t	guest_msrs[VMM_MSR_NUM];
 	struct vlapic	*vlapic;	/* obtained from VLAPIC_INIT() */
 	int		 vcpuid;	/* index of this vcpu in vm->vcpu[] */
 	struct savefpu	*guestfpu;	/* guest fpu state */
 	uint64_t	guest_xcr0;	/* guest XCR0, swapped around VMRUN() */
 	void		*stats;		/* obtained from vmm_stat_alloc() */
 	struct vm_exit	exitinfo;	/* returned by vm_exitinfo() */
 	enum x2apic_state x2apic_state;
 	int		nmi_pending;	/* set by vm_inject_nmi() */
 };
 
 /* Helpers around the per-vcpu spin mutex ('mtx' in 'struct vcpu'). */
 #define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
 #define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
 #define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
 #define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
 
 /* One contiguous range of guest physical memory created by vm_malloc(). */
 struct mem_seg {
 	vm_paddr_t	gpa;		/* guest physical base address */
 	size_t		len;		/* length in bytes */
 	boolean_t	wired;		/* set by vm_gpa_wire() */
 	vm_object_t	object;		/* returned by vmm_mem_alloc() */
 };
 /* Capacity of the 'mem_segs' array in 'struct vm'. */
 #define	VM_MAX_MEMORY_SEGMENTS	2
 
 /* Generic (processor-independent) state of one virtual machine. */
 struct vm {
 	void		*cookie;	/* processor-specific data */
 	void		*iommu;		/* iommu-specific data */
 	struct vhpet	*vhpet;		/* virtual HPET */
 	struct vioapic	*vioapic;	/* virtual ioapic */
 	struct vmspace	*vmspace;	/* guest's address space */
 	struct vcpu	vcpu[VM_MAXCPU];
 	int		num_mem_segs;	/* number of valid 'mem_segs' entries */
 	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
 	char		name[VM_MAX_NAMELEN];
 
 	/*
 	 * Set of active vcpus.
 	 * An active vcpu is one that has been started implicitly (BSP) or
 	 * explicitly (AP) by sending it a startup ipi.
 	 */
 	cpuset_t	active_cpus;
 
 	/*
 	 * Rendezvous state, see vm_handle_rendezvous(). A rendezvous is
 	 * in progress while 'rendezvous_func' is not NULL.
 	 */
 	struct mtx	rendezvous_mtx;
 	cpuset_t	rendezvous_req_cpus;
 	cpuset_t	rendezvous_done_cpus;
 	void		*rendezvous_arg;
 	vm_rendezvous_func_t rendezvous_func;
 };
 
 /* Set to 1 once vmm_init() succeeds; vm_create() refuses to run otherwise. */
 static int vmm_initialized;
 
 /*
  * Processor-specific backend selected in vmm_init(). Each macro below
  * dispatches through 'ops' and evaluates to the listed fallback value
  * (0, NULL or ENXIO) when no backend has been selected.
  */
 static struct vmm_ops *ops;
 #define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
 #define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
 #define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
 
 #define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
 #define	VMRUN(vmi, vcpu, rip, pmap, rptr) \
 	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr) : ENXIO)
 #define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
 #define	VMSPACE_ALLOC(min, max) \
 	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
 #define	VMSPACE_FREE(vmspace) \
 	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
 #define	VMGETREG(vmi, vcpu, num, retval)		\
 	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
 #define	VMSETREG(vmi, vcpu, num, val)		\
 	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
 #define	VMGETDESC(vmi, vcpu, num, desc)		\
 	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
 #define	VMSETDESC(vmi, vcpu, num, desc)		\
 	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
 #define	VMINJECT(vmi, vcpu, type, vec, ec, ecv)	\
 	(ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO)
 #define	VMGETCAP(vmi, vcpu, num, retval)	\
 	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
 #define	VMSETCAP(vmi, vcpu, num, val)		\
 	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
 #define	VLAPIC_INIT(vmi, vcpu)			\
 	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
 #define	VLAPIC_CLEANUP(vmi, vlapic)		\
 	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
 
 /*
  * Setting CR0.TS makes the next host FPU access trap; clearing it allows
  * direct FPU access. Used around guest FPU save/restore.
  */
 #define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
 #define	fpu_stop_emulating()	clts()
 
 /* malloc type used for 'struct vm' allocations in this file */
 static MALLOC_DEFINE(M_VM, "vm", "vm");
 CTASSERT(VMM_MSR_NUM <= 64);	/* msr_mask can keep track of up to 64 msrs */
 
 /* statistics */
 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
 
 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
 
 /* Chosen in vmm_init(): an allocated vector or IPI_AST as the fallback. */
 static int vmm_ipinum;
 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
     "IPI vector used for vcpu notifications");
 
 /* Defined near the end of the file but needed by vm_handle_hlt(). */
 static void vm_deactivate_cpu(struct vm *vm, int vcpuid);
 
 /*
  * Release the resources owned by vcpu 'i' of 'vm': its vlapic, its
  * statistics buffer and its guest FPU save area.
  */
 static void
 vcpu_cleanup(struct vm *vm, int i)
 {
 	struct vcpu *vc;
 
 	vc = &vm->vcpu[i];
 
 	VLAPIC_CLEANUP(vm->cookie, vc->vlapic);
 	vmm_stat_free(vc->stats);
 	fpu_save_area_free(vc->guestfpu);
 }
 
 /*
  * Initialize slot 'vcpu_id' of vm->vcpu[]: set up its lock, create its
  * vlapic, set the initial x2apic state and XCR0 value, and allocate the
  * guest FPU save area and the statistics buffer.
  */
 static void
 vcpu_init(struct vm *vm, uint32_t vcpu_id)
 {
 	struct vcpu *vcpu;
 	
 	vcpu = &vm->vcpu[vcpu_id];
 
 	vcpu_lock_init(vcpu);
 	vcpu->hostcpu = NOCPU;		/* not running on any host cpu yet */
 	vcpu->vcpuid = vcpu_id;
 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
-	vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
+	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
 	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
 	vcpu->guestfpu = fpu_save_area_alloc();
 	fpu_save_area_reset(vcpu->guestfpu);
 	vcpu->stats = vmm_stat_alloc();
 }
 
 /*
  * Return a pointer to the exit information of vcpu 'cpuid'.
  * Panics if 'cpuid' is outside [0, VM_MAXCPU).
  */
 struct vm_exit *
 vm_exitinfo(struct vm *vm, int cpuid)
 {
 
 	if (cpuid < 0 || cpuid >= VM_MAXCPU)
 		panic("vm_exitinfo: invalid cpuid %d", cpuid);
 
 	return (&vm->vcpu[cpuid].exitinfo);
 }
 
 /*
  * Resume hook installed into 'vmm_resume_p' by vmm_init(); forwards to the
  * backend's resume routine, if a backend has been selected.
  */
 static void
 vmm_resume(void)
 {
 	VMM_RESUME();
 }
 
 static int
 vmm_init(void)
 {
 	int error;
 
 	vmm_host_state_init();
 
 	vmm_ipinum = vmm_ipi_alloc();
 	if (vmm_ipinum == 0)
 		vmm_ipinum = IPI_AST;
 
 	error = vmm_mem_init();
 	if (error)
 		return (error);
 	
 	if (vmm_is_intel())
 		ops = &vmm_ops_intel;
 	else if (vmm_is_amd())
 		ops = &vmm_ops_amd;
 	else
 		return (ENXIO);
 
 	vmm_msr_init();
 	vmm_resume_p = vmm_resume;
 
 	return (VMM_INIT(vmm_ipinum));
 }
 
 /*
  * Module event handler.
  *
  * MOD_LOAD:   initialize the device layer, the iommu (only when passthru
  *             devices are available) and the vmm core.
  * MOD_UNLOAD: tear everything down, provided vmmdev_cleanup() succeeds.
  * Any other event is accepted and ignored.
  */
 static int
 vmm_handler(module_t mod, int what, void *arg)
 {
 	int error;
 
 	error = 0;
 	switch (what) {
 	case MOD_LOAD:
 		vmmdev_init();
 		if (ppt_avail_devices() > 0)
 			iommu_init();
 		error = vmm_init();
 		if (error == 0)
 			vmm_initialized = 1;
 		break;
 	case MOD_UNLOAD:
 		error = vmmdev_cleanup();
 		if (error != 0)
 			break;
 
 		vmm_resume_p = NULL;
 		iommu_cleanup();
 		if (vmm_ipinum != IPI_AST)
 			vmm_ipi_free(vmm_ipinum);
 		error = VMM_CLEANUP();
 		/*
 		 * Something bad happened - prevent new
 		 * VMs from being created
 		 */
 		if (error != 0)
 			vmm_initialized = 0;
 		break;
 	default:
 		break;
 	}
 
 	return (error);
 }
 
 /* Module descriptor: module name, event handler and handler argument. */
 static moduledata_t vmm_kmod = {
 	"vmm",
 	vmm_handler,
 	NULL
 };
 
 /*
  * vmm initialization has the following dependencies:
  *
  * - iommu initialization must happen after the pci passthru driver has had
  *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
  *
  * - VT-x initialization requires smp_rendezvous() and therefore must happen
  *   after SMP is fully functional (after SI_SUB_SMP).
  */
 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
 MODULE_VERSION(vmm, 1);
 
 /*
  * Create a virtual machine called 'name' and return it through 'retvm'.
  *
  * Returns ENXIO if the module failed to initialize, EINVAL if 'name' is
  * NULL or does not fit in VM_MAX_NAMELEN, ENOMEM if no guest address space
  * could be allocated, and 0 on success.
  */
 int
 vm_create(const char *name, struct vm **retvm)
 {
 	const int BSP = 0;
 	struct vmspace *vmspace;
 	struct vm *vm;
 	int vcpuid;
 
 	/*
 	 * If vmm.ko could not be successfully initialized then don't attempt
 	 * to create the virtual machine.
 	 */
 	if (!vmm_initialized)
 		return (ENXIO);
 
 	if (name == NULL)
 		return (EINVAL);
 	if (strlen(name) >= VM_MAX_NAMELEN)
 		return (EINVAL);
 
 	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
 	if (vmspace == NULL)
 		return (ENOMEM);
 
 	vm = malloc(sizeof(*vm), M_VM, M_WAITOK | M_ZERO);
 	strcpy(vm->name, name);		/* length was checked above */
 	vm->vmspace = vmspace;
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
 	vm->vioapic = vioapic_init(vm);
 	vm->vhpet = vhpet_init(vm);
 
 	for (vcpuid = 0; vcpuid < VM_MAXCPU; vcpuid++) {
 		vcpu_init(vm, vcpuid);
 		guest_msrs_init(vm, vcpuid);
 	}
 
 	/* The boot processor is active from the start. */
 	vm_activate_cpu(vm, BSP);
 
 	*retvm = vm;
 	return (0);
 }
 
 /*
  * Free the backing memory of 'seg' (when it has an object) and zero the
  * segment descriptor.
  */
 static void
 vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
 {
 
 	if (seg->object != NULL) {
 		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
 	}
 	bzero(seg, sizeof(*seg));
 }
 
 /*
  * Tear down 'vm' and free it. Resources are released in this order:
  * passthru devices, iommu domain, virtual HPET and ioapic, guest memory
  * segments, per-vcpu state, the guest address space, the processor-specific
  * cookie and finally the 'struct vm' itself.
  */
 void
 vm_destroy(struct vm *vm)
 {
 	int i;
 
 	ppt_unassign_all(vm);
 
 	/* The domain only exists if a passthru device was ever assigned. */
 	if (vm->iommu != NULL)
 		iommu_destroy_domain(vm->iommu);
 
 	vhpet_cleanup(vm->vhpet);
 	vioapic_cleanup(vm->vioapic);
 
 	for (i = 0; i < vm->num_mem_segs; i++)
 		vm_free_mem_seg(vm, &vm->mem_segs[i]);
 
 	vm->num_mem_segs = 0;
 
 	for (i = 0; i < VM_MAXCPU; i++)
 		vcpu_cleanup(vm, i);
 
 	VMSPACE_FREE(vm->vmspace);
 
 	VMCLEANUP(vm->cookie);
 
 	free(vm, M_VM);
 }
 
 /* Return the name given to 'vm' at creation time. */
 const char *
 vm_name(struct vm *vm)
 {
 
 	return (&vm->name[0]);
 }
 
 /*
  * Map 'len' bytes of host physical memory at 'hpa' into the guest at 'gpa'.
  * Returns 0 on success or ENOMEM if the mapping could not be created.
  */
 int
 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 	vm_object_t obj;
 
 	obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa);
 	return (obj == NULL ? ENOMEM : 0);
 }
 
 /* Remove the mmio mapping of 'len' bytes at guest address 'gpa'; returns 0. */
 int
 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
 	struct vmspace *vmspace;
 
 	vmspace = vm->vmspace;
 	vmm_mmio_free(vmspace, gpa, len);
 	return (0);
 }
 
 /*
  * Return TRUE if 'gpa' lies inside one of the vm's memory segments or is
  * pci passthru mmio, FALSE otherwise.
  */
 boolean_t
 vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
 {
 	struct mem_seg *seg;
 	int i;
 
 	for (i = 0; i < vm->num_mem_segs; i++) {
 		seg = &vm->mem_segs[i];
 		/* 'gpa' is regular memory */
 		if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
 			return (TRUE);
 	}
 
 	/* otherwise it may still be pci passthru mmio */
 	return (ppt_is_mmio(vm, gpa) ? TRUE : FALSE);
 }
 
 /*
  * Back the guest physical range ['gpa', 'gpa' + 'len') with memory.
  *
  * Returns EINVAL if 'gpa' or 'len' is not page aligned, 'len' is zero, or
  * the range is only partially allocated; 0 if the whole range was already
  * allocated or has now been allocated; E2BIG if no segment slot is free;
  * ENOMEM if the allocation fails.
  */
 int
 vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
 	struct mem_seg *seg;
 	vm_object_t object;
 	vm_paddr_t addr;
 	int nalloc, nfree;
 
 	if (len == 0 || ((gpa | len) & PAGE_MASK) != 0)
 		return (EINVAL);
 
 	/* Classify every page in the range. */
 	nalloc = nfree = 0;
 	for (addr = gpa; addr < gpa + len; addr += PAGE_SIZE) {
 		if (vm_mem_allocated(vm, addr))
 			nalloc++;
 		else
 			nfree++;
 	}
 
 	/*
 	 * A mix of allocated and available pages is an error. A range that
 	 * is already allocated in its entirety needs no further work.
 	 */
 	if (nalloc != 0)
 		return (nfree != 0 ? EINVAL : 0);
 
 	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
 		return (E2BIG);
 
 	object = vmm_mem_alloc(vm->vmspace, gpa, len);
 	if (object == NULL)
 		return (ENOMEM);
 
 	/* Record the new segment in the next free slot. */
 	seg = &vm->mem_segs[vm->num_mem_segs];
 	seg->gpa = gpa;
 	seg->len = len;
 	seg->object = object;
 	seg->wired = FALSE;
 	vm->num_mem_segs++;
 
 	return (0);
 }
 
 static void
 vm_gpa_unwire(struct vm *vm)
 {
 	int i, rv;
 	struct mem_seg *seg;
 
 	for (i = 0; i < vm->num_mem_segs; i++) {
 		seg = &vm->mem_segs[i];
 		if (!seg->wired)
 			continue;
 
 		rv = vm_map_unwire(&vm->vmspace->vm_map,
 				   seg->gpa, seg->gpa + seg->len,
 				   VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 		KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
 		    "%#lx/%ld could not be unwired: %d",
 		    vm_name(vm), seg->gpa, seg->len, rv));
 
 		seg->wired = FALSE;
 	}
 }
 
 /*
  * Wire every memory segment of 'vm' that is not wired yet.
  *
  * Returns 0 when all segments are wired. If wiring any segment fails, all
  * segments are unwired again and EAGAIN is returned.
  */
 static int
 vm_gpa_wire(struct vm *vm)
 {
 	struct mem_seg *seg;
 	int idx, rv;
 
 	for (idx = 0; idx < vm->num_mem_segs; idx++) {
 		seg = &vm->mem_segs[idx];
 		if (seg->wired)
 			continue;
 
 		/* XXX rlimits? */
 		rv = vm_map_wire(&vm->vmspace->vm_map,
 		    seg->gpa, seg->gpa + seg->len,
 		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 		if (rv != KERN_SUCCESS) {
 			/*
 			 * Undo the wiring before returning an error.
 			 */
 			vm_gpa_unwire(vm);
 			return (EAGAIN);
 		}
 
 		seg->wired = TRUE;
 	}
 
 	return (0);
 }
 
 /*
  * Walk every page of every guest memory segment and move its IOMMU mapping
  * between the host domain and the vm's own domain.
  *
  * map == TRUE:  create the gpa -> hpa mapping in vm->iommu and remove the
  *               page from the host domain.
  * map == FALSE: remove the gpa mapping from vm->iommu and create an
  *               hpa -> hpa mapping in the host domain.
  *
  * Every segment is expected to be wired already (asserted below).
  */
 static void
 vm_iommu_modify(struct vm *vm, boolean_t map)
 {
 	int i, sz;
 	vm_paddr_t gpa, hpa;
 	struct mem_seg *seg;
 	void *vp, *cookie, *host_domain;
 
 	sz = PAGE_SIZE;
 	host_domain = iommu_host_domain();
 
 	for (i = 0; i < vm->num_mem_segs; i++) {
 		seg = &vm->mem_segs[i];
 		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
 		    vm_name(vm), seg->gpa, seg->len));
 
 		gpa = seg->gpa;
 		while (gpa < seg->gpa + seg->len) {
 			/*
 			 * The page is held only long enough to learn its
 			 * direct-map address; the hold is dropped right away.
 			 */
 			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
 					 &cookie);
 			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
 			    vm_name(vm), gpa));
 
 			vm_gpa_release(cookie);
 
 			hpa = DMAP_TO_PHYS((uintptr_t)vp);
 			if (map) {
 				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
 				iommu_remove_mapping(host_domain, hpa, sz);
 			} else {
 				iommu_remove_mapping(vm->iommu, gpa, sz);
 				iommu_create_mapping(host_domain, hpa, hpa, sz);
 			}
 
 			gpa += PAGE_SIZE;
 		}
 	}
 
 	/*
 	 * Invalidate the cached translations associated with the domain
 	 * from which pages were removed.
 	 */
 	if (map)
 		iommu_invalidate_tlb(host_domain);
 	else
 		iommu_invalidate_tlb(vm->iommu);
 }
 
 /* Convenience wrappers selecting the direction of vm_iommu_modify(). */
 #define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
 #define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)
 
 /*
  * Detach the passthru device bus/slot/func from 'vm'. When the last device
  * is gone the guest memory is removed from the vm's iommu domain and
  * unwired. Returns 0 or the error from ppt_unassign_device().
  */
 int
 vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
 {
 	int error;
 
 	error = ppt_unassign_device(vm, bus, slot, func);
 	if (error != 0)
 		return (error);
 
 	if (ppt_assigned_devices(vm) != 0)
 		return (0);
 
 	/* That was the last passthru device. */
 	vm_iommu_unmap(vm);
 	vm_gpa_unwire(vm);
 	return (0);
 }
 
 /*
  * Attach the passthru device bus/slot/func to 'vm'.
  *
  * Returns 0 on success, the error from vm_gpa_wire() if guest memory could
  * not be wired, or the error from ppt_assign_device().
  */
 int
 vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
 {
 	int error;
 	vm_paddr_t maxaddr;
 
 	/*
 	 * Virtual machines with pci passthru devices get special treatment:
 	 * - the guest physical memory is wired
 	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
 	 *
 	 * We need to do this before the first pci passthru device is attached.
 	 */
 	if (ppt_assigned_devices(vm) == 0) {
 		KASSERT(vm->iommu == NULL,
 		    ("vm_assign_pptdev: iommu must be NULL"));
 		maxaddr = vmm_mem_maxaddr();
 		vm->iommu = iommu_create_domain(maxaddr);
 
 		error = vm_gpa_wire(vm);
 		if (error) {
 			/*
 			 * Drop the domain created above so that a later
 			 * attempt starts from a clean state instead of
 			 * tripping the assertion (or leaking the domain).
 			 */
 			iommu_destroy_domain(vm->iommu);
 			vm->iommu = NULL;
 			return (error);
 		}
 
 		vm_iommu_map(vm);
 	}
 
 	error = ppt_assign_device(vm, bus, slot, func);
 	return (error);
 }
 
 /*
  * Hold the guest page containing 'gpa' and return a direct-map pointer to
  * 'gpa'. The range ['gpa', 'gpa' + 'len') must not cross a page boundary,
  * otherwise the function panics.
  *
  * On success '*cookie' is set to a value that must later be passed to
  * vm_gpa_release(). On failure NULL is returned and '*cookie' is NULL.
  */
 void *
 vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
 	    void **cookie)
 {
 	vm_page_t m;
 	int nheld, pageoff;
 
 	pageoff = gpa & PAGE_MASK;
 	if (len > PAGE_SIZE - pageoff)
 		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
 
 	nheld = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
 	    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
 	if (nheld != 1) {
 		*cookie = NULL;
 		return (NULL);
 	}
 
 	*cookie = m;
 	return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
 }
 
 /* Drop the page hold taken by vm_gpa_hold(); 'cookie' is the held page. */
 void
 vm_gpa_release(void *cookie)
 {
 	vm_page_t page;
 
 	page = cookie;
 
 	vm_page_lock(page);
 	vm_page_unhold(page);
 	vm_page_unlock(page);
 }
 
 /*
  * Look up the memory segment that starts exactly at 'gpabase' and copy its
  * base, length and wired flag into 'seg'.
  * Returns 0 if such a segment exists and -1 otherwise.
  */
 int
 vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
 		  struct vm_memory_segment *seg)
 {
 	struct mem_seg *ms;
 	int i;
 
 	for (i = 0; i < vm->num_mem_segs; i++) {
 		ms = &vm->mem_segs[i];
 		if (ms->gpa != gpabase)
 			continue;
 
 		seg->gpa = ms->gpa;
 		seg->len = ms->len;
 		seg->wired = ms->wired;
 		return (0);
 	}
 
 	return (-1);
 }
 
 /*
  * Find the memory segment containing 'gpa'. On success a reference is
  * taken on the segment's object, which is returned through 'object'
  * together with the offset of 'gpa' inside the segment, and 0 is
  * returned. EINVAL is returned when no segment with an object contains
  * 'gpa'.
  */
 int
 vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
 	      vm_offset_t *offset, struct vm_object **object)
 {
 	struct mem_seg *seg;
 	int i;
 
 	for (i = 0; i < vm->num_mem_segs; i++) {
 		seg = &vm->mem_segs[i];
 		if (seg->object == NULL)
 			continue;
 		if (gpa < seg->gpa || gpa >= seg->gpa + seg->len)
 			continue;
 
 		*offset = gpa - seg->gpa;
 		*object = seg->object;
 		vm_object_reference(seg->object);
 		return (0);
 	}
 
 	return (EINVAL);
 }
 
 /*
  * Read guest register 'reg' of vcpu 'vcpu' into '*retval'.
  * Returns EINVAL for an out-of-range vcpu or register, otherwise the
  * backend's result.
  */
 int
 vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU || reg >= VM_REG_LAST)
 		return (EINVAL);
 
 	return (VMGETREG(vm->cookie, vcpu, reg, retval));
 }
 
 /*
  * Write 'val' to guest register 'reg' of vcpu 'vcpu'.
  * Returns EINVAL for an out-of-range vcpu or register, otherwise the
  * backend's result.
  */
 int
 vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU || reg >= VM_REG_LAST)
 		return (EINVAL);
 
 	return (VMSETREG(vm->cookie, vcpu, reg, val));
 }
 
 /* Return TRUE if 'reg' names the guest IDTR or GDTR, FALSE otherwise. */
 static boolean_t
 is_descriptor_table(int reg)
 {
 
 	if (reg == VM_REG_GUEST_IDTR || reg == VM_REG_GUEST_GDTR)
 		return (TRUE);
 
 	return (FALSE);
 }
 
 /*
  * Return TRUE if 'reg' names one of the guest segment registers
  * (ES, CS, SS, DS, FS, GS, TR or LDTR), FALSE otherwise.
  */
 static boolean_t
 is_segment_register(int reg)
 {
 
 	if (reg == VM_REG_GUEST_ES || reg == VM_REG_GUEST_CS ||
 	    reg == VM_REG_GUEST_SS || reg == VM_REG_GUEST_DS ||
 	    reg == VM_REG_GUEST_FS || reg == VM_REG_GUEST_GS ||
 	    reg == VM_REG_GUEST_TR || reg == VM_REG_GUEST_LDTR)
 		return (TRUE);
 
 	return (FALSE);
 }
 
 /*
  * Fetch the descriptor of segment register or descriptor table 'reg' of
  * vcpu 'vcpu' into 'desc'. Returns EINVAL for an out-of-range vcpu or a
  * register of another kind, otherwise the backend's result.
  */
 int
 vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 		struct seg_desc *desc)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (!(is_segment_register(reg) || is_descriptor_table(reg)))
 		return (EINVAL);
 
 	return (VMGETDESC(vm->cookie, vcpu, reg, desc));
 }
 
 /*
  * Load 'desc' into segment register or descriptor table 'reg' of vcpu
  * 'vcpu'. Returns EINVAL for an out-of-range vcpu or a register of another
  * kind, otherwise the backend's result.
  */
 int
 vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 		struct seg_desc *desc)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (!(is_segment_register(reg) || is_descriptor_table(reg)))
 		return (EINVAL);
 
 	return (VMSETDESC(vm->cookie, vcpu, reg, desc));
 }
 
 /*
  * Load the guest's FPU state (and XCR0 when the host has XSAVE enabled)
  * onto the current cpu. Called from vm_run() before entering the guest.
  */
 static void
 restore_guest_fpustate(struct vcpu *vcpu)
 {
 
 	/* flush host state to the pcb */
 	fpuexit(curthread);
 
 	/* restore guest FPU state */
 	fpu_stop_emulating();
 	fpurestore(vcpu->guestfpu);
 
 	/* restore guest XCR0 if XSAVE is enabled in the host */
 	if (rcr4() & CR4_XSAVE)
 		load_xcr(0, vcpu->guest_xcr0);
 
 	/*
 	 * The FPU is now "dirty" with the guest's state so turn on emulation
 	 * to trap any access to the FPU by the host.
 	 */
 	fpu_start_emulating();
 }
 
 /*
  * Save the guest's FPU state (and XCR0 when the host has XSAVE enabled)
  * after the guest has run, and put the host's XCR0 back. Panics if CR0.TS
  * is found clear on entry.
  */
 static void
 save_guest_fpustate(struct vcpu *vcpu)
 {
 
 	if ((rcr0() & CR0_TS) == 0)
 		panic("fpu emulation not enabled in host!");
 
 	/* save guest XCR0 and restore host XCR0 */
 	if (rcr4() & CR4_XSAVE) {
 		vcpu->guest_xcr0 = rxcr(0);
 		load_xcr(0, vmm_get_host_xcr0());
 	}
 
 	/* save guest FPU state */
 	fpu_stop_emulating();
 	fpusave(vcpu->guestfpu);
 	/* re-arm the trap so host FPU use is caught again */
 	fpu_start_emulating();
 }
 
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
 /*
  * Move 'vcpu' to 'newstate'. The vcpu lock must be held by the caller.
  *
  * With 'from_idle' set the call first sleeps until the vcpu is idle;
  * without it the vcpu must not be idle. Returns 0 on success or EBUSY
  * when the requested transition is not permitted.
  */
 static int
 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
     bool from_idle)
 {
 	bool allowed;
 
 	vcpu_assert_locked(vcpu);
 
 	/*
 	 * State transitions from the vmmdev_ioctl() must always begin from
 	 * the VCPU_IDLE state. This guarantees that there is only a single
 	 * ioctl() operating on a vcpu at any point.
 	 */
 	if (!from_idle) {
 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
 		    "vcpu idle state"));
 	} else {
 		while (vcpu->state != VCPU_IDLE)
 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
 	}
 
 	/*
 	 * The following state transitions are allowed:
 	 * IDLE -> FROZEN -> IDLE
 	 * FROZEN -> RUNNING -> FROZEN
 	 * FROZEN -> SLEEPING -> FROZEN
 	 */
 	switch (vcpu->state) {
 	case VCPU_FROZEN:
 		allowed = (newstate != VCPU_FROZEN);
 		break;
 	case VCPU_IDLE:
 	case VCPU_RUNNING:
 	case VCPU_SLEEPING:
 		allowed = (newstate == VCPU_FROZEN);
 		break;
 	default:
 		allowed = false;
 		break;
 	}
 
 	if (!allowed)
 		return (EBUSY);
 
 	vcpu->state = newstate;
 	/* Let anyone waiting for the idle state have a go. */
 	if (newstate == VCPU_IDLE)
 		wakeup(&vcpu->state);
 
 	return (0);
 }
 
 static void
 vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 {
 	int error;
 
 	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
 		panic("Error %d setting state to %d\n", error, newstate);
 }
 
 static void
 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
 {
 	int error;
 
 	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
 		panic("Error %d setting state to %d", error, newstate);
 }
 
 /*
  * Publish 'func' as the vm's rendezvous function (NULL ends the
  * rendezvous). The rendezvous mutex must be held by the caller.
  */
 static void
 vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
 {
 
 	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
 
 	/*
 	 * The write barrier after the store is not required for
 	 * correctness; it only makes the new value visible to the other
 	 * host cpus promptly so that vcpus notice the request right away.
 	 */
 	vm->rendezvous_func = func;
 	wmb();
 }
 
 /*
  * Trace helper for rendezvous code, where 'vcpuid' may be negative: emit a
  * per-vcpu trace record for a non-negative 'vcpuid' and a per-vm record
  * otherwise.
  */
 #define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
 	do {								\
 		if (vcpuid >= 0)					\
 			VCPU_CTR0(vm, vcpuid, fmt);			\
 		else							\
 			VM_CTR0(vm, fmt);				\
 	} while (0)
 
 /*
  * Take part in (or wait for) the rendezvous currently requested on 'vm'.
  *
  * 'vcpuid' is the calling vcpu, or -1 when the caller is not a vcpu. A
  * vcpu that is part of the requested set runs the rendezvous function
  * once. Whoever observes that all requested vcpus are done clears the
  * function and wakes the sleepers; everybody else sleeps until then.
  */
 static void
 vm_handle_rendezvous(struct vm *vm, int vcpuid)
 {
 
 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
 	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
 
 	mtx_lock(&vm->rendezvous_mtx);
 	while (vm->rendezvous_func != NULL) {
 		if (vcpuid != -1 &&
 		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus)) {
 			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
 			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
 			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
 		}
 		if (CPU_CMP(&vm->rendezvous_req_cpus,
 		    &vm->rendezvous_done_cpus) == 0) {
 			/* 'vcpuid' may be -1 here, so use the safe tracer. */
 			RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous completed");
 			vm_set_rendezvous_func(vm, NULL);
 			wakeup(&vm->rendezvous_func);
 			break;
 		}
 		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
 		    "vmrndv", 0);
 	}
 	mtx_unlock(&vm->rendezvous_mtx);
 }
 
 /*
  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
  *
  * 'intr_disabled' tells whether the guest halted with interrupts disabled,
  * in which case only a pending NMI prevents the sleep. If the vcpu's local
  * apic is disabled the vcpu is deactivated instead and '*retu' is set so
  * that the VM_EXITCODE_SPINDOWN_CPU exit is handed to userland.
  *
  * Always returns 0.
  */
 static int
 vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 {
 	struct vm_exit *vmexit;
 	struct vcpu *vcpu;
 	int t, timo;
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
 
 	/*
 	 * Do a final check for pending NMI or interrupts before
 	 * really putting this thread to sleep.
 	 *
 	 * These interrupts could have happened any time after we
 	 * returned from VMRUN() and before we grabbed the vcpu lock.
 	 */
 	if (!vm_nmi_pending(vm, vcpuid) &&
 	    (intr_disabled || !vlapic_pending_intr(vcpu->vlapic, NULL))) {
 		t = ticks;
 		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
 		if (vlapic_enabled(vcpu->vlapic)) {
 			/*
 			 * XXX msleep_spin() is not interruptible so use the
 			 * 'timo' to put an upper bound on the sleep time.
 			 */
 			timo = hz;
 			msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
 		} else {
 			/*
 			 * Spindown the vcpu if the apic is disabled and it
 			 * had entered the halted state.
 			 */
 			*retu = true;
 			vmexit = vm_exitinfo(vm, vcpuid);
 			vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
 			vm_deactivate_cpu(vm, vcpuid);
 			VCPU_CTR0(vm, vcpuid, "spinning down cpu");
 		}
 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
 		/* account the time spent between the two state changes */
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 	}
 	vcpu_unlock(vcpu);
 
 	return (0);
 }
 
 /*
  * Resolve a guest paging exit for vcpu 'vcpuid'.
  *
  * Read and write faults are first offered to the accessed/dirty bit
  * emulation; anything it does not resolve goes through vm_fault().
  * Returns 0 when the fault was resolved, in which case the faulting
  * instruction is restarted, and EFAULT otherwise.
  */
 static int
 vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
 {
 	struct vm_exit *vme;
 	int ftype, rv;
 	bool resolved;
 
 	vme = &vm->vcpu[vcpuid].exitinfo;
 
 	ftype = vme->u.paging.fault_type;
 	KASSERT(ftype == VM_PROT_READ ||
 	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
 	    ("vm_handle_paging: invalid fault_type %d", ftype));
 
 	resolved = false;
 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
 		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
 		    vme->u.paging.gpa, ftype);
 		resolved = (rv == 0);
 	}
 
 	if (!resolved) {
 		rv = vm_fault(&vm->vmspace->vm_map, vme->u.paging.gpa, ftype,
 		    VM_FAULT_NORMAL);
 
 		VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
 		    "ftype = %d", rv, vme->u.paging.gpa, ftype);
 
 		if (rv != KERN_SUCCESS)
 			return (EFAULT);
 	}
 
 	/* restart execution at the faulting instruction */
 	vme->inst_length = 0;
 
 	return (0);
 }
 
 /*
  * Handle an instruction emulation exit for vcpu 'vcpuid'.
  *
  * The faulting instruction is fetched and decoded. If the guest physical
  * address belongs to the local apic, the ioapic or the HPET the access is
  * emulated here; for any other address '*retu' is set and the exit is left
  * for userland.
  *
  * Returns EFAULT if the instruction cannot be fetched or decoded, 0 when
  * deferring to userland, otherwise the result of the emulation.
  */
 static int
 vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
 {
 	struct vie *vie;
 	struct vcpu *vcpu;
 	struct vm_exit *vme;
 	int error, inst_length;
 	uint64_t rip, gla, gpa, cr3;
 	enum vie_cpu_mode cpu_mode;
 	enum vie_paging_mode paging_mode;
 	mem_region_read_t mread;
 	mem_region_write_t mwrite;
 
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 
 	/* Capture the exit details before 'vie' is (re)initialized. */
 	rip = vme->rip;
 	inst_length = vme->inst_length;
 
 	gla = vme->u.inst_emul.gla;
 	gpa = vme->u.inst_emul.gpa;
 	cr3 = vme->u.inst_emul.cr3;
 	cpu_mode = vme->u.inst_emul.cpu_mode;
 	paging_mode = vme->u.inst_emul.paging_mode;
 	vie = &vme->u.inst_emul.vie;
 
 	vie_init(vie);
 
 	/* Fetch, decode and emulate the faulting instruction */
 	if (vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
 	    paging_mode, vie) != 0)
 		return (EFAULT);
 
 	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
 		return (EFAULT);
 
 	/* return to userland unless this is an in-kernel emulated device */
 	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
 		mread = lapic_mmio_read;
 		mwrite = lapic_mmio_write;
 	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
 		mread = vioapic_mmio_read;
 		mwrite = vioapic_mmio_write;
 	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
 		mread = vhpet_mmio_read;
 		mwrite = vhpet_mmio_write;
 	} else {
 		*retu = true;
 		return (0);
 	}
 
 	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
 	    retu);
 
 	return (error);
 }
 
 /*
  * Run vcpu 'vmrun->cpuid' starting at 'vmrun->rip' until an exit occurs
  * that must be handled by userland or an error is encountered.
  *
  * Exits that can be handled in the kernel (ioapic EOI, rendezvous, hlt,
  * paging, instruction emulation) are processed here and the guest is
  * resumed. On return the exit information is copied to 'vmrun->vm_exit'.
  *
  * Returns EINVAL for an out-of-range vcpu id, otherwise 0 or the error
  * produced by VMRUN() or by one of the exit handlers.
  */
 int
 vm_run(struct vm *vm, struct vm_run *vmrun)
 {
 	int error, vcpuid;
 	struct vcpu *vcpu;
 	struct pcb *pcb;
 	uint64_t tscval, rip;
 	struct vm_exit *vme;
 	bool retu, intr_disabled;
 	pmap_t pmap;
 
 	vcpuid = vmrun->cpuid;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	pmap = vmspace_pmap(vm->vmspace);
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 	rip = vmrun->rip;
 restart:
 	/* Guest MSR and FPU state is live on this cpu until critical_exit(). */
 	critical_enter();
 
 	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
 	    ("vm_run: absurd pm_active"));
 
 	tscval = rdtsc();
 
 	pcb = PCPU_GET(curpcb);
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 
 	restore_guest_msrs(vm, vcpuid);	
 	restore_guest_fpustate(vcpu);
 
 	/* 'hostcpu' is only valid while the vcpu is inside VMRUN(). */
 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
 	vcpu->hostcpu = curcpu;
 	error = VMRUN(vm->cookie, vcpuid, rip, pmap, &vm->rendezvous_func);
 	vcpu->hostcpu = NOCPU;
 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
 
 	save_guest_fpustate(vcpu);
 	restore_host_msrs(vm, vcpuid);
 
 	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
 
 	critical_exit();
 
 	if (error == 0) {
 		/* 'retu' becomes true when userland must see this exit. */
 		retu = false;
 		switch (vme->exitcode) {
 		case VM_EXITCODE_IOAPIC_EOI:
 			vioapic_process_eoi(vm, vcpuid,
 			    vme->u.ioapic_eoi.vector);
 			break;
 		case VM_EXITCODE_RENDEZVOUS:
 			vm_handle_rendezvous(vm, vcpuid);
 			error = 0;
 			break;
 		case VM_EXITCODE_HLT:
 			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
 			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
 			break;
 		case VM_EXITCODE_PAGING:
 			error = vm_handle_paging(vm, vcpuid, &retu);
 			break;
 		case VM_EXITCODE_INST_EMUL:
 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
 			break;
 		default:
 			retu = true;	/* handled in userland */
 			break;
 		}
 	}
 
 	/* Handled in the kernel: resume the guest past the instruction. */
 	if (error == 0 && retu == false) {
 		rip = vme->rip + vme->inst_length;
 		goto restart;
 	}
 
 	/* copy the exit information */
 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
 	return (error);
 }
 
 /*
  * Inject an event of 'type' with 'vector' (and optional error 'code') into
  * vcpu 'vcpuid'. Returns EINVAL for an out-of-range vcpu id, event type or
  * vector, otherwise the backend's result.
  */
 int
 vm_inject_event(struct vm *vm, int vcpuid, int type,
 		int vector, uint32_t code, int code_valid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	/* valid types lie strictly between VM_EVENT_NONE and VM_EVENT_MAX */
 	if (type <= VM_EVENT_NONE || type >= VM_EVENT_MAX)
 		return (EINVAL);
 
 	if (vector < 0 || vector > 255)
 		return (EINVAL);
 
 	return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid));
 }
 
 static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
 
 /*
  * Mark an NMI as pending for vcpu 'vcpuid' and notify the vcpu.
  * Returns EINVAL for an out-of-range vcpu id and 0 otherwise.
  */
 int
 vm_inject_nmi(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	vm->vcpu[vcpuid].nmi_pending = 1;
 	vcpu_notify_event(vm, vcpuid, false);
 
 	return (0);
 }
 
 /*
  * Return the NMI pending flag of vcpu 'vcpuid'.
  * Panics if 'vcpuid' is out of range.
  */
 int
 vm_nmi_pending(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
 
 	return (vm->vcpu[vcpuid].nmi_pending);
 }
 
 /*
  * Clear the pending NMI of vcpu 'vcpuid' and count it as delivered.
  * Panics if 'vcpuid' is out of range or if no NMI is pending.
  */
 void
 vm_nmi_clear(struct vm *vm, int vcpuid)
 {
 	struct vcpu *vcpu;
 
 	/* Report this function's own name, not vm_nmi_pending's. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	if (vcpu->nmi_pending == 0)
 		panic("vm_nmi_clear: inconsistent nmi_pending state");
 
 	vcpu->nmi_pending = 0;
 	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
 }
 
 /*
  * Query capability 'type' of vcpu 'vcpu' into '*retval'.
  * Returns EINVAL for an out-of-range vcpu or capability, otherwise the
  * backend's result.
  */
 int
 vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU ||
 	    type < 0 || type >= VM_CAP_MAX)
 		return (EINVAL);
 
 	return (VMGETCAP(vm->cookie, vcpu, type, retval));
 }
 
 /*
  * Set capability 'type' of vcpu 'vcpu' to 'val'.
  * Returns EINVAL for an out-of-range vcpu or capability, otherwise the
  * backend's result.
  */
 int
 vm_set_capability(struct vm *vm, int vcpu, int type, int val)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU ||
 	    type < 0 || type >= VM_CAP_MAX)
 		return (EINVAL);
 
 	return (VMSETCAP(vm->cookie, vcpu, type, val));
 }
 
 uint64_t *
 vm_guest_msrs(struct vm *vm, int cpu)
 {
 	return (vm->vcpu[cpu].guest_msrs);
 }
 
 struct vlapic *
 vm_lapic(struct vm *vm, int cpu)
 {
 	return (vm->vcpu[cpu].vlapic);
 }
 
 /* Return the virtual ioapic that belongs to 'vm'. */
 struct vioapic *
 vm_ioapic(struct vm *vm)
 {
 	struct vioapic *vioapic;
 
 	vioapic = vm->vioapic;
 	return (vioapic);
 }
 
 /* Return the virtual HPET that belongs to 'vm'. */
 struct vhpet *
 vm_hpet(struct vm *vm)
 {
 	struct vhpet *vhpet;
 
 	vhpet = vm->vhpet;
 	return (vhpet);
 }
 
 /*
  * Return non-zero if the pci device bus/slot/func is listed in one of the
  * 'pptdevs', 'pptdevs2' or 'pptdevs3' environment variables, each of which
  * holds space separated "bus/slot/func" triples.
  */
 boolean_t
 vmm_is_pptdev(int bus, int slot, int func)
 {
 	int found, i, n;
 	int b, s, f;
 	char *val, *cp, *cp2;
 
 	/*
 	 * XXX
 	 * The length of an environment variable is limited to 128 bytes which
 	 * puts an upper limit on the number of passthru devices that may be
 	 * specified using a single environment variable.
 	 *
 	 * Work around this by scanning multiple environment variable
 	 * names instead of a single one - yuck!
 	 */
 	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
 
 	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
 	found = 0;
 	for (i = 0; names[i] != NULL && !found; i++) {
 		cp = val = getenv(names[i]);
 		while (cp != NULL && *cp != '\0') {
 			/* temporarily terminate the current token */
 			if ((cp2 = strchr(cp, ' ')) != NULL)
 				*cp2 = '\0';
 
 			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
 			if (n == 3 && bus == b && slot == s && func == f) {
 				found = 1;
 				break;
 			}
 		
 			/* put the separator back and step to the next token */
 			if (cp2 != NULL)
 				*cp2++ = ' ';
 
 			cp = cp2;
 		}
 		freeenv(val);
 	}
 	return (found);
 }
 
 /* Return the iommu-specific data of 'vm' (NULL until a domain is created). */
 void *
 vm_iommu_domain(struct vm *vm)
 {
 	void *domain;
 
 	domain = vm->iommu;
 	return (domain);
 }
 
 /*
  * Move vcpu 'vcpuid' to 'newstate' while holding the vcpu lock. See
  * vcpu_set_state_locked() for the meaning of 'from_idle' and for the
  * transitions that are permitted.
  *
  * Returns 0 on success or EBUSY if the transition is refused. Panics if
  * 'vcpuid' is out of range.
  */
 int
 vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
     bool from_idle)
 {
 	int error;
 	struct vcpu *vcpu;
 
 	/* The message names this function rather than a stale older name. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
 	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
 	vcpu_unlock(vcpu);
 
 	return (error);
 }
 
 enum vcpu_state
 vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
 {
 	struct vcpu *vcpu;
 	enum vcpu_state state;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
 	state = vcpu->state;
 	if (hostcpu != NULL)
 		*hostcpu = vcpu->hostcpu;
 	vcpu_unlock(vcpu);
 
 	return (state);
 }
 
 /*
  * Add 'vcpuid' to the VM's active cpu set.  Out-of-range ids are ignored.
  */
 void
 vm_activate_cpu(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return;
 
 	CPU_SET(vcpuid, &vm->active_cpus);
 }
 
 /*
  * Remove 'vcpuid' from the VM's active cpu set.  Out-of-range ids are
  * ignored.
  */
 static void
 vm_deactivate_cpu(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return;
 
 	CPU_CLR(vcpuid, &vm->active_cpus);
 }
 
 /*
  * Return a copy of the VM's active cpu set.
  */
 cpuset_t
 vm_active_cpus(struct vm *vm)
 {
 	cpuset_t active;
 
 	active = vm->active_cpus;
 	return (active);
 }
 
 void *
 vcpu_stats(struct vm *vm, int vcpuid)
 {
 
 	return (vm->vcpu[vcpuid].stats);
 }
 
 /*
  * Copy the x2apic state of vcpu 'vcpuid' into '*state'.
  *
  * Returns 0 on success, or EINVAL (leaving '*state' untouched) when
  * 'vcpuid' is outside [0, VM_MAXCPU).
  */
 int
 vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
 {
 	if (vcpuid >= 0 && vcpuid < VM_MAXCPU) {
 		*state = vm->vcpu[vcpuid].x2apic_state;
 		return (0);
 	}
 
 	return (EINVAL);
 }
 
 int
 vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
 {
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (state >= X2APIC_STATE_LAST)
 		return (EINVAL);
 
 	vm->vcpu[vcpuid].x2apic_state = state;
 
 	vlapic_set_x2apic_state(vm, vcpuid, state);
 
 	return (0);
 }
 
 /*
  * Make a vcpu notice a pending event as soon as possible:
  * - A vcpu with no host cpu (hostcpu == NOCPU) that is in VCPU_SLEEPING
  *   is woken up.
  * - A vcpu running on a host cpu other than the current one has that host
  *   cpu poked, either through vlapic_post_intr() when 'lapic_intr' is true
  *   or with a plain IPI otherwise, so that it traps into the hypervisor.
  *
  * A vcpu that has a host cpu but is not in VCPU_RUNNING causes a panic.
  */
 void
 vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
 {
 	struct vcpu *target;
 	int cpu;
 
 	target = &vm->vcpu[vcpuid];
 
 	vcpu_lock(target);
 	cpu = target->hostcpu;
 	if (cpu != NOCPU) {
 		if (target->state != VCPU_RUNNING)
 			panic("invalid vcpu state %d", target->state);
 		if (cpu != curcpu) {
 			if (!lapic_intr)
 				ipi_cpu(cpu, vmm_ipinum);
 			else
 				vlapic_post_intr(target->vlapic, cpu,
 				    vmm_ipinum);
 		}
 	} else if (target->state == VCPU_SLEEPING) {
 		wakeup_one(target);
 	}
 	vcpu_unlock(target);
 }
 
 /*
  * Return the vmspace pointer stored in 'vm'.
  */
 struct vmspace *
 vm_get_vmspace(struct vm *vm)
 {
 	struct vmspace *vmspace;
 
 	vmspace = vm->vmspace;
 	return (vmspace);
 }
 
 /*
  * Translate an APIC id into a vcpu id.  'vm' is not consulted.
  */
 int
 vm_apicid2vcpuid(struct vm *vm, int apicid)
 {
 	int vcpuid;
 
 	/*
 	 * XXX apic id is assumed to be numerically identical to vcpu id
 	 */
 	vcpuid = apicid;
 	return (vcpuid);
 }
 
 /*
  * Start a rendezvous that runs 'func(arg)' for the vcpus in 'dest', then
  * take part in it on behalf of 'vcpuid' via vm_handle_rendezvous().
  * 'vcpuid' must be -1 or lie in [0, VM_MAXCPU).
  *
  * While another rendezvous is still registered, this caller first services
  * it through vm_handle_rendezvous() and then retries.
  */
 void
 vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
     vm_rendezvous_func_t func, void *arg)
 {
 	/*
 	 * Enforce that this function is called without any locks
 	 */
 	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
 	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
 
 	/* Leave this loop with rendezvous_mtx held and no rendezvous active. */
 	for (;;) {
 		mtx_lock(&vm->rendezvous_mtx);
 		if (vm->rendezvous_func == NULL)
 			break;
 
 		/*
 		 * If a rendezvous is already in progress then we need to
 		 * call the rendezvous handler in case this 'vcpuid' is one
 		 * of the targets of the rendezvous.
 		 */
 		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
 		mtx_unlock(&vm->rendezvous_mtx);
 		vm_handle_rendezvous(vm, vcpuid);
 	}
 	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
 	    "rendezvous is still in progress"));
 
 	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
 	vm->rendezvous_req_cpus = dest;
 	CPU_ZERO(&vm->rendezvous_done_cpus);
 	vm->rendezvous_arg = arg;
 	vm_set_rendezvous_func(vm, func);
 	mtx_unlock(&vm->rendezvous_mtx);
 
 	vm_handle_rendezvous(vm, vcpuid);
 }
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
index 47e04da6b8b3..640c779c3d4a 100644
--- a/sys/amd64/vmm/vmm_lapic.c
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -1,243 +1,242 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/smp.h>
 
 #include <x86/specialreg.h>
 #include <x86/apicreg.h>
 
 #include <machine/vmm.h>
 #include "vmm_ipi.h"
 #include "vmm_ktr.h"
 #include "vmm_lapic.h"
 #include "vlapic.h"
 
 /*
  * Some MSI message definitions
  */
 #define	MSI_X86_ADDR_MASK	0xfff00000
 #define	MSI_X86_ADDR_BASE	0xfee00000
 #define	MSI_X86_ADDR_RH		0x00000008	/* Redirection Hint */
 #define	MSI_X86_ADDR_LOG	0x00000004	/* Destination Mode */
 
 /*
  * Mark 'vector' as ready in the local APIC of vcpu 'cpu' and notify the
  * vcpu when vlapic_set_intr_ready() returns non-zero.
  *
  * Returns EINVAL when 'cpu' is outside [0, VM_MAXCPU) or 'vector' is
  * outside [32, 255]; otherwise returns 0.
  */
 int
 lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
 {
 
 	if (cpu < 0 || cpu >= VM_MAXCPU || vector < 32 || vector > 255)
 		return (EINVAL);
 
 	if (vlapic_set_intr_ready(vm_lapic(vm, cpu), vector, level))
 		vcpu_notify_event(vm, cpu, true);
 
 	return (0);
 }
 
 int
 lapic_set_local_intr(struct vm *vm, int cpu, int vector)
 {
 	struct vlapic *vlapic;
 	cpuset_t dmask;
 	int error;
 
 	if (cpu < -1 || cpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (cpu == -1)
 		dmask = vm_active_cpus(vm);
 	else
 		CPU_SETOF(cpu, &dmask);
 	error = 0;
 	while ((cpu = CPU_FFS(&dmask)) != 0) {
 		cpu--;
 		CPU_CLR(cpu, &dmask);
 		vlapic = vm_lapic(vm, cpu);
 		error = vlapic_trigger_lvt(vlapic, vector);
 		if (error)
 			break;
 	}
 
 	return (error);
 }
 
 /*
  * Deliver an MSI described by 'addr' and 'msg' as an edge-triggered
  * interrupt through vlapic_deliver_intr().
  *
  * Returns -1 when 'addr' does not fall in the MSI address window selected
  * by MSI_X86_ADDR_MASK/MSI_X86_ADDR_BASE, and 0 otherwise.
  */
 int
 lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg)
 {
 	const uint64_t logical_bits = MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG;
 	uint32_t dest;
 	int delmode, vec;
 	bool phys;
 
 	VM_CTR2(vm, "lapic MSI addr: %#lx msg: %#lx", addr, msg);
 
 	if ((addr & MSI_X86_ADDR_MASK) != MSI_X86_ADDR_BASE) {
 		VM_CTR1(vm, "lapic MSI invalid addr %#lx", addr);
 		return (-1);
 	}
 
 	/*
 	 * Extract the x86-specific fields from the MSI addr/msg
 	 * params according to the Intel Arch spec, Vol3 Ch 10.
 	 *
 	 * The PCI specification does not support level triggered
 	 * MSI/MSI-X so ignore trigger level in 'msg'.
 	 *
 	 * The 'dest' is interpreted as a logical APIC ID if both
 	 * the Redirection Hint and Destination Mode are '1' and
 	 * physical otherwise.
 	 */
 	vec = msg & 0xff;
 	delmode = msg & APIC_DELMODE_MASK;
 	dest = (addr >> 12) & 0xff;
 	phys = ((addr & logical_bits) != logical_bits);
 
 	VM_CTR3(vm, "lapic MSI %s dest %#x, vec %d",
 	    phys ? "physical" : "logical", dest, vec);
 
 	vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec);
 	return (0);
 }
 
 /*
  * Return TRUE when 'msr' lies in the range 0x800-0xBFF inclusive, and
  * FALSE otherwise.
  */
 static boolean_t
 x2apic_msr(u_int msr)
 {
 
 	return ((msr >= 0x800 && msr <= 0xBFF) ? TRUE : FALSE);
 }
 
 /*
  * Convert an MSR number into a register offset: the distance from MSR
  * 0x800 shifted left by 4 (one 16-byte slot per MSR).
  */
 static u_int
 x2apic_msr_to_regoff(u_int msr)
 {
 	u_int offset;
 
 	offset = (msr - 0x800) << 4;
 	return (offset);
 }
 
 /*
  * Return TRUE when 'msr' is MSR_APICBASE or is accepted by x2apic_msr(),
  * and FALSE otherwise.
  */
 boolean_t
 lapic_msr(u_int msr)
 {
 
 	return ((msr == MSR_APICBASE || x2apic_msr(msr)) ? TRUE : FALSE);
 }
 
 /*
  * Emulate a read of local APIC MSR 'msr' on vcpu 'cpu', storing the value
  * in '*rval'.
  *
  * MSR_APICBASE is answered from vlapic_get_apicbase() and always succeeds.
  * Any other MSR is converted to a register offset with
  * x2apic_msr_to_regoff() and handed to vlapic_read(), whose result is
  * returned.  'msr' is not validated here.
  */
 int
 lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu)
 {
 	int error;
 	u_int offset;
 	struct vlapic *vlapic;
 
 	vlapic = vm_lapic(vm, cpu);
 
 	if (msr == MSR_APICBASE) {
 		*rval = vlapic_get_apicbase(vlapic);
 		error = 0;
 	} else {
 		offset = x2apic_msr_to_regoff(msr);
 		/*
 		 * NOTE(review): the added second argument (0 here, 1 in the
 		 * mmio handlers) presumably tells vlapic_read() whether this
 		 * is a memory-mapped access -- confirm against vlapic.h.
 		 */
-		error = vlapic_read(vlapic, offset, rval, retu);
+		error = vlapic_read(vlapic, 0, offset, rval, retu);
 	}
 
 	return (error);
 }
 
 /*
  * Emulate a write of 'val' to local APIC MSR 'msr' on vcpu 'cpu'.
  *
  * MSR_APICBASE is handled by vlapic_set_apicbase().  Any other MSR is
  * converted to a register offset with x2apic_msr_to_regoff() and handed to
  * vlapic_write().  'msr' is not validated here.
  */
 int
 lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu)
 {
 	int error;
 	u_int offset;
 	struct vlapic *vlapic;
 
 	vlapic = vm_lapic(vm, cpu);
 
 	if (msr == MSR_APICBASE) {
 		/* After this change the result of the write is propagated. */
-		vlapic_set_apicbase(vlapic, val);
-		error = 0;
+		error = vlapic_set_apicbase(vlapic, val);
 	} else {
 		offset = x2apic_msr_to_regoff(msr);
 		/*
 		 * NOTE(review): the added second argument (0 here, 1 in the
 		 * mmio handlers) presumably tells vlapic_write() whether
 		 * this is a memory-mapped access -- confirm against vlapic.h.
 		 */
-		error = vlapic_write(vlapic, offset, val, retu);
+		error = vlapic_write(vlapic, 0, offset, val, retu);
 	}
 
 	return (error);
 }
 
 /*
  * Handle a memory-mapped write of 'wval' to the local APIC of vcpu 'cpu'.
  * The register offset is 'gpa' relative to DEFAULT_APIC_BASE.
  *
  * Returns EINVAL unless the access is 4 bytes wide and 16-byte aligned;
  * otherwise returns the result of vlapic_write().  'arg' is passed through
  * to vlapic_write() unchanged.
  */
 int
 lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
 		 void *arg)
 {
 	int error;
 	uint64_t off;
 	struct vlapic *vlapic;
 
 	off = gpa - DEFAULT_APIC_BASE;
 
 	/*
 	 * Memory mapped local apic accesses must be 4 bytes wide and
 	 * aligned on a 16-byte boundary.
 	 */
 	if (size != 4 || off & 0xf)
 		return (EINVAL);
 
 	vlapic = vm_lapic(vm, cpu);
-	error = vlapic_write(vlapic, off, wval, arg);
+	error = vlapic_write(vlapic, 1, off, wval, arg);
 	return (error);
 }
 
 /*
  * Handle a memory-mapped read from the local APIC of vcpu 'cpu', storing
  * the value in '*rval'.  The register offset is 'gpa' relative to
  * DEFAULT_APIC_BASE.
  *
  * Returns EINVAL unless the access is 4 bytes wide and 16-byte aligned;
  * otherwise returns the result of vlapic_read().  'arg' is passed through
  * to vlapic_read() unchanged.
  */
 int
 lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
 		void *arg)
 {
 	int error;
 	uint64_t off;
 	struct vlapic *vlapic;
 
 	off = gpa - DEFAULT_APIC_BASE;
 
 	/*
 	 * Memory mapped local apic accesses must be 4 bytes wide and
 	 * aligned on a 16-byte boundary.
 	 */
 	if (size != 4 || off & 0xf)
 		return (EINVAL);
 
 	vlapic = vm_lapic(vm, cpu);
-	error = vlapic_read(vlapic, off, rval, arg);
+	error = vlapic_read(vlapic, 1, off, rval, arg);
 	return (error);
 }
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index 7ae32eca5d69..d3a0248a825a 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -1,337 +1,339 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/pcpu.h>
 #include <sys/systm.h>
 #include <sys/cpuset.h>
 
 #include <machine/clock.h>
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 #include <machine/segments.h>
 #include <machine/specialreg.h>
 
 #include <machine/vmm.h>
 
 #include "vmm_host.h"
 #include "x86.h"
 
 #define	CPUID_VM_HIGH		0x40000000
 
 static const char bhyve_id[12] = "bhyve bhyve ";
 
 static uint64_t bhyve_xcpuids;
 
 /*
  * Emulate the CPUID instruction for vcpu 'vcpu_id'.
  *
  * On entry '*eax' holds the requested leaf and '*ecx' the sub-leaf.  The
  * leaf is first clamped to the highest supported value of its range.  Most
  * leaves start from the host's CPUID output and then have features masked
  * or overridden; a few are synthesized outright.  On return '*eax',
  * '*ebx', '*ecx' and '*edx' hold the emulated register values.
  *
  * Always returns 1.  Failures to fetch the x2apic state or %cr4 panic.
  */
 int
 x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 		  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
 {
 	const struct xsave_limits *limits;
 	uint64_t cr4;
 	int error, enable_invpcid;
 	unsigned int 	func, regs[4];
 	enum x2apic_state x2apic_state;
 
 	/*
 	 * Requests for invalid CPUID levels should map to the highest
 	 * available level instead.
 	 */
 	if (cpu_exthigh != 0 && *eax >= 0x80000000) {
 		if (*eax > cpu_exthigh)
 			*eax = cpu_exthigh;
 	} else if (*eax >= 0x40000000) {
 		if (*eax > CPUID_VM_HIGH)
 			*eax = CPUID_VM_HIGH;
 	} else if (*eax > cpu_high) {
 		*eax = cpu_high;
 	}
 
 	func = *eax;
 
 	/*
 	 * In general the approach used for CPU topology is to
 	 * advertise a flat topology where all CPUs are packages with
 	 * no multi-core or SMT.
 	 */
 	switch (func) {
 		/*
 		 * Pass these through to the guest
 		 */
 		case CPUID_0000_0000:
 		case CPUID_0000_0002:
 		case CPUID_0000_0003:
 		case CPUID_8000_0000:
 		case CPUID_8000_0002:
 		case CPUID_8000_0003:
 		case CPUID_8000_0004:
 		case CPUID_8000_0006:
 		case CPUID_8000_0008:
 			cpuid_count(*eax, *ecx, regs);
 			break;
 
 		case CPUID_8000_0001:
 			/*
 			 * Hide rdtscp/ia32_tsc_aux until we know how
 			 * to deal with them.
 			 */
 			cpuid_count(*eax, *ecx, regs);
 			regs[3] &= ~AMDID_RDTSCP;
 			break;
 
 		case CPUID_8000_0007:
 			cpuid_count(*eax, *ecx, regs);
 			/*
 			 * If the host TSCs are not synchronized across
 			 * physical cpus then we cannot advertise an
 			 * invariant tsc to a vcpu.
 			 *
 			 * XXX This still falls short because the vcpu
 			 * can observe the TSC moving backwards as it
 			 * migrates across physical cpus. But at least
 			 * it should discourage the guest from using the
 			 * TSC to keep track of time.
 			 */
 			if (!smp_tsc)
 				regs[3] &= ~AMDPM_TSC_INVARIANT;
 			break;
 
 		case CPUID_0000_0001:
 			do_cpuid(1, regs);
 
 			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
 			if (error) {
 				panic("x86_emulate_cpuid: error %d "
 				      "fetching x2apic state", error);
 			}
 
 			/*
 			 * Override the APIC ID only in ebx
 			 */
 			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
 			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
 
 			/*
 			 * Don't expose VMX, SpeedStep or TM2 capability.
 			 * Always advertise Hypervisor guest; advertise
 			 * x2APIC only when it is not disabled for this vcpu.
 			 */
 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
 
 			regs[2] |= CPUID2_HV;
 
 			if (x2apic_state != X2APIC_DISABLED)
 				regs[2] |= CPUID2_X2APIC;
+			else
+				regs[2] &= ~CPUID2_X2APIC;
 
 			/*
 			 * Only advertise CPUID2_XSAVE in the guest if
 			 * the host is using XSAVE.
 			 */
 			if (!(regs[2] & CPUID2_OSXSAVE))
 				regs[2] &= ~CPUID2_XSAVE;
 
 			/*
 			 * If CPUID2_XSAVE is being advertised and the
 			 * guest has set CR4_XSAVE, set
 			 * CPUID2_OSXSAVE.
 			 */
 			regs[2] &= ~CPUID2_OSXSAVE;
 			if (regs[2] & CPUID2_XSAVE) {
 				error = vm_get_register(vm, vcpu_id,
 				    VM_REG_GUEST_CR4, &cr4);
 				if (error)
 					panic("x86_emulate_cpuid: error %d "
 					      "fetching %%cr4", error);
 				if (cr4 & CR4_XSAVE)
 					regs[2] |= CPUID2_OSXSAVE;
 			}
 
 			/*
 			 * Hide monitor/mwait until we know how to deal with
 			 * these instructions.
 			 */
 			regs[2] &= ~CPUID2_MON;
 
                         /*
 			 * Hide the performance and debug features.
 			 */
 			regs[2] &= ~CPUID2_PDCM;
 
 			/*
 			 * No TSC deadline support in the APIC yet
 			 */
 			regs[2] &= ~CPUID2_TSCDLT;
 
 			/*
 			 * Hide thermal monitoring
 			 */
 			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
 			
 			/*
 			 * Machine check handling is done in the host.
 			 * Hide MTRR capability.
 			 */
 			regs[3] &= ~(CPUID_MCA | CPUID_MCE | CPUID_MTRR);
 
                         /*
                         * Hide the debug store capability.
                         */
 			regs[3] &= ~CPUID_DS;
 
 			/*
 			 * Disable multi-core.
 			 */
 			regs[1] &= ~CPUID_HTT_CORES;
 			regs[3] &= ~CPUID_HTT;
 			break;
 
 		case CPUID_0000_0004:
 			do_cpuid(4, regs);
 
 			/*
 			 * Do not expose topology.
 			 */
 			regs[0] &= 0xffff8000;
 			regs[0] |= 0x04008000;
 			break;
 
 		case CPUID_0000_0007:
 			regs[0] = 0;
 			regs[1] = 0;
 			regs[2] = 0;
 			regs[3] = 0;
 
 			/* leaf 0 */
 			if (*ecx == 0) {
 				error = vm_get_capability(vm, vcpu_id,
 				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
 				if (error == 0 && enable_invpcid)
 					regs[1] |= CPUID_STDEXT_INVPCID;
 			}
 			break;
 
 		case CPUID_0000_0006:
 		case CPUID_0000_000A:
 			/*
 			 * Handle the access, but report 0 for
 			 * all options
 			 */
 			regs[0] = 0;
 			regs[1] = 0;
 			regs[2] = 0;
 			regs[3] = 0;
 			break;
 
 		case CPUID_0000_000B:
 			/*
 			 * Processor topology enumeration
 			 */
 			regs[0] = 0;
 			regs[1] = 0;
 			regs[2] = *ecx & 0xff;
 			regs[3] = vcpu_id;
 			break;
 
 		case CPUID_0000_000D:
 			limits = vmm_get_xsave_limits();
 			if (!limits->xsave_enabled) {
 				regs[0] = 0;
 				regs[1] = 0;
 				regs[2] = 0;
 				regs[3] = 0;
 				break;
 			}
 
 			cpuid_count(*eax, *ecx, regs);
 			switch (*ecx) {
 			case 0:
 				/*
 				 * Only permit the guest to use bits
 				 * that are active in the host in
 				 * %xcr0.  Also, claim that the
 				 * maximum save area size is
 				 * equivalent to the host's current
 				 * save area size.  Since this runs
 				 * "inside" of vmrun(), it runs with
 				 * the guest's xcr0, so the current
 				 * save area size is correct as-is.
 				 */
 				regs[0] &= limits->xcr0_allowed;
 				regs[2] = limits->xsave_max_size;
 				regs[3] &= (limits->xcr0_allowed >> 32);
 				break;
 			case 1:
 				/* Only permit XSAVEOPT. */
 				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
 				regs[1] = 0;
 				regs[2] = 0;
 				regs[3] = 0;
 				break;
 			default:
 				/*
 				 * If the leaf is for a permitted feature,
 				 * pass through as-is, otherwise return
 				 * all zeroes.
 				 *
 				 * NOTE(review): '*ecx' is supplied by the
 				 * caller and is not range-checked in this
 				 * function; a value of 64 or more would
 				 * make the shift below undefined in C.
 				 * Confirm whether callers bound it.
 				 */
 				if (!(limits->xcr0_allowed & (1ul << *ecx))) {
 					regs[0] = 0;
 					regs[1] = 0;
 					regs[2] = 0;
 					regs[3] = 0;
 				}
 				break;
 			}
 			break;
 
 		case 0x40000000:
 			/*
 			 * Hypervisor leaf: highest hypervisor leaf in eax
 			 * and the 12-byte 'bhyve_id' string spread over
 			 * ebx, ecx and edx.
 			 */
 			regs[0] = CPUID_VM_HIGH;
 			bcopy(bhyve_id, &regs[1], 4);
 			bcopy(bhyve_id + 4, &regs[2], 4);
 			bcopy(bhyve_id + 8, &regs[3], 4);
 			break;
 
 		default:
 			/*
 			 * The leaf value has already been clamped so
 			 * simply pass this through, keeping count of
 			 * how many unhandled leaf values have been seen.
 			 */
 			atomic_add_long(&bhyve_xcpuids, 1);
 			cpuid_count(*eax, *ecx, regs);
 			break;
 	}
 
 	*eax = regs[0];
 	*ebx = regs[1];
 	*ecx = regs[2];
 	*edx = regs[3];
 
 	return (1);
 }
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index c587aeff29b6..2218622569cb 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -1,743 +1,741 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <sys/time.h>
 
 #include <machine/atomic.h>
 #include <machine/segments.h>
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <err.h>
 #include <libgen.h>
 #include <unistd.h>
 #include <assert.h>
 #include <errno.h>
 #include <pthread.h>
 #include <pthread_np.h>
 #include <sysexits.h>
 
 #include <machine/vmm.h>
 #include <vmmapi.h>
 
 #include "bhyverun.h"
 #include "acpi.h"
 #include "inout.h"
 #include "dbgport.h"
 #include "ioapic.h"
 #include "mem.h"
 #include "mevent.h"
 #include "mptbl.h"
 #include "pci_emul.h"
 #include "pci_lpc.h"
 #include "xmsr.h"
 #include "spinup_ap.h"
 #include "rtc.h"
 
 #define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */
 
 #define	VMEXIT_SWITCH		0	/* force vcpu switch in mux mode */
 #define	VMEXIT_CONTINUE		1	/* continue from next instruction */
 #define	VMEXIT_RESTART		2	/* restart current instruction */
 #define	VMEXIT_ABORT		3	/* abort the vm run loop */
 #define	VMEXIT_RESET		4	/* guest machine has reset */
 #define	VMEXIT_POWEROFF		5	/* guest machine has powered off */
 
 #define MB		(1024UL * 1024)
 #define GB		(1024UL * MB)
 
 typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
 
 char *vmname;
 
 int guest_ncpus;
 
 static int pincpu = -1;
-static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic;
+static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
 static int virtio_msix = 1;
+static int x2apic_mode = 0;	/* default is xAPIC */
 
 static int strictio;
 static int strictmsr = 1;
 
 static int acpi;
 
 static char *progname;
 static const int BSP = 0;
 
 static int cpumask;
 
 static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
 
 struct vm_exit vmexit[VM_MAXCPU];
 
 /*
  * Process-wide event counters kept in the single global 'stats'.
  * vmexit_bogus, vmexit_hlt, vmexit_pause, vmexit_mtrap, vmexit_inst_emul
  * and io_reset are each incremented by the like-named handler in this
  * file.  The remaining fields are not touched by the handlers visible
  * here -- their users, if any, are elsewhere.
  */
 struct bhyvestats {
         uint64_t        vmexit_bogus;
         uint64_t        vmexit_bogus_switch;
         uint64_t        vmexit_hlt;
         uint64_t        vmexit_pause;
         uint64_t        vmexit_mtrap;
         uint64_t        vmexit_inst_emul;
         uint64_t        cpu_switch_rotate;
         uint64_t        cpu_switch_direct;
         int             io_reset;
 } stats;
 
 /*
  * Per-vcpu thread bookkeeping, indexed by vcpu id.  fbsdrun_addcpu() fills
  * in an entry and passes it to the new thread; fbsdrun_start_thread()
  * reads it back.
  */
 struct mt_vmm_info {
 	pthread_t	mt_thr;		/* thread created for this vcpu */
 	struct vmctx	*mt_ctx;	/* context handed to vm_loop() */
 	int		mt_vcpu;	/* vcpu id this entry describes */
 } mt_vmm_info[VM_MAXCPU];
 
 /*
  * Print the command line synopsis and option summary to stderr, then
  * terminate the process with exit status 'code'.  Does not return.
  */
 static void
 usage(int code)
 {
 
         /*
          * The "%*s" conversion prints an empty string padded to the width
          * of 'progname' so the second synopsis line lines up with the
          * first.
          */
         fprintf(stderr,
                 "Usage: %s [-aehwAHIPW] [-g <gdb port>] [-s <pci>]\n"
 		"       %*s [-c vcpus] [-p pincpu] [-m mem] [-l <lpc>] <vm>\n"
-		"       -a: local apic is in XAPIC mode (default is X2APIC)\n"
+		"       -a: local apic is in xAPIC mode (deprecated)\n"
 		"       -A: create an ACPI table\n"
 		"       -g: gdb port\n"
 		"       -c: # cpus (default 1)\n"
 		"       -p: pin vcpu 'n' to host cpu 'pincpu + n'\n"
 		"       -H: vmexit from the guest on hlt\n"
 		"       -P: vmexit from the guest on pause\n"
 		"       -W: force virtio to use single-vector MSI\n"
 		"       -e: exit on unhandled I/O access\n"
 		"       -h: help\n"
 		"       -s: <slot,driver,configinfo> PCI slot config\n"
 		"       -l: LPC device configuration\n"
 		"       -m: memory size in MB\n"
-		"       -w: ignore unimplemented MSRs\n",
+		"       -w: ignore unimplemented MSRs\n"
+		"       -x: local apic is in x2APIC mode\n",
 		progname, (int)strlen(progname), "");
 
 	exit(code);
 }
 
 /*
  * Translate the guest physical range ['gaddr', 'gaddr' + 'len') into a
  * host pointer by delegating to vm_map_gpa(); its result is returned
  * unchanged.
  */
 void *
 paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
 {
 	void *hostaddr;
 
 	hostaddr = vm_map_gpa(ctx, gaddr, len);
 	return (hostaddr);
 }
 
-int
-fbsdrun_disable_x2apic(void)
-{
-
-	return (disable_x2apic);
-}
-
 /*
  * Return the current value of the 'guest_vmexit_on_pause' setting.
  */
 int
 fbsdrun_vmexit_on_pause(void)
 {
 	int enabled;
 
 	enabled = guest_vmexit_on_pause;
 	return (enabled);
 }
 
 /*
  * Return the current value of the 'guest_vmexit_on_hlt' setting.
  */
 int
 fbsdrun_vmexit_on_hlt(void)
 {
 	int enabled;
 
 	enabled = guest_vmexit_on_hlt;
 	return (enabled);
 }
 
 /*
  * Return the current value of the 'virtio_msix' setting.
  */
 int
 fbsdrun_virtio_msix(void)
 {
 	int enabled;
 
 	enabled = virtio_msix;
 	return (enabled);
 }
 
 /*
  * Thread entry point for a vcpu.  'param' is the vcpu's mt_vmm_info entry.
  * Names the thread "vcpu N" and enters vm_loop() starting at the rip
  * recorded in vmexit[N].  If vm_loop() ever returns the process exits with
  * status 1.
  */
 static void *
 fbsdrun_start_thread(void *param)
 {
 	struct mt_vmm_info *info;
 	char tname[MAXCOMLEN + 1];
 	int id;
 
 	info = param;
 	id = info->mt_vcpu;
 
 	snprintf(tname, sizeof(tname), "vcpu %d", id);
 	pthread_set_name_np(info->mt_thr, tname);
 
 	vm_loop(info->mt_ctx, id, vmexit[id].rip);
 
 	/* not reached */
 	exit(1);
 	return (NULL);
 }
 
 void
 fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
 {
 	int error;
 
 	if (cpumask & (1 << vcpu)) {
 		fprintf(stderr, "addcpu: attempting to add existing cpu %d\n",
 		    vcpu);
 		exit(1);
 	}
 
 	atomic_set_int(&cpumask, 1 << vcpu);
 
 	/*
 	 * Set up the vmexit struct to allow execution to start
 	 * at the given RIP
 	 */
 	vmexit[vcpu].rip = rip;
 	vmexit[vcpu].inst_length = 0;
 
 	mt_vmm_info[vcpu].mt_ctx = ctx;
 	mt_vmm_info[vcpu].mt_vcpu = vcpu;
 
 	error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL,
 	    fbsdrun_start_thread, &mt_vmm_info[vcpu]);
 	assert(error == 0);
 }
 
 /*
  * Remove 'vcpu' from 'cpumask'.  Exits with status 1 when the vcpu is not
  * present in the mask.
  *
  * Returns non-zero when 'cpumask' is empty after the removal, i.e. this
  * was the last vcpu, and zero otherwise.  'ctx' is unused.
  */
 static int
 fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
 {
 
 	if ((cpumask & (1 << vcpu)) == 0) {
 		/* Name this function in the diagnostic, not fbsdrun_addcpu(). */
 		fprintf(stderr,
 		    "deletecpu: attempting to delete unknown cpu %d\n", vcpu);
 		exit(1);
 	}
 
 	atomic_clear_int(&cpumask, 1 << vcpu);
 	return (cpumask == 0);
 }
 
 /*
  * Count a guest reset in 'stats.io_reset' and return VMEXIT_RESET.
  */
 static int
 vmexit_catch_reset(void)
 {
 
 	stats.io_reset += 1;
 	return (VMEXIT_RESET);
 }
 
 /*
  * Result used for an i/o access that could not be handled: always
  * VMEXIT_ABORT.
  */
 static int
 vmexit_catch_inout(void)
 {
 	const int action = VMEXIT_ABORT;
 
 	return (action);
 }
 
 /*
  * Handle a guest write to GUEST_NIO_PORT.  Currently a placeholder: none
  * of the arguments are examined and VMEXIT_CONTINUE is always returned.
  */
 static int
 vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
 		     uint32_t eax)
 {
 	const int action = VMEXIT_CONTINUE;
 
 #if BHYVE_DEBUG
 	/*
 	 * put guest-driven debug here
 	 */
 #endif
 	return (action);
 }
 
 static int
 vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	int error;
 	int bytes, port, in, out;
 	uint32_t eax;
 	int vcpu;
 
 	vcpu = *pvcpu;
 
 	port = vme->u.inout.port;
 	bytes = vme->u.inout.bytes;
 	eax = vme->u.inout.eax;
 	in = vme->u.inout.in;
 	out = !in;
 
 	/* We don't deal with these */
 	if (vme->u.inout.string || vme->u.inout.rep)
 		return (VMEXIT_ABORT);
 
 	/* Special case of guest reset */
 	if (out && port == 0x64 && (uint8_t)eax == 0xFE)
 		return (vmexit_catch_reset());
 
         /* Extra-special case of host notifications */
         if (out && port == GUEST_NIO_PORT)
                 return (vmexit_handle_notify(ctx, vme, pvcpu, eax));
 
 	error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio);
 	if (error == INOUT_OK && in)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax);
 
 	switch (error) {
 	case INOUT_OK:
 		return (VMEXIT_CONTINUE);
 	case INOUT_RESET:
 		return (VMEXIT_RESET);
 	case INOUT_POWEROFF:
 		return (VMEXIT_POWEROFF);
 	default:
 		fprintf(stderr, "Unhandled %s%c 0x%04x\n",
 			in ? "in" : "out",
 			bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
 		return (vmexit_catch_inout());
 	}
 }
 
 static int
 vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	uint64_t val;
 	uint32_t eax, edx;
 	int error;
 
 	val = 0;
 	error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val);
 	if (error != 0) {
 		fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
 		    vme->u.msr.code, *pvcpu);
 		if (strictmsr)
 			return (VMEXIT_ABORT);
 	}
 
 	eax = val;
 	error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax);
 	assert(error == 0);
 
 	edx = val >> 32;
 	error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx);
 	assert(error == 0);
 
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handle a guest wrmsr exit by passing the MSR number and value to
  * emulate_wrmsr().
  *
  * An emulation failure is logged and, when 'strictmsr' is set, aborts the
  * run loop; otherwise VMEXIT_CONTINUE is returned.
  */
 static int
 vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	int error;
 
 	error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
 	if (error == 0)
 		return (VMEXIT_CONTINUE);
 
 	fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
 	    vme->u.msr.code, vme->u.msr.wval, *pvcpu);
 
 	return (strictmsr ? VMEXIT_ABORT : VMEXIT_CONTINUE);
 }
 
 /*
  * Handle a request to start another vcpu by calling spinup_ap() with the
  * target vcpu and start rip taken from the exit record.  The value
  * returned by spinup_ap() is ignored; the result is always
  * VMEXIT_CONTINUE.
  */
 static int
 vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 
 	(void)spinup_ap(ctx, *pvcpu,
 	    vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
 
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handle a vcpu shutting itself down.  The vcpu is removed with
  * fbsdrun_deletecpu(); unless it was the last one, the calling thread
  * exits.  For the last vcpu the result of vmexit_catch_reset() is
  * returned.
  */
 static int
 vmexit_spindown_cpu(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 
 	if (fbsdrun_deletecpu(ctx, *pvcpu) == 0)
 		pthread_exit(NULL);
 
 	return (vmexit_catch_reset());
 }
 
 /*
  * Handle a VM_EXITCODE_VMX exit: dump the fields of the exit record to
  * stderr and abort the run loop.
  */
 static int
 vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	FILE *out;
 
 	out = stderr;
 
 	fprintf(out, "vm exit[%d]\n", *pvcpu);
 	fprintf(out, "\treason\t\tVMX\n");
 	fprintf(out, "\trip\t\t0x%016lx\n", vmexit->rip);
 	fprintf(out, "\tinst_length\t%d\n", vmexit->inst_length);
 	fprintf(out, "\tstatus\t\t%d\n", vmexit->u.vmx.status);
 	fprintf(out, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
 	fprintf(out, "\tqualification\t0x%016lx\n",
 	    vmexit->u.vmx.exit_qualification);
 	fprintf(out, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
 	fprintf(out, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
 
 	return (VMEXIT_ABORT);
 }
 
 /*
  * Handle a VM_EXITCODE_BOGUS exit: count it and restart the current
  * instruction.
  */
 static int
 vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	stats.vmexit_bogus += 1;
 	return (VMEXIT_RESTART);
 }
 
 /*
  * Handle a HLT exit: count it and resume the guest at the next
  * instruction.
  */
 static int
 vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	stats.vmexit_hlt += 1;
 
 	/*
 	 * The HLT exit only exists to be friendly with the host
 	 * scheduler, so simply carry on with the following instruction.
 	 */
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handle a PAUSE exit: count it and resume the guest at the next
  * instruction.
  */
 static int
 vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	stats.vmexit_pause += 1;
 	return (VMEXIT_CONTINUE);
 }
 
 /*
  * Handle a VM_EXITCODE_MTRAP exit: count it and restart the current
  * instruction.
  */
 static int
 vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	stats.vmexit_mtrap += 1;
 	return (VMEXIT_RESTART);
 }
 
 /*
  * Handle an instruction emulation exit by passing the faulting guest
  * physical address and decoded instruction to emulate_mem().
  *
  * Returns VMEXIT_CONTINUE on success.  Any failure aborts the run loop;
  * EINVAL and ESRCH additionally print a diagnostic.
  */
 static int
 vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	int err;
 
 	stats.vmexit_inst_emul += 1;
 
 	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
 	    &vmexit->u.inst_emul.vie);
 	if (err == 0)
 		return (VMEXIT_CONTINUE);
 
 	switch (err) {
 	case EINVAL:
 		fprintf(stderr, "Failed to emulate instruction at 0x%lx\n",
 		    vmexit->rip);
 		break;
 	case ESRCH:
 		fprintf(stderr, "Unhandled memory access to 0x%lx\n",
 		    vmexit->u.inst_emul.gpa);
 		break;
 	default:
 		break;
 	}
 
 	return (VMEXIT_ABORT);
 }
 
 /*
  * Dispatch table mapping a VM exit code to its handler.  Entries not
  * listed here start out NULL, which vm_loop() treats as an unexpected
  * exit code.  The VM_EXITCODE_HLT and VM_EXITCODE_PAUSE entries are
  * installed at run time by fbsdrun_set_capabilities().
  */
 static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
 	[VM_EXITCODE_INOUT]  = vmexit_inout,
 	[VM_EXITCODE_VMX]    = vmexit_vmx,
 	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
 	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
 	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
 	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
 	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
 	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
 	[VM_EXITCODE_SPINDOWN_CPU] = vmexit_spindown_cpu,
 };
 
 /*
  * Run loop for one vcpu.  When 'pincpu' is non-negative the calling thread
  * is first pinned to host cpu 'pincpu + vcpu'.  The vcpu is then run
  * repeatedly from 'rip', and each exit is dispatched through 'handler[]'.
  *
  * The handler's result selects the next rip: VMEXIT_CONTINUE advances past
  * the exiting instruction and VMEXIT_RESTART repeats it.  VMEXIT_RESET
  * exits the process with status 0, and any other result, an out-of-range
  * exit code or a missing handler exits with status 1.  The function only
  * returns when vm_run() fails, after logging the error.
  */
 static void
 vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
 {
 	enum vm_exitcode exitcode;
 	cpuset_t mask;
 	int error, rc;
 
 	if (pincpu >= 0) {
 		CPU_ZERO(&mask);
 		CPU_SET(pincpu + vcpu, &mask);
 		error = pthread_setaffinity_np(pthread_self(),
 		    sizeof(mask), &mask);
 		assert(error == 0);
 	}
 
 	for (;;) {
 		error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
 		if (error != 0)
 			break;
 
 		exitcode = vmexit[vcpu].exitcode;
 		if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
 			fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
 			    exitcode);
 			exit(1);
 		}
 
 		/*
 		 * The handler receives a pointer to 'vcpu', so vmexit[] is
 		 * indexed afresh after the call rather than through a cached
 		 * pointer.
 		 */
 		rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
 
 		if (rc == VMEXIT_CONTINUE)
 			rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
 		else if (rc == VMEXIT_RESTART)
 			rip = vmexit[vcpu].rip;
 		else if (rc == VMEXIT_RESET)
 			exit(0);
 		else
 			exit(1);
 	}
 	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
 }
 
 /*
  * Return the number of vcpus the guest may be configured with.
  *
  * The guest is allowed to spin up more than one processor only if the
  * UNRESTRICTED_GUEST capability is available.  Availability is judged
  * solely by whether the capability query on the BSP succeeds; the value
  * the query stores is not examined.
  */
 static int
 num_vcpus_allowed(struct vmctx *ctx)
 {
 	int capval;
 
 	if (vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST,
 	    &capval) != 0)
 		return (1);
 
 	return (VM_MAXCPU);
 }
 
 /*
  * Apply the configured capability settings to one vcpu.
  *
  * ctx: VM context.
  * cpu: vcpu to configure.
  *
  * Optionally enables exits on HLT and PAUSE, sets the vcpu's x2apic
  * state, and requests INVPCID.  A failed HLT/PAUSE capability query or a
  * failure to set the x2apic state terminates the process with exit(1).
  * The return values of the vm_set_capability() calls are not checked.
  */
 void
 fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
 {
 	int err, tmp;
 
 	if (fbsdrun_vmexit_on_hlt()) {
 		err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
 		if (err < 0) {
 			fprintf(stderr, "VM exit on HLT not supported\n");
 			exit(1);
 		}
 		vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
 		/* handler[] is one file-scope table; fill the slot for the BSP only. */
 		if (cpu == BSP)
 			handler[VM_EXITCODE_HLT] = vmexit_hlt;
 	}
 
         if (fbsdrun_vmexit_on_pause()) {
 		/*
 		 * pause exit support required for this mode
 		 */
 		err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp);
 		if (err < 0) {
 			fprintf(stderr,
 			    "SMP mux requested, no pause support\n");
 			exit(1);
 		}
 		vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
 		/* As for HLT: the handler slot is installed for the BSP only. */
 		if (cpu == BSP)
 			handler[VM_EXITCODE_PAUSE] = vmexit_pause;
         }
 
 	/* Select the vcpu's x2apic state; failing to set it is fatal. */
-	if (fbsdrun_disable_x2apic())
-		err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);
-	else
+	if (x2apic_mode)
 		err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED);
+	else
+		err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);
 
 	if (err) {
 		fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
 		exit(1);
 	}
 
 	/* Requested unconditionally; the result is not checked. */
 	vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
 }
 
 /*
  * Program entry point.
  *
  * Parses the command-line options, opens the VM named by the single
  * remaining argument, configures the BSP, sets up guest memory and the
  * emulated devices, builds the MP table (plus the ACPI tables when -A was
  * given), starts vcpu 0 at the RIP read back from the BSP and finally
  * enters mevent_dispatch().  Setup failures terminate the process.
  */
 int
 main(int argc, char *argv[])
 {
 	int c, error, gdb_port, err, bvmcons;
 	int max_vcpus;
 	struct vmctx *ctx;
 	uint64_t rip;
 	size_t memsize;
 
 	/* Defaults used when the corresponding option is not given. */
 	bvmcons = 0;
 	progname = basename(argv[0]);
 	gdb_port = 0;
 	guest_ncpus = 1;
 	memsize = 256 * MB;
 
-	while ((c = getopt(argc, argv, "abehwAHIPWp:g:c:s:m:l:")) != -1) {
+	while ((c = getopt(argc, argv, "abehwxAHIPWp:g:c:s:m:l:")) != -1) {
 		switch (c) {
 		case 'a':
-			disable_x2apic = 1;
+			x2apic_mode = 0;
 			break;
 		case 'A':
 			acpi = 1;
 			break;
 		case 'b':
 			bvmcons = 1;
 			break;
 		case 'p':
 			pincpu = atoi(optarg);
 			break;
                 case 'c':
 			guest_ncpus = atoi(optarg);
 			break;
 		case 'g':
 			gdb_port = atoi(optarg);
 			break;
 		case 'l':
 			if (lpc_device_parse(optarg) != 0) {
 				errx(EX_USAGE, "invalid lpc device "
 				    "configuration '%s'", optarg);
 			}
 			break;
 		case 's':
 			/*
 			 * The break in the else branch is what keeps this
 			 * case from falling through into 'm'.
 			 */
 			if (pci_parse_slot(optarg) != 0)
 				exit(1);
 			else
 				break;
                 case 'm':
 			error = vm_parse_memsize(optarg, &memsize);
 			if (error)
 				errx(EX_USAGE, "invalid memsize '%s'", optarg);
 			break;
 		case 'H':
 			guest_vmexit_on_hlt = 1;
 			break;
 		case 'I':
 			/*
 			 * The "-I" option was used to add an ioapic to the
 			 * virtual machine.
 			 *
 			 * An ioapic is now provided unconditionally for each
 			 * virtual machine and this option is now deprecated.
 			 */
 			break;
 		case 'P':
 			guest_vmexit_on_pause = 1;
 			break;
 		case 'e':
 			strictio = 1;
 			break;
 		case 'w':
 			strictmsr = 0;
 			break;
 		case 'W':
 			virtio_msix = 0;
 			break;
+		case 'x':
+			x2apic_mode = 1;
+			break;
 		case 'h':
 			/*
 			 * NOTE(review): there is no break here, so if usage()
 			 * ever returned, control would continue into the
 			 * default case - presumably usage() exits; confirm.
 			 */
 			usage(0);			
 		default:
 			usage(1);
 		}
 	}
 	argc -= optind;
 	argv += optind;
 
 	/* Exactly one positional argument is expected: the VM name. */
 	if (argc != 1)
 		usage(1);
 
 	vmname = argv[0];
 
 	ctx = vm_open(vmname);
 	if (ctx == NULL) {
 		perror("vm_open");
 		exit(1);
 	}
 
 	/* Refuse a vcpu count larger than num_vcpus_allowed() reports. */
 	max_vcpus = num_vcpus_allowed(ctx);
 	if (guest_ncpus > max_vcpus) {
 		fprintf(stderr, "%d vCPUs requested but only %d available\n",
 			guest_ncpus, max_vcpus);
 		exit(1);
 	}
 
 	fbsdrun_set_capabilities(ctx, BSP);
 
 	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
 	if (err) {
 		fprintf(stderr, "Unable to setup memory (%d)\n", err);
 		exit(1);
 	}
 
 	init_mem();
 	init_inout();
 	ioapic_init(ctx);
 
 	rtc_init(ctx);
 
 	/*
 	 * Exit if a device emulation finds an error in its initialization
 	 */
 	if (init_pci(ctx) != 0)
 		exit(1);
 
 	if (gdb_port != 0)
 		init_dbgport(gdb_port);
 
 	if (bvmcons)
 		init_bvmcons();
 
 	/* The BSP's current RIP is where vcpu 0 is started below. */
 	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
 	assert(error == 0);
 
 	/*
 	 * build the guest tables, MP etc.
 	 */
 	mptable_build(ctx, guest_ncpus);
 
 	if (acpi) {
 		error = acpi_build(ctx, guest_ncpus);
 		assert(error == 0);
 	}
 
 	/*
 	 * Change the proc title to include the VM name.
 	 */
 	setproctitle("%s", vmname); 
 	
 	/*
 	 * Add CPU 0
 	 */
 	fbsdrun_addcpu(ctx, BSP, rip);
 
 	/*
 	 * Head off to the main event dispatch loop
 	 */
 	mevent_dispatch();
 
 	/* Reached only if mevent_dispatch() returns. */
 	exit(1);
 }