Index: head/sys/amd64/include/vmm.h =================================================================== --- head/sys/amd64/include/vmm.h (revision 282286) +++ head/sys/amd64/include/vmm.h (revision 282287) @@ -1,639 +1,641 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMM_H_ #define _VMM_H_ #include enum vm_suspend_how { VM_SUSPEND_NONE, VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, VM_SUSPEND_TRIPLEFAULT, VM_SUSPEND_LAST }; /* * Identifiers for architecturally defined registers. 
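 * These processor-independent VM_REG_GUEST_* names are used as the 'reg'
 * argument of vm_get_register()/vm_set_register() and by the instruction
 * emulation code, regardless of whether the backend is VT-x or SVM.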
*/ enum vm_reg_name { VM_REG_GUEST_RAX, VM_REG_GUEST_RBX, VM_REG_GUEST_RCX, VM_REG_GUEST_RDX, VM_REG_GUEST_RSI, VM_REG_GUEST_RDI, VM_REG_GUEST_RBP, VM_REG_GUEST_R8, VM_REG_GUEST_R9, VM_REG_GUEST_R10, VM_REG_GUEST_R11, VM_REG_GUEST_R12, VM_REG_GUEST_R13, VM_REG_GUEST_R14, VM_REG_GUEST_R15, VM_REG_GUEST_CR0, VM_REG_GUEST_CR3, VM_REG_GUEST_CR4, VM_REG_GUEST_DR7, VM_REG_GUEST_RSP, VM_REG_GUEST_RIP, VM_REG_GUEST_RFLAGS, VM_REG_GUEST_ES, VM_REG_GUEST_CS, VM_REG_GUEST_SS, VM_REG_GUEST_DS, VM_REG_GUEST_FS, VM_REG_GUEST_GS, VM_REG_GUEST_LDTR, VM_REG_GUEST_TR, VM_REG_GUEST_IDTR, VM_REG_GUEST_GDTR, VM_REG_GUEST_EFER, VM_REG_GUEST_CR2, VM_REG_GUEST_PDPTE0, VM_REG_GUEST_PDPTE1, VM_REG_GUEST_PDPTE2, VM_REG_GUEST_PDPTE3, VM_REG_GUEST_INTR_SHADOW, VM_REG_LAST }; enum x2apic_state { X2APIC_DISABLED, X2APIC_ENABLED, X2APIC_STATE_LAST }; #define VM_INTINFO_VECTOR(info) ((info) & 0xff) #define VM_INTINFO_DEL_ERRCODE 0x800 #define VM_INTINFO_RSVD 0x7ffff000 #define VM_INTINFO_VALID 0x80000000 #define VM_INTINFO_TYPE 0x700 #define VM_INTINFO_HWINTR (0 << 8) #define VM_INTINFO_NMI (2 << 8) #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) #ifdef _KERNEL #define VM_MAX_NAMELEN 32 struct vm; struct vm_exception; struct vm_memory_segment; struct seg_desc; struct vm_exit; struct vm_run; struct vhpet; struct vioapic; struct vlapic; struct vmspace; struct vm_object; struct vm_guest_paging; struct pmap; typedef int (*vmm_init_func_t)(int ipinum); typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, struct pmap *pmap, void *rendezvous_cookie, void *suspend_cookie); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, uint64_t *retval); typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num, uint64_t val); typedef int (*vmi_get_desc_t)(void *vmi, int vcpu, int num, struct seg_desc *desc); typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num, struct seg_desc *desc); typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval); typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val); typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu); typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); struct vmm_ops { vmm_init_func_t init; /* module wide initialization */ vmm_cleanup_func_t cleanup; vmm_resume_func_t resume; vmi_init_func_t vminit; /* vm-specific initialization */ vmi_run_func_t vmrun; vmi_cleanup_func_t vmcleanup; vmi_get_register_t vmgetreg; vmi_set_register_t vmsetreg; vmi_get_desc_t vmgetdesc; vmi_set_desc_t vmsetdesc; vmi_get_cap_t vmgetcap; vmi_set_cap_t vmsetcap; vmi_vmspace_alloc vmspace_alloc; vmi_vmspace_free vmspace_free; vmi_vlapic_init vlapic_init; vmi_vlapic_cleanup vlapic_cleanup; }; extern struct vmm_ops vmm_ops_intel; extern struct vmm_ops vmm_ops_amd; int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len); int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len); void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int 
prot, void **cookie); void vm_gpa_release(void *cookie); int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, struct vm_memory_segment *seg); int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, vm_offset_t *offset, struct vm_object **object); boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa); int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval); int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val); int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *ret_desc); int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc); int vm_run(struct vm *vm, struct vm_run *vmrun); int vm_suspend(struct vm *vm, enum vm_suspend_how how); int vm_inject_nmi(struct vm *vm, int vcpu); int vm_nmi_pending(struct vm *vm, int vcpuid); void vm_nmi_clear(struct vm *vm, int vcpuid); int vm_inject_extint(struct vm *vm, int vcpu); int vm_extint_pending(struct vm *vm, int vcpuid); void vm_extint_clear(struct vm *vm, int vcpuid); struct vlapic *vm_lapic(struct vm *vm, int cpu); struct vioapic *vm_ioapic(struct vm *vm); struct vhpet *vm_hpet(struct vm *vm); int vm_get_capability(struct vm *vm, int vcpu, int type, int *val); int vm_set_capability(struct vm *vm, int vcpu, int type, int val); int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state); int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state); int vm_apicid2vcpuid(struct vm *vm, int apicid); int vm_activate_cpu(struct vm *vm, int vcpu); -cpuset_t vm_active_cpus(struct vm *vm); -cpuset_t vm_suspended_cpus(struct vm *vm); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); +#ifdef _SYS__CPUSET_H_ /* * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. * The rendezvous 'func(arg)' is not allowed to do anything that will * cause the thread to be put to sleep. * * If the rendezvous is being initiated from a vcpu context then the * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1. * * The caller cannot hold any locks when initiating the rendezvous. * * The implementation of this API may cause vcpus other than those specified * by 'dest' to be stalled. The caller should not rely on any vcpus making * forward progress when the rendezvous is in progress. */ typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, vm_rendezvous_func_t func, void *arg); +cpuset_t vm_active_cpus(struct vm *vm); +cpuset_t vm_suspended_cpus(struct vm *vm); +#endif /* _SYS__CPUSET_H_ */ static __inline int vcpu_rendezvous_pending(void *rendezvous_cookie) { return (*(uintptr_t *)rendezvous_cookie != 0); } static __inline int vcpu_suspended(void *suspend_cookie) { return (*(int *)suspend_cookie); } /* * Return 1 if device indicated by bus/slot/func is supposed to be a * pci passthrough device. * * Return 0 otherwise. 
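 * For example, vmm_is_pptdev(6, 0, 0) returns 1 only if the device at
 * bus 6, slot 0, function 0 was configured for passthrough (e.g. via the
 * "pptdevs" loader tunable).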
 */
int vmm_is_pptdev(int bus, int slot, int func);

void *vm_iommu_domain(struct vm *vm);

enum vcpu_state {
	VCPU_IDLE,
	VCPU_FROZEN,
	VCPU_RUNNING,
	VCPU_SLEEPING,
};

int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
    bool from_idle);
enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);

static int __inline
vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
{
	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
}

#ifdef _SYS_PROC_H_
static int __inline
vcpu_should_yield(struct vm *vm, int vcpu)
{
	return (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED));
}
#endif

void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);
struct vrtc *vm_rtc(struct vm *vm);

/*
 * Inject exception 'vector' into the guest vcpu. This function returns 0 on
 * success and non-zero on failure.
 *
 * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
 * this function directly because they enforce the trap-like or fault-like
 * behavior of an exception.
 *
 * This function should only be called in the context of the thread that is
 * executing this vcpu.
 */
int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid,
    uint32_t errcode, int restart_instruction);

/*
 * This function is called after a VM-exit that occurred during exception or
 * interrupt delivery through the IDT. The format of 'intinfo' is described
 * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
 *
 * If a VM-exit handler completes the event delivery successfully then it
 * should call vm_exit_intinfo() to extinguish the pending event. For example,
 * if the task switch emulation is triggered via a task gate then it should
 * call this function with 'intinfo=0' to indicate that the external event
 * is not pending anymore.
 *
 * Return value is 0 on success and non-zero on failure.
 */
int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);

/*
 * This function is called before every VM-entry to retrieve a pending
 * event that should be injected into the guest. This function combines
 * nested events into a double or triple fault.
 *
 * Returns 0 if there are no events that need to be injected into the guest
 * and non-zero otherwise.
 */
int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);

int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);

enum vm_reg_name vm_segment_name(int seg_encoding);

struct vm_copyinfo {
	uint64_t	gpa;
	size_t		len;
	void		*hva;
	void		*cookie;
};

/*
 * Set up 'copyinfo[]' to copy to/from guest linear address space starting
 * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
 * a copyin or PROT_WRITE for a copyout.
 *
 * Returns 0 on success.
 * Returns 1 if an exception was injected into the guest.
 * Returns -1 otherwise.
 *
 * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
 * the return value is 0. The 'copyinfo[]' resources should be freed by calling
 * 'vm_copy_teardown()' after the copy is done.
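 *
 * Illustrative copyin sequence (a sketch; 'paging', 'gla', 'len' and 'buf'
 * are hypothetical locals of the caller):
 *
 *	struct vm_copyinfo copyinfo[2];
 *	int error;
 *
 *	error = vm_copy_setup(vm, vcpuid, paging, gla, len, PROT_READ,
 *	    copyinfo, nitems(copyinfo));
 *	if (error == 0) {
 *		vm_copyin(vm, vcpuid, copyinfo, buf, len);
 *		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
 *	}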
*/ int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, int num_copyinfo); void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, int num_copyinfo); void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, size_t len); void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, struct vm_copyinfo *copyinfo, size_t len); int vcpu_trace_exceptions(struct vm *vm, int vcpuid); #endif /* KERNEL */ #define VM_MAXCPU 16 /* maximum virtual cpus */ /* * Identifiers for optional vmm capabilities */ enum vm_cap_type { VM_CAP_HALT_EXIT, VM_CAP_MTRAP_EXIT, VM_CAP_PAUSE_EXIT, VM_CAP_UNRESTRICTED_GUEST, VM_CAP_ENABLE_INVPCID, VM_CAP_MAX }; enum vm_intr_trigger { EDGE_TRIGGER, LEVEL_TRIGGER }; /* * The 'access' field has the format specified in Table 21-2 of the Intel * Architecture Manual vol 3b. * * XXX The contents of the 'access' field are architecturally defined except * bit 16 - Segment Unusable. */ struct seg_desc { uint64_t base; uint32_t limit; uint32_t access; }; #define SEG_DESC_TYPE(access) ((access) & 0x001f) #define SEG_DESC_DPL(access) (((access) >> 5) & 0x3) #define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0) #define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0) #define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0) #define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0) enum vm_cpu_mode { CPU_MODE_REAL, CPU_MODE_PROTECTED, CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ }; enum vm_paging_mode { PAGING_MODE_FLAT, PAGING_MODE_32, PAGING_MODE_PAE, PAGING_MODE_64, }; struct vm_guest_paging { uint64_t cr3; int cpl; enum vm_cpu_mode cpu_mode; enum vm_paging_mode paging_mode; }; /* * The data structures 'vie' and 'vie_op' are meant to be opaque to the * consumers of instruction decoding. The only reason why their contents * need to be exposed is because they are part of the 'vm_exit' structure. */ struct vie_op { uint8_t op_byte; /* actual opcode byte */ uint8_t op_type; /* type of operation (e.g. 
MOV) */ uint16_t op_flags; }; #define VIE_INST_SIZE 15 struct vie { uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */ uint8_t num_valid; /* size of the instruction */ uint8_t num_processed; uint8_t addrsize:4, opsize:4; /* address and operand sizes */ uint8_t rex_w:1, /* REX prefix */ rex_r:1, rex_x:1, rex_b:1, rex_present:1, repz_present:1, /* REP/REPE/REPZ prefix */ repnz_present:1, /* REPNE/REPNZ prefix */ opsize_override:1, /* Operand size override */ addrsize_override:1, /* Address size override */ segment_override:1; /* Segment override */ uint8_t mod:2, /* ModRM byte */ reg:4, rm:4; uint8_t ss:2, /* SIB byte */ index:4, base:4; uint8_t disp_bytes; uint8_t imm_bytes; uint8_t scale; int base_register; /* VM_REG_GUEST_xyz */ int index_register; /* VM_REG_GUEST_xyz */ int segment_register; /* VM_REG_GUEST_xyz */ int64_t displacement; /* optional addr displacement */ int64_t immediate; /* optional immediate operand */ uint8_t decoded; /* set to 1 if successfully decoded */ struct vie_op op; /* opcode description */ }; enum vm_exitcode { VM_EXITCODE_INOUT, VM_EXITCODE_VMX, VM_EXITCODE_BOGUS, VM_EXITCODE_RDMSR, VM_EXITCODE_WRMSR, VM_EXITCODE_HLT, VM_EXITCODE_MTRAP, VM_EXITCODE_PAUSE, VM_EXITCODE_PAGING, VM_EXITCODE_INST_EMUL, VM_EXITCODE_SPINUP_AP, VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */ VM_EXITCODE_RENDEZVOUS, VM_EXITCODE_IOAPIC_EOI, VM_EXITCODE_SUSPENDED, VM_EXITCODE_INOUT_STR, VM_EXITCODE_TASK_SWITCH, VM_EXITCODE_MONITOR, VM_EXITCODE_MWAIT, VM_EXITCODE_SVM, VM_EXITCODE_MAX }; struct vm_inout { uint16_t bytes:3; /* 1 or 2 or 4 */ uint16_t in:1; uint16_t string:1; uint16_t rep:1; uint16_t port; uint32_t eax; /* valid for out */ }; struct vm_inout_str { struct vm_inout inout; /* must be the first element */ struct vm_guest_paging paging; uint64_t rflags; uint64_t cr0; uint64_t index; uint64_t count; /* rep=1 (%rcx), rep=0 (1) */ int addrsize; enum vm_reg_name seg_name; struct seg_desc seg_desc; }; enum task_switch_reason { TSR_CALL, TSR_IRET, TSR_JMP, TSR_IDT_GATE, /* task gate in IDT */ }; struct vm_task_switch { uint16_t tsssel; /* new TSS selector */ int ext; /* task switch due to external event */ uint32_t errcode; int errcode_valid; /* push 'errcode' on the new stack */ enum task_switch_reason reason; struct vm_guest_paging paging; }; struct vm_exit { enum vm_exitcode exitcode; int inst_length; /* 0 means unknown */ uint64_t rip; union { struct vm_inout inout; struct vm_inout_str inout_str; struct { uint64_t gpa; int fault_type; } paging; struct { uint64_t gpa; uint64_t gla; uint64_t cs_base; int cs_d; /* CS.D */ struct vm_guest_paging paging; struct vie vie; } inst_emul; /* * VMX specific payload. Used when there is no "better" * exitcode to represent the VM-exit. */ struct { int status; /* vmx inst status */ /* * 'exit_reason' and 'exit_qualification' are valid * only if 'status' is zero. */ uint32_t exit_reason; uint64_t exit_qualification; /* * 'inst_error' and 'inst_type' are valid * only if 'status' is non-zero. */ int inst_type; int inst_error; } vmx; /* * SVM specific payload. 
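 * The fields below mirror the EXITCODE, EXITINFO1 and EXITINFO2 fields
 * of the VMCB control area.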
*/ struct { uint64_t exitcode; uint64_t exitinfo1; uint64_t exitinfo2; } svm; struct { uint32_t code; /* ecx value */ uint64_t wval; } msr; struct { int vcpu; uint64_t rip; } spinup_ap; struct { uint64_t rflags; } hlt; struct { int vector; } ioapic_eoi; struct { enum vm_suspend_how how; } suspended; struct vm_task_switch task_switch; } u; }; /* APIs to inject faults into the guest */ void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid, int errcode); static __inline void vm_inject_ud(void *vm, int vcpuid) { vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0); } static __inline void vm_inject_gp(void *vm, int vcpuid) { vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0); } static __inline void vm_inject_ac(void *vm, int vcpuid, int errcode) { vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode); } static __inline void vm_inject_ss(void *vm, int vcpuid, int errcode) { vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode); } void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2); int vm_restart_instruction(void *vm, int vcpuid); #endif /* _VMM_H_ */ Index: head/sys/amd64/vmm/amd/amdv.c =================================================================== --- head/sys/amd64/vmm/amd/amdv.c (revision 282286) +++ head/sys/amd64/vmm/amd/amdv.c (revision 282287) @@ -1,134 +1,133 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include "io/iommu.h" static int amd_iommu_init(void) { printf("amd_iommu_init: not implemented\n"); return (ENXIO); } static void amd_iommu_cleanup(void) { printf("amd_iommu_cleanup: not implemented\n"); } static void amd_iommu_enable(void) { printf("amd_iommu_enable: not implemented\n"); } static void amd_iommu_disable(void) { printf("amd_iommu_disable: not implemented\n"); } static void * amd_iommu_create_domain(vm_paddr_t maxaddr) { printf("amd_iommu_create_domain: not implemented\n"); return (NULL); } static void amd_iommu_destroy_domain(void *domain) { printf("amd_iommu_destroy_domain: not implemented\n"); } static uint64_t amd_iommu_create_mapping(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) { printf("amd_iommu_create_mapping: not implemented\n"); return (0); } static uint64_t amd_iommu_remove_mapping(void *domain, vm_paddr_t gpa, uint64_t len) { printf("amd_iommu_remove_mapping: not implemented\n"); return (0); } static void amd_iommu_add_device(void *domain, uint16_t rid) { printf("amd_iommu_add_device: not implemented\n"); } static void amd_iommu_remove_device(void *domain, uint16_t rid) { printf("amd_iommu_remove_device: not implemented\n"); } static void amd_iommu_invalidate_tlb(void *domain) { printf("amd_iommu_invalidate_tlb: not implemented\n"); } struct iommu_ops iommu_ops_amd = { amd_iommu_init, amd_iommu_cleanup, amd_iommu_enable, amd_iommu_disable, amd_iommu_create_domain, amd_iommu_destroy_domain, amd_iommu_create_mapping, amd_iommu_remove_mapping, amd_iommu_add_device, amd_iommu_remove_device, amd_iommu_invalidate_tlb, }; Index: head/sys/amd64/vmm/amd/svm_msr.c =================================================================== --- head/sys/amd64/vmm/amd/svm_msr.c (revision 282286) +++ head/sys/amd64/vmm/amd/svm_msr.c (revision 282287) @@ -1,157 +1,156 @@ /*- * Copyright (c) 2014, Neel Natu (neel@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include #include "svm.h" #include "vmcb.h" #include "svm_softc.h" #include "svm_msr.h" #ifndef MSR_AMDK8_IPM #define MSR_AMDK8_IPM 0xc0010055 #endif enum { IDX_MSR_LSTAR, IDX_MSR_CSTAR, IDX_MSR_STAR, IDX_MSR_SF_MASK, HOST_MSR_NUM /* must be the last enumeration */ }; static uint64_t host_msrs[HOST_MSR_NUM]; void svm_msr_init(void) { /* * It is safe to cache the values of the following MSRs because they * don't change based on curcpu, curproc or curthread. */ host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); } void svm_msr_guest_init(struct svm_softc *sc, int vcpu) { /* * All the MSRs accessible to the guest are either saved/restored by * hardware on every #VMEXIT/VMRUN (e.g., G_PAT) or are saved/restored * by VMSAVE/VMLOAD (e.g., MSR_GSBASE). * * There are no guest MSRs that are saved/restored "by hand" so nothing * more to do here. */ return; } void svm_msr_guest_enter(struct svm_softc *sc, int vcpu) { /* * Save host MSRs (if any) and restore guest MSRs (if any). */ } void svm_msr_guest_exit(struct svm_softc *sc, int vcpu) { /* * Save guest MSRs (if any) and restore host MSRs. */ wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); /* MSR_KGSBASE will be restored on the way back to userspace */ } int svm_rdmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t *result, bool *retu) { int error = 0; switch (num) { case MSR_MTRRcap: case MSR_MTRRdefType: case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: *result = 0; break; case MSR_AMDK8_IPM: *result = 0; break; default: error = EINVAL; break; } return (error); } int svm_wrmsr(struct svm_softc *sc, int vcpu, u_int num, uint64_t val, bool *retu) { int error = 0; switch (num) { case MSR_MTRRcap: vm_inject_gp(sc->vm, vcpu); break; case MSR_MTRRdefType: case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: break; /* Ignore writes */ case MSR_AMDK8_IPM: /* * Ignore writes to the "Interrupt Pending Message" MSR. */ break; default: error = EINVAL; break; } return (error); } Index: head/sys/amd64/vmm/amd/vmcb.c =================================================================== --- head/sys/amd64/vmm/amd/vmcb.c (revision 282286) +++ head/sys/amd64/vmm/amd/vmcb.c (revision 282287) @@ -1,443 +1,442 @@ /*- * Copyright (c) 2013 Anish Gupta (akgupt3@gmail.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
-#include
#include
#include

#include "vmm_ktr.h"
#include "vmcb.h"
#include "svm.h"
#include "svm_softc.h"

/*
 * The VMCB aka Virtual Machine Control Block is a 4KB aligned page
 * in memory that describes the virtual machine.
 *
 * The VMCB contains:
 * - instructions or events in the guest to intercept
 * - control bits that modify execution environment of the guest
 * - guest processor state (e.g. general purpose registers)
 */

/*
 * Return VMCB segment area.
 */
static struct vmcb_segment *
vmcb_segptr(struct vmcb *vmcb, int type)
{
	struct vmcb_state *state;
	struct vmcb_segment *seg;

	state = &vmcb->state;

	switch (type) {
	case VM_REG_GUEST_CS:
		seg = &state->cs;
		break;
	case VM_REG_GUEST_DS:
		seg = &state->ds;
		break;
	case VM_REG_GUEST_ES:
		seg = &state->es;
		break;
	case VM_REG_GUEST_FS:
		seg = &state->fs;
		break;
	case VM_REG_GUEST_GS:
		seg = &state->gs;
		break;
	case VM_REG_GUEST_SS:
		seg = &state->ss;
		break;
	case VM_REG_GUEST_GDTR:
		seg = &state->gdt;
		break;
	case VM_REG_GUEST_IDTR:
		seg = &state->idt;
		break;
	case VM_REG_GUEST_LDTR:
		seg = &state->ldt;
		break;
	case VM_REG_GUEST_TR:
		seg = &state->tr;
		break;
	default:
		seg = NULL;
		break;
	}

	return (seg);
}

static int
vmcb_access(struct svm_softc *softc, int vcpu, int write, int ident,
    uint64_t *val)
{
	struct vmcb *vmcb;
	int off, bytes;
	char *ptr;

	vmcb = svm_get_vmcb(softc, vcpu);
	off = VMCB_ACCESS_OFFSET(ident);
	bytes = VMCB_ACCESS_BYTES(ident);

	if ((off + bytes) >= sizeof (struct vmcb))
		return (EINVAL);

	ptr = (char *)vmcb;

	if (!write)
		*val = 0;

	switch (bytes) {
	case 8:
	case 4:
	case 2:
		if (write)
			memcpy(ptr + off, val, bytes);
		else
			memcpy(val, ptr + off, bytes);
		break;
	default:
		VCPU_CTR1(softc->vm, vcpu, "Invalid size %d for VMCB access",
		    bytes);
		return (EINVAL);
	}

	/* Invalidate all VMCB state cached by h/w. */
	if (write)
		svm_set_dirty(softc, vcpu, 0xffffffff);

	return (0);
}

/*
 * Read from segment selector, control and general purpose register of VMCB.
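 * The 'ident' is either one of the VM_REG_GUEST_* identifiers handled
 * below or a raw offset/width encoding accepted by VMCB_ACCESS_OK() and
 * forwarded to vmcb_access(). For example, vmcb_read(sc, vcpu,
 * VM_REG_GUEST_CR3, &val) returns the guest %cr3.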
*/ int vmcb_read(struct svm_softc *sc, int vcpu, int ident, uint64_t *retval) { struct vmcb *vmcb; struct vmcb_state *state; struct vmcb_segment *seg; int err; vmcb = svm_get_vmcb(sc, vcpu); state = &vmcb->state; err = 0; if (VMCB_ACCESS_OK(ident)) return (vmcb_access(sc, vcpu, 0, ident, retval)); switch (ident) { case VM_REG_GUEST_CR0: *retval = state->cr0; break; case VM_REG_GUEST_CR2: *retval = state->cr2; break; case VM_REG_GUEST_CR3: *retval = state->cr3; break; case VM_REG_GUEST_CR4: *retval = state->cr4; break; case VM_REG_GUEST_DR7: *retval = state->dr7; break; case VM_REG_GUEST_EFER: *retval = state->efer; break; case VM_REG_GUEST_RAX: *retval = state->rax; break; case VM_REG_GUEST_RFLAGS: *retval = state->rflags; break; case VM_REG_GUEST_RIP: *retval = state->rip; break; case VM_REG_GUEST_RSP: *retval = state->rsp; break; case VM_REG_GUEST_CS: case VM_REG_GUEST_DS: case VM_REG_GUEST_ES: case VM_REG_GUEST_FS: case VM_REG_GUEST_GS: case VM_REG_GUEST_SS: case VM_REG_GUEST_LDTR: case VM_REG_GUEST_TR: seg = vmcb_segptr(vmcb, ident); KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", __func__, ident)); *retval = seg->selector; break; case VM_REG_GUEST_GDTR: case VM_REG_GUEST_IDTR: /* GDTR and IDTR don't have segment selectors */ err = EINVAL; break; default: err = EINVAL; break; } return (err); } /* * Write to segment selector, control and general purpose register of VMCB. */ int vmcb_write(struct svm_softc *sc, int vcpu, int ident, uint64_t val) { struct vmcb *vmcb; struct vmcb_state *state; struct vmcb_segment *seg; int err, dirtyseg; vmcb = svm_get_vmcb(sc, vcpu); state = &vmcb->state; dirtyseg = 0; err = 0; if (VMCB_ACCESS_OK(ident)) return (vmcb_access(sc, vcpu, 1, ident, &val)); switch (ident) { case VM_REG_GUEST_CR0: state->cr0 = val; svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); break; case VM_REG_GUEST_CR2: state->cr2 = val; svm_set_dirty(sc, vcpu, VMCB_CACHE_CR2); break; case VM_REG_GUEST_CR3: state->cr3 = val; svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); break; case VM_REG_GUEST_CR4: state->cr4 = val; svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); break; case VM_REG_GUEST_DR7: state->dr7 = val; break; case VM_REG_GUEST_EFER: /* EFER_SVM must always be set when the guest is executing */ state->efer = val | EFER_SVM; svm_set_dirty(sc, vcpu, VMCB_CACHE_CR); break; case VM_REG_GUEST_RAX: state->rax = val; break; case VM_REG_GUEST_RFLAGS: state->rflags = val; break; case VM_REG_GUEST_RIP: state->rip = val; break; case VM_REG_GUEST_RSP: state->rsp = val; break; case VM_REG_GUEST_CS: case VM_REG_GUEST_DS: case VM_REG_GUEST_ES: case VM_REG_GUEST_SS: dirtyseg = 1; /* FALLTHROUGH */ case VM_REG_GUEST_FS: case VM_REG_GUEST_GS: case VM_REG_GUEST_LDTR: case VM_REG_GUEST_TR: seg = vmcb_segptr(vmcb, ident); KASSERT(seg != NULL, ("%s: unable to get segment %d from VMCB", __func__, ident)); seg->selector = val; if (dirtyseg) svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); break; case VM_REG_GUEST_GDTR: case VM_REG_GUEST_IDTR: /* GDTR and IDTR don't have segment selectors */ err = EINVAL; break; default: err = EINVAL; break; } return (err); } int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg2) { struct vmcb_segment *seg; seg = vmcb_segptr(vmcb, ident); if (seg != NULL) { bcopy(seg, seg2, sizeof(struct vmcb_segment)); return (0); } else { return (EINVAL); } } int vmcb_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) { struct vmcb *vmcb; struct svm_softc *sc; struct vmcb_segment *seg; uint16_t attrib; sc = arg; vmcb = svm_get_vmcb(sc, vcpu); seg = vmcb_segptr(vmcb, reg); 
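	/*
	 * vmcb_segptr() returns NULL only if 'reg' does not name a segment
	 * or descriptor-table register; the assertion below catches such
	 * callers.
	 */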
KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", __func__, reg)); seg->base = desc->base; seg->limit = desc->limit; if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) { /* * Map seg_desc access to VMCB attribute format. * * SVM uses the 'P' bit in the segment attributes to indicate a * NULL segment so clear it if the segment is marked unusable. */ attrib = ((desc->access & 0xF000) >> 4) | (desc->access & 0xFF); if (SEG_DESC_UNUSABLE(desc->access)) { attrib &= ~0x80; } seg->attrib = attrib; } VCPU_CTR4(sc->vm, vcpu, "Setting desc %d: base (%#lx), limit (%#x), " "attrib (%#x)", reg, seg->base, seg->limit, seg->attrib); switch (reg) { case VM_REG_GUEST_CS: case VM_REG_GUEST_DS: case VM_REG_GUEST_ES: case VM_REG_GUEST_SS: svm_set_dirty(sc, vcpu, VMCB_CACHE_SEG); break; case VM_REG_GUEST_GDTR: case VM_REG_GUEST_IDTR: svm_set_dirty(sc, vcpu, VMCB_CACHE_DT); break; default: break; } return (0); } int vmcb_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc) { struct vmcb *vmcb; struct svm_softc *sc; struct vmcb_segment *seg; sc = arg; vmcb = svm_get_vmcb(sc, vcpu); seg = vmcb_segptr(vmcb, reg); KASSERT(seg != NULL, ("%s: invalid segment descriptor %d", __func__, reg)); desc->base = seg->base; desc->limit = seg->limit; desc->access = 0; if (reg != VM_REG_GUEST_GDTR && reg != VM_REG_GUEST_IDTR) { /* Map seg_desc access to VMCB attribute format */ desc->access = ((seg->attrib & 0xF00) << 4) | (seg->attrib & 0xFF); /* * VT-x uses bit 16 to indicate a segment that has been loaded * with a NULL selector (aka unusable). The 'desc->access' * field is interpreted in the VT-x format by the * processor-independent code. * * SVM uses the 'P' bit to convey the same information so * convert it into the VT-x format. For more details refer to * section "Segment State in the VMCB" in APMv2. */ if (reg != VM_REG_GUEST_CS && reg != VM_REG_GUEST_TR) { if ((desc->access & 0x80) == 0) desc->access |= 0x10000; /* Unusable segment */ } } return (0); } Index: head/sys/amd64/vmm/intel/vmx_msr.c =================================================================== --- head/sys/amd64/vmm/intel/vmx_msr.c (revision 282286) +++ head/sys/amd64/vmm/intel/vmx_msr.c (revision 282287) @@ -1,477 +1,476 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include #include #include #include "vmx.h" #include "vmx_msr.h" static boolean_t vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos) { if (msr_val & (1UL << (bitpos + 32))) return (TRUE); else return (FALSE); } static boolean_t vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos) { if ((msr_val & (1UL << bitpos)) == 0) return (TRUE); else return (FALSE); } uint32_t vmx_revision(void) { return (rdmsr(MSR_VMX_BASIC) & 0xffffffff); } /* * Generate a bitmask to be used for the VMCS execution control fields. * * The caller specifies what bits should be set to one in 'ones_mask' * and what bits should be set to zero in 'zeros_mask'. The don't-care * bits are set to the default value. The default values are obtained * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining * VMX Capabilities". * * Returns zero on success and non-zero on error. */ int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask, uint32_t zeros_mask, uint32_t *retval) { int i; uint64_t val, trueval; boolean_t true_ctls_avail, one_allowed, zero_allowed; /* We cannot ask the same bit to be set to both '1' and '0' */ if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask)) return (EINVAL); if (rdmsr(MSR_VMX_BASIC) & (1UL << 55)) true_ctls_avail = TRUE; else true_ctls_avail = FALSE; val = rdmsr(ctl_reg); if (true_ctls_avail) trueval = rdmsr(true_ctl_reg); /* step c */ else trueval = val; /* step a */ for (i = 0; i < 32; i++) { one_allowed = vmx_ctl_allows_one_setting(trueval, i); zero_allowed = vmx_ctl_allows_zero_setting(trueval, i); KASSERT(one_allowed || zero_allowed, ("invalid zero/one setting for bit %d of ctl 0x%0x, " "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg)); if (zero_allowed && !one_allowed) { /* b(i),c(i) */ if (ones_mask & (1 << i)) return (EINVAL); *retval &= ~(1 << i); } else if (one_allowed && !zero_allowed) { /* b(i),c(i) */ if (zeros_mask & (1 << i)) return (EINVAL); *retval |= 1 << i; } else { if (zeros_mask & (1 << i)) /* b(ii),c(ii) */ *retval &= ~(1 << i); else if (ones_mask & (1 << i)) /* b(ii), c(ii) */ *retval |= 1 << i; else if (!true_ctls_avail) *retval &= ~(1 << i); /* b(iii) */ else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/ *retval &= ~(1 << i); else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */ *retval |= 1 << i; else { panic("vmx_set_ctlreg: unable to determine " "correct value of ctl bit %d for msr " "0x%0x and true msr 0x%0x", i, ctl_reg, true_ctl_reg); } } } return (0); } void msr_bitmap_initialize(char *bitmap) { memset(bitmap, 0xff, PAGE_SIZE); } int msr_bitmap_change_access(char *bitmap, u_int msr, int access) { int byte, bit; if (msr <= 0x00001FFF) byte = msr / 8; else if (msr >= 0xC0000000 && msr <= 0xC0001FFF) byte = 1024 + (msr - 0xC0000000) / 8; else return (EINVAL); bit = msr & 0x7; if (access & MSR_BITMAP_ACCESS_READ) bitmap[byte] &= ~(1 << bit); else bitmap[byte] |= 1 << bit; byte += 2048; if (access & MSR_BITMAP_ACCESS_WRITE) bitmap[byte] &= ~(1 << bit); else bitmap[byte] |= 1 << bit; return (0); } static uint64_t misc_enable; static uint64_t platform_info; static uint64_t turbo_ratio_limit; static uint64_t host_msrs[GUEST_MSR_NUM]; static bool nehalem_cpu(void) { u_int family, model; /* * The family:model numbers belonging to the Nehalem microarchitecture * are documented in Section 35.5, Intel SDM dated Feb 2014. 
 */
	family = CPUID_TO_FAMILY(cpu_id);
	model = CPUID_TO_MODEL(cpu_id);

	if (family == 0x6) {
		switch (model) {
		case 0x1A:
		case 0x1E:
		case 0x1F:
		case 0x2E:
			return (true);
		default:
			break;
		}
	}
	return (false);
}

static bool
westmere_cpu(void)
{
	u_int family, model;

	/*
	 * The family:model numbers belonging to the Westmere microarchitecture
	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
	 */
	family = CPUID_TO_FAMILY(cpu_id);
	model = CPUID_TO_MODEL(cpu_id);

	if (family == 0x6) {
		switch (model) {
		case 0x25:
		case 0x2C:
			return (true);
		default:
			break;
		}
	}
	return (false);
}

static bool
pat_valid(uint64_t val)
{
	int i, pa;

	/*
	 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
	 *
	 * Extract PA0 through PA7 and validate that each one encodes a
	 * valid memory type.
	 */
	for (i = 0; i < 8; i++) {
		pa = (val >> (i * 8)) & 0xff;
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}

void
vmx_msr_init(void)
{
	uint64_t bus_freq, ratio;
	int i;

	/*
	 * It is safe to cache the values of the following MSRs because
	 * they don't change based on curcpu, curproc or curthread.
	 */
	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);

	/*
	 * Initialize emulated MSRs
	 */
	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
	/*
	 * Set mandatory bits
	 *  11: branch trace disabled
	 *  12: PEBS unavailable
	 * Clear unsupported features
	 *  16: SpeedStep enable
	 *  18: enable MONITOR FSM
	 */
	misc_enable |= (1 << 12) | (1 << 11);
	misc_enable &= ~((1 << 18) | (1 << 16));

	if (nehalem_cpu() || westmere_cpu())
		bus_freq = 133330000;		/* 133 MHz */
	else
		bus_freq = 100000000;		/* 100 MHz */

	/*
	 * XXXtime
	 * The ratio should really be based on the virtual TSC frequency as
	 * opposed to the host TSC.
	 */
	ratio = (tsc_freq / bus_freq) & 0xff;

	/*
	 * The register definition is based on the micro-architecture
	 * but the following bits are always the same:
	 * [15:8]  Maximum Non-Turbo Ratio
	 * [28]    Programmable Ratio Limit for Turbo Mode
	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
	 * [47:40] Maximum Efficiency Ratio
	 *
	 * The other bits can be safely set to 0 on all
	 * micro-architectures up to Haswell.
	 */
	platform_info = (ratio << 8) | (ratio << 40);

	/*
	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
	 * dependent on the maximum cores per package supported by the
	 * microarchitecture. For example, Westmere supports 6 cores per
	 * package and uses the low 48 bits. Sandy Bridge supports 8 cores per
	 * package and uses up all 64 bits.
	 *
	 * However, the unused bits are reserved so we pretend that all bits
	 * in this MSR are valid.
	 */
	for (i = 0; i < 8; i++)
		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
}

void
vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
{
	uint64_t *guest_msrs;

	guest_msrs = vmx->guest_msrs[vcpuid];

	/*
	 * The permissions bitmap is shared between all vcpus so initialize it
	 * once when initializing the vBSP.
	 */
	if (vcpuid == 0) {
		guest_msr_rw(vmx, MSR_LSTAR);
		guest_msr_rw(vmx, MSR_CSTAR);
		guest_msr_rw(vmx, MSR_STAR);
		guest_msr_rw(vmx, MSR_SF_MASK);
		guest_msr_rw(vmx, MSR_KGSBASE);
	}

	/*
	 * Initialize guest IA32_PAT MSR with default value after reset.
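	 * (i.e. the architected power-on value 0x0007040600070406: WB, WT,
	 * UC- and UC repeated in the low and high halves).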
*/ guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); return; } void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) { uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; /* Save host MSRs (if any) and restore guest MSRs */ wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]); wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]); } void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid) { uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; /* Save guest MSRs */ guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR); guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR); guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR); guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK); guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE); /* Restore host MSRs */ wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]); wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]); /* MSR_KGSBASE will be restored on the way back to userspace */ } int vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu) { const uint64_t *guest_msrs; int error; guest_msrs = vmx->guest_msrs[vcpuid]; error = 0; switch (num) { case MSR_MTRRcap: case MSR_MTRRdefType: case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: *val = 0; break; case MSR_IA32_MISC_ENABLE: *val = misc_enable; break; case MSR_PLATFORM_INFO: *val = platform_info; break; case MSR_TURBO_RATIO_LIMIT: case MSR_TURBO_RATIO_LIMIT1: *val = turbo_ratio_limit; break; case MSR_PAT: *val = guest_msrs[IDX_MSR_PAT]; break; default: error = EINVAL; break; } return (error); } int vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu) { uint64_t *guest_msrs; uint64_t changed; int error; guest_msrs = vmx->guest_msrs[vcpuid]; error = 0; switch (num) { case MSR_MTRRcap: vm_inject_gp(vmx->vm, vcpuid); break; case MSR_MTRRdefType: case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8: case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1: case MSR_MTRR64kBase: break; /* Ignore writes */ case MSR_IA32_MISC_ENABLE: changed = val ^ misc_enable; /* * If the host has disabled the NX feature then the guest * also cannot use it. However, a Linux guest will try to * enable the NX feature by writing to the MISC_ENABLE MSR. * * This can be safely ignored because the memory management * code looks at CPUID.80000001H:EDX.NX to check if the * functionality is actually enabled. */ changed &= ~(1UL << 34); /* * Punt to userspace if any other bits are being modified. */ if (changed) error = EINVAL; break; case MSR_PAT: if (pat_valid(val)) guest_msrs[IDX_MSR_PAT] = val; else vm_inject_gp(vmx->vm, vcpuid); break; default: error = EINVAL; break; } return (error); } Index: head/sys/amd64/vmm/io/vatpic.c =================================================================== --- head/sys/amd64/vmm/io/vatpic.c (revision 282286) +++ head/sys/amd64/vmm/io/vatpic.c (revision 282287) @@ -1,809 +1,808 @@ /*- * Copyright (c) 2014 Tycho Nightingale * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
-#include
#include
#include
#include
#include
#include
#include

#include "vmm_ktr.h"
#include "vmm_lapic.h"
#include "vioapic.h"
#include "vatpic.h"

static MALLOC_DEFINE(M_VATPIC, "atpic", "bhyve virtual atpic (8259)");

#define	VATPIC_LOCK(vatpic)	mtx_lock_spin(&((vatpic)->mtx))
#define	VATPIC_UNLOCK(vatpic)	mtx_unlock_spin(&((vatpic)->mtx))
#define	VATPIC_LOCKED(vatpic)	mtx_owned(&((vatpic)->mtx))

enum irqstate {
	IRQSTATE_ASSERT,
	IRQSTATE_DEASSERT,
	IRQSTATE_PULSE
};

struct atpic {
	bool		ready;
	int		icw_num;
	int		rd_cmd_reg;

	bool		aeoi;
	bool		poll;
	bool		rotate;
	bool		sfn;		/* special fully-nested mode */

	int		irq_base;
	uint8_t		request;	/* Interrupt Request Register (IRR) */
	uint8_t		service;	/* Interrupt Service (ISR) */
	uint8_t		mask;		/* Interrupt Mask Register (IMR) */
	uint8_t		smm;		/* special mask mode */

	int		acnt[8];	/* sum of pin asserts and deasserts */
	int		lowprio;	/* lowest priority irq */

	bool		intr_raised;
};

struct vatpic {
	struct vm	*vm;
	struct mtx	mtx;
	struct atpic	atpic[2];
	uint8_t		elc[2];
};

#define	VATPIC_CTR0(vatpic, fmt)					\
	VM_CTR0((vatpic)->vm, fmt)

#define	VATPIC_CTR1(vatpic, fmt, a1)					\
	VM_CTR1((vatpic)->vm, fmt, a1)

#define	VATPIC_CTR2(vatpic, fmt, a1, a2)				\
	VM_CTR2((vatpic)->vm, fmt, a1, a2)

#define	VATPIC_CTR3(vatpic, fmt, a1, a2, a3)				\
	VM_CTR3((vatpic)->vm, fmt, a1, a2, a3)

#define	VATPIC_CTR4(vatpic, fmt, a1, a2, a3, a4)			\
	VM_CTR4((vatpic)->vm, fmt, a1, a2, a3, a4)

/*
 * Loop over all the pins in priority order from highest to lowest.
 */
#define	ATPIC_PIN_FOREACH(pinvar, atpic, tmpvar)			\
	for (tmpvar = 0, pinvar = (atpic->lowprio + 1) & 0x7;		\
	    tmpvar < 8;							\
	    tmpvar++, pinvar = (pinvar + 1) & 0x7)

static void vatpic_set_pinstate(struct vatpic *vatpic, int pin, bool newstate);

static __inline bool
master_atpic(struct vatpic *vatpic, struct atpic *atpic)
{
	if (atpic == &vatpic->atpic[0])
		return (true);
	else
		return (false);
}

static __inline int
vatpic_get_highest_isrpin(struct atpic *atpic)
{
	int bit, pin;
	int i;

	ATPIC_PIN_FOREACH(pin, atpic, i) {
		bit = (1 << pin);

		if (atpic->service & bit) {
			/*
			 * An IS bit that is masked by an IMR bit will not be
			 * cleared by a non-specific EOI in Special Mask Mode.
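			 * Such a pin is skipped here, so its IS bit stays
			 * set until the pin is unmasked and a later EOI can
			 * clear it.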
*/ if (atpic->smm && (atpic->mask & bit) != 0) continue; else return (pin); } } return (-1); } static __inline int vatpic_get_highest_irrpin(struct atpic *atpic) { int serviced; int bit, pin, tmp; /* * In 'Special Fully-Nested Mode' when an interrupt request from * a slave is in service, the slave is not locked out from the * master's priority logic. */ serviced = atpic->service; if (atpic->sfn) serviced &= ~(1 << 2); /* * In 'Special Mask Mode', when a mask bit is set in OCW1 it inhibits * further interrupts at that level and enables interrupts from all * other levels that are not masked. In other words the ISR has no * bearing on the levels that can generate interrupts. */ if (atpic->smm) serviced = 0; ATPIC_PIN_FOREACH(pin, atpic, tmp) { bit = 1 << pin; /* * If there is already an interrupt in service at the same * or higher priority then bail. */ if ((serviced & bit) != 0) break; /* * If an interrupt is asserted and not masked then return * the corresponding 'pin' to the caller. */ if ((atpic->request & bit) != 0 && (atpic->mask & bit) == 0) return (pin); } return (-1); } static void vatpic_notify_intr(struct vatpic *vatpic) { struct atpic *atpic; int pin; KASSERT(VATPIC_LOCKED(vatpic), ("vatpic_notify_intr not locked")); /* * First check the slave. */ atpic = &vatpic->atpic[1]; if (!atpic->intr_raised && (pin = vatpic_get_highest_irrpin(atpic)) != -1) { VATPIC_CTR4(vatpic, "atpic slave notify pin = %d " "(imr 0x%x irr 0x%x isr 0x%x)", pin, atpic->mask, atpic->request, atpic->service); /* * Cascade the request from the slave to the master. */ atpic->intr_raised = true; vatpic_set_pinstate(vatpic, 2, true); vatpic_set_pinstate(vatpic, 2, false); } else { VATPIC_CTR3(vatpic, "atpic slave no eligible interrupts " "(imr 0x%x irr 0x%x isr 0x%x)", atpic->mask, atpic->request, atpic->service); } /* * Then check the master. */ atpic = &vatpic->atpic[0]; if (!atpic->intr_raised && (pin = vatpic_get_highest_irrpin(atpic)) != -1) { VATPIC_CTR4(vatpic, "atpic master notify pin = %d " "(imr 0x%x irr 0x%x isr 0x%x)", pin, atpic->mask, atpic->request, atpic->service); /* * From Section 3.6.2, "Interrupt Modes", in the * MPtable Specification, Version 1.4 * * PIC interrupts are routed to both the Local APIC * and the I/O APIC to support operation in 1 of 3 * modes. * * 1. Legacy PIC Mode: the PIC effectively bypasses * all APIC components. In this mode the local APIC is * disabled and LINT0 is reconfigured as INTR to * deliver the PIC interrupt directly to the CPU. * * 2. Virtual Wire Mode: the APIC is treated as a * virtual wire which delivers interrupts from the PIC * to the CPU. In this mode LINT0 is programmed as * ExtINT to indicate that the PIC is the source of * the interrupt. * * 3. Virtual Wire Mode via I/O APIC: PIC interrupts are * fielded by the I/O APIC and delivered to the appropriate * CPU. In this mode the I/O APIC input 0 is programmed * as ExtINT to indicate that the PIC is the source of the * interrupt. 
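	 *
	 * The code below drives modes 2 and 3 at the same time: it raises
	 * LINT0 on the local APICs and pulses I/O APIC pin 0, so the guest
	 * receives the interrupt through whichever wiring it has configured.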
*/ atpic->intr_raised = true; lapic_set_local_intr(vatpic->vm, -1, APIC_LVT_LINT0); vioapic_pulse_irq(vatpic->vm, 0); } else { VATPIC_CTR3(vatpic, "atpic master no eligible interrupts " "(imr 0x%x irr 0x%x isr 0x%x)", atpic->mask, atpic->request, atpic->service); } } static int vatpic_icw1(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) { VATPIC_CTR1(vatpic, "atpic icw1 0x%x", val); atpic->ready = false; atpic->icw_num = 1; atpic->request = 0; atpic->mask = 0; atpic->lowprio = 7; atpic->rd_cmd_reg = 0; atpic->poll = 0; atpic->smm = 0; if ((val & ICW1_SNGL) != 0) { VATPIC_CTR0(vatpic, "vatpic cascade mode required"); return (-1); } if ((val & ICW1_IC4) == 0) { VATPIC_CTR0(vatpic, "vatpic icw4 required"); return (-1); } atpic->icw_num++; return (0); } static int vatpic_icw2(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) { VATPIC_CTR1(vatpic, "atpic icw2 0x%x", val); atpic->irq_base = val & 0xf8; atpic->icw_num++; return (0); } static int vatpic_icw3(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) { VATPIC_CTR1(vatpic, "atpic icw3 0x%x", val); atpic->icw_num++; return (0); } static int vatpic_icw4(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) { VATPIC_CTR1(vatpic, "atpic icw4 0x%x", val); if ((val & ICW4_8086) == 0) { VATPIC_CTR0(vatpic, "vatpic microprocessor mode required"); return (-1); } if ((val & ICW4_AEOI) != 0) atpic->aeoi = true; if ((val & ICW4_SFNM) != 0) { if (master_atpic(vatpic, atpic)) { atpic->sfn = true; } else { VATPIC_CTR1(vatpic, "Ignoring special fully nested " "mode on slave atpic: %#x", val); } } atpic->icw_num = 0; atpic->ready = true; return (0); } static int vatpic_ocw1(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) { VATPIC_CTR1(vatpic, "atpic ocw1 0x%x", val); atpic->mask = val & 0xff; return (0); } static int vatpic_ocw2(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) { VATPIC_CTR1(vatpic, "atpic ocw2 0x%x", val); atpic->rotate = ((val & OCW2_R) != 0); if ((val & OCW2_EOI) != 0) { int isr_bit; if ((val & OCW2_SL) != 0) { /* specific EOI */ isr_bit = val & 0x7; } else { /* non-specific EOI */ isr_bit = vatpic_get_highest_isrpin(atpic); } if (isr_bit != -1) { atpic->service &= ~(1 << isr_bit); if (atpic->rotate) atpic->lowprio = isr_bit; } } else if ((val & OCW2_SL) != 0 && atpic->rotate == true) { /* specific priority */ atpic->lowprio = val & 0x7; } return (0); } static int vatpic_ocw3(struct vatpic *vatpic, struct atpic *atpic, uint8_t val) { VATPIC_CTR1(vatpic, "atpic ocw3 0x%x", val); if (val & OCW3_ESMM) { atpic->smm = val & OCW3_SMM ? 1 : 0; VATPIC_CTR2(vatpic, "%s atpic special mask mode %s", master_atpic(vatpic, atpic) ? "master" : "slave", atpic->smm ? 
"enabled" : "disabled"); } if (val & OCW3_RR) { /* read register command */ atpic->rd_cmd_reg = val & OCW3_RIS; /* Polling mode */ atpic->poll = ((val & OCW3_P) != 0); } return (0); } static void vatpic_set_pinstate(struct vatpic *vatpic, int pin, bool newstate) { struct atpic *atpic; int oldcnt, newcnt; bool level; KASSERT(pin >= 0 && pin < 16, ("vatpic_set_pinstate: invalid pin number %d", pin)); KASSERT(VATPIC_LOCKED(vatpic), ("vatpic_set_pinstate: vatpic is not locked")); atpic = &vatpic->atpic[pin >> 3]; oldcnt = atpic->acnt[pin & 0x7]; if (newstate) atpic->acnt[pin & 0x7]++; else atpic->acnt[pin & 0x7]--; newcnt = atpic->acnt[pin & 0x7]; if (newcnt < 0) { VATPIC_CTR2(vatpic, "atpic pin%d: bad acnt %d", pin, newcnt); } level = ((vatpic->elc[pin >> 3] & (1 << (pin & 0x7))) != 0); if ((oldcnt == 0 && newcnt == 1) || (newcnt > 0 && level == true)) { /* rising edge or level */ VATPIC_CTR1(vatpic, "atpic pin%d: asserted", pin); atpic->request |= (1 << (pin & 0x7)); } else if (oldcnt == 1 && newcnt == 0) { /* falling edge */ VATPIC_CTR1(vatpic, "atpic pin%d: deasserted", pin); if (level) atpic->request &= ~(1 << (pin & 0x7)); } else { VATPIC_CTR3(vatpic, "atpic pin%d: %s, ignored, acnt %d", pin, newstate ? "asserted" : "deasserted", newcnt); } vatpic_notify_intr(vatpic); } static int vatpic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate) { struct vatpic *vatpic; struct atpic *atpic; if (irq < 0 || irq > 15) return (EINVAL); vatpic = vm_atpic(vm); atpic = &vatpic->atpic[irq >> 3]; if (atpic->ready == false) return (0); VATPIC_LOCK(vatpic); switch (irqstate) { case IRQSTATE_ASSERT: vatpic_set_pinstate(vatpic, irq, true); break; case IRQSTATE_DEASSERT: vatpic_set_pinstate(vatpic, irq, false); break; case IRQSTATE_PULSE: vatpic_set_pinstate(vatpic, irq, true); vatpic_set_pinstate(vatpic, irq, false); break; default: panic("vatpic_set_irqstate: invalid irqstate %d", irqstate); } VATPIC_UNLOCK(vatpic); return (0); } int vatpic_assert_irq(struct vm *vm, int irq) { return (vatpic_set_irqstate(vm, irq, IRQSTATE_ASSERT)); } int vatpic_deassert_irq(struct vm *vm, int irq) { return (vatpic_set_irqstate(vm, irq, IRQSTATE_DEASSERT)); } int vatpic_pulse_irq(struct vm *vm, int irq) { return (vatpic_set_irqstate(vm, irq, IRQSTATE_PULSE)); } int vatpic_set_irq_trigger(struct vm *vm, int irq, enum vm_intr_trigger trigger) { struct vatpic *vatpic; if (irq < 0 || irq > 15) return (EINVAL); /* * See comment in vatpic_elc_handler. These IRQs must be * edge triggered. */ if (trigger == LEVEL_TRIGGER) { switch (irq) { case 0: case 1: case 2: case 8: case 13: return (EINVAL); } } vatpic = vm_atpic(vm); VATPIC_LOCK(vatpic); if (trigger == LEVEL_TRIGGER) vatpic->elc[irq >> 3] |= 1 << (irq & 0x7); else vatpic->elc[irq >> 3] &= ~(1 << (irq & 0x7)); VATPIC_UNLOCK(vatpic); return (0); } void vatpic_pending_intr(struct vm *vm, int *vecptr) { struct vatpic *vatpic; struct atpic *atpic; int pin; vatpic = vm_atpic(vm); atpic = &vatpic->atpic[0]; VATPIC_LOCK(vatpic); pin = vatpic_get_highest_irrpin(atpic); if (pin == 2) { atpic = &vatpic->atpic[1]; pin = vatpic_get_highest_irrpin(atpic); } /* * If there are no pins active at this moment then return the spurious * interrupt vector instead. 
*/ if (pin == -1) pin = 7; KASSERT(pin >= 0 && pin <= 7, ("%s: invalid pin %d", __func__, pin)); *vecptr = atpic->irq_base + pin; VATPIC_UNLOCK(vatpic); } static void vatpic_pin_accepted(struct atpic *atpic, int pin) { atpic->intr_raised = false; if (atpic->acnt[pin] == 0) atpic->request &= ~(1 << pin); if (atpic->aeoi == true) { if (atpic->rotate == true) atpic->lowprio = pin; } else { atpic->service |= (1 << pin); } } void vatpic_intr_accepted(struct vm *vm, int vector) { struct vatpic *vatpic; int pin; vatpic = vm_atpic(vm); VATPIC_LOCK(vatpic); pin = vector & 0x7; if ((vector & ~0x7) == vatpic->atpic[1].irq_base) { vatpic_pin_accepted(&vatpic->atpic[1], pin); /* * If this vector originated from the slave, * accept the cascaded interrupt too. */ vatpic_pin_accepted(&vatpic->atpic[0], 2); } else { vatpic_pin_accepted(&vatpic->atpic[0], pin); } vatpic_notify_intr(vatpic); VATPIC_UNLOCK(vatpic); } static int vatpic_read(struct vatpic *vatpic, struct atpic *atpic, bool in, int port, int bytes, uint32_t *eax) { int pin; VATPIC_LOCK(vatpic); if (atpic->poll) { atpic->poll = 0; pin = vatpic_get_highest_irrpin(atpic); if (pin >= 0) { vatpic_pin_accepted(atpic, pin); *eax = 0x80 | pin; } else { *eax = 0; } } else { if (port & ICU_IMR_OFFSET) { /* read interrupt mask register */ *eax = atpic->mask; } else { if (atpic->rd_cmd_reg == OCW3_RIS) { /* read interrupt service register */ *eax = atpic->service; } else { /* read interrupt request register */ *eax = atpic->request; } } } VATPIC_UNLOCK(vatpic); return (0); } static int vatpic_write(struct vatpic *vatpic, struct atpic *atpic, bool in, int port, int bytes, uint32_t *eax) { int error; uint8_t val; error = 0; val = *eax; VATPIC_LOCK(vatpic); if (port & ICU_IMR_OFFSET) { switch (atpic->icw_num) { case 2: error = vatpic_icw2(vatpic, atpic, val); break; case 3: error = vatpic_icw3(vatpic, atpic, val); break; case 4: error = vatpic_icw4(vatpic, atpic, val); break; default: error = vatpic_ocw1(vatpic, atpic, val); break; } } else { if (val & (1 << 4)) error = vatpic_icw1(vatpic, atpic, val); if (atpic->ready) { if (val & (1 << 3)) error = vatpic_ocw3(vatpic, atpic, val); else error = vatpic_ocw2(vatpic, atpic, val); } } if (atpic->ready) vatpic_notify_intr(vatpic); VATPIC_UNLOCK(vatpic); return (error); } int vatpic_master_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; struct atpic *atpic; vatpic = vm_atpic(vm); atpic = &vatpic->atpic[0]; if (bytes != 1) return (-1); if (in) { return (vatpic_read(vatpic, atpic, in, port, bytes, eax)); } return (vatpic_write(vatpic, atpic, in, port, bytes, eax)); } int vatpic_slave_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; struct atpic *atpic; vatpic = vm_atpic(vm); atpic = &vatpic->atpic[1]; if (bytes != 1) return (-1); if (in) { return (vatpic_read(vatpic, atpic, in, port, bytes, eax)); } return (vatpic_write(vatpic, atpic, in, port, bytes, eax)); } int vatpic_elc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpic *vatpic; bool is_master; vatpic = vm_atpic(vm); is_master = (port == IO_ELCR1); if (bytes != 1) return (-1); VATPIC_LOCK(vatpic); if (in) { if (is_master) *eax = vatpic->elc[0]; else *eax = vatpic->elc[1]; } else { /* * For the master PIC the cascade channel (IRQ2), the * heartbeat timer (IRQ0), and the keyboard * controller (IRQ1) cannot be programmed for level * mode.
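 * (An illustrative sketch of both resulting write masks follows; the
 * matching slave-side constraints are spelled out right after it.)
 */
/*
 * Illustrative only, not part of this change: IRQs 0-2 map to bits 0-2
 * of ELCR1 and IRQs 8 and 13 map to bits 0 and 5 of ELCR2, which is
 * where the 0xf8 and 0xde masks used below come from.
 */
static inline unsigned char
example_elc_write_mask(int is_master, unsigned char val)
{
	/* Force the edge-only IRQ bits to zero on either ELCR register. */
	return (val & (is_master ? 0xf8 : 0xde));
}
/*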
* * For the slave PIC the real time clock (IRQ8) and * the floating point error interrupt (IRQ13) cannot * be programmed for level mode. */ if (is_master) vatpic->elc[0] = (*eax & 0xf8); else vatpic->elc[1] = (*eax & 0xde); } VATPIC_UNLOCK(vatpic); return (0); } struct vatpic * vatpic_init(struct vm *vm) { struct vatpic *vatpic; vatpic = malloc(sizeof(struct vatpic), M_VATPIC, M_WAITOK | M_ZERO); vatpic->vm = vm; mtx_init(&vatpic->mtx, "vatpic lock", NULL, MTX_SPIN); return (vatpic); } void vatpic_cleanup(struct vatpic *vatpic) { free(vatpic, M_VATPIC); } Index: head/sys/amd64/vmm/io/vatpit.c =================================================================== --- head/sys/amd64/vmm/io/vatpit.c (revision 282286) +++ head/sys/amd64/vmm/io/vatpit.c (revision 282287) @@ -1,458 +1,457 @@ /*- * Copyright (c) 2014 Tycho Nightingale * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include #include #include #include #include "vmm_ktr.h" #include "vatpic.h" #include "vioapic.h" #include "vatpit.h" static MALLOC_DEFINE(M_VATPIT, "atpit", "bhyve virtual atpit (8254)"); #define VATPIT_LOCK(vatpit) mtx_lock_spin(&((vatpit)->mtx)) #define VATPIT_UNLOCK(vatpit) mtx_unlock_spin(&((vatpit)->mtx)) #define VATPIT_LOCKED(vatpit) mtx_owned(&((vatpit)->mtx)) #define TIMER_SEL_MASK 0xc0 #define TIMER_RW_MASK 0x30 #define TIMER_MODE_MASK 0x0f #define TIMER_SEL_READBACK 0xc0 #define TIMER_STS_OUT 0x80 #define TIMER_STS_NULLCNT 0x40 #define TIMER_RB_LCTR 0x20 #define TIMER_RB_LSTATUS 0x10 #define TIMER_RB_CTR_2 0x08 #define TIMER_RB_CTR_1 0x04 #define TIMER_RB_CTR_0 0x02 #define TMR2_OUT_STS 0x20 #define PIT_8254_FREQ 1193182 #define TIMER_DIV(freq, hz) (((freq) + (hz) / 2) / (hz)) struct vatpit_callout_arg { struct vatpit *vatpit; int channel_num; }; struct channel { int mode; uint16_t initial; /* initial counter value */ sbintime_t now_sbt; /* uptime when counter was loaded */ uint8_t cr[2]; uint8_t ol[2]; bool slatched; /* status latched */ uint8_t status; int crbyte; int olbyte; int frbyte; struct callout callout; sbintime_t callout_sbt; /* target time */ struct vatpit_callout_arg callout_arg; }; struct vatpit { struct vm *vm; struct mtx mtx; sbintime_t freq_sbt; struct channel channel[3]; }; static void pit_timer_start_cntr0(struct vatpit *vatpit); static int vatpit_get_out(struct vatpit *vatpit, int channel) { struct channel *c; sbintime_t delta_ticks; int out; c = &vatpit->channel[channel]; switch (c->mode) { case TIMER_INTTC: delta_ticks = (sbinuptime() - c->now_sbt) / vatpit->freq_sbt; out = ((c->initial - delta_ticks) <= 0); break; default: out = 0; break; } return (out); } static void vatpit_callout_handler(void *a) { struct vatpit_callout_arg *arg = a; struct vatpit *vatpit; struct callout *callout; struct channel *c; vatpit = arg->vatpit; c = &vatpit->channel[arg->channel_num]; callout = &c->callout; VM_CTR1(vatpit->vm, "atpit t%d fired", arg->channel_num); VATPIT_LOCK(vatpit); if (callout_pending(callout)) /* callout was reset */ goto done; if (!callout_active(callout)) /* callout was stopped */ goto done; callout_deactivate(callout); if (c->mode == TIMER_RATEGEN) { pit_timer_start_cntr0(vatpit); } vatpic_pulse_irq(vatpit->vm, 0); vioapic_pulse_irq(vatpit->vm, 2); done: VATPIT_UNLOCK(vatpit); return; } static void pit_timer_start_cntr0(struct vatpit *vatpit) { struct channel *c; sbintime_t now, delta, precision; c = &vatpit->channel[0]; if (c->initial != 0) { delta = c->initial * vatpit->freq_sbt; precision = delta >> tc_precexp; c->callout_sbt = c->callout_sbt + delta; /* * Reset 'callout_sbt' if the time that the callout * was supposed to fire is more than 'c->initial' * ticks in the past. */ now = sbinuptime(); if (c->callout_sbt < now) c->callout_sbt = now + delta; callout_reset_sbt(&c->callout, c->callout_sbt, precision, vatpit_callout_handler, &c->callout_arg, C_ABSOLUTE); } } static uint16_t pit_update_counter(struct vatpit *vatpit, struct channel *c, bool latch) { uint16_t lval; sbintime_t delta_ticks; /* cannot latch a new value until the old one has been consumed */ if (latch && c->olbyte != 0) return (0); if (c->initial == 0) { /* * This is possibly an o/s bug - reading the value of * the timer without having set up the initial value. * * The original user-space version of this code set * the timer to 100hz in this condition; do the same * here. 
*/ c->initial = TIMER_DIV(PIT_8254_FREQ, 100); c->now_sbt = sbinuptime(); c->status &= ~TIMER_STS_NULLCNT; } delta_ticks = (sbinuptime() - c->now_sbt) / vatpit->freq_sbt; lval = c->initial - delta_ticks % c->initial; if (latch) { c->olbyte = 2; c->ol[1] = lval; /* LSB */ c->ol[0] = lval >> 8; /* MSB */ } return (lval); } static int pit_readback1(struct vatpit *vatpit, int channel, uint8_t cmd) { struct channel *c; c = &vatpit->channel[channel]; /* * Latch the count/status of the timer if not already latched. * N.B. that the count/status latch-select bits are active-low. */ if (!(cmd & TIMER_RB_LCTR) && !c->olbyte) { (void) pit_update_counter(vatpit, c, true); } if (!(cmd & TIMER_RB_LSTATUS) && !c->slatched) { c->slatched = true; /* * For mode 0, see if the elapsed time is greater * than the initial value - this results in the * output pin being set to 1 in the status byte. */ if (c->mode == TIMER_INTTC && vatpit_get_out(vatpit, channel)) c->status |= TIMER_STS_OUT; else c->status &= ~TIMER_STS_OUT; } return (0); } static int pit_readback(struct vatpit *vatpit, uint8_t cmd) { int error; /* * The readback command can apply to all timers. */ error = 0; if (cmd & TIMER_RB_CTR_0) error = pit_readback1(vatpit, 0, cmd); if (!error && cmd & TIMER_RB_CTR_1) error = pit_readback1(vatpit, 1, cmd); if (!error && cmd & TIMER_RB_CTR_2) error = pit_readback1(vatpit, 2, cmd); return (error); } static int vatpit_update_mode(struct vatpit *vatpit, uint8_t val) { struct channel *c; int sel, rw, mode; sel = val & TIMER_SEL_MASK; rw = val & TIMER_RW_MASK; mode = val & TIMER_MODE_MASK; if (sel == TIMER_SEL_READBACK) return (pit_readback(vatpit, val)); if (rw != TIMER_LATCH && rw != TIMER_16BIT) return (-1); if (rw != TIMER_LATCH) { /* * Counter mode is not affected when issuing a * latch command. */ if (mode != TIMER_INTTC && mode != TIMER_RATEGEN && mode != TIMER_SQWAVE && mode != TIMER_SWSTROBE) return (-1); } c = &vatpit->channel[sel >> 6]; if (rw == TIMER_LATCH) pit_update_counter(vatpit, c, true); else { c->mode = mode; c->olbyte = 0; /* reset latch after reprogramming */ c->status |= TIMER_STS_NULLCNT; } return (0); } int vatpit_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpit *vatpit; struct channel *c; uint8_t val; int error; vatpit = vm_atpit(vm); if (bytes != 1) return (-1); val = *eax; if (port == TIMER_MODE) { if (in) { VM_CTR0(vatpit->vm, "vatpit attempt to read mode"); return (-1); } VATPIT_LOCK(vatpit); error = vatpit_update_mode(vatpit, val); VATPIT_UNLOCK(vatpit); return (error); } /* counter ports */ KASSERT(port >= TIMER_CNTR0 && port <= TIMER_CNTR2, ("invalid port 0x%x", port)); c = &vatpit->channel[port - TIMER_CNTR0]; VATPIT_LOCK(vatpit); if (in && c->slatched) { /* * Return the status byte if latched */ *eax = c->status; c->slatched = false; c->status = 0; } else if (in) { /* * The spec says that once the output latch is completely * read it should revert to "following" the counter. Use * the free running counter for this case (i.e. Linux * TSC calibration). Assuming the access mode is 16-bit, * toggle the MSB/LSB bit on each read. 
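 *
 * A condensed sketch of that toggle (illustrative only, not part of
 * this change; 'frbyte' plays the role of the flip-flop used below):
 */
static inline unsigned char
example_free_running_read(unsigned short counter, int *frbyte)
{
	unsigned char b;

	/* Even reads return the LSB, odd reads the MSB. */
	b = (*frbyte ? (counter >> 8) : counter) & 0xff;
	*frbyte ^= 1;
	return (b);
}
/*
 * (end of illustrative sketch)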
*/ if (c->olbyte == 0) { uint16_t tmp; tmp = pit_update_counter(vatpit, c, false); if (c->frbyte) tmp >>= 8; tmp &= 0xff; *eax = tmp; c->frbyte ^= 1; } else *eax = c->ol[--c->olbyte]; } else { c->cr[c->crbyte++] = *eax; if (c->crbyte == 2) { c->status &= ~TIMER_STS_NULLCNT; c->frbyte = 0; c->crbyte = 0; c->initial = c->cr[0] | (uint16_t)c->cr[1] << 8; c->now_sbt = sbinuptime(); /* Start an interval timer for channel 0 */ if (port == TIMER_CNTR0) { c->callout_sbt = c->now_sbt; pit_timer_start_cntr0(vatpit); } if (c->initial == 0) c->initial = 0xffff; } } VATPIT_UNLOCK(vatpit); return (0); } int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *eax) { struct vatpit *vatpit; vatpit = vm_atpit(vm); if (in) { VATPIT_LOCK(vatpit); if (vatpit_get_out(vatpit, 2)) *eax = TMR2_OUT_STS; else *eax = 0; VATPIT_UNLOCK(vatpit); } return (0); } struct vatpit * vatpit_init(struct vm *vm) { struct vatpit *vatpit; struct bintime bt; struct vatpit_callout_arg *arg; int i; vatpit = malloc(sizeof(struct vatpit), M_VATPIT, M_WAITOK | M_ZERO); vatpit->vm = vm; mtx_init(&vatpit->mtx, "vatpit lock", NULL, MTX_SPIN); FREQ2BT(PIT_8254_FREQ, &bt); vatpit->freq_sbt = bttosbt(bt); for (i = 0; i < 3; i++) { callout_init(&vatpit->channel[i].callout, true); arg = &vatpit->channel[i].callout_arg; arg->vatpit = vatpit; arg->channel_num = i; } return (vatpit); } void vatpit_cleanup(struct vatpit *vatpit) { int i; for (i = 0; i < 3; i++) callout_drain(&vatpit->channel[i].callout); free(vatpit, M_VATPIT); } Index: head/sys/amd64/vmm/io/vhpet.c =================================================================== --- head/sys/amd64/vmm/io/vhpet.c (revision 282286) +++ head/sys/amd64/vmm/io/vhpet.c (revision 282287) @@ -1,760 +1,759 @@ /*- * Copyright (c) 2013 Tycho Nightingale * Copyright (c) 2013 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include -#include #include #include #include #include "vmm_lapic.h" #include "vatpic.h" #include "vioapic.h" #include "vhpet.h" #include "vmm_ktr.h" static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet"); #define HPET_FREQ 10000000 /* 10.0 MHz */ #define FS_PER_S 1000000000000000ul /* Timer N Configuration and Capabilities Register */ #define HPET_TCAP_RO_MASK (HPET_TCAP_INT_ROUTE | \ HPET_TCAP_FSB_INT_DEL | \ HPET_TCAP_SIZE | \ HPET_TCAP_PER_INT) /* * HPET requires at least 3 timers and up to 32 timers per block. */ #define VHPET_NUM_TIMERS 8 CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32); struct vhpet_callout_arg { struct vhpet *vhpet; int timer_num; }; struct vhpet { struct vm *vm; struct mtx mtx; sbintime_t freq_sbt; uint64_t config; /* Configuration */ uint64_t isr; /* Interrupt Status */ uint32_t countbase; /* HPET counter base value */ sbintime_t countbase_sbt; /* uptime corresponding to base value */ struct { uint64_t cap_config; /* Configuration */ uint64_t msireg; /* FSB interrupt routing */ uint32_t compval; /* Comparator */ uint32_t comprate; struct callout callout; sbintime_t callout_sbt; /* time when counter==compval */ struct vhpet_callout_arg arg; } timer[VHPET_NUM_TIMERS]; }; #define VHPET_LOCK(vhp) mtx_lock(&((vhp)->mtx)) #define VHPET_UNLOCK(vhp) mtx_unlock(&((vhp)->mtx)) static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now); static uint64_t vhpet_capabilities(void) { uint64_t cap = 0; cap |= 0x8086 << 16; /* vendor id */ cap |= (VHPET_NUM_TIMERS - 1) << 8; /* number of timers */ cap |= 1; /* revision */ cap &= ~HPET_CAP_COUNT_SIZE; /* 32-bit timer */ cap &= 0xffffffff; cap |= (FS_PER_S / HPET_FREQ) << 32; /* tick period in fs */ return (cap); } static __inline bool vhpet_counter_enabled(struct vhpet *vhpet) { return ((vhpet->config & HPET_CNF_ENABLE) ? true : false); } static __inline bool vhpet_timer_msi_enabled(struct vhpet *vhpet, int n) { const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN; if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable) return (true); else return (false); } static __inline int vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n) { /* * If the timer is configured to use MSI then treat it as if the * timer is not connected to the ioapic. */ if (vhpet_timer_msi_enabled(vhpet, n)) return (0); return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9); } static uint32_t vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr) { uint32_t val; sbintime_t now, delta; val = vhpet->countbase; if (vhpet_counter_enabled(vhpet)) { now = sbinuptime(); delta = now - vhpet->countbase_sbt; KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: " "%#lx to %#lx", vhpet->countbase_sbt, now)); val += delta / vhpet->freq_sbt; if (nowptr != NULL) *nowptr = now; } else { /* * The sbinuptime corresponding to the 'countbase' is * meaningless when the counter is disabled. Make sure * that the caller doesn't want to use it.
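 *
 * The running-counter arithmetic above reduces to the following
 * (illustrative only, not part of this change; sbintime_t is
 * approximated with a plain 64-bit integer):
 */
static inline uint32_t
example_hpet_counter(uint32_t countbase, int64_t now, int64_t base_sbt,
    int64_t freq_sbt)
{
	/* Elapsed sbintime units over units-per-tick yields elapsed ticks. */
	return (countbase + (uint32_t)((now - base_sbt) / freq_sbt));
}
/*
 * (end of illustrative sketch)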
*/ KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL")); } return (val); } static void vhpet_timer_clear_isr(struct vhpet *vhpet, int n) { int pin; if (vhpet->isr & (1 << n)) { pin = vhpet_timer_ioapic_pin(vhpet, n); KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n)); vioapic_deassert_irq(vhpet->vm, pin); vhpet->isr &= ~(1 << n); } } static __inline bool vhpet_periodic_timer(struct vhpet *vhpet, int n) { return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0); } static __inline bool vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n) { return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0); } static __inline bool vhpet_timer_edge_trig(struct vhpet *vhpet, int n) { KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: " "timer %d is using MSI", n)); if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0) return (true); else return (false); } static void vhpet_timer_interrupt(struct vhpet *vhpet, int n) { int pin; /* If interrupts are not enabled for this timer then just return. */ if (!vhpet_timer_interrupt_enabled(vhpet, n)) return; /* * If a level triggered interrupt is already asserted then just return. */ if ((vhpet->isr & (1 << n)) != 0) { VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n); return; } if (vhpet_timer_msi_enabled(vhpet, n)) { lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32, vhpet->timer[n].msireg & 0xffffffff); return; } pin = vhpet_timer_ioapic_pin(vhpet, n); if (pin == 0) { VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n); return; } if (vhpet_timer_edge_trig(vhpet, n)) { vioapic_pulse_irq(vhpet->vm, pin); } else { vhpet->isr |= 1 << n; vioapic_assert_irq(vhpet->vm, pin); } } static void vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter) { uint32_t compval, comprate, compnext; KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n)); compval = vhpet->timer[n].compval; comprate = vhpet->timer[n].comprate; /* * Calculate the comparator value to be used for the next periodic * interrupt. * * This function is commonly called from the callout handler. * In this scenario the 'counter' is ahead of 'compval'. To find * the next value to program into the accumulator we divide the * number space between 'compval' and 'counter' into 'comprate' * sized units. The 'compval' is rounded up such that it is "ahead" * of 'counter'. */ compnext = compval + ((counter - compval) / comprate + 1) * comprate; vhpet->timer[n].compval = compnext; } static void vhpet_handler(void *a) { int n; uint32_t counter; sbintime_t now; struct vhpet *vhpet; struct callout *callout; struct vhpet_callout_arg *arg; arg = a; vhpet = arg->vhpet; n = arg->timer_num; callout = &vhpet->timer[n].callout; VM_CTR1(vhpet->vm, "hpet t%d fired", n); VHPET_LOCK(vhpet); if (callout_pending(callout)) /* callout was reset */ goto done; if (!callout_active(callout)) /* callout was stopped */ goto done; callout_deactivate(callout); if (!vhpet_counter_enabled(vhpet)) panic("vhpet(%p) callout with counter disabled", vhpet); counter = vhpet_counter(vhpet, &now); vhpet_start_timer(vhpet, n, counter, now); vhpet_timer_interrupt(vhpet, n); done: VHPET_UNLOCK(vhpet); return; } static void vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now) { VM_CTR1(vhpet->vm, "hpet t%d stopped", n); callout_stop(&vhpet->timer[n].callout); /* * If the callout was scheduled to expire in the past but hasn't * had a chance to execute yet then trigger the timer interrupt * here.
Failing to do so will result in a missed timer interrupt * in the guest. This is especially bad in one-shot mode because * the next interrupt has to wait for the counter to wrap around. */ if (vhpet->timer[n].callout_sbt < now) { VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after " "stopping timer", n); vhpet_timer_interrupt(vhpet, n); } } static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now) { sbintime_t delta, precision; if (vhpet->timer[n].comprate != 0) vhpet_adjust_compval(vhpet, n, counter); else { /* * In one-shot mode it is the guest's responsibility to make * sure that the comparator value is not in the "past". The * hardware doesn't have any belt-and-suspenders to deal with * this so we don't either. */ } delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt; precision = delta >> tc_precexp; vhpet->timer[n].callout_sbt = now + delta; callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt, precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE); } static void vhpet_start_counting(struct vhpet *vhpet) { int i; vhpet->countbase_sbt = sbinuptime(); for (i = 0; i < VHPET_NUM_TIMERS; i++) { /* * Restart the timers based on the value of the main counter * when it stopped counting. */ vhpet_start_timer(vhpet, i, vhpet->countbase, vhpet->countbase_sbt); } } static void vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now) { int i; vhpet->countbase = counter; for (i = 0; i < VHPET_NUM_TIMERS; i++) vhpet_stop_timer(vhpet, i, now); } static __inline void update_register(uint64_t *regptr, uint64_t data, uint64_t mask) { *regptr &= ~mask; *regptr |= (data & mask); } static void vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data, uint64_t mask) { bool clear_isr; int old_pin, new_pin; uint32_t allowed_irqs; uint64_t oldval, newval; if (vhpet_timer_msi_enabled(vhpet, n) || vhpet_timer_edge_trig(vhpet, n)) { if (vhpet->isr & (1 << n)) panic("vhpet timer %d isr should not be asserted", n); } old_pin = vhpet_timer_ioapic_pin(vhpet, n); oldval = vhpet->timer[n].cap_config; newval = oldval; update_register(&newval, data, mask); newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE); newval |= oldval & HPET_TCAP_RO_MASK; if (newval == oldval) return; vhpet->timer[n].cap_config = newval; VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016x", n, newval); /* * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field. * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set * it to the default value of 0. */ allowed_irqs = vhpet->timer[n].cap_config >> 32; new_pin = vhpet_timer_ioapic_pin(vhpet, n); if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) { VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, " "allowed_irqs 0x%08x", n, new_pin, allowed_irqs); new_pin = 0; vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE; } if (!vhpet_periodic_timer(vhpet, n)) vhpet->timer[n].comprate = 0; /* * If the timer's ISR bit is set then clear it in the following cases: * - interrupt is disabled * - interrupt type is changed from level to edge or fsb. * - interrupt routing is changed * * This is to ensure that this timer's level triggered interrupt does * not remain asserted forever. 
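 *
 * The rule can be condensed into one predicate (illustrative only, not
 * part of this change; the parameters stand in for the vhpet_timer_*()
 * queries made below):
 */
static inline bool
example_must_clear_isr(bool intr_enabled, bool msi, bool edge,
    bool route_changed)
{
	/* A level intr that can no longer be EOIed must be deasserted. */
	return (!intr_enabled || msi || edge || route_changed);
}
/*
 * (end of illustrative sketch)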
*/ if (vhpet->isr & (1 << n)) { KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d", n, old_pin)); if (!vhpet_timer_interrupt_enabled(vhpet, n)) clear_isr = true; else if (vhpet_timer_msi_enabled(vhpet, n)) clear_isr = true; else if (vhpet_timer_edge_trig(vhpet, n)) clear_isr = true; else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin) clear_isr = true; else clear_isr = false; if (clear_isr) { VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to " "configuration change", n); vioapic_deassert_irq(vhpet->vm, old_pin); vhpet->isr &= ~(1 << n); } } } int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, void *arg) { struct vhpet *vhpet; uint64_t data, mask, oldval, val64; uint32_t isr_clear_mask, old_compval, old_comprate, counter; sbintime_t now, *nowptr; int i, offset; vhpet = vm_hpet(vm); offset = gpa - VHPET_BASE; VHPET_LOCK(vhpet); /* Accesses to the HPET should be 4 or 8 bytes wide */ switch (size) { case 8: mask = 0xffffffffffffffff; data = val; break; case 4: mask = 0xffffffff; data = val; if ((offset & 0x4) != 0) { mask <<= 32; data <<= 32; } break; default: VM_CTR2(vhpet->vm, "hpet invalid mmio write: " "offset 0x%08x, size %d", offset, size); goto done; } /* Access to the HPET should be naturally aligned to its width */ if (offset & (size - 1)) { VM_CTR2(vhpet->vm, "hpet invalid mmio write: " "offset 0x%08x, size %d", offset, size); goto done; } if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) { /* * Get the most recent value of the counter before updating * the 'config' register. If the HPET is going to be disabled * then we need to update 'countbase' with the value right * before it is disabled. */ nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL; counter = vhpet_counter(vhpet, nowptr); oldval = vhpet->config; update_register(&vhpet->config, data, mask); /* * LegacyReplacement Routing is not supported so clear the * bit explicitly. */ vhpet->config &= ~HPET_CNF_LEG_RT; if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) { if (vhpet_counter_enabled(vhpet)) { vhpet_start_counting(vhpet); VM_CTR0(vhpet->vm, "hpet enabled"); } else { vhpet_stop_counting(vhpet, counter, now); VM_CTR0(vhpet->vm, "hpet disabled"); } } goto done; } if (offset == HPET_ISR || offset == HPET_ISR + 4) { isr_clear_mask = vhpet->isr & data; for (i = 0; i < VHPET_NUM_TIMERS; i++) { if ((isr_clear_mask & (1 << i)) != 0) { VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i); vhpet_timer_clear_isr(vhpet, i); } } goto done; } if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) { /* Zero-extend the counter to 64-bits before updating it */ val64 = vhpet_counter(vhpet, NULL); update_register(&val64, data, mask); vhpet->countbase = val64; if (vhpet_counter_enabled(vhpet)) vhpet_start_counting(vhpet); goto done; } for (i = 0; i < VHPET_NUM_TIMERS; i++) { if (offset == HPET_TIMER_CAP_CNF(i) || offset == HPET_TIMER_CAP_CNF(i) + 4) { vhpet_timer_update_config(vhpet, i, data, mask); break; } if (offset == HPET_TIMER_COMPARATOR(i) || offset == HPET_TIMER_COMPARATOR(i) + 4) { old_compval = vhpet->timer[i].compval; old_comprate = vhpet->timer[i].comprate; if (vhpet_periodic_timer(vhpet, i)) { /* * In periodic mode writes to the comparator * change the 'compval' register only if the * HPET_TCNF_VAL_SET bit is set in the config * register. 
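 *
 * A condensed sketch of that rule (illustrative only, not part of this
 * change; 'val_set' stands for the HPET_TCNF_VAL_SET config bit):
 */
static inline void
example_periodic_comparator_write(uint32_t *comprate, uint32_t *compval,
    uint32_t data, bool val_set)
{
	*comprate = data;		/* the rate always tracks the write */
	if (val_set)
		*compval = data;	/* the comparator only when armed */
}
/*
 * (end of illustrative sketch)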
*/ val64 = vhpet->timer[i].comprate; update_register(&val64, data, mask); vhpet->timer[i].comprate = val64; if ((vhpet->timer[i].cap_config & HPET_TCNF_VAL_SET) != 0) { vhpet->timer[i].compval = val64; } } else { KASSERT(vhpet->timer[i].comprate == 0, ("vhpet one-shot timer %d has invalid " "rate %u", i, vhpet->timer[i].comprate)); val64 = vhpet->timer[i].compval; update_register(&val64, data, mask); vhpet->timer[i].compval = val64; } vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET; if (vhpet->timer[i].compval != old_compval || vhpet->timer[i].comprate != old_comprate) { if (vhpet_counter_enabled(vhpet)) { counter = vhpet_counter(vhpet, &now); vhpet_start_timer(vhpet, i, counter, now); } } break; } if (offset == HPET_TIMER_FSB_VAL(i) || offset == HPET_TIMER_FSB_ADDR(i)) { update_register(&vhpet->timer[i].msireg, data, mask); break; } } done: VHPET_UNLOCK(vhpet); return (0); } int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size, void *arg) { int i, offset; struct vhpet *vhpet; uint64_t data; vhpet = vm_hpet(vm); offset = gpa - VHPET_BASE; VHPET_LOCK(vhpet); /* Accesses to the HPET should be 4 or 8 bytes wide */ if (size != 4 && size != 8) { VM_CTR2(vhpet->vm, "hpet invalid mmio read: " "offset 0x%08x, size %d", offset, size); data = 0; goto done; } /* Access to the HPET should be naturally aligned to its width */ if (offset & (size - 1)) { VM_CTR2(vhpet->vm, "hpet invalid mmio read: " "offset 0x%08x, size %d", offset, size); data = 0; goto done; } if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) { data = vhpet_capabilities(); goto done; } if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) { data = vhpet->config; goto done; } if (offset == HPET_ISR || offset == HPET_ISR + 4) { data = vhpet->isr; goto done; } if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) { data = vhpet_counter(vhpet, NULL); goto done; } for (i = 0; i < VHPET_NUM_TIMERS; i++) { if (offset == HPET_TIMER_CAP_CNF(i) || offset == HPET_TIMER_CAP_CNF(i) + 4) { data = vhpet->timer[i].cap_config; break; } if (offset == HPET_TIMER_COMPARATOR(i) || offset == HPET_TIMER_COMPARATOR(i) + 4) { data = vhpet->timer[i].compval; break; } if (offset == HPET_TIMER_FSB_VAL(i) || offset == HPET_TIMER_FSB_ADDR(i)) { data = vhpet->timer[i].msireg; break; } } if (i >= VHPET_NUM_TIMERS) data = 0; done: VHPET_UNLOCK(vhpet); if (size == 4) { if (offset & 0x4) data >>= 32; } *rval = data; return (0); } struct vhpet * vhpet_init(struct vm *vm) { int i, pincount; struct vhpet *vhpet; uint64_t allowed_irqs; struct vhpet_callout_arg *arg; struct bintime bt; vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO); vhpet->vm = vm; mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF); FREQ2BT(HPET_FREQ, &bt); vhpet->freq_sbt = bttosbt(bt); pincount = vioapic_pincount(vm); if (pincount >= 24) allowed_irqs = 0x00f00000; /* irqs 20, 21, 22 and 23 */ else allowed_irqs = 0; /* * Initialize HPET timer hardware state. 
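 *
 * For reference, the interrupt-routing half of 'cap_config' is derived
 * as follows (illustrative only, not part of this change):
 */
static inline uint64_t
example_allowed_irq_bits(int pincount)
{
	/* With 24+ ioapic pins, advertise IRQs 20-23: bits 20-23 set. */
	return ((uint64_t)(pincount >= 24 ? 0x00f00000 : 0) << 32);
}
/*
 * (end of illustrative sketch)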
*/ for (i = 0; i < VHPET_NUM_TIMERS; i++) { vhpet->timer[i].cap_config = allowed_irqs << 32; vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT; vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL; vhpet->timer[i].compval = 0xffffffff; callout_init(&vhpet->timer[i].callout, 1); arg = &vhpet->timer[i].arg; arg->vhpet = vhpet; arg->timer_num = i; } return (vhpet); } void vhpet_cleanup(struct vhpet *vhpet) { int i; for (i = 0; i < VHPET_NUM_TIMERS; i++) callout_drain(&vhpet->timer[i].callout); free(vhpet, M_VHPET); } int vhpet_getcap(struct vm_hpet_cap *cap) { cap->capabilities = vhpet_capabilities(); return (0); } Index: head/sys/amd64/vmm/io/vioapic.c =================================================================== --- head/sys/amd64/vmm/io/vioapic.c (revision 282286) +++ head/sys/amd64/vmm/io/vioapic.c (revision 282287) @@ -1,500 +1,499 @@ /*- * Copyright (c) 2013 Tycho Nightingale * Copyright (c) 2013 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include #include #include #include #include #include "vmm_ktr.h" #include "vmm_lapic.h" #include "vlapic.h" #include "vioapic.h" #define IOREGSEL 0x00 #define IOWIN 0x10 #define REDIR_ENTRIES 24 #define RTBL_RO_BITS ((uint64_t)(IOART_REM_IRR | IOART_DELIVS)) struct vioapic { struct vm *vm; struct mtx mtx; uint32_t id; uint32_t ioregsel; struct { uint64_t reg; int acnt; /* sum of pin asserts (+1) and deasserts (-1) */ } rtbl[REDIR_ENTRIES]; }; #define VIOAPIC_LOCK(vioapic) mtx_lock_spin(&((vioapic)->mtx)) #define VIOAPIC_UNLOCK(vioapic) mtx_unlock_spin(&((vioapic)->mtx)) #define VIOAPIC_LOCKED(vioapic) mtx_owned(&((vioapic)->mtx)) static MALLOC_DEFINE(M_VIOAPIC, "vioapic", "bhyve virtual ioapic"); #define VIOAPIC_CTR1(vioapic, fmt, a1) \ VM_CTR1((vioapic)->vm, fmt, a1) #define VIOAPIC_CTR2(vioapic, fmt, a1, a2) \ VM_CTR2((vioapic)->vm, fmt, a1, a2) #define VIOAPIC_CTR3(vioapic, fmt, a1, a2, a3) \ VM_CTR3((vioapic)->vm, fmt, a1, a2, a3) #define VIOAPIC_CTR4(vioapic, fmt, a1, a2, a3, a4) \ VM_CTR4((vioapic)->vm, fmt, a1, a2, a3, a4) #ifdef KTR static const char * pinstate_str(bool asserted) { if (asserted) return ("asserted"); else return ("deasserted"); } #endif static void vioapic_send_intr(struct vioapic *vioapic, int pin) { int vector, delmode; uint32_t low, high, dest; bool level, phys; KASSERT(pin >= 0 && pin < REDIR_ENTRIES, ("vioapic_set_pinstate: invalid pin number %d", pin)); KASSERT(VIOAPIC_LOCKED(vioapic), ("vioapic_set_pinstate: vioapic is not locked")); low = vioapic->rtbl[pin].reg; high = vioapic->rtbl[pin].reg >> 32; if ((low & IOART_INTMASK) == IOART_INTMSET) { VIOAPIC_CTR1(vioapic, "ioapic pin%d: masked", pin); return; } phys = ((low & IOART_DESTMOD) == IOART_DESTPHY); delmode = low & IOART_DELMOD; level = low & IOART_TRGRLVL ? 
true : false; if (level) vioapic->rtbl[pin].reg |= IOART_REM_IRR; vector = low & IOART_INTVEC; dest = high >> APIC_ID_SHIFT; vlapic_deliver_intr(vioapic->vm, level, dest, phys, delmode, vector); } static void vioapic_set_pinstate(struct vioapic *vioapic, int pin, bool newstate) { int oldcnt, newcnt; bool needintr; KASSERT(pin >= 0 && pin < REDIR_ENTRIES, ("vioapic_set_pinstate: invalid pin number %d", pin)); KASSERT(VIOAPIC_LOCKED(vioapic), ("vioapic_set_pinstate: vioapic is not locked")); oldcnt = vioapic->rtbl[pin].acnt; if (newstate) vioapic->rtbl[pin].acnt++; else vioapic->rtbl[pin].acnt--; newcnt = vioapic->rtbl[pin].acnt; if (newcnt < 0) { VIOAPIC_CTR2(vioapic, "ioapic pin%d: bad acnt %d", pin, newcnt); } needintr = false; if (oldcnt == 0 && newcnt == 1) { needintr = true; VIOAPIC_CTR1(vioapic, "ioapic pin%d: asserted", pin); } else if (oldcnt == 1 && newcnt == 0) { VIOAPIC_CTR1(vioapic, "ioapic pin%d: deasserted", pin); } else { VIOAPIC_CTR3(vioapic, "ioapic pin%d: %s, ignored, acnt %d", pin, pinstate_str(newstate), newcnt); } if (needintr) vioapic_send_intr(vioapic, pin); } enum irqstate { IRQSTATE_ASSERT, IRQSTATE_DEASSERT, IRQSTATE_PULSE }; static int vioapic_set_irqstate(struct vm *vm, int irq, enum irqstate irqstate) { struct vioapic *vioapic; if (irq < 0 || irq >= REDIR_ENTRIES) return (EINVAL); vioapic = vm_ioapic(vm); VIOAPIC_LOCK(vioapic); switch (irqstate) { case IRQSTATE_ASSERT: vioapic_set_pinstate(vioapic, irq, true); break; case IRQSTATE_DEASSERT: vioapic_set_pinstate(vioapic, irq, false); break; case IRQSTATE_PULSE: vioapic_set_pinstate(vioapic, irq, true); vioapic_set_pinstate(vioapic, irq, false); break; default: panic("vioapic_set_irqstate: invalid irqstate %d", irqstate); } VIOAPIC_UNLOCK(vioapic); return (0); } int vioapic_assert_irq(struct vm *vm, int irq) { return (vioapic_set_irqstate(vm, irq, IRQSTATE_ASSERT)); } int vioapic_deassert_irq(struct vm *vm, int irq) { return (vioapic_set_irqstate(vm, irq, IRQSTATE_DEASSERT)); } int vioapic_pulse_irq(struct vm *vm, int irq) { return (vioapic_set_irqstate(vm, irq, IRQSTATE_PULSE)); } /* * Reset the vlapic's trigger-mode register to reflect the ioapic pin * configuration. */ static void vioapic_update_tmr(struct vm *vm, int vcpuid, void *arg) { struct vioapic *vioapic; struct vlapic *vlapic; uint32_t low, high, dest; int delmode, pin, vector; bool level, phys; vlapic = vm_lapic(vm, vcpuid); vioapic = vm_ioapic(vm); VIOAPIC_LOCK(vioapic); /* * Reset all vectors to be edge-triggered. */ vlapic_reset_tmr(vlapic); for (pin = 0; pin < REDIR_ENTRIES; pin++) { low = vioapic->rtbl[pin].reg; high = vioapic->rtbl[pin].reg >> 32; level = low & IOART_TRGRLVL ? true : false; if (!level) continue; /* * For a level-triggered 'pin' let the vlapic figure out if * an assertion on this 'pin' would result in an interrupt * being delivered to it. If yes, then it will modify the * TMR bit associated with this vector to level-triggered. 
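 *
 * The field extraction performed below splits the 64-bit redirection
 * entry like this (illustrative only, not part of this change):
 */
static inline void
example_rte_split(uint64_t reg, uint32_t *low, uint32_t *high)
{
	*low = (uint32_t)reg;		/* vector, delmode, trigger mode */
	*high = (uint32_t)(reg >> 32);	/* destination, in the top byte */
}
/*
 * (end of illustrative sketch)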
*/ phys = ((low & IOART_DESTMOD) == IOART_DESTPHY); delmode = low & IOART_DELMOD; vector = low & IOART_INTVEC; dest = high >> APIC_ID_SHIFT; vlapic_set_tmr_level(vlapic, dest, phys, delmode, vector); } VIOAPIC_UNLOCK(vioapic); } static uint32_t vioapic_read(struct vioapic *vioapic, int vcpuid, uint32_t addr) { int regnum, pin, rshift; regnum = addr & 0xff; switch (regnum) { case IOAPIC_ID: return (vioapic->id); break; case IOAPIC_VER: return (((REDIR_ENTRIES - 1) << MAXREDIRSHIFT) | 0x11); break; case IOAPIC_ARB: return (vioapic->id); break; default: break; } /* redirection table entries */ if (regnum >= IOAPIC_REDTBL && regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) { pin = (regnum - IOAPIC_REDTBL) / 2; if ((regnum - IOAPIC_REDTBL) % 2) rshift = 32; else rshift = 0; return (vioapic->rtbl[pin].reg >> rshift); } return (0); } static void vioapic_write(struct vioapic *vioapic, int vcpuid, uint32_t addr, uint32_t data) { uint64_t data64, mask64; uint64_t last, changed; int regnum, pin, lshift; cpuset_t allvcpus; regnum = addr & 0xff; switch (regnum) { case IOAPIC_ID: vioapic->id = data & APIC_ID_MASK; break; case IOAPIC_VER: case IOAPIC_ARB: /* readonly */ break; default: break; } /* redirection table entries */ if (regnum >= IOAPIC_REDTBL && regnum < IOAPIC_REDTBL + REDIR_ENTRIES * 2) { pin = (regnum - IOAPIC_REDTBL) / 2; if ((regnum - IOAPIC_REDTBL) % 2) lshift = 32; else lshift = 0; last = vioapic->rtbl[pin].reg; data64 = (uint64_t)data << lshift; mask64 = (uint64_t)0xffffffff << lshift; vioapic->rtbl[pin].reg &= ~mask64 | RTBL_RO_BITS; vioapic->rtbl[pin].reg |= data64 & ~RTBL_RO_BITS; VIOAPIC_CTR2(vioapic, "ioapic pin%d: redir table entry %#lx", pin, vioapic->rtbl[pin].reg); /* * If any fields in the redirection table entry (except mask * or polarity) have changed then rendezvous all the vcpus * to update their vlapic trigger-mode registers. */ changed = last ^ vioapic->rtbl[pin].reg; if (changed & ~(IOART_INTMASK | IOART_INTPOL)) { VIOAPIC_CTR1(vioapic, "ioapic pin%d: recalculate " "vlapic trigger-mode register", pin); VIOAPIC_UNLOCK(vioapic); allvcpus = vm_active_cpus(vioapic->vm); vm_smp_rendezvous(vioapic->vm, vcpuid, allvcpus, vioapic_update_tmr, NULL); VIOAPIC_LOCK(vioapic); } /* * Generate an interrupt if the following conditions are met: * - pin is not masked * - previous interrupt has been EOIed * - pin level is asserted */ if ((vioapic->rtbl[pin].reg & IOART_INTMASK) == IOART_INTMCLR && (vioapic->rtbl[pin].reg & IOART_REM_IRR) == 0 && (vioapic->rtbl[pin].acnt > 0)) { VIOAPIC_CTR2(vioapic, "ioapic pin%d: asserted at rtbl " "write, acnt %d", pin, vioapic->rtbl[pin].acnt); vioapic_send_intr(vioapic, pin); } } } static int vioapic_mmio_rw(struct vioapic *vioapic, int vcpuid, uint64_t gpa, uint64_t *data, int size, bool doread) { uint64_t offset; offset = gpa - VIOAPIC_BASE; /* * The IOAPIC specification allows 32-bit wide accesses to the * IOREGSEL (offset 0) and IOWIN (offset 16) registers. 
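 *
 * From the guest's point of view this is the usual select/window pair
 * (illustrative pseudo-driver only, not part of this change):
 */
static inline uint32_t
example_ioapic_read(volatile uint32_t *base, uint32_t regnum)
{
	base[0] = regnum;	/* IOREGSEL: select the register... */
	return (base[4]);	/* IOWIN at byte offset 0x10: ...read it */
}
/*
 * (end of illustrative sketch)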
*/ if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) { if (doread) *data = 0; return (0); } VIOAPIC_LOCK(vioapic); if (offset == IOREGSEL) { if (doread) *data = vioapic->ioregsel; else vioapic->ioregsel = *data; } else { if (doread) { *data = vioapic_read(vioapic, vcpuid, vioapic->ioregsel); } else { vioapic_write(vioapic, vcpuid, vioapic->ioregsel, *data); } } VIOAPIC_UNLOCK(vioapic); return (0); } int vioapic_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size, void *arg) { int error; struct vioapic *vioapic; vioapic = vm_ioapic(vm); error = vioapic_mmio_rw(vioapic, vcpuid, gpa, rval, size, true); return (error); } int vioapic_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t wval, int size, void *arg) { int error; struct vioapic *vioapic; vioapic = vm_ioapic(vm); error = vioapic_mmio_rw(vioapic, vcpuid, gpa, &wval, size, false); return (error); } void vioapic_process_eoi(struct vm *vm, int vcpuid, int vector) { struct vioapic *vioapic; int pin; KASSERT(vector >= 0 && vector < 256, ("vioapic_process_eoi: invalid vector %d", vector)); vioapic = vm_ioapic(vm); VIOAPIC_CTR1(vioapic, "ioapic processing eoi for vector %d", vector); /* * XXX keep track of the pins associated with this vector instead * of iterating on every single pin each time. */ VIOAPIC_LOCK(vioapic); for (pin = 0; pin < REDIR_ENTRIES; pin++) { if ((vioapic->rtbl[pin].reg & IOART_REM_IRR) == 0) continue; if ((vioapic->rtbl[pin].reg & IOART_INTVEC) != vector) continue; vioapic->rtbl[pin].reg &= ~IOART_REM_IRR; if (vioapic->rtbl[pin].acnt > 0) { VIOAPIC_CTR2(vioapic, "ioapic pin%d: asserted at eoi, " "acnt %d", pin, vioapic->rtbl[pin].acnt); vioapic_send_intr(vioapic, pin); } } VIOAPIC_UNLOCK(vioapic); } struct vioapic * vioapic_init(struct vm *vm) { int i; struct vioapic *vioapic; vioapic = malloc(sizeof(struct vioapic), M_VIOAPIC, M_WAITOK | M_ZERO); vioapic->vm = vm; mtx_init(&vioapic->mtx, "vioapic lock", NULL, MTX_SPIN); /* Initialize all redirection entries to mask all interrupts */ for (i = 0; i < REDIR_ENTRIES; i++) vioapic->rtbl[i].reg = 0x0001000000010000UL; return (vioapic); } void vioapic_cleanup(struct vioapic *vioapic) { free(vioapic, M_VIOAPIC); } int vioapic_pincount(struct vm *vm) { return (REDIR_ENTRIES); } Index: head/sys/amd64/vmm/io/vpmtmr.c =================================================================== --- head/sys/amd64/vmm/io/vpmtmr.c (revision 282286) +++ head/sys/amd64/vmm/io/vpmtmr.c (revision 282287) @@ -1,104 +1,103 @@ /*- * Copyright (c) 2014, Neel Natu (neel@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include -#include #include #include #include #include #include "vpmtmr.h" /* * The ACPI Power Management timer is a free-running 24- or 32-bit * timer with a frequency of 3.579545MHz * * This implementation will be 32-bits */ #define PMTMR_FREQ 3579545 /* 3.579545MHz */ struct vpmtmr { sbintime_t freq_sbt; sbintime_t baseuptime; uint32_t baseval; }; static MALLOC_DEFINE(M_VPMTMR, "vpmtmr", "bhyve virtual acpi timer"); struct vpmtmr * vpmtmr_init(struct vm *vm) { struct vpmtmr *vpmtmr; struct bintime bt; vpmtmr = malloc(sizeof(struct vpmtmr), M_VPMTMR, M_WAITOK | M_ZERO); vpmtmr->baseuptime = sbinuptime(); vpmtmr->baseval = 0; FREQ2BT(PMTMR_FREQ, &bt); vpmtmr->freq_sbt = bttosbt(bt); return (vpmtmr); } void vpmtmr_cleanup(struct vpmtmr *vpmtmr) { free(vpmtmr, M_VPMTMR); } int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, uint32_t *val) { struct vpmtmr *vpmtmr; sbintime_t now, delta; if (!in || bytes != 4) return (-1); vpmtmr = vm_pmtmr(vm); /* * No locking needed because 'baseuptime' and 'baseval' are * written only during initialization. */ now = sbinuptime(); delta = now - vpmtmr->baseuptime; KASSERT(delta >= 0, ("vpmtmr_handler: uptime went backwards: " "%#lx to %#lx", vpmtmr->baseuptime, now)); *val = vpmtmr->baseval + delta / vpmtmr->freq_sbt; return (0); } Index: head/sys/amd64/vmm/io/vrtc.c =================================================================== --- head/sys/amd64/vmm/io/vrtc.c (revision 282286) +++ head/sys/amd64/vmm/io/vrtc.c (revision 282287) @@ -1,1020 +1,1019 @@ /*- * Copyright (c) 2014, Neel Natu (neel@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include #include #include #include #include #include #include "vmm_ktr.h" #include "vatpic.h" #include "vioapic.h" #include "vrtc.h" /* Register layout of the RTC */ struct rtcdev { uint8_t sec; uint8_t alarm_sec; uint8_t min; uint8_t alarm_min; uint8_t hour; uint8_t alarm_hour; uint8_t day_of_week; uint8_t day_of_month; uint8_t month; uint8_t year; uint8_t reg_a; uint8_t reg_b; uint8_t reg_c; uint8_t reg_d; uint8_t nvram[36]; uint8_t century; uint8_t nvram2[128 - 51]; } __packed; CTASSERT(sizeof(struct rtcdev) == 128); CTASSERT(offsetof(struct rtcdev, century) == RTC_CENTURY); struct vrtc { struct vm *vm; struct mtx mtx; struct callout callout; u_int addr; /* RTC register to read or write */ sbintime_t base_uptime; time_t base_rtctime; struct rtcdev rtcdev; }; #define VRTC_LOCK(vrtc) mtx_lock(&((vrtc)->mtx)) #define VRTC_UNLOCK(vrtc) mtx_unlock(&((vrtc)->mtx)) #define VRTC_LOCKED(vrtc) mtx_owned(&((vrtc)->mtx)) /* * RTC time is considered "broken" if: * - RTC updates are halted by the guest * - RTC date/time fields have invalid values */ #define VRTC_BROKEN_TIME ((time_t)-1) #define RTC_IRQ 8 #define RTCSB_BIN 0x04 #define RTCSB_ALL_INTRS (RTCSB_UINTR | RTCSB_AINTR | RTCSB_PINTR) #define rtc_halted(vrtc) ((vrtc->rtcdev.reg_b & RTCSB_HALT) != 0) #define aintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_AINTR) != 0) #define pintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_PINTR) != 0) #define uintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_UINTR) != 0) static void vrtc_callout_handler(void *arg); static void vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval); static MALLOC_DEFINE(M_VRTC, "vrtc", "bhyve virtual rtc"); SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, vrtc, CTLFLAG_RW, NULL, NULL); static int rtc_flag_broken_time = 1; SYSCTL_INT(_hw_vmm_vrtc, OID_AUTO, flag_broken_time, CTLFLAG_RDTUN, &rtc_flag_broken_time, 0, "Stop guest when invalid RTC time is detected"); static __inline bool divider_enabled(int reg_a) { /* * The RTC is counting only when dividers are not held in reset. */ return ((reg_a & 0x70) == 0x20); } static __inline bool update_enabled(struct vrtc *vrtc) { /* * RTC date/time can be updated only if: * - divider is not held in reset * - guest has not disabled updates * - the date/time fields have valid contents */ if (!divider_enabled(vrtc->rtcdev.reg_a)) return (false); if (rtc_halted(vrtc)) return (false); if (vrtc->base_rtctime == VRTC_BROKEN_TIME) return (false); return (true); } static time_t vrtc_curtime(struct vrtc *vrtc, sbintime_t *basetime) { sbintime_t now, delta; time_t t, secs; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); t = vrtc->base_rtctime; *basetime = vrtc->base_uptime; if (update_enabled(vrtc)) { now = sbinuptime(); delta = now - vrtc->base_uptime; KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: " "%#lx to %#lx", vrtc->base_uptime, now)); secs = delta / SBT_1S; t += secs; *basetime += secs * SBT_1S; } return (t); } static __inline uint8_t rtcset(struct rtcdev *rtc, int val) { KASSERT(val >= 0 && val < 100, ("%s: invalid bin2bcd index %d", __func__, val)); return ((rtc->reg_b & RTCSB_BIN) ? 
val : bin2bcd_data[val]); } static void secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update) { struct clocktime ct; struct timespec ts; struct rtcdev *rtc; int hour; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); if (rtctime < 0) { KASSERT(rtctime == VRTC_BROKEN_TIME, ("%s: invalid vrtc time %#lx", __func__, rtctime)); return; } /* * If the RTC is halted then the guest has "ownership" of the * date/time fields. Don't update the RTC date/time fields in * this case (unless forced). */ if (rtc_halted(vrtc) && !force_update) return; ts.tv_sec = rtctime; ts.tv_nsec = 0; clock_ts_to_ct(&ts, &ct); KASSERT(ct.sec >= 0 && ct.sec <= 59, ("invalid clocktime sec %d", ct.sec)); KASSERT(ct.min >= 0 && ct.min <= 59, ("invalid clocktime min %d", ct.min)); KASSERT(ct.hour >= 0 && ct.hour <= 23, ("invalid clocktime hour %d", ct.hour)); KASSERT(ct.dow >= 0 && ct.dow <= 6, ("invalid clocktime wday %d", ct.dow)); KASSERT(ct.day >= 1 && ct.day <= 31, ("invalid clocktime mday %d", ct.day)); KASSERT(ct.mon >= 1 && ct.mon <= 12, ("invalid clocktime month %d", ct.mon)); KASSERT(ct.year >= POSIX_BASE_YEAR, ("invalid clocktime year %d", ct.year)); rtc = &vrtc->rtcdev; rtc->sec = rtcset(rtc, ct.sec); rtc->min = rtcset(rtc, ct.min); if (rtc->reg_b & RTCSB_24HR) { hour = ct.hour; } else { /* * Convert to the 12-hour format. */ switch (ct.hour) { case 0: /* 12 AM */ case 12: /* 12 PM */ hour = 12; break; default: /* * The remaining 'ct.hour' values are interpreted as: * [1 - 11] -> 1 - 11 AM * [13 - 23] -> 1 - 11 PM */ hour = ct.hour % 12; break; } } rtc->hour = rtcset(rtc, hour); if ((rtc->reg_b & RTCSB_24HR) == 0 && ct.hour >= 12) rtc->hour |= 0x80; /* set MSB to indicate PM */ rtc->day_of_week = rtcset(rtc, ct.dow + 1); rtc->day_of_month = rtcset(rtc, ct.day); rtc->month = rtcset(rtc, ct.mon); rtc->year = rtcset(rtc, ct.year % 100); rtc->century = rtcset(rtc, ct.year / 100); } static int rtcget(struct rtcdev *rtc, int val, int *retval) { uint8_t upper, lower; if (rtc->reg_b & RTCSB_BIN) { *retval = val; return (0); } lower = val & 0xf; upper = (val >> 4) & 0xf; if (lower > 9 || upper > 9) return (-1); *retval = upper * 10 + lower; return (0); } static time_t rtc_to_secs(struct vrtc *vrtc) { struct clocktime ct; struct timespec ts; struct rtcdev *rtc; struct vm *vm; int century, error, hour, pm, year; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); vm = vrtc->vm; rtc = &vrtc->rtcdev; bzero(&ct, sizeof(struct clocktime)); error = rtcget(rtc, rtc->sec, &ct.sec); if (error || ct.sec < 0 || ct.sec > 59) { VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec); goto fail; } error = rtcget(rtc, rtc->min, &ct.min); if (error || ct.min < 0 || ct.min > 59) { VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min); goto fail; } pm = 0; hour = rtc->hour; if ((rtc->reg_b & RTCSB_24HR) == 0) { if (hour & 0x80) { hour &= ~0x80; pm = 1; } } error = rtcget(rtc, hour, &ct.hour); if ((rtc->reg_b & RTCSB_24HR) == 0) { if (ct.hour >= 1 && ct.hour <= 12) { /* * Convert from 12-hour format to internal 24-hour * representation as follows: * * 12-hour format ct.hour * 12 AM 0 * 1 - 11 AM 1 - 11 * 12 PM 12 * 1 - 11 PM 13 - 23 */ if (ct.hour == 12) ct.hour = 0; if (pm) ct.hour += 12; } else { VM_CTR2(vm, "Invalid RTC 12-hour format %#x/%d", rtc->hour, ct.hour); goto fail; } } if (error || ct.hour < 0 || ct.hour > 23) { VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour); goto fail; } /* * Ignore 'rtc->dow' because some guests like Linux don't bother * setting it at all while others like 
OpenBSD/i386 set it incorrectly. * * clock_ct_to_ts() does not depend on 'ct.dow' anyway so ignore it. */ ct.dow = -1; error = rtcget(rtc, rtc->day_of_month, &ct.day); if (error || ct.day < 1 || ct.day > 31) { VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month, ct.day); goto fail; } error = rtcget(rtc, rtc->month, &ct.mon); if (error || ct.mon < 1 || ct.mon > 12) { VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon); goto fail; } error = rtcget(rtc, rtc->year, &year); if (error || year < 0 || year > 99) { VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year); goto fail; } error = rtcget(rtc, rtc->century, &century); ct.year = century * 100 + year; if (error || ct.year < POSIX_BASE_YEAR) { VM_CTR2(vm, "Invalid RTC century %#x/%d", rtc->century, ct.year); goto fail; } error = clock_ct_to_ts(&ct, &ts); if (error || ts.tv_sec < 0) { VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d", ct.year, ct.mon, ct.day); VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d", ct.hour, ct.min, ct.sec); goto fail; } return (ts.tv_sec); /* success */ fail: /* * Stop updating the RTC if the date/time fields programmed by * the guest are invalid. */ VM_CTR0(vrtc->vm, "Invalid RTC date/time programming detected"); return (VRTC_BROKEN_TIME); } static int vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase) { struct rtcdev *rtc; sbintime_t oldbase; time_t oldtime; uint8_t alarm_sec, alarm_min, alarm_hour; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); rtc = &vrtc->rtcdev; alarm_sec = rtc->alarm_sec; alarm_min = rtc->alarm_min; alarm_hour = rtc->alarm_hour; oldtime = vrtc->base_rtctime; VM_CTR2(vrtc->vm, "Updating RTC secs from %#lx to %#lx", oldtime, newtime); oldbase = vrtc->base_uptime; VM_CTR2(vrtc->vm, "Updating RTC base uptime from %#lx to %#lx", oldbase, newbase); vrtc->base_uptime = newbase; if (newtime == oldtime) return (0); /* * If 'newtime' indicates that RTC updates are disabled then just * record that and return. There is no need to do alarm interrupt * processing in this case. */ if (newtime == VRTC_BROKEN_TIME) { vrtc->base_rtctime = VRTC_BROKEN_TIME; return (0); } /* * Return an error if RTC updates are halted by the guest. */ if (rtc_halted(vrtc)) { VM_CTR0(vrtc->vm, "RTC update halted by guest"); return (EBUSY); } do { /* * If the alarm interrupt is enabled and 'oldtime' is valid * then visit all the seconds between 'oldtime' and 'newtime' * to check for the alarm condition. * * Otherwise move the RTC time forward directly to 'newtime'. */ if (aintr_enabled(vrtc) && oldtime != VRTC_BROKEN_TIME) vrtc->base_rtctime++; else vrtc->base_rtctime = newtime; if (aintr_enabled(vrtc)) { /* * Update the RTC date/time fields before checking * if the alarm conditions are satisfied.
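 *
 * The per-field match below treats alarm bytes with the top two bits
 * set (>= 0xc0) as "don't care" wildcards (illustrative only, not part
 * of this change):
 */
static inline bool
example_alarm_field_match(uint8_t alarm, uint8_t cur)
{
	return (alarm >= 0xc0 || alarm == cur);
}
/*
 * (end of illustrative sketch)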
static int
vrtc_time_update(struct vrtc *vrtc, time_t newtime, sbintime_t newbase)
{
	struct rtcdev *rtc;
	sbintime_t oldbase;
	time_t oldtime;
	uint8_t alarm_sec, alarm_min, alarm_hour;

	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));

	rtc = &vrtc->rtcdev;
	alarm_sec = rtc->alarm_sec;
	alarm_min = rtc->alarm_min;
	alarm_hour = rtc->alarm_hour;

	oldtime = vrtc->base_rtctime;
	VM_CTR2(vrtc->vm, "Updating RTC secs from %#lx to %#lx",
	    oldtime, newtime);

	oldbase = vrtc->base_uptime;
	VM_CTR2(vrtc->vm, "Updating RTC base uptime from %#lx to %#lx",
	    oldbase, newbase);
	vrtc->base_uptime = newbase;

	if (newtime == oldtime)
		return (0);

	/*
	 * If 'newtime' indicates that RTC updates are disabled then just
	 * record that and return. There is no need to do alarm interrupt
	 * processing in this case.
	 */
	if (newtime == VRTC_BROKEN_TIME) {
		vrtc->base_rtctime = VRTC_BROKEN_TIME;
		return (0);
	}

	/*
	 * Return an error if RTC updates are halted by the guest.
	 */
	if (rtc_halted(vrtc)) {
		VM_CTR0(vrtc->vm, "RTC update halted by guest");
		return (EBUSY);
	}

	do {
		/*
		 * If the alarm interrupt is enabled and 'oldtime' is valid
		 * then visit all the seconds between 'oldtime' and 'newtime'
		 * to check for the alarm condition.
		 *
		 * Otherwise move the RTC time forward directly to 'newtime'.
		 */
		if (aintr_enabled(vrtc) && oldtime != VRTC_BROKEN_TIME)
			vrtc->base_rtctime++;
		else
			vrtc->base_rtctime = newtime;

		if (aintr_enabled(vrtc)) {
			/*
			 * Update the RTC date/time fields before checking
			 * if the alarm conditions are satisfied.
			 */
			secs_to_rtc(vrtc->base_rtctime, vrtc, 0);

			/*
			 * Alarm bytes with the two top bits set
			 * (0xc0 - 0xff) are treated as "don't care"
			 * wildcards that match any value.
			 */
			if ((alarm_sec >= 0xC0 || alarm_sec == rtc->sec) &&
			    (alarm_min >= 0xC0 || alarm_min == rtc->min) &&
			    (alarm_hour >= 0xC0 || alarm_hour == rtc->hour)) {
				vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_ALARM);
			}
		}
	} while (vrtc->base_rtctime != newtime);

	if (uintr_enabled(vrtc))
		vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE);

	return (0);
}

static sbintime_t
vrtc_freq(struct vrtc *vrtc)
{
	int ratesel;

	static sbintime_t pf[16] = {
		0,
		SBT_1S / 256,
		SBT_1S / 128,
		SBT_1S / 8192,
		SBT_1S / 4096,
		SBT_1S / 2048,
		SBT_1S / 1024,
		SBT_1S / 512,
		SBT_1S / 256,
		SBT_1S / 128,
		SBT_1S / 64,
		SBT_1S / 32,
		SBT_1S / 16,
		SBT_1S / 8,
		SBT_1S / 4,
		SBT_1S / 2,
	};

	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));

	/*
	 * If both periodic and alarm interrupts are enabled then use the
	 * periodic frequency to drive the callout. The minimum periodic
	 * frequency (2 Hz) is higher than the alarm frequency (1 Hz) so
	 * piggyback the alarm on top of it. The same argument applies to
	 * the update interrupt.
	 */
	if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) {
		ratesel = vrtc->rtcdev.reg_a & 0xf;
		return (pf[ratesel]);
	} else if (aintr_enabled(vrtc) && update_enabled(vrtc)) {
		return (SBT_1S);
	} else if (uintr_enabled(vrtc) && update_enabled(vrtc)) {
		return (SBT_1S);
	} else {
		return (0);
	}
}
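/*
 * Example: a guest that programs reg_a to 0x26 (divider enabled, rate
 * select 6) gets pf[6] = SBT_1S / 1024, i.e. the classic 1024 Hz
 * periodic interrupt rate. Rate select 0 yields pf[0] == 0, which
 * disables the periodic timer entirely.
 */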
"active" : "inactive", freq)); } static void vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval) { struct rtcdev *rtc; int oldirqf, newirqf; uint8_t oldval, changed; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); rtc = &vrtc->rtcdev; newval &= RTCIR_ALARM | RTCIR_PERIOD | RTCIR_UPDATE; oldirqf = rtc->reg_c & RTCIR_INT; if ((aintr_enabled(vrtc) && (newval & RTCIR_ALARM) != 0) || (pintr_enabled(vrtc) && (newval & RTCIR_PERIOD) != 0) || (uintr_enabled(vrtc) && (newval & RTCIR_UPDATE) != 0)) { newirqf = RTCIR_INT; } else { newirqf = 0; } oldval = rtc->reg_c; rtc->reg_c = newirqf | newval; changed = oldval ^ rtc->reg_c; if (changed) { VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x", oldval, rtc->reg_c); } if (!oldirqf && newirqf) { VM_CTR1(vrtc->vm, "RTC irq %d asserted", RTC_IRQ); vatpic_pulse_irq(vrtc->vm, RTC_IRQ); vioapic_pulse_irq(vrtc->vm, RTC_IRQ); } else if (oldirqf && !newirqf) { VM_CTR1(vrtc->vm, "RTC irq %d deasserted", RTC_IRQ); } } static int vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) { struct rtcdev *rtc; sbintime_t oldfreq, newfreq, basetime; time_t curtime, rtctime; int error; uint8_t oldval, changed; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); rtc = &vrtc->rtcdev; oldval = rtc->reg_b; oldfreq = vrtc_freq(vrtc); rtc->reg_b = newval; changed = oldval ^ newval; if (changed) { VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x", oldval, newval); } if (changed & RTCSB_HALT) { if ((newval & RTCSB_HALT) == 0) { rtctime = rtc_to_secs(vrtc); basetime = sbinuptime(); if (rtctime == VRTC_BROKEN_TIME) { if (rtc_flag_broken_time) return (-1); } } else { curtime = vrtc_curtime(vrtc, &basetime); KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch " "between vrtc basetime (%#lx) and curtime (%#lx)", __func__, vrtc->base_rtctime, curtime)); /* * Force a refresh of the RTC date/time fields so * they reflect the time right before the guest set * the HALT bit. */ secs_to_rtc(curtime, vrtc, 1); /* * Updates are halted so mark 'base_rtctime' to denote * that the RTC date/time is in flux. */ rtctime = VRTC_BROKEN_TIME; rtc->reg_b &= ~RTCSB_UINTR; } error = vrtc_time_update(vrtc, rtctime, basetime); KASSERT(error == 0, ("vrtc_time_update error %d", error)); } /* * Side effect of changes to the interrupt enable bits. */ if (changed & RTCSB_ALL_INTRS) vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c); /* * Change the callout frequency if it has changed. */ newfreq = vrtc_freq(vrtc); if (newfreq != oldfreq) vrtc_callout_reset(vrtc, newfreq); else vrtc_callout_check(vrtc, newfreq); /* * The side effect of bits that control the RTC date/time format * is handled lazily when those fields are actually read. */ return (0); } static void vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) { sbintime_t oldfreq, newfreq; uint8_t oldval, changed; KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); newval &= ~RTCSA_TUP; oldval = vrtc->rtcdev.reg_a; oldfreq = vrtc_freq(vrtc); if (divider_enabled(oldval) && !divider_enabled(newval)) { VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx", vrtc->base_rtctime, vrtc->base_uptime); } else if (!divider_enabled(oldval) && divider_enabled(newval)) { /* * If the dividers are coming out of reset then update * 'base_uptime' before this happens. This is done to * maintain the illusion that the RTC date/time was frozen * while the dividers were disabled. 
int
vrtc_set_time(struct vm *vm, time_t secs)
{
	struct vrtc *vrtc;
	int error;

	vrtc = vm_rtc(vm);
	VRTC_LOCK(vrtc);
	error = vrtc_time_update(vrtc, secs, sbinuptime());
	VRTC_UNLOCK(vrtc);

	if (error) {
		VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", error,
		    secs);
	} else {
		VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs);
	}

	return (error);
}

time_t
vrtc_get_time(struct vm *vm)
{
	struct vrtc *vrtc;
	sbintime_t basetime;
	time_t t;

	vrtc = vm_rtc(vm);
	VRTC_LOCK(vrtc);
	t = vrtc_curtime(vrtc, &basetime);
	VRTC_UNLOCK(vrtc);

	return (t);
}

int
vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
{
	struct vrtc *vrtc;
	uint8_t *ptr;

	vrtc = vm_rtc(vm);

	/*
	 * Don't allow writes to RTC control registers or the date/time fields.
	 */
	if (offset < offsetof(struct rtcdev, nvram[0]) ||
	    offset == RTC_CENTURY || offset >= sizeof(struct rtcdev)) {
		VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d",
		    offset);
		return (EINVAL);
	}

	VRTC_LOCK(vrtc);
	ptr = (uint8_t *)(&vrtc->rtcdev);
	ptr[offset] = value;
	VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset);
	VRTC_UNLOCK(vrtc);

	return (0);
}

int
vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
{
	struct vrtc *vrtc;
	sbintime_t basetime;
	time_t curtime;
	uint8_t *ptr;

	/*
	 * Allow all offsets in the RTC to be read.
	 */
	if (offset < 0 || offset >= sizeof(struct rtcdev))
		return (EINVAL);

	vrtc = vm_rtc(vm);
	VRTC_LOCK(vrtc);

	/*
	 * Update RTC date/time fields if necessary.
	 */
	if (offset < 10 || offset == RTC_CENTURY) {
		curtime = vrtc_curtime(vrtc, &basetime);
		secs_to_rtc(curtime, vrtc, 0);
	}

	ptr = (uint8_t *)(&vrtc->rtcdev);
	*retval = ptr[offset];

	VRTC_UNLOCK(vrtc);

	return (0);
}

int
vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
    uint32_t *val)
{
	struct vrtc *vrtc;

	vrtc = vm_rtc(vm);

	if (bytes != 1)
		return (-1);

	if (in) {
		*val = 0xff;
		return (0);
	}

	VRTC_LOCK(vrtc);
	vrtc->addr = *val & 0x7f;
	VRTC_UNLOCK(vrtc);

	return (0);
}
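/*
 * Example of the guest-visible protocol: to read RTC status register B
 * the guest writes the register index 0x0b to the address port (0x70,
 * handled above) and then reads the data port (0x71, handled below).
 * The top bit of the address byte is masked off because on real
 * hardware it controls NMI enable rather than selecting a register.
 */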
int
vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
    uint32_t *val)
{
	struct vrtc *vrtc;
	struct rtcdev *rtc;
	sbintime_t basetime;
	time_t curtime;
	int error, offset;

	vrtc = vm_rtc(vm);
	rtc = &vrtc->rtcdev;

	if (bytes != 1)
		return (-1);

	VRTC_LOCK(vrtc);
	offset = vrtc->addr;
	if (offset >= sizeof(struct rtcdev)) {
		VRTC_UNLOCK(vrtc);
		return (-1);
	}

	error = 0;
	curtime = vrtc_curtime(vrtc, &basetime);
	vrtc_time_update(vrtc, curtime, basetime);

	/*
	 * Update RTC date/time fields if necessary.
	 *
	 * This is not just for reads of the RTC. The side-effect of writing
	 * the century byte requires other RTC date/time fields (e.g. sec)
	 * to be updated here.
	 */
	if (offset < 10 || offset == RTC_CENTURY)
		secs_to_rtc(curtime, vrtc, 0);

	if (in) {
		if (offset == 12) {
			/*
			 * XXX
			 * reg_c interrupt flags are updated only if the
			 * corresponding interrupt enable bit in reg_b is set.
			 */
			*val = vrtc->rtcdev.reg_c;
			vrtc_set_reg_c(vrtc, 0);
		} else {
			*val = *((uint8_t *)rtc + offset);
		}
		VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x",
		    *val, offset);
	} else {
		switch (offset) {
		case 10:
			VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val);
			vrtc_set_reg_a(vrtc, *val);
			break;
		case 11:
			VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val);
			error = vrtc_set_reg_b(vrtc, *val);
			break;
		case 12:
			VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)",
			    *val);
			break;
		case 13:
			VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)",
			    *val);
			break;
		case 0:
			/*
			 * High order bit of 'seconds' is readonly.
			 */
			*val &= 0x7f;
			/* FALLTHRU */
		default:
			VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x",
			    offset, *val);
			*((uint8_t *)rtc + offset) = *val;
			break;
		}

		/*
		 * XXX some guests (e.g. OpenBSD) write the century byte
		 * outside of RTCSB_HALT so re-calculate the RTC date/time.
		 */
		if (offset == RTC_CENTURY && !rtc_halted(vrtc)) {
			curtime = rtc_to_secs(vrtc);
			error = vrtc_time_update(vrtc, curtime, sbinuptime());
			KASSERT(!error, ("vrtc_time_update error %d", error));
			if (curtime == VRTC_BROKEN_TIME && rtc_flag_broken_time)
				error = -1;
		}
	}
	VRTC_UNLOCK(vrtc);
	return (error);
}

void
vrtc_reset(struct vrtc *vrtc)
{
	struct rtcdev *rtc;

	VRTC_LOCK(vrtc);

	rtc = &vrtc->rtcdev;
	vrtc_set_reg_b(vrtc, rtc->reg_b & ~(RTCSB_ALL_INTRS | RTCSB_SQWE));
	vrtc_set_reg_c(vrtc, 0);
	KASSERT(!callout_active(&vrtc->callout), ("rtc callout still active"));

	VRTC_UNLOCK(vrtc);
}

struct vrtc *
vrtc_init(struct vm *vm)
{
	struct vrtc *vrtc;
	struct rtcdev *rtc;
	time_t curtime;

	vrtc = malloc(sizeof(struct vrtc), M_VRTC, M_WAITOK | M_ZERO);
	vrtc->vm = vm;
	mtx_init(&vrtc->mtx, "vrtc lock", NULL, MTX_DEF);
	callout_init(&vrtc->callout, 1);

	/* Allow dividers to keep time but disable everything else */
	rtc = &vrtc->rtcdev;
	rtc->reg_a = 0x20;
	rtc->reg_b = RTCSB_24HR;
	rtc->reg_c = 0;
	rtc->reg_d = RTCSD_PWR;

	/* Reset the index register to a safe value. */
	vrtc->addr = RTC_STATUSD;

	/*
	 * Initialize RTC time to 00:00:00 Jan 1, 1970.
	 */
	curtime = 0;

	VRTC_LOCK(vrtc);
	vrtc->base_rtctime = VRTC_BROKEN_TIME;
	vrtc_time_update(vrtc, curtime, sbinuptime());
	secs_to_rtc(curtime, vrtc, 0);
	VRTC_UNLOCK(vrtc);

	return (vrtc);
}

void
vrtc_cleanup(struct vrtc *vrtc)
{

	callout_drain(&vrtc->callout);
	free(vrtc, M_VRTC);
}
Index: head/sys/amd64/vmm/vmm_ioport.c
===================================================================
--- head/sys/amd64/vmm/vmm_ioport.c	(revision 282286)
+++ head/sys/amd64/vmm/vmm_ioport.c	(revision 282287)
@@ -1,182 +1,176 @@
/*-
 * Copyright (c) 2014 Tycho Nightingale
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
-#include
-#include
-#include
#include
-#include
-
#include
#include
-#include

#include "vatpic.h"
#include "vatpit.h"
#include "vpmtmr.h"
#include "vrtc.h"
#include "vmm_ioport.h"
#include "vmm_ktr.h"

#define	MAX_IOPORTS		1280

ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
	[TIMER_MODE] = vatpit_handler,
	[TIMER_CNTR0] = vatpit_handler,
	[TIMER_CNTR1] = vatpit_handler,
	[TIMER_CNTR2] = vatpit_handler,
	[NMISC_PORT] = vatpit_nmisc_handler,
	[IO_ICU1] = vatpic_master_handler,
	[IO_ICU1 + ICU_IMR_OFFSET] = vatpic_master_handler,
	[IO_ICU2] = vatpic_slave_handler,
	[IO_ICU2 + ICU_IMR_OFFSET] = vatpic_slave_handler,
	[IO_ELCR1] = vatpic_elc_handler,
	[IO_ELCR2] = vatpic_elc_handler,
	[IO_PMTMR] = vpmtmr_handler,
	[IO_RTC] = vrtc_addr_handler,
	[IO_RTC + 1] = vrtc_data_handler,
};
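/*
 * Example: a 1-byte OUT to port 0x70 (IO_RTC) is dispatched to
 * vrtc_addr_handler() via the table above, while a port with no entry
 * (a NULL handler) is punted to userspace by emulate_inout_port()
 * below.
 */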
#ifdef KTR
static const char *
inout_instruction(struct vm_exit *vmexit)
{
	int index;

	static const char *iodesc[] = {
		"outb", "outw", "outl",
		"inb", "inw", "inl",
		"outsb", "outsw", "outsd",
		"insb", "insw", "insd",
	};

	switch (vmexit->u.inout.bytes) {
	case 1:
		index = 0;
		break;
	case 2:
		index = 1;
		break;
	default:
		index = 2;
		break;
	}

	if (vmexit->u.inout.in)
		index += 3;

	if (vmexit->u.inout.string)
		index += 6;

	KASSERT(index < nitems(iodesc), ("%s: invalid index %d",
	    __func__, index));

	return (iodesc[index]);
}
#endif	/* KTR */

static int
emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
    bool *retu)
{
	ioport_handler_func_t handler;
	uint32_t mask, val;
	int error;

	/*
	 * If there is no handler for the I/O port then punt to userspace.
	 */
	if (vmexit->u.inout.port >= MAX_IOPORTS ||
	    (handler = ioport_handler[vmexit->u.inout.port]) == NULL) {
		*retu = true;
		return (0);
	}

	mask = vie_size2mask(vmexit->u.inout.bytes);

	if (!vmexit->u.inout.in) {
		val = vmexit->u.inout.eax & mask;
	}

	error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
	    vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
	if (error) {
		/*
		 * The value returned by this function is also the return value
		 * of vm_run(). This needs to be a positive number otherwise it
		 * can be interpreted as a "pseudo-error" like ERESTART.
		 *
		 * Enforce this by mapping all errors to EIO.
		 */
		return (EIO);
	}

	if (vmexit->u.inout.in) {
		vmexit->u.inout.eax &= ~mask;
		vmexit->u.inout.eax |= val & mask;
		error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX,
		    vmexit->u.inout.eax);
		KASSERT(error == 0, ("emulate_ioport: error %d setting guest "
		    "rax register", error));
	}
	*retu = false;
	return (0);
}

static int
emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
{
	*retu = true;
	return (0);	/* Return to userspace to finish emulation */
}

int
vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
{
	int bytes, error;

	bytes = vmexit->u.inout.bytes;
	KASSERT(bytes == 1 || bytes == 2 || bytes == 4,
	    ("vm_handle_inout: invalid operand size %d", bytes));

	if (vmexit->u.inout.string)
		error = emulate_inout_str(vm, vcpuid, vmexit, retu);
	else
		error = emulate_inout_port(vm, vcpuid, vmexit, retu);

	VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s",
	    vmexit->u.inout.rep ? "rep " : "",
	    inout_instruction(vmexit),
	    vmexit->u.inout.port,
	    error ? "error" : (*retu ? "userspace" : "handled"));

	return (error);
}
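/*
 * Worked example: for an "inb" from port 0x71, bytes == 1 so the mask
 * is 0xff. emulate_inout_port() calls vrtc_data_handler() to fill in
 * 'val', merges it into the low byte of the saved %eax (leaving the
 * upper bytes intact) and writes the result back to the guest's %rax.
 */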
Index: head/sys/amd64/vmm/vmm_stat.c
===================================================================
--- head/sys/amd64/vmm/vmm_stat.c	(revision 282286)
+++ head/sys/amd64/vmm/vmm_stat.c	(revision 282287)
@@ -1,170 +1,169 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
-#include
#include

#include "vmm_util.h"
#include "vmm_stat.h"

/*
 * 'vst_num_elems' is the total number of addressable statistic elements
 * 'vst_num_types' is the number of unique statistic types
 *
 * It is always true that 'vst_num_elems' is greater than or equal to
 * 'vst_num_types'. This is because a stat type may represent more than
 * one element (e.g. VMM_STAT_ARRAY).
 */
static int vst_num_elems, vst_num_types;
static struct vmm_stat_type *vsttab[MAX_VMM_STAT_ELEMS];

static MALLOC_DEFINE(M_VMM_STAT, "vmm stat", "vmm stat");

#define	vst_size	((size_t)vst_num_elems * sizeof(uint64_t))

void
vmm_stat_register(void *arg)
{
	struct vmm_stat_type *vst = arg;

	/* We require all stats to identify themselves with a description */
	if (vst->desc == NULL)
		return;

	if (vst->scope == VMM_STAT_SCOPE_INTEL && !vmm_is_intel())
		return;

	if (vst->scope == VMM_STAT_SCOPE_AMD && !vmm_is_amd())
		return;

	if (vst_num_elems + vst->nelems >= MAX_VMM_STAT_ELEMS) {
		printf("Cannot accommodate vmm stat type \"%s\"!\n", vst->desc);
		return;
	}

	vst->index = vst_num_elems;
	vst_num_elems += vst->nelems;

	vsttab[vst_num_types++] = vst;
}

int
vmm_stat_copy(struct vm *vm, int vcpu, int *num_stats, uint64_t *buf)
{
	struct vmm_stat_type *vst;
	uint64_t *stats;
	int i;

	if (vcpu < 0 || vcpu >= VM_MAXCPU)
		return (EINVAL);

	/* Let stats functions update their counters */
	for (i = 0; i < vst_num_types; i++) {
		vst = vsttab[i];
		if (vst->func != NULL)
			(*vst->func)(vm, vcpu, vst);
	}

	/* Copy over the stats */
	stats = vcpu_stats(vm, vcpu);
	for (i = 0; i < vst_num_elems; i++)
		buf[i] = stats[i];
	*num_stats = vst_num_elems;
	return (0);
}

void *
vmm_stat_alloc(void)
{

	return (malloc(vst_size, M_VMM_STAT, M_WAITOK));
}

void
vmm_stat_init(void *vp)
{

	bzero(vp, vst_size);
}

void
vmm_stat_free(void *vp)
{

	free(vp, M_VMM_STAT);
}

int
vmm_stat_desc_copy(int index, char *buf, int bufsize)
{
	int i;
	struct vmm_stat_type *vst;

	for (i = 0; i < vst_num_types; i++) {
		vst = vsttab[i];
		if (index >= vst->index && index < vst->index + vst->nelems) {
			if (vst->nelems > 1) {
				snprintf(buf, bufsize, "%s[%d]",
				    vst->desc, index - vst->index);
			} else {
				strlcpy(buf, vst->desc, bufsize);
			}
			return (0);	/* found it */
		}
	}

	return (EINVAL);
}

/* global statistics */
VMM_STAT(VCPU_MIGRATIONS, "vcpu migration across host cpus");
VMM_STAT(VMEXIT_COUNT, "total number of vm exits");
VMM_STAT(VMEXIT_EXTINT, "vm exits due to external interrupt");
VMM_STAT(VMEXIT_HLT, "number of times hlt was intercepted");
VMM_STAT(VMEXIT_CR_ACCESS, "number of times %cr access was intercepted");
VMM_STAT(VMEXIT_RDMSR, "number of times rdmsr was intercepted");
VMM_STAT(VMEXIT_WRMSR, "number of times wrmsr was intercepted");
VMM_STAT(VMEXIT_MTRAP, "number of monitor trap exits");
VMM_STAT(VMEXIT_PAUSE, "number of times pause was intercepted");
VMM_STAT(VMEXIT_INTR_WINDOW, "vm exits due to interrupt window opening");
VMM_STAT(VMEXIT_NMI_WINDOW, "vm exits due to nmi window opening");
VMM_STAT(VMEXIT_INOUT, "number of times in/out was intercepted");
VMM_STAT(VMEXIT_CPUID, "number of times cpuid was intercepted");
VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
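/*
 * Usage sketch: each VMM_STAT() invocation above registers its type at
 * module load through vmm_stat_register(), after which a counter can
 * be bumped from the exit path, e.g. vmm_stat_incr(vm, vcpuid,
 * VMEXIT_HLT, 1) (see vmm_stat.h), and read back out via
 * vmm_stat_copy()/vmm_stat_desc_copy().
 */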
Index: head/sys/amd64/vmm/x86.c
===================================================================
--- head/sys/amd64/vmm/x86.c	(revision 282286)
+++ head/sys/amd64/vmm/x86.c	(revision 282287)
@@ -1,487 +1,486 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
-#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "vmm_host.h"
#include "vmm_ktr.h"
#include "vmm_util.h"
#include "x86.h"

SYSCTL_DECL(_hw_vmm);
static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL);

#define	CPUID_VM_HIGH		0x40000000

static const char bhyve_id[12] = "bhyve bhyve ";

static uint64_t bhyve_xcpuids;
SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
    "Number of times an unknown cpuid leaf was accessed");

/*
 * The default CPU topology is a single thread per package.
 */
static u_int threads_per_core = 1;
SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
    &threads_per_core, 0, NULL);

static u_int cores_per_package = 1;
SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
    &cores_per_package, 0, NULL);

static int cpuid_leaf_b = 1;
SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
    &cpuid_leaf_b, 0, NULL);

/*
 * Round up to the next power of two, if necessary, and then take log2.
 * Returns -1 if argument is zero.
 */
static __inline int
log2(u_int x)
{

	return (fls(x << (1 - powerof2(x))) - 1);
}
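/*
 * Example: log2(8) = 3 since 8 is already a power of two, while log2(6)
 * first rounds up by shifting (6 << 1 == 12) and returns fls(12) - 1 = 3,
 * i.e. the bit width needed to number 6 distinct IDs. log2(0) yields
 * fls(0) - 1 = -1.
 */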
int
x86_emulate_cpuid(struct vm *vm, int vcpu_id,
    uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	const struct xsave_limits *limits;
	uint64_t cr4;
	int error, enable_invpcid, level, width, x2apic_id;
	unsigned int func, regs[4], logical_cpus;
	enum x2apic_state x2apic_state;

	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx);

	/*
	 * Requests for invalid CPUID levels should map to the highest
	 * available level instead.
	 */
	if (cpu_exthigh != 0 && *eax >= 0x80000000) {
		if (*eax > cpu_exthigh)
			*eax = cpu_exthigh;
	} else if (*eax >= 0x40000000) {
		if (*eax > CPUID_VM_HIGH)
			*eax = CPUID_VM_HIGH;
	} else if (*eax > cpu_high) {
		*eax = cpu_high;
	}

	func = *eax;

	/*
	 * In general the approach used for CPU topology is to
	 * advertise a flat topology where all CPUs are packages with
	 * no multi-core or SMT.
	 */
	switch (func) {
		/*
		 * Pass these through to the guest
		 */
	case CPUID_0000_0000:
	case CPUID_0000_0002:
	case CPUID_0000_0003:
	case CPUID_8000_0000:
	case CPUID_8000_0002:
	case CPUID_8000_0003:
	case CPUID_8000_0004:
	case CPUID_8000_0006:
		cpuid_count(*eax, *ecx, regs);
		break;
	case CPUID_8000_0008:
		cpuid_count(*eax, *ecx, regs);
		if (vmm_is_amd()) {
			/*
			 * XXX this might appear silly because AMD
			 * cpus don't have threads.
			 *
			 * However this matches the logical cpus as
			 * advertised by leaf 0x1 and will work even
			 * if the 'threads_per_core' tunable is set
			 * incorrectly on an AMD host.
			 */
			logical_cpus = threads_per_core * cores_per_package;
			regs[2] = logical_cpus - 1;
		}
		break;
	case CPUID_8000_0001:
		cpuid_count(*eax, *ecx, regs);

		/*
		 * Hide SVM and Topology Extension features from guest.
		 */
		regs[2] &= ~(AMDID2_SVM | AMDID2_TOPOLOGY);

		/*
		 * Don't advertise extended performance counter MSRs
		 * to the guest.
		 */
		regs[2] &= ~AMDID2_PCXC;
		regs[2] &= ~AMDID2_PNXC;
		regs[2] &= ~AMDID2_PTSCEL2I;

		/*
		 * Don't advertise Instruction Based Sampling feature.
		 */
		regs[2] &= ~AMDID2_IBS;

		/* NodeID MSR not available */
		regs[2] &= ~AMDID2_NODE_ID;

		/* Don't advertise the OS visible workaround feature */
		regs[2] &= ~AMDID2_OSVW;

		/*
		 * Hide rdtscp/ia32_tsc_aux until we know how
		 * to deal with them.
		 */
		regs[3] &= ~AMDID_RDTSCP;
		break;
	case CPUID_8000_0007:
		/*
		 * AMD uses this leaf to advertise the processor's
		 * power monitoring and RAS capabilities. These
		 * features are hardware-specific and exposing
		 * them to a guest doesn't make a lot of sense.
		 *
		 * Intel uses this leaf only to advertise the
		 * "Invariant TSC" feature with all other bits
		 * being reserved (set to zero).
		 */
		regs[0] = 0;
		regs[1] = 0;
		regs[2] = 0;
		regs[3] = 0;

		/*
		 * "Invariant TSC" can be advertised to the guest if:
		 * - host TSC frequency is invariant
		 * - host TSCs are synchronized across physical cpus
		 *
		 * XXX This still falls short because the vcpu
		 * can observe the TSC moving backwards as it
		 * migrates across physical cpus. But at least
		 * it should discourage the guest from using the
		 * TSC to keep track of time.
		 */
		if (tsc_is_invariant && smp_tsc)
			regs[3] |= AMDPM_TSC_INVARIANT;
		break;
	case CPUID_0000_0001:
		do_cpuid(1, regs);

		error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
		if (error) {
			panic("x86_emulate_cpuid: error %d "
			    "fetching x2apic state", error);
		}

		/*
		 * Override the APIC ID only in ebx
		 */
		regs[1] &= ~(CPUID_LOCAL_APIC_ID);
		regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);

		/*
		 * Don't expose VMX, SpeedStep or TME capability.
		 * Advertise x2APIC capability and Hypervisor guest.
		 */
		regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
		regs[2] |= CPUID2_HV;

		if (x2apic_state != X2APIC_DISABLED)
			regs[2] |= CPUID2_X2APIC;
		else
			regs[2] &= ~CPUID2_X2APIC;

		/*
		 * Only advertise CPUID2_XSAVE in the guest if
		 * the host is using XSAVE.
		 */
		if (!(regs[2] & CPUID2_OSXSAVE))
			regs[2] &= ~CPUID2_XSAVE;

		/*
		 * If CPUID2_XSAVE is being advertised and the
		 * guest has set CR4_XSAVE, set CPUID2_OSXSAVE.
		 */
		regs[2] &= ~CPUID2_OSXSAVE;
		if (regs[2] & CPUID2_XSAVE) {
			error = vm_get_register(vm, vcpu_id,
			    VM_REG_GUEST_CR4, &cr4);
			if (error)
				panic("x86_emulate_cpuid: error %d "
				    "fetching %%cr4", error);
			if (cr4 & CR4_XSAVE)
				regs[2] |= CPUID2_OSXSAVE;
		}

		/*
		 * Hide monitor/mwait until we know how to deal with
		 * these instructions.
		 */
		regs[2] &= ~CPUID2_MON;

		/*
		 * Hide the performance and debug features.
		 */
		regs[2] &= ~CPUID2_PDCM;

		/*
		 * No TSC deadline support in the APIC yet
		 */
		regs[2] &= ~CPUID2_TSCDLT;

		/*
		 * Hide thermal monitoring
		 */
		regs[3] &= ~(CPUID_ACPI | CPUID_TM);

		/*
		 * Machine check handling is done in the host.
		 */
		regs[3] &= ~(CPUID_MCA | CPUID_MCE);

		/*
		 * Hide the debug store capability.
		 */
		regs[3] &= ~CPUID_DS;

		logical_cpus = threads_per_core * cores_per_package;
		regs[1] &= ~CPUID_HTT_CORES;
		regs[1] |= (logical_cpus & 0xff) << 16;
		regs[3] |= CPUID_HTT;
		break;
	case CPUID_0000_0004:
		cpuid_count(*eax, *ecx, regs);

		if (regs[0] || regs[1] || regs[2] || regs[3]) {
			regs[0] &= 0x3ff;
			regs[0] |= (cores_per_package - 1) << 26;
			/*
			 * Cache topology:
			 * - L1 and L2 are shared only by the logical
			 *   processors in a single core.
			 * - L3 and above are shared by all logical
			 *   processors in the package.
			 */
			logical_cpus = threads_per_core;
			level = (regs[0] >> 5) & 0x7;
			if (level >= 3)
				logical_cpus *= cores_per_package;
			regs[0] |= (logical_cpus - 1) << 14;
		}
		break;
	case CPUID_0000_0007:
		regs[0] = 0;
		regs[1] = 0;
		regs[2] = 0;
		regs[3] = 0;

		/* leaf 0 */
		if (*ecx == 0) {
			cpuid_count(*eax, *ecx, regs);

			/* Only leaf 0 is supported */
			regs[0] = 0;

			/*
			 * Expose known-safe features.
			 */
			regs[1] &= (CPUID_STDEXT_FSGSBASE |
			    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
			    CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2 |
			    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
			    CPUID_STDEXT_AVX512F |
			    CPUID_STDEXT_AVX512PF |
			    CPUID_STDEXT_AVX512ER |
			    CPUID_STDEXT_AVX512CD);
			regs[2] = 0;
			regs[3] = 0;

			/* Advertise INVPCID if it is enabled. */
			error = vm_get_capability(vm, vcpu_id,
			    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
			if (error == 0 && enable_invpcid)
				regs[1] |= CPUID_STDEXT_INVPCID;
		}
		break;
	case CPUID_0000_0006:
		regs[0] = CPUTPM1_ARAT;
		regs[1] = 0;
		regs[2] = 0;
		regs[3] = 0;
		break;
	case CPUID_0000_000A:
		/*
		 * Handle the access, but report 0 for
		 * all options
		 */
		regs[0] = 0;
		regs[1] = 0;
		regs[2] = 0;
		regs[3] = 0;
		break;
	case CPUID_0000_000B:
		/*
		 * Processor topology enumeration
		 */
		if (*ecx == 0) {
			logical_cpus = threads_per_core;
			width = log2(logical_cpus);
			level = CPUID_TYPE_SMT;
			x2apic_id = vcpu_id;
		}

		if (*ecx == 1) {
			logical_cpus = threads_per_core * cores_per_package;
			width = log2(logical_cpus);
			level = CPUID_TYPE_CORE;
			x2apic_id = vcpu_id;
		}

		if (!cpuid_leaf_b || *ecx >= 2) {
			width = 0;
			logical_cpus = 0;
			level = 0;
			x2apic_id = 0;
		}

		regs[0] = width & 0x1f;
		regs[1] = logical_cpus & 0xffff;
		regs[2] = (level << 8) | (*ecx & 0xff);
		regs[3] = x2apic_id;
		break;
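	/*
	 * Worked example for the leaf 0xB encoding above: with the
	 * default topology (threads_per_core == 1) and *ecx == 0,
	 * width = log2(1) = 0, so the guest sees eax = 0, ebx = 1,
	 * ecx = (CPUID_TYPE_SMT << 8) | 0 and edx = vcpu_id, i.e. one
	 * logical processor at the SMT level and an x2APIC ID equal
	 * to the vcpu number.
	 */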
	case CPUID_0000_000D:
		limits = vmm_get_xsave_limits();
		if (!limits->xsave_enabled) {
			regs[0] = 0;
			regs[1] = 0;
			regs[2] = 0;
			regs[3] = 0;
			break;
		}

		cpuid_count(*eax, *ecx, regs);
		switch (*ecx) {
		case 0:
			/*
			 * Only permit the guest to use bits
			 * that are active in the host in
			 * %xcr0. Also, claim that the
			 * maximum save area size is
			 * equivalent to the host's current
			 * save area size. Since this runs
			 * "inside" of vmrun(), it runs with
			 * the guest's xcr0, so the current
			 * save area size is correct as-is.
			 */
			regs[0] &= limits->xcr0_allowed;
			regs[2] = limits->xsave_max_size;
			regs[3] &= (limits->xcr0_allowed >> 32);
			break;
		case 1:
			/* Only permit XSAVEOPT. */
			regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
			regs[1] = 0;
			regs[2] = 0;
			regs[3] = 0;
			break;
		default:
			/*
			 * If the leaf is for a permitted feature,
			 * pass through as-is, otherwise return
			 * all zeroes.
			 */
			if (!(limits->xcr0_allowed & (1ul << *ecx))) {
				regs[0] = 0;
				regs[1] = 0;
				regs[2] = 0;
				regs[3] = 0;
			}
			break;
		}
		break;
	case 0x40000000:
		regs[0] = CPUID_VM_HIGH;
		bcopy(bhyve_id, &regs[1], 4);
		bcopy(bhyve_id + 4, &regs[2], 4);
		bcopy(bhyve_id + 8, &regs[3], 4);
		break;
	default:
		/*
		 * The leaf value has already been clamped so
		 * simply pass this through, keeping count of
		 * how many unhandled leaf values have been seen.
		 */
		atomic_add_long(&bhyve_xcpuids, 1);
		cpuid_count(*eax, *ecx, regs);
		break;
	}

	*eax = regs[0];
	*ebx = regs[1];
	*ecx = regs[2];
	*edx = regs[3];

	return (1);
}
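/*
 * Usage sketch: callers such as the VT-x and SVM exit handlers load
 * the guest's %rax/%rbx/%rcx/%rdx into 32-bit locals, invoke
 * x86_emulate_cpuid(), and copy the four results back into the guest
 * register state before resuming the vcpu.
 */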