Index: head/lib/libvmmapi/vmmapi.c
===================================================================
--- head/lib/libvmmapi/vmmapi.c	(revision 276427)
+++ head/lib/libvmmapi/vmmapi.c	(revision 276428)
@@ -1,1148 +1,1200 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/sysctl.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/_iovec.h>
 #include <sys/cpuset.h>
 
 #include <x86/segments.h>
 #include <machine/specialreg.h>
 #include <machine/param.h>
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
 #include <fcntl.h>
 #include <unistd.h>
 
 #include <libutil.h>
 
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 
 #include "vmmapi.h"
 
 /* Byte-count multipliers; the UL suffix makes each product unsigned long. */
 #define	MB	(1024 * 1024UL)
 #define	GB	(1024 * 1024 * 1024UL)
 
 /*
  * Per-VM handle returned by vm_open().  The VM name is stored in the same
  * allocation, directly after the structure.
  */
 struct vmctx {
 	int	fd;		/* descriptor for /dev/vmm/<name>, -1 if unopened */
 	uint32_t lowmem_limit;	/* largest size of the segment at gpa 0 */
 	enum vm_mmap_style vms;	/* style given to vm_setup_memory() */
 	int	memflags;	/* VM_MEM_F_* flags */
 	size_t	lowmem;		/* bytes of guest memory at gpa 0 */
 	char	*lowmem_addr;	/* host mapping of lowmem (VM_MMAP_ALL only) */
 	size_t	highmem;	/* bytes of guest memory at gpa 4GB */
 	char	*highmem_addr;	/* host mapping of highmem (VM_MMAP_ALL only) */
 	char	*name;		/* VM name */
 };
 
 /*
  * Create or destroy a VM by writing its name to the matching hw.vmm sysctl.
  * The argument 'x' is evaluated twice.
  */
 #define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
 #define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
 
 /*
  * Open the device node "/dev/vmm/<name>" for reading and writing.
  * Returns whatever open(2) returned.
  */
 static int
 vm_device_open(const char *name)
 {
 	char *vmfile;
 	int pathlen, vmfd;
 
 	/* Room for the prefix, the name and the terminating NUL. */
 	pathlen = strlen("/dev/vmm/") + strlen(name) + 1;
 	vmfile = malloc(pathlen);
 	assert(vmfile != NULL);
 	snprintf(vmfile, pathlen, "/dev/vmm/%s", name);
 
 	vmfd = open(vmfile, O_RDWR, 0);
 	free(vmfile);
 
 	return (vmfd);
 }
 
 /*
  * Create a VM called 'name' through the hw.vmm.create sysctl.
  * Returns the sysctlbyname() result.
  */
 int
 vm_create(const char *name)
 {
 	char *vmname;
 
 	/* sysctlbyname() takes a non-const pointer for the new value. */
 	vmname = (char *)name;
 	return (CREATE(vmname));
 }
 
 /*
  * Allocate a context for the VM called 'name' and open its device node.
  *
  * The context and a copy of the name share one allocation.  The allocation
  * is zeroed so that the memory-layout fields (vms, lowmem, highmem and the
  * two mapping addresses) hold defined values before vm_setup_memory() runs.
  *
  * Returns the new context, or NULL if the device could not be opened.  Note
  * that the failure path goes through vm_destroy(), which also issues the
  * hw.vmm.destroy sysctl for 'name'.
  */
 struct vmctx *
 vm_open(const char *name)
 {
 	struct vmctx *vm;
 
 	vm = calloc(1, sizeof(struct vmctx) + strlen(name) + 1);
 	assert(vm != NULL);
 
 	vm->fd = -1;
 	vm->memflags = 0;
 	vm->lowmem_limit = 3 * GB;
 	/* The name lives immediately after the structure. */
 	vm->name = (char *)(vm + 1);
 	strcpy(vm->name, name);
 
 	if ((vm->fd = vm_device_open(vm->name)) < 0)
 		goto err;
 
 	return (vm);
 err:
 	vm_destroy(vm);
 	return (NULL);
 }
 
 /*
  * Close the device descriptor (if one is open), ask the driver to destroy
  * the VM by name and release the context.
  */
 void
 vm_destroy(struct vmctx *vm)
 {
 	int vmfd;
 
 	assert(vm != NULL);
 
 	vmfd = vm->fd;
 	if (vmfd >= 0)
 		close(vmfd);
 
 	DESTROY(vm->name);
 	free(vm);
 }
 
 /*
  * Parse a memory size given on the command line into '*ret_memsize'.
  *
  * A string that strtoul() consumes completely is taken as a plain number;
  * anything else is handed to expand_number().  Returns 0 for the plain
  * number case, otherwise the expand_number() result.
  */
 int
 vm_parse_memsize(const char *optarg, size_t *ret_memsize)
 {
 	char *end;
 	size_t size;
 
 	size = strtoul(optarg, &end, 0);
 	if (*optarg == '\0' || *end != '\0')
 		return (expand_number(optarg, ret_memsize));
 
 	/*
 	 * For the sake of backward compatibility a value smaller than a
 	 * megabyte is interpreted as being in units of MB.
 	 */
 	if (size < MB)
 		size *= MB;
 	*ret_memsize = size;
 
 	return (0);
 }
 
 /*
  * Query the memory segment at guest physical address 'gpa'.
  *
  * '*ret_len' and, when 'wired' is not NULL, '*wired' are written from the
  * ioctl argument whether or not the ioctl succeeded.  Returns the
  * VM_GET_MEMORY_SEG ioctl result.
  */
 int
 vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
 		  int *wired)
 {
 	struct vm_memory_segment memseg;
 	int rc;
 
 	memset(&memseg, 0, sizeof(memseg));
 	memseg.gpa = gpa;
 	rc = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &memseg);
 
 	*ret_len = memseg.len;
 	if (wired != NULL)
 		*wired = memseg.wired;
 
 	return (rc);
 }
 
 /* Return the current limit on the size of the low memory segment. */
 uint32_t
 vm_get_lowmem_limit(struct vmctx *ctx)
 {
 
 	return (ctx->lowmem_limit);
 }
 
 /*
  * Set the limit on the size of the low memory segment.  The value is only
  * recorded here; vm_setup_memory() reads it when laying out memory.
  */
 void
 vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
 {
 
 	ctx->lowmem_limit = limit;
 }
 
 /* Record the VM_MEM_F_* flags consulted when guest memory is mapped. */
 void
 vm_set_memflags(struct vmctx *ctx, int flags)
 {
 
 	ctx->memflags = flags;
 }
 
 /*
  * Create 'len' bytes of guest memory at guest physical address 'gpa' and,
  * when 'addr' is not NULL, map it into this process.
  *
  * Returns 0 on success.  Returns the VM_MAP_MEMORY ioctl result if that
  * fails, or -1 (errno set by mmap) if the segment was created but could not
  * be mapped; '*addr' is written only after a successful mmap().
  */
 static int
 setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr)
 {
 	int error, mmap_flags;
 	struct vm_memory_segment seg;
 	void *ptr;
 
 	bzero(&seg, sizeof(seg));
 	seg.gpa = gpa;
 	seg.len = len;
 	error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
 	if (error != 0 || addr == NULL)
 		return (error);
 
 	/* Keep guest memory out of core dumps unless explicitly requested. */
 	mmap_flags = MAP_SHARED;
 	if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
 		mmap_flags |= MAP_NOCORE;
 
 	ptr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags,
 	    ctx->fd, gpa);
 	if (ptr == MAP_FAILED)
 		return (-1);
 
 	*addr = ptr;
 	return (0);
 }
 
 int
 vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
 {
 	char **addr;
 	int error;
 
 	/* XXX VM_MMAP_SPARSE not implemented yet */
 	assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL);
 	ctx->vms = vms;
 
 	/*
 	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
 	 * create another 'highmem' segment above 4GB for the remainder.
 	 */
 	if (memsize > ctx->lowmem_limit) {
 		ctx->lowmem = ctx->lowmem_limit;
 		ctx->highmem = memsize - ctx->lowmem;
 	} else {
 		ctx->lowmem = memsize;
 		ctx->highmem = 0;
 	}
 
 	if (ctx->lowmem > 0) {
 		addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL;
 		error = setup_memory_segment(ctx, 0, ctx->lowmem, addr);
 		if (error)
 			return (error);
 	}
 
 	if (ctx->highmem > 0) {
 		addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL;
 		error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr);
 		if (error)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Translate the guest physical range [gaddr, gaddr + len) into a host
  * pointer.
  *
  * Returns a pointer into the lowmem or highmem mapping when the whole range
  * lies inside one of them, otherwise NULL.  The bounds are checked by
  * subtraction so that a large 'len' cannot wrap 'gaddr + len' around and
  * slip past the check.
  */
 void *
 vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
 {
 
 	/* XXX VM_MMAP_SPARSE not implemented yet */
 	assert(ctx->vms == VM_MMAP_ALL);
 
 	if (gaddr < ctx->lowmem && len <= ctx->lowmem - gaddr)
 		return ((void *)(ctx->lowmem_addr + gaddr));
 
 	if (gaddr >= 4*GB) {
 		/* Highmem starts at 4GB; rebase to an offset within it. */
 		gaddr -= 4*GB;
 		if (gaddr < ctx->highmem && len <= ctx->highmem - gaddr)
 			return ((void *)(ctx->highmem_addr + gaddr));
 	}
 
 	return (NULL);
 }
 
 /* Return the size recorded for the memory segment at gpa 0. */
 size_t
 vm_get_lowmem_size(struct vmctx *ctx)
 {
 
 	return (ctx->lowmem);
 }
 
 /* Return the size recorded for the memory segment at gpa 4GB. */
 size_t
 vm_get_highmem_size(struct vmctx *ctx)
 {
 
 	return (ctx->highmem);
 }
 
 int
 vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
 	    uint64_t base, uint32_t limit, uint32_t access)
 {
 	int error;
 	struct vm_seg_desc vmsegdesc;
 
 	bzero(&vmsegdesc, sizeof(vmsegdesc));
 	vmsegdesc.cpuid = vcpu;
 	vmsegdesc.regnum = reg;
 	vmsegdesc.desc.base = base;
 	vmsegdesc.desc.limit = limit;
 	vmsegdesc.desc.access = access;
 
 	error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 	return (error);
 }
 
 int
 vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 	    uint64_t *base, uint32_t *limit, uint32_t *access)
 {
 	int error;
 	struct vm_seg_desc vmsegdesc;
 
 	bzero(&vmsegdesc, sizeof(vmsegdesc));
 	vmsegdesc.cpuid = vcpu;
 	vmsegdesc.regnum = reg;
 
 	error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 	if (error == 0) {
 		*base = vmsegdesc.desc.base;
 		*limit = vmsegdesc.desc.limit;
 		*access = vmsegdesc.desc.access;
 	}
 	return (error);
 }
 
 /*
  * Same as vm_get_desc(), with the results stored in the fields of
  * '*seg_desc'.
  */
 int
 vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
 {
 
 	return (vm_get_desc(ctx, vcpu, reg, &seg_desc->base,
 	    &seg_desc->limit, &seg_desc->access));
 }
 
 int
 vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
 {
 	int error;
 	struct vm_register vmreg;
 
 	bzero(&vmreg, sizeof(vmreg));
 	vmreg.cpuid = vcpu;
 	vmreg.regnum = reg;
 	vmreg.regval = val;
 
 	error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
 	return (error);
 }
 
 int
 vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
 {
 	int error;
 	struct vm_register vmreg;
 
 	bzero(&vmreg, sizeof(vmreg));
 	vmreg.cpuid = vcpu;
 	vmreg.regnum = reg;
 
 	error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
 	*ret_val = vmreg.regval;
 	return (error);
 }
 
 int
 vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
 {
 	int error;
 	struct vm_run vmrun;
 
 	bzero(&vmrun, sizeof(vmrun));
 	vmrun.cpuid = vcpu;
 	vmrun.rip = rip;
 
 	error = ioctl(ctx->fd, VM_RUN, &vmrun);
 	bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
 	return (error);
 }
 
 int
 vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
 {
 	struct vm_suspend vmsuspend;
 
 	bzero(&vmsuspend, sizeof(vmsuspend));
 	vmsuspend.how = how;
 	return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
 }
 
 /* Issue the VM_REINIT ioctl (no argument) and return its result. */
 int
 vm_reinit(struct vmctx *ctx)
 {
 
 	return (ioctl(ctx->fd, VM_REINIT, 0));
 }
 
 static int
 vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector,
     int error_code, int error_code_valid)
 {
 	struct vm_exception exc;
 
 	bzero(&exc, sizeof(exc));
 	exc.cpuid = vcpu;
 	exc.vector = vector;
 	exc.error_code = error_code;
 	exc.error_code_valid = error_code_valid;
 
 	return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
 }
 
 /* Inject exception 'vector' into 'vcpu' with no error code marked valid. */
 int
 vm_inject_exception(struct vmctx *ctx, int vcpu, int vector)
 {
 
 	return (vm_inject_exception_real(ctx, vcpu, vector, 0, 0));
 }
 
 /* Inject exception 'vector' into 'vcpu' together with error code 'errcode'. */
 int
 vm_inject_exception2(struct vmctx *ctx, int vcpu, int vector, int errcode)
 {
 
 	return (vm_inject_exception_real(ctx, vcpu, vector, errcode, 1));
 }
 
 /*
  * Map an APIC id to a vcpu number.  'ctx' is not consulted.
  */
 int
 vm_apicid2vcpu(struct vmctx *ctx, int apicid)
 {
 	int vcpu;
 
 	/*
 	 * The apic id associated with the 'vcpu' has the same numerical value
 	 * as the 'vcpu' itself.
 	 */
 	vcpu = apicid;
 	return (vcpu);
 }
 
 int
 vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
 {
 	struct vm_lapic_irq vmirq;
 
 	bzero(&vmirq, sizeof(vmirq));
 	vmirq.cpuid = vcpu;
 	vmirq.vector = vector;
 
 	return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
 }
 
 int
 vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
 {
 	struct vm_lapic_irq vmirq;
 
 	bzero(&vmirq, sizeof(vmirq));
 	vmirq.cpuid = vcpu;
 	vmirq.vector = vector;
 
 	return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
 }
 
 int
 vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
 {
 	struct vm_lapic_msi vmmsi;
 
 	bzero(&vmmsi, sizeof(vmmsi));
 	vmmsi.addr = addr;
 	vmmsi.msg = msg;
 
 	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
 }
 
 int
 vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
 }
 
 int
 vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
 }
 
 int
 vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
 }
 
 /*
  * Pass 'pincount' straight to the VM_IOAPIC_PINCOUNT ioctl and return the
  * ioctl result.
  */
 int
 vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
 {
 
 	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
 }
 
 int
 vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
 }
 
 int
 vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
 }
 
 int
 vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
 }
 
 int
 vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
     enum vm_intr_trigger trigger)
 {
 	struct vm_isa_irq_trigger isa_irq_trigger;
 
 	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
 	isa_irq_trigger.atpic_irq = atpic_irq;
 	isa_irq_trigger.trigger = trigger;
 
 	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
 }
 
 int
 vm_inject_nmi(struct vmctx *ctx, int vcpu)
 {
 	struct vm_nmi vmnmi;
 
 	bzero(&vmnmi, sizeof(vmnmi));
 	vmnmi.cpuid = vcpu;
 
 	return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
 }
 
 /*
  * Capability names and their VM_CAP_* values, searched by
  * vm_capability_name2type() and vm_capability_type2name().  The table ends
  * with an entry whose name is NULL.
  */
 static struct {
 	const char	*name;
 	int		type;
 } capstrmap[] = {
 	{ "hlt_exit",		VM_CAP_HALT_EXIT },
 	{ "mtrap_exit",		VM_CAP_MTRAP_EXIT },
 	{ "pause_exit",		VM_CAP_PAUSE_EXIT },
 	{ "unrestricted_guest",	VM_CAP_UNRESTRICTED_GUEST },
 	{ "enable_invpcid",	VM_CAP_ENABLE_INVPCID },
 	{ 0 }
 };
 
 /*
  * Look up a capability by name.  Returns its type, or -1 when 'capname' is
  * NULL or does not match any table entry.
  */
 int
 vm_capability_name2type(const char *capname)
 {
 	int idx;
 
 	if (capname == NULL)
 		return (-1);
 
 	idx = 0;
 	while (capstrmap[idx].name != NULL) {
 		if (strcmp(capstrmap[idx].name, capname) == 0)
 			return (capstrmap[idx].type);
 		idx++;
 	}
 
 	return (-1);
 }
 
 /*
  * Look up a capability by type.  Returns its name, or NULL when no table
  * entry has that type.
  */
 const char *
 vm_capability_type2name(int type)
 {
 	int idx;
 
 	idx = 0;
 	while (capstrmap[idx].name != NULL) {
 		if (type == capstrmap[idx].type)
 			return (capstrmap[idx].name);
 		idx++;
 	}
 
 	return (NULL);
 }
 
 int
 vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 		  int *retval)
 {
 	int error;
 	struct vm_capability vmcap;
 
 	bzero(&vmcap, sizeof(vmcap));
 	vmcap.cpuid = vcpu;
 	vmcap.captype = cap;
 
 	error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
 	*retval = vmcap.capval;
 	return (error);
 }
 
 int
 vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
 {
 	struct vm_capability vmcap;
 
 	bzero(&vmcap, sizeof(vmcap));
 	vmcap.cpuid = vcpu;
 	vmcap.captype = cap;
 	vmcap.capval = val;
 	
 	return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
 }
 
 int
 vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
 {
 	struct vm_pptdev pptdev;
 
 	bzero(&pptdev, sizeof(pptdev));
 	pptdev.bus = bus;
 	pptdev.slot = slot;
 	pptdev.func = func;
 
 	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
 }
 
 int
 vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
 {
 	struct vm_pptdev pptdev;
 
 	bzero(&pptdev, sizeof(pptdev));
 	pptdev.bus = bus;
 	pptdev.slot = slot;
 	pptdev.func = func;
 
 	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
 }
 
 /*
  * Map 'len' bytes of the passthrough device's MMIO at host physical
  * address 'hpa' to guest physical address 'gpa'.
  * Returns the VM_MAP_PPTDEV_MMIO ioctl result.
  */
 int
 vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 		   vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 	struct vm_pptdev_mmio req;
 	int rc;
 
 	memset(&req, 0, sizeof(req));
 	req.bus = bus;
 	req.slot = slot;
 	req.func = func;
 	req.gpa = gpa;
 	req.len = len;
 	req.hpa = hpa;
 	rc = ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &req);
 
 	return (rc);
 }
 
 int
 vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
     uint64_t addr, uint64_t msg, int numvec)
 {
 	struct vm_pptdev_msi pptmsi;
 
 	bzero(&pptmsi, sizeof(pptmsi));
 	pptmsi.vcpu = vcpu;
 	pptmsi.bus = bus;
 	pptmsi.slot = slot;
 	pptmsi.func = func;
 	pptmsi.msg = msg;
 	pptmsi.addr = addr;
 	pptmsi.numvec = numvec;
 
 	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
 }
 
 int	
 vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
     int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct vm_pptdev_msix pptmsix;
 
 	bzero(&pptmsix, sizeof(pptmsix));
 	pptmsix.vcpu = vcpu;
 	pptmsix.bus = bus;
 	pptmsix.slot = slot;
 	pptmsix.func = func;
 	pptmsix.idx = idx;
 	pptmsix.msg = msg;
 	pptmsix.addr = addr;
 	pptmsix.vector_control = vector_control;
 
 	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
 }
 
 /*
  * Fetch the statistics of 'vcpu'.
  *
  * On success returns a pointer to the statistics array and, for each
  * non-NULL output pointer, stores the entry count and timestamp.  Returns
  * NULL when the VM_STATS ioctl fails.  The result lives in a function-local
  * static buffer that every call overwrites.
  */
 uint64_t *
 vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
 	     int *ret_entries)
 {
 	static struct vm_stats vmstats;
 
 	vmstats.cpuid = vcpu;
 	if (ioctl(ctx->fd, VM_STATS, &vmstats) != 0)
 		return (NULL);
 
 	if (ret_entries)
 		*ret_entries = vmstats.num_entries;
 	if (ret_tv)
 		*ret_tv = vmstats.tv;
 
 	return (vmstats.statbuf);
 }
 
 /*
  * Return the description of statistic number 'index', or NULL when the
  * VM_STAT_DESC ioctl fails.  The string lives in a function-local static
  * buffer that every call overwrites.
  */
 const char *
 vm_get_stat_desc(struct vmctx *ctx, int index)
 {
 	static struct vm_stat_desc statdesc;
 
 	statdesc.index = index;
 	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) != 0)
 		return (NULL);
 
 	return (statdesc.desc);
 }
 
 int
 vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
 {
 	int error;
 	struct vm_x2apic x2apic;
 
 	bzero(&x2apic, sizeof(x2apic));
 	x2apic.cpuid = vcpu;
 
 	error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic);
 	*state = x2apic.state;
 	return (error);
 }
 
 int
 vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
 {
 	int error;
 	struct vm_x2apic x2apic;
 
 	bzero(&x2apic, sizeof(x2apic));
 	x2apic.cpuid = vcpu;
 	x2apic.state = state;
 
 	error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic);
 
 	return (error);
 }
 
 /*
  * From Intel Vol 3a:
  * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
  */
 int
 vcpu_reset(struct vmctx *vmctx, int vcpu)
 {
 	int error;
 	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
 	uint32_t desc_access, desc_limit;
 	uint16_t sel;
 
 	zero = 0;
 
 	rflags = 0x2;
 	error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
 	if (error)
 		goto done;
 
 	rip = 0xfff0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
 		goto done;
 
 	cr0 = CR0_NE;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
 		goto done;
 
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
 		goto done;
 	
 	cr4 = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
 		goto done;
 
 	/*
 	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
 	 */
 	desc_base = 0xffff0000;
 	desc_limit = 0xffff;
 	desc_access = 0x0093;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0xf000;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
 		goto done;
 
 	/*
 	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
 	 */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x0093;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
 		goto done;
 
 	/* General purpose registers */
 	rdx = 0xf00;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
 		goto done;
 
 	/* GDTR, IDTR */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
 			    desc_base, desc_limit, desc_access);
 	if (error != 0)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
 			    desc_base, desc_limit, desc_access);
 	if (error != 0)
 		goto done;
 
 	/* TR */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x0000008b;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
 		goto done;
 
 	/* LDTR */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x00000082;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
 			    desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
 		goto done;
 
 	/* XXX cr2, debug registers */
 
 	error = 0;
 done:
 	return (error);
 }
 
 int
 vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
 {
 	int error, i;
 	struct vm_gpa_pte gpapte;
 
 	bzero(&gpapte, sizeof(gpapte));
 	gpapte.gpa = gpa;
 
 	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);
 
 	if (error == 0) {
 		*num = gpapte.ptenum;
 		for (i = 0; i < gpapte.ptenum; i++)
 			pte[i] = gpapte.pte[i];
 	}
 
 	return (error);
 }
 
 int
 vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
 {
 	int error;
 	struct vm_hpet_cap cap;
 
 	bzero(&cap, sizeof(struct vm_hpet_cap));
 	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
 	if (capabilities != NULL)
 		*capabilities = cap.capabilities;
 	return (error);
 }
 
 static int
 gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, int prot, int *fault, uint64_t *gpa)
 {
 	struct vm_gla2gpa gg;
 	int error;
 
 	bzero(&gg, sizeof(struct vm_gla2gpa));
 	gg.vcpuid = vcpu;
 	gg.prot = prot;
 	gg.gla = gla;
 	gg.paging = *paging;
 
 	error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
 	if (error == 0) {
 		*fault = gg.fault;
 		*gpa = gg.gpa;
 	}
 	return (error);
 }
 
 /* Fallback min(); note that the selected argument is evaluated twice. */
 #ifndef min
 #define	min(a,b)	(((a) < (b)) ? (a) : (b))
 #endif
 
 /*
  * Describe the guest linear range [gla, gla + len) as a list of guest
  * physical segments in 'iov'.
  *
  * All 'iovcnt' entries are cleared first.  The range is then translated
  * piece by piece, each piece ending no later than PAGE_SIZE minus its page
  * offset.  Returns 0 on success, 1 if a translation reported a fault and -1
  * if the translation request itself failed.  Running out of entries trips
  * the assertion.
  */
 int
 vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt)
 {
 	uint64_t gpa;
 	int chunk, fault, idx, pgoff;
 
 	for (idx = 0; idx < iovcnt; idx++) {
 		iov[idx].iov_base = 0;
 		iov[idx].iov_len = 0;
 	}
 
 	for (; len != 0; iov++, iovcnt--) {
 		assert(iovcnt > 0);
 		if (gla2gpa(ctx, vcpu, paging, gla, prot, &fault, &gpa) != 0)
 			return (-1);
 		if (fault)
 			return (1);
 
 		pgoff = gpa & PAGE_MASK;
 		chunk = min(len, PAGE_SIZE - pgoff);
 
 		/* The entry carries a guest physical address, not a pointer. */
 		iov->iov_base = (void *)gpa;
 		iov->iov_len = chunk;
 
 		gla += chunk;
 		len -= chunk;
 	}
 
 	return (0);
 }
 
 /*
  * Copy 'len' bytes from the guest segments listed in 'iov' to the host
  * buffer 'vp'.  Each entry's iov_base is treated as a guest physical
  * address and resolved with vm_map_gpa().
  */
 void
 vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
 {
 	const char *guest;
 	char *host;
 	size_t chunk;
 
 	for (host = vp; len != 0; iov++, host += chunk, len -= chunk) {
 		assert(iov->iov_len);
 		chunk = min(len, iov->iov_len);
 		guest = vm_map_gpa(ctx, (uint64_t)iov->iov_base, chunk);
 		memmove(host, guest, chunk);
 	}
 }
 
 /*
  * Copy 'len' bytes from the host buffer 'vp' to the guest segments listed
  * in 'iov'.  Each entry's iov_base is treated as a guest physical address
  * and resolved with vm_map_gpa().
  */
 void
 vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
     size_t len)
 {
 	const char *host;
 	char *guest;
 	size_t chunk;
 
 	for (host = vp; len != 0; iov++, host += chunk, len -= chunk) {
 		assert(iov->iov_len);
 		chunk = min(len, iov->iov_len);
 		guest = vm_map_gpa(ctx, (uint64_t)iov->iov_base, chunk);
 		memmove(guest, host, chunk);
 	}
 }
 
 /*
  * Fetch the vcpu set selected by 'which' into '*cpus'.
  * Returns the VM_GET_CPUS ioctl result.
  */
 static int
 vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
 {
 	struct vm_cpuset req;
 
 	memset(&req, 0, sizeof(struct vm_cpuset));
 	req.which = which;
 	req.cpusetsize = sizeof(cpuset_t);
 	req.cpus = cpus;
 
 	return (ioctl(ctx->fd, VM_GET_CPUS, &req));
 }
 
 /* Fetch the VM_ACTIVE_CPUS set into '*cpus'; see vm_get_cpus(). */
 int
 vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
 {
 
 	return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus));
 }
 
 /* Fetch the VM_SUSPENDED_CPUS set into '*cpus'; see vm_get_cpus(). */
 int
 vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
 {
 
 	return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus));
 }
 
 int
 vm_activate_cpu(struct vmctx *ctx, int vcpu)
 {
 	struct vm_activate_cpu ac;
 	int error;
 
 	bzero(&ac, sizeof(struct vm_activate_cpu));
 	ac.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
 	return (error);
 }
 
 int
 vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
 {
 	struct vm_intinfo vmii;
 	int error;
 
 	bzero(&vmii, sizeof(struct vm_intinfo));
 	vmii.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
 	if (error == 0) {
 		*info1 = vmii.info1;
 		*info2 = vmii.info2;
 	}
 	return (error);
 }
 
 int
 vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
 {
 	struct vm_intinfo vmii;
 	int error;
 
 	bzero(&vmii, sizeof(struct vm_intinfo));
 	vmii.vcpuid = vcpu;
 	vmii.info1 = info1;
 	error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
 	return (error);
 }
+
+/*
+ * Write 'value' at RTC offset 'offset'.
+ * Returns the VM_RTC_WRITE ioctl result.
+ */
+int
+vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
+{
+	struct vm_rtc_data req;
+
+	memset(&req, 0, sizeof(struct vm_rtc_data));
+	req.offset = offset;
+	req.value = value;
+
+	return (ioctl(ctx->fd, VM_RTC_WRITE, &req));
+}
+
+/*
+ * Read the byte at RTC offset 'offset' into '*retval'.  '*retval' is
+ * written only when the ioctl succeeds.
+ * Returns the VM_RTC_READ ioctl result.
+ */
+int
+vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
+{
+	struct vm_rtc_data req;
+	int rc;
+
+	memset(&req, 0, sizeof(struct vm_rtc_data));
+	req.offset = offset;
+
+	rc = ioctl(ctx->fd, VM_RTC_READ, &req);
+	if (rc != 0)
+		return (rc);
+
+	*retval = req.value;
+	return (0);
+}
+
+/*
+ * Set the RTC time to 'secs'.
+ * Returns the VM_RTC_SETTIME ioctl result.
+ */
+int
+vm_rtc_settime(struct vmctx *ctx, time_t secs)
+{
+	struct vm_rtc_time req;
+
+	memset(&req, 0, sizeof(struct vm_rtc_time));
+	req.secs = secs;
+
+	return (ioctl(ctx->fd, VM_RTC_SETTIME, &req));
+}
+
+/*
+ * Read the RTC time into '*secs'.  '*secs' is written only when the ioctl
+ * succeeds.  Returns the VM_RTC_GETTIME ioctl result.
+ */
+int
+vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
+{
+	struct vm_rtc_time req;
+	int rc;
+
+	memset(&req, 0, sizeof(struct vm_rtc_time));
+	rc = ioctl(ctx->fd, VM_RTC_GETTIME, &req);
+	if (rc != 0)
+		return (rc);
+
+	*secs = req.secs;
+	return (0);
+}
Index: head/lib/libvmmapi/vmmapi.h
===================================================================
--- head/lib/libvmmapi/vmmapi.h	(revision 276427)
+++ head/lib/libvmmapi/vmmapi.h	(revision 276428)
@@ -1,153 +1,159 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _VMMAPI_H_
 #define	_VMMAPI_H_
 
 #include <sys/param.h>
 #include <sys/cpuset.h>
 
 struct iovec;
 struct vmctx;
 enum x2apic_state;
 
 /*
  * Different styles of mapping the memory assigned to a VM into the address
  * space of the controlling process.
  */
 enum vm_mmap_style {
 	VM_MMAP_NONE,		/* no mapping */
 	VM_MMAP_ALL,		/* fully and statically mapped */
 	VM_MMAP_SPARSE,		/* mappings created on-demand (marked not implemented in vmmapi.c) */
 };
 
 #define	VM_MEM_F_INCORE	0x01	/* include guest memory in core file */
 
 int	vm_create(const char *name);
 struct vmctx *vm_open(const char *name);
 void	vm_destroy(struct vmctx *ctx);
 int	vm_parse_memsize(const char *optarg, size_t *memsize);
 int	vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
 			  int *wired);
 int	vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
 void	*vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
 int	vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
 uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
 void	vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
 void	vm_set_memflags(struct vmctx *ctx, int flags);
 size_t	vm_get_lowmem_size(struct vmctx *ctx);
 size_t	vm_get_highmem_size(struct vmctx *ctx);
 int	vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
 		    uint64_t base, uint32_t limit, uint32_t access);
 int	vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 		    uint64_t *base, uint32_t *limit, uint32_t *access);
 int	vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg,
 			struct seg_desc *seg_desc);
 int	vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
 int	vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
 int	vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
 	       struct vm_exit *ret_vmexit);
 int	vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
 int	vm_reinit(struct vmctx *ctx);
 int	vm_apicid2vcpu(struct vmctx *ctx, int apicid);
 int	vm_inject_exception(struct vmctx *ctx, int vcpu, int vec);
 int	vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode);
 int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
 int	vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
 int	vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
 int	vm_ioapic_assert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_deassert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pulse_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pincount(struct vmctx *ctx, int *pincount);
 int	vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
 	    enum vm_intr_trigger trigger);
 int	vm_inject_nmi(struct vmctx *ctx, int vcpu);
 int	vm_capability_name2type(const char *capname);
 const char *vm_capability_type2name(int type);
 int	vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 			  int *retval);
 int	vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 			  int val);
 int	vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 			   vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
 	    int func, uint64_t addr, uint64_t msg, int numvec);
 int	vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
 	    int func, int idx, uint64_t addr, uint64_t msg,
 	    uint32_t vector_control);
 
 int	vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
 int	vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
 
 /*
  * Return a pointer to the statistics buffer. Note that this is not MT-safe.
  */
 uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
 		       int *ret_entries);
 const char *vm_get_stat_desc(struct vmctx *ctx, int index);
 
 int	vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *s);
 int	vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s);
 
 int	vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities);
 
 /*
  * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'.
  * The 'iovcnt' should be big enough to accommodate all GPA segments.
  * Returns 0 on success, 1 on a guest fault condition and -1 otherwise.
  */
 int	vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg,
 	    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt);
 void	vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
 	    void *host_dst, size_t len);
 void	vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
 	    struct iovec *guest_iov, size_t len);
 
+/* RTC */
+int	vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value);
+int	vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval);
+int	vm_rtc_settime(struct vmctx *ctx, time_t secs);
+int	vm_rtc_gettime(struct vmctx *ctx, time_t *secs);
+
 /* Reset vcpu register state */
 int	vcpu_reset(struct vmctx *ctx, int vcpu);
 
 int	vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_activate_cpu(struct vmctx *ctx, int vcpu);
 
 /*
  * FreeBSD specific APIs
  */
 int	vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu,
 				uint64_t rip, uint64_t cr3, uint64_t gdtbase,
 				uint64_t rsp);
 int	vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu,
 					uint32_t eip, uint32_t gdtbase,
 					uint32_t esp);
 void	vm_setup_freebsd_gdt(uint64_t *gdtr);
 #endif	/* _VMMAPI_H_ */
Index: head/sys/amd64/include/vmm.h
===================================================================
--- head/sys/amd64/include/vmm.h	(revision 276427)
+++ head/sys/amd64/include/vmm.h	(revision 276428)
@@ -1,630 +1,631 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _VMM_H_
 #define	_VMM_H_
 
 #include <x86/segments.h>
 
 /*
  * Ways in which a virtual machine can be suspended. A value of this type is
  * the 'how' argument of vm_suspend() and the 'how' member of both
  * 'struct vm_suspend' (vmm_dev.h) and the 'suspended' payload of
  * 'struct vm_exit'.
  */
 enum vm_suspend_how {
 	VM_SUSPEND_NONE,
 	VM_SUSPEND_RESET,
 	VM_SUSPEND_POWEROFF,
 	VM_SUSPEND_HALT,
 	VM_SUSPEND_TRIPLEFAULT,
 	VM_SUSPEND_LAST		/* NOTE(review): looks like an end marker - confirm */
 };
 
 /*
  * Identifiers for architecturally defined registers.
  */
 enum vm_reg_name {
 	VM_REG_GUEST_RAX,
 	VM_REG_GUEST_RBX,
 	VM_REG_GUEST_RCX,
 	VM_REG_GUEST_RDX,
 	VM_REG_GUEST_RSI,
 	VM_REG_GUEST_RDI,
 	VM_REG_GUEST_RBP,
 	VM_REG_GUEST_R8,
 	VM_REG_GUEST_R9,
 	VM_REG_GUEST_R10,
 	VM_REG_GUEST_R11,
 	VM_REG_GUEST_R12,
 	VM_REG_GUEST_R13,
 	VM_REG_GUEST_R14,
 	VM_REG_GUEST_R15,
 	VM_REG_GUEST_CR0,
 	VM_REG_GUEST_CR3,
 	VM_REG_GUEST_CR4,
 	VM_REG_GUEST_DR7,
 	VM_REG_GUEST_RSP,
 	VM_REG_GUEST_RIP,
 	VM_REG_GUEST_RFLAGS,
 	VM_REG_GUEST_ES,
 	VM_REG_GUEST_CS,
 	VM_REG_GUEST_SS,
 	VM_REG_GUEST_DS,
 	VM_REG_GUEST_FS,
 	VM_REG_GUEST_GS,
 	VM_REG_GUEST_LDTR,
 	VM_REG_GUEST_TR,
 	VM_REG_GUEST_IDTR,
 	VM_REG_GUEST_GDTR,
 	VM_REG_GUEST_EFER,
 	VM_REG_GUEST_CR2,
 	VM_REG_GUEST_PDPTE0,
 	VM_REG_GUEST_PDPTE1,
 	VM_REG_GUEST_PDPTE2,
 	VM_REG_GUEST_PDPTE3,
 	VM_REG_GUEST_INTR_SHADOW,
 	VM_REG_LAST
 };
 
 /*
  * x2APIC state of a vcpu, as read by vm_get_x2apic_state() and written by
  * vm_set_x2apic_state().
  */
 enum x2apic_state {
 	X2APIC_DISABLED,
 	X2APIC_ENABLED,
 	X2APIC_STATE_LAST
 };
 
 /*
  * Fields of an event information word ('intinfo'), as implied by the masks:
  *   bits  7:0	vector			VM_INTINFO_VECTOR()
  *   bits 10:8	event type		VM_INTINFO_TYPE (one of HWINTR, NMI,
  *					HWEXCEPTION or SWINTR below)
  *   bit  11	error code flag		VM_INTINFO_DEL_ERRCODE
  *   bits 30:12	reserved		VM_INTINFO_RSVD
  *   bit  31	valid			VM_INTINFO_VALID
  */
 #define	VM_INTINFO_VECTOR(info)	((info) & 0xff)
 #define	VM_INTINFO_DEL_ERRCODE	0x800
 #define	VM_INTINFO_RSVD		0x7ffff000
 #define	VM_INTINFO_VALID	0x80000000
 #define	VM_INTINFO_TYPE		0x700
 #define	VM_INTINFO_HWINTR	(0 << 8)
 #define	VM_INTINFO_NMI		(2 << 8)
 #define	VM_INTINFO_HWEXCEPTION	(3 << 8)
 #define	VM_INTINFO_SWINTR	(4 << 8)
 
 #ifdef _KERNEL
 
 #define	VM_MAX_NAMELEN	32
 
 struct vm;
 struct vm_exception;
 struct vm_memory_segment;
 struct seg_desc;
 struct vm_exit;
 struct vm_run;
 struct vhpet;
 struct vioapic;
 struct vlapic;
 struct vmspace;
 struct vm_object;
 struct vm_guest_paging;
 struct pmap;
 
 typedef int	(*vmm_init_func_t)(int ipinum);
 typedef int	(*vmm_cleanup_func_t)(void);
 typedef void	(*vmm_resume_func_t)(void);
 typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
 typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
 				  struct pmap *pmap, void *rendezvous_cookie,
 				  void *suspend_cookie);
 typedef void	(*vmi_cleanup_func_t)(void *vmi);
 typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
 				      uint64_t *retval);
 typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
 				      uint64_t val);
 typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
 				  struct seg_desc *desc);
 typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
 				  struct seg_desc *desc);
 typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
 typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
 typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
 typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
 typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
 typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
 
 /*
  * Table of entry points, one function pointer per operation. Two instances,
  * 'vmm_ops_intel' and 'vmm_ops_amd', are declared right after this
  * structure. The member types are the vmm_*_func_t / vmi_* typedefs above.
  */
 struct vmm_ops {
 	vmm_init_func_t		init;		/* module wide initialization */
 	vmm_cleanup_func_t	cleanup;
 	vmm_resume_func_t	resume;
 
 	vmi_init_func_t		vminit;		/* vm-specific initialization */
 	vmi_run_func_t		vmrun;
 	vmi_cleanup_func_t	vmcleanup;
 	vmi_get_register_t	vmgetreg;
 	vmi_set_register_t	vmsetreg;
 	vmi_get_desc_t		vmgetdesc;
 	vmi_set_desc_t		vmsetdesc;
 	vmi_get_cap_t		vmgetcap;
 	vmi_set_cap_t		vmsetcap;
 	vmi_vmspace_alloc	vmspace_alloc;
 	vmi_vmspace_free	vmspace_free;
 	vmi_vlapic_init		vlapic_init;
 	vmi_vlapic_cleanup	vlapic_cleanup;
 };
 
 extern struct vmm_ops vmm_ops_intel;
 extern struct vmm_ops vmm_ops_amd;
 
 int vm_create(const char *name, struct vm **retvm);
 void vm_destroy(struct vm *vm);
 int vm_reinit(struct vm *vm);
 const char *vm_name(struct vm *vm);
 int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
 int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
 void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
 		  void **cookie);
 void vm_gpa_release(void *cookie);
 int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
 	      struct vm_memory_segment *seg);
 int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
 		  vm_offset_t *offset, struct vm_object **object);
 boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
 int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
 int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
 int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *ret_desc);
 int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *desc);
 int vm_run(struct vm *vm, struct vm_run *vmrun);
 int vm_suspend(struct vm *vm, enum vm_suspend_how how);
 int vm_inject_nmi(struct vm *vm, int vcpu);
 int vm_nmi_pending(struct vm *vm, int vcpuid);
 void vm_nmi_clear(struct vm *vm, int vcpuid);
 int vm_inject_extint(struct vm *vm, int vcpu);
 int vm_extint_pending(struct vm *vm, int vcpuid);
 void vm_extint_clear(struct vm *vm, int vcpuid);
 struct vlapic *vm_lapic(struct vm *vm, int cpu);
 struct vioapic *vm_ioapic(struct vm *vm);
 struct vhpet *vm_hpet(struct vm *vm);
 int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
 int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
 int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
 int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
 int vm_apicid2vcpuid(struct vm *vm, int apicid);
 int vm_activate_cpu(struct vm *vm, int vcpu);
 cpuset_t vm_active_cpus(struct vm *vm);
 cpuset_t vm_suspended_cpus(struct vm *vm);
 struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
 void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
 
 /*
  * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
  * The rendezvous 'func(arg)' is not allowed to do anything that will
  * cause the thread to be put to sleep.
  *
  * If the rendezvous is being initiated from a vcpu context then the
  * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
  *
  * The caller cannot hold any locks when initiating the rendezvous.
  *
  * The implementation of this API may cause vcpus other than those specified
  * by 'dest' to be stalled. The caller should not rely on any vcpus making
  * forward progress when the rendezvous is in progress.
  */
 typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
 void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
     vm_rendezvous_func_t func, void *arg);
 
 /*
  * Return 1 when the pointer-sized word that 'rendezvous_cookie' points to
  * is non-zero and 0 when it is zero.
  */
 static __inline int
 vcpu_rendezvous_pending(void *rendezvous_cookie)
 {
 	uintptr_t *pending;
 
 	pending = rendezvous_cookie;
 	if (*pending == 0)
 		return (0);
 	return (1);
 }
 
 /*
  * Return the value of the int that 'suspend_cookie' points to.
  */
 static __inline int
 vcpu_suspended(void *suspend_cookie)
 {
 	int *suspend_flag = suspend_cookie;
 
 	return (*suspend_flag);
 }
 
 /*
  * Return 1 if device indicated by bus/slot/func is supposed to be a
  * pci passthrough device.
  *
  * Return 0 otherwise.
  */
 int vmm_is_pptdev(int bus, int slot, int func);
 
 void *vm_iommu_domain(struct vm *vm);
 
 /*
  * States of a vcpu. The state is changed with vcpu_set_state() and read
  * back with vcpu_get_state().
  */
 enum vcpu_state {
 	VCPU_IDLE,
 	VCPU_FROZEN,
 	VCPU_RUNNING,
 	VCPU_SLEEPING,
 };
 
 int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
     bool from_idle);
 enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
 
 /*
  * Return 1 if vcpu_get_state() reports VCPU_RUNNING for 'vcpu' and 0
  * otherwise. 'hostcpu' is handed to vcpu_get_state() unchanged.
  */
 static int __inline
 vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
 {
 	if (vcpu_get_state(vm, vcpu, hostcpu) != VCPU_RUNNING)
 		return (0);
 	return (1);
 }
 
 #ifdef _SYS_PROC_H_
 /*
  * Return non-zero when the current thread has TDF_ASTPENDING or
  * TDF_NEEDRESCHED set in its td_flags. Neither 'vm' nor 'vcpu' is consulted.
  * Only compiled when _SYS_PROC_H_ is defined.
  */
 static int __inline
 vcpu_should_yield(struct vm *vm, int vcpu)
 {
 	const int yield_flags = TDF_ASTPENDING | TDF_NEEDRESCHED;
 
 	return (curthread->td_flags & yield_flags);
 }
 #endif
 
 void *vcpu_stats(struct vm *vm, int vcpu);
 void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
 struct vmspace *vm_get_vmspace(struct vm *vm);
 int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
 int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
 struct vatpic *vm_atpic(struct vm *vm);
 struct vatpit *vm_atpit(struct vm *vm);
 struct vpmtmr *vm_pmtmr(struct vm *vm);
+struct vrtc *vm_rtc(struct vm *vm);
 
 /*
  * Inject exception 'vme' into the guest vcpu. This function returns 0 on
  * success and non-zero on failure.
  *
  * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
  * this function directly because they enforce the trap-like or fault-like
  * behavior of an exception.
  *
  * This function should only be called in the context of the thread that is
  * executing this vcpu.
  */
 int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);
 
 /*
  * This function is called after a VM-exit that occurred during exception or
  * interrupt delivery through the IDT. The format of 'intinfo' is described
  * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
  *
  * If a VM-exit handler completes the event delivery successfully then it
  * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
  * if the task switch emulation is triggered via a task gate then it should
  * call this function with 'intinfo=0' to indicate that the external event
  * is not pending anymore.
  *
  * Return value is 0 on success and non-zero on failure.
  */
 int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
 
 /*
  * This function is called before every VM-entry to retrieve a pending
  * event that should be injected into the guest. This function combines
  * nested events into a double or triple fault.
  *
  * Returns 0 if there are no events that need to be injected into the guest
  * and non-zero otherwise.
  */
 int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
 
 int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
 
 enum vm_reg_name vm_segment_name(int seg_encoding);
 
 /*
  * One element of the 'copyinfo[]' array that vm_copy_setup() sets up and
  * that vm_copyin(), vm_copyout() and vm_copy_teardown() take as an argument.
  */
 struct vm_copyinfo {
 	uint64_t	gpa;	/* presumably a guest physical address - confirm */
 	size_t		len;
 	void		*hva;	/* presumably a host virtual address - confirm */
 	void		*cookie;
 };
 
 /*
  * Set up 'copyinfo[]' to copy to/from guest linear address space starting
  * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
  * a copyin or PROT_WRITE for a copyout. 
  *
  * Returns 0 on success.
  * Returns 1 if an exception was injected into the guest.
  * Returns -1 otherwise.
  *
  * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
  * the return value is 0. The 'copyinfo[]' resources should be freed by calling
  * 'vm_copy_teardown()' after the copy is done.
  */
 int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
     int num_copyinfo);
 void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
     int num_copyinfo);
 void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
     void *kaddr, size_t len);
 void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
     struct vm_copyinfo *copyinfo, size_t len);
 
 int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
 #endif	/* _KERNEL */
 
 #define	VM_MAXCPU	16			/* maximum virtual cpus */
 
 /*
  * Identifiers for optional vmm capabilities
  */
 enum vm_cap_type {
 	VM_CAP_HALT_EXIT,
 	VM_CAP_MTRAP_EXIT,
 	VM_CAP_PAUSE_EXIT,
 	VM_CAP_UNRESTRICTED_GUEST,
 	VM_CAP_ENABLE_INVPCID,
 	VM_CAP_MAX
 };
 
 enum vm_intr_trigger {
 	EDGE_TRIGGER,
 	LEVEL_TRIGGER
 };
 	
 /*
  * The 'access' field has the format specified in Table 21-2 of the Intel
  * Architecture Manual vol 3b.
  *
  * XXX The contents of the 'access' field are architecturally defined except
  * bit 16 - Segment Unusable.
  */
 struct seg_desc {
 	uint64_t	base;
 	uint32_t	limit;
 	uint32_t	access;
 };
 /*
  * Accessors for fields of 'access'. The bit positions noted on each line
  * follow from the mask or shift used by that macro.
  */
 #define	SEG_DESC_TYPE(access)		((access) & 0x001f)	/* bits 4:0 */
 #define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)	/* bits 6:5 */
 #define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)	/* bit 7 */
 #define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)	/* bit 14 */
 #define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)	/* bit 15 */
 #define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)	/* bit 16 */
 
 enum vm_cpu_mode {
 	CPU_MODE_REAL,
 	CPU_MODE_PROTECTED,
 	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
 	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
 };
 
 enum vm_paging_mode {
 	PAGING_MODE_FLAT,
 	PAGING_MODE_32,
 	PAGING_MODE_PAE,
 	PAGING_MODE_64,
 };
 
 /*
  * Guest paging context. It is an argument of vm_copy_setup() and is embedded
  * in 'struct vm_inout_str', 'struct vm_task_switch' and the 'inst_emul'
  * payload of 'struct vm_exit'.
  */
 struct vm_guest_paging {
 	uint64_t	cr3;
 	int		cpl;	/* presumably the current privilege level - confirm */
 	enum vm_cpu_mode cpu_mode;
 	enum vm_paging_mode paging_mode;
 };
 
 /*
  * The data structures 'vie' and 'vie_op' are meant to be opaque to the
  * consumers of instruction decoding. The only reason why their contents
  * need to be exposed is because they are part of the 'vm_exit' structure.
  */
 struct vie_op {
 	uint8_t		op_byte;	/* actual opcode byte */
 	uint8_t		op_type;	/* type of operation (e.g. MOV) */
 	uint16_t	op_flags;
 };
 
 /* Number of bytes in the 'inst[]' buffer of 'struct vie'. */
 #define	VIE_INST_SIZE	15
 struct vie {
 	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
 	uint8_t		num_valid;		/* size of the instruction */
 	uint8_t		num_processed;
 
 	uint8_t		addrsize:4, opsize:4;	/* address and operand sizes */
 	uint8_t		rex_w:1,		/* REX prefix */
 			rex_r:1,
 			rex_x:1,
 			rex_b:1,
 			rex_present:1,
 			opsize_override:1,	/* Operand size override */
 			addrsize_override:1;	/* Address size override */
 
 	/*
 	 * NOTE(review): the widths in each of the next two groups add up to
 	 * 10 bits (2 + 4 + 4), which is more than one uint8_t can hold, so
 	 * each group occupies more than one byte of storage.
 	 */
 	uint8_t		mod:2,			/* ModRM byte */
 			reg:4,
 			rm:4;
 
 	uint8_t		ss:2,			/* SIB byte */
 			index:4,
 			base:4;
 
 	uint8_t		disp_bytes;
 	uint8_t		imm_bytes;
 
 	uint8_t		scale;
 	int		base_register;		/* VM_REG_GUEST_xyz */
 	int		index_register;		/* VM_REG_GUEST_xyz */
 
 	int64_t		displacement;		/* optional addr displacement */
 	int64_t		immediate;		/* optional immediate operand */
 
 	uint8_t		decoded;	/* set to 1 if successfully decoded */
 
 	struct vie_op	op;			/* opcode description */
 };
 
 /*
  * Type of the 'exitcode' member of 'struct vm_exit'.
  */
 enum vm_exitcode {
 	VM_EXITCODE_INOUT,
 	VM_EXITCODE_VMX,
 	VM_EXITCODE_BOGUS,
 	VM_EXITCODE_RDMSR,
 	VM_EXITCODE_WRMSR,
 	VM_EXITCODE_HLT,
 	VM_EXITCODE_MTRAP,
 	VM_EXITCODE_PAUSE,
 	VM_EXITCODE_PAGING,
 	VM_EXITCODE_INST_EMUL,
 	VM_EXITCODE_SPINUP_AP,
 	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
 	VM_EXITCODE_RENDEZVOUS,
 	VM_EXITCODE_IOAPIC_EOI,
 	VM_EXITCODE_SUSPENDED,
 	VM_EXITCODE_INOUT_STR,
 	VM_EXITCODE_TASK_SWITCH,
 	VM_EXITCODE_MONITOR,
 	VM_EXITCODE_MWAIT,
 	VM_EXITCODE_SVM,
 	VM_EXITCODE_MAX
 };
 
 /*
  * Description of an I/O port access. It is the 'inout' payload of
  * 'struct vm_exit' and the first member of 'struct vm_inout_str'.
  */
 struct vm_inout {
 	uint16_t	bytes:3;	/* 1 or 2 or 4 */
 	uint16_t	in:1;
 	uint16_t	string:1;
 	uint16_t	rep:1;
 	uint16_t	port;
 	uint32_t	eax;		/* valid for out */
 };
 
 struct vm_inout_str {
 	struct vm_inout	inout;		/* must be the first element */
 	struct vm_guest_paging paging;
 	uint64_t	rflags;
 	uint64_t	cr0;
 	uint64_t	index;
 	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
 	int		addrsize;
 	enum vm_reg_name seg_name;
 	struct seg_desc seg_desc;
 };
 
 enum task_switch_reason {
 	TSR_CALL,
 	TSR_IRET,
 	TSR_JMP,
 	TSR_IDT_GATE,	/* task gate in IDT */
 };
 
 struct vm_task_switch {
 	uint16_t	tsssel;		/* new TSS selector */
 	int		ext;		/* task switch due to external event */
 	uint32_t	errcode;
 	int		errcode_valid;	/* push 'errcode' on the new stack */
 	enum task_switch_reason reason;
 	struct vm_guest_paging paging;
 };
 
 /*
  * Information about a VM exit. This structure is embedded in 'struct vm_run'
  * (vmm_dev.h) and is what vm_exitinfo() returns a pointer to.
  *
  * NOTE(review): 'exitcode' presumably selects which member of the union 'u'
  * is meaningful (e.g. VM_EXITCODE_PAGING and 'u.paging') - confirm against
  * the code that fills this in.
  */
 struct vm_exit {
 	enum vm_exitcode	exitcode;
 	int			inst_length;	/* 0 means unknown */
 	uint64_t		rip;
 	union {
 		struct vm_inout	inout;
 		struct vm_inout_str inout_str;
 		struct {
 			uint64_t	gpa;
 			int		fault_type;
 		} paging;
 		struct {
 			uint64_t	gpa;
 			uint64_t	gla;
 			int		cs_d;		/* CS.D */
 			struct vm_guest_paging paging;
 			struct vie	vie;
 		} inst_emul;
 		/*
 		 * VMX specific payload. Used when there is no "better"
 		 * exitcode to represent the VM-exit.
 		 */
 		struct {
 			int		status;		/* vmx inst status */
 			/*
 			 * 'exit_reason' and 'exit_qualification' are valid
 			 * only if 'status' is zero.
 			 */
 			uint32_t	exit_reason;
 			uint64_t	exit_qualification;
 			/*
 			 * 'inst_error' and 'inst_type' are valid
 			 * only if 'status' is non-zero.
 			 */
 			int		inst_type;
 			int		inst_error;
 		} vmx;
 		/*
 		 * SVM specific payload.
 		 */
 		struct {
 			uint64_t	exitcode;
 			uint64_t	exitinfo1;
 			uint64_t	exitinfo2;
 		} svm;
 		struct {
 			uint32_t	code;		/* ecx value */
 			uint64_t	wval;
 		} msr;
 		struct {
 			int		vcpu;
 			uint64_t	rip;
 		} spinup_ap;
 		struct {
 			uint64_t	rflags;
 		} hlt;
 		struct {
 			int		vector;
 		} ioapic_eoi;
 		struct {
 			enum vm_suspend_how how;
 		} suspended;
 		struct vm_task_switch task_switch;
 	} u;
 };
 
 /* APIs to inject faults into the guest */
 /*
  * Inject the fault 'vector' into vcpu 'vcpuid'.
  * NOTE(review): 'errcode' is presumably delivered only when 'errcode_valid'
  * is non-zero - confirm against the implementation.
  */
 void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
     int errcode);
 
 /* Inject IDT_UD with errcode_valid = 0. */
 static __inline void
 vm_inject_ud(void *vm, int vcpuid)
 {
 	vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0);
 }
 
 /* Inject IDT_GP with an error code of 0. */
 static __inline void
 vm_inject_gp(void *vm, int vcpuid)
 {
 	vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0);
 }
 
 /* Inject IDT_AC with the error code 'errcode'. */
 static __inline void
 vm_inject_ac(void *vm, int vcpuid, int errcode)
 {
 	vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode);
 }
 
 /* Inject IDT_SS with the error code 'errcode'. */
 static __inline void
 vm_inject_ss(void *vm, int vcpuid, int errcode)
 {
 	vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode);
 }
 
 /*
  * NOTE(review): by its name this injects a page fault with 'error_code';
  * 'cr2' is presumably the faulting address - confirm.
  */
 void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
 
 #endif	/* _VMM_H_ */
Index: head/sys/amd64/include/vmm_dev.h
===================================================================
--- head/sys/amd64/include/vmm_dev.h	(revision 276427)
+++ head/sys/amd64/include/vmm_dev.h	(revision 276428)
@@ -1,339 +1,362 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_VMM_DEV_H_
 #define	_VMM_DEV_H_
 
 #ifdef _KERNEL
 void	vmmdev_init(void);
 int	vmmdev_cleanup(void);
 #endif
 
 struct vm_memory_segment {
 	vm_paddr_t	gpa;	/* in */
 	size_t		len;
 	int		wired;
 };
 
 struct vm_register {
 	int		cpuid;
 	int		regnum;		/* enum vm_reg_name */
 	uint64_t	regval;
 };
 
 struct vm_seg_desc {			/* data or code segment */
 	int		cpuid;
 	int		regnum;		/* enum vm_reg_name */
 	struct seg_desc desc;
 };
 
 struct vm_run {
 	int		cpuid;
 	uint64_t	rip;		/* start running here */
 	struct vm_exit	vm_exit;
 };
 
 struct vm_exception {
 	int		cpuid;
 	int		vector;
 	uint32_t	error_code;
 	int		error_code_valid;
 };
 
 struct vm_lapic_msi {
 	uint64_t	msg;
 	uint64_t	addr;
 };
 
 struct vm_lapic_irq {
 	int		cpuid;
 	int		vector;
 };
 
 struct vm_ioapic_irq {
 	int		irq;
 };
 
 struct vm_isa_irq {
 	int		atpic_irq;
 	int		ioapic_irq;
 };
 
 struct vm_isa_irq_trigger {
 	int		atpic_irq;
 	enum vm_intr_trigger trigger;
 };
 
 struct vm_capability {
 	int		cpuid;
 	enum vm_cap_type captype;
 	int		capval;
 	int		allcpus;
 };
 
 struct vm_pptdev {
 	int		bus;
 	int		slot;
 	int		func;
 };
 
 struct vm_pptdev_mmio {
 	int		bus;
 	int		slot;
 	int		func;
 	vm_paddr_t	gpa;
 	vm_paddr_t	hpa;
 	size_t		len;
 };
 
 struct vm_pptdev_msi {
 	int		vcpu;
 	int		bus;
 	int		slot;
 	int		func;
 	int		numvec;		/* 0 means disabled */
 	uint64_t	msg;
 	uint64_t	addr;
 };
 
 struct vm_pptdev_msix {
 	int		vcpu;
 	int		bus;
 	int		slot;
 	int		func;
 	int		idx;
 	uint64_t	msg;
 	uint32_t	vector_control;
 	uint64_t	addr;
 };
 
 struct vm_nmi {
 	int		cpuid;
 };
 
 #define	MAX_VM_STATS	64
 struct vm_stats {
 	int		cpuid;				/* in */
 	int		num_entries;			/* out */
 	struct timeval	tv;
 	uint64_t	statbuf[MAX_VM_STATS];
 };
 
 struct vm_stat_desc {
 	int		index;				/* in */
 	char		desc[128];			/* out */
 };
 
 struct vm_x2apic {
 	int			cpuid;
 	enum x2apic_state	state;
 };
 
 struct vm_gpa_pte {
 	uint64_t	gpa;				/* in */
 	uint64_t	pte[4];				/* out */
 	int		ptenum;
 };
 
 struct vm_hpet_cap {
 	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
 };
 
 struct vm_suspend {
 	enum vm_suspend_how how;
 };
 
 struct vm_gla2gpa {
 	int		vcpuid;		/* inputs */
 	int 		prot;		/* PROT_READ or PROT_WRITE */
 	uint64_t	gla;
 	struct vm_guest_paging paging;
 	int		fault;		/* outputs */
 	uint64_t	gpa;
 };
 
 struct vm_activate_cpu {
 	int		vcpuid;
 };
 
 /* Argument of the VM_GET_CPUS ioctl. */
 struct vm_cpuset {
 	int		which;		/* presumably one of the VM_*_CPUS values below */
 	int		cpusetsize;	/* presumably the size in bytes of '*cpus' - confirm */
 	cpuset_t	*cpus;
 };
 #define	VM_ACTIVE_CPUS		0
 #define	VM_SUSPENDED_CPUS	1
 
 struct vm_intinfo {
 	int		vcpuid;
 	uint64_t	info1;
 	uint64_t	info2;
 };
 
+struct vm_rtc_time {
+	time_t		secs;	/* argument of VM_RTC_SETTIME and VM_RTC_GETTIME */
+};
+
+struct vm_rtc_data {
+	int		offset;		/* in */
+	uint8_t		value;		/* in for VM_RTC_WRITE; presumably out for VM_RTC_READ */
+};
+
 enum {
 	/* general routines */
 	IOCNUM_ABIVERS = 0,
 	IOCNUM_RUN = 1,
 	IOCNUM_SET_CAPABILITY = 2,
 	IOCNUM_GET_CAPABILITY = 3,
 	IOCNUM_SUSPEND = 4,
 	IOCNUM_REINIT = 5,
 
 	/* memory apis */
 	IOCNUM_MAP_MEMORY = 10,
 	IOCNUM_GET_MEMORY_SEG = 11,
 	IOCNUM_GET_GPA_PMAP = 12,
 	IOCNUM_GLA2GPA = 13,
 
 	/* register/state accessors */
 	IOCNUM_SET_REGISTER = 20,
 	IOCNUM_GET_REGISTER = 21,
 	IOCNUM_SET_SEGMENT_DESCRIPTOR = 22,
 	IOCNUM_GET_SEGMENT_DESCRIPTOR = 23,
 
 	/* interrupt injection */
 	IOCNUM_GET_INTINFO = 28,
 	IOCNUM_SET_INTINFO = 29,
 	IOCNUM_INJECT_EXCEPTION = 30,
 	IOCNUM_LAPIC_IRQ = 31,
 	IOCNUM_INJECT_NMI = 32,
 	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
 	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
 	IOCNUM_IOAPIC_PULSE_IRQ = 35,
 	IOCNUM_LAPIC_MSI = 36,
 	IOCNUM_LAPIC_LOCAL_IRQ = 37,
 	IOCNUM_IOAPIC_PINCOUNT = 38,
 
 	/* PCI pass-thru */
 	IOCNUM_BIND_PPTDEV = 40,
 	IOCNUM_UNBIND_PPTDEV = 41,
 	IOCNUM_MAP_PPTDEV_MMIO = 42,
 	IOCNUM_PPTDEV_MSI = 43,
 	IOCNUM_PPTDEV_MSIX = 44,
 
 	/* statistics */
 	IOCNUM_VM_STATS = 50, 
 	IOCNUM_VM_STAT_DESC = 51,
 
 	/* kernel device state */
 	IOCNUM_SET_X2APIC_STATE = 60,
 	IOCNUM_GET_X2APIC_STATE = 61,
 	IOCNUM_GET_HPET_CAPABILITIES = 62,
 
 	/* legacy interrupt injection */
 	IOCNUM_ISA_ASSERT_IRQ = 80,
 	IOCNUM_ISA_DEASSERT_IRQ = 81,
 	IOCNUM_ISA_PULSE_IRQ = 82,
 	IOCNUM_ISA_SET_IRQ_TRIGGER = 83,
 
 	/* vm_cpuset */
 	IOCNUM_ACTIVATE_CPU = 90,
 	IOCNUM_GET_CPUSET = 91,
+
+	/* RTC */
+	IOCNUM_RTC_READ = 100,
+	IOCNUM_RTC_WRITE = 101,
+	IOCNUM_RTC_SETTIME = 102,
+	IOCNUM_RTC_GETTIME = 103,
 };
 
 #define	VM_RUN		\
 	_IOWR('v', IOCNUM_RUN, struct vm_run)
 #define	VM_SUSPEND	\
 	_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
 #define	VM_REINIT	\
 	_IO('v', IOCNUM_REINIT)
 #define	VM_MAP_MEMORY	\
 	_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
 #define	VM_GET_MEMORY_SEG \
 	_IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
 #define	VM_SET_REGISTER \
 	_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
 #define	VM_GET_REGISTER \
 	_IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
 #define	VM_SET_SEGMENT_DESCRIPTOR \
 	_IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
 #define	VM_GET_SEGMENT_DESCRIPTOR \
 	_IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
 #define	VM_INJECT_EXCEPTION	\
 	_IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
 #define	VM_LAPIC_IRQ 		\
 	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
 #define	VM_LAPIC_LOCAL_IRQ 	\
 	_IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq)
 #define	VM_LAPIC_MSI		\
 	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
 #define	VM_IOAPIC_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_DEASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_PULSE_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_PINCOUNT	\
 	_IOR('v', IOCNUM_IOAPIC_PINCOUNT, int)
 #define	VM_ISA_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq)
 #define	VM_ISA_DEASSERT_IRQ	\
 	_IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq)
 #define	VM_ISA_PULSE_IRQ	\
 	_IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq)
 #define	VM_ISA_SET_IRQ_TRIGGER	\
 	_IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger)
 #define	VM_SET_CAPABILITY \
 	_IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
 #define	VM_GET_CAPABILITY \
 	_IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
 #define	VM_BIND_PPTDEV \
 	_IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
 #define	VM_UNBIND_PPTDEV \
 	_IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
 #define	VM_MAP_PPTDEV_MMIO \
 	_IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
 #define	VM_PPTDEV_MSI \
 	_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
 #define	VM_PPTDEV_MSIX \
 	_IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
 #define VM_INJECT_NMI \
 	_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
 #define	VM_STATS \
 	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
 #define	VM_STAT_DESC \
 	_IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
 #define	VM_SET_X2APIC_STATE \
 	_IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic)
 #define	VM_GET_X2APIC_STATE \
 	_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
 #define	VM_GET_HPET_CAPABILITIES \
 	_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
 #define	VM_GET_GPA_PMAP \
 	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
 #define	VM_GLA2GPA	\
 	_IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
 #define	VM_ACTIVATE_CPU	\
 	_IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
 #define	VM_GET_CPUS	\
 	_IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
 #define	VM_SET_INTINFO	\
 	_IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
 #define	VM_GET_INTINFO	\
 	_IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo)
+#define VM_RTC_WRITE \
+	_IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data)
+#define VM_RTC_READ \
+	_IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data)
+#define VM_RTC_SETTIME	\
+	_IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time)
+#define VM_RTC_GETTIME	\
+	_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
 #endif
Index: head/sys/amd64/vmm/io/vhpet.c
===================================================================
--- head/sys/amd64/vmm/io/vhpet.c	(revision 276427)
+++ head/sys/amd64/vmm/io/vhpet.c	(revision 276428)
@@ -1,812 +1,760 @@
 /*-
  * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
  * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/cpuset.h>
 
 #include <dev/acpica/acpi_hpet.h>
 
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 
 #include "vmm_lapic.h"
 #include "vatpic.h"
 #include "vioapic.h"
 #include "vhpet.h"
 
 #include "vmm_ktr.h"
 
 static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet");
 
 #define	HPET_FREQ	10000000		/* 10.0 Mhz */
 #define	FS_PER_S	1000000000000000ul
 
 /* Timer N Configuration and Capabilities Register */
 #define	HPET_TCAP_RO_MASK	(HPET_TCAP_INT_ROUTE 	|		\
 				 HPET_TCAP_FSB_INT_DEL	|		\
 				 HPET_TCAP_SIZE		|		\
 				 HPET_TCAP_PER_INT)
 /*
  * HPET requires at least 3 timers and up to 32 timers per block.
  */
 #define	VHPET_NUM_TIMERS	8
 CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
 
 struct vhpet_callout_arg {
 	struct vhpet *vhpet;
 	int timer_num;
 };
 
 struct vhpet {
 	struct vm	*vm;
 	struct mtx	mtx;
 	sbintime_t	freq_sbt;
 
 	uint64_t	config;		/* Configuration */
 	uint64_t	isr;		/* Interrupt Status */
 	uint32_t	countbase;	/* HPET counter base value */
 	sbintime_t	countbase_sbt;	/* uptime corresponding to base value */
 
 	struct {
 		uint64_t	cap_config;	/* Configuration */
 		uint64_t	msireg;		/* FSB interrupt routing */
 		uint32_t	compval;	/* Comparator */
 		uint32_t	comprate;
 		struct callout	callout;
 		sbintime_t	callout_sbt;	/* time when counter==compval */
 		struct vhpet_callout_arg arg;
 	} timer[VHPET_NUM_TIMERS];
 };
 
 #define	VHPET_LOCK(vhp)		mtx_lock(&((vhp)->mtx))
 #define	VHPET_UNLOCK(vhp)	mtx_unlock(&((vhp)->mtx))
 
 static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
     sbintime_t now);
 
 static uint64_t
 vhpet_capabilities(void)
 {
 	uint64_t cap = 0;
 
 	cap |= 0x8086 << 16;			/* vendor id */
-	cap |= HPET_CAP_LEG_RT;			/* legacy routing capable */
 	cap |= (VHPET_NUM_TIMERS - 1) << 8;	/* number of timers */
 	cap |= 1;				/* revision */
 	cap &= ~HPET_CAP_COUNT_SIZE;		/* 32-bit timer */
 
 	cap &= 0xffffffff;
 	cap |= (FS_PER_S / HPET_FREQ) << 32;	/* tick period in fs */
 
 	return (cap);
 }
 
 static __inline bool
 vhpet_counter_enabled(struct vhpet *vhpet)
 {
 
 	return ((vhpet->config & HPET_CNF_ENABLE) ? true : false);
 }
 
 static __inline bool
 vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
 {
 	const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
 
-	/*
-	 * LegacyReplacement Route configuration takes precedence over MSI
-	 * for timers 0 and 1.
-	 */
-	if (n == 0 || n == 1) {
-		if (vhpet->config & HPET_CNF_LEG_RT)
-			return (false);
-	}
-
 	if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
 		return (true);
 	else
 		return (false);
 }
 
 static __inline int
 vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
 {
 	/*
 	 * If the timer is configured to use MSI then treat it as if the
 	 * timer is not connected to the ioapic.
 	 */
 	if (vhpet_timer_msi_enabled(vhpet, n))
 		return (0);
 
-	if (vhpet->config & HPET_CNF_LEG_RT) {
-		/*
-		 * In "legacy routing" timers 0 and 1 are connected to
-		 * ioapic pins 2 and 8 respectively.
-		 */
-		switch (n) {
-		case 0:
-			return (2);
-		case 1:
-			return (8);
-		}
-	}
-
 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
 }
 
-static __inline int
-vhpet_timer_atpic_pin(struct vhpet *vhpet, int n)
-{
-	if (vhpet->config & HPET_CNF_LEG_RT) {
-		/*
-		 * In "legacy routing" timers 0 and 1 are connected to
-		 * 8259 master pin 0 and slave pin 0 respectively.
-		 */
-		switch (n) {
-		case 0:
-			return (0);
-		case 1:
-			return (8);
-		}
-	}
-
-	return (-1);
-}
-
 static uint32_t
 vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
 {
 	uint32_t val;
 	sbintime_t now, delta;
 
 	val = vhpet->countbase;
 	if (vhpet_counter_enabled(vhpet)) {
 		now = sbinuptime();
 		delta = now - vhpet->countbase_sbt;
 		KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: "
 		    "%#lx to %#lx", vhpet->countbase_sbt, now));
 		val += delta / vhpet->freq_sbt;
 		if (nowptr != NULL)
 			*nowptr = now;
 	} else {
 		/*
 		 * The sbinuptime corresponding to the 'countbase' is
 		 * meaningless when the counter is disabled. Make sure
 		 * that the the caller doesn't want to use it.
 		 */
 		KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL"));
 	}
 	return (val);
 }
 
 static void
 vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
 {
-	int pin, legacy_pin;
+	int pin;
 
 	if (vhpet->isr & (1 << n)) {
 		pin = vhpet_timer_ioapic_pin(vhpet, n);
 		KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
 		vioapic_deassert_irq(vhpet->vm, pin);
-
-		legacy_pin = vhpet_timer_atpic_pin(vhpet, n);
-		if (legacy_pin != -1)
-			vatpic_deassert_irq(vhpet->vm, legacy_pin);
-
 		vhpet->isr &= ~(1 << n);
 	}
 }
 
 static __inline bool
 vhpet_periodic_timer(struct vhpet *vhpet, int n)
 {
 
 	return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0);
 }
 
 static __inline bool
 vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
 {
 
 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0);
 }
 
 static __inline bool
 vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
 {
 
 	KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
 	    "timer %d is using MSI", n));
 
-	/* The legacy replacement interrupts are always edge triggered */
-	if (vhpet->config & HPET_CNF_LEG_RT) {
-		if (n == 0 || n == 1)
-			return (true);
-	}
-
 	if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
 		return (true);
 	else
 		return (false);
 }
 
 static void
 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
 {
-	int pin, legacy_pin;
+	int pin;
 
 	/* If interrupts are not enabled for this timer then just return. */
 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
 		return;
 
 	/*
 	 * If a level triggered interrupt is already asserted then just return.
 	 */
 	if ((vhpet->isr & (1 << n)) != 0) {
 		VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n);
 		return;
 	}
 
 	if (vhpet_timer_msi_enabled(vhpet, n)) {
 		lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
 		    vhpet->timer[n].msireg & 0xffffffff);
 		return;
 	}	
 
 	pin = vhpet_timer_ioapic_pin(vhpet, n);
 	if (pin == 0) {
 		VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n);
 		return;
 	}
 
-	legacy_pin = vhpet_timer_atpic_pin(vhpet, n);
-
 	if (vhpet_timer_edge_trig(vhpet, n)) {
 		vioapic_pulse_irq(vhpet->vm, pin);
-		if (legacy_pin != -1)
-			vatpic_pulse_irq(vhpet->vm, legacy_pin);
 	} else {
 		vhpet->isr |= 1 << n;
 		vioapic_assert_irq(vhpet->vm, pin);
-		if (legacy_pin != -1)
-			vatpic_assert_irq(vhpet->vm, legacy_pin);
 	}
 }
 
 static void
 vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
 {
 	uint32_t compval, comprate, compnext;
 
 	KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
 
 	compval = vhpet->timer[n].compval;
 	comprate = vhpet->timer[n].comprate;
 
 	/*
 	 * Calculate the comparator value to be used for the next periodic
 	 * interrupt.
 	 *
 	 * This function is commonly called from the callout handler.
 	 * In this scenario the 'counter' is ahead of 'compval'. To find
 	 * the next value to program into the accumulator we divide the
 	 * number space between 'compval' and 'counter' into 'comprate'
 	 * sized units. The 'compval' is rounded up such that is "ahead"
 	 * of 'counter'.
 	 */
 	compnext = compval + ((counter - compval) / comprate + 1) * comprate;
 
 	vhpet->timer[n].compval = compnext;
 }
 
 static void
 vhpet_handler(void *a)
 {
 	int n;
 	uint32_t counter;
 	sbintime_t now;
 	struct vhpet *vhpet;
 	struct callout *callout;
 	struct vhpet_callout_arg *arg;
 
 	arg = a;
 	vhpet = arg->vhpet;
 	n = arg->timer_num;
 	callout = &vhpet->timer[n].callout;
 
 	VM_CTR1(vhpet->vm, "hpet t%d fired", n);
 
 	VHPET_LOCK(vhpet);
 
 	if (callout_pending(callout))		/* callout was reset */
 		goto done;
 
 	if (!callout_active(callout))		/* callout was stopped */
 		goto done;
 
 	callout_deactivate(callout);
 
 	if (!vhpet_counter_enabled(vhpet))
 		panic("vhpet(%p) callout with counter disabled", vhpet);
 
 	counter = vhpet_counter(vhpet, &now);
 	vhpet_start_timer(vhpet, n, counter, now);
 	vhpet_timer_interrupt(vhpet, n);
 done:
 	VHPET_UNLOCK(vhpet);
 	return;
 }
 
 static void
 vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now)
 {
 
 	VM_CTR1(vhpet->vm, "hpet t%d stopped", n);
 	callout_stop(&vhpet->timer[n].callout);
 
 	/*
 	 * If the callout was scheduled to expire in the past but hasn't
 	 * had a chance to execute yet then trigger the timer interrupt
 	 * here. Failing to do so will result in a missed timer interrupt
 	 * in the guest. This is especially bad in one-shot mode because
 	 * the next interrupt has to wait for the counter to wrap around.
 	 */
 	if (vhpet->timer[n].callout_sbt < now) {
 		VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after "
 		    "stopping timer", n);
 		vhpet_timer_interrupt(vhpet, n);
 	}
 }
 
 static void
 vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now)
 {
 	sbintime_t delta, precision;
 
 	if (vhpet->timer[n].comprate != 0)
 		vhpet_adjust_compval(vhpet, n, counter);
 	else {
 		/*
 		 * In one-shot mode it is the guest's responsibility to make
 		 * sure that the comparator value is not in the "past". The
 		 * hardware doesn't have any belt-and-suspenders to deal with
 		 * this so we don't either.
 		 */
 	}
 
 	delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt;
 	precision = delta >> tc_precexp;
 	vhpet->timer[n].callout_sbt = now + delta;
 	callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt,
 	    precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE);
 }
 
 static void
 vhpet_start_counting(struct vhpet *vhpet)
 {
 	int i;
 
 	vhpet->countbase_sbt = sbinuptime();
 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
 		/*
 		 * Restart the timers based on the value of the main counter
 		 * when it stopped counting.
 		 */
 		vhpet_start_timer(vhpet, i, vhpet->countbase,
 		    vhpet->countbase_sbt);
 	}
 }
 
 static void
 vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now)
 {
 	int i;
 
 	vhpet->countbase = counter;
 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
 		vhpet_stop_timer(vhpet, i, now);
 }
 
 static __inline void
 update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
 {
 
 	*regptr &= ~mask;
 	*regptr |= (data & mask);
 }
 
 static void
 vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
     uint64_t mask)
 {
 	bool clear_isr;
 	int old_pin, new_pin;
 	uint32_t allowed_irqs;
 	uint64_t oldval, newval;
 
 	if (vhpet_timer_msi_enabled(vhpet, n) ||
 	    vhpet_timer_edge_trig(vhpet, n)) {
 		if (vhpet->isr & (1 << n))
 			panic("vhpet timer %d isr should not be asserted", n);
 	}
 	old_pin = vhpet_timer_ioapic_pin(vhpet, n);
 	oldval = vhpet->timer[n].cap_config;
 
 	newval = oldval;
 	update_register(&newval, data, mask);
 	newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
 	newval |= oldval & HPET_TCAP_RO_MASK;
 
 	if (newval == oldval)
 		return;
 
 	vhpet->timer[n].cap_config = newval;
 	VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016x", n, newval);
 
 	/*
 	 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
 	 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
 	 * it to the default value of 0.
 	 */
 	allowed_irqs = vhpet->timer[n].cap_config >> 32;
 	new_pin = vhpet_timer_ioapic_pin(vhpet, n);
 	if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
 		VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, "
 		    "allowed_irqs 0x%08x", n, new_pin, allowed_irqs);
 		new_pin = 0;
 		vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
 	}
 
 	if (!vhpet_periodic_timer(vhpet, n))
 		vhpet->timer[n].comprate = 0;
 
 	/*
 	 * If the timer's ISR bit is set then clear it in the following cases:
 	 * - interrupt is disabled
 	 * - interrupt type is changed from level to edge or fsb.
 	 * - interrupt routing is changed
 	 *
 	 * This is to ensure that this timer's level triggered interrupt does
 	 * not remain asserted forever.
 	 */
 	if (vhpet->isr & (1 << n)) {
 		KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
 		    n, old_pin));
 		if (!vhpet_timer_interrupt_enabled(vhpet, n))
 			clear_isr = true;
 		else if (vhpet_timer_msi_enabled(vhpet, n))
 			clear_isr = true;
 		else if (vhpet_timer_edge_trig(vhpet, n))
 			clear_isr = true;
 		else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
 			clear_isr = true;
 		else
 			clear_isr = false;
 
 		if (clear_isr) {
 			VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to "
 			    "configuration change", n);
 			vioapic_deassert_irq(vhpet->vm, old_pin);
 			vhpet->isr &= ~(1 << n);
 		}
 	}
 }
 
 int
 vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size,
     void *arg)
 {
 	struct vhpet *vhpet;
 	uint64_t data, mask, oldval, val64;
 	uint32_t isr_clear_mask, old_compval, old_comprate, counter;
 	sbintime_t now, *nowptr;
 	int i, offset;
 
 	vhpet = vm_hpet(vm);
 	offset = gpa - VHPET_BASE;
 
 	VHPET_LOCK(vhpet);
 
 	/* Accesses to the HPET should be 4 or 8 bytes wide */
 	switch (size) {
 	case 8:
 		mask = 0xffffffffffffffff;
 		data = val;
 		break;
 	case 4:
 		mask = 0xffffffff;
 		data = val;
 		if ((offset & 0x4) != 0) {
 			mask <<= 32;
 			data <<= 32;
 		} 
 		break;
 	default:
 		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
 		    "offset 0x%08x, size %d", offset, size);
 		goto done;
 	}
 
 	/* Access to the HPET should be naturally aligned to its width */
 	if (offset & (size - 1)) {
 		VM_CTR2(vhpet->vm, "hpet invalid mmio write: "
 		    "offset 0x%08x, size %d", offset, size);
 		goto done;
 	}
 
 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
 		/*
 		 * Get the most recent value of the counter before updating
 		 * the 'config' register. If the HPET is going to be disabled
 		 * then we need to update 'countbase' with the value right
 		 * before it is disabled.
 		 */
 		nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL;
 		counter = vhpet_counter(vhpet, nowptr);
 		oldval = vhpet->config;
 		update_register(&vhpet->config, data, mask);
+
+		/*
+		 * LegacyReplacement Routing is not supported so clear the
+		 * bit explicitly.
+		 */
+		vhpet->config &= ~HPET_CNF_LEG_RT;
+
 		if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
 			if (vhpet_counter_enabled(vhpet)) {
 				vhpet_start_counting(vhpet);
 				VM_CTR0(vhpet->vm, "hpet enabled");
 			} else {
 				vhpet_stop_counting(vhpet, counter, now);
 				VM_CTR0(vhpet->vm, "hpet disabled");
 			}
 		}
 		goto done;
 	}
 
 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
 		isr_clear_mask = vhpet->isr & data;
 		for (i = 0; i < VHPET_NUM_TIMERS; i++) {
 			if ((isr_clear_mask & (1 << i)) != 0) {
 				VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i);
 				vhpet_timer_clear_isr(vhpet, i);
 			}
 		}
 		goto done;
 	}
 
 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
 		/* Zero-extend the counter to 64-bits before updating it */
 		val64 = vhpet_counter(vhpet, NULL);
 		update_register(&val64, data, mask);
 		vhpet->countbase = val64;
 		if (vhpet_counter_enabled(vhpet))
 			vhpet_start_counting(vhpet);
 		goto done;
 	}
 
 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
 		if (offset == HPET_TIMER_CAP_CNF(i) ||
 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
 			vhpet_timer_update_config(vhpet, i, data, mask);
 			break;
 		}
 
 		if (offset == HPET_TIMER_COMPARATOR(i) ||
 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
 			old_compval = vhpet->timer[i].compval;
 			old_comprate = vhpet->timer[i].comprate;
 			if (vhpet_periodic_timer(vhpet, i)) {
 				/*
 				 * In periodic mode writes to the comparator
 				 * change the 'compval' register only if the
 				 * HPET_TCNF_VAL_SET bit is set in the config
 				 * register.
 				 */
 				val64 = vhpet->timer[i].comprate;
 				update_register(&val64, data, mask);
 				vhpet->timer[i].comprate = val64;
 				if ((vhpet->timer[i].cap_config &
 				    HPET_TCNF_VAL_SET) != 0) {
 					vhpet->timer[i].compval = val64;
 				}
 			} else {
 				KASSERT(vhpet->timer[i].comprate == 0,
 				    ("vhpet one-shot timer %d has invalid "
 				    "rate %u", i, vhpet->timer[i].comprate));
 				val64 = vhpet->timer[i].compval;
 				update_register(&val64, data, mask);
 				vhpet->timer[i].compval = val64;
 			}
 			vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;
 
 			if (vhpet->timer[i].compval != old_compval ||
 			    vhpet->timer[i].comprate != old_comprate) {
 				if (vhpet_counter_enabled(vhpet)) {
 					counter = vhpet_counter(vhpet, &now);
 					vhpet_start_timer(vhpet, i, counter,
 					    now);
 				}
 			}
 			break;
 		}
 
 		if (offset == HPET_TIMER_FSB_VAL(i) ||
 		    offset == HPET_TIMER_FSB_ADDR(i)) {
 			update_register(&vhpet->timer[i].msireg, data, mask);
 			break;
 		}
 	}
 done:
 	VHPET_UNLOCK(vhpet);
 	return (0);
 }
 
 int
 vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size,
     void *arg)
 {
 	int i, offset;
 	struct vhpet *vhpet;
 	uint64_t data;
 
 	vhpet = vm_hpet(vm);
 	offset = gpa - VHPET_BASE;
 
 	VHPET_LOCK(vhpet);
 
 	/* Accesses to the HPET should be 4 or 8 bytes wide */
 	if (size != 4 && size != 8) {
 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
 		    "offset 0x%08x, size %d", offset, size);
 		data = 0;
 		goto done;
 	}
 
 	/* Access to the HPET should be naturally aligned to its width */
 	if (offset & (size - 1)) {
 		VM_CTR2(vhpet->vm, "hpet invalid mmio read: "
 		    "offset 0x%08x, size %d", offset, size);
 		data = 0;
 		goto done;
 	}
 
 	if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
 		data = vhpet_capabilities();
 		goto done;	
 	}
 
 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
 		data = vhpet->config;
 		goto done;
 	}
 
 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
 		data = vhpet->isr;
 		goto done;
 	}
 
 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
 		data = vhpet_counter(vhpet, NULL);
 		goto done;
 	}
 
 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
 		if (offset == HPET_TIMER_CAP_CNF(i) ||
 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
 			data = vhpet->timer[i].cap_config;
 			break;
 		}
 
 		if (offset == HPET_TIMER_COMPARATOR(i) ||
 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
 			data = vhpet->timer[i].compval;
 			break;
 		}
 
 		if (offset == HPET_TIMER_FSB_VAL(i) ||
 		    offset == HPET_TIMER_FSB_ADDR(i)) {
 			data = vhpet->timer[i].msireg;
 			break;
 		}
 	}
 
 	if (i >= VHPET_NUM_TIMERS)
 		data = 0;
 done:
 	VHPET_UNLOCK(vhpet);
 
 	if (size == 4) {
 		if (offset & 0x4)
 			data >>= 32;
 	}
 	*rval = data;
 	return (0);
 }
 
 struct vhpet *
 vhpet_init(struct vm *vm)
 {
 	int i, pincount;
 	struct vhpet *vhpet;
 	uint64_t allowed_irqs;
 	struct vhpet_callout_arg *arg;
 	struct bintime bt;
 
 	vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO);
         vhpet->vm = vm;
 	mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF);
 
 	FREQ2BT(HPET_FREQ, &bt);
 	vhpet->freq_sbt = bttosbt(bt);
 
 	pincount = vioapic_pincount(vm);
 	if (pincount >= 24)
 		allowed_irqs = 0x00f00000;	/* irqs 20, 21, 22 and 23 */
 	else
 		allowed_irqs = 0;
 
 	/*
 	 * Initialize HPET timer hardware state.
 	 */
 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
 		vhpet->timer[i].cap_config = allowed_irqs << 32;
 		vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT;
 		vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL;
 
 		vhpet->timer[i].compval = 0xffffffff;
 		callout_init(&vhpet->timer[i].callout, 1);
 
 		arg = &vhpet->timer[i].arg;
 		arg->vhpet = vhpet;
 		arg->timer_num = i;
 	}
 
 	return (vhpet);
 }
 
 void
 vhpet_cleanup(struct vhpet *vhpet)
 {
 	int i;
 
 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
 		callout_drain(&vhpet->timer[i].callout);
 
 	free(vhpet, M_VHPET);
 }
 
 int
 vhpet_getcap(struct vm_hpet_cap *cap)
 {
 
 	cap->capabilities = vhpet_capabilities();
 	return (0);
 }
Index: head/sys/amd64/vmm/io/vrtc.c
===================================================================
--- head/sys/amd64/vmm/io/vrtc.c	(nonexistent)
+++ head/sys/amd64/vmm/io/vrtc.c	(revision 276428)
@@ -0,0 +1,952 @@
+/*-
+ * Copyright (c) 2014, Neel Natu (neel@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/clock.h>
+#include <sys/sysctl.h>
+
+#include <machine/vmm.h>
+
+#include <isa/rtc.h>
+
+#include "vmm_ktr.h"
+#include "vatpic.h"
+#include "vioapic.h"
+#include "vrtc.h"
+
+/* RTC register layout: 14 time/control bytes followed by NVRAM */
+struct rtcdev {
+	uint8_t	sec;		/* binary or BCD, per RTCSB_BIN in reg_b */
+	uint8_t	alarm_sec;	/* alarm bytes: 0xC0 and above match anything */
+	uint8_t	min;
+	uint8_t	alarm_min;
+	uint8_t	hour;		/* MSB set means PM in 12-hour mode */
+	uint8_t	alarm_hour;
+	uint8_t	day_of_week;	/* stored as clocktime dow + 1 */
+	uint8_t	day_of_month;
+	uint8_t	month;
+	uint8_t	year;		/* two digits: year % 100 */
+	uint8_t	reg_a;		/* bits 6:4 divider, bits 3:0 rate select */
+	uint8_t	reg_b;		/* halt, intr enables, 24HR, BIN */
+	uint8_t	reg_c;		/* alarm/update interrupt flags (RTCIR_*) */
+	uint8_t	reg_d;
+	uint8_t	nvram[128 - 14];	/* everything after the 14 bytes above */
+} __packed;
+CTASSERT(sizeof(struct rtcdev) == 128);
+
+struct vrtc {
+	struct vm	*vm;		/* handle passed to the VM_CTR trace macros */
+	struct mtx	mtx;		/* taken via VRTC_LOCK()/VRTC_UNLOCK() */
+	struct callout	callout;	/* NOTE(review): presumably runs vrtc_callout_handler - confirm */
+	u_int		addr;		/* RTC register to read or write */
+	sbintime_t	base_uptime;	/* sbinuptime() sampled when base_rtctime was set */
+	time_t		base_rtctime;	/* RTC seconds at base_uptime, or VRTC_BROKEN_TIME */
+	struct rtcdev	rtcdev;		/* register contents (see struct rtcdev) */
+};
+
+#define	VRTC_LOCK(vrtc)		mtx_lock(&((vrtc)->mtx))
+#define	VRTC_UNLOCK(vrtc)	mtx_unlock(&((vrtc)->mtx))
+#define	VRTC_LOCKED(vrtc)	mtx_owned(&((vrtc)->mtx))
+
+/*
+ * RTC time is considered "broken" if:
+ * - RTC updates are halted by the guest
+ * - RTC date/time fields have invalid values
+ */
+#define	VRTC_BROKEN_TIME	((time_t)-1)
+
+#define	RTC_IRQ			8
+#define	RTCSB_BIN		0x04	/* reg_b: fields are binary, not BCD */
+#define	RTCSB_ALL_INTRS		(RTCSB_UINTR | RTCSB_AINTR | RTCSB_PINTR)
+#define	rtc_halted(vrtc)	(((vrtc)->rtcdev.reg_b & RTCSB_HALT) != 0)
+#define	aintr_enabled(vrtc)	(((vrtc)->rtcdev.reg_b & RTCSB_AINTR) != 0)
+#define	pintr_enabled(vrtc)	(((vrtc)->rtcdev.reg_b & RTCSB_PINTR) != 0)
+#define	uintr_enabled(vrtc)	(((vrtc)->rtcdev.reg_b & RTCSB_UINTR) != 0)
+
+static void vrtc_callout_handler(void *arg);
+static void vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval);
+
+static MALLOC_DEFINE(M_VRTC, "vrtc", "bhyve virtual rtc");
+
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, vrtc, CTLFLAG_RW, NULL, NULL);
+/* Read-only tunable 'flag_broken_time' under the vrtc node; defaults to 1. */
+static int rtc_flag_broken_time = 1;
+SYSCTL_INT(_hw_vmm_vrtc, OID_AUTO, flag_broken_time, CTLFLAG_RDTUN,
+    &rtc_flag_broken_time, 0, "Stop guest when invalid RTC time is detected");
+
+static __inline bool
+divider_enabled(int reg_a)
+{
+	/* Bits 6:4 of register A must read 010b for the RTC to be counting. */
+	const int dv_bits = reg_a & 0x70;
+
+	return (dv_bits == 0x20);
+}
+
+static __inline bool
+update_enabled(struct vrtc *vrtc)
+{
+	bool counting, halted, broken;
+
+	/*
+	 * The RTC date/time advances only when all of these hold:
+	 * - the divider chain is running (not held in reset)
+	 * - the guest has not halted updates via register B
+	 * - the recorded base time is valid (not VRTC_BROKEN_TIME)
+	 *
+	 * All three tests are side-effect free, so they can be evaluated
+	 * up front and combined in a single expression.
+	 */
+	counting = divider_enabled(vrtc->rtcdev.reg_a);
+	halted = rtc_halted(vrtc);
+	broken = (vrtc->base_rtctime == VRTC_BROKEN_TIME);
+
+	return (counting && !halted && !broken);
+}
+
+static time_t
+vrtc_curtime(struct vrtc *vrtc)
+{
+	sbintime_t uptime, elapsed;
+
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+	/* A stopped clock stays at the recorded base time. */
+	if (!update_enabled(vrtc))
+		return (vrtc->base_rtctime);
+
+	/* Otherwise add the whole seconds of uptime elapsed since the base. */
+	uptime = sbinuptime();
+	elapsed = uptime - vrtc->base_uptime;
+	KASSERT(elapsed >= 0, ("vrtc_curtime: uptime went backwards: "
+	    "%#lx to %#lx", vrtc->base_uptime, uptime));
+	return (vrtc->base_rtctime + elapsed / SBT_1S);
+}
+
+static __inline uint8_t
+rtcset(struct rtcdev *rtc, int val)
+{
+	const bool bcd = (rtc->reg_b & RTCSB_BIN) == 0;
+	/* 'val' doubles as the bin2bcd_data[] index, so it must be 0..99. */
+	KASSERT(val >= 0 && val < 100, ("%s: invalid bin2bcd index %d",
+	    __func__, val));
+	return (bcd ? bin2bcd_data[val] : val);
+}
+
+static void
+secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
+{
+	struct clocktime ct;
+	struct timespec ts;
+	struct rtcdev *rtc;
+	int hour;
+
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+	/* Load 'rtctime' into the RTC fields; VRTC_BROKEN_TIME is a no-op. */
+	if (rtctime < 0) {
+		KASSERT(rtctime == VRTC_BROKEN_TIME,
+		    ("%s: invalid vrtc time %#lx", __func__, rtctime));
+		return;
+	}
+
+	/*
+	 * If the RTC is halted then the guest has "ownership" of the
+	 * date/time fields. Don't update the RTC date/time fields in
+	 * this case (unless forced).
+	 */
+	if (rtc_halted(vrtc) && !force_update)
+		return;
+
+	ts.tv_sec = rtctime;
+	ts.tv_nsec = 0;
+	clock_ts_to_ct(&ts, &ct);
+
+	KASSERT(ct.sec >= 0 && ct.sec <= 59, ("invalid clocktime sec %d",
+	    ct.sec));
+	KASSERT(ct.min >= 0 && ct.min <= 59, ("invalid clocktime min %d",
+	    ct.min));
+	KASSERT(ct.hour >= 0 && ct.hour <= 23, ("invalid clocktime hour %d",
+	    ct.hour));
+	KASSERT(ct.dow >= 0 && ct.dow <= 6, ("invalid clocktime wday %d",
+	    ct.dow));
+	KASSERT(ct.day >= 1 && ct.day <= 31, ("invalid clocktime mday %d",
+	    ct.day));
+	KASSERT(ct.mon >= 1 && ct.mon <= 12, ("invalid clocktime month %d",
+	    ct.mon));
+	KASSERT(ct.year >= POSIX_BASE_YEAR, ("invalid clocktime year %d",
+	    ct.year));
+
+	rtc = &vrtc->rtcdev;
+	rtc->sec = rtcset(rtc, ct.sec);
+	rtc->min = rtcset(rtc, ct.min);
+	/* 12-hour mode shows hours 0 and 12 as 12 and all others as 1-11. */
+	hour = ct.hour;
+	if ((rtc->reg_b & RTCSB_24HR) == 0)
+		hour = (hour % 12 == 0) ? 12 : hour % 12;
+
+	rtc->hour = rtcset(rtc, hour);
+
+	if ((rtc->reg_b & RTCSB_24HR) == 0 && ct.hour >= 12)
+		rtc->hour |= 0x80;	    /* set MSB to indicate PM */
+
+	rtc->day_of_week = rtcset(rtc, ct.dow + 1);
+	rtc->day_of_month = rtcset(rtc, ct.day);
+	rtc->month = rtcset(rtc, ct.mon);
+	rtc->year = rtcset(rtc, ct.year % 100);
+}
+
+static int
+rtcget(struct rtcdev *rtc, int val, int *retval)
+{
+	uint8_t upper, lower;
+
+	if (rtc->reg_b & RTCSB_BIN) {	/* binary mode: value used as is */
+		*retval = val;
+		return (0);
+	}
+	/* BCD mode: decode two decimal digits; -1 if a nibble exceeds 9. */
+	lower = val & 0xf;
+	upper = (val >> 4) & 0xf;
+
+	if (lower > 9 || upper > 9)
+		return (-1);
+
+	*retval = upper * 10 + lower;
+	return (0);
+}
+
+static time_t
+rtc_to_secs(struct vrtc *vrtc)
+{
+	struct clocktime ct;
+	struct timespec ts;
+	struct rtcdev *rtc;
+	struct vm *vm;
+	int error, hour, pm, year;
+
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+	vm = vrtc->vm;
+	rtc = &vrtc->rtcdev;
+	/* Decode and validate each field; any bad one yields BROKEN_TIME. */
+	bzero(&ct, sizeof(struct clocktime));
+
+	error = rtcget(rtc, rtc->sec, &ct.sec);
+	if (error || ct.sec < 0 || ct.sec > 59) {
+		VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec);
+		goto fail;
+	}
+
+	error = rtcget(rtc, rtc->min, &ct.min);
+	if (error || ct.min < 0 || ct.min > 59) {
+		VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min);
+		goto fail;
+	}
+
+	pm = 0;
+	hour = rtc->hour;
+	if ((rtc->reg_b & RTCSB_24HR) == 0 && (hour & 0x80) != 0) {
+		hour &= ~0x80;		/* MSB flags PM in 12-hour mode */
+		pm = 1;
+	}
+	error = rtcget(rtc, hour, &ct.hour);
+	if (error == 0 && (rtc->reg_b & RTCSB_24HR) == 0) {
+		/* 12-hour to 24-hour: 12 -> 0, 1-11 unchanged, PM adds 12. */
+		if (ct.hour >= 1 && ct.hour <= 12)
+			ct.hour = (ct.hour % 12) + (pm ? 12 : 0);
+		else
+			ct.hour = -1;	/* rejected by the range check below */
+	}
+
+	if (error || ct.hour < 0 || ct.hour > 23) {
+		VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour);
+		goto fail;
+	}
+
+	/*
+	 * Ignore 'rtc->day_of_week': some guests like Linux don't bother
+	 * setting it at all while others like OpenBSD/i386 set it incorrectly.
+	 *
+	 * clock_ct_to_ts() does not depend on 'ct.dow' anyway so ignore it.
+	 */
+	ct.dow = -1;
+
+	error = rtcget(rtc, rtc->day_of_month, &ct.day);
+	if (error || ct.day < 1 || ct.day > 31) {
+		VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month,
+		    ct.day);
+		goto fail;
+	}
+
+	error = rtcget(rtc, rtc->month, &ct.mon);
+	if (error || ct.mon < 1 || ct.mon > 12) {
+		VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon);
+		goto fail;
+	}
+	/* The two-digit year is windowed: 70-99 -> 19xx, 0-69 -> 20xx. */
+	error = rtcget(rtc, rtc->year, &year);
+	if (error || year < 0 || year > 99) {
+		VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year);
+		goto fail;
+	}
+	if (year >= 70)
+		ct.year = 1900 + year;
+	else
+		ct.year = 2000 + year;
+
+	error = clock_ct_to_ts(&ct, &ts);
+	if (error || ts.tv_sec < 0) {
+		VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d",
+		    ct.year, ct.mon, ct.day);
+		VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d",
+		    ct.hour, ct.min, ct.sec);
+		goto fail;
+	}
+	return (ts.tv_sec);		/* success */
+fail:
+	return (VRTC_BROKEN_TIME);	/* failure */
+}
+
+static int
+vrtc_time_update(struct vrtc *vrtc, time_t newtime)
+{
+	struct rtcdev *rtc;
+	time_t oldtime;
+	uint8_t alarm_sec, alarm_min, alarm_hour;
+
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+	/* Rebase the RTC at 'newtime'. Returns 0, or EBUSY if guest-halted. */
+	rtc = &vrtc->rtcdev;
+	alarm_sec = rtc->alarm_sec;
+	alarm_min = rtc->alarm_min;
+	alarm_hour = rtc->alarm_hour;
+
+	oldtime = vrtc->base_rtctime;
+	VM_CTR2(vrtc->vm, "Updating RTC time from %#lx to %#lx",
+	    oldtime, newtime);
+
+	if (newtime == oldtime)
+		return (0);
+
+	/*
+	 * If 'newtime' indicates that RTC updates are disabled then just
+	 * record that and return. There is no need to do alarm interrupt
+	 * processing or update 'base_uptime' in this case.
+	 */
+	if (newtime == VRTC_BROKEN_TIME) {
+		vrtc->base_rtctime = VRTC_BROKEN_TIME;
+		return (0);
+	}
+
+	/*
+	 * Return an error if RTC updates are halted by the guest.
+	 */
+	if (rtc_halted(vrtc)) {
+		VM_CTR0(vrtc->vm, "RTC update halted by guest");
+		return (EBUSY);
+	}
+
+	do {
+		/*
+		 * If the alarm interrupt is enabled, 'oldtime' is valid and
+		 * time moves forward, visit each second up to 'newtime' to
+		 * check for the alarm condition. Otherwise (e.g. a backward
+		 * step, unreachable by incrementing) jump to 'newtime'.
+		 */
+		if (aintr_enabled(vrtc) && oldtime != VRTC_BROKEN_TIME &&
+		    newtime > oldtime)
+			vrtc->base_rtctime++;
+		else
+			vrtc->base_rtctime = newtime;
+
+		if (aintr_enabled(vrtc)) {
+			/*
+			 * Update the RTC date/time fields before checking
+			 * if the alarm conditions are satisfied.
+			 */
+			secs_to_rtc(vrtc->base_rtctime, vrtc, 0);
+			/* An alarm byte of 0xC0 or above matches any value. */
+			if ((alarm_sec >= 0xC0 || alarm_sec == rtc->sec) &&
+			    (alarm_min >= 0xC0 || alarm_min == rtc->min) &&
+			    (alarm_hour >= 0xC0 || alarm_hour == rtc->hour)) {
+				vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_ALARM);
+			}
+		}
+	} while (vrtc->base_rtctime != newtime);
+
+	if (uintr_enabled(vrtc))
+		vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE);
+
+	vrtc->base_uptime = sbinuptime();
+
+	return (0);
+}
+
+static sbintime_t
+vrtc_freq(struct vrtc *vrtc)
+{
+	int ratesel;	/* low nibble of reg_a, indexes 'pf' */
+
+	static sbintime_t pf[16] = {	/* callout interval by rate select */
+		0,			/* rate 0: no periodic callout */
+		SBT_1S / 256,		/* rates 1 and 2 repeat 8 and 9 */
+		SBT_1S / 128,
+		SBT_1S / 8192,		/* rate 3: shortest interval */
+		SBT_1S / 4096,
+		SBT_1S / 2048,
+		SBT_1S / 1024,
+		SBT_1S / 512,
+		SBT_1S / 256,
+		SBT_1S / 128,
+		SBT_1S / 64,
+		SBT_1S / 32,
+		SBT_1S / 16,
+		SBT_1S / 8,
+		SBT_1S / 4,
+		SBT_1S / 2,		/* rate 15: longest interval (2 Hz) */
+	};
+
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+	/*
+	 * Pick the callout interval, or 0 if no callout is needed. If both
+	 * periodic and alarm interrupts are enabled then the periodic
+	 * frequency drives the callout: its minimum (2 Hz) is higher than
+	 * the alarm frequency (1 Hz) so the alarm piggybacks on top of it.
+	 * The same argument applies to the update interrupt.
+	 */
+	if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) {
+		ratesel = vrtc->rtcdev.reg_a & 0xf;
+		return (pf[ratesel]);
+	} else if (aintr_enabled(vrtc) && update_enabled(vrtc)) {
+		return (SBT_1S);	/* once per second */
+	} else if (uintr_enabled(vrtc) && update_enabled(vrtc)) {
+		return (SBT_1S);	/* once per second */
+	} else {
+		return (0);		/* nothing to drive */
+	}
+}
+
+static void
+vrtc_callout_reset(struct vrtc *vrtc, sbintime_t freqsbt)
+{
+	/* Arm the callout to fire after 'freqsbt', or stop it when zero. */
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+	if (freqsbt == 0) {
+		if (callout_active(&vrtc->callout)) {
+			VM_CTR0(vrtc->vm, "RTC callout stopped");
+			callout_stop(&vrtc->callout);
+		}
+		return;
+	}
+	VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", SBT_1S / freqsbt);
+	callout_reset_sbt(&vrtc->callout, freqsbt, 0, vrtc_callout_handler,
+	    vrtc, 0);
+}
+
+static void
+vrtc_callout_handler(void *arg)
+{
+	struct vrtc *vrtc = arg;	/* set by vrtc_callout_reset() */
+	sbintime_t freqsbt;
+	time_t rtctime;
+	int error;
+
+	VM_CTR0(vrtc->vm, "vrtc callout fired");
+
+	VRTC_LOCK(vrtc);
+	if (callout_pending(&vrtc->callout))	/* callout was reset */
+		goto done;
+
+	if (!callout_active(&vrtc->callout))	/* callout was stopped */
+		goto done;
+
+	callout_deactivate(&vrtc->callout);
+
+	KASSERT((vrtc->rtcdev.reg_b & RTCSB_ALL_INTRS) != 0,
+	    ("gratuitous vrtc callout"));
+
+	if (pintr_enabled(vrtc))	/* latch the periodic interrupt flag */
+		vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD);
+
+	if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) {
+		rtctime = vrtc_curtime(vrtc);
+		error = vrtc_time_update(vrtc, rtctime);
+		KASSERT(error == 0, ("%s: vrtc_time_update error %d",
+		    __func__, error));
+	}
+
+	freqsbt = vrtc_freq(vrtc);
+	KASSERT(freqsbt != 0, ("%s: vrtc frequency cannot be zero", __func__));
+	vrtc_callout_reset(vrtc, freqsbt);	/* re-arm for the next tick */
+done:
+	VRTC_UNLOCK(vrtc);
+}
+
+static __inline void
+vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq)
+{
+	int armed;	/* 1 if the callout is active, else 0 */
+
+	armed = (callout_active(&vrtc->callout) != 0);
+	KASSERT((freq != 0) == (armed != 0),
+	    ("vrtc callout %s with frequency %#lx",
+	    armed ? "active" : "inactive", freq));
+}
+
+static void
+vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval)
+{
+	struct rtcdev *rtc;
+	int oldirqf, newirqf;	/* RTCIR_INT before and after this update */
+	uint8_t oldval, changed;
+
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+	rtc = &vrtc->rtcdev;
+	newval &= RTCIR_ALARM | RTCIR_PERIOD | RTCIR_UPDATE;
+	/* RTCIR_INT is set only if a flagged source is also enabled. */
+	oldirqf = rtc->reg_c & RTCIR_INT;
+	if ((aintr_enabled(vrtc) && (newval & RTCIR_ALARM) != 0) ||
+	    (pintr_enabled(vrtc) && (newval & RTCIR_PERIOD) != 0) ||
+	    (uintr_enabled(vrtc) && (newval & RTCIR_UPDATE) != 0)) {
+		newirqf = RTCIR_INT;
+	} else {
+		newirqf = 0;
+	}
+
+	oldval = rtc->reg_c;
+	rtc->reg_c = newirqf | newval;
+	changed = oldval ^ rtc->reg_c;
+	if (changed) {
+		VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x",
+		    oldval, rtc->reg_c);
+	}
+
+	if (!oldirqf && newirqf) {	/* pulse only on a 0 -> 1 transition */
+		VM_CTR1(vrtc->vm, "RTC irq %d asserted", RTC_IRQ);
+		vatpic_pulse_irq(vrtc->vm, RTC_IRQ);
+		vioapic_pulse_irq(vrtc->vm, RTC_IRQ);
+	} else if (oldirqf && !newirqf) {
+		VM_CTR1(vrtc->vm, "RTC irq %d deasserted", RTC_IRQ);
+	}
+}
+
+/*
+ * Store 'newval' in reg_b and apply the side effects of its HALT and
+ * interrupt enable bits. Returns 0, or -1 when the date/time programmed
+ * by the guest is invalid and 'rtc_flag_broken_time' is set.
+ */
+static int
+vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
+{
+	struct rtcdev *rtc;
+	sbintime_t oldfreq, newfreq;
+	time_t curtime, rtctime;
+	int error;
+	uint8_t oldval, changed;
+
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+	rtc = &vrtc->rtcdev;
+	oldval = rtc->reg_b;
+	oldfreq = vrtc_freq(vrtc);	/* frequency under the old reg_b */
+
+	rtc->reg_b = newval;
+	changed = oldval ^ newval;
+	if (changed) {
+		VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x",
+		    oldval, newval);
+	}
+
+	if (changed & RTCSB_HALT) {	/* guest toggled the HALT bit */
+		if ((newval & RTCSB_HALT) == 0) {	/* HALT cleared */
+			rtctime = rtc_to_secs(vrtc);
+			if (rtctime == VRTC_BROKEN_TIME) {
+				/*
+				 * Stop updating the RTC if the date/time
+				 * programmed by the guest is not correct.
+				 */
+				VM_CTR0(vrtc->vm, "Invalid RTC date/time "
+				    "programming detected");
+
+				if (rtc_flag_broken_time)
+					return (-1);
+			}
+		} else {				/* HALT set */
+			curtime = vrtc_curtime(vrtc);
+			KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch "
+			    "between vrtc basetime (%#lx) and curtime (%#lx)",
+			    __func__, vrtc->base_rtctime, curtime));
+
+			/*
+			 * Refresh the RTC date/time fields so they reflect
+			 * the time right before the guest set the HALT bit.
+			 */
+			secs_to_rtc(curtime, vrtc, 1);
+
+			/*
+			 * Updates are halted so mark 'base_rtctime' to denote
+			 * that the RTC date/time is in flux.
+			 */
+			rtctime = VRTC_BROKEN_TIME;
+			rtc->reg_b &= ~RTCSB_UINTR;
+		}
+		error = vrtc_time_update(vrtc, rtctime);
+		KASSERT(error == 0, ("vrtc_time_update error %d", error));
+	}
+
+	/* Side effect of changes to the interrupt enable bits. */
+	if (changed & RTCSB_ALL_INTRS)
+		vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c);
+
+	/* Change the callout frequency if it has changed. */
+	newfreq = vrtc_freq(vrtc);
+	if (newfreq != oldfreq)
+		vrtc_callout_reset(vrtc, newfreq);
+	else
+		vrtc_callout_check(vrtc, newfreq);
+
+	/*
+	 * The side effect of bits that control the RTC date/time format
+	 * is handled lazily when those fields are actually read.
+	 */
+	return (0);
+}
+
+/*
+ * Set reg_a (TUP masked off) and retune the callout if its rate changed.
+ */
+static void
+vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
+{
+	sbintime_t oldfreq, newfreq;
+	uint8_t oldval, changed;
+
+	KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+	newval &= ~RTCSA_TUP;
+	oldval = vrtc->rtcdev.reg_a;
+	oldfreq = vrtc_freq(vrtc);
+
+	if (divider_enabled(oldval) && !divider_enabled(newval)) {
+		VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx",
+		    vrtc->base_rtctime, vrtc->base_uptime);
+	} else if (!divider_enabled(oldval) && divider_enabled(newval)) {
+		/*
+		 * The dividers are coming out of reset so restart
+		 * 'base_uptime' first. This maintains the illusion that the
+		 * RTC date/time was frozen while the dividers were disabled.
+		 */
+		vrtc->base_uptime = sbinuptime();
+		VM_CTR2(vrtc->vm, "RTC divider out of reset at %#lx/%#lx",
+		    vrtc->base_rtctime, vrtc->base_uptime);
+	} else {
+		/* NOTHING */
+	}
+
+	vrtc->rtcdev.reg_a = newval;
+	changed = oldval ^ newval;
+	if (changed) {
+		VM_CTR2(vrtc->vm, "RTC reg_a changed from %#x to %#x",
+		    oldval, newval);
+	}
+
+	/* Side effect of changes to rate select and divider enable bits. */
+	newfreq = vrtc_freq(vrtc);
+	if (newfreq != oldfreq)
+		vrtc_callout_reset(vrtc, newfreq);
+	else
+		vrtc_callout_check(vrtc, newfreq);
+}
+
+int
+vrtc_set_time(struct vm *vm, time_t secs)
+{
+	struct vrtc *vrtc = vm_rtc(vm);
+	int rc;
+
+	/* Apply the new time under the lock; trace the outcome afterwards. */
+	VRTC_LOCK(vrtc);
+	rc = vrtc_time_update(vrtc, secs);
+	VRTC_UNLOCK(vrtc);
+
+	if (rc == 0) {
+		VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs);
+	} else {
+		VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", rc,
+		    secs);
+	}
+
+	return (rc);
+}
+
+time_t
+vrtc_get_time(struct vm *vm)
+{
+	struct vrtc *vrtc = vm_rtc(vm);
+	time_t now;
+
+	/* Sample the current RTC time with the vrtc lock held. */
+	VRTC_LOCK(vrtc);
+	now = vrtc_curtime(vrtc);
+	VRTC_UNLOCK(vrtc);
+
+	return (now);
+}
+
+int
+vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
+{
+	struct vrtc *vrtc;
+	uint8_t *ptr;	/* byte-wise view of 'rtcdev' */
+
+	vrtc = vm_rtc(vm);
+
+	/*
+	 * Don't allow writes to RTC control registers or the date/time fields.
+	 */
+	if (offset < offsetof(struct rtcdev, nvram[0]) ||
+	    offset >= sizeof(struct rtcdev)) {
+		VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d",
+		    offset);
+		return (EINVAL);	/* outside the nvram area */
+	}
+
+	VRTC_LOCK(vrtc);
+	ptr = (uint8_t *)(&vrtc->rtcdev);
+	ptr[offset] = value;
+	VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset);
+	VRTC_UNLOCK(vrtc);
+
+	return (0);
+}
+
+/*
+ * Read the RTC byte at 'offset' into '*retval', refreshing the date/time
+ * fields first if 'offset' is below 10. Returns 0 or EINVAL.
+ */
+int
+vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
+{
+	struct vrtc *vrtc;
+	time_t curtime;
+	uint8_t *ptr;
+
+	/* Allow all offsets in the RTC to be read. */
+	if (offset < 0 || offset >= sizeof(struct rtcdev))
+		return (EINVAL);
+
+	vrtc = vm_rtc(vm);
+	VRTC_LOCK(vrtc);
+
+	/* Update RTC date/time fields if necessary. */
+	if (offset < 10) {
+		curtime = vrtc_curtime(vrtc);
+		secs_to_rtc(curtime, vrtc, 0);
+	}
+
+	ptr = (uint8_t *)(&vrtc->rtcdev);
+	*retval = ptr[offset];
+
+	VRTC_UNLOCK(vrtc);
+	return (0);
+}
+
+int
+vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *val)
+{
+	struct vrtc *vrtc = vm_rtc(vm);
+
+	/* Only single-byte accesses are handled. */
+	if (bytes != 1)
+		return (-1);
+
+	if (!in) {
+		/* Latch the low seven bits as the register index. */
+		VRTC_LOCK(vrtc);
+		vrtc->addr = *val & 0x7f;
+		VRTC_UNLOCK(vrtc);
+	} else {
+		/* Reads always return 0xff. */
+		*val = 0xff;
+	}
+
+	return (0);
+}
+
+/*
+ * Handle a one-byte guest access to the RTC register selected by
+ * 'vrtc->addr'. Returns 0 on success and -1 on failure.
+ */
+int
+vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *val)
+{
+	struct vrtc *vrtc;
+	struct rtcdev *rtc;
+	time_t curtime;
+	int error, offset;
+
+	vrtc = vm_rtc(vm);
+	rtc = &vrtc->rtcdev;
+
+	if (bytes != 1)
+		return (-1);
+
+	VRTC_LOCK(vrtc);
+	offset = vrtc->addr;
+	if (offset >= sizeof(struct rtcdev)) {
+		VRTC_UNLOCK(vrtc);
+		return (-1);
+	}
+
+	error = 0;
+	curtime = vrtc_curtime(vrtc);
+	vrtc_time_update(vrtc, curtime);	/* EBUSY if halted: ignored */
+
+	if (in) {
+		/* Update RTC date/time fields if necessary. */
+		if (offset < 10)
+			secs_to_rtc(curtime, vrtc, 0);
+
+		if (offset == 12) {
+			/*
+			 * XXX
+			 * reg_c interrupt flags are updated only if the
+			 * corresponding interrupt enable bit in reg_b is set.
+			 */
+			*val = vrtc->rtcdev.reg_c;
+			vrtc_set_reg_c(vrtc, 0);
+		} else {
+			*val = *((uint8_t *)rtc + offset);
+		}
+		VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x",
+		    *val, offset);
+	} else {
+		switch (offset) {
+		case 10:
+			VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val);
+			vrtc_set_reg_a(vrtc, *val);
+			break;
+		case 11:
+			VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val);
+			error = vrtc_set_reg_b(vrtc, *val);
+			break;
+		case 12:
+			VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)",
+			    *val);
+			break;
+		case 13:
+			VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)",
+			    *val);
+			break;
+		case 0:
+			/* High order bit of 'seconds' is readonly. */
+			*val &= 0x7f;
+			/* FALLTHRU */
+		default:
+			VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x",
+			    offset, *val);
+			*((uint8_t *)rtc + offset) = *val;
+			break;
+		}
+	}
+	VRTC_UNLOCK(vrtc);
+	return (error);
+}
+
+void
+vrtc_reset(struct vrtc *vrtc)
+{
+	struct rtcdev *rtc;
+
+	VRTC_LOCK(vrtc);
+	/* Clear reg_b's interrupt enables and SQWE, then the reg_c flags. */
+	rtc = &vrtc->rtcdev;
+	vrtc_set_reg_b(vrtc, rtc->reg_b & ~(RTCSB_ALL_INTRS | RTCSB_SQWE));
+	vrtc_set_reg_c(vrtc, 0);
+	KASSERT(!callout_active(&vrtc->callout), ("rtc callout still active"));
+
+	VRTC_UNLOCK(vrtc);
+}
+
+/* Allocate and initialize the virtual RTC state for 'vm'. */
+struct vrtc *
+vrtc_init(struct vm *vm)
+{
+	struct vrtc *vrtc;
+	struct rtcdev *rtc;
+	time_t curtime;
+
+	/* M_WAITOK: sleep for memory rather than return NULL. */
+	vrtc = malloc(sizeof(struct vrtc), M_VRTC, M_WAITOK | M_ZERO);
+	vrtc->vm = vm;
+	mtx_init(&vrtc->mtx, "vrtc lock", NULL, MTX_DEF);
+	callout_init(&vrtc->callout, 1);
+
+	/* Allow dividers to keep time but disable everything else */
+	rtc = &vrtc->rtcdev;
+	rtc->reg_a = 0x20;
+	rtc->reg_b = RTCSB_24HR;
+	rtc->reg_c = 0;
+	rtc->reg_d = RTCSD_PWR;
+
+	/* Reset the index register to a safe value. */
+	vrtc->addr = RTC_STATUSD;
+
+	/* Initialize RTC time to 00:00:00 Jan 1, 1970. */
+	curtime = 0;
+
+	VRTC_LOCK(vrtc);
+	vrtc->base_rtctime = VRTC_BROKEN_TIME;	/* no valid old time yet */
+	vrtc_time_update(vrtc, curtime);
+	secs_to_rtc(curtime, vrtc, 0);
+	VRTC_UNLOCK(vrtc);
+
+	return (vrtc);
+}
+
+void
+vrtc_cleanup(struct vrtc *vrtc)
+{
+	callout_drain(&vrtc->callout);	/* wait out a running handler */
+	mtx_destroy(&vrtc->mtx);	/* pairs with vrtc_init() */
+	free(vrtc, M_VRTC);
+}

Property changes on: head/sys/amd64/vmm/io/vrtc.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/amd64/vmm/io/vrtc.h
===================================================================
--- head/sys/amd64/vmm/io/vrtc.h	(nonexistent)
+++ head/sys/amd64/vmm/io/vrtc.h	(revision 276428)
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2014 Neel Natu (neel@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VRTC_H_
+#define	_VRTC_H_
+
+#include <isa/isareg.h>
+
+struct vrtc;	/* only forward-declared in this header */
+
+struct vrtc *vrtc_init(struct vm *vm);
+void vrtc_cleanup(struct vrtc *vrtc);
+void vrtc_reset(struct vrtc *vrtc);
+
+time_t vrtc_get_time(struct vm *vm);
+int vrtc_set_time(struct vm *vm, time_t secs);
+int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value);
+int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval);
+
+int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *val);
+int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+    uint32_t *val);
+
+#endif /* _VRTC_H_ */

Property changes on: head/sys/amd64/vmm/io/vrtc.h
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: head/sys/amd64/vmm/vmm.c
===================================================================
--- head/sys/amd64/vmm/vmm.c	(revision 276427)
+++ head/sys/amd64/vmm/vmm.c	(revision 276428)
@@ -1,2362 +1,2377 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/pcpu.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/systm.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/vm.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 #include <x86/psl.h>
 #include <x86/apicreg.h>
 #include <machine/vmparam.h>
 
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 #include <machine/vmm_instruction_emul.h>
 
 #include "vmm_ioport.h"
 #include "vmm_ktr.h"
 #include "vmm_host.h"
 #include "vmm_mem.h"
 #include "vmm_util.h"
 #include "vatpic.h"
 #include "vatpit.h"
 #include "vhpet.h"
 #include "vioapic.h"
 #include "vlapic.h"
 #include "vpmtmr.h"
+#include "vrtc.h"
 #include "vmm_ipi.h"
 #include "vmm_stat.h"
 #include "vmm_lapic.h"
 
 #include "io/ppt.h"
 #include "io/iommu.h"
 
 struct vlapic;
 
 /*
  * Initialization:
  * (a) allocated when vcpu is created
  * (i) initialized when vcpu is created and when it is reinitialized
  * (o) initialized the first time the vcpu is created
  * (x) initialized before use
  */
 struct vcpu {
 	struct mtx 	mtx;		/* (o) protects 'state' and 'hostcpu' */
 	enum vcpu_state	state;		/* (o) vcpu state */
 	int		hostcpu;	/* (o) vcpu's host cpu */
 	struct vlapic	*vlapic;	/* (i) APIC device model */
 	enum x2apic_state x2apic_state;	/* (i) APIC mode */
 	uint64_t	exitintinfo;	/* (i) events pending at VM exit */
 	int		nmi_pending;	/* (i) NMI pending */
 	int		extint_pending;	/* (i) INTR pending */
 	struct vm_exception exception;	/* (x) exception collateral */
 	int	exception_pending;	/* (i) exception pending */
 	struct savefpu	*guestfpu;	/* (a,i) guest fpu state */
 	uint64_t	guest_xcr0;	/* (i) guest %xcr0 register */
 	void		*stats;		/* (a,i) statistics */
 	struct vm_exit	exitinfo;	/* (x) exit reason and collateral */
 };
 
 #define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
 #define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
 #define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
 #define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
 #define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
 
 struct mem_seg {
 	vm_paddr_t	gpa;
 	size_t		len;
 	boolean_t	wired;
 	vm_object_t	object;
 };
 #define	VM_MAX_MEMORY_SEGMENTS	2
 
 /*
  * Initialization:
  * (o) initialized the first time the VM is created
  * (i) initialized when VM is created and when it is reinitialized
  * (x) initialized before use
  */
 struct vm {
 	void		*cookie;		/* (i) cpu-specific data */
 	void		*iommu;			/* (x) iommu-specific data */
 	struct vhpet	*vhpet;			/* (i) virtual HPET */
 	struct vioapic	*vioapic;		/* (i) virtual ioapic */
 	struct vatpic	*vatpic;		/* (i) virtual atpic */
 	struct vatpit	*vatpit;		/* (i) virtual atpit */
 	struct vpmtmr	*vpmtmr;		/* (i) virtual ACPI PM timer */
+	struct vrtc	*vrtc;			/* (o) virtual RTC */
 	volatile cpuset_t active_cpus;		/* (i) active vcpus */
 	int		suspend;		/* (i) stop VM execution */
 	volatile cpuset_t suspended_cpus; 	/* (i) suspended vcpus */
 	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
 	cpuset_t	rendezvous_req_cpus;	/* (x) rendezvous requested */
 	cpuset_t	rendezvous_done_cpus;	/* (x) rendezvous finished */
 	void		*rendezvous_arg;	/* (x) rendezvous func/arg */
 	vm_rendezvous_func_t rendezvous_func;
 	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
 	int		num_mem_segs;		/* (o) guest memory segments */
 	struct mem_seg	mem_segs[VM_MAX_MEMORY_SEGMENTS];
 	struct vmspace	*vmspace;		/* (o) guest's address space */
 	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
 	struct vcpu	vcpu[VM_MAXCPU];	/* (i) guest vcpus */
 };
 
 static int vmm_initialized;
 
 static struct vmm_ops *ops;
 #define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
 #define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
 #define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
 
 #define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
 #define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
 	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
 #define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
 #define	VMSPACE_ALLOC(min, max) \
 	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
 #define	VMSPACE_FREE(vmspace) \
 	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
 #define	VMGETREG(vmi, vcpu, num, retval)		\
 	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
 #define	VMSETREG(vmi, vcpu, num, val)		\
 	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
 #define	VMGETDESC(vmi, vcpu, num, desc)		\
 	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
 #define	VMSETDESC(vmi, vcpu, num, desc)		\
 	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
 #define	VMGETCAP(vmi, vcpu, num, retval)	\
 	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
 #define	VMSETCAP(vmi, vcpu, num, val)		\
 	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
 #define	VLAPIC_INIT(vmi, vcpu)			\
 	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
 #define	VLAPIC_CLEANUP(vmi, vlapic)		\
 	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
 
 #define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
 #define	fpu_stop_emulating()	clts()
 
 static MALLOC_DEFINE(M_VM, "vm", "vm");
 
 /* statistics */
 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
 
 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
 
 /*
  * Halt the guest if all vcpus are executing a HLT instruction with
  * interrupts disabled.
  */
 static int halt_detection_enabled = 1;
 SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
     &halt_detection_enabled, 0,
     "Halt VM if all vcpus execute HLT with interrupts disabled");
 
 static int vmm_ipinum;
 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
     "IPI vector used for vcpu notifications");
 
 static int trace_guest_exceptions;
 SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
     &trace_guest_exceptions, 0,
     "Trap into hypervisor on all guest exceptions and reflect them back");
 
 static void
 vcpu_cleanup(struct vm *vm, int i, bool destroy)
 {
 	struct vcpu *vcpu = &vm->vcpu[i];
 
 	VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
 	if (destroy) {
 		vmm_stat_free(vcpu->stats);	
 		fpu_save_area_free(vcpu->guestfpu);
 	}
 }
 
 static void
 vcpu_init(struct vm *vm, int vcpu_id, bool create)
 {
 	struct vcpu *vcpu;
 
 	KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU,
 	    ("vcpu_init: invalid vcpu %d", vcpu_id));
 	  
 	vcpu = &vm->vcpu[vcpu_id];
 
 	if (create) {
 		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
 		    "initialized", vcpu_id));
 		vcpu_lock_init(vcpu);
 		vcpu->state = VCPU_IDLE;
 		vcpu->hostcpu = NOCPU;
 		vcpu->guestfpu = fpu_save_area_alloc();
 		vcpu->stats = vmm_stat_alloc();
 	}
 
 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
 	vcpu->exitintinfo = 0;
 	vcpu->nmi_pending = 0;
 	vcpu->extint_pending = 0;
 	vcpu->exception_pending = 0;
 	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
 	fpu_save_area_reset(vcpu->guestfpu);
 	vmm_stat_init(vcpu->stats);
 }
 
 int
 vcpu_trace_exceptions(struct vm *vm, int vcpuid)
 {
 
 	return (trace_guest_exceptions);
 }
 
 struct vm_exit *
 vm_exitinfo(struct vm *vm, int cpuid)
 {
 	struct vcpu *vcpu;
 
 	if (cpuid < 0 || cpuid >= VM_MAXCPU)
 		panic("vm_exitinfo: invalid cpuid %d", cpuid);
 
 	vcpu = &vm->vcpu[cpuid];
 
 	return (&vcpu->exitinfo);
 }
 
 static void
 vmm_resume(void)
 {
 	VMM_RESUME();
 }
 
 static int
 vmm_init(void)
 {
 	int error;
 
 	vmm_host_state_init();
 
 	vmm_ipinum = vmm_ipi_alloc();
 	if (vmm_ipinum == 0)
 		vmm_ipinum = IPI_AST;
 
 	error = vmm_mem_init();
 	if (error)
 		return (error);
 	
 	if (vmm_is_intel())
 		ops = &vmm_ops_intel;
 	else if (vmm_is_amd())
 		ops = &vmm_ops_amd;
 	else
 		return (ENXIO);
 
 	vmm_resume_p = vmm_resume;
 
 	return (VMM_INIT(vmm_ipinum));
 }
 
 static int
 vmm_handler(module_t mod, int what, void *arg)
 {
 	int error;
 
 	switch (what) {
 	case MOD_LOAD:
 		vmmdev_init();
 		if (ppt_avail_devices() > 0)
 			iommu_init();
 		error = vmm_init();
 		if (error == 0)
 			vmm_initialized = 1;
 		break;
 	case MOD_UNLOAD:
 		error = vmmdev_cleanup();
 		if (error == 0) {
 			vmm_resume_p = NULL;
 			iommu_cleanup();
 			if (vmm_ipinum != IPI_AST)
 				vmm_ipi_free(vmm_ipinum);
 			error = VMM_CLEANUP();
 			/*
 			 * Something bad happened - prevent new
 			 * VMs from being created
 			 */
 			if (error)
 				vmm_initialized = 0;
 		}
 		break;
 	default:
 		error = 0;
 		break;
 	}
 	return (error);
 }
 
 static moduledata_t vmm_kmod = {
 	"vmm",
 	vmm_handler,
 	NULL
 };
 
 /*
  * vmm initialization has the following dependencies:
  *
  * - iommu initialization must happen after the pci passthru driver has had
  *   a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
  *
  * - VT-x initialization requires smp_rendezvous() and therefore must happen
  *   after SMP is fully functional (after SI_SUB_SMP).
  */
 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
 MODULE_VERSION(vmm, 1);
 
 static void
 vm_init(struct vm *vm, bool create)
 {
 	int i;
 
 	vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
 	vm->iommu = NULL;
 	vm->vioapic = vioapic_init(vm);
 	vm->vhpet = vhpet_init(vm);
 	vm->vatpic = vatpic_init(vm);
 	vm->vatpit = vatpit_init(vm);
 	vm->vpmtmr = vpmtmr_init(vm);
+	if (create)
+		vm->vrtc = vrtc_init(vm);
 
 	CPU_ZERO(&vm->active_cpus);
 
 	vm->suspend = 0;
 	CPU_ZERO(&vm->suspended_cpus);
 
 	for (i = 0; i < VM_MAXCPU; i++)
 		vcpu_init(vm, i, create);
 }
 
 int
 vm_create(const char *name, struct vm **retvm)
 {
 	struct vm *vm;
 	struct vmspace *vmspace;
 
 	/*
 	 * If vmm.ko could not be successfully initialized then don't attempt
 	 * to create the virtual machine.
 	 */
 	if (!vmm_initialized)
 		return (ENXIO);
 
 	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
 		return (EINVAL);
 
 	vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS);
 	if (vmspace == NULL)
 		return (ENOMEM);
 
 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
 	strcpy(vm->name, name);
 	vm->num_mem_segs = 0;
 	vm->vmspace = vmspace;
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 
 	vm_init(vm, true);
 
 	*retvm = vm;
 	return (0);
 }
 
 static void
 vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
 {
 
 	if (seg->object != NULL)
 		vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
 
 	bzero(seg, sizeof(*seg));
 }
 
 static void
 vm_cleanup(struct vm *vm, bool destroy)
 {
 	int i;
 
 	ppt_unassign_all(vm);
 
 	if (vm->iommu != NULL)
 		iommu_destroy_domain(vm->iommu);
 
+	if (destroy)
+		vrtc_cleanup(vm->vrtc);
+	else
+		vrtc_reset(vm->vrtc);
 	vpmtmr_cleanup(vm->vpmtmr);
 	vatpit_cleanup(vm->vatpit);
 	vhpet_cleanup(vm->vhpet);
 	vatpic_cleanup(vm->vatpic);
 	vioapic_cleanup(vm->vioapic);
 
 	for (i = 0; i < VM_MAXCPU; i++)
 		vcpu_cleanup(vm, i, destroy);
 
 	VMCLEANUP(vm->cookie);
 
 	if (destroy) {
 		for (i = 0; i < vm->num_mem_segs; i++)
 			vm_free_mem_seg(vm, &vm->mem_segs[i]);
 
 		vm->num_mem_segs = 0;
 
 		VMSPACE_FREE(vm->vmspace);
 		vm->vmspace = NULL;
 	}
 }
 
 void
 vm_destroy(struct vm *vm)
 {
 	vm_cleanup(vm, true);
 	free(vm, M_VM);
 }
 
 int
 vm_reinit(struct vm *vm)
 {
 	int error;
 
 	/*
 	 * A virtual machine can be reset only if all vcpus are suspended.
 	 */
 	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
 		vm_cleanup(vm, false);
 		vm_init(vm, false);
 		error = 0;
 	} else {
 		error = EBUSY;
 	}
 
 	return (error);
 }
 
 const char *
 vm_name(struct vm *vm)
 {
 	return (vm->name);
 }
 
 int
 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 	vm_object_t obj;
 
 	if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
 		return (ENOMEM);
 	else
 		return (0);
 }
 
 int
 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
 
 	vmm_mmio_free(vm->vmspace, gpa, len);
 	return (0);
 }
 
 boolean_t
 vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
 {
 	int i;
 	vm_paddr_t gpabase, gpalimit;
 
 	for (i = 0; i < vm->num_mem_segs; i++) {
 		gpabase = vm->mem_segs[i].gpa;
 		gpalimit = gpabase + vm->mem_segs[i].len;
 		if (gpa >= gpabase && gpa < gpalimit)
 			return (TRUE);		/* 'gpa' is regular memory */
 	}
 
 	if (ppt_is_mmio(vm, gpa))
 		return (TRUE);			/* 'gpa' is pci passthru mmio */
 
 	return (FALSE);
 }
 
 int
 vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
 	int available, allocated;
 	struct mem_seg *seg;
 	vm_object_t object;
 	vm_paddr_t g;
 
 	if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
 		return (EINVAL);
 	
 	available = allocated = 0;
 	g = gpa;
 	while (g < gpa + len) {
 		if (vm_mem_allocated(vm, g))
 			allocated++;
 		else
 			available++;
 
 		g += PAGE_SIZE;
 	}
 
 	/*
 	 * If there are some allocated and some available pages in the address
 	 * range then it is an error.
 	 */
 	if (allocated && available)
 		return (EINVAL);
 
 	/*
 	 * If the entire address range being requested has already been
 	 * allocated then there isn't anything more to do.
 	 */
 	if (allocated && available == 0)
 		return (0);
 
 	if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
 		return (E2BIG);
 
 	seg = &vm->mem_segs[vm->num_mem_segs];
 
 	if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
 		return (ENOMEM);
 
 	seg->gpa = gpa;
 	seg->len = len;
 	seg->object = object;
 	seg->wired = FALSE;
 
 	vm->num_mem_segs++;
 
 	return (0);
 }
 
 static vm_paddr_t
 vm_maxmem(struct vm *vm)
 {
 	int i;
 	vm_paddr_t gpa, maxmem;
 
 	maxmem = 0;
 	for (i = 0; i < vm->num_mem_segs; i++) {
 		gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len;
 		if (gpa > maxmem)
 			maxmem = gpa;
 	}
 	return (maxmem);
 }
 
 /*
  * Unwire every memory segment that is currently marked wired and clear its
  * 'wired' flag.  Failure to unwire is treated as an invariant violation.
  */
 static void
 vm_gpa_unwire(struct vm *vm)
 {
 	struct mem_seg *ms;
 	int idx, status;
 
 	for (idx = 0; idx < vm->num_mem_segs; idx++) {
 		ms = &vm->mem_segs[idx];
 		if (ms->wired) {
 			status = vm_map_unwire(&vm->vmspace->vm_map,
 			    ms->gpa, ms->gpa + ms->len,
 			    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 			KASSERT(status == KERN_SUCCESS,
 			    ("vm(%s) memory segment "
 			    "%#lx/%ld could not be unwired: %d",
 			    vm_name(vm), ms->gpa, ms->len, status));
 
 			ms->wired = FALSE;
 		}
 	}
 }
 
 /*
  * Wire every memory segment that is not already wired.
  *
  * Returns 0 when all segments are wired.  If any segment cannot be wired
  * all wiring is undone via vm_gpa_unwire() and EAGAIN is returned.
  */
 static int
 vm_gpa_wire(struct vm *vm)
 {
 	struct mem_seg *ms;
 	int idx, status;
 
 	for (idx = 0; idx < vm->num_mem_segs; idx++) {
 		ms = &vm->mem_segs[idx];
 		if (!ms->wired) {
 			/* XXX rlimits? */
 			status = vm_map_wire(&vm->vmspace->vm_map,
 			    ms->gpa, ms->gpa + ms->len,
 			    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 			if (status != KERN_SUCCESS) {
 				/*
 				 * Undo the wiring before returning an error.
 				 */
 				vm_gpa_unwire(vm);
 				return (EAGAIN);
 			}
 
 			ms->wired = TRUE;
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Walk every page of every guest memory segment and move its IOMMU mapping
  * between the host domain and the VM's domain ('vm->iommu').
  *
  * map == TRUE:  create the gpa -> hpa mapping in 'vm->iommu' and remove
  *               hpa from the host domain.
  * map == FALSE: remove gpa from 'vm->iommu' and re-create the identity
  *               (hpa -> hpa) mapping in the host domain.
  *
  * Every segment is asserted to be wired.
  */
 static void
 vm_iommu_modify(struct vm *vm, boolean_t map)
 {
 	int i, sz;
 	vm_paddr_t gpa, hpa;
 	struct mem_seg *seg;
 	void *vp, *cookie, *host_domain;
 
 	/* Mappings are manipulated one page at a time. */
 	sz = PAGE_SIZE;
 	host_domain = iommu_host_domain();
 
 	for (i = 0; i < vm->num_mem_segs; i++) {
 		seg = &vm->mem_segs[i];
 		KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
 		    vm_name(vm), seg->gpa, seg->len));
 
 		gpa = seg->gpa;
 		while (gpa < seg->gpa + seg->len) {
 			/*
 			 * The hold is only used to discover the host address
 			 * backing 'gpa'; it is dropped immediately.
 			 * NOTE(review): presumably safe because the segment
 			 * is wired -- confirm.
 			 */
 			vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
 					 &cookie);
 			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
 			    vm_name(vm), gpa));
 
 			vm_gpa_release(cookie);
 
 			hpa = DMAP_TO_PHYS((uintptr_t)vp);
 			if (map) {
 				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
 				iommu_remove_mapping(host_domain, hpa, sz);
 			} else {
 				iommu_remove_mapping(vm->iommu, gpa, sz);
 				iommu_create_mapping(host_domain, hpa, hpa, sz);
 			}
 
 			gpa += PAGE_SIZE;
 		}
 	}
 
 	/*
 	 * Invalidate the cached translations associated with the domain
 	 * from which pages were removed.
 	 */
 	if (map)
 		iommu_invalidate_tlb(host_domain);
 	else
 		iommu_invalidate_tlb(vm->iommu);
 }
 
 /* Convenience wrappers selecting the direction of vm_iommu_modify(). */
 #define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
 #define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)
 
 /*
  * Detach the pci passthru device bus/slot/func from the VM.  Once the last
  * passthru device is gone the guest memory is unmapped from the iommu and
  * unwired.
  *
  * Returns 0 on success or the error from ppt_unassign_device().
  */
 int
 vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
 {
 	int rc;
 
 	rc = ppt_unassign_device(vm, bus, slot, func);
 	if (rc == 0 && ppt_assigned_devices(vm) == 0) {
 		vm_iommu_unmap(vm);
 		vm_gpa_unwire(vm);
 	}
 
 	return (rc);
 }
 
 /*
  * Attach the pci passthru device bus/slot/func to the VM.
  *
  * Returns 0 on success, the error from vm_gpa_wire() if guest memory
  * could not be wired, or the error from ppt_assign_device().
  */
 int
 vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
 {
 	int error;
 	vm_paddr_t maxaddr;
 
 	/*
 	 * Virtual machines with pci passthru devices get special treatment:
 	 * - the guest physical memory is wired
 	 * - the iommu is programmed to do the 'gpa' to 'hpa' translation
 	 *
 	 * We need to do this before the first pci passthru device is attached.
 	 */
 	if (ppt_assigned_devices(vm) == 0) {
 		KASSERT(vm->iommu == NULL,
 		    ("vm_assign_pptdev: iommu must be NULL"));
 
 		/*
 		 * Wire the memory before creating the iommu domain so that a
 		 * wiring failure does not leave 'vm->iommu' pointing at a
 		 * domain; otherwise a retry would trip the assertion above.
 		 */
 		error = vm_gpa_wire(vm);
 		if (error)
 			return (error);
 
 		maxaddr = vm_maxmem(vm);
 		vm->iommu = iommu_create_domain(maxaddr);
 
 		vm_iommu_map(vm);
 	}
 
 	error = ppt_assign_device(vm, bus, slot, func);
 	return (error);
 }
 
 /*
  * Hold the guest page containing 'gpa' and return a direct-map pointer to
  * the byte at 'gpa'.  The range 'gpa'/'len' must not cross a page boundary
  * (panics otherwise).
  *
  * On success '*cookie' is set to the held page, to be passed to
  * vm_gpa_release().  On failure NULL is returned and '*cookie' is NULL.
  */
 void *
 vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
 	    void **cookie)
 {
 	vm_page_t pg;
 	int held, off;
 
 	off = gpa & PAGE_MASK;
 	if (len > PAGE_SIZE - off)
 		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
 
 	held = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
 	    trunc_page(gpa), PAGE_SIZE, reqprot, &pg, 1);
 	if (held != 1) {
 		*cookie = NULL;
 		return (NULL);
 	}
 
 	*cookie = pg;
 	return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pg)) + off));
 }
 
 /*
  * Drop the hold on the page identified by 'cookie', as returned through
  * vm_gpa_hold().
  */
 void
 vm_gpa_release(void *cookie)
 {
 	vm_page_t pg;
 
 	pg = cookie;
 
 	vm_page_lock(pg);
 	vm_page_unhold(pg);
 	vm_page_unlock(pg);
 }
 
 /*
  * Look up the memory segment whose base address equals 'gpabase' and copy
  * its gpa, len and wired state into 'seg'.
  *
  * Returns 0 if a segment was found, -1 otherwise.
  */
 int
 vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
 		  struct vm_memory_segment *seg)
 {
 	int idx;
 
 	for (idx = 0; idx < vm->num_mem_segs; idx++) {
 		if (vm->mem_segs[idx].gpa != gpabase)
 			continue;
 
 		seg->gpa = vm->mem_segs[idx].gpa;
 		seg->len = vm->mem_segs[idx].len;
 		seg->wired = vm->mem_segs[idx].wired;
 		return (0);
 	}
 
 	return (-1);
 }
 
 /*
  * Find the memory segment containing 'gpa' and return its backing object
  * in '*object' along with the offset of 'gpa' within the segment in
  * '*offset'.  A reference is taken on the returned object.
  *
  * Returns 0 on success, EINVAL if no segment with an object contains 'gpa'.
  */
 int
 vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
 	      vm_offset_t *offset, struct vm_object **object)
 {
 	vm_object_t obj;
 	vm_paddr_t base;
 	size_t size;
 	int idx;
 
 	for (idx = 0; idx < vm->num_mem_segs; idx++) {
 		obj = vm->mem_segs[idx].object;
 		if (obj == NULL)
 			continue;
 
 		base = vm->mem_segs[idx].gpa;
 		size = vm->mem_segs[idx].len;
 		if (gpa < base || gpa >= base + size)
 			continue;
 
 		*offset = gpa - base;
 		*object = obj;
 		vm_object_reference(obj);
 		return (0);
 	}
 
 	return (EINVAL);
 }
 
 /*
  * Read guest register 'reg' of 'vcpu' into '*retval'.
  *
  * Returns EINVAL if 'vcpu' or 'reg' is out of range, otherwise the result
  * of VMGETREG().
  */
 int
 vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	/* Reject negative identifiers as well as those past the end. */
 	if (reg < 0 || reg >= VM_REG_LAST)
 		return (EINVAL);
 
 	return (VMGETREG(vm->cookie, vcpu, reg, retval));
 }
 
 /*
  * Write 'val' to guest register 'reg' of 'vcpu'.
  *
  * Returns EINVAL if 'vcpu' or 'reg' is out of range, otherwise the result
  * of VMSETREG().
  */
 int
 vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	/* Reject negative identifiers as well as those past the end. */
 	if (reg < 0 || reg >= VM_REG_LAST)
 		return (EINVAL);
 
 	return (VMSETREG(vm->cookie, vcpu, reg, val));
 }
 
 /* Return TRUE if 'reg' names the guest IDTR or GDTR. */
 static boolean_t
 is_descriptor_table(int reg)
 {
 
 	if (reg == VM_REG_GUEST_IDTR || reg == VM_REG_GUEST_GDTR)
 		return (TRUE);
 
 	return (FALSE);
 }
 
 /*
  * Return TRUE if 'reg' names one of the guest segment registers
  * (ES, CS, SS, DS, FS, GS, TR or LDTR).
  */
 static boolean_t
 is_segment_register(int reg)
 {
 
 	if (reg == VM_REG_GUEST_ES || reg == VM_REG_GUEST_CS ||
 	    reg == VM_REG_GUEST_SS || reg == VM_REG_GUEST_DS ||
 	    reg == VM_REG_GUEST_FS || reg == VM_REG_GUEST_GS ||
 	    reg == VM_REG_GUEST_TR || reg == VM_REG_GUEST_LDTR)
 		return (TRUE);
 
 	return (FALSE);
 }
 
 /*
  * Fetch the descriptor for segment register or descriptor table 'reg' of
  * 'vcpu' into 'desc'.
  *
  * Returns EINVAL for an out-of-range 'vcpu' or a 'reg' that is neither a
  * segment register nor a descriptor table; otherwise the VMGETDESC() result.
  */
 int
 vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 		struct seg_desc *desc)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (is_segment_register(reg) || is_descriptor_table(reg))
 		return (VMGETDESC(vm->cookie, vcpu, reg, desc));
 
 	return (EINVAL);
 }
 
 /*
  * Load 'desc' into segment register or descriptor table 'reg' of 'vcpu'.
  *
  * Returns EINVAL for an out-of-range 'vcpu' or a 'reg' that is neither a
  * segment register nor a descriptor table; otherwise the VMSETDESC() result.
  */
 int
 vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 		struct seg_desc *desc)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (is_segment_register(reg) || is_descriptor_table(reg))
 		return (VMSETDESC(vm->cookie, vcpu, reg, desc));
 
 	return (EINVAL);
 }
 
 /*
  * Load the guest's saved FPU state (and XCR0 when the host has XSAVE
  * enabled) onto the current cpu, then leave FPU emulation turned on so
  * that any host access to the FPU traps.
  */
 static void
 restore_guest_fpustate(struct vcpu *vcpu)
 {
 
 	/* flush host state to the pcb */
 	fpuexit(curthread);
 
 	/* restore guest FPU state */
 	fpu_stop_emulating();
 	fpurestore(vcpu->guestfpu);
 
 	/* restore guest XCR0 if XSAVE is enabled in the host */
 	if (rcr4() & CR4_XSAVE)
 		load_xcr(0, vcpu->guest_xcr0);
 
 	/*
 	 * The FPU is now "dirty" with the guest's state so turn on emulation
 	 * to trap any access to the FPU by the host.
 	 */
 	fpu_start_emulating();
 }
 
 /*
  * Save the guest's FPU state (and XCR0 when the host has XSAVE enabled)
  * into 'vcpu' and restore the host XCR0.  Panics if CR0.TS is clear on
  * entry, i.e. if FPU emulation was not left enabled.
  */
 static void
 save_guest_fpustate(struct vcpu *vcpu)
 {
 
 	if ((rcr0() & CR0_TS) == 0)
 		panic("fpu emulation not enabled in host!");
 
 	/* save guest XCR0 and restore host XCR0 */
 	if (rcr4() & CR4_XSAVE) {
 		vcpu->guest_xcr0 = rxcr(0);
 		load_xcr(0, vmm_get_host_xcr0());
 	}
 
 	/* save guest FPU state */
 	fpu_stop_emulating();
 	fpusave(vcpu->guestfpu);
 	fpu_start_emulating();
 }
 
 /* Accumulated by vm_handle_hlt() with the ticks spent in each sleep. */
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
 /*
  * Move 'vcpu' to 'newstate'.  The vcpu lock must be held by the caller.
  *
  * When 'from_idle' is true the call sleeps (waking at least every 'hz'
  * ticks) until the vcpu is in VCPU_IDLE before attempting the transition;
  * otherwise the vcpu is asserted not to be idle.
  *
  * Returns 0 on success or EBUSY if the requested transition is not one of
  * the allowed ones.
  */
 static int
 vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
     bool from_idle)
 {
 	int error;
 
 	vcpu_assert_locked(vcpu);
 
 	/*
 	 * State transitions from the vmmdev_ioctl() must always begin from
 	 * the VCPU_IDLE state. This guarantees that there is only a single
 	 * ioctl() operating on a vcpu at any point.
 	 */
 	if (from_idle) {
 		while (vcpu->state != VCPU_IDLE)
 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
 	} else {
 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
 		    "vcpu idle state"));
 	}
 
 	/* 'hostcpu' is only meaningful while the vcpu is running. */
 	if (vcpu->state == VCPU_RUNNING) {
 		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
 		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
 	} else {
 		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
 		    "vcpu that is not running", vcpu->hostcpu));
 	}
 
 	/*
 	 * The following state transitions are allowed:
 	 * IDLE -> FROZEN -> IDLE
 	 * FROZEN -> RUNNING -> FROZEN
 	 * FROZEN -> SLEEPING -> FROZEN
 	 */
 	switch (vcpu->state) {
 	case VCPU_IDLE:
 	case VCPU_RUNNING:
 	case VCPU_SLEEPING:
 		/* These states may only move to FROZEN. */
 		error = (newstate != VCPU_FROZEN);
 		break;
 	case VCPU_FROZEN:
 		/* FROZEN may move to any state other than itself. */
 		error = (newstate == VCPU_FROZEN);
 		break;
 	default:
 		error = 1;
 		break;
 	}
 
 	if (error)
 		return (EBUSY);
 
 	vcpu->state = newstate;
 	if (newstate == VCPU_RUNNING)
 		vcpu->hostcpu = curcpu;
 	else
 		vcpu->hostcpu = NOCPU;
 
 	/* Wake anybody sleeping in the 'from_idle' loop above. */
 	if (newstate == VCPU_IDLE)
 		wakeup(&vcpu->state);
 
 	return (0);
 }
 
 static void
 vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 {
 	int error;
 
 	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
 		panic("Error %d setting state to %d\n", error, newstate);
 }
 
 static void
 vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
 {
 	int error;
 
 	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
 		panic("Error %d setting state to %d", error, newstate);
 }
 
 /*
  * Install 'func' as the VM's rendezvous function (NULL clears it).  The
  * caller must hold 'rendezvous_mtx'.
  */
 static void
 vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
 {
 
 	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
 
 	/*
 	 * Update 'rendezvous_func' and execute a write memory barrier to
 	 * ensure that it is visible across all host cpus. This is not needed
 	 * for correctness but it does ensure that all the vcpus will notice
 	 * that the rendezvous is requested immediately.
 	 */
 	vm->rendezvous_func = func;
 	wmb();
 }
 
 /*
  * Trace helper for rendezvous code that may run without a vcpu context:
  * uses the per-vcpu trace macro when 'vcpuid' is non-negative and the
  * VM-wide one otherwise.
  */
 #define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
 	do {								\
 		if (vcpuid >= 0)					\
 			VCPU_CTR0(vm, vcpuid, fmt);			\
 		else							\
 			VM_CTR0(vm, fmt);				\
 	} while (0)
 
 /*
  * Participate in, or wait for completion of, a pending rendezvous.
  *
  * 'vcpuid' is either a valid vcpu index or -1 when the caller is not
  * acting on behalf of a vcpu.  A vcpu that is part of the requested set
  * and has not yet run the rendezvous function runs it once.  Whoever
  * observes that all requested vcpus are done clears the rendezvous
  * function and wakes the other waiters; everybody else sleeps on
  * 'rendezvous_func' until then.
  */
 static void
 vm_handle_rendezvous(struct vm *vm, int vcpuid)
 {
 
 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
 	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
 
 	mtx_lock(&vm->rendezvous_mtx);
 	while (vm->rendezvous_func != NULL) {
 		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
 		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
 
 		if (vcpuid != -1 &&
 		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
 		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
 			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
 			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
 			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
 		}
 		if (CPU_CMP(&vm->rendezvous_req_cpus,
 		    &vm->rendezvous_done_cpus) == 0) {
 			/*
 			 * 'vcpuid' may be -1 here so use the trace helper
 			 * that copes with the absence of a vcpu.
 			 */
 			RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous completed");
 			vm_set_rendezvous_func(vm, NULL);
 			wakeup(&vm->rendezvous_func);
 			break;
 		}
 		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
 		    "vmrndv", 0);
 	}
 	mtx_unlock(&vm->rendezvous_mtx);
 }
 
 /*
  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
  *
  * 'intr_disabled' tells whether the guest executed HLT with interrupts
  * disabled; in that case pending interrupts do not end the sleep and the
  * vcpu takes part in halt detection.  'retu' is not used by this function.
  *
  * Always returns 0.  If every active vcpu is found halted the VM is
  * suspended with VM_SUSPEND_HALT.
  */
 static int
 vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 {
 	struct vcpu *vcpu;
 	const char *wmesg;
 	int error, t, vcpu_halted, vm_halted;
 
 	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
 
 	vcpu = &vm->vcpu[vcpuid];
 	vcpu_halted = 0;
 	vm_halted = 0;
 
 	/*
 	 * The typical way to halt a cpu is to execute: "sti; hlt"
 	 *
 	 * STI sets RFLAGS.IF to enable interrupts. However, the processor
 	 * remains in an "interrupt shadow" for an additional instruction
 	 * following the STI. This guarantees that "sti; hlt" sequence is
 	 * atomic and a pending interrupt will be recognized after the HLT.
 	 *
 	 * After the HLT emulation is done the vcpu is no longer in an
 	 * interrupt shadow and a pending interrupt can be injected on
 	 * the next entry into the guest.
 	 */
 	error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
 	KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
 	    __func__, error));
 
 	vcpu_lock(vcpu);
 	while (1) {
 		/*
 		 * Do a final check for pending NMI or interrupts before
 		 * really putting this thread to sleep. Also check for
 		 * software events that would cause this vcpu to wakeup.
 		 *
 		 * These interrupts/events could have happened after the
 		 * vcpu returned from VMRUN() and before it acquired the
 		 * vcpu lock above.
 		 */
 		if (vm->rendezvous_func != NULL || vm->suspend)
 			break;
 		if (vm_nmi_pending(vm, vcpuid))
 			break;
 		if (!intr_disabled) {
 			if (vm_extint_pending(vm, vcpuid) ||
 			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
 				break;
 			}
 		}
 
 		/* Don't go to sleep if the vcpu thread needs to yield */
 		if (vcpu_should_yield(vm, vcpuid))
 			break;
 
 		/*
 		 * Some Linux guests implement "halt" by having all vcpus
 		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
 		 * track of the vcpus that have entered this state. When all
 		 * vcpus enter the halted state the virtual machine is halted.
 		 */
 		if (intr_disabled) {
 			wmesg = "vmhalt";
 			VCPU_CTR0(vm, vcpuid, "Halted");
 			if (!vcpu_halted && halt_detection_enabled) {
 				vcpu_halted = 1;
 				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
 			}
 			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
 				vm_halted = 1;
 				break;
 			}
 		} else {
 			wmesg = "vmidle";
 		}
 
 		/* Sleep for at most 'hz' ticks and account the idle time. */
 		t = ticks;
 		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
 		/*
 		 * XXX msleep_spin() cannot be interrupted by signals so
 		 * wake up periodically to check pending signals.
 		 */
 		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
 		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 	}
 
 	/* Undo this vcpu's contribution to halt detection, if any. */
 	if (vcpu_halted)
 		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);
 
 	vcpu_unlock(vcpu);
 
 	/* The suspend request is issued only after dropping the vcpu lock. */
 	if (vm_halted)
 		vm_suspend(vm, VM_SUSPEND_HALT);
 
 	return (0);
 }
 
 /*
  * Handle a VM_EXITCODE_PAGING exit for 'vcpuid'.
  *
  * Read and write faults are first offered to
  * pmap_emulate_accessed_dirty(); if that does not resolve the fault, or
  * the fault is an execute fault, vm_fault() is invoked on the guest's
  * address space.  'retu' is not used by this function.
  *
  * Returns 0 with the exit's 'inst_length' cleared so the faulting
  * instruction is restarted, or EFAULT if vm_fault() fails.
  */
 static int
 vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
 {
 	int rv, ftype;
 	struct vm_map *map;
 	struct vcpu *vcpu;
 	struct vm_exit *vme;
 
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 
 	ftype = vme->u.paging.fault_type;
 	KASSERT(ftype == VM_PROT_READ ||
 	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
 	    ("vm_handle_paging: invalid fault_type %d", ftype));
 
 	/* Try accessed/dirty bit emulation before taking a full fault. */
 	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
 		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
 		    vme->u.paging.gpa, ftype);
 		if (rv == 0) {
 			VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
 			    ftype == VM_PROT_READ ? "accessed" : "dirty",
 			    vme->u.paging.gpa);
 			goto done;
 		}
 	}
 
 	map = &vm->vmspace->vm_map;
 	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
 
 	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
 	    "ftype = %d", rv, vme->u.paging.gpa, ftype);
 
 	if (rv != KERN_SUCCESS)
 		return (EFAULT);
 done:
 	/* restart execution at the faulting instruction */
 	vme->inst_length = 0;
 
 	return (0);
 }
 
 /*
  * Handle a VM_EXITCODE_INST_EMUL exit for 'vcpuid': fetch (if needed) and
  * decode the faulting instruction, then emulate it in the kernel if the
  * access targets the local APIC, I/O APIC or HPET ranges.  Any other
  * address sets '*retu' so the exit is completed in userland.
  *
  * Returns 0 on success (including the case where the guest must be resumed
  * to handle a page fault raised during the fetch), EFAULT if the fetch or
  * decode fails, or the result of vmm_emulate_instruction().
  */
 static int
 vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
 {
 	struct vie *vie;
 	struct vcpu *vcpu;
 	struct vm_exit *vme;
 	uint64_t gla, gpa;
 	struct vm_guest_paging *paging;
 	mem_region_read_t mread;
 	mem_region_write_t mwrite;
 	enum vm_cpu_mode cpu_mode;
 	int cs_d, error, length;
 
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 
 	gla = vme->u.inst_emul.gla;
 	gpa = vme->u.inst_emul.gpa;
 	cs_d = vme->u.inst_emul.cs_d;
 	vie = &vme->u.inst_emul.vie;
 	paging = &vme->u.inst_emul.paging;
 	cpu_mode = paging->cpu_mode;
 
 	VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);
 
 	/* Fetch, decode and emulate the faulting instruction */
 	if (vie->num_valid == 0) {
 		/*
 		 * If the instruction length is not known then assume a
 		 * maximum size instruction.
 		 */
 		length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE;
 		error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
 		    length, vie);
 	} else {
 		/*
 		 * The instruction bytes have already been copied into 'vie'
 		 */
 		error = 0;
 	}
 	if (error == 1)
 		return (0);		/* Resume guest to handle page fault */
 	else if (error == -1)
 		return (EFAULT);
 	else if (error != 0)
 		panic("%s: vmm_fetch_instruction error %d", __func__, error);
 
 	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0)
 		return (EFAULT);
 
 	/*
 	 * If the instruction length is not specified then update it now.
 	 */
 	if (vme->inst_length == 0)
 		vme->inst_length = vie->num_processed;
  
 	/* return to userland unless this is an in-kernel emulated device */
 	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
 		mread = lapic_mmio_read;
 		mwrite = lapic_mmio_write;
 	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
 		mread = vioapic_mmio_read;
 		mwrite = vioapic_mmio_write;
 	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
 		mread = vhpet_mmio_read;
 		mwrite = vhpet_mmio_write;
 	} else {
 		*retu = true;
 		return (0);
 	}
 
 	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
 	    mread, mwrite, retu);
 
 	return (error);
 }
 
 /*
  * Handle a VM_EXITCODE_SUSPENDED exit for 'vcpuid'.
  *
  * Marks the vcpu as suspended, waits until every active vcpu has done the
  * same, then notifies the suspended vcpus and sets '*retu' so the exit is
  * reported to userspace.  Always returns 0.
  */
 static int
 vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
 {
 	int i;
 	struct vcpu *vcpu;
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);
 
 	/*
 	 * Wait until all 'active_cpus' have suspended themselves.
 	 *
 	 * Since a VM may be suspended at any time including when one or
 	 * more vcpus are doing a rendezvous we need to call the rendezvous
 	 * handler while we are waiting to prevent a deadlock.
 	 */
 	vcpu_lock(vcpu);
 	while (1) {
 		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
 			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
 			break;
 		}
 
 		if (vm->rendezvous_func == NULL) {
 			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
 			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
 			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
 			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
 		} else {
 			/* The vcpu lock is dropped around the rendezvous. */
 			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
 			vcpu_unlock(vcpu);
 			vm_handle_rendezvous(vm, vcpuid);
 			vcpu_lock(vcpu);
 		}
 	}
 	vcpu_unlock(vcpu);
 
 	/*
 	 * Wakeup the other sleeping vcpus and return to userspace.
 	 */
 	for (i = 0; i < VM_MAXCPU; i++) {
 		if (CPU_ISSET(i, &vm->suspended_cpus)) {
 			vcpu_notify_event(vm, i, false);
 		}
 	}
 
 	*retu = true;
 	return (0);
 }
 
 /*
  * Request suspension of the VM for reason 'how' and notify all active
  * vcpus.
  *
  * Returns 0 if this call set the suspend reason, EINVAL if 'how' is out
  * of range, or EALREADY if a suspend reason was already set.
  */
 int
 vm_suspend(struct vm *vm, enum vm_suspend_how how)
 {
 	int cpu;
 
 	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
 		return (EINVAL);
 
 	/* Only the first requester gets to set the suspend reason. */
 	if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) {
 		VM_CTR2(vm, "virtual machine already suspended %d/%d",
 		    vm->suspend, how);
 		return (EALREADY);
 	}
 
 	VM_CTR1(vm, "virtual machine successfully suspended %d", how);
 
 	/*
 	 * Notify all active vcpus that they are now suspended.
 	 */
 	for (cpu = 0; cpu < VM_MAXCPU; cpu++) {
 		if (!CPU_ISSET(cpu, &vm->active_cpus))
 			continue;
 		vcpu_notify_event(vm, cpu, false);
 	}
 
 	return (0);
 }
 
 /*
  * Fill in the exit information of 'vcpuid' to describe a
  * VM_EXITCODE_SUSPENDED exit at 'rip', recording the VM's suspend reason.
  */
 void
 vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vme;
 
 	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
 	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
 
 	vme = vm_exitinfo(vm, vcpuid);
 	vme->rip = rip;
 	vme->inst_length = 0;
 	vme->exitcode = VM_EXITCODE_SUSPENDED;
 	vme->u.suspended.how = vm->suspend;
 }
 
 /*
  * Fill in the exit information of 'vcpuid' to describe a
  * VM_EXITCODE_RENDEZVOUS exit at 'rip' and count it.
  */
 void
 vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vme;
 
 	KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress"));
 
 	vme = vm_exitinfo(vm, vcpuid);
 	vme->rip = rip;
 	vme->inst_length = 0;
 	vme->exitcode = VM_EXITCODE_RENDEZVOUS;
 
 	vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1);
 }
 
 /*
  * Fill in the exit information of 'vcpuid' for an exit caused by a pending
  * AST: reported as VM_EXITCODE_BOGUS at 'rip' and counted separately.
  */
 void
 vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vme;
 
 	vme = vm_exitinfo(vm, vcpuid);
 	vme->rip = rip;
 	vme->inst_length = 0;
 	vme->exitcode = VM_EXITCODE_BOGUS;
 
 	vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
 }
 
 /*
  * Run the vcpu named by 'vmrun->cpuid' starting at 'vmrun->rip'.
  *
  * The guest is entered repeatedly; exits that can be handled in the kernel
  * are processed here and the guest is resumed.  The loop ends when a
  * handler fails or when an exit has to be completed in userland, at which
  * point the exit information is copied into 'vmrun->vm_exit'.
  *
  * Returns EINVAL if the vcpu id is out of range, not active, or already
  * suspended; otherwise the error from VMRUN() or from the exit handler.
  */
 int
 vm_run(struct vm *vm, struct vm_run *vmrun)
 {
 	int error, vcpuid;
 	struct vcpu *vcpu;
 	struct pcb *pcb;
 	uint64_t tscval, rip;
 	struct vm_exit *vme;
 	bool retu, intr_disabled;
 	pmap_t pmap;
 	void *rptr, *sptr;
 
 	vcpuid = vmrun->cpuid;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
 		return (EINVAL);
 
 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
 		return (EINVAL);
 
 	/* Addresses of the rendezvous/suspend indicators handed to VMRUN(). */
 	rptr = &vm->rendezvous_func;
 	sptr = &vm->suspend;
 	pmap = vmspace_pmap(vm->vmspace);
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 	rip = vmrun->rip;
 restart:
 	/* Guest entry and the FPU state switch happen in a critical section. */
 	critical_enter();
 
 	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
 	    ("vm_run: absurd pm_active"));
 
 	tscval = rdtsc();
 
 	pcb = PCPU_GET(curpcb);
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 
 	restore_guest_fpustate(vcpu);
 
 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
 	error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
 
 	save_guest_fpustate(vcpu);
 
 	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
 
 	critical_exit();
 
 	if (error == 0) {
 		/* 'retu' is set when the exit must be finished in userland. */
 		retu = false;
 		switch (vme->exitcode) {
 		case VM_EXITCODE_SUSPENDED:
 			error = vm_handle_suspend(vm, vcpuid, &retu);
 			break;
 		case VM_EXITCODE_IOAPIC_EOI:
 			vioapic_process_eoi(vm, vcpuid,
 			    vme->u.ioapic_eoi.vector);
 			break;
 		case VM_EXITCODE_RENDEZVOUS:
 			vm_handle_rendezvous(vm, vcpuid);
 			error = 0;
 			break;
 		case VM_EXITCODE_HLT:
 			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
 			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
 			break;
 		case VM_EXITCODE_PAGING:
 			error = vm_handle_paging(vm, vcpuid, &retu);
 			break;
 		case VM_EXITCODE_INST_EMUL:
 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
 			break;
 		case VM_EXITCODE_INOUT:
 		case VM_EXITCODE_INOUT_STR:
 			error = vm_handle_inout(vm, vcpuid, vme, &retu);
 			break;
 		case VM_EXITCODE_MONITOR:
 		case VM_EXITCODE_MWAIT:
 			vm_inject_ud(vm, vcpuid);
 			break;
 		default:
 			retu = true;	/* handled in userland */
 			break;
 		}
 	}
 
 	/* Handled entirely in the kernel: resume past the instruction. */
 	if (error == 0 && retu == false) {
 		rip = vme->rip + vme->inst_length;
 		goto restart;
 	}
 
 	/* copy the exit information */
 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
 	return (error);
 }
 
 /*
  * Record 'info' as the event that was being delivered when 'vcpuid' exited
  * from the guest.  An 'info' without VM_INTINFO_VALID is stored as 0.
  *
  * Returns 0 on success or EINVAL if 'vcpuid' is out of range or a valid
  * 'info' is malformed (NMI with a vector other than IDT_NMI, hardware
  * exception with a vector >= 32, or reserved bits set).
  */
 int
 vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
 {
 	struct vcpu *vcpu;
 	int type, vector;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	if ((info & VM_INTINFO_VALID) == 0) {
 		info = 0;
 	} else {
 		type = info & VM_INTINFO_TYPE;
 		vector = info & 0xff;
 		if (type == VM_INTINFO_NMI && vector != IDT_NMI)
 			return (EINVAL);
 		if (type == VM_INTINFO_HWEXCEPTION && vector >= 32)
 			return (EINVAL);
 		if (info & VM_INTINFO_RSVD)
 			return (EINVAL);
 	}
 
 	VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
 	vcpu->exitintinfo = info;
 
 	return (0);
 }
 
 /*
  * Exception classes used by nested_fault() to decide whether two
  * back-to-back events combine into a double fault.
  */
 enum exc_class {
 	EXC_BENIGN,
 	EXC_CONTRIBUTORY,
 	EXC_PAGEFAULT
 };
 
 #define	IDT_VE	20	/* Virtualization Exception (Intel specific) */
 
 /*
  * Classify the event described by 'info' (which must be valid) as benign,
  * contributory or page-fault class.
  */
 static enum exc_class
 exception_class(uint64_t info)
 {
 	int type, vector;
 
 	KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
 	type = info & VM_INTINFO_TYPE;
 	vector = info & 0xff;
 
 	/* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
 	if (type == VM_INTINFO_HWINTR || type == VM_INTINFO_SWINTR ||
 	    type == VM_INTINFO_NMI)
 		return (EXC_BENIGN);
 
 	/*
 	 * Anything else is treated as a hardware exception.
 	 *
 	 * SVM and VT-x use identical type values to represent NMI,
 	 * hardware interrupt and software interrupt.
 	 *
 	 * SVM uses type '3' for all exceptions. VT-x uses type '3'
 	 * for exceptions except #BP and #OF. #BP and #OF use a type
 	 * value of '5' or '6'. Therefore we don't check for explicit
 	 * values of 'type' to classify 'intinfo' into a hardware
 	 * exception.
 	 */
 	if (vector == IDT_PF || vector == IDT_VE)
 		return (EXC_PAGEFAULT);
 
 	if (vector == IDT_DE || vector == IDT_TS || vector == IDT_NP ||
 	    vector == IDT_SS || vector == IDT_GP)
 		return (EXC_CONTRIBUTORY);
 
 	return (EXC_BENIGN);
 }
 
 /*
  * Combine two valid events: 'info1' (being delivered at the time of the
  * exit) and 'info2' (the newly pending exception).
  *
  * Returns 1 with '*retinfo' set to the event to inject, which is either
  * 'info2' or a double fault.  Returns 0 with '*retinfo' cleared after
  * suspending the VM when 'info1' was itself a double fault (triple fault).
  */
 static int
 nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
     uint64_t *retinfo)
 {
 	enum exc_class first, second;
 	int prev_type, prev_vector;
 	bool double_fault;
 
 	KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
 	KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));
 
 	/*
 	 * If an exception occurs while attempting to call the double-fault
 	 * handler the processor enters shutdown mode (aka triple fault).
 	 */
 	prev_type = info1 & VM_INTINFO_TYPE;
 	prev_vector = info1 & 0xff;
 	if (prev_type == VM_INTINFO_HWEXCEPTION && prev_vector == IDT_DF) {
 		VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
 		    info1, info2);
 		vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
 		*retinfo = 0;
 		return (0);
 	}
 
 	/*
 	 * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
 	 */
 	first = exception_class(info1);
 	second = exception_class(info2);
 	if (first == EXC_CONTRIBUTORY)
 		double_fault = (second == EXC_CONTRIBUTORY);
 	else if (first == EXC_PAGEFAULT)
 		double_fault = (second != EXC_BENIGN);
 	else
 		double_fault = false;
 
 	if (!double_fault) {
 		/* Handle exceptions serially */
 		*retinfo = info2;
 		return (1);
 	}
 
 	/* Convert nested fault into a double fault. */
 	*retinfo = IDT_DF;
 	*retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
 	*retinfo |= VM_INTINFO_DEL_ERRCODE;
 	return (1);
 }
 
 /*
  * Encode the vcpu's pending exception, if any, in 'intinfo' format.
  * Returns 0 when no exception is pending.
  */
 static uint64_t
 vcpu_exception_intinfo(struct vcpu *vcpu)
 {
 	uint64_t encoded;
 
 	if (!vcpu->exception_pending)
 		return (0);
 
 	encoded = vcpu->exception.vector & 0xff;
 	encoded |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
 	if (vcpu->exception.error_code_valid) {
 		/* The error code travels in the upper 32 bits. */
 		encoded |= VM_INTINFO_DEL_ERRCODE;
 		encoded |= (uint64_t)vcpu->exception.error_code << 32;
 	}
 
 	return (encoded);
 }
 
 /*
  * Compute the event to inject on the next guest entry of 'vcpuid'.
  *
  * Consumes both the saved exit-time event and the pending exception,
  * combining them through nested_fault() when both are valid.
  *
  * Returns non-zero with '*retinfo' set when there is an event to inject,
  * 0 otherwise.
  */
 int
 vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
 {
 	struct vcpu *vcpu;
 	uint64_t info1, info2;
 	int have1, have2, valid;
 
 	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	/* Take and clear the event that was in flight at exit time. */
 	info1 = vcpu->exitintinfo;
 	vcpu->exitintinfo = 0;
 
 	/* Take and clear the pending exception, if there is one. */
 	info2 = 0;
 	if (vcpu->exception_pending) {
 		info2 = vcpu_exception_intinfo(vcpu);
 		vcpu->exception_pending = 0;
 		VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
 		    vcpu->exception.vector, info2);
 	}
 
 	have1 = (info1 & VM_INTINFO_VALID) != 0;
 	have2 = (info2 & VM_INTINFO_VALID) != 0;
 
 	if (have1 && have2) {
 		valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
 	} else if (have1 || have2) {
 		*retinfo = have1 ? info1 : info2;
 		valid = 1;
 	} else {
 		valid = 0;
 	}
 
 	if (valid) {
 		VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
 		    "retinfo(%#lx)", __func__, info1, info2, *retinfo);
 	}
 
 	return (valid);
 }
 
 /*
  * Report, without consuming them, the saved exit-time event ('*info1') and
  * the encoded pending exception ('*info2') of 'vcpuid'.
  *
  * Returns 0 on success or EINVAL if 'vcpuid' is out of range.
  */
 int
 vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
 {
 	struct vcpu *target;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	target = &vm->vcpu[vcpuid];
 
 	*info1 = target->exitintinfo;
 	*info2 = vcpu_exception_intinfo(target);
 
 	return (0);
 }
 
 /*
  * Queue 'exception' for delivery to 'vcpuid'.
  *
  * Returns 0 on success, EINVAL if 'vcpuid' or the vector is out of range
  * or the vector is IDT_DF, and EBUSY if an exception is already pending.
  */
 int
 vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
 {
 	struct vcpu *target;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (exception->vector < 0 || exception->vector >= 32)
 		return (EINVAL);
 
 	/*
 	 * A double fault exception should never be injected directly into
 	 * the guest. It is a derived exception that results from specific
 	 * combinations of nested faults.
 	 */
 	if (exception->vector == IDT_DF)
 		return (EINVAL);
 
 	target = &vm->vcpu[vcpuid];
 
 	/* Only one exception may be pending at a time. */
 	if (target->exception_pending) {
 		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
 		    "pending exception %d", exception->vector,
 		    target->exception.vector);
 		return (EBUSY);
 	}
 
 	target->exception_pending = 1;
 	target->exception = *exception;
 	VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
 
 	return (0);
 }
 
 void
 vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
     int errcode)
 {
 	struct vm_exception exception;
 	struct vm_exit *vmexit;
 	struct vm *vm;
 	int error;
 
 	vm = vmarg;
 
 	exception.vector = vector;
 	exception.error_code = errcode;
 	exception.error_code_valid = errcode_valid;
 	error = vm_inject_exception(vm, vcpuid, &exception);
 	KASSERT(error == 0, ("vm_inject_exception error %d", error));
 
 	/*
 	 * A fault-like exception allows the instruction to be restarted
 	 * after the exception handler returns.
 	 *
 	 * By setting the inst_length to 0 we ensure that the instruction
 	 * pointer remains at the faulting instruction.
 	 */
 	vmexit = vm_exitinfo(vm, vcpuid);
 	vmexit->inst_length = 0;
 }
 
 /*
  * Inject a page fault into 'vcpuid': load 'cr2' into the guest's CR2 and
  * queue IDT_PF with 'error_code'.  'vmarg' is the VM.
  */
 void
 vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2)
 {
 	struct vm *vm;
 	int rc;
 
 	vm = vmarg;
 	VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
 	    error_code, cr2);
 
 	/* CR2 is updated before the exception itself is queued. */
 	rc = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
 	KASSERT(rc == 0, ("vm_set_register(cr2) error %d", rc));
 
 	vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code);
 }
 
 /* Incremented by vm_nmi_clear() each time a pending NMI is cleared. */
 static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
 
 /*
  * Mark an NMI as pending for 'vcpuid' and notify the vcpu.
  *
  * Returns 0 on success or EINVAL if 'vcpuid' is out of range.
  */
 int
 vm_inject_nmi(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	vm->vcpu[vcpuid].nmi_pending = 1;
 	vcpu_notify_event(vm, vcpuid, false);
 
 	return (0);
 }
 
 /*
  * Return the 'nmi_pending' flag of 'vcpuid'.  Panics if 'vcpuid' is out of
  * range.
  */
 int
 vm_nmi_pending(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
 
 	return (vm->vcpu[vcpuid].nmi_pending);
 }
 
 /*
  * Clear the pending NMI of 'vcpuid' and count it as delivered.
  *
  * Panics if 'vcpuid' is out of range or if no NMI is pending.
  */
 void
 vm_nmi_clear(struct vm *vm, int vcpuid)
 {
 	struct vcpu *vcpu;
 
 	/* Report the failure under this function's own name. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	if (vcpu->nmi_pending == 0)
 		panic("vm_nmi_clear: inconsistent nmi_pending state");
 
 	vcpu->nmi_pending = 0;
 	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
 }
 
 /* Incremented by vm_extint_clear() each time a pending ExtINT is cleared. */
 static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
 
 /*
  * Mark an ExtINT as pending for 'vcpuid' and notify the vcpu.
  *
  * Returns 0 on success or EINVAL if 'vcpuid' is out of range.
  */
 int
 vm_inject_extint(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	vm->vcpu[vcpuid].extint_pending = 1;
 	vcpu_notify_event(vm, vcpuid, false);
 
 	return (0);
 }
 
 /*
  * Return the 'extint_pending' flag of 'vcpuid'.  Panics if 'vcpuid' is out
  * of range.
  */
 int
 vm_extint_pending(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
 
 	return (vm->vcpu[vcpuid].extint_pending);
 }
 
 /*
  * Clear the pending ExtINT of 'vcpuid' and count it as delivered.
  *
  * Panics if 'vcpuid' is out of range or if no ExtINT is pending.
  */
 void
 vm_extint_clear(struct vm *vm, int vcpuid)
 {
 	struct vcpu *vcpu;
 
 	/* Report the failure under this function's own name. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	if (vcpu->extint_pending == 0)
 		panic("vm_extint_clear: inconsistent extint_pending state");
 
 	vcpu->extint_pending = 0;
 	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
 }
 
 /*
  * Query capability 'type' of 'vcpu' into '*retval'.
  *
  * Returns EINVAL if 'vcpu' or 'type' is out of range, otherwise the result
  * of VMGETCAP().
  */
 int
 vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU ||
 	    type < 0 || type >= VM_CAP_MAX)
 		return (EINVAL);
 
 	return (VMGETCAP(vm->cookie, vcpu, type, retval));
 }
 
 /*
  * Set capability 'type' of 'vcpu' to 'val'.
  *
  * Returns EINVAL if 'vcpu' or 'type' is out of range, otherwise the result
  * of VMSETCAP().
  */
 int
 vm_set_capability(struct vm *vm, int vcpu, int type, int val)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU ||
 	    type < 0 || type >= VM_CAP_MAX)
 		return (EINVAL);
 
 	return (VMSETCAP(vm->cookie, vcpu, type, val));
 }
 
 /*
  * Return the virtual local APIC of vcpu 'cpu'.  'cpu' is not range checked
  * here.
  */
 struct vlapic *
 vm_lapic(struct vm *vm, int cpu)
 {
 
 	return (vm->vcpu[cpu].vlapic);
 }
 
 /* Return the VM's virtual I/O APIC. */
 struct vioapic *
 vm_ioapic(struct vm *vm)
 {
 
 	return (vm->vioapic);
 }
 
 /* Return the vm's 'vhpet' pointer. */
 struct vhpet *
 vm_hpet(struct vm *vm)
 {
 	struct vhpet *vhpet;
 
 	vhpet = vm->vhpet;
 	return (vhpet);
 }
 
 /*
  * Report whether PCI device 'bus'/'slot'/'func' is listed in one of the
  * "pptdevs", "pptdevs2" or "pptdevs3" kernel environment variables.
  *
  * Each variable holds space-separated "b/s/f" triples, for example:
  *	set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12"
  *
  * Returns non-zero if a matching triple is found, 0 otherwise.
  */
 boolean_t
 vmm_is_pptdev(int bus, int slot, int func)
 {
 	/*
 	 * XXX
 	 * The length of an environment variable is limited to 128 bytes which
 	 * puts an upper limit on the number of passthru devices that may be
 	 * specified using a single environment variable.
 	 *
 	 * Work around this by scanning multiple environment variable
 	 * names instead of a single one - yuck!
 	 */
 	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
 	char *envval, *tok, *next;
 	int b, s, f;
 	int i, n, matched;
 
 	matched = 0;
 	for (i = 0; names[i] != NULL && !matched; i++) {
 		envval = kern_getenv(names[i]);
 		for (tok = envval; tok != NULL && *tok != '\0'; tok = next) {
 			/* Temporarily terminate the current token. */
 			next = strchr(tok, ' ');
 			if (next != NULL)
 				*next = '\0';
 
 			n = sscanf(tok, "%d/%d/%d", &b, &s, &f);
 			if (n == 3 && bus == b && slot == s && func == f) {
 				matched = 1;
 				break;
 			}
 
 			/* Put the separator back and step past it. */
 			if (next != NULL)
 				*next++ = ' ';
 		}
 		freeenv(envval);
 	}
 	return (matched);
 }
 
 /* Return the vm's 'iommu' pointer. */
 void *
 vm_iommu_domain(struct vm *vm)
 {
 	void *domain;
 
 	domain = vm->iommu;
 	return (domain);
 }
 
 /*
  * Change the state of 'vcpuid' to 'newstate' while holding the vcpu lock.
  *
  * Panics when 'vcpuid' is out of range.  Otherwise returns whatever
  * vcpu_set_state_locked() returns.
  */
 int
 vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
     bool from_idle)
 {
 	int error;
 	struct vcpu *vcpu;
 
 	/* The panic names this function rather than a stale 'vm_set_run_state'. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
 	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
 	vcpu_unlock(vcpu);
 
 	return (error);
 }
 
 /*
  * Return the current state of 'vcpuid', sampled under the vcpu lock.
  *
  * If 'hostcpu' is not NULL, '*hostcpu' receives the vcpu's 'hostcpu' field
  * read under the same lock.  Panics when 'vcpuid' is out of range.
  */
 enum vcpu_state
 vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
 {
 	struct vcpu *vcpu;
 	enum vcpu_state state;
 
 	/* The panic names this function rather than a stale 'vm_get_run_state'. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
 	state = vcpu->state;
 	if (hostcpu != NULL)
 		*hostcpu = vcpu->hostcpu;
 	vcpu_unlock(vcpu);
 
 	return (state);
 }
 
 /*
  * Add 'vcpuid' to the vm's set of active cpus.
  *
  * Returns EINVAL for an out-of-range 'vcpuid', EBUSY if the vcpu is already
  * in the active set, and 0 once it has been added.
  */
 int
 vm_activate_cpu(struct vm *vm, int vcpuid)
 {
 
 	if (!(vcpuid >= 0 && vcpuid < VM_MAXCPU))
 		return (EINVAL);
 
 	if (!CPU_ISSET(vcpuid, &vm->active_cpus)) {
 		VCPU_CTR0(vm, vcpuid, "activated");
 		CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
 		return (0);
 	}
 
 	return (EBUSY);
 }
 
 /* Return a copy of the vm's 'active_cpus' set. */
 cpuset_t
 vm_active_cpus(struct vm *vm)
 {
 	cpuset_t cpus;
 
 	cpus = vm->active_cpus;
 	return (cpus);
 }
 
 /* Return a copy of the vm's 'suspended_cpus' set. */
 cpuset_t
 vm_suspended_cpus(struct vm *vm)
 {
 	cpuset_t cpus;
 
 	cpus = vm->suspended_cpus;
 	return (cpus);
 }
 
 void *
 vcpu_stats(struct vm *vm, int vcpuid)
 {
 
 	return (vm->vcpu[vcpuid].stats);
 }
 
 int
 vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
 {
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	*state = vm->vcpu[vcpuid].x2apic_state;
 
 	return (0);
 }
 
 int
 vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
 {
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (state >= X2APIC_STATE_LAST)
 		return (EINVAL);
 
 	vm->vcpu[vcpuid].x2apic_state = state;
 
 	vlapic_set_x2apic_state(vm, vcpuid, state);
 
 	return (0);
 }
 
 /*
  * Make sure a vcpu notices a pending event as soon as possible:
  * - A sleeping vcpu thread is woken up.
  * - A vcpu running on a different host cpu gets an IPI (posted through the
  *   vlapic when 'lapic_intr' is true) so that it traps into the hypervisor.
  * - A vcpu running on the current host cpu needs nothing, see below.
  */
 void
 vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
 {
 	struct vcpu *vcpu = &vm->vcpu[vcpuid];
 	int hostcpu;
 
 	vcpu_lock(vcpu);
 	hostcpu = vcpu->hostcpu;
 	if (vcpu->state != VCPU_RUNNING) {
 		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
 		    "with hostcpu %d", vcpu->state, hostcpu));
 		if (vcpu->state == VCPU_SLEEPING)
 			wakeup_one(vcpu);
 	} else {
 		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
 		if (hostcpu == curcpu) {
 			/*
 			 * If the 'vcpu' is running on 'curcpu' then it must
 			 * be sending a notification to itself (e.g. SELF_IPI).
 			 * The pending event will be picked up when the vcpu
 			 * transitions back to guest context.
 			 */
 		} else if (lapic_intr) {
 			vlapic_post_intr(vcpu->vlapic, hostcpu, vmm_ipinum);
 		} else {
 			ipi_cpu(hostcpu, vmm_ipinum);
 		}
 	}
 	vcpu_unlock(vcpu);
 }
 
 /* Return the vm's 'vmspace' pointer. */
 struct vmspace *
 vm_get_vmspace(struct vm *vm)
 {
 	struct vmspace *vmspace;
 
 	vmspace = vm->vmspace;
 	return (vmspace);
 }
 
 /*
  * Translate an APIC id into a vcpu id.
  *
  * XXX the two are assumed to be numerically identical, so 'vm' is unused
  * and 'apicid' is returned unchanged.
  */
 int
 vm_apicid2vcpuid(struct vm *vm, int apicid)
 {
 	int vcpuid;
 
 	vcpuid = apicid;
 	return (vcpuid);
 }
 
 /*
  * Start a rendezvous: record 'func'/'arg' for the vcpus in 'dest', notify
  * those vcpus, then take part in the rendezvous on behalf of 'vcpuid'.
  *
  * 'vcpuid' is either a valid vcpu index or -1.  Must be called without any
  * locks held.
  */
 void
 vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
     vm_rendezvous_func_t func, void *arg)
 {
 	int i;
 
 	/*
 	 * Enforce that this function is called without any locks
 	 */
 	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
 	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
 
 	/*
 	 * Loop until the rendezvous mutex is held with no rendezvous in
 	 * progress; the loop exits with the mutex still locked.
 	 */
 	for (;;) {
 		mtx_lock(&vm->rendezvous_mtx);
 		if (vm->rendezvous_func == NULL)
 			break;
 
 		/*
 		 * If a rendezvous is already in progress then we need to
 		 * call the rendezvous handler in case this 'vcpuid' is one
 		 * of the targets of the rendezvous.
 		 */
 		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
 		mtx_unlock(&vm->rendezvous_mtx);
 		vm_handle_rendezvous(vm, vcpuid);
 	}
 	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
 	    "rendezvous is still in progress"));
 
 	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
 	vm->rendezvous_req_cpus = dest;
 	CPU_ZERO(&vm->rendezvous_done_cpus);
 	vm->rendezvous_arg = arg;
 	vm_set_rendezvous_func(vm, func);
 	mtx_unlock(&vm->rendezvous_mtx);
 
 	/*
 	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
 	 * vcpus so they handle the rendezvous as soon as possible.
 	 */
 	for (i = 0; i < VM_MAXCPU; i++) {
 		if (!CPU_ISSET(i, &dest))
 			continue;
 		vcpu_notify_event(vm, i, false);
 	}
 
 	vm_handle_rendezvous(vm, vcpuid);
 }
 
 /* Return the vm's 'vatpic' pointer. */
 struct vatpic *
 vm_atpic(struct vm *vm)
 {
 	struct vatpic *vatpic;
 
 	vatpic = vm->vatpic;
 	return (vatpic);
 }
 
 /* Return the vm's 'vatpit' pointer. */
 struct vatpit *
 vm_atpit(struct vm *vm)
 {
 	struct vatpit *vatpit;
 
 	vatpit = vm->vatpit;
 	return (vatpit);
 }
 
 /* Return the vm's 'vpmtmr' pointer. */
 struct vpmtmr *
 vm_pmtmr(struct vm *vm)
 {
 
 	return (vm->vpmtmr);
+}
+
 /* Return the vm's 'vrtc' pointer. */
+struct vrtc *
+vm_rtc(struct vm *vm)
+{
+
+	return (vm->vrtc);
 }
 
 enum vm_reg_name
 vm_segment_name(int seg)
 {
 	static enum vm_reg_name seg_names[] = {
 		VM_REG_GUEST_ES,
 		VM_REG_GUEST_CS,
 		VM_REG_GUEST_SS,
 		VM_REG_GUEST_DS,
 		VM_REG_GUEST_FS,
 		VM_REG_GUEST_GS
 	};
 
 	KASSERT(seg >= 0 && seg < nitems(seg_names),
 	    ("%s: invalid segment encoding %d", __func__, seg));
 	return (seg_names[seg]);
 }
 
 /*
  * Release every guest page held by the 'copyinfo' array and zero the array.
  * Entries whose 'cookie' is NULL are skipped.
  */
 void
 vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
     int num_copyinfo)
 {
 	int i;
 
 	i = 0;
 	while (i < num_copyinfo) {
 		if (copyinfo[i].cookie != NULL)
 			vm_gpa_release(copyinfo[i].cookie);
 		i++;
 	}
 	bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo));
 }
 
 /*
  * Prepare 'copyinfo' for copying 'len' bytes at guest linear address 'gla'.
  *
  * Pass 1 translates the range one page-bounded piece at a time and records
  * each piece's guest physical address and length.  Pass 2 holds every piece
  * with vm_gpa_hold() and records the host address and cookie.
  *
  * Returns 0 on success, the vmm_gla2gpa() error if translation fails, or -1
  * (after tearing 'copyinfo' down) if a piece cannot be held.
  */
 int
 vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
     int num_copyinfo)
 {
 	int error, idx, nused;
 	size_t n, off, remaining;
 	void *hva, *cookie;
 	uint64_t gpa;
 
 	bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo);
 
 	/* Pass 1: split the range into pieces that do not cross a page. */
 	for (nused = 0, remaining = len; remaining > 0; nused++) {
 		KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
 		error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa);
 		if (error)
 			return (error);
 		off = gpa & PAGE_MASK;
 		n = min(remaining, PAGE_SIZE - off);
 		copyinfo[nused].gpa = gpa;
 		copyinfo[nused].len = n;
 		remaining -= n;
 		gla += n;
 	}
 
 	/* Pass 2: hold each piece; stop at the first failure. */
 	idx = 0;
 	while (idx < nused) {
 		hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
 		    prot, &cookie);
 		if (hva == NULL)
 			break;
 		copyinfo[idx].hva = hva;
 		copyinfo[idx].cookie = cookie;
 		idx++;
 	}
 
 	if (idx == nused)
 		return (0);
 
 	vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
 	return (-1);
 }
 
 /*
  * Copy 'len' bytes from the pieces described by 'copyinfo' into 'kaddr'.
  * Pieces are consumed in array order until 'len' reaches zero.
  */
 void
 vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
     size_t len)
 {
 	struct vm_copyinfo *ci;
 	char *dst;
 
 	dst = kaddr;
 	for (ci = copyinfo; len > 0; ci++) {
 		bcopy(ci->hva, dst, ci->len);
 		len -= ci->len;
 		dst += ci->len;
 	}
 }
 
 /*
  * Copy 'len' bytes from 'kaddr' into the pieces described by 'copyinfo'.
  * Pieces are filled in array order until 'len' reaches zero.
  */
 void
 vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
     struct vm_copyinfo *copyinfo, size_t len)
 {
 	struct vm_copyinfo *ci;
 	const char *src;
 
 	src = kaddr;
 	for (ci = copyinfo; len > 0; ci++) {
 		bcopy(src, ci->hva, ci->len);
 		len -= ci->len;
 		src += ci->len;
 	}
 }
 
 /*
  * Return the amount of in-use and wired memory for the VM. Since
  * these are global stats, only return the values for vCPU 0.
  */
 VMM_STAT_DECLARE(VMM_MEM_RESIDENT);
 VMM_STAT_DECLARE(VMM_MEM_WIRED);
 
 /*
  * Stat callback for VMM_MEM_RESIDENT: for vcpu 0 only, set the stat to the
  * vmspace resident page count multiplied by PAGE_SIZE.
  */
 static void
 vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
 {
 
 	if (vcpu != 0)
 		return;
 
 	vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT,
 	    PAGE_SIZE * vmspace_resident_count(vm->vmspace));
 }
 
 /*
  * Stat callback for VMM_MEM_WIRED: for vcpu 0 only, set the stat to the
  * pmap wired page count multiplied by PAGE_SIZE.
  */
 static void
 vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
 {
 
 	if (vcpu != 0)
 		return;
 
 	vmm_stat_set(vm, vcpu, VMM_MEM_WIRED,
 	    PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
 }
 
 VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
 VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
Index: head/sys/amd64/vmm/vmm_dev.c
===================================================================
--- head/sys/amd64/vmm/vmm_dev.c	(revision 276427)
+++ head/sys/amd64/vmm/vmm_dev.c	(revision 276428)
@@ -1,671 +1,693 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/conf.h>
 #include <sys/sysctl.h>
 #include <sys/libkern.h>
 #include <sys/ioccom.h>
 #include <sys/mman.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 
 #include <machine/vmparam.h>
 #include <machine/vmm.h>
 #include <machine/vmm_instruction_emul.h>
 #include <machine/vmm_dev.h>
 
 #include "vmm_lapic.h"
 #include "vmm_stat.h"
 #include "vmm_mem.h"
 #include "io/ppt.h"
 #include "io/vatpic.h"
 #include "io/vioapic.h"
 #include "io/vhpet.h"
+#include "io/vrtc.h"
 
 /*
  * Per-VM state behind a vmm character device.  Instances are linked on the
  * file-scope 'head' list.
  */
 struct vmmdev_softc {
 	struct vm	*vm;		/* vm instance cookie */
 	struct cdev	*cdev;		/* device node; NULL once destroy is scheduled */
 	SLIST_ENTRY(vmmdev_softc) link;	/* linkage on the 'head' list */
 	int		flags;		/* VSC_* flags */
 };
 #define	VSC_LINKED		0x01	/* softc has been inserted on 'head' */
 
 static SLIST_HEAD(, vmmdev_softc) head;
 
 static struct mtx vmmdev_mtx;
 
 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
 
 SYSCTL_DECL(_hw_vmm);
 
 /*
  * Find the softc whose vm is called 'name'.
  * Returns NULL when no entry on the 'head' list matches.
  */
 static struct vmmdev_softc *
 vmmdev_lookup(const char *name)
 {
 	struct vmmdev_softc *entry;
 
 #ifdef notyet	/* XXX kernel is not compiled with invariants */
 	mtx_assert(&vmmdev_mtx, MA_OWNED);
 #endif
 
 	SLIST_FOREACH(entry, &head, link) {
 		if (strcmp(name, vm_name(entry->vm)) == 0)
 			return (entry);
 	}
 
 	return (NULL);
 }
 
 /* Return the softc stored in the cdev's 'si_drv1' field. */
 static struct vmmdev_softc *
 vmmdev_lookup2(struct cdev *cdev)
 {
 	struct vmmdev_softc *sc;
 
 	sc = cdev->si_drv1;
 	return (sc);
 }
 
 static int
 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
 {
 	int error, off, c, prot;
 	vm_paddr_t gpa;
 	void *hpa, *cookie;
 	struct vmmdev_softc *sc;
 
 	static char zerobuf[PAGE_SIZE];
 
 	error = 0;
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL)
 		error = ENXIO;
 
 	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
 	while (uio->uio_resid > 0 && error == 0) {
 		gpa = uio->uio_offset;
 		off = gpa & PAGE_MASK;
 		c = min(uio->uio_resid, PAGE_SIZE - off);
 
 		/*
 		 * The VM has a hole in its physical memory map. If we want to
 		 * use 'dd' to inspect memory beyond the hole we need to
 		 * provide bogus data for memory that lies in the hole.
 		 *
 		 * Since this device does not support lseek(2), dd(1) will
 		 * read(2) blocks of data to simulate the lseek(2).
 		 */
 		hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie);
 		if (hpa == NULL) {
 			if (uio->uio_rw == UIO_READ)
 				error = uiomove(zerobuf, c, uio);
 			else
 				error = EFAULT;
 		} else {
 			error = uiomove(hpa, c, uio);
 			vm_gpa_release(cookie);
 		}
 	}
 	return (error);
 }
 
 /*
  * ioctl handler for a vmm device.
  *
  * The handler runs in three phases:
  *  1. Commands that name a single vcpu freeze that vcpu; commands that act
  *     on the whole VM freeze every vcpu.  A failure here returns right away.
  *  2. The command is dispatched to its handler.
  *  3. Every vcpu frozen in phase 1 is put back into VCPU_IDLE.
  *
  * Returns ENXIO if the cdev has no softc, ENOTTY for an unknown command,
  * otherwise the error produced by phase 1 or by the command handler.
  */
 static int
 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	     struct thread *td)
 {
 	int error, vcpu, state_changed, size;
 	cpuset_t *cpuset;
 	struct vmmdev_softc *sc;
 	struct vm_memory_segment *seg;
 	struct vm_register *vmreg;
 	struct vm_seg_desc *vmsegdesc;
 	struct vm_run *vmrun;
 	struct vm_exception *vmexc;
 	struct vm_lapic_irq *vmirq;
 	struct vm_lapic_msi *vmmsi;
 	struct vm_ioapic_irq *ioapic_irq;
 	struct vm_isa_irq *isa_irq;
 	struct vm_isa_irq_trigger *isa_irq_trigger;
 	struct vm_capability *vmcap;
 	struct vm_pptdev *pptdev;
 	struct vm_pptdev_mmio *pptmmio;
 	struct vm_pptdev_msi *pptmsi;
 	struct vm_pptdev_msix *pptmsix;
 	struct vm_nmi *vmnmi;
 	struct vm_stats *vmstats;
 	struct vm_stat_desc *statdesc;
 	struct vm_x2apic *x2apic;
 	struct vm_gpa_pte *gpapte;
 	struct vm_suspend *vmsuspend;
 	struct vm_gla2gpa *gg;
 	struct vm_activate_cpu *vac;
 	struct vm_cpuset *vm_cpuset;
 	struct vm_intinfo *vmii;
+	struct vm_rtc_time *rtctime;
+	struct vm_rtc_data *rtcdata;
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL)
 		return (ENXIO);
 
 	error = 0;
 	vcpu = -1;
 	state_changed = 0;
 
 	/*
 	 * Some VMM ioctls can operate only on vcpus that are not running.
 	 */
 	switch (cmd) {
 	case VM_RUN:
 	case VM_GET_REGISTER:
 	case VM_SET_REGISTER:
 	case VM_GET_SEGMENT_DESCRIPTOR:
 	case VM_SET_SEGMENT_DESCRIPTOR:
 	case VM_INJECT_EXCEPTION:
 	case VM_GET_CAPABILITY:
 	case VM_SET_CAPABILITY:
 	case VM_PPTDEV_MSI:
 	case VM_PPTDEV_MSIX:
 	case VM_SET_X2APIC_STATE:
 	case VM_GLA2GPA:
 	case VM_ACTIVATE_CPU:
 	case VM_SET_INTINFO:
 	case VM_GET_INTINFO:
 		/*
 		 * XXX fragile, handle with care
 		 * Assumes that the first field of the ioctl data is the vcpu.
 		 */
 		vcpu = *(int *)data;
 		if (vcpu < 0 || vcpu >= VM_MAXCPU) {
 			error = EINVAL;
 			goto done;
 		}
 
 		error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
 		if (error)
 			goto done;
 
 		state_changed = 1;
 		break;
 
 	case VM_MAP_PPTDEV_MMIO:
 	case VM_BIND_PPTDEV:
 	case VM_UNBIND_PPTDEV:
 	case VM_MAP_MEMORY:
 	case VM_REINIT:
 		/*
 		 * ioctls that operate on the entire virtual machine must
 		 * prevent all vcpus from running.
 		 */
 		error = 0;
 		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
 			error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
 			if (error)
 				break;
 		}
 
 		if (error) {
 			/* Undo the freeze on the vcpus that did succeed. */
 			while (--vcpu >= 0)
 				vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
 			goto done;
 		}
 
 		state_changed = 2;
 		break;
 
 	default:
 		break;
 	}
 
 	switch(cmd) {
 	case VM_RUN:
 		vmrun = (struct vm_run *)data;
 		error = vm_run(sc->vm, vmrun);
 		break;
 	case VM_SUSPEND:
 		vmsuspend = (struct vm_suspend *)data;
 		error = vm_suspend(sc->vm, vmsuspend->how);
 		break;
 	case VM_REINIT:
 		error = vm_reinit(sc->vm);
 		break;
 	case VM_STAT_DESC: {
 		statdesc = (struct vm_stat_desc *)data;
 		error = vmm_stat_desc_copy(statdesc->index,
 					statdesc->desc, sizeof(statdesc->desc));
 		break;
 	}
 	case VM_STATS: {
 		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
 		vmstats = (struct vm_stats *)data;
 		getmicrotime(&vmstats->tv);
 		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
 				      &vmstats->num_entries, vmstats->statbuf);
 		break;
 	}
 	case VM_PPTDEV_MSI:
 		pptmsi = (struct vm_pptdev_msi *)data;
 		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
 				      pptmsi->bus, pptmsi->slot, pptmsi->func,
 				      pptmsi->addr, pptmsi->msg,
 				      pptmsi->numvec);
 		break;
 	case VM_PPTDEV_MSIX:
 		pptmsix = (struct vm_pptdev_msix *)data;
 		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
 				       pptmsix->bus, pptmsix->slot,
 				       pptmsix->func, pptmsix->idx,
 				       pptmsix->addr, pptmsix->msg,
 				       pptmsix->vector_control);
 		break;
 	case VM_MAP_PPTDEV_MMIO:
 		pptmmio = (struct vm_pptdev_mmio *)data;
 		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
 				     pptmmio->func, pptmmio->gpa, pptmmio->len,
 				     pptmmio->hpa);
 		break;
 	case VM_BIND_PPTDEV:
 		pptdev = (struct vm_pptdev *)data;
 		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
 					 pptdev->func);
 		break;
 	case VM_UNBIND_PPTDEV:
 		pptdev = (struct vm_pptdev *)data;
 		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
 					   pptdev->func);
 		break;
 	case VM_INJECT_EXCEPTION:
 		vmexc = (struct vm_exception *)data;
 		error = vm_inject_exception(sc->vm, vmexc->cpuid, vmexc);
 		break;
 	case VM_INJECT_NMI:
 		vmnmi = (struct vm_nmi *)data;
 		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
 		break;
 	case VM_LAPIC_IRQ:
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
 		break;
 	case VM_LAPIC_LOCAL_IRQ:
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
 		    vmirq->vector);
 		break;
 	case VM_LAPIC_MSI:
 		vmmsi = (struct vm_lapic_msi *)data;
 		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
 		break;
 	case VM_IOAPIC_ASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_DEASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_PULSE_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_PINCOUNT:
 		*(int *)data = vioapic_pincount(sc->vm);
 		break;
 	case VM_ISA_ASSERT_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_assert_irq(sc->vm,
 			    isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_DEASSERT_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_deassert_irq(sc->vm,
 			    isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_PULSE_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_SET_IRQ_TRIGGER:
 		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
 		error = vatpic_set_irq_trigger(sc->vm,
 		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
 		break;
 	case VM_MAP_MEMORY:
 		seg = (struct vm_memory_segment *)data;
 		error = vm_malloc(sc->vm, seg->gpa, seg->len);
 		break;
 	case VM_GET_MEMORY_SEG:
 		seg = (struct vm_memory_segment *)data;
 		seg->len = 0;
 		(void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
 		error = 0;
 		break;
 	case VM_GET_REGISTER:
 		vmreg = (struct vm_register *)data;
 		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
 					&vmreg->regval);
 		break;
 	case VM_SET_REGISTER:
 		vmreg = (struct vm_register *)data;
 		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
 					vmreg->regval);
 		break;
 	case VM_SET_SEGMENT_DESCRIPTOR:
 		vmsegdesc = (struct vm_seg_desc *)data;
 		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
 					vmsegdesc->regnum,
 					&vmsegdesc->desc);
 		break;
 	case VM_GET_SEGMENT_DESCRIPTOR:
 		vmsegdesc = (struct vm_seg_desc *)data;
 		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
 					vmsegdesc->regnum,
 					&vmsegdesc->desc);
 		break;
 	case VM_GET_CAPABILITY:
 		vmcap = (struct vm_capability *)data;
 		error = vm_get_capability(sc->vm, vmcap->cpuid,
 					  vmcap->captype,
 					  &vmcap->capval);
 		break;
 	case VM_SET_CAPABILITY:
 		vmcap = (struct vm_capability *)data;
 		error = vm_set_capability(sc->vm, vmcap->cpuid,
 					  vmcap->captype,
 					  vmcap->capval);
 		break;
 	case VM_SET_X2APIC_STATE:
 		x2apic = (struct vm_x2apic *)data;
 		error = vm_set_x2apic_state(sc->vm,
 					    x2apic->cpuid, x2apic->state);
 		break;
 	case VM_GET_X2APIC_STATE:
 		x2apic = (struct vm_x2apic *)data;
 		error = vm_get_x2apic_state(sc->vm,
 					    x2apic->cpuid, &x2apic->state);
 		break;
 	case VM_GET_GPA_PMAP:
 		gpapte = (struct vm_gpa_pte *)data;
 		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
 				 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
 		error = 0;
 		break;
 	case VM_GET_HPET_CAPABILITIES:
 		error = vhpet_getcap((struct vm_hpet_cap *)data);
 		break;
 	case VM_GLA2GPA: {
 		CTASSERT(PROT_READ == VM_PROT_READ);
 		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
 		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
 		gg = (struct vm_gla2gpa *)data;
 		error = vmm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
 		    gg->prot, &gg->gpa);
 		KASSERT(error == 0 || error == 1 || error == -1,
 		    ("%s: vmm_gla2gpa unknown error %d", __func__, error));
 		if (error >= 0) {
 			/*
 			 * error = 0: the translation was successful
 			 * error = 1: a fault was injected into the guest
 			 */
 			gg->fault = error;
 			error = 0;
 		} else {
 			error = EFAULT;
 		}
 		break;
 	}
 	case VM_ACTIVATE_CPU:
 		vac = (struct vm_activate_cpu *)data;
 		error = vm_activate_cpu(sc->vm, vac->vcpuid);
 		break;
 	case VM_GET_CPUS:
 		error = 0;
 		vm_cpuset = (struct vm_cpuset *)data;
 		size = vm_cpuset->cpusetsize;
 		/*
 		 * Compare as signed values.  Comparing 'size' directly with
 		 * sizeof() converts a negative size to a huge unsigned one,
 		 * which would slip past both checks and reach malloc().
 		 */
 		if (size < (int)sizeof(cpuset_t) ||
 		    size > CPU_MAXSIZE / NBBY) {
 			error = ERANGE;
 			break;
 		}
 		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
 		if (vm_cpuset->which == VM_ACTIVE_CPUS)
 			*cpuset = vm_active_cpus(sc->vm);
 		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
 			*cpuset = vm_suspended_cpus(sc->vm);
 		else
 			error = EINVAL;
 		if (error == 0)
 			error = copyout(cpuset, vm_cpuset->cpus, size);
 		free(cpuset, M_TEMP);
 		break;
 	case VM_SET_INTINFO:
 		vmii = (struct vm_intinfo *)data;
 		error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
 		break;
 	case VM_GET_INTINFO:
 		vmii = (struct vm_intinfo *)data;
 		error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
 		    &vmii->info2);
+		break;
+	case VM_RTC_WRITE:
+		rtcdata = (struct vm_rtc_data *)data;
+		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
+		    rtcdata->value);
+		break;
+	case VM_RTC_READ:
+		rtcdata = (struct vm_rtc_data *)data;
+		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
+		    &rtcdata->value);
+		break;
+	case VM_RTC_SETTIME:
+		rtctime = (struct vm_rtc_time *)data;
+		error = vrtc_set_time(sc->vm, rtctime->secs);
+		break;
+	case VM_RTC_GETTIME:
+		error = 0;
+		rtctime = (struct vm_rtc_time *)data;
+		rtctime->secs = vrtc_get_time(sc->vm);
 		break;
 	default:
 		error = ENOTTY;
 		break;
 	}
 
 	/* Thaw whatever the first switch froze. */
 	if (state_changed == 1) {
 		vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
 	} else if (state_changed == 2) {
 		for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
 			vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
 	}
 
 done:
 	/* Make sure that no handler returns a bogus value like ERESTART */
 	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
 	return (error);
 }
 
 /*
  * mmap handler: hand back the memory object that backs '*offset' in the vm.
  *
  * Returns EINVAL if the cdev has no softc or if PROT_EXEC is requested,
  * otherwise the result of vm_get_memobj().
  */
 static int
 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
 		   vm_size_t size, struct vm_object **object, int nprot)
 {
 	struct vmmdev_softc *sc;
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL || (nprot & PROT_EXEC) != 0)
 		return (EINVAL);
 
 	return (vm_get_memobj(sc->vm, *offset, size, offset, object));
 }
 
 /*
  * Tear down a softc: destroy its cdev and vm if present, unlink it from the
  * 'head' list if it was linked, then free it.  'arg' is the softc.
  */
 static void
 vmmdev_destroy(void *arg)
 {
 	struct vmmdev_softc *sc;
 
 	sc = arg;
 
 	if (sc->cdev != NULL)
 		destroy_dev(sc->cdev);
 
 	if (sc->vm != NULL)
 		vm_destroy(sc->vm);
 
 	if (sc->flags & VSC_LINKED) {
 		mtx_lock(&vmmdev_mtx);
 		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
 		mtx_unlock(&vmmdev_mtx);
 	}
 
 	free(sc, M_VMMDEV);
 }
 
 static int
 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	char buf[VM_MAX_NAMELEN];
 	struct vmmdev_softc *sc;
 	struct cdev *cdev;
 
 	strlcpy(buf, "beavis", sizeof(buf));
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(buf);
 	if (sc == NULL || sc->cdev == NULL) {
 		mtx_unlock(&vmmdev_mtx);
 		return (EINVAL);
 	}
 
 	/*
 	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
 	 * goes down to 0 so we should not do it again in the callback.
 	 */
 	cdev = sc->cdev;
 	sc->cdev = NULL;		
 	mtx_unlock(&vmmdev_mtx);
 
 	/*
 	 * Schedule the 'cdev' to be destroyed:
 	 *
 	 * - any new operations on this 'cdev' will return an error (ENXIO).
 	 *
 	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
 	 *   be destroyed and the callback will be invoked in a taskqueue
 	 *   context.
 	 */
 	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
 
 	return (0);
 }
 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
 	    NULL, 0, sysctl_vmm_destroy, "A", NULL);
 
 /*
  * Character device switch for vmm devices.  Reads and writes share the
  * vmmdev_rw() handler.
  */
 static struct cdevsw vmmdevsw = {
 	.d_name		= "vmmdev",
 	.d_version	= D_VERSION,
 	.d_ioctl	= vmmdev_ioctl,
 	.d_mmap_single	= vmmdev_mmap_single,
 	.d_read		= vmmdev_rw,
 	.d_write	= vmmdev_rw,
 };
 
 /*
  * Sysctl handler that creates a VM with the name written to the node and a
  * matching "vmm/<name>" device.
  *
  * Returns the sysctl_handle_string() error (or 0 for a pure read), EEXIST
  * if a VM with that name already exists, the vm_create() or make_dev_p()
  * error if either fails, and 0 on success.
  */
 static int
 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct vm *vm;
 	struct cdev *cdev;
 	struct vmmdev_softc *sc, *sc2;
 	char buf[VM_MAX_NAMELEN];
 
 	strlcpy(buf, "beavis", sizeof(buf));
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	/* First check: reject the name early, before any allocation. */
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(buf);
 	mtx_unlock(&vmmdev_mtx);
 	if (sc != NULL)
 		return (EEXIST);
 
 	error = vm_create(buf, &vm);
 	if (error != 0)
 		return (error);
 
 	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
 	sc->vm = vm;
 
 	/*
 	 * Lookup the name again just in case somebody sneaked in when we
 	 * dropped the lock.
 	 */
 	mtx_lock(&vmmdev_mtx);
 	sc2 = vmmdev_lookup(buf);
 	if (sc2 == NULL) {
 		SLIST_INSERT_HEAD(&head, sc, link);
 		sc->flags |= VSC_LINKED;
 	}
 	mtx_unlock(&vmmdev_mtx);
 
 	/* Lost the race: discard the softc (and its vm) created above. */
 	if (sc2 != NULL) {
 		vmmdev_destroy(sc);
 		return (EEXIST);
 	}
 
 	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
 			   UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
 	if (error != 0) {
 		vmmdev_destroy(sc);
 		return (error);
 	}
 
 	/* Publish the softc through the cdev only once both exist. */
 	mtx_lock(&vmmdev_mtx);
 	sc->cdev = cdev;
 	sc->cdev->si_drv1 = sc;
 	mtx_unlock(&vmmdev_mtx);
 
 	return (0);
 }
 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
 	    NULL, 0, sysctl_vmm_create, "A", NULL);
 
 /* Initialize the mutex that protects the list of vmm device softcs. */
 void
 vmmdev_init(void)
 {
 	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
 }
 
 /*
  * Report whether the vmm device layer can be cleaned up: returns 0 when the
  * softc list is empty and EBUSY while any softc is still linked.
  */
 int
 vmmdev_cleanup(void)
 {
 
 	return (SLIST_EMPTY(&head) ? 0 : EBUSY);
 }
Index: head/sys/amd64/vmm/vmm_ioport.c
===================================================================
--- head/sys/amd64/vmm/vmm_ioport.c	(revision 276427)
+++ head/sys/amd64/vmm/vmm_ioport.c	(revision 276428)
@@ -1,179 +1,182 @@
 /*-
  * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/cpuset.h>
 #include <sys/systm.h>
 
 #include <vm/vm.h>
 
 #include <machine/vmm.h>
 #include <machine/vmm_instruction_emul.h>
 #include <x86/psl.h>
 
 #include "vatpic.h"
 #include "vatpit.h"
 #include "vpmtmr.h"
+#include "vrtc.h"
 #include "vmm_ioport.h"
 #include "vmm_ktr.h"
 
 #define	MAX_IOPORTS		1280
 
 /*
  * In-kernel I/O port handlers, indexed by port number.  A port whose slot
  * is NULL has no in-kernel handler; emulate_inout_port() punts accesses to
  * such ports to userspace.
  */
 ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
 	[TIMER_MODE] = vatpit_handler,
 	[TIMER_CNTR0] = vatpit_handler,
 	[TIMER_CNTR1] = vatpit_handler,
 	[TIMER_CNTR2] = vatpit_handler,
 	[NMISC_PORT] = vatpit_nmisc_handler,
 	[IO_ICU1] = vatpic_master_handler,
 	[IO_ICU1 + ICU_IMR_OFFSET] = vatpic_master_handler,
 	[IO_ICU2] = vatpic_slave_handler,
 	[IO_ICU2 + ICU_IMR_OFFSET] = vatpic_slave_handler,
 	[IO_ELCR1] = vatpic_elc_handler,
 	[IO_ELCR2] = vatpic_elc_handler,
 	[IO_PMTMR] = vpmtmr_handler,
+	[IO_RTC] = vrtc_addr_handler,
+	[IO_RTC + 1] = vrtc_data_handler,
 };
 
 #ifdef KTR
 /*
  * Return the mnemonic of the in/out instruction described by 'vmexit', for
  * use in trace messages.
  *
  * The table is laid out as four groups of three (byte, word, dword):
  * out, in, string-out, string-in.  The index is built from the operand
  * size (0..2), plus 3 for an input, plus 6 for a string instruction.
  */
 static const char *
 inout_instruction(struct vm_exit *vmexit)
 {
 	int index;
 
 	/*
 	 * Every entry needs its own comma.  Without one after "outsd" the
 	 * compiler concatenates it with "insb", leaving 11 entries and
 	 * shifting every string-input mnemonic by one.
 	 */
 	static const char *iodesc[] = {
 		"outb", "outw", "outl",
 		"inb", "inw", "inl",
 		"outsb", "outsw", "outsd",
 		"insb", "insw", "insd",
 	};
 
 	switch (vmexit->u.inout.bytes) {
 	case 1:
 		index = 0;
 		break;
 	case 2:
 		index = 1;
 		break;
 	default:
 		index = 2;
 		break;
 	}
 
 	if (vmexit->u.inout.in)
 		index += 3;
 
 	if (vmexit->u.inout.string)
 		index += 6;
 
 	KASSERT(index < nitems(iodesc), ("%s: invalid index %d",
 	    __func__, index));
 
 	return (iodesc[index]);
 }
 #endif	/* KTR */
 
 /*
  * Emulate a non-string in/out instruction with an in-kernel port handler.
  *
  * If no handler is registered for the port, '*retu' is set to true so the
  * access is finished in userspace.  Otherwise the handler is called; for an
  * input the result is merged into the low bytes of the guest's %rax.
  *
  * Returns 0 on success or EIO if the handler reports any error.
  */
 static int
 emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
     bool *retu)
 {
 	ioport_handler_func_t handler;
 	uint32_t mask, val;
 	int error;
 
 	/*
 	 * If there is no handler for the I/O port then punt to userspace.
 	 */
 	handler = NULL;
 	if (vmexit->u.inout.port < MAX_IOPORTS)
 		handler = ioport_handler[vmexit->u.inout.port];
 	if (handler == NULL) {
 		*retu = true;
 		return (0);
 	}
 
 	mask = vie_size2mask(vmexit->u.inout.bytes);
 
 	/* For an output, 'val' carries the value being written. */
 	if (!vmexit->u.inout.in)
 		val = vmexit->u.inout.eax & mask;
 
 	error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
 	    vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
 	if (error != 0) {
 		/*
 		 * The value returned by this function is also the return value
 		 * of vm_run(). This needs to be a positive number otherwise it
 		 * can be interpreted as a "pseudo-error" like ERESTART.
 		 *
 		 * Enforce this by mapping all errors to EIO.
 		 */
 		return (EIO);
 	}
 
 	/* For an input, fold the handler's result back into %rax. */
 	if (vmexit->u.inout.in) {
 		vmexit->u.inout.eax &= ~mask;
 		vmexit->u.inout.eax |= val & mask;
 		error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX,
 		    vmexit->u.inout.eax);
 		KASSERT(error == 0, ("emulate_ioport: error %d setting guest "
 		    "rax register", error));
 	}
 
 	*retu = false;
 	return (0);
 }
 
 /*
  * String in/out instructions are not emulated here: set '*retu' so that
  * the caller hands the exit to userspace, and report success.
  */
 static int
 emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
 {
 	*retu = true;
 	return (0);	/* Return to userspace to finish emulation */
 }
 
 /*
  * Handle an in/out VM exit: string instructions go to emulate_inout_str(),
  * everything else to emulate_inout_port().  The outcome is traced, and the
  * emulation routine's return value is passed back to the caller.
  */
 int
 vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
 {
 	int error, opsize;
 
 	opsize = vmexit->u.inout.bytes;
 	KASSERT(opsize == 1 || opsize == 2 || opsize == 4,
 	    ("vm_handle_inout: invalid operand size %d", opsize));
 
 	error = vmexit->u.inout.string ?
 	    emulate_inout_str(vm, vcpuid, vmexit, retu) :
 	    emulate_inout_port(vm, vcpuid, vmexit, retu);
 
 	VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s",
 	    vmexit->u.inout.rep ? "rep " : "",
 	    inout_instruction(vmexit),
 	    vmexit->u.inout.port,
 	    error ? "error" : (*retu ? "userspace" : "handled"));
 
 	return (error);
 }
Index: head/sys/modules/vmm/Makefile
===================================================================
--- head/sys/modules/vmm/Makefile	(revision 276427)
+++ head/sys/modules/vmm/Makefile	(revision 276428)
@@ -1,80 +1,81 @@
 # $FreeBSD$
 
 KMOD=	vmm
 
 SRCS=	opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h
 SRCS+=	vmx_assym.h svm_assym.h
 DPSRCS=	vmx_genassym.c svm_genassym.c
 
 CFLAGS+= -DVMM_KEEP_STATS -DSMP
 CFLAGS+= -I${.CURDIR}/../../amd64/vmm
 CFLAGS+= -I${.CURDIR}/../../amd64/vmm/io
 CFLAGS+= -I${.CURDIR}/../../amd64/vmm/intel
 CFLAGS+= -I${.CURDIR}/../../amd64/vmm/amd
 
 # generic vmm support
 .PATH: ${.CURDIR}/../../amd64/vmm
 SRCS+=	vmm.c		\
 	vmm_dev.c	\
 	vmm_host.c	\
 	vmm_instruction_emul.c	\
 	vmm_ioport.c	\
 	vmm_ipi.c	\
 	vmm_lapic.c	\
 	vmm_mem.c	\
 	vmm_stat.c	\
 	vmm_util.c	\
 	x86.c		\
 	vmm_support.S
 
 .PATH: ${.CURDIR}/../../amd64/vmm/io
 SRCS+=	iommu.c		\
 	ppt.c           \
 	vatpic.c	\
 	vatpit.c	\
 	vhpet.c		\
 	vioapic.c	\
 	vlapic.c	\
-	vpmtmr.c
+	vpmtmr.c	\
+	vrtc.c
 
 # intel-specific files
 .PATH: ${.CURDIR}/../../amd64/vmm/intel
 SRCS+=	ept.c		\
 	vmcs.c		\
 	vmx_msr.c	\
 	vmx_support.S	\
 	vmx.c		\
 	vtd.c
 
 # amd-specific files
 .PATH: ${.CURDIR}/../../amd64/vmm/amd
 SRCS+=	vmcb.c		\
 	svm.c		\
 	svm_support.S	\
 	npt.c		\
 	amdv.c		\
 	svm_msr.c
 
 CLEANFILES=	vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
 
 vmx_assym.h:    vmx_genassym.o
 	sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET}
 
 svm_assym.h:    svm_genassym.o
 	sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET}
 
 vmx_support.o:
 	${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
 	    ${.IMPSRC} -o ${.TARGET}
 
 svm_support.o:
 	${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \
 	    ${.IMPSRC} -o ${.TARGET}
 
 vmx_genassym.o:
 	${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC}
 
 svm_genassym.o:
 	${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC}
 
 .include <bsd.kmod.mk>
Index: head/usr.sbin/bhyve/rtc.c
===================================================================
--- head/usr.sbin/bhyve/rtc.c	(revision 276427)
+++ head/usr.sbin/bhyve/rtc.c	(revision 276428)
@@ -1,382 +1,129 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
-#include <sys/time.h>
 
-#include <stdio.h>
-#include <string.h>
 #include <time.h>
 #include <assert.h>
 
 #include <machine/vmm.h>
 #include <vmmapi.h>
 
 #include "acpi.h"
-#include "inout.h"
 #include "pci_lpc.h"
 #include "rtc.h"
 
-#define	IO_RTC	0x70
+#define	IO_RTC		0x70
 
-#define RTC_SEC		0x00	/* seconds */
-#define	RTC_SEC_ALARM	0x01
-#define	RTC_MIN		0x02
-#define	RTC_MIN_ALARM	0x03
-#define	RTC_HRS		0x04
-#define	RTC_HRS_ALARM	0x05
-#define	RTC_WDAY	0x06
-#define	RTC_DAY		0x07
-#define	RTC_MONTH	0x08
-#define	RTC_YEAR	0x09
-#define	RTC_CENTURY	0x32	/* current century */
-
-#define RTC_STATUSA	0xA
-#define  RTCSA_TUP	 0x80	/* time update, don't look now */
-
-#define	RTC_STATUSB	0xB
-#define	 RTCSB_DST	 0x01
-#define	 RTCSB_24HR	 0x02
-#define	 RTCSB_BIN	 0x04	/* 0 = BCD, 1 = Binary */
-#define	 RTCSB_PINTR	 0x40	/* 1 = enable periodic clock interrupt */
-#define	 RTCSB_HALT      0x80	/* stop clock updates */
-
-#define RTC_INTR	0x0c	/* status register C (R) interrupt source */
-
-#define RTC_STATUSD	0x0d	/* status register D (R) Lost Power */
-#define  RTCSD_PWR	 0x80	/* clock power OK */
-
-#define	RTC_NVRAM_START	0x0e
-#define	RTC_NVRAM_END	0x7f
-#define RTC_NVRAM_SZ	(128 - RTC_NVRAM_START)
-#define	nvoff(x)	((x) - RTC_NVRAM_START)
-
-#define	RTC_DIAG	0x0e
-#define RTC_RSTCODE	0x0f
-#define	RTC_EQUIPMENT	0x14
 #define	RTC_LMEM_LSB	0x34
 #define	RTC_LMEM_MSB	0x35
 #define	RTC_HMEM_LSB	0x5b
 #define	RTC_HMEM_SB	0x5c
 #define	RTC_HMEM_MSB	0x5d
 
 #define m_64KB		(64*1024)
 #define	m_16MB		(16*1024*1024)
 #define	m_4GB		(4ULL*1024*1024*1024)
 
-static int addr;
-
-static uint8_t rtc_nvram[RTC_NVRAM_SZ];
-
-/* XXX initialize these to default values as they would be from BIOS */
-static uint8_t status_a, status_b;
-
-static struct {
-	uint8_t  hours;
-	uint8_t  mins;
-	uint8_t  secs;
-} rtc_alarm;
-
-static u_char const bin2bcd_data[] = {
-	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
-	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
-	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
-	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
-	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
-	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
-	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
-	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
-	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
-	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99
-};
-#define	bin2bcd(bin)	(bin2bcd_data[bin])
-
-#define	rtcout(val)	((status_b & RTCSB_BIN) ? (val) : bin2bcd((val)))
-
-static void
-timevalfix(struct timeval *t1)
+/*
+ * Returns the current RTC time as number of seconds since 00:00:00 Jan 1, 1970
+ *
+ * XXX this always returns localtime to maintain compatibility with the
+ * original device model.
+ */
+static time_t
+rtc_time(struct vmctx *ctx)
 {
-
-	if (t1->tv_usec < 0) {
-		t1->tv_sec--;
-		t1->tv_usec += 1000000;
-	}
-	if (t1->tv_usec >= 1000000) {
-		t1->tv_sec++;
-		t1->tv_usec -= 1000000;
-	}
-}
-
-static void
-timevalsub(struct timeval *t1, const struct timeval *t2)
-{
-
-	t1->tv_sec -= t2->tv_sec;
-	t1->tv_usec -= t2->tv_usec;
-	timevalfix(t1);
-}
-
-static int
-rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
-		 uint32_t *eax, void *arg)
-{
-	if (bytes != 1)
-		return (-1);
-
-	if (in) {
-		/* straight read of this register will return 0xFF */
-		*eax = 0xff;
-		return (0);
-	}
-
-	switch (*eax & 0x7f) {
-	case RTC_SEC:
-	case RTC_SEC_ALARM:
-	case RTC_MIN:
-	case RTC_MIN_ALARM:
-	case RTC_HRS:
-	case RTC_HRS_ALARM:
-	case RTC_WDAY:
-	case RTC_DAY:
-	case RTC_MONTH:
-	case RTC_YEAR:
-	case RTC_STATUSA:
-	case RTC_STATUSB:
-	case RTC_INTR:
-	case RTC_STATUSD:
-	case RTC_NVRAM_START ... RTC_NVRAM_END:
-		break;
-	default:
-		return (-1);
-	}
-
-	addr = *eax & 0x7f;
-	return (0);
-}
-
-static int
-rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
-		 uint32_t *eax, void *arg)
-{
-	int hour;
+	struct tm tm;
 	time_t t;
-	struct timeval cur, delta;
 
-	static struct timeval last;
-	static struct tm tm;
-
-	if (bytes != 1)
-		return (-1);
-
-	gettimeofday(&cur, NULL);
-
-	/*
-	 * Increment the cached time only once per second so we can guarantee
-	 * that the guest has at least one second to read the hour:min:sec
-	 * separately and still get a coherent view of the time.
-	 */
-	delta = cur;
-	timevalsub(&delta, &last);
-	if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) {
-		t = cur.tv_sec;
-		localtime_r(&t, &tm);
-		last = cur;
-	}
-
-	if (in) {
-		switch (addr) {
-		case RTC_SEC_ALARM:
-			*eax = rtc_alarm.secs;
-			break;
-		case RTC_MIN_ALARM:
-			*eax = rtc_alarm.mins;
-			break;
-		case RTC_HRS_ALARM:
-			*eax = rtc_alarm.hours;
-			break;
-		case RTC_SEC:
-			*eax = rtcout(tm.tm_sec);
-			return (0);
-		case RTC_MIN:
-			*eax = rtcout(tm.tm_min);
-			return (0);
-		case RTC_HRS:
-			if (status_b & RTCSB_24HR)
-				hour = tm.tm_hour;
-			else
-				hour = (tm.tm_hour % 12) + 1;
-			
-			*eax = rtcout(hour);
-
-			/*
-			 * If we are representing time in the 12-hour format
-			 * then set the MSB to indicate PM.
-			 */
-			if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12)
-				*eax |= 0x80;
-
-			return (0);
-		case RTC_WDAY:
-			*eax = rtcout(tm.tm_wday + 1);
-			return (0);
-		case RTC_DAY:
-			*eax = rtcout(tm.tm_mday);
-			return (0);
-		case RTC_MONTH:
-			*eax = rtcout(tm.tm_mon + 1);
-			return (0);
-		case RTC_YEAR:
-			*eax = rtcout(tm.tm_year % 100);
-			return (0);
-		case RTC_STATUSA:
-			*eax = status_a;
-			return (0);
-		case RTC_STATUSB:
-			*eax = status_b;
-			return (0);
-		case RTC_INTR:
-			*eax = 0;
-			return (0);
-		case RTC_STATUSD:
-			*eax = RTCSD_PWR;
-			return (0);
-		case RTC_NVRAM_START ... RTC_NVRAM_END:
-			*eax = rtc_nvram[addr - RTC_NVRAM_START];
-			return (0);
-		default:
-			return (-1);
-		}
-	}
-
-	switch (addr) {
-	case RTC_STATUSA:
-		status_a = *eax & ~RTCSA_TUP;
-		break;
-	case RTC_STATUSB:
-		/* XXX not implemented yet XXX */
-		if (*eax & RTCSB_PINTR)
-			return (-1);
-		status_b = *eax;
-		break;
-	case RTC_STATUSD:
-		/* ignore write */
-		break;
-	case RTC_SEC_ALARM:
-		rtc_alarm.secs = *eax;
-		break;
-	case RTC_MIN_ALARM:
-		rtc_alarm.mins = *eax;
-		break;
-	case RTC_HRS_ALARM:
-		rtc_alarm.hours = *eax;
-		break;
-	case RTC_SEC:
-	case RTC_MIN:
-	case RTC_HRS:
-	case RTC_WDAY:
-	case RTC_DAY:
-	case RTC_MONTH:
-	case RTC_YEAR:
-		/*
-		 * Ignore writes to the time of day registers
-		 */
-		break;
-	case RTC_NVRAM_START ... RTC_NVRAM_END:
-		rtc_nvram[addr - RTC_NVRAM_START] = *eax;
-		break;
-	default:
-		return (-1);
-	}
-	return (0);
+	time(&t);
+	localtime_r(&t, &tm);
+	return (timegm(&tm));
 }
 
 void
 rtc_init(struct vmctx *ctx)
 {	
-	struct timeval cur;
-	struct tm tm;
 	size_t himem;
 	size_t lomem;
 	int err;
 
-	err = gettimeofday(&cur, NULL);
-	assert(err == 0);
-	(void) localtime_r(&cur.tv_sec, &tm);
-
-	memset(rtc_nvram, 0, sizeof(rtc_nvram));
-
-	rtc_nvram[nvoff(RTC_CENTURY)] = bin2bcd((tm.tm_year + 1900) / 100);
-
 	/* XXX init diag/reset code/equipment/checksum ? */
 
 	/*
 	 * Report guest memory size in nvram cells as required by UEFI.
 	 * Little-endian encoding.
 	 * 0x34/0x35 - 64KB chunks above 16MB, below 4GB
 	 * 0x5b/0x5c/0x5d - 64KB chunks above 4GB
 	 */
 	lomem = (vm_get_lowmem_size(ctx) - m_16MB) / m_64KB;
-	rtc_nvram[nvoff(RTC_LMEM_LSB)] = lomem;
-	rtc_nvram[nvoff(RTC_LMEM_MSB)] = lomem >> 8;
+	err = vm_rtc_write(ctx, RTC_LMEM_LSB, lomem);
+	assert(err == 0);
+	err = vm_rtc_write(ctx, RTC_LMEM_MSB, lomem >> 8);
+	assert(err == 0);
 
 	himem = vm_get_highmem_size(ctx) / m_64KB;
-	rtc_nvram[nvoff(RTC_HMEM_LSB)] = himem;
-	rtc_nvram[nvoff(RTC_HMEM_SB)]  = himem >> 8;
-	rtc_nvram[nvoff(RTC_HMEM_MSB)] = himem >> 16;
-}
+	err = vm_rtc_write(ctx, RTC_HMEM_LSB, himem);
+	assert(err == 0);
+	err = vm_rtc_write(ctx, RTC_HMEM_SB, himem >> 8);
+	assert(err == 0);
+	err = vm_rtc_write(ctx, RTC_HMEM_MSB, himem >> 16);
+	assert(err == 0);
 
-INOUT_PORT(rtc, IO_RTC, IOPORT_F_INOUT, rtc_addr_handler);
-INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler);
+	err = vm_rtc_settime(ctx, rtc_time(ctx));
+	assert(err == 0);
+}
 
 /*
  * Emit the DSDT fragment describing the RTC: a "Device (RTC)" node with
  * _HID PNP0B00 whose _CRS resource template is filled in by the
  * dsdt_fixed_ioport(IO_RTC, 2) and dsdt_fixed_irq(8) helpers.
  *
  * NOTE(review): the second ioport argument is presumably the length of the
  * port range and 8 the IRQ number -- confirm against the dsdt_* helpers.
  */
 static void
 rtc_dsdt(void)
 {
 
 	dsdt_line("");
 	dsdt_line("Device (RTC)");
 	dsdt_line("{");
 	dsdt_line("  Name (_HID, EisaId (\"PNP0B00\"))");
 	dsdt_line("  Name (_CRS, ResourceTemplate ()");
 	dsdt_line("  {");
 	/* Resource entries are emitted between indent/unindent calls. */
 	dsdt_indent(2);
 	dsdt_fixed_ioport(IO_RTC, 2);
 	dsdt_fixed_irq(8);
 	dsdt_unindent(2);
 	dsdt_line("  })");
 	dsdt_line("}");
 }
 LPC_DSDT(rtc_dsdt);
 
 /*
  * Reserve the extended RTC I/O ports although they are not emulated at this
  * time.
  */
 SYSRES_IO(0x72, 6);
Index: head/usr.sbin/bhyvectl/bhyvectl.c
===================================================================
--- head/usr.sbin/bhyvectl/bhyvectl.c	(revision 276427)
+++ head/usr.sbin/bhyvectl/bhyvectl.c	(revision 276428)
@@ -1,2061 +1,2145 @@
 /*-
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/sysctl.h>
 #include <sys/errno.h>
 #include <sys/mman.h>
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
 #include <libutil.h>
 #include <fcntl.h>
 #include <string.h>
 #include <getopt.h>
+#include <time.h>
 #include <assert.h>
 
 #include <machine/cpufunc.h>
 #include <machine/vmm.h>
 #include <machine/specialreg.h>
 #include <vmmapi.h>
 
 #include "amd/vmcb.h"
 #include "intel/vmcs.h"
 
 #define	MB	(1UL << 20)
 #define	GB	(1UL << 30)
 
 #define	REQ_ARG		required_argument
 #define	NO_ARG		no_argument
 #define	OPT_ARG		optional_argument
 
 static const char *progname;
 
 /*
  * Print the command-line synopsis to stderr and terminate with exit status 1.
  *
  * The common option list is printed first (prefixed with 'progname'),
  * followed by the vendor-specific options: the VMCS options when 'cpu_intel'
  * is true, the VMCB/AVIC options otherwise.  This function does not return.
  */
 static void
 usage(bool cpu_intel)
 {
 
 	(void)fprintf(stderr,
 	"Usage: %s --vm=<vmname>\n"
 	"       [--cpu=<vcpu_number>]\n"
 	"       [--create]\n"
 	"       [--destroy]\n"
 	"       [--get-all]\n"
 	"       [--get-stats]\n"
 	"       [--set-desc-ds]\n"
 	"       [--get-desc-ds]\n"
 	"       [--set-desc-es]\n"
 	"       [--get-desc-es]\n"
 	"       [--set-desc-gs]\n"
 	"       [--get-desc-gs]\n"
 	"       [--set-desc-fs]\n"
 	"       [--get-desc-fs]\n"
 	"       [--set-desc-cs]\n"
 	"       [--get-desc-cs]\n"
 	"       [--set-desc-ss]\n"
 	"       [--get-desc-ss]\n"
 	"       [--set-desc-tr]\n"
 	"       [--get-desc-tr]\n"
 	"       [--set-desc-ldtr]\n"
 	"       [--get-desc-ldtr]\n"
 	"       [--set-desc-gdtr]\n"
 	"       [--get-desc-gdtr]\n"
 	"       [--set-desc-idtr]\n"
 	"       [--get-desc-idtr]\n"
 	"       [--run]\n"
 	"       [--capname=<capname>]\n"
 	"       [--getcap]\n"
 	"       [--setcap=<0|1>]\n"
 	"       [--desc-base=<BASE>]\n"
 	"       [--desc-limit=<LIMIT>]\n"
 	"       [--desc-access=<ACCESS>]\n"
 	"       [--set-cr0=<CR0>]\n"
 	"       [--get-cr0]\n"
 	"       [--set-cr3=<CR3>]\n"
 	"       [--get-cr3]\n"
 	"       [--set-cr4=<CR4>]\n"
 	"       [--get-cr4]\n"
 	"       [--set-dr7=<DR7>]\n"
 	"       [--get-dr7]\n"
 	"       [--set-rsp=<RSP>]\n"
 	"       [--get-rsp]\n"
 	"       [--set-rip=<RIP>]\n"
 	"       [--get-rip]\n"
 	"       [--get-rax]\n"
 	"       [--set-rax=<RAX>]\n"
 	"       [--get-rbx]\n"
 	"       [--get-rcx]\n"
 	"       [--get-rdx]\n"
 	"       [--get-rsi]\n"
 	"       [--get-rdi]\n"
 	"       [--get-rbp]\n"
 	"       [--get-r8]\n"
 	"       [--get-r9]\n"
 	"       [--get-r10]\n"
 	"       [--get-r11]\n"
 	"       [--get-r12]\n"
 	"       [--get-r13]\n"
 	"       [--get-r14]\n"
 	"       [--get-r15]\n"
 	"       [--set-rflags=<RFLAGS>]\n"
 	"       [--get-rflags]\n"
 	"       [--set-cs]\n"
 	"       [--get-cs]\n"
 	"       [--set-ds]\n"
 	"       [--get-ds]\n"
 	"       [--set-es]\n"
 	"       [--get-es]\n"
 	"       [--set-fs]\n"
 	"       [--get-fs]\n"
 	"       [--set-gs]\n"
 	"       [--get-gs]\n"
 	"       [--set-ss]\n"
 	"       [--get-ss]\n"
 	"       [--get-tr]\n"
 	"       [--get-ldtr]\n"
 	"       [--set-x2apic-state=<state>]\n"
 	"       [--get-x2apic-state]\n"
 	"       [--unassign-pptdev=<bus/slot/func>]\n"
 	"       [--set-mem=<memory in units of MB>]\n"
 	"       [--get-lowmem]\n"
 	"       [--get-highmem]\n"
 	"       [--get-gpa-pmap]\n"
 	"       [--assert-lapic-lvt=<pin>]\n"
 	"       [--inject-nmi]\n"
 	"       [--force-reset]\n"
 	"       [--force-poweroff]\n"
+	"       [--get-rtc-time]\n"
+	"       [--set-rtc-time=<secs>]\n"
+	"       [--get-rtc-nvram]\n"
+	"       [--set-rtc-nvram=<val>]\n"
+	"       [--rtc-nvram-offset=<offset>]\n"
 	"       [--get-active-cpus]\n"
 	"       [--get-suspended-cpus]\n"
 	"       [--get-intinfo]\n"
 	"       [--get-eptp]\n"
 	"       [--set-exception-bitmap]\n"
 	"       [--get-exception-bitmap]\n"
 	"       [--get-tsc-offset]\n"
 	"       [--get-guest-pat]\n"
 	"       [--get-io-bitmap-address]\n"
 	"       [--get-msr-bitmap]\n"
 	"       [--get-msr-bitmap-address]\n"
 	"       [--get-guest-sysenter]\n"
 	"       [--get-exit-reason]\n",
 	progname);
 
 	if (cpu_intel) {
 		/* Intel hosts: options that read or write VMCS fields. */
 		(void)fprintf(stderr,
 		"       [--get-vmcs-pinbased-ctls]\n"
 		"       [--get-vmcs-procbased-ctls]\n"
 		"       [--get-vmcs-procbased-ctls2]\n"
 		"       [--get-vmcs-entry-interruption-info]\n"
 		"       [--set-vmcs-entry-interruption-info=<info>]\n"
 		"       [--get-vmcs-guest-physical-address]\n"
 		"       [--get-vmcs-guest-linear-address]\n"
 		"       [--get-vmcs-host-pat]\n"
 		"       [--get-vmcs-host-cr0]\n"
 		"       [--get-vmcs-host-cr3]\n"
 		"       [--get-vmcs-host-cr4]\n"
 		"       [--get-vmcs-host-rip]\n"
 		"       [--get-vmcs-host-rsp]\n"
 		"       [--get-vmcs-cr0-mask]\n"
 		"       [--get-vmcs-cr0-shadow]\n"
 		"       [--get-vmcs-cr4-mask]\n"
 		"       [--get-vmcs-cr4-shadow]\n"
 		"       [--get-vmcs-cr3-targets]\n"
 		"       [--get-vmcs-apic-access-address]\n"
 		"       [--get-vmcs-virtual-apic-address]\n"
 		"       [--get-vmcs-tpr-threshold]\n"
 		"       [--get-vmcs-vpid]\n"
 		"       [--get-vmcs-instruction-error]\n"
 		"       [--get-vmcs-exit-ctls]\n"
 		"       [--get-vmcs-entry-ctls]\n"
 		"       [--get-vmcs-link]\n"
 		"       [--get-vmcs-exit-qualification]\n"
 		"       [--get-vmcs-exit-interruption-info]\n"
 		"       [--get-vmcs-exit-interruption-error]\n"
 		"       [--get-vmcs-interruptibility]\n"
 		);
 	} else {
 		/* Otherwise: options that read VMCB/AVIC state. */
 		(void)fprintf(stderr,
 		"       [--get-vmcb-intercepts]\n"
 		"       [--get-vmcb-asid]\n"
 		"       [--get-vmcb-exit-details]\n"
 		"       [--get-vmcb-tlb-ctrl]\n"
 		"       [--get-vmcb-virq]\n"
 		"       [--get-avic-apic-bar]\n"
 		"       [--get-avic-backing-page]\n"
 		"       [--get-avic-table]\n"
 		);
 	}
 	exit(1);
 }
 
+static int get_rtc_time, set_rtc_time;
+static int get_rtc_nvram, set_rtc_nvram;
+static int rtc_nvram_offset;
+static uint8_t rtc_nvram_value;
+static time_t rtc_secs;
+
 static int get_stats, getcap, setcap, capval, get_gpa_pmap;
 static int inject_nmi, assert_lapic_lvt;
 static int force_reset, force_poweroff;
 static const char *capname;
 static int create, destroy, get_lowmem, get_highmem;
 static int get_intinfo;
 static int get_active_cpus, get_suspended_cpus;
 static uint64_t memsize;
 static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4;
 static int set_efer, get_efer;
 static int set_dr7, get_dr7;
 static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, get_rflags;
 static int set_rax, get_rax;
 static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp;
 static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15;
 static int set_desc_ds, get_desc_ds;
 static int set_desc_es, get_desc_es;
 static int set_desc_fs, get_desc_fs;
 static int set_desc_gs, get_desc_gs;
 static int set_desc_cs, get_desc_cs;
 static int set_desc_ss, get_desc_ss;
 static int set_desc_gdtr, get_desc_gdtr;
 static int set_desc_idtr, get_desc_idtr;
 static int set_desc_tr, get_desc_tr;
 static int set_desc_ldtr, get_desc_ldtr;
 static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr;
 static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr;
 static int set_x2apic_state, get_x2apic_state;
 enum x2apic_state x2apic_state;
 static int unassign_pptdev, bus, slot, func;
 static int run;
 
 /*
  * VMCB specific.
  */
 static int get_vmcb_intercept, get_vmcb_exit_details, get_vmcb_tlb_ctrl;
 static int get_vmcb_virq, get_avic_table;
 
 /*
  * VMCS-specific fields
  */
 static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2;
 static int get_eptp, get_io_bitmap, get_tsc_offset;
 static int get_vmcs_entry_interruption_info, set_vmcs_entry_interruption_info;
 static int get_vmcs_interruptibility;
 uint32_t vmcs_entry_interruption_info;
 static int get_vmcs_gpa, get_vmcs_gla;
 static int get_exception_bitmap, set_exception_bitmap, exception_bitmap;
 static int get_cr0_mask, get_cr0_shadow;
 static int get_cr4_mask, get_cr4_shadow;
 static int get_cr3_targets;
 static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold;
 static int get_msr_bitmap, get_msr_bitmap_address;
 static int get_vpid_asid;
 static int get_inst_err, get_exit_ctls, get_entry_ctls;
 static int get_host_cr0, get_host_cr3, get_host_cr4;
 static int get_host_rip, get_host_rsp;
 static int get_guest_pat, get_host_pat;
 static int get_guest_sysenter, get_vmcs_link;
 static int get_exit_reason, get_vmcs_exit_qualification;
 static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error;
 
 static uint64_t desc_base;
 static uint32_t desc_limit, desc_access;
 
 static int get_all;
 
 /*
  * Print a human-readable dump of a VM exit record for 'vcpu' on stdout.
  *
  * The guest %rip and instruction length are always shown; the remaining
  * lines depend on the exit code (INOUT, VMX or SVM).  Any other exit code is
  * reported as unknown.
  */
 static void
 dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu)
 {
 	int code;
 
 	printf("vm exit[%d]\n", vcpu);
 	printf("\trip\t\t0x%016lx\n", vmexit->rip);
 	printf("\tinst_length\t%d\n", vmexit->inst_length);
 
 	code = vmexit->exitcode;
 	if (code == VM_EXITCODE_INOUT) {
 		/* Port I/O: direction, width, string/rep flags, port, data. */
 		printf("\treason\t\tINOUT\n");
 		printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT");
 		printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes);
 		printf("\tflags\t\t%s%s\n",
 		    vmexit->u.inout.string ? "STRING " : "",
 		    vmexit->u.inout.rep ? "REP " : "");
 		printf("\tport\t\t0x%04x\n", vmexit->u.inout.port);
 		printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax);
 	} else if (code == VM_EXITCODE_VMX) {
 		printf("\treason\t\tVMX\n");
 		printf("\tstatus\t\t%d\n", vmexit->u.vmx.status);
 		printf("\texit_reason\t0x%08x (%u)\n",
 		    vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason);
 		printf("\tqualification\t0x%016lx\n",
 		    vmexit->u.vmx.exit_qualification);
 		printf("\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
 		printf("\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
 	} else if (code == VM_EXITCODE_SVM) {
 		printf("\treason\t\tSVM\n");
 		printf("\texit_reason\t\t%#lx\n", vmexit->u.svm.exitcode);
 		printf("\texitinfo1\t\t%#lx\n", vmexit->u.svm.exitinfo1);
 		printf("\texitinfo2\t\t%#lx\n", vmexit->u.svm.exitinfo2);
 	} else {
 		printf("*** unknown vm run exitcode %d\n", vmexit->exitcode);
 	}
 }
 
 /* AMD 6th generation and Intel compatible MSRs */
 #define MSR_AMD6TH_START	0xC0000000
 #define MSR_AMD6TH_END		0xC0001FFF
 /* AMD 7th and 8th generation compatible MSRs */
 #define MSR_AMD7TH_START	0xC0010000
 #define MSR_AMD7TH_END		0xC0011FFF
 
 /*
  * Map an MSR number to a printable name.
  *
  * Known MSRs yield a string literal.  Anything else is formatted into a
  * function-local static buffer, so that result is overwritten by the next
  * call that also falls through to the formatted form.
  */
 static const char *
 msr_name(uint32_t msr)
 {
 	static const struct {
 		uint32_t	num;
 		const char	*name;
 	} known[] = {
 		{ MSR_TSC,		"MSR_TSC" },
 		{ MSR_EFER,		"MSR_EFER" },
 		{ MSR_STAR,		"MSR_STAR" },
 		{ MSR_LSTAR,		"MSR_LSTAR" },
 		{ MSR_CSTAR,		"MSR_CSTAR" },
 		{ MSR_SF_MASK,		"MSR_SF_MASK" },
 		{ MSR_FSBASE,		"MSR_FSBASE" },
 		{ MSR_GSBASE,		"MSR_GSBASE" },
 		{ MSR_KGSBASE,		"MSR_KGSBASE" },
 		{ MSR_SYSENTER_CS_MSR,	"MSR_SYSENTER_CS_MSR" },
 		{ MSR_SYSENTER_ESP_MSR,	"MSR_SYSENTER_ESP_MSR" },
 		{ MSR_SYSENTER_EIP_MSR,	"MSR_SYSENTER_EIP_MSR" },
 		{ MSR_PAT,		"MSR_PAT" },
 	};
 	static char buf[32];
 	size_t i;
 
 	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
 		if (known[i].num == msr)
 			return (known[i].name);
 	}
 
 	/* Not a named MSR: fall back to its number. */
 	snprintf(buf, sizeof(buf), "MSR       %#08x", msr);
 	return (buf);
 }
 
 /*
  * Print one "name[vcpu]  RW" line describing the access permissions of
  * 'msr'.  Nothing is printed when the MSR is neither readable nor writeable.
  */
 static inline void
 print_msr_pm(uint64_t msr, int vcpu, int readable, int writeable)
 {
 	char rflag, wflag;
 
 	if (!readable && !writeable)
 		return;
 
 	rflag = readable ? 'R' : '-';
 	wflag = writeable ? 'W' : '-';
 	printf("%-20s[%d]\t\t%c%c\n", msr_name(msr), vcpu, rflag, wflag);
 }
 
 /*
  * Reference APM vol2, section 15.11 MSR Intercepts.
  *
  * Print the permissions encoded in an AMD MSR permission map.  Every MSR
  * owns two adjacent bits: the lower one covers reads and the upper one
  * writes; a clear bit is reported as "accessible".  The map consists of
  * three 2KB vectors followed by a reserved area:
  *
  *	bytes 0x0000 - 0x07FF	MSRs 0x00000000 - 0x00001FFF
  *	bytes 0x0800 - 0x0FFF	MSRs 0xC0000000 - 0xC0001FFF
  *	bytes 0x1000 - 0x17FF	MSRs 0xC0010000 - 0xC0011FFF
  *	bytes 0x1800 - 0x1FFF	reserved
  *
  * 'bitmap' must therefore point to at least 0x1800 readable bytes.
  */
 static void
 dump_amd_msr_pm(const char *bitmap, int vcpu)
 {
 	int byte, bit, readable, writeable;
 	uint32_t msr;
 
 	for (msr = 0; msr < 0x2000; msr++) {
 		/* Four MSRs per byte, two bits per MSR. */
 		byte = msr / 4;
 		bit = (msr % 4) * 2;
 
 		/* Look at MSRs in the range 0x00000000 to 0x00001FFF */
 		readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
 		writeable = (bitmap[byte] & (2 << bit)) ?  0 : 1;
 		print_msr_pm(msr, vcpu, readable, writeable);
 
 		/* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */
 		byte += 2048;
 		readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
 		writeable = (bitmap[byte] & (2 << bit)) ?  0 : 1;
 		print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable,
 				writeable);
 
 		/*
 		 * MSRs 0xC0010000 to 0xC0011FFF are only for AMD.  Their
 		 * vector is the next 2KB one (starting at byte 0x1000);
 		 * stepping any further would land in the reserved area.
 		 */
 		byte += 2048;
 		readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
 		writeable = (bitmap[byte] & (2 << bit)) ?  0 : 1;
 		print_msr_pm(msr + MSR_AMD7TH_START, vcpu, readable,
 				writeable);
 	}
 }
 
 /*
  * Reference Intel SDM Vol3 Section 24.6.9 MSR-Bitmap Address
  *
  * Print the permissions encoded in an Intel MSR bitmap.  One bit per MSR;
  * a clear bit is reported as "accessible".  As indexed below, the read bits
  * for the low MSR range start at byte 0 and those for the 0xC0000000 range
  * at byte 1024, with the matching write bits 2048 bytes further on.
  */
 static void
 dump_intel_msr_pm(const char *bitmap, int vcpu)
 {
 	uint32_t msr;
 	int lo, hi, mask;
 
 	for (msr = 0; msr < 0x2000; msr++) {
 		lo = msr / 8;
 		hi = lo + 1024;
 		mask = 1 << (msr & 0x7);
 
 		/* Look at MSRs in the range 0x00000000 to 0x00001FFF */
 		print_msr_pm(msr, vcpu,
 		    (bitmap[lo] & mask) == 0,
 		    (bitmap[2048 + lo] & mask) == 0);
 
 		/* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */
 		print_msr_pm(msr + MSR_AMD6TH_START, vcpu,
 		    (bitmap[hi] & mask) == 0,
 		    (bitmap[2048 + hi] & mask) == 0);
 	}
 }
 
 /*
  * Map the MSR permission bitmap found at offset 'addr' of /dev/mem and dump
  * it for 'vcpu'.  One page is mapped when 'cpu_intel' is true, two pages
  * otherwise.
  *
  * Returns 0 on success and -1 when /dev/mem cannot be opened or mapped
  * (a message is printed with perror() in either case).
  */
 static int
 dump_msr_bitmap(int vcpu, uint64_t addr, bool cpu_intel)
 {
 	const char *map;
 	int devmem, len, status;
 
 	devmem = open("/dev/mem", O_RDONLY, 0);
 	if (devmem < 0) {
 		perror("Couldn't open /dev/mem");
 		return (-1);
 	}
 
 	len = cpu_intel ? PAGE_SIZE : 2 * PAGE_SIZE;
 
 	map = mmap(NULL, len, PROT_READ, MAP_SHARED, devmem, addr);
 	if (map == MAP_FAILED) {
 		perror("mmap failed");
 		status = -1;
 	} else {
 		if (cpu_intel)
 			dump_intel_msr_pm(map, vcpu);
 		else
 			dump_amd_msr_pm(map, vcpu);
 		munmap((void *)map, len);
 		status = 0;
 	}
 
 	close(devmem);
 	return (status);
 }
 
 /*
  * Read a VMCS field of 'vcpu' through vm_get_register(), using VMCS_IDENT()
  * to turn 'field' into a register identifier.  Returns whatever
  * vm_get_register() returns.
  */
 static int
 vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val)
 {
 	int ident;
 
 	ident = VMCS_IDENT(field);
 	return (vm_get_register(ctx, vcpu, ident, ret_val));
 }
 
 /*
  * Write 'val' to a VMCS field of 'vcpu' through vm_set_register(), using
  * VMCS_IDENT() to turn 'field' into a register identifier.  Returns whatever
  * vm_set_register() returns.
  */
 static int
 vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val)
 {
 	int ident;
 
 	ident = VMCS_IDENT(field);
 	return (vm_set_register(ctx, vcpu, ident, val));
 }
 
 /*
  * Read 'bytes' bytes at offset 'off' of the VMCB of 'vcpu' through
  * vm_get_register(), using VMCB_ACCESS() to build the register identifier.
  * Returns whatever vm_get_register() returns.
  */
 static int
 vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
 	uint64_t *ret_val)
 {
 	int ident;
 
 	ident = VMCB_ACCESS(off, bytes);
 	return (vm_get_register(ctx, vcpu, ident, ret_val));
 }
 
 /*
  * Write 'val' as 'bytes' bytes at offset 'off' of the VMCB of 'vcpu' through
  * vm_set_register(), using VMCB_ACCESS() to build the register identifier.
  * Returns whatever vm_set_register() returns.
  */
 static int
 vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
 	uint64_t val)
 {
 	int ident;
 
 	ident = VMCB_ACCESS(off, bytes);
 	return (vm_set_register(ctx, vcpu, ident, val));
 }
 
 enum {
 	VMNAME = 1000,	/* avoid collision with return values from getopt */
 	VCPU,
 	SET_MEM,
 	SET_EFER,
 	SET_CR0,
 	SET_CR3,
 	SET_CR4,
 	SET_DR7,
 	SET_RSP,
 	SET_RIP,
 	SET_RAX,
 	SET_RFLAGS,
 	DESC_BASE,
 	DESC_LIMIT,
 	DESC_ACCESS,
 	SET_CS,
 	SET_DS,
 	SET_ES,
 	SET_FS,
 	SET_GS,
 	SET_SS,
 	SET_TR,
 	SET_LDTR,
 	SET_X2APIC_STATE,
 	SET_EXCEPTION_BITMAP,
 	SET_VMCS_ENTRY_INTERRUPTION_INFO,
 	SET_CAP,
 	CAPNAME,
 	UNASSIGN_PPTDEV,
 	GET_GPA_PMAP,
 	ASSERT_LAPIC_LVT,
+	SET_RTC_TIME,
+	SET_RTC_NVRAM,
+	RTC_NVRAM_OFFSET,
 };
 
 /*
  * Print "<banner>:" followed by the comma-separated members of 'cpus', or
  * " (none)" when the set is empty, and finish the line.
  */
 static void
 print_cpus(const char *banner, const cpuset_t *cpus)
 {
 	const char *sep;
 	int cpu;
 
 	printf("%s:\t", banner);
 	if (CPU_EMPTY(cpus)) {
 		printf(" (none)");
 	} else {
 		/* The first member is preceded by a space, later ones by ", ". */
 		sep = " ";
 		for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
 			if (!CPU_ISSET(cpu, cpus))
 				continue;
 			printf("%s%d", sep, cpu);
 			sep = ", ";
 		}
 	}
 	printf("\n");
 }
 
 /*
  * Print "<banner>:" followed by a decoded form of the interrupt information
  * word 'info': event type, vector and, when VM_INTINFO_DEL_ERRCODE is set,
  * the error code held in the upper 32 bits.  "n/a" is printed when the
  * word is not marked valid.
  */
 static void
 print_intinfo(const char *banner, uint64_t info)
 {
 	int kind;
 
 	printf("%s:\t", banner);
 	if ((info & VM_INTINFO_VALID) == 0) {
 		printf("n/a");
 		printf("\n");
 		return;
 	}
 
 	kind = info & VM_INTINFO_TYPE;
 	if (kind == VM_INTINFO_HWINTR)
 		printf("extint");
 	else if (kind == VM_INTINFO_NMI)
 		printf("nmi");
 	else if (kind == VM_INTINFO_SWINTR)
 		printf("swint");
 	else
 		printf("exception");
 
 	printf(" vector %d", (int)VM_INTINFO_VECTOR(info));
 	if (info & VM_INTINFO_DEL_ERRCODE)
 		printf(" errcode %#x", (u_int)(info >> 32));
 	printf("\n");
 }
 
 /*
  * Identify the host CPU vendor from the CPUID leaf 0 vendor string.
  *
  * Returns true for "GenuineIntel" and false for "AuthenticAMD".  Any other
  * vendor string is reported on stderr and the program exits with status 1.
  */
 static bool
 cpu_vendor_intel(void)
 {
 	u_int regs[4];
 	char cpu_vendor[13];
 
 	do_cpuid(0, regs);
 
 	/*
 	 * The vendor string is assembled from regs[1], regs[3] and regs[2],
 	 * in that order.  Copy the bytes with memcpy() rather than storing
 	 * through a u_int pointer into the char array, which would break
 	 * the aliasing rules and assume an alignment the array need not have.
 	 */
 	memcpy(&cpu_vendor[0], &regs[1], sizeof(regs[1]));
 	memcpy(&cpu_vendor[4], &regs[3], sizeof(regs[3]));
 	memcpy(&cpu_vendor[8], &regs[2], sizeof(regs[2]));
 	cpu_vendor[12] = '\0';
 
 	if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
 		return (false);
 	} else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
 		return (true);
 	} else {
 		fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor);
 		exit(1);
 	}
 }
 
 /*
  * Print the guest registers selected by the get_<reg> flags for 'vcpu'.
  * Setting get_all selects every register handled here.
  *
  * Each value is fetched with vm_get_register() and printed only when
  * the fetch succeeds.  The first failing fetch suppresses all later
  * fetches and its error code is returned.  Returns 0 when every
  * requested fetch succeeded or when nothing was requested.
  */
 static int
 get_all_registers(struct vmctx *ctx, int vcpu)
 {
 	uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer;
 	uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp;
 	uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
 	int error;
 
 	/*
 	 * Must start out as "no error": the first fetch below is
 	 * conditional, and every later step tests 'error' before running.
 	 */
 	error = 0;
 
 	if (get_efer || get_all) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer);
 		if (error == 0)
 			printf("efer[%d]\t\t0x%016lx\n", vcpu, efer);
 	}
 
 	if (!error && (get_cr0 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0);
 		if (error == 0)
 			printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0);
 	}
 
 	if (!error && (get_cr3 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3);
 		if (error == 0)
 			printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3);
 	}
 
 	if (!error && (get_cr4 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4);
 		if (error == 0)
 			printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4);
 	}
 
 	if (!error && (get_dr7 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7);
 		if (error == 0)
 			printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7);
 	}
 
 	if (!error && (get_rsp || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp);
 		if (error == 0)
 			printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp);
 	}
 
 	if (!error && (get_rip || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
 		if (error == 0)
 			printf("rip[%d]\t\t0x%016lx\n", vcpu, rip);
 	}
 
 	if (!error && (get_rax || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax);
 		if (error == 0)
 			printf("rax[%d]\t\t0x%016lx\n", vcpu, rax);
 	}
 
 	if (!error && (get_rbx || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx);
 		if (error == 0)
 			printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx);
 	}
 
 	if (!error && (get_rcx || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx);
 		if (error == 0)
 			printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx);
 	}
 
 	if (!error && (get_rdx || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx);
 		if (error == 0)
 			printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx);
 	}
 
 	if (!error && (get_rsi || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi);
 		if (error == 0)
 			printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi);
 	}
 
 	if (!error && (get_rdi || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi);
 		if (error == 0)
 			printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi);
 	}
 
 	if (!error && (get_rbp || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp);
 		if (error == 0)
 			printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp);
 	}
 
 	if (!error && (get_r8 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8);
 		if (error == 0)
 			printf("r8[%d]\t\t0x%016lx\n", vcpu, r8);
 	}
 
 	if (!error && (get_r9 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9);
 		if (error == 0)
 			printf("r9[%d]\t\t0x%016lx\n", vcpu, r9);
 	}
 
 	if (!error && (get_r10 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10);
 		if (error == 0)
 			printf("r10[%d]\t\t0x%016lx\n", vcpu, r10);
 	}
 
 	if (!error && (get_r11 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11);
 		if (error == 0)
 			printf("r11[%d]\t\t0x%016lx\n", vcpu, r11);
 	}
 
 	if (!error && (get_r12 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12);
 		if (error == 0)
 			printf("r12[%d]\t\t0x%016lx\n", vcpu, r12);
 	}
 
 	if (!error && (get_r13 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13);
 		if (error == 0)
 			printf("r13[%d]\t\t0x%016lx\n", vcpu, r13);
 	}
 
 	if (!error && (get_r14 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14);
 		if (error == 0)
 			printf("r14[%d]\t\t0x%016lx\n", vcpu, r14);
 	}
 
 	if (!error && (get_r15 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15);
 		if (error == 0)
 			printf("r15[%d]\t\t0x%016lx\n", vcpu, r15);
 	}
 
 	if (!error && (get_rflags || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS,
 					&rflags);
 		if (error == 0)
 			printf("rflags[%d]\t0x%016lx\n", vcpu, rflags);
 	}
 
 	return (error);
 }
 
 /*
  * Print the guest segment descriptors and segment selectors selected by
  * the get_desc_<seg> and get_<seg> flags for 'vcpu'.  Setting get_all
  * selects all of them.
  *
  * Descriptors are fetched with vm_get_desc() into desc_base, desc_limit
  * and desc_access, which are declared outside this function and are
  * overwritten by every descriptor fetch.  Selectors are fetched with
  * vm_get_register().  A value is printed only when its fetch succeeds.
  * The first failing fetch suppresses all later ones and its error code
  * is returned.  Returns 0 when every requested fetch succeeded or when
  * nothing was requested.
  */
 static int
 get_all_segments(struct vmctx *ctx, int vcpu)
 {
 	int error;
 	uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
 
 	/*
 	 * Must start out as "no error": the first fetch below is
 	 * conditional, and every later step tests 'error' before running.
 	 */
 	error = 0;
 
 	if (get_desc_ds || get_all) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_es || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_fs || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_gs || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_ss || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_cs || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_tr || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_ldtr || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	/* For GDTR and IDTR only the base and limit are printed. */
 	if (!error && (get_desc_gdtr || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("gdtr[%d]\t\t0x%016lx/0x%08x\n",
 			       vcpu, desc_base, desc_limit);
 		}
 	}
 
 	if (!error && (get_desc_idtr || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("idtr[%d]\t\t0x%016lx/0x%08x\n",
 			       vcpu, desc_base, desc_limit);
 		}
 	}
 
 	if (!error && (get_cs || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs);
 		if (error == 0)
 			printf("cs[%d]\t\t0x%04lx\n", vcpu, cs);
 	}
 
 	if (!error && (get_ds || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds);
 		if (error == 0)
 			printf("ds[%d]\t\t0x%04lx\n", vcpu, ds);
 	}
 
 	if (!error && (get_es || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es);
 		if (error == 0)
 			printf("es[%d]\t\t0x%04lx\n", vcpu, es);
 	}
 
 	if (!error && (get_fs || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_FS, &fs);
 		if (error == 0)
 			printf("fs[%d]\t\t0x%04lx\n", vcpu, fs);
 	}
 
 	if (!error && (get_gs || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs);
 		if (error == 0)
 			printf("gs[%d]\t\t0x%04lx\n", vcpu, gs);
 	}
 
 	if (!error && (get_ss || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss);
 		if (error == 0)
 			printf("ss[%d]\t\t0x%04lx\n", vcpu, ss);
 	}
 
 	if (!error && (get_tr || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr);
 		if (error == 0)
 			printf("tr[%d]\t\t0x%04lx\n", vcpu, tr);
 	}
 
 	if (!error && (get_ldtr || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr);
 		if (error == 0)
 			printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr);
 	}
 
 	return (error);
 }
 
 static int
 get_misc_vmcs(struct vmctx *ctx, int vcpu)
 {
 	uint64_t ctl, cr0, cr3, cr4, rsp, rip, pat, addr, u64;
 	int error;
 	
 	if (get_cr0_mask || get_all) {
 		uint64_t cr0mask;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask);
 		if (error == 0)
 			printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask);
 	}
 
 	if (!error && (get_cr0_shadow || get_all)) {
 		uint64_t cr0shadow;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW,
 					  &cr0shadow);
 		if (error == 0)
 			printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow);
 	}
 
 	if (!error && (get_cr4_mask || get_all)) {
 		uint64_t cr4mask;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask);
 		if (error == 0)
 			printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask);
 	}
 
 	if (!error && (get_cr4_shadow || get_all)) {
 		uint64_t cr4shadow;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW,
 					  &cr4shadow);
 		if (error == 0)
 			printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow);
 	}
 	
 	if (!error && (get_cr3_targets || get_all)) {
 		uint64_t target_count, target_addr;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT,
 					  &target_count);
 		if (error == 0) {
 			printf("cr3_target_count[%d]\t0x%016lx\n",
 				vcpu, target_count);
 		}
 
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0,
 					  &target_addr);
 		if (error == 0) {
 			printf("cr3_target0[%d]\t\t0x%016lx\n",
 				vcpu, target_addr);
 		}
 
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1,
 					  &target_addr);
 		if (error == 0) {
 			printf("cr3_target1[%d]\t\t0x%016lx\n",
 				vcpu, target_addr);
 		}
 
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2,
 					  &target_addr);
 		if (error == 0) {
 			printf("cr3_target2[%d]\t\t0x%016lx\n",
 				vcpu, target_addr);
 		}
 
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3,
 					  &target_addr);
 		if (error == 0) {
 			printf("cr3_target3[%d]\t\t0x%016lx\n",
 				vcpu, target_addr);
 		}
 	}
 
 	if (!error && (get_pinbased_ctls || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl);
 		if (error == 0)
 			printf("pinbased_ctls[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_procbased_ctls || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_PRI_PROC_BASED_CTLS, &ctl);
 		if (error == 0)
 			printf("procbased_ctls[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_procbased_ctls2 || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_SEC_PROC_BASED_CTLS, &ctl);
 		if (error == 0)
 			printf("procbased_ctls2[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_vmcs_gla || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_GUEST_LINEAR_ADDRESS, &u64);
 		if (error == 0)
 			printf("gla[%d]\t\t0x%016lx\n", vcpu, u64);
 	}
 
 	if (!error && (get_vmcs_gpa || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_GUEST_PHYSICAL_ADDRESS, &u64);
 		if (error == 0)
 			printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64);
 	}
 
 	if (!error && (get_vmcs_entry_interruption_info || 
 		get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64);
 		if (error == 0) {
 			printf("entry_interruption_info[%d]\t0x%016lx\n",
 				vcpu, u64);
 		}
 	}
 	
 	if (!error && (get_tpr_threshold || get_all)) {
 		uint64_t threshold;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD,
 					  &threshold);
 		if (error == 0)
 			printf("tpr_threshold[%d]\t0x%016lx\n", vcpu, threshold);
 	}
 
 	if (!error && (get_inst_err || get_all)) {
 		uint64_t insterr;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR,
 					  &insterr);
 		if (error == 0) {
 			printf("instruction_error[%d]\t0x%016lx\n",
 				vcpu, insterr);
 		}
 	}
 	
 	if (!error && (get_exit_ctls || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl);
 		if (error == 0)
 			printf("exit_ctls[%d]\t\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_entry_ctls || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl);
 		if (error == 0)
 			printf("entry_ctls[%d]\t\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_host_pat || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat);
 		if (error == 0)
 			printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat);
 	}
 
 	if (!error && (get_host_cr0 || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0);
 		if (error == 0)
 			printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0);
 	}
 
 	if (!error && (get_host_cr3 || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3);
 		if (error == 0)
 			printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3);
 	}
 
 	if (!error && (get_host_cr4 || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4);
 		if (error == 0)
 			printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4);
 	}
 
 	if (!error && (get_host_rip || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip);
 		if (error == 0)
 			printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip);
 	}
 
 	if (!error && (get_host_rsp || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp);
 		if (error == 0)
 			printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp);
 	}
 	
 	if (!error && (get_vmcs_link || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr);
 		if (error == 0)
 			printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr);
 	}
 
 	if (!error && (get_vmcs_exit_interruption_info || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_INFO, &u64);
 		if (error == 0) {
 			printf("vmcs_exit_interruption_info[%d]\t0x%016lx\n",
 				vcpu, u64);
 		}
 	}
 
 	if (!error && (get_vmcs_exit_interruption_error || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_ERRCODE,
 		    			  &u64);
 		if (error == 0) {
 			printf("vmcs_exit_interruption_error[%d]\t0x%016lx\n",
 				vcpu, u64);
 		}
 	}
 
 	if (!error && (get_vmcs_interruptibility || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_GUEST_INTERRUPTIBILITY, &u64);
 		if (error == 0) {
 			printf("vmcs_guest_interruptibility[%d]\t0x%016lx\n",
 				vcpu, u64);
 		}
 	}
 	
 	if (!error && (get_vmcs_exit_qualification || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION,
 					  &u64);
 		if (error == 0)
 			printf("vmcs_exit_qualification[%d]\t0x%016lx\n",
 				vcpu, u64);
 	}
 	
 	return (error);
 }
 
 /*
  * Print the VMCB fields selected by the corresponding get_* flags for
  * 'vcpu'.  Setting get_all selects every field handled here.
  *
  * Each field is read with vm_get_vmcb_field(), passing the field offset
  * and its width in bytes, and is printed only when the read succeeds.
  * A group is skipped once 'error' is non-zero.  Within a group that
  * performs several reads, all reads are attempted and only the status
  * of the last one is carried forward.  Returns the resulting status,
  * which is 0 when nothing was requested.
  */
 static int
 get_misc_vmcb(struct vmctx *ctx, int vcpu)
 {
 	uint64_t ctl, addr;
 	int error;
 
 	/*
 	 * Must start out as "no error": the first group below is
 	 * conditional, and every later group tests 'error' before running.
 	 */
 	error = 0;
 
 	if (get_vmcb_intercept || get_all) {
 		/* 4-byte fields; printed as 32-bit values. */
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_CR_INTERCEPT, 4,
 		    &ctl);
 		if (error == 0)
 			printf("cr_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_DR_INTERCEPT, 4,
 		    &ctl);
 		if (error == 0)
 			printf("dr_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXC_INTERCEPT, 4,
 		    &ctl);
 		if (error == 0)
 			printf("exc_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_INST1_INTERCEPT,
 		    4, &ctl);
 		if (error == 0)
 			printf("inst1_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_INST2_INTERCEPT,
 		    4, &ctl);
 		if (error == 0)
 			printf("inst2_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 	}
 
 	if (!error && (get_vmcb_tlb_ctrl || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_TLB_CTRL,
 					  4, &ctl);
 		if (error == 0)
 			printf("TLB ctrl[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_vmcb_exit_details || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINFO1,
 					  8, &ctl);
 		if (error == 0)
 			printf("exitinfo1[%d]\t0x%016lx\n", vcpu, ctl);
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINFO2,
 					  8, &ctl);
 		if (error == 0)
 			printf("exitinfo2[%d]\t0x%016lx\n", vcpu, ctl);
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINTINFO,
 					  8, &ctl);
 		if (error == 0)
 			printf("exitintinfo[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_vmcb_virq || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_VIRQ,
 					  8, &ctl);
 		if (error == 0)
 			printf("v_irq/tpr[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_apic_access_addr || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_BAR, 8,
 					  &addr);
 		if (error == 0)
 			printf("AVIC apic_bar[%d]\t0x%016lx\n", vcpu, addr);
 	}
 
 	if (!error && (get_virtual_apic_addr || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_PAGE, 8,
 					  &addr);
 		if (error == 0)
 			printf("AVIC backing page[%d]\t0x%016lx\n", vcpu, addr);
 	}
 
 	if (!error && (get_avic_table || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_LT, 8,
 					  &addr);
 		if (error == 0)
 			printf("AVIC logical table[%d]\t0x%016lx\n",
 				vcpu, addr);
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_PT, 8,
 					  &addr);
 		if (error == 0)
 			printf("AVIC physical table[%d]\t0x%016lx\n",
 				vcpu, addr);
 	}
 
 	return (error);
 }
 
 /*
  * Build the long-option table handed to getopt_long().
  *
  * The table is the concatenation of the options common to both CPU
  * vendors, the Intel-only options when 'cpu_intel' is true or the
  * AMD-only options otherwise, and a final all-zero entry that marks the
  * end of the table.
  *
  * The result is allocated with malloc() and is not freed by this
  * function.  If the allocation fails a message is printed with perror()
  * and the program exits with status 1, so the return value is never
  * NULL.
  */
 static struct option *
 setup_options(bool cpu_intel)
 {
 	const struct option common_opts[] = {
 		{ "vm",		REQ_ARG,	0,	VMNAME },
 		{ "cpu",	REQ_ARG,	0,	VCPU },
 		{ "set-mem",	REQ_ARG,	0,	SET_MEM },
 		{ "set-efer",	REQ_ARG,	0,	SET_EFER },
 		{ "set-cr0",	REQ_ARG,	0,	SET_CR0 },
 		{ "set-cr3",	REQ_ARG,	0,	SET_CR3 },
 		{ "set-cr4",	REQ_ARG,	0,	SET_CR4 },
 		{ "set-dr7",	REQ_ARG,	0,	SET_DR7 },
 		{ "set-rsp",	REQ_ARG,	0,	SET_RSP },
 		{ "set-rip",	REQ_ARG,	0,	SET_RIP },
 		{ "set-rax",	REQ_ARG,	0,	SET_RAX },
 		{ "set-rflags",	REQ_ARG,	0,	SET_RFLAGS },
 		{ "desc-base",	REQ_ARG,	0,	DESC_BASE },
 		{ "desc-limit",	REQ_ARG,	0,	DESC_LIMIT },
 		{ "desc-access",REQ_ARG,	0,	DESC_ACCESS },
 		{ "set-cs",	REQ_ARG,	0,	SET_CS },
 		{ "set-ds",	REQ_ARG,	0,	SET_DS },
 		{ "set-es",	REQ_ARG,	0,	SET_ES },
 		{ "set-fs",	REQ_ARG,	0,	SET_FS },
 		{ "set-gs",	REQ_ARG,	0,	SET_GS },
 		{ "set-ss",	REQ_ARG,	0,	SET_SS },
 		{ "set-tr",	REQ_ARG,	0,	SET_TR },
 		{ "set-ldtr",	REQ_ARG,	0,	SET_LDTR },
 		{ "set-x2apic-state",REQ_ARG,	0,	SET_X2APIC_STATE },
 		{ "set-exception-bitmap",
 				REQ_ARG,	0, SET_EXCEPTION_BITMAP },
 		{ "capname",	REQ_ARG,	0,	CAPNAME },
 		{ "unassign-pptdev", REQ_ARG,	0,	UNASSIGN_PPTDEV },
 		{ "setcap",	REQ_ARG,	0,	SET_CAP },
 		{ "get-gpa-pmap", REQ_ARG,	0,	GET_GPA_PMAP },
 		{ "assert-lapic-lvt", REQ_ARG,	0,	ASSERT_LAPIC_LVT },
+		{ "get-rtc-time", NO_ARG,	&get_rtc_time,	1 },
+		{ "set-rtc-time", REQ_ARG,	0,	SET_RTC_TIME },
+		{ "rtc-nvram-offset", REQ_ARG,	0,	RTC_NVRAM_OFFSET },
+		{ "get-rtc-nvram", NO_ARG,	&get_rtc_nvram,	1 },
+		{ "set-rtc-nvram", REQ_ARG,	0,	SET_RTC_NVRAM },
 		{ "getcap",	NO_ARG,		&getcap,	1 },
 		{ "get-stats",	NO_ARG,		&get_stats,	1 },
 		{ "get-desc-ds",NO_ARG,		&get_desc_ds,	1 },
 		{ "set-desc-ds",NO_ARG,		&set_desc_ds,	1 },
 		{ "get-desc-es",NO_ARG,		&get_desc_es,	1 },
 		{ "set-desc-es",NO_ARG,		&set_desc_es,	1 },
 		{ "get-desc-ss",NO_ARG,		&get_desc_ss,	1 },
 		{ "set-desc-ss",NO_ARG,		&set_desc_ss,	1 },
 		{ "get-desc-cs",NO_ARG,		&get_desc_cs,	1 },
 		{ "set-desc-cs",NO_ARG,		&set_desc_cs,	1 },
 		{ "get-desc-fs",NO_ARG,		&get_desc_fs,	1 },
 		{ "set-desc-fs",NO_ARG,		&set_desc_fs,	1 },
 		{ "get-desc-gs",NO_ARG,		&get_desc_gs,	1 },
 		{ "set-desc-gs",NO_ARG,		&set_desc_gs,	1 },
 		{ "get-desc-tr",NO_ARG,		&get_desc_tr,	1 },
 		{ "set-desc-tr",NO_ARG,		&set_desc_tr,	1 },
 		{ "set-desc-ldtr", NO_ARG,	&set_desc_ldtr,	1 },
 		{ "get-desc-ldtr", NO_ARG,	&get_desc_ldtr,	1 },
 		{ "set-desc-gdtr", NO_ARG,	&set_desc_gdtr, 1 },
 		{ "get-desc-gdtr", NO_ARG,	&get_desc_gdtr, 1 },
 		{ "set-desc-idtr", NO_ARG,	&set_desc_idtr, 1 },
 		{ "get-desc-idtr", NO_ARG,	&get_desc_idtr, 1 },
 		{ "get-lowmem", NO_ARG,		&get_lowmem,	1 },
 		{ "get-highmem",NO_ARG,		&get_highmem,	1 },
 		{ "get-efer",	NO_ARG,		&get_efer,	1 },
 		{ "get-cr0",	NO_ARG,		&get_cr0,	1 },
 		{ "get-cr3",	NO_ARG,		&get_cr3,	1 },
 		{ "get-cr4",	NO_ARG,		&get_cr4,	1 },
 		{ "get-dr7",	NO_ARG,		&get_dr7,	1 },
 		{ "get-rsp",	NO_ARG,		&get_rsp,	1 },
 		{ "get-rip",	NO_ARG,		&get_rip,	1 },
 		{ "get-rax",	NO_ARG,		&get_rax,	1 },
 		{ "get-rbx",	NO_ARG,		&get_rbx,	1 },
 		{ "get-rcx",	NO_ARG,		&get_rcx,	1 },
 		{ "get-rdx",	NO_ARG,		&get_rdx,	1 },
 		{ "get-rsi",	NO_ARG,		&get_rsi,	1 },
 		{ "get-rdi",	NO_ARG,		&get_rdi,	1 },
 		{ "get-rbp",	NO_ARG,		&get_rbp,	1 },
 		{ "get-r8",	NO_ARG,		&get_r8,	1 },
 		{ "get-r9",	NO_ARG,		&get_r9,	1 },
 		{ "get-r10",	NO_ARG,		&get_r10,	1 },
 		{ "get-r11",	NO_ARG,		&get_r11,	1 },
 		{ "get-r12",	NO_ARG,		&get_r12,	1 },
 		{ "get-r13",	NO_ARG,		&get_r13,	1 },
 		{ "get-r14",	NO_ARG,		&get_r14,	1 },
 		{ "get-r15",	NO_ARG,		&get_r15,	1 },
 		{ "get-rflags",	NO_ARG,		&get_rflags,	1 },
 		{ "get-cs",	NO_ARG,		&get_cs,	1 },
 		{ "get-ds",	NO_ARG,		&get_ds,	1 },
 		{ "get-es",	NO_ARG,		&get_es,	1 },
 		{ "get-fs",	NO_ARG,		&get_fs,	1 },
 		{ "get-gs",	NO_ARG,		&get_gs,	1 },
 		{ "get-ss",	NO_ARG,		&get_ss,	1 },
 		{ "get-tr",	NO_ARG,		&get_tr,	1 },
 		{ "get-ldtr",	NO_ARG,		&get_ldtr,	1 },
 		{ "get-eptp", 	NO_ARG,		&get_eptp,	1 },
 		{ "get-exception-bitmap",
 					NO_ARG,	&get_exception_bitmap,  1 },
 		{ "get-io-bitmap-address",
 					NO_ARG,	&get_io_bitmap,		1 },
 		{ "get-tsc-offset", 	NO_ARG, &get_tsc_offset, 	1 },
 		{ "get-msr-bitmap",
 					NO_ARG,	&get_msr_bitmap, 	1 },
 		{ "get-msr-bitmap-address",
 					NO_ARG,	&get_msr_bitmap_address, 1 },
 		{ "get-guest-pat",	NO_ARG,	&get_guest_pat,		1 },
 		{ "get-guest-sysenter",
 					NO_ARG,	&get_guest_sysenter, 	1 },
 		{ "get-exit-reason",
 					NO_ARG,	&get_exit_reason, 	1 },
 		{ "get-x2apic-state",	NO_ARG,	&get_x2apic_state, 	1 },
 		{ "get-all",		NO_ARG,	&get_all,		1 },
 		{ "run",		NO_ARG,	&run,			1 },
 		{ "create",		NO_ARG,	&create,		1 },
 		{ "destroy",		NO_ARG,	&destroy,		1 },
 		{ "inject-nmi",		NO_ARG,	&inject_nmi,		1 },
 		{ "force-reset",	NO_ARG,	&force_reset,		1 },
 		{ "force-poweroff", 	NO_ARG,	&force_poweroff, 	1 },
 		{ "get-active-cpus", 	NO_ARG,	&get_active_cpus, 	1 },
 		{ "get-suspended-cpus", NO_ARG,	&get_suspended_cpus, 	1 },
 		{ "get-intinfo", 	NO_ARG,	&get_intinfo,		1 },
 	};
 
 	const struct option intel_opts[] = {
 		{ "get-vmcs-pinbased-ctls",
 				NO_ARG,		&get_pinbased_ctls, 1 },
 		{ "get-vmcs-procbased-ctls",
 				NO_ARG,		&get_procbased_ctls, 1 },
 		{ "get-vmcs-procbased-ctls2",
 				NO_ARG,		&get_procbased_ctls2, 1 },
 		{ "get-vmcs-guest-linear-address",
 				NO_ARG,		&get_vmcs_gla,	1 },
 		{ "get-vmcs-guest-physical-address",
 				NO_ARG,		&get_vmcs_gpa,	1 },
 		{ "get-vmcs-entry-interruption-info",
 				NO_ARG, &get_vmcs_entry_interruption_info, 1},
 		{ "get-vmcs-cr0-mask", NO_ARG,	&get_cr0_mask,	1 },
 		{ "get-vmcs-cr0-shadow", NO_ARG,&get_cr0_shadow, 1 },
 		{ "get-vmcs-cr4-mask", 		NO_ARG,	&get_cr4_mask,	  1 },
 		{ "get-vmcs-cr4-shadow", 	NO_ARG, &get_cr4_shadow,  1 },
 		{ "get-vmcs-cr3-targets", 	NO_ARG, &get_cr3_targets, 1 },
 		{ "get-vmcs-tpr-threshold",
 					NO_ARG,	&get_tpr_threshold, 1 },
 		{ "get-vmcs-vpid", 	NO_ARG,	&get_vpid_asid,	    1 },
 		{ "get-vmcs-exit-ctls", NO_ARG,	&get_exit_ctls,	    1 },
 		{ "get-vmcs-entry-ctls",
 					NO_ARG,	&get_entry_ctls, 1 },
 		{ "get-vmcs-instruction-error",
 					NO_ARG,	&get_inst_err,	1 },
 		{ "get-vmcs-host-pat",	NO_ARG,	&get_host_pat,	1 },
 		{ "get-vmcs-host-cr0",
 					NO_ARG,	&get_host_cr0,	1 },
 		{ "set-vmcs-entry-interruption-info",
 				REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO },
 		{ "get-vmcs-exit-qualification",
 				NO_ARG,	&get_vmcs_exit_qualification, 1 },
 		{ "get-vmcs-interruptibility",
 				NO_ARG, &get_vmcs_interruptibility, 1 },
 		{ "get-vmcs-exit-interruption-error",
 				NO_ARG,	&get_vmcs_exit_interruption_error, 1 },
 		{ "get-vmcs-exit-interruption-info",
 				NO_ARG,	&get_vmcs_exit_interruption_info, 1 },
 		{ "get-vmcs-link", 	NO_ARG,		&get_vmcs_link, 1 },
 		{ "get-vmcs-host-cr3",
 					NO_ARG,		&get_host_cr3,	1 },
 		{ "get-vmcs-host-cr4",
 				NO_ARG,		&get_host_cr4,	1 },
 		{ "get-vmcs-host-rip",
 				NO_ARG,		&get_host_rip,	1 },
 		{ "get-vmcs-host-rsp",
 				NO_ARG,		&get_host_rsp,	1 },
 		{ "get-apic-access-address",
 				NO_ARG,		&get_apic_access_addr, 1},
 		{ "get-virtual-apic-address",
 				NO_ARG,		&get_virtual_apic_addr, 1}
 	};
 
 	const struct option amd_opts[] = {
 		{ "get-vmcb-intercepts",
 				NO_ARG,	&get_vmcb_intercept, 	1 },
 		{ "get-vmcb-asid",
 				NO_ARG,	&get_vpid_asid,	     	1 },
 		{ "get-vmcb-exit-details",
 				NO_ARG, &get_vmcb_exit_details,	1 },
 		{ "get-vmcb-tlb-ctrl",
 				NO_ARG, &get_vmcb_tlb_ctrl, 	1 },
 		{ "get-vmcb-virq",
 				NO_ARG, &get_vmcb_virq, 	1 },
 		{ "get-avic-apic-bar",
 				NO_ARG,	&get_apic_access_addr, 	1 },
 		{ "get-avic-backing-page",
 				NO_ARG,	&get_virtual_apic_addr, 1 },
 		{ "get-avic-table",
 				NO_ARG,	&get_avic_table, 	1 }
 	};
 
 	/* All-zero entry that terminates the table. */
 	const struct option null_opt = {
 		NULL, 0, NULL, 0
 	};
 
 	struct option *all_opts;
 	char *cp;
 	size_t optlen;
 
 	/* Total size in bytes: common + vendor-specific + terminator. */
 	optlen = sizeof(common_opts);
 
 	if (cpu_intel)
 		optlen += sizeof(intel_opts);
 	else
 		optlen += sizeof(amd_opts);
 
 	optlen += sizeof(null_opt);
 
 	all_opts = malloc(optlen);
 	if (all_opts == NULL) {
 		/* Without an option table the command line cannot be parsed. */
 		perror("malloc");
 		exit(1);
 	}
 
 	/* Copy the pieces back to back, in the order they were sized. */
 	cp = (char *)all_opts;
 	memcpy(cp, common_opts, sizeof(common_opts));
 	cp += sizeof(common_opts);
 
 	if (cpu_intel) {
 		memcpy(cp, intel_opts, sizeof(intel_opts));
 		cp += sizeof(intel_opts);
 	} else {
 		memcpy(cp, amd_opts, sizeof(amd_opts));
 		cp += sizeof(amd_opts);
 	}
 
 	memcpy(cp, &null_opt, sizeof(null_opt));
 
 	return (all_opts);
 }
 
+/*
+ * Translate a day-of-week index into its three-letter English
+ * abbreviation: 0 yields "Sun" and 6 yields "Sat".  Any index outside
+ * 0..6 yields "UNK".
+ */
+static const char *
+wday_str(int idx)
+{
+	static const char *names[] = {
+		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+	};
+
+	/* Reject out-of-range indices before touching the table. */
+	if (idx < 0 || idx > 6)
+		return ("UNK");
+
+	return (names[idx]);
+}
+
+/*
+ * Translate a month index into its three-letter English abbreviation:
+ * 0 yields "Jan" and 11 yields "Dec".  Any index outside 0..11 yields
+ * "UNK".
+ */
+static const char *
+mon_str(int idx)
+{
+	static const char *names[] = {
+		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+	};
+
+	/* Reject out-of-range indices before touching the table. */
+	if (idx < 0 || idx > 11)
+		return ("UNK");
+
+	return (names[idx]);
+}
+
 int
 main(int argc, char *argv[])
 {
 	char *vmname;
 	int error, ch, vcpu, ptenum;
 	vm_paddr_t gpa, gpa_pmap;
 	size_t len;
 	struct vm_exit vmexit;
 	uint64_t rax, cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat;
 	uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2];
 	struct vmctx *ctx;
 	int wired;
 	cpuset_t cpus;
 	bool cpu_intel;
 	uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
+	struct tm tm;
 	struct option *opts;
 
 	cpu_intel = cpu_vendor_intel();
 	opts = setup_options(cpu_intel);
 
 	vcpu = 0;
 	vmname = NULL;
 	assert_lapic_lvt = -1;
 	progname = basename(argv[0]);
 
 	while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) {
 		switch (ch) {
 		case 0:
 			break;
 		case VMNAME:
 			vmname = optarg;
 			break;
 		case VCPU:
 			vcpu = atoi(optarg);
 			break;
 		case SET_MEM:
 			memsize = atoi(optarg) * MB;
 			memsize = roundup(memsize, 2 * MB);
 			break;
 		case SET_EFER:
 			efer = strtoul(optarg, NULL, 0);
 			set_efer = 1;
 			break;
 		case SET_CR0:
 			cr0 = strtoul(optarg, NULL, 0);
 			set_cr0 = 1;
 			break;
 		case SET_CR3:
 			cr3 = strtoul(optarg, NULL, 0);
 			set_cr3 = 1;
 			break;
 		case SET_CR4:
 			cr4 = strtoul(optarg, NULL, 0);
 			set_cr4 = 1;
 			break;
 		case SET_DR7:
 			dr7 = strtoul(optarg, NULL, 0);
 			set_dr7 = 1;
 			break;
 		case SET_RSP:
 			rsp = strtoul(optarg, NULL, 0);
 			set_rsp = 1;
 			break;
 		case SET_RIP:
 			rip = strtoul(optarg, NULL, 0);
 			set_rip = 1;
 			break;
 		case SET_RAX:
 			rax = strtoul(optarg, NULL, 0);
 			set_rax = 1;
 			break;
 		case SET_RFLAGS:
 			rflags = strtoul(optarg, NULL, 0);
 			set_rflags = 1;
 			break;
 		case DESC_BASE:
 			desc_base = strtoul(optarg, NULL, 0);
 			break;
 		case DESC_LIMIT:
 			desc_limit = strtoul(optarg, NULL, 0);
 			break;
 		case DESC_ACCESS:
 			desc_access = strtoul(optarg, NULL, 0);
 			break;
 		case SET_CS:
 			cs = strtoul(optarg, NULL, 0);
 			set_cs = 1;
 			break;
 		case SET_DS:
 			ds = strtoul(optarg, NULL, 0);
 			set_ds = 1;
 			break;
 		case SET_ES:
 			es = strtoul(optarg, NULL, 0);
 			set_es = 1;
 			break;
 		case SET_FS:
 			fs = strtoul(optarg, NULL, 0);
 			set_fs = 1;
 			break;
 		case SET_GS:
 			gs = strtoul(optarg, NULL, 0);
 			set_gs = 1;
 			break;
 		case SET_SS:
 			ss = strtoul(optarg, NULL, 0);
 			set_ss = 1;
 			break;
 		case SET_TR:
 			tr = strtoul(optarg, NULL, 0);
 			set_tr = 1;
 			break;
 		case SET_LDTR:
 			ldtr = strtoul(optarg, NULL, 0);
 			set_ldtr = 1;
 			break;
 		case SET_X2APIC_STATE:
 			x2apic_state = strtol(optarg, NULL, 0);
 			set_x2apic_state = 1;
 			break;
 		case SET_EXCEPTION_BITMAP:
 			exception_bitmap = strtoul(optarg, NULL, 0);
 			set_exception_bitmap = 1;
 			break;
 		case SET_VMCS_ENTRY_INTERRUPTION_INFO:
 			vmcs_entry_interruption_info = strtoul(optarg, NULL, 0);
 			set_vmcs_entry_interruption_info = 1;
 			break;
 		case SET_CAP:
 			capval = strtoul(optarg, NULL, 0);
 			setcap = 1;
 			break;
+		case SET_RTC_TIME:
+			rtc_secs = strtoul(optarg, NULL, 0);
+			set_rtc_time = 1;
+			break;
+		case SET_RTC_NVRAM:
+			rtc_nvram_value = (uint8_t)strtoul(optarg, NULL, 0);
+			set_rtc_nvram = 1;
+			break;
+		case RTC_NVRAM_OFFSET:
+			rtc_nvram_offset = strtoul(optarg, NULL, 0);
+			break;
 		case GET_GPA_PMAP:
 			gpa_pmap = strtoul(optarg, NULL, 0);
 			get_gpa_pmap = 1;
 			break;
 		case CAPNAME:
 			capname = optarg;
 			break;
 		case UNASSIGN_PPTDEV:
 			unassign_pptdev = 1;
 			if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3)
 				usage(cpu_intel);
 			break;
 		case ASSERT_LAPIC_LVT:
 			assert_lapic_lvt = atoi(optarg);
 			break;
 		default:
 			usage(cpu_intel);
 		}
 	}
 	argc -= optind;
 	argv += optind;
 
 	if (vmname == NULL)
 		usage(cpu_intel);
 
 	error = 0;
 
 	if (!error && create)
 		error = vm_create(vmname);
 
 	if (!error) {
 		ctx = vm_open(vmname);
 		if (ctx == NULL) {
 			printf("VM:%s is not created.\n", vmname);
 			exit (1);
 		}
 	}
 
 	if (!error && memsize)
 		error = vm_setup_memory(ctx, memsize, VM_MMAP_NONE);
 
 	if (!error && set_efer)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer);
 
 	if (!error && set_cr0)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0);
 
 	if (!error && set_cr3)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3);
 
 	if (!error && set_cr4)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4);
 
 	if (!error && set_dr7)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7);
 
 	if (!error && set_rsp)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp);
 
 	if (!error && set_rip)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip);
 
 	if (!error && set_rax)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax);
 
 	if (!error && set_rflags) {
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS,
 					rflags);
 	}
 
 	if (!error && set_desc_ds) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_es) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_ss) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_cs) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_fs) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_gs) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_tr) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_ldtr) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_gdtr) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR,
 				    desc_base, desc_limit, 0);
 	}
 
 	if (!error && set_desc_idtr) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR,
 				    desc_base, desc_limit, 0);
 	}
 
 	if (!error && set_cs)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs);
 
 	if (!error && set_ds)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds);
 
 	if (!error && set_es)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es);
 
 	if (!error && set_fs)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs);
 
 	if (!error && set_gs)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs);
 
 	if (!error && set_ss)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss);
 
 	if (!error && set_tr)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr);
 
 	if (!error && set_ldtr)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr);
 
 	if (!error && set_x2apic_state)
 		error = vm_set_x2apic_state(ctx, vcpu, x2apic_state);
 
 	if (!error && unassign_pptdev)
 		error = vm_unassign_pptdev(ctx, bus, slot, func);
 
 	if (!error && set_exception_bitmap) {
 		if (cpu_intel)
 			error = vm_set_vmcs_field(ctx, vcpu,
 						  VMCS_EXCEPTION_BITMAP,
 						  exception_bitmap);
 		else
 			error = vm_set_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_EXC_INTERCEPT,
 						  4, exception_bitmap);
 	}
 
 	if (!error && cpu_intel && set_vmcs_entry_interruption_info) {
 		error = vm_set_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,
 					  vmcs_entry_interruption_info);
 	}
 
 	if (!error && inject_nmi) {
 		error = vm_inject_nmi(ctx, vcpu);
 	}
 
 	if (!error && assert_lapic_lvt != -1) {
 		error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt);
 	}
 
 	if (!error && (get_lowmem || get_all)) {
 		gpa = 0;
 		error = vm_get_memory_seg(ctx, gpa, &len, &wired);
 		if (error == 0)
 			printf("lowmem\t\t0x%016lx/%ld%s\n", gpa, len,
 			    wired ? " wired" : "");
 	}
 
 	if (!error && (get_highmem || get_all)) {
 		gpa = 4 * GB;
 		error = vm_get_memory_seg(ctx, gpa, &len, &wired);
 		if (error == 0)
 			printf("highmem\t\t0x%016lx/%ld%s\n", gpa, len,
 			    wired ? " wired" : "");
 	}
 
 	if (!error)
 		error = get_all_registers(ctx, vcpu);
 
 	if (!error)
 		error = get_all_segments(ctx, vcpu);
 
 	if (!error) {
 		if (cpu_intel)
 			error = get_misc_vmcs(ctx, vcpu);
 		else
 			error = get_misc_vmcb(ctx, vcpu);
 	}
 	
 	if (!error && (get_x2apic_state || get_all)) {
 		error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state);
 		if (error == 0)
 			printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state);
 	}
 
 	if (!error && (get_eptp || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_NPT_BASE,
 						   8, &eptp);
 		if (error == 0)
 			printf("%s[%d]\t\t0x%016lx\n",
 				cpu_intel ? "eptp" : "rvi/npt", vcpu, eptp);
 	}
 
 	if (!error && (get_exception_bitmap || get_all)) {
 		if(cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						VMCS_EXCEPTION_BITMAP, &bm);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_EXC_INTERCEPT,
 						  4, &bm);
 		if (error == 0)
 			printf("exception_bitmap[%d]\t%#lx\n", vcpu, bm);
 	}
 
 	if (!error && (get_io_bitmap || get_all)) {
 		if (cpu_intel) {
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A,
 						  &bm);
 			if (error == 0)
 				printf("io_bitmap_a[%d]\t%#lx\n", vcpu, bm);
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B,
 						  &bm);
 			if (error == 0)
 				printf("io_bitmap_b[%d]\t%#lx\n", vcpu, bm);
 		} else {
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_IO_PERM, 8, &bm);
 			if (error == 0)
 				printf("io_bitmap[%d]\t%#lx\n", vcpu, bm);
 		}
 	}
 
 	if (!error && (get_tsc_offset || get_all)) {
 		uint64_t tscoff;
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET,
 						  &tscoff);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_TSC_OFFSET, 
 						  8, &tscoff);
 		if (error == 0)
 			printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff);
 	}
 
 	if (!error && (get_msr_bitmap_address || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, 
 						  &addr);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_MSR_PERM, 8, &addr);
 		if (error == 0)
 			printf("msr_bitmap[%d]\t\t%#lx\n", vcpu, addr);
 	}
 
 	if (!error && (get_msr_bitmap || get_all)) {
 		if (cpu_intel) {
 			error = vm_get_vmcs_field(ctx, vcpu, 
 						  VMCS_MSR_BITMAP, &addr);
 		} else {
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_MSR_PERM, 8,
 						  &addr);
 		}
 
 		if (error == 0)
 			error = dump_msr_bitmap(vcpu, addr, cpu_intel);
 	}
 
 	if (!error && (get_vpid_asid || get_all)) {
 		uint64_t vpid;
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID, 
 						  4, &vpid);
 		if (error == 0)
 			printf("%s[%d]\t\t0x%04lx\n", 
 				cpu_intel ? "vpid" : "asid", vcpu, vpid);
 	}
 
 	if (!error && (get_guest_pat || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_GUEST_IA32_PAT, &pat);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_GUEST_PAT, 8, &pat);
 		if (error == 0)
 			printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat);
 	}
 
 	if (!error && (get_guest_sysenter || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_GUEST_IA32_SYSENTER_CS,
 						  &cs);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_SYSENTER_CS, 8,
 						  &cs);
 
 		if (error == 0)
 			printf("guest_sysenter_cs[%d]\t%#lx\n", vcpu, cs);
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_GUEST_IA32_SYSENTER_ESP,
 						  &rsp);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_SYSENTER_ESP, 8,
 						  &rsp);
 
 		if (error == 0)
 			printf("guest_sysenter_sp[%d]\t%#lx\n", vcpu, rsp);
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_GUEST_IA32_SYSENTER_EIP,
 						  &rip);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_SYSENTER_EIP, 8, 
 						  &rip);
 		if (error == 0)
 			printf("guest_sysenter_ip[%d]\t%#lx\n", vcpu, rip);
 	}
 
 	if (!error && (get_exit_reason || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON,
 						  &u64);
 		else	
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_EXIT_REASON, 8,
 						  &u64);
 		if (error == 0)
 			printf("exit_reason[%d]\t%#lx\n", vcpu, u64);
 	}
 
 	if (!error && setcap) {
 		int captype;
 		captype = vm_capability_name2type(capname);
 		error = vm_set_capability(ctx, vcpu, captype, capval);
 		if (error != 0 && errno == ENOENT)
 			printf("Capability \"%s\" is not available\n", capname);
 	}
 
 	if (!error && get_gpa_pmap) {
 		error = vm_get_gpa_pmap(ctx, gpa_pmap, pteval, &ptenum);
 		if (error == 0) {
 			printf("gpa %#lx:", gpa_pmap);
 			pte = &pteval[0];
 			while (ptenum-- > 0)
 				printf(" %#lx", *pte++);
 			printf("\n");
+		}
+	}
+
+	if (!error && set_rtc_nvram)
+		error = vm_rtc_write(ctx, rtc_nvram_offset, rtc_nvram_value);
+
+	if (!error && (get_rtc_nvram || get_all)) {
+		error = vm_rtc_read(ctx, rtc_nvram_offset, &rtc_nvram_value);
+		if (error == 0) {
+			printf("rtc nvram[%03d]: 0x%02x\n", rtc_nvram_offset,
+			    rtc_nvram_value);
+		}
+	}
+
+	if (!error && set_rtc_time)
+		error = vm_rtc_settime(ctx, rtc_secs);
+
+	if (!error && (get_rtc_time || get_all)) {
+		error = vm_rtc_gettime(ctx, &rtc_secs);
+		if (error == 0) {
+			gmtime_r(&rtc_secs, &tm);
+			printf("rtc time %#lx: %s %s %02d %02d:%02d:%02d %d\n",
+			    rtc_secs, wday_str(tm.tm_wday), mon_str(tm.tm_mon),
+			    tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec,
+			    1900 + tm.tm_year);
 		}
 	}
 
 	if (!error && (getcap || get_all)) {
 		int captype, val, getcaptype;
 
 		if (getcap && capname)
 			getcaptype = vm_capability_name2type(capname);
 		else
 			getcaptype = -1;
 
 		for (captype = 0; captype < VM_CAP_MAX; captype++) {
 			if (getcaptype >= 0 && captype != getcaptype)
 				continue;
 			error = vm_get_capability(ctx, vcpu, captype, &val);
 			if (error == 0) {
 				printf("Capability \"%s\" is %s on vcpu %d\n",
 					vm_capability_type2name(captype),
 					val ? "set" : "not set", vcpu);
 			} else if (errno == ENOENT) {
 				error = 0;
 				printf("Capability \"%s\" is not available\n",
 					vm_capability_type2name(captype));
 			} else {
 				break;
 			}
 		}
 	}
 
 	if (!error && (get_active_cpus || get_all)) {
 		error = vm_active_cpus(ctx, &cpus);
 		if (!error)
 			print_cpus("active cpus", &cpus);
 	}
 
 	if (!error && (get_suspended_cpus || get_all)) {
 		error = vm_suspended_cpus(ctx, &cpus);
 		if (!error)
 			print_cpus("suspended cpus", &cpus);
 	}
 
 	if (!error && (get_intinfo || get_all)) {
 		error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]);
 		if (!error) {
 			print_intinfo("pending", info[0]);
 			print_intinfo("current", info[1]);
 		}
 	}
 
 	if (!error && (get_stats || get_all)) {
 		int i, num_stats;
 		uint64_t *stats;
 		struct timeval tv;
 		const char *desc;
 
 		stats = vm_get_stats(ctx, vcpu, &tv, &num_stats);
 		if (stats != NULL) {
 			printf("vcpu%d stats:\n", vcpu);
 			for (i = 0; i < num_stats; i++) {
 				desc = vm_get_stat_desc(ctx, i);
 				printf("%-40s\t%ld\n", desc, stats[i]);
 			}
 		}
 	}
 
 	if (!error && run) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
 		assert(error == 0);
 
 		error = vm_run(ctx, vcpu, rip, &vmexit);
 		if (error == 0)
 			dump_vm_run_exitcode(&vmexit, vcpu);
 		else
 			printf("vm_run error %d\n", error);
 	}
 
 	if (!error && force_reset)
 		error = vm_suspend(ctx, VM_SUSPEND_RESET);
 
 	if (!error && force_poweroff)
 		error = vm_suspend(ctx, VM_SUSPEND_POWEROFF);
 
 	if (error)
 		printf("errno = %d\n", errno);
 
 	if (!error && destroy)
 		vm_destroy(ctx);
 
 	free (opts);
 	exit(error);
 }