Index: projects/bhyve/lib/libvmmapi/vmmapi.c
===================================================================
--- projects/bhyve/lib/libvmmapi/vmmapi.c	(revision 241177)
+++ projects/bhyve/lib/libvmmapi/vmmapi.c	(revision 241178)
@@ -1,724 +1,724 @@
/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "vmmapi.h"
#include "mptable.h"

#define	BIOS_ROM_BASE		(0xf0000)
#define	BIOS_ROM_SIZE		(0x10000)

struct vmctx {
        int     fd;
        char    *name;
};

#define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
#define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))

static int
vm_device_open(const char *name)
{
        int fd, len;
        char *vmfile;

        len = strlen("/dev/vmm/") + strlen(name) + 1;
        vmfile = malloc(len);
        assert(vmfile != NULL);
        snprintf(vmfile, len, "/dev/vmm/%s", name);

        /* Open the device file */
        fd = open(vmfile, O_RDWR, 0);

        free(vmfile);
        return (fd);
}

int
vm_create(const char *name)
{

        return (CREATE((char *)name));
}

struct vmctx *
vm_open(const char *name)
{
        struct vmctx *vm;

        vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
        assert(vm != NULL);

        vm->fd = -1;
        vm->name = (char *)(vm + 1);
        strcpy(vm->name, name);

        if ((vm->fd = vm_device_open(vm->name)) < 0)
                goto err;

        return (vm);
err:
        vm_destroy(vm);
        return (NULL);
}

void
vm_destroy(struct vmctx *vm)
{
        assert(vm != NULL);

-       DESTROY(vm->name);
        if (vm->fd >= 0)
                close(vm->fd);
+       DESTROY(vm->name);
+
+       free(vm);
}

size_t
vmm_get_mem_total(void)
{
        size_t mem_total = 0;
        size_t oldlen = sizeof(mem_total);
        int error;
        error = sysctlbyname("hw.vmm.mem_total", &mem_total, &oldlen, NULL, 0);
        if (error)
                return -1;
        return mem_total;
}

size_t
vmm_get_mem_free(void)
{
        size_t mem_free = 0;
        size_t oldlen = sizeof(mem_free);
        int error;
        error = sysctlbyname("hw.vmm.mem_free", &mem_free, &oldlen, NULL, 0);
        if (error)
                return -1;
        return mem_free;
}

int
vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, vm_paddr_t *ret_hpa,
                  size_t *ret_len)
{
        int error;
        struct vm_memory_segment seg;

        bzero(&seg, sizeof(seg));
        seg.gpa = gpa;
        error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
-       *ret_hpa = seg.hpa;
        *ret_len = seg.len;
        return (error);
}

int
vm_setup_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **mapaddr) { int error; struct vm_memory_segment seg; /* * Create and optionally map 'len' bytes of memory at guest * physical address 'gpa' */ bzero(&seg, sizeof(seg)); seg.gpa = gpa; seg.len = len; error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); if (error == 0 && mapaddr != NULL) { *mapaddr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, gpa); } return (error); } char * vm_map_memory(struct vmctx *ctx, vm_paddr_t gpa, size_t len) { /* Map 'len' bytes of memory at guest physical address 'gpa' */ return ((char *)mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, gpa)); } int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access) { int error; struct vm_seg_desc vmsegdesc; bzero(&vmsegdesc, sizeof(vmsegdesc)); vmsegdesc.cpuid = vcpu; vmsegdesc.regnum = reg; vmsegdesc.desc.base = base; vmsegdesc.desc.limit = limit; vmsegdesc.desc.access = access; error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); return (error); } int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t *base, uint32_t *limit, uint32_t *access) { int error; struct vm_seg_desc vmsegdesc; bzero(&vmsegdesc, sizeof(vmsegdesc)); vmsegdesc.cpuid = vcpu; vmsegdesc.regnum = reg; error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); if (error == 0) { *base = vmsegdesc.desc.base; *limit = vmsegdesc.desc.limit; *access = vmsegdesc.desc.access; } return (error); } int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) { int error; struct vm_register vmreg; bzero(&vmreg, sizeof(vmreg)); vmreg.cpuid = vcpu; vmreg.regnum = reg; vmreg.regval = val; error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); return (error); } int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) { int error; struct vm_register vmreg; bzero(&vmreg, sizeof(vmreg)); vmreg.cpuid = vcpu; vmreg.regnum = reg; error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); *ret_val = vmreg.regval; return (error); } int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid) { int error; struct vm_pin vmpin; bzero(&vmpin, sizeof(vmpin)); vmpin.vm_cpuid = vcpu; error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin); *host_cpuid = vmpin.host_cpuid; return (error); } int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid) { int error; struct vm_pin vmpin; bzero(&vmpin, sizeof(vmpin)); vmpin.vm_cpuid = vcpu; vmpin.host_cpuid = host_cpuid; error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin); return (error); } int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) { int error; struct vm_run vmrun; bzero(&vmrun, sizeof(vmrun)); vmrun.cpuid = vcpu; vmrun.rip = rip; error = ioctl(ctx->fd, VM_RUN, &vmrun); bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); return (error); } static int vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector, int error_code, int error_code_valid) { struct vm_event ev; bzero(&ev, sizeof(ev)); ev.cpuid = vcpu; ev.type = type; ev.vector = vector; ev.error_code = error_code; ev.error_code_valid = error_code_valid; return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev)); } int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector) { return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0)); } int vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector, int error_code) { return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1)); } int vm_build_tables(struct vmctx 
*ctxt, int ncpu, int ioapic, void *oemtbl, int oemtblsz) { return (vm_build_mptable(ctxt, BIOS_ROM_BASE, BIOS_ROM_SIZE, ncpu, ioapic, oemtbl, oemtblsz)); } int vm_apicid2vcpu(struct vmctx *ctx, int apicid) { /* * The apic id associated with the 'vcpu' has the same numerical value * as the 'vcpu' itself. */ return (apicid); } int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) { struct vm_lapic_irq vmirq; bzero(&vmirq, sizeof(vmirq)); vmirq.cpuid = vcpu; vmirq.vector = vector; return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); } int vm_inject_nmi(struct vmctx *ctx, int vcpu) { struct vm_nmi vmnmi; bzero(&vmnmi, sizeof(vmnmi)); vmnmi.cpuid = vcpu; return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); } int vm_capability_name2type(const char *capname) { int i; static struct { const char *name; int type; } capstrmap[] = { { "hlt_exit", VM_CAP_HALT_EXIT }, { "mtrap_exit", VM_CAP_MTRAP_EXIT }, { "pause_exit", VM_CAP_PAUSE_EXIT }, { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, { 0 } }; for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { if (strcmp(capstrmap[i].name, capname) == 0) return (capstrmap[i].type); } return (-1); } int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int *retval) { int error; struct vm_capability vmcap; bzero(&vmcap, sizeof(vmcap)); vmcap.cpuid = vcpu; vmcap.captype = cap; error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); *retval = vmcap.capval; return (error); } int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) { struct vm_capability vmcap; bzero(&vmcap, sizeof(vmcap)); vmcap.cpuid = vcpu; vmcap.captype = cap; vmcap.capval = val; return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); } int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) { struct vm_pptdev pptdev; bzero(&pptdev, sizeof(pptdev)); pptdev.bus = bus; pptdev.slot = slot; pptdev.func = func; return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); } int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) { struct vm_pptdev pptdev; bzero(&pptdev, sizeof(pptdev)); pptdev.bus = bus; pptdev.slot = slot; pptdev.func = func; return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); } int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { struct vm_pptdev_mmio pptmmio; bzero(&pptmmio, sizeof(pptmmio)); pptmmio.bus = bus; pptmmio.slot = slot; pptmmio.func = func; pptmmio.gpa = gpa; pptmmio.len = len; pptmmio.hpa = hpa; return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); } int vm_setup_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int destcpu, int vector, int numvec) { struct vm_pptdev_msi pptmsi; bzero(&pptmsi, sizeof(pptmsi)); pptmsi.vcpu = vcpu; pptmsi.bus = bus; pptmsi.slot = slot; pptmsi.func = func; pptmsi.destcpu = destcpu; pptmsi.vector = vector; pptmsi.numvec = numvec; return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); } int vm_setup_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) { struct vm_pptdev_msix pptmsix; bzero(&pptmsix, sizeof(pptmsix)); pptmsix.vcpu = vcpu; pptmsix.bus = bus; pptmsix.slot = slot; pptmsix.func = func; pptmsix.idx = idx; pptmsix.msg = msg; pptmsix.addr = addr; pptmsix.vector_control = vector_control; return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); } uint64_t * vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, int *ret_entries) { int error; static struct vm_stats vmstats; vmstats.cpuid = vcpu; error = ioctl(ctx->fd, VM_STATS, 
&vmstats); if (error == 0) { if (ret_entries) *ret_entries = vmstats.num_entries; if (ret_tv) *ret_tv = vmstats.tv; return (vmstats.statbuf); } else return (NULL); } const char * vm_get_stat_desc(struct vmctx *ctx, int index) { static struct vm_stat_desc statdesc; statdesc.index = index; if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) return (statdesc.desc); else return (NULL); } int vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state) { int error; struct vm_x2apic x2apic; bzero(&x2apic, sizeof(x2apic)); x2apic.cpuid = vcpu; error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic); *state = x2apic.state; return (error); } int vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state) { int error; struct vm_x2apic x2apic; bzero(&x2apic, sizeof(x2apic)); x2apic.cpuid = vcpu; x2apic.state = state; error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic); return (error); } /* * From Intel Vol 3a: * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT */ int vcpu_reset(struct vmctx *vmctx, int vcpu) { int error; uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; uint32_t desc_access, desc_limit; uint16_t sel; zero = 0; rflags = 0x2; error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); if (error) goto done; rip = 0xfff0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) goto done; cr0 = CR0_NE; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) goto done; cr4 = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) goto done; /* * CS: present, r/w, accessed, 16-bit, byte granularity, usable */ desc_base = 0xffff0000; desc_limit = 0xffff; desc_access = 0x0093; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0xf000; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) goto done; /* * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x0093; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) goto done; /* General purpose registers */ rdx = 0xf00; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) goto done; 
if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) goto done; /* GDTR, IDTR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, desc_base, desc_limit, desc_access); if (error != 0) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, desc_base, desc_limit, desc_access); if (error != 0) goto done; /* TR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x0000008b; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) goto done; /* LDTR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x00000082; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) goto done; /* XXX cr2, debug registers */ error = 0; done: return (error); } Index: projects/bhyve/sys/amd64/include/vmm_dev.h =================================================================== --- projects/bhyve/sys/amd64/include/vmm_dev.h (revision 241177) +++ projects/bhyve/sys/amd64/include/vmm_dev.h (revision 241178) @@ -1,216 +1,215 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD: vmm_dev.h 482 2011-05-09 21:22:43Z grehan $ */ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ #ifdef _KERNEL void vmmdev_init(void); void vmmdev_cleanup(void); #endif struct vm_memory_segment { - vm_paddr_t hpa; /* out */ vm_paddr_t gpa; /* in */ size_t len; /* in */ }; struct vm_register { int cpuid; int regnum; /* enum vm_reg_name */ uint64_t regval; }; struct vm_seg_desc { /* data or code segment */ int cpuid; int regnum; /* enum vm_reg_name */ struct seg_desc desc; }; struct vm_pin { int vm_cpuid; int host_cpuid; /* -1 to unpin */ }; struct vm_run { int cpuid; uint64_t rip; /* start running here */ struct vm_exit vm_exit; }; struct vm_event { int cpuid; enum vm_event_type type; int vector; uint32_t error_code; int error_code_valid; }; struct vm_lapic_irq { int cpuid; int vector; }; struct vm_capability { int cpuid; enum vm_cap_type captype; int capval; int allcpus; }; struct vm_pptdev { int bus; int slot; int func; }; struct vm_pptdev_mmio { int bus; int slot; int func; vm_paddr_t gpa; vm_paddr_t hpa; size_t len; }; struct vm_pptdev_msi { int vcpu; int bus; int slot; int func; int numvec; /* 0 means disabled */ int vector; int destcpu; }; struct vm_pptdev_msix { int vcpu; int bus; int slot; int func; int idx; uint32_t msg; uint32_t vector_control; uint64_t addr; }; struct vm_nmi { int cpuid; }; #define MAX_VM_STATS 64 struct vm_stats { int cpuid; /* in */ int num_entries; /* out */ struct timeval tv; uint64_t statbuf[MAX_VM_STATS]; }; struct vm_stat_desc { int index; /* in */ char desc[128]; /* out */ }; struct vm_x2apic { int cpuid; enum x2apic_state state; }; enum { IOCNUM_RUN, IOCNUM_SET_PINNING, IOCNUM_GET_PINNING, IOCNUM_MAP_MEMORY, IOCNUM_GET_MEMORY_SEG, IOCNUM_SET_REGISTER, IOCNUM_GET_REGISTER, IOCNUM_SET_SEGMENT_DESCRIPTOR, IOCNUM_GET_SEGMENT_DESCRIPTOR, IOCNUM_INJECT_EVENT, IOCNUM_LAPIC_IRQ, IOCNUM_SET_CAPABILITY, IOCNUM_GET_CAPABILITY, IOCNUM_BIND_PPTDEV, IOCNUM_UNBIND_PPTDEV, IOCNUM_MAP_PPTDEV_MMIO, IOCNUM_PPTDEV_MSI, IOCNUM_PPTDEV_MSIX, IOCNUM_INJECT_NMI, IOCNUM_VM_STATS, IOCNUM_VM_STAT_DESC, IOCNUM_SET_X2APIC_STATE, IOCNUM_GET_X2APIC_STATE, }; #define VM_RUN \ _IOWR('v', IOCNUM_RUN, struct vm_run) #define VM_SET_PINNING \ _IOW('v', IOCNUM_SET_PINNING, struct vm_pin) #define VM_GET_PINNING \ _IOWR('v', IOCNUM_GET_PINNING, struct vm_pin) #define VM_MAP_MEMORY \ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) #define VM_GET_MEMORY_SEG \ _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment) #define VM_SET_REGISTER \ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) #define VM_GET_REGISTER \ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) #define VM_SET_SEGMENT_DESCRIPTOR \ _IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_GET_SEGMENT_DESCRIPTOR \ _IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_INJECT_EVENT \ _IOW('v', IOCNUM_INJECT_EVENT, struct vm_event) #define VM_LAPIC_IRQ \ _IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq) #define VM_SET_CAPABILITY \ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) #define VM_GET_CAPABILITY \ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) #define VM_BIND_PPTDEV \ _IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev) #define VM_UNBIND_PPTDEV \ _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev) #define VM_MAP_PPTDEV_MMIO \ _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio) #define VM_PPTDEV_MSI \ _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi) #define VM_PPTDEV_MSIX \ _IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix) #define VM_INJECT_NMI \ _IOW('v', 
IOCNUM_INJECT_NMI, struct vm_nmi) #define VM_STATS \ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) #define VM_STAT_DESC \ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) #define VM_SET_X2APIC_STATE \ _IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_X2APIC_STATE \ _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) #endif Index: projects/bhyve/sys/amd64/vmm/io/ppt.c =================================================================== --- projects/bhyve/sys/amd64/vmm/io/ppt.c (revision 241177) +++ projects/bhyve/sys/amd64/vmm/io/ppt.c (revision 241178) @@ -1,623 +1,622 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vmm_ktr.h" #include "iommu.h" #include "ppt.h" #define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0])) #define MAX_MMIOSEGS (PCIR_MAX_BAR_0 + 1) #define MAX_MSIMSGS 32 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources"); struct pptintr_arg { /* pptintr(pptintr_arg) */ struct pptdev *pptdev; int vec; int vcpu; }; static struct pptdev { device_t dev; struct vm *vm; /* owner of this device */ struct vm_memory_segment mmio[MAX_MMIOSEGS]; struct { int num_msgs; /* guest state */ int vector; int vcpu; int startrid; /* host state */ struct resource *res[MAX_MSIMSGS]; void *cookie[MAX_MSIMSGS]; struct pptintr_arg arg[MAX_MSIMSGS]; } msi; struct { int num_msgs; int startrid; int msix_table_rid; struct resource *msix_table_res; struct resource **res; void **cookie; struct pptintr_arg *arg; } msix; } pptdevs[32]; static int num_pptdevs; static int ppt_probe(device_t dev) { int bus, slot, func; struct pci_devinfo *dinfo; dinfo = (struct pci_devinfo *)device_get_ivars(dev); bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); /* * To qualify as a pci passthrough device a device must: * - be allowed by administrator to be used in this role * - be an endpoint device */ if (vmm_is_pptdev(bus, slot, func) && (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL) return (0); else return (ENXIO); } static int ppt_attach(device_t dev) { int n; if (num_pptdevs >= MAX_PPTDEVS) { printf("ppt_attach: maximum number of pci passthrough devices " "exceeded\n"); return (ENXIO); } n = num_pptdevs++; pptdevs[n].dev = dev; if (bootverbose) device_printf(dev, "attached\n"); return (0); } static int ppt_detach(device_t dev) { /* * XXX check whether there are any pci passthrough devices assigned * to guests before we allow this driver to detach. 
*/ return (0); } static device_method_t ppt_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ppt_probe), DEVMETHOD(device_attach, ppt_attach), DEVMETHOD(device_detach, ppt_detach), {0, 0} }; static devclass_t ppt_devclass; DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0); DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); static struct pptdev * ppt_find(int bus, int slot, int func) { device_t dev; int i, b, s, f; for (i = 0; i < num_pptdevs; i++) { dev = pptdevs[i].dev; b = pci_get_bus(dev); s = pci_get_slot(dev); f = pci_get_function(dev); if (bus == b && slot == s && func == f) return (&pptdevs[i]); } return (NULL); } static void ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) { int i; struct vm_memory_segment *seg; for (i = 0; i < MAX_MMIOSEGS; i++) { seg = &ppt->mmio[i]; if (seg->len == 0) continue; (void)vm_unmap_mmio(vm, seg->gpa, seg->len); bzero(seg, sizeof(struct vm_memory_segment)); } } static void ppt_teardown_msi(struct pptdev *ppt) { int i, rid; void *cookie; struct resource *res; if (ppt->msi.num_msgs == 0) return; for (i = 0; i < ppt->msi.num_msgs; i++) { rid = ppt->msi.startrid + i; res = ppt->msi.res[i]; cookie = ppt->msi.cookie[i]; if (cookie != NULL) bus_teardown_intr(ppt->dev, res, cookie); if (res != NULL) bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); ppt->msi.res[i] = NULL; ppt->msi.cookie[i] = NULL; } if (ppt->msi.startrid == 1) pci_release_msi(ppt->dev); ppt->msi.num_msgs = 0; } static void ppt_teardown_msix_intr(struct pptdev *ppt, int idx) { int rid; struct resource *res; void *cookie; rid = ppt->msix.startrid + idx; res = ppt->msix.res[idx]; cookie = ppt->msix.cookie[idx]; if (cookie != NULL) bus_teardown_intr(ppt->dev, res, cookie); if (res != NULL) bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); ppt->msix.res[idx] = NULL; ppt->msix.cookie[idx] = NULL; } static void ppt_teardown_msix(struct pptdev *ppt) { int i, error; if (ppt->msix.num_msgs == 0) return; for (i = 0; i < ppt->msix.num_msgs; i++) ppt_teardown_msix_intr(ppt, i); if (ppt->msix.msix_table_res) { bus_release_resource(ppt->dev, SYS_RES_MEMORY, ppt->msix.msix_table_rid, ppt->msix.msix_table_res); ppt->msix.msix_table_res = NULL; ppt->msix.msix_table_rid = 0; } free(ppt->msix.res, M_PPTMSIX); free(ppt->msix.cookie, M_PPTMSIX); free(ppt->msix.arg, M_PPTMSIX); error = pci_release_msi(ppt->dev); if (error) printf("ppt_teardown_msix: Failed to release MSI-X resources (error %i)\n", error); ppt->msix.num_msgs = 0; } int ppt_assign_device(struct vm *vm, int bus, int slot, int func) { struct pptdev *ppt; ppt = ppt_find(bus, slot, func); if (ppt != NULL) { /* * If this device is owned by a different VM then we * cannot change its owner. */ if (ppt->vm != NULL && ppt->vm != vm) return (EBUSY); ppt->vm = vm; iommu_add_device(vm_iommu_domain(vm), bus, slot, func); return (0); } return (ENOENT); } int ppt_unassign_device(struct vm *vm, int bus, int slot, int func) { struct pptdev *ppt; ppt = ppt_find(bus, slot, func); if (ppt != NULL) { /* * If this device is not owned by this 'vm' then bail out. 
*/ if (ppt->vm != vm) return (EBUSY); ppt_unmap_mmio(vm, ppt); ppt_teardown_msi(ppt); ppt_teardown_msix(ppt); iommu_remove_device(vm_iommu_domain(vm), bus, slot, func); ppt->vm = NULL; return (0); } return (ENOENT); } int ppt_unassign_all(struct vm *vm) { int i, bus, slot, func; device_t dev; for (i = 0; i < num_pptdevs; i++) { if (pptdevs[i].vm == vm) { dev = pptdevs[i].dev; bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); ppt_unassign_device(vm, bus, slot, func); } } return (0); } int ppt_map_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { int i, error; struct vm_memory_segment *seg; struct pptdev *ppt; ppt = ppt_find(bus, slot, func); if (ppt != NULL) { if (ppt->vm != vm) return (EBUSY); for (i = 0; i < MAX_MMIOSEGS; i++) { seg = &ppt->mmio[i]; if (seg->len == 0) { error = vm_map_mmio(vm, gpa, len, hpa); if (error == 0) { seg->gpa = gpa; seg->len = len; - seg->hpa = hpa; } return (error); } } return (ENOSPC); } return (ENOENT); } static int pptintr(void *arg) { int vec; struct pptdev *ppt; struct pptintr_arg *pptarg; pptarg = arg; ppt = pptarg->pptdev; vec = pptarg->vec; if (ppt->vm != NULL) (void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec); else { /* * XXX * This is not expected to happen - panic? */ } /* * For legacy interrupts give other filters a chance in case * the interrupt was not generated by the passthrough device. */ if (ppt->msi.startrid == 0) return (FILTER_STRAY); else return (FILTER_HANDLED); } /* * XXX * When we try to free the MSI resource the kernel will bind the thread to * the host cpu was originally handling the MSI. The function freeing the * MSI vector (apic_free_vector()) will panic the kernel if the thread * is already bound to a cpu. * * So, we temporarily unbind the vcpu thread before freeing the MSI resource. */ static void PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt) { int pincpu = -1; vm_get_pinning(vm, vcpu, &pincpu); if (pincpu >= 0) vm_set_pinning(vm, vcpu, -1); ppt_teardown_msi(ppt); if (pincpu >= 0) vm_set_pinning(vm, vcpu, pincpu); } int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, int destcpu, int vector, int numvec) { int i, rid, flags; int msi_count, startrid, error, tmp; struct pptdev *ppt; if ((destcpu >= VM_MAXCPU || destcpu < 0) || (vector < 0 || vector > 255) || (numvec < 0 || numvec > MAX_MSIMSGS)) return (EINVAL); ppt = ppt_find(bus, slot, func); if (ppt == NULL) return (ENOENT); if (ppt->vm != vm) /* Make sure we own this device */ return (EBUSY); /* Free any allocated resources */ PPT_TEARDOWN_MSI(vm, vcpu, ppt); if (numvec == 0) /* nothing more to do */ return (0); flags = RF_ACTIVE; msi_count = pci_msi_count(ppt->dev); if (msi_count == 0) { startrid = 0; /* legacy interrupt */ msi_count = 1; flags |= RF_SHAREABLE; } else startrid = 1; /* MSI */ /* * The device must be capable of supporting the number of vectors * the guest wants to allocate. */ if (numvec > msi_count) return (EINVAL); /* * Make sure that we can allocate all the MSI vectors that are needed * by the guest. */ if (startrid == 1) { tmp = numvec; error = pci_alloc_msi(ppt->dev, &tmp); if (error) return (error); else if (tmp != numvec) { pci_release_msi(ppt->dev); return (ENOSPC); } else { /* success */ } } ppt->msi.vector = vector; ppt->msi.vcpu = destcpu; ppt->msi.startrid = startrid; /* * Allocate the irq resource and attach it to the interrupt handler. 
*/ for (i = 0; i < numvec; i++) { ppt->msi.num_msgs = i + 1; ppt->msi.cookie[i] = NULL; rid = startrid + i; ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, &rid, flags); if (ppt->msi.res[i] == NULL) break; ppt->msi.arg[i].pptdev = ppt; ppt->msi.arg[i].vec = vector + i; error = bus_setup_intr(ppt->dev, ppt->msi.res[i], INTR_TYPE_NET | INTR_MPSAFE, pptintr, NULL, &ppt->msi.arg[i], &ppt->msi.cookie[i]); if (error != 0) break; } if (i < numvec) { PPT_TEARDOWN_MSI(vm, vcpu, ppt); return (ENXIO); } return (0); } int ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) { struct pptdev *ppt; struct pci_devinfo *dinfo; int numvec, vector_count, rid, error; size_t res_size, cookie_size, arg_size; ppt = ppt_find(bus, slot, func); if (ppt == NULL) return (ENOENT); if (ppt->vm != vm) /* Make sure we own this device */ return (EBUSY); dinfo = device_get_ivars(ppt->dev); if (!dinfo) return (ENXIO); /* * First-time configuration: * Allocate the MSI-X table * Allocate the IRQ resources * Set up some variables in ppt->msix */ if (!ppt->msix.msix_table_res) { ppt->msix.res = NULL; ppt->msix.cookie = NULL; ppt->msix.arg = NULL; rid = dinfo->cfg.msix.msix_table_bar; ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (ppt->msix.msix_table_res == NULL) return (ENOSPC); ppt->msix.msix_table_rid = rid; vector_count = numvec = pci_msix_count(ppt->dev); error = pci_alloc_msix(ppt->dev, &numvec); if (error) return (error); else if (vector_count != numvec) { pci_release_msi(ppt->dev); return (ENOSPC); } ppt->msix.num_msgs = numvec; ppt->msix.startrid = 1; res_size = numvec * sizeof(ppt->msix.res[0]); cookie_size = numvec * sizeof(ppt->msix.cookie[0]); arg_size = numvec * sizeof(ppt->msix.arg[0]); ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK); ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, M_WAITOK); ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK); if (ppt->msix.res == NULL || ppt->msix.cookie == NULL || ppt->msix.arg == NULL) { ppt_teardown_msix(ppt); return (ENOSPC); } bzero(ppt->msix.res, res_size); bzero(ppt->msix.cookie, cookie_size); bzero(ppt->msix.arg, arg_size); } if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { /* Tear down the IRQ if it's already set up */ ppt_teardown_msix_intr(ppt, idx); /* Allocate the IRQ resource */ ppt->msix.cookie[idx] = NULL; rid = ppt->msix.startrid + idx; ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (ppt->msix.res[idx] == NULL) return (ENXIO); ppt->msix.arg[idx].pptdev = ppt; ppt->msix.arg[idx].vec = msg; ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF; /* Setup the MSI-X interrupt */ error = bus_setup_intr(ppt->dev, ppt->msix.res[idx], INTR_TYPE_NET | INTR_MPSAFE, pptintr, NULL, &ppt->msix.arg[idx], &ppt->msix.cookie[idx]); if (error != 0) { bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]); bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]); ppt->msix.cookie[idx] = NULL; ppt->msix.res[idx] = NULL; return (ENXIO); } } else { /* Masked, tear it down if it's already been set up */ ppt_teardown_msix_intr(ppt, idx); } return (0); } Index: projects/bhyve/sys/amd64/vmm/vmm.c =================================================================== --- projects/bhyve/sys/amd64/vmm/vmm.c (revision 241177) +++ projects/bhyve/sys/amd64/vmm/vmm.c (revision 241178) @@ -1,818 +1,853 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_mem.h" #include "vmm_util.h" #include #include "vlapic.h" #include "vmm_msr.h" #include "vmm_ipi.h" #include "vmm_stat.h" #include "io/ppt.h" #include "io/iommu.h" struct vlapic; struct vcpu { int flags; int pincpu; /* host cpuid this vcpu is bound to */ int hostcpu; /* host cpuid this vcpu last ran on */ uint64_t guest_msrs[VMM_MSR_NUM]; struct vlapic *vlapic; int vcpuid; struct savefpu *guestfpu; /* guest fpu state */ void *stats; struct vm_exit exitinfo; enum x2apic_state x2apic_state; }; #define VCPU_F_PINNED 0x0001 #define VCPU_F_RUNNING 0x0002 #define VCPU_PINCPU(vm, vcpuid) \ ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1) #define VCPU_UNPIN(vm, vcpuid) (vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED) #define VCPU_PIN(vm, vcpuid, host_cpuid) \ do { \ vm->vcpu[vcpuid].flags |= VCPU_F_PINNED; \ vm->vcpu[vcpuid].pincpu = host_cpuid; \ } while(0) #define VM_MAX_MEMORY_SEGMENTS 2 struct vm { void *cookie; /* processor-specific data */ void *iommu; /* iommu-specific data */ struct vcpu vcpu[VM_MAXCPU]; int num_mem_segs; struct vm_memory_segment mem_segs[VM_MAX_MEMORY_SEGMENTS]; char name[VM_MAX_NAMELEN]; /* * Set of active vcpus. * An active vcpu is one that has been started implicitly (BSP) or * explicitly (AP) by sending it a startup ipi. */ cpuset_t active_cpus; }; static struct vmm_ops *ops; #define VMM_INIT() (ops != NULL ? (*ops->init)() : 0) #define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) #define VMINIT(vm) (ops != NULL ? (*ops->vminit)(vm): NULL) #define VMRUN(vmi, vcpu, rip) \ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip) : ENXIO) #define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) #define VMMMAP_SET(vmi, gpa, hpa, len, attr, prot, spm) \ (ops != NULL ? \ (*ops->vmmmap_set)(vmi, gpa, hpa, len, attr, prot, spm) : \ ENXIO) #define VMMMAP_GET(vmi, gpa) \ (ops != NULL ? (*ops->vmmmap_get)(vmi, gpa) : ENXIO) #define VMGETREG(vmi, vcpu, num, retval) \ (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) #define VMSETREG(vmi, vcpu, num, val) \ (ops != NULL ? 
(*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) #define VMGETDESC(vmi, vcpu, num, desc) \ (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) #define VMSETDESC(vmi, vcpu, num, desc) \ (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) #define VMINJECT(vmi, vcpu, type, vec, ec, ecv) \ (ops != NULL ? (*ops->vminject)(vmi, vcpu, type, vec, ec, ecv) : ENXIO) #define VMNMI(vmi, vcpu) \ (ops != NULL ? (*ops->vmnmi)(vmi, vcpu) : ENXIO) #define VMGETCAP(vmi, vcpu, num, retval) \ (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) #define VMSETCAP(vmi, vcpu, num, val) \ (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) #define fpu_start_emulating() start_emulating() #define fpu_stop_emulating() stop_emulating() static MALLOC_DEFINE(M_VM, "vm", "vm"); CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */ /* statistics */ static VMM_STAT_DEFINE(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); static void vcpu_cleanup(struct vcpu *vcpu) { vlapic_cleanup(vcpu->vlapic); vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); } static void vcpu_init(struct vm *vm, uint32_t vcpu_id) { struct vcpu *vcpu; vcpu = &vm->vcpu[vcpu_id]; vcpu->hostcpu = -1; vcpu->vcpuid = vcpu_id; vcpu->vlapic = vlapic_init(vm, vcpu_id); vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED); vcpu->guestfpu = fpu_save_area_alloc(); fpu_save_area_reset(vcpu->guestfpu); vcpu->stats = vmm_stat_alloc(); } struct vm_exit * vm_exitinfo(struct vm *vm, int cpuid) { struct vcpu *vcpu; if (cpuid < 0 || cpuid >= VM_MAXCPU) panic("vm_exitinfo: invalid cpuid %d", cpuid); vcpu = &vm->vcpu[cpuid]; return (&vcpu->exitinfo); } static int vmm_init(void) { int error; vmm_ipi_init(); error = vmm_mem_init(); if (error) return (error); if (vmm_is_intel()) ops = &vmm_ops_intel; else if (vmm_is_amd()) ops = &vmm_ops_amd; else return (ENXIO); vmm_msr_init(); return (VMM_INIT()); } static int vmm_handler(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: vmmdev_init(); iommu_init(); error = vmm_init(); break; case MOD_UNLOAD: vmmdev_cleanup(); iommu_cleanup(); vmm_ipi_cleanup(); error = VMM_CLEANUP(); break; default: error = 0; break; } return (error); } static moduledata_t vmm_kmod = { "vmm", vmm_handler, NULL }; /* * Execute the module load handler after the pci passthru driver has had * a chance to claim devices. We need this information at the time we do * iommu initialization. 
 */
DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_CONFIGURE + 1, SI_ORDER_ANY);
MODULE_VERSION(vmm, 1);

SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);

struct vm *
vm_create(const char *name)
{
        int i;
        struct vm *vm;
        vm_paddr_t maxaddr;
        const int BSP = 0;

        if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
                return (NULL);

        vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
        strcpy(vm->name, name);
        vm->cookie = VMINIT(vm);

        for (i = 0; i < VM_MAXCPU; i++) {
                vcpu_init(vm, i);
                guest_msrs_init(vm, i);
        }

        maxaddr = vmm_mem_maxaddr();
        vm->iommu = iommu_create_domain(maxaddr);
        vm_activate_cpu(vm, BSP);

        return (vm);
}

+static void
+vm_free_mem_seg(struct vm *vm, struct vm_memory_segment *seg)
+{
+       size_t len;
+       vm_paddr_t hpa;
+
+       len = 0;
+       while (len < seg->len) {
+               hpa = vm_gpa2hpa(vm, seg->gpa + len, PAGE_SIZE);
+               if (hpa == (vm_paddr_t)-1) {
+                       panic("vm_free_mem_segs: cannot free hpa "
+                             "associated with gpa 0x%016lx", seg->gpa + len);
+               }
+
+               vmm_mem_free(hpa, PAGE_SIZE);
+
+               len += PAGE_SIZE;
+       }
+
+       bzero(seg, sizeof(struct vm_memory_segment));
+}
+
void
vm_destroy(struct vm *vm)
{
        int i;

        ppt_unassign_all(vm);

        for (i = 0; i < vm->num_mem_segs; i++)
-               vmm_mem_free(vm->mem_segs[i].hpa, vm->mem_segs[i].len);
+               vm_free_mem_seg(vm, &vm->mem_segs[i]);
+       vm->num_mem_segs = 0;
+
        for (i = 0; i < VM_MAXCPU; i++)
                vcpu_cleanup(&vm->vcpu[i]);

        iommu_destroy_domain(vm->iommu);

        VMCLEANUP(vm->cookie);

        free(vm, M_VM);
}

const char *
vm_name(struct vm *vm)
{
        return (vm->name);
}

int
vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
        const boolean_t spok = TRUE;    /* superpage mappings are ok */

        return (VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_UNCACHEABLE,
                           VM_PROT_RW, spok));
}

int
vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
{
        const boolean_t spok = TRUE;    /* superpage mappings are ok */

        return (VMMMAP_SET(vm->cookie, gpa, 0, len, 0,
                           VM_PROT_NONE, spok));
}

/*
 * Returns TRUE if 'gpa' is available for allocation and FALSE otherwise
 */
static boolean_t
vm_gpa_available(struct vm *vm, vm_paddr_t gpa)
{
        int i;
        vm_paddr_t gpabase, gpalimit;

        if (gpa & PAGE_MASK)
                panic("vm_gpa_available: gpa (0x%016lx) not page aligned", gpa);

        for (i = 0; i < vm->num_mem_segs; i++) {
                gpabase = vm->mem_segs[i].gpa;
                gpalimit = gpabase + vm->mem_segs[i].len;
                if (gpa >= gpabase && gpa < gpalimit)
                        return (FALSE);
        }

        return (TRUE);
}

int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
{
        int error, available, allocated;
+       struct vm_memory_segment *seg;
        vm_paddr_t g, hpa;
        const boolean_t spok = TRUE;    /* superpage mappings are ok */

        if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
                return (EINVAL);

        available = allocated = 0;
        g = gpa;
        while (g < gpa + len) {
                if (vm_gpa_available(vm, g))
                        available++;
                else
                        allocated++;

                g += PAGE_SIZE;
        }

        /*
         * If there are some allocated and some available pages in the address
         * range then it is an error.
         */
        if (allocated && available)
                return (EINVAL);

        /*
         * If the entire address range being requested has already been
         * allocated then there isn't anything more to do.
*/ if (allocated && available == 0) return (0); if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) return (E2BIG); - hpa = vmm_mem_alloc(len); - if (hpa == 0) - return (ENOMEM); + seg = &vm->mem_segs[vm->num_mem_segs]; - error = VMMMAP_SET(vm->cookie, gpa, hpa, len, VM_MEMATTR_WRITE_BACK, - VM_PROT_ALL, spok); - if (error) { - vmm_mem_free(hpa, len); + seg->gpa = gpa; + seg->len = 0; + while (seg->len < len) { + hpa = vmm_mem_alloc(PAGE_SIZE); + if (hpa == 0) { + error = ENOMEM; + break; + } + + error = VMMMAP_SET(vm->cookie, gpa + seg->len, hpa, PAGE_SIZE, + VM_MEMATTR_WRITE_BACK, VM_PROT_ALL, spok); + if (error) + break; + + iommu_create_mapping(vm->iommu, gpa + seg->len, hpa, PAGE_SIZE); + + seg->len += PAGE_SIZE; + } + + if (seg->len != len) { + vm_free_mem_seg(vm, seg); return (error); } - iommu_create_mapping(vm->iommu, gpa, hpa, len); - - vm->mem_segs[vm->num_mem_segs].gpa = gpa; - vm->mem_segs[vm->num_mem_segs].hpa = hpa; - vm->mem_segs[vm->num_mem_segs].len = len; vm->num_mem_segs++; return (0); } vm_paddr_t vm_gpa2hpa(struct vm *vm, vm_paddr_t gpa, size_t len) { vm_paddr_t nextpage; nextpage = rounddown(gpa + PAGE_SIZE, PAGE_SIZE); if (len > nextpage - gpa) panic("vm_gpa2hpa: invalid gpa/len: 0x%016lx/%lu", gpa, len); return (VMMMAP_GET(vm->cookie, gpa)); } int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, struct vm_memory_segment *seg) { int i; for (i = 0; i < vm->num_mem_segs; i++) { if (gpabase == vm->mem_segs[i].gpa) { *seg = vm->mem_segs[i]; return (0); } } return (-1); } int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); return (VMGETREG(vm->cookie, vcpu, reg, retval)); } int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); return (VMSETREG(vm->cookie, vcpu, reg, val)); } static boolean_t is_descriptor_table(int reg) { switch (reg) { case VM_REG_GUEST_IDTR: case VM_REG_GUEST_GDTR: return (TRUE); default: return (FALSE); } } static boolean_t is_segment_register(int reg) { switch (reg) { case VM_REG_GUEST_ES: case VM_REG_GUEST_CS: case VM_REG_GUEST_SS: case VM_REG_GUEST_DS: case VM_REG_GUEST_FS: case VM_REG_GUEST_GS: case VM_REG_GUEST_TR: case VM_REG_GUEST_LDTR: return (TRUE); default: return (FALSE); } } int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (!is_segment_register(reg) && !is_descriptor_table(reg)) return (EINVAL); return (VMGETDESC(vm->cookie, vcpu, reg, desc)); } int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (!is_segment_register(reg) && !is_descriptor_table(reg)) return (EINVAL); return (VMSETDESC(vm->cookie, vcpu, reg, desc)); } int vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid) { if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); *cpuid = VCPU_PINCPU(vm, vcpuid); return (0); } int vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid) { struct thread *td; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); td = curthread; /* XXXSMP only safe when muxing vcpus */ /* unpin */ if (host_cpuid < 0) { VCPU_UNPIN(vm, vcpuid); thread_lock(td); sched_unbind(td); thread_unlock(td); return (0); } if (CPU_ABSENT(host_cpuid)) return (EINVAL); /* * XXX we should check that 'host_cpuid' has not already been pinned * by another vm. 
*/ thread_lock(td); sched_bind(td, host_cpuid); thread_unlock(td); VCPU_PIN(vm, vcpuid, host_cpuid); return (0); } static void restore_guest_fpustate(struct vcpu *vcpu) { /* flush host state to the pcb */ fpuexit(curthread); fpu_stop_emulating(); fpurestore(vcpu->guestfpu); } static void save_guest_fpustate(struct vcpu *vcpu) { fpusave(vcpu->guestfpu); fpu_start_emulating(); } int vm_run(struct vm *vm, struct vm_run *vmrun) { int error, vcpuid; struct vcpu *vcpu; struct pcb *pcb; uint64_t tscval; vcpuid = vmrun->cpuid; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; critical_enter(); tscval = rdtsc(); pcb = PCPU_GET(curpcb); set_pcb_flags(pcb, PCB_FULL_IRET); vcpu->hostcpu = curcpu; restore_guest_msrs(vm, vcpuid); restore_guest_fpustate(vcpu); error = VMRUN(vm->cookie, vcpuid, vmrun->rip); save_guest_fpustate(vcpu); restore_host_msrs(vm, vcpuid); vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); /* copy the exit information */ bcopy(&vcpu->exitinfo, &vmrun->vm_exit, sizeof(struct vm_exit)); critical_exit(); return (error); } int vm_inject_event(struct vm *vm, int vcpuid, int type, int vector, uint32_t code, int code_valid) { if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); if ((type > VM_EVENT_NONE && type < VM_EVENT_MAX) == 0) return (EINVAL); if (vector < 0 || vector > 255) return (EINVAL); return (VMINJECT(vm->cookie, vcpuid, type, vector, code, code_valid)); } int vm_inject_nmi(struct vm *vm, int vcpu) { int error; if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); error = VMNMI(vm->cookie, vcpu); vm_interrupt_hostcpu(vm, vcpu); return (error); } int vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (VMGETCAP(vm->cookie, vcpu, type, retval)); } int vm_set_capability(struct vm *vm, int vcpu, int type, int val) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (VMSETCAP(vm->cookie, vcpu, type, val)); } uint64_t * vm_guest_msrs(struct vm *vm, int cpu) { return (vm->vcpu[cpu].guest_msrs); } struct vlapic * vm_lapic(struct vm *vm, int cpu) { return (vm->vcpu[cpu].vlapic); } boolean_t vmm_is_pptdev(int bus, int slot, int func) { int found, b, s, f, n; char *val, *cp, *cp2; /* * setenv pptdevs "1/2/3 4/5/6 7/8/9 10/11/12" */ found = 0; cp = val = getenv("pptdevs"); while (cp != NULL && *cp != '\0') { if ((cp2 = strchr(cp, ' ')) != NULL) *cp2 = '\0'; n = sscanf(cp, "%d/%d/%d", &b, &s, &f); if (n == 3 && bus == b && slot == s && func == f) { found = 1; break; } if (cp2 != NULL) *cp2++ = ' '; cp = cp2; } freeenv(val); return (found); } void * vm_iommu_domain(struct vm *vm) { return (vm->iommu); } void vm_set_run_state(struct vm *vm, int vcpuid, int state) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) panic("vm_set_run_state: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; if (state == VCPU_RUNNING) { if (vcpu->flags & VCPU_F_RUNNING) { panic("vm_set_run_state: %s[%d] is already running", vm_name(vm), vcpuid); } vcpu->flags |= VCPU_F_RUNNING; } else { if ((vcpu->flags & VCPU_F_RUNNING) == 0) { panic("vm_set_run_state: %s[%d] is already stopped", vm_name(vm), vcpuid); } vcpu->flags &= ~VCPU_F_RUNNING; } } int vm_get_run_state(struct vm *vm, int vcpuid, int *cpuptr) { int retval, hostcpu; struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) panic("vm_get_run_state: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; if 
(vcpu->flags & VCPU_F_RUNNING) { retval = VCPU_RUNNING; hostcpu = vcpu->hostcpu; } else { retval = VCPU_STOPPED; hostcpu = -1; } if (cpuptr) *cpuptr = hostcpu; return (retval); } void vm_activate_cpu(struct vm *vm, int vcpuid) { if (vcpuid >= 0 && vcpuid < VM_MAXCPU) CPU_SET(vcpuid, &vm->active_cpus); } cpuset_t vm_active_cpus(struct vm *vm) { return (vm->active_cpus); } void * vcpu_stats(struct vm *vm, int vcpuid) { return (vm->vcpu[vcpuid].stats); } int vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) { if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); *state = vm->vcpu[vcpuid].x2apic_state; return (0); } int vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) { if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); if (state < 0 || state >= X2APIC_STATE_LAST) return (EINVAL); vm->vcpu[vcpuid].x2apic_state = state; vlapic_set_x2apic_state(vm, vcpuid, state); return (0); } Index: projects/bhyve/sys/amd64/vmm/vmm_dev.c =================================================================== --- projects/bhyve/sys/amd64/vmm/vmm_dev.c (revision 241177) +++ projects/bhyve/sys/amd64/vmm/vmm_dev.c (revision 241178) @@ -1,509 +1,509 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vmm_stat.h" #include "vmm_mem.h" #include "io/ppt.h" #include struct vmmdev_softc { struct vm *vm; /* vm instance cookie */ struct cdev *cdev; SLIST_ENTRY(vmmdev_softc) link; }; static SLIST_HEAD(, vmmdev_softc) head; static struct mtx vmmdev_mtx; static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); static struct vmmdev_softc * vmmdev_lookup(const char *name) { struct vmmdev_softc *sc; #ifdef notyet /* XXX kernel is not compiled with invariants */ mtx_assert(&vmmdev_mtx, MA_OWNED); #endif SLIST_FOREACH(sc, &head, link) { if (strcmp(name, vm_name(sc->vm)) == 0) break; } return (sc); } static struct vmmdev_softc * vmmdev_lookup2(struct cdev *cdev) { struct vmmdev_softc *sc; #ifdef notyet /* XXX kernel is not compiled with invariants */ mtx_assert(&vmmdev_mtx, MA_OWNED); #endif SLIST_FOREACH(sc, &head, link) { if (sc->cdev == cdev) break; } return (sc); } static int vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) { int error, off, c; vm_paddr_t hpa, gpa; struct vmmdev_softc *sc; static char zerobuf[PAGE_SIZE]; error = 0; mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup2(cdev); while (uio->uio_resid > 0 && error == 0) { gpa = uio->uio_offset; off = gpa & PAGE_MASK; c = min(uio->uio_resid, PAGE_SIZE - off); /* * The VM has a hole in its physical memory map. If we want to * use 'dd' to inspect memory beyond the hole we need to * provide bogus data for memory that lies in the hole. * * Since this device does not support lseek(2), dd(1) will * read(2) blocks of data to simulate the lseek(2). */ hpa = vm_gpa2hpa(sc->vm, gpa, c); if (hpa == (vm_paddr_t)-1) { if (uio->uio_rw == UIO_READ) error = uiomove(zerobuf, c, uio); else error = EFAULT; } else error = uiomove((void *)PHYS_TO_DMAP(hpa), c, uio); } mtx_unlock(&vmmdev_mtx); return (error); } static int vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct thread *td) { int error, vcpu; struct vmmdev_softc *sc; struct vm_memory_segment *seg; struct vm_register *vmreg; struct vm_seg_desc* vmsegdesc; struct vm_pin *vmpin; struct vm_run *vmrun; struct vm_event *vmevent; struct vm_lapic_irq *vmirq; struct vm_capability *vmcap; struct vm_pptdev *pptdev; struct vm_pptdev_mmio *pptmmio; struct vm_pptdev_msi *pptmsi; struct vm_pptdev_msix *pptmsix; struct vm_nmi *vmnmi; struct vm_stats *vmstats; struct vm_stat_desc *statdesc; struct vm_x2apic *x2apic; mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup2(cdev); if (sc == NULL) { mtx_unlock(&vmmdev_mtx); return (ENXIO); } /* * Some VMM ioctls can operate only on vcpus that are not running. */ switch (cmd) { case VM_RUN: case VM_SET_PINNING: case VM_GET_REGISTER: case VM_SET_REGISTER: case VM_GET_SEGMENT_DESCRIPTOR: case VM_SET_SEGMENT_DESCRIPTOR: case VM_INJECT_EVENT: case VM_GET_CAPABILITY: case VM_SET_CAPABILITY: case VM_PPTDEV_MSI: case VM_SET_X2APIC_STATE: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. 
*/ vcpu = *(int *)data; if (vcpu < 0 || vcpu >= VM_MAXCPU) { error = EINVAL; goto done; } if (vcpu_is_running(sc->vm, vcpu, NULL)) { error = EBUSY; goto done; } break; default: break; } switch(cmd) { case VM_RUN: vmrun = (struct vm_run *)data; vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_RUNNING); mtx_unlock(&vmmdev_mtx); error = vm_run(sc->vm, vmrun); mtx_lock(&vmmdev_mtx); vm_set_run_state(sc->vm, vmrun->cpuid, VCPU_STOPPED); break; case VM_STAT_DESC: { const char *desc; statdesc = (struct vm_stat_desc *)data; desc = vmm_stat_desc(statdesc->index); if (desc != NULL) { error = 0; strlcpy(statdesc->desc, desc, sizeof(statdesc->desc)); } else error = EINVAL; break; } case VM_STATS: { CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_TYPES); vmstats = (struct vm_stats *)data; getmicrotime(&vmstats->tv); error = vmm_stat_copy(sc->vm, vmstats->cpuid, &vmstats->num_entries, vmstats->statbuf); break; } case VM_PPTDEV_MSI: pptmsi = (struct vm_pptdev_msi *)data; error = ppt_setup_msi(sc->vm, pptmsi->vcpu, pptmsi->bus, pptmsi->slot, pptmsi->func, pptmsi->destcpu, pptmsi->vector, pptmsi->numvec); break; case VM_PPTDEV_MSIX: pptmsix = (struct vm_pptdev_msix *)data; error = ppt_setup_msix(sc->vm, pptmsix->vcpu, pptmsix->bus, pptmsix->slot, pptmsix->func, pptmsix->idx, pptmsix->msg, pptmsix->vector_control, pptmsix->addr); break; case VM_MAP_PPTDEV_MMIO: pptmmio = (struct vm_pptdev_mmio *)data; error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, pptmmio->func, pptmmio->gpa, pptmmio->len, pptmmio->hpa); break; case VM_BIND_PPTDEV: pptdev = (struct vm_pptdev *)data; error = ppt_assign_device(sc->vm, pptdev->bus, pptdev->slot, pptdev->func); break; case VM_UNBIND_PPTDEV: pptdev = (struct vm_pptdev *)data; error = ppt_unassign_device(sc->vm, pptdev->bus, pptdev->slot, pptdev->func); break; case VM_INJECT_EVENT: vmevent = (struct vm_event *)data; error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type, vmevent->vector, vmevent->error_code, vmevent->error_code_valid); break; case VM_INJECT_NMI: vmnmi = (struct vm_nmi *)data; error = vm_inject_nmi(sc->vm, vmnmi->cpuid); break; case VM_LAPIC_IRQ: vmirq = (struct vm_lapic_irq *)data; error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector); break; case VM_SET_PINNING: vmpin = (struct vm_pin *)data; error = vm_set_pinning(sc->vm, vmpin->vm_cpuid, vmpin->host_cpuid); break; case VM_GET_PINNING: vmpin = (struct vm_pin *)data; error = vm_get_pinning(sc->vm, vmpin->vm_cpuid, &vmpin->host_cpuid); break; case VM_MAP_MEMORY: seg = (struct vm_memory_segment *)data; error = vm_malloc(sc->vm, seg->gpa, seg->len); break; case VM_GET_MEMORY_SEG: seg = (struct vm_memory_segment *)data; - seg->hpa = seg->len = 0; + seg->len = 0; (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg); error = 0; break; case VM_GET_REGISTER: vmreg = (struct vm_register *)data; error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, &vmreg->regval); break; case VM_SET_REGISTER: vmreg = (struct vm_register *)data; error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, vmreg->regval); break; case VM_SET_SEGMENT_DESCRIPTOR: vmsegdesc = (struct vm_seg_desc *)data; error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, vmsegdesc->regnum, &vmsegdesc->desc); break; case VM_GET_SEGMENT_DESCRIPTOR: vmsegdesc = (struct vm_seg_desc *)data; error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, vmsegdesc->regnum, &vmsegdesc->desc); break; case VM_GET_CAPABILITY: vmcap = (struct vm_capability *)data; error = vm_get_capability(sc->vm, vmcap->cpuid, vmcap->captype, &vmcap->capval); break; case VM_SET_CAPABILITY: 
vmcap = (struct vm_capability *)data; error = vm_set_capability(sc->vm, vmcap->cpuid, vmcap->captype, vmcap->capval); break; case VM_SET_X2APIC_STATE: x2apic = (struct vm_x2apic *)data; error = vm_set_x2apic_state(sc->vm, x2apic->cpuid, x2apic->state); break; case VM_GET_X2APIC_STATE: x2apic = (struct vm_x2apic *)data; error = vm_get_x2apic_state(sc->vm, x2apic->cpuid, &x2apic->state); break; default: error = ENOTTY; break; } done: mtx_unlock(&vmmdev_mtx); return (error); } static int vmmdev_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { int error; struct vmmdev_softc *sc; error = -1; mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup2(cdev); if (sc != NULL && (nprot & PROT_EXEC) == 0) { *paddr = vm_gpa2hpa(sc->vm, (vm_paddr_t)offset, PAGE_SIZE); if (*paddr != (vm_paddr_t)-1) error = 0; } mtx_unlock(&vmmdev_mtx); return (error); } static void vmmdev_destroy(struct vmmdev_softc *sc) { #ifdef notyet /* XXX kernel is not compiled with invariants */ mtx_assert(&vmmdev_mtx, MA_OWNED); #endif /* * XXX must stop virtual machine instances that may be still * running and cleanup their state. */ SLIST_REMOVE(&head, sc, vmmdev_softc, link); destroy_dev(sc->cdev); vm_destroy(sc->vm); free(sc, M_VMMDEV); } static int sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) { int error; char buf[VM_MAX_NAMELEN]; struct vmmdev_softc *sc; strlcpy(buf, "beavis", sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(buf); if (sc == NULL) { mtx_unlock(&vmmdev_mtx); return (EINVAL); } vmmdev_destroy(sc); mtx_unlock(&vmmdev_mtx); return (0); } SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_vmm_destroy, "A", NULL); static struct cdevsw vmmdevsw = { .d_name = "vmmdev", .d_version = D_VERSION, .d_ioctl = vmmdev_ioctl, .d_mmap = vmmdev_mmap, .d_read = vmmdev_rw, .d_write = vmmdev_rw, }; static int sysctl_vmm_create(SYSCTL_HANDLER_ARGS) { int error; struct vm *vm; struct vmmdev_softc *sc; char buf[VM_MAX_NAMELEN]; strlcpy(buf, "beavis", sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(buf); if (sc != NULL) { mtx_unlock(&vmmdev_mtx); return (EEXIST); } vm = vm_create(buf); if (vm == NULL) { mtx_unlock(&vmmdev_mtx); return (EINVAL); } sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); sc->vm = vm; sc->cdev = make_dev(&vmmdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); sc->cdev->si_drv1 = sc; SLIST_INSERT_HEAD(&head, sc, link); mtx_unlock(&vmmdev_mtx); return (0); } SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_vmm_create, "A", NULL); static int sysctl_vmm_mem_total(SYSCTL_HANDLER_ARGS) { size_t val = vmm_mem_get_mem_total(); return sysctl_handle_long(oidp, &val, 0, req); } SYSCTL_PROC(_hw_vmm, OID_AUTO, mem_total, CTLTYPE_LONG | CTLFLAG_RD, 0, 0, sysctl_vmm_mem_total, "LU", "Amount of Total memory"); static int sysctl_vmm_mem_free(SYSCTL_HANDLER_ARGS) { size_t val = vmm_mem_get_mem_free(); return sysctl_handle_long(oidp, &val, 0, req); } SYSCTL_PROC(_hw_vmm, OID_AUTO, mem_free, CTLTYPE_LONG | CTLFLAG_RD, 0, 0, sysctl_vmm_mem_free, "LU", "Amount of Free memory"); void vmmdev_init(void) { mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); } void vmmdev_cleanup(void) { struct vmmdev_softc *sc, *sc2; mtx_lock(&vmmdev_mtx); SLIST_FOREACH_SAFE(sc, &head, 
link, sc2) vmmdev_destroy(sc); mtx_unlock(&vmmdev_mtx); } Index: projects/bhyve/sys/amd64/vmm/vmm_mem.c =================================================================== --- projects/bhyve/sys/amd64/vmm/vmm_mem.c (revision 241177) +++ projects/bhyve/sys/amd64/vmm/vmm_mem.c (revision 241178) @@ -1,436 +1,436 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_util.h" #include "vmm_mem.h" static MALLOC_DEFINE(M_VMM_MEM, "vmm memory", "vmm memory"); #define MB (1024 * 1024) #define GB (1024 * MB) #define VMM_MEM_MAXSEGS 64 /* protected by vmm_mem_mtx */ static struct { vm_paddr_t base; vm_size_t length; } vmm_mem_avail[VMM_MEM_MAXSEGS]; static int vmm_mem_nsegs; size_t vmm_mem_total_bytes; static vm_paddr_t maxaddr; static struct mtx vmm_mem_mtx; /* * Steal any memory that was deliberately hidden from FreeBSD either by * the use of MAXMEM kernel config option or the hw.physmem loader tunable. */ static int vmm_mem_steal_memory(void) { int nsegs; caddr_t kmdp; uint32_t smapsize; uint64_t base, length; struct bios_smap *smapbase, *smap, *smapend; /* * Borrowed from hammer_time() and getmemsize() in machdep.c */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) panic("No BIOS smap info from loader!"); smapsize = *((uint32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); vmm_mem_total_bytes = 0; nsegs = 0; for (smap = smapbase; smap < smapend; smap++) { /* * XXX * Assuming non-overlapping, monotonically increasing * memory segments. */ if (smap->type != SMAP_TYPE_MEMORY) continue; if (smap->length == 0) break; base = roundup(smap->base, NBPDR); length = rounddown(smap->length, NBPDR); /* Skip this segment if FreeBSD is using all of it. */ if (base + length <= ptoa(Maxmem)) continue; /* * If FreeBSD is using part of this segment then adjust * 'base' and 'length' accordingly. 
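 * For example (illustrative numbers only): with ptoa(Maxmem) at 4GB
 * (0x100000000) and an SMAP segment rounded to [0xc0000000, 0x140000000),
 * 'used' below is 0x100000000 - 0xc0000000 = 0x40000000, so the range
 * left for vmm shrinks to [0x100000000, 0x140000000).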
*/ if (base < ptoa(Maxmem)) { uint64_t used; used = roundup(ptoa(Maxmem), NBPDR) - base; base += used; length -= used; } if (length == 0) continue; vmm_mem_avail[nsegs].base = base; vmm_mem_avail[nsegs].length = length; vmm_mem_total_bytes += length; if (base + length > maxaddr) maxaddr = base + length; if (0 && bootverbose) { printf("vmm_mem_populate: index %d, base 0x%0lx, " "length %ld\n", nsegs, vmm_mem_avail[nsegs].base, vmm_mem_avail[nsegs].length); } nsegs++; if (nsegs >= VMM_MEM_MAXSEGS) { printf("vmm_mem_populate: maximum number of vmm memory " "segments reached!\n"); return (ENOSPC); } } vmm_mem_nsegs = nsegs; return (0); } static void vmm_mem_direct_map(vm_paddr_t start, vm_paddr_t end) { vm_paddr_t addr, remaining; int pdpi, pdi, superpage_size; pml4_entry_t *pml4p; pdp_entry_t *pdp; pd_entry_t *pd; uint64_t page_attr_bits; if (end >= NBPML4) panic("Cannot map memory beyond %ldGB", NBPML4 / GB); if (vmm_supports_1G_pages()) superpage_size = NBPDP; else superpage_size = NBPDR; /* * Get the page directory pointer page that contains the direct * map address mappings. */ pml4p = kernel_pmap->pm_pml4; pdp = (pdp_entry_t *)PHYS_TO_DMAP(pml4p[DMPML4I] & ~PAGE_MASK); page_attr_bits = PG_RW | PG_V | PG_PS | PG_G; addr = start; while (addr < end) { remaining = end - addr; pdpi = addr / NBPDP; if (superpage_size == NBPDP && remaining >= NBPDP && addr % NBPDP == 0) { /* * If there isn't a mapping for this address then * create one but if there is one already make sure * it matches what we expect it to be. */ if (pdp[pdpi] == 0) { pdp[pdpi] = addr | page_attr_bits; if (0 && bootverbose) { printf("vmm_mem_populate: mapping " "0x%lx with 1GB page at " "pdpi %d\n", addr, pdpi); } } else { pdp_entry_t pdpe = pdp[pdpi]; if ((pdpe & ~PAGE_MASK) != addr || (pdpe & page_attr_bits) != page_attr_bits) { panic("An invalid mapping 0x%016lx " "already exists for 0x%016lx\n", pdpe, addr); } } addr += NBPDP; } else { if (remaining < NBPDR) { panic("vmm_mem_populate: remaining (%ld) must " "be greater than NBPDR (%d)\n", remaining, NBPDR); } if (pdp[pdpi] == 0) { /* * XXX we lose this memory forever because * we do not keep track of the virtual address * that would be required to free this page. */ pd = malloc(PAGE_SIZE, M_VMM_MEM, M_WAITOK | M_ZERO); if ((uintptr_t)pd & PAGE_MASK) { panic("vmm_mem_populate: page directory" "page not aligned on %d " "boundary\n", PAGE_SIZE); } pdp[pdpi] = vtophys(pd); pdp[pdpi] |= PG_RW | PG_V | PG_U; if (0 && bootverbose) { printf("Creating page directory " "at pdp index %d for 0x%016lx\n", pdpi, addr); } } pdi = (addr % NBPDP) / NBPDR; pd = (pd_entry_t *)PHYS_TO_DMAP(pdp[pdpi] & ~PAGE_MASK); /* * Create a new mapping if one doesn't already exist * or validate it if it does. */ if (pd[pdi] == 0) { pd[pdi] = addr | page_attr_bits; if (0 && bootverbose) { printf("vmm_mem_populate: mapping " "0x%lx with 2MB page at " "pdpi %d, pdi %d\n", addr, pdpi, pdi); } } else { pd_entry_t pde = pd[pdi]; if ((pde & ~PAGE_MASK) != addr || (pde & page_attr_bits) != page_attr_bits) { panic("An invalid mapping 0x%016lx " "already exists for 0x%016lx\n", pde, addr); } } addr += NBPDR; } } } static int vmm_mem_populate(void) { int seg, error; vm_paddr_t start, end; /* populate the vmm_mem_avail[] array */ error = vmm_mem_steal_memory(); if (error) return (error); /* * Now map the memory that was hidden from FreeBSD in * the direct map VA space. 
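 * Illustrative note (not part of this change): vmm_mem_direct_map()
 * above installs only 2MB (NBPDR) or 1GB (NBPDP) superpage mappings in
 * the kernel's direct map, which is why each stolen segment below must
 * start and end on a 2MB boundary before it is mapped.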
*/ for (seg = 0; seg < vmm_mem_nsegs; seg++) { start = vmm_mem_avail[seg].base; end = start + vmm_mem_avail[seg].length; if ((start & PDRMASK) != 0 || (end & PDRMASK) != 0) { panic("start (0x%016lx) and end (0x%016lx) must be " "aligned on a %dMB boundary\n", start, end, NBPDR / MB); } vmm_mem_direct_map(start, end); } return (0); } int vmm_mem_init(void) { int error; mtx_init(&vmm_mem_mtx, "vmm_mem_mtx", NULL, MTX_DEF); error = vmm_mem_populate(); if (error) return (error); return (0); } vm_paddr_t vmm_mem_alloc(size_t size) { int i; vm_paddr_t addr; - if ((size & PDRMASK) != 0) { + if ((size & PAGE_MASK) != 0) { panic("vmm_mem_alloc: size 0x%0lx must be " - "aligned on a 0x%0x boundary\n", size, NBPDR); + "aligned on a 0x%0x boundary\n", size, PAGE_SIZE); } addr = 0; mtx_lock(&vmm_mem_mtx); for (i = 0; i < vmm_mem_nsegs; i++) { if (vmm_mem_avail[i].length >= size) { addr = vmm_mem_avail[i].base; vmm_mem_avail[i].base += size; vmm_mem_avail[i].length -= size; /* remove a zero length segment */ if (vmm_mem_avail[i].length == 0) { memmove(&vmm_mem_avail[i], &vmm_mem_avail[i + 1], (vmm_mem_nsegs - (i + 1)) * sizeof(vmm_mem_avail[0])); vmm_mem_nsegs--; } break; } } mtx_unlock(&vmm_mem_mtx); return (addr); } size_t vmm_mem_get_mem_total(void) { return vmm_mem_total_bytes; } size_t vmm_mem_get_mem_free(void) { size_t length = 0; int i; mtx_lock(&vmm_mem_mtx); for (i = 0; i < vmm_mem_nsegs; i++) { length += vmm_mem_avail[i].length; } mtx_unlock(&vmm_mem_mtx); return(length); } void vmm_mem_free(vm_paddr_t base, size_t length) { int i; - if ((base & PDRMASK) != 0 || (length & PDRMASK) != 0) { + if ((base & PAGE_MASK) != 0 || (length & PAGE_MASK) != 0) { panic("vmm_mem_free: base 0x%0lx and length 0x%0lx must be " - "aligned on a 0x%0x boundary\n", base, length, NBPDR); + "aligned on a 0x%0x boundary\n", base, length, PAGE_SIZE); } mtx_lock(&vmm_mem_mtx); for (i = 0; i < vmm_mem_nsegs; i++) { if (vmm_mem_avail[i].base > base) break; } if (vmm_mem_nsegs >= VMM_MEM_MAXSEGS) panic("vmm_mem_free: cannot free any more segments"); /* Create a new segment at index 'i' */ memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i], (vmm_mem_nsegs - i) * sizeof(vmm_mem_avail[0])); vmm_mem_avail[i].base = base; vmm_mem_avail[i].length = length; vmm_mem_nsegs++; coalesce_some_more: for (i = 0; i < vmm_mem_nsegs - 1; i++) { if (vmm_mem_avail[i].base + vmm_mem_avail[i].length == vmm_mem_avail[i + 1].base) { vmm_mem_avail[i].length += vmm_mem_avail[i + 1].length; memmove(&vmm_mem_avail[i + 1], &vmm_mem_avail[i + 2], (vmm_mem_nsegs - (i + 2)) * sizeof(vmm_mem_avail[0])); vmm_mem_nsegs--; goto coalesce_some_more; } } mtx_unlock(&vmm_mem_mtx); } vm_paddr_t vmm_mem_maxaddr(void) { return (maxaddr); } void vmm_mem_dump(void) { int i; vm_paddr_t base; vm_size_t length; mtx_lock(&vmm_mem_mtx); for (i = 0; i < vmm_mem_nsegs; i++) { base = vmm_mem_avail[i].base; length = vmm_mem_avail[i].length; printf("%-4d0x%016lx 0x%016lx\n", i, base, base + length); } mtx_unlock(&vmm_mem_mtx); }
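A minimal, self-contained sketch of the insert-and-coalesce step used by vmm_mem_free() above (illustration only, not part of this commit): seg_avail[], seg_count and seg_free() are hypothetical user-space stand-ins for vmm_mem_avail[], vmm_mem_nsegs and the locked kernel path, and the goto-based rescan is replaced by an index that only advances when no merge occurred.

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <assert.h>

#define MAXSEGS	64

/* Hypothetical stand-ins for vmm_mem_avail[] and vmm_mem_nsegs. */
static struct {
	uint64_t base;
	uint64_t length;
} seg_avail[MAXSEGS];
static int seg_count;

/* Insert a freed range in address order, then merge adjacent ranges. */
static void
seg_free(uint64_t base, uint64_t length)
{
	int i;

	assert(seg_count < MAXSEGS);

	/* Find the first segment that starts above 'base'. */
	for (i = 0; i < seg_count; i++) {
		if (seg_avail[i].base > base)
			break;
	}

	/* Open a slot at index 'i' and record the freed range. */
	memmove(&seg_avail[i + 1], &seg_avail[i],
	    (seg_count - i) * sizeof(seg_avail[0]));
	seg_avail[i].base = base;
	seg_avail[i].length = length;
	seg_count++;

	/* Coalesce any neighbours that now touch. */
	for (i = 0; i < seg_count - 1; ) {
		if (seg_avail[i].base + seg_avail[i].length ==
		    seg_avail[i + 1].base) {
			seg_avail[i].length += seg_avail[i + 1].length;
			memmove(&seg_avail[i + 1], &seg_avail[i + 2],
			    (seg_count - (i + 2)) * sizeof(seg_avail[0]));
			seg_count--;
		} else
			i++;
	}
}

int
main(void)
{
	int i;

	/* Freeing two touching 4KB pages yields a single 8KB segment. */
	seg_free(0x101000, 0x1000);
	seg_free(0x100000, 0x1000);
	for (i = 0; i < seg_count; i++)
		printf("%d: 0x%016llx 0x%llx\n", i,
		    (unsigned long long)seg_avail[i].base,
		    (unsigned long long)seg_avail[i].length);
	return (0);
}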