Index: head/lib/libvmmapi/vmmapi.c
===================================================================
--- head/lib/libvmmapi/vmmapi.c	(revision 332297)
+++ head/lib/libvmmapi/vmmapi.c	(revision 332298)
@@ -1,1551 +1,1583 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/sysctl.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/_iovec.h>
 #include <sys/cpuset.h>
 
 #include <x86/segments.h>
 #include <machine/specialreg.h>
 
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
 #include <string.h>
 #include <fcntl.h>
 #include <unistd.h>
 
 #include <libutil.h>
 
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 
 #include "vmmapi.h"
 
 /* Byte-count multipliers; the UL operand makes each product unsigned long. */
 #define	MB	(1024 * 1024UL)
 #define	GB	(1024 * 1024 * 1024UL)
 
 /*
  * Size of the guard region before and after the virtual address space
  * mapping the guest physical memory. This must be a multiple of the
  * superpage size for performance reasons.
  */
 #define	VM_MMAP_GUARD_SIZE	(4 * MB)
 
 /* Shorthand mmap(2) protection masks used by the mapping helpers below. */
 #define	PROT_RW		(PROT_READ | PROT_WRITE)
 #define	PROT_ALL	(PROT_READ | PROT_WRITE | PROT_EXEC)
 
 /* Per-VM handle allocated by vm_open() and released by vm_destroy(). */
 struct vmctx {
 	int	fd;		/* descriptor from vm_device_open(); -1 if none */
 	uint32_t lowmem_limit;	/* largest lowmem size used by vm_setup_memory() */
 	int	memflags;	/* VM_MEM_F_* flags, see vm_set_memflags() */
 	size_t	lowmem;		/* bytes of guest memory mapped from gpa 0 */
 	size_t	highmem;	/* bytes of guest memory mapped from gpa 4GB */
 	char	*baseaddr;	/* host address that corresponds to gpa 0 */
 	char	*name;		/* VM name; vm_open() stores it right after the struct */
 };
 
 /*
  * Write the VM name 'x' to the hw.vmm.create / hw.vmm.destroy sysctl.
  * Each macro evaluates to the sysctlbyname(3) return value.  Note that
  * 'x' is evaluated twice.
  */
 #define	CREATE(x)  sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x)))
 #define	DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x)))
 
 /*
  * Open the device node "/dev/vmm/<name>" for reading and writing.
  *
  * Returns the new file descriptor, or -1 with errno set when the path
  * buffer cannot be allocated or open(2) fails.
  */
 static int
 vm_device_open(const char *name)
 {
 	static const char prefix[] = "/dev/vmm/";
 	char *vmfile;
 	size_t len;
 	int fd, saved_errno;
 
 	/* sizeof(prefix) already counts the terminating NUL. */
 	len = sizeof(prefix) + strlen(name);
 	vmfile = malloc(len);
 	if (vmfile == NULL) {
 		/* Fail cleanly instead of depending on assert(). */
 		errno = ENOMEM;
 		return (-1);
 	}
 	snprintf(vmfile, len, "%s%s", prefix, name);
 
 	/* Open the device file */
 	fd = open(vmfile, O_RDWR, 0);
 
 	/* Keep open(2)'s errno intact across free(). */
 	saved_errno = errno;
 	free(vmfile);
 	errno = saved_errno;
 	return (fd);
 }
 
 /*
  * Request creation of a virtual machine called 'name'.
  * Returns the value produced by the CREATE() sysctl wrapper.
  */
 int
 vm_create(const char *name)
 {
 	char *vmname;
 
 	/* CREATE() expands its argument twice; hand it a plain local. */
 	vmname = (char *)name;
 	return (CREATE(vmname));
 }
 
 /*
  * Allocate a context for the virtual machine 'name' and open its device
  * node.
  *
  * Returns the new context, or NULL with errno describing the failure.
  * A failed open only releases the context; it does not destroy the
  * virtual machine, which this function neither created nor opened.
  */
 struct vmctx *
 vm_open(const char *name)
 {
 	struct vmctx *vm;
 	int saved_errno;
 
 	/* The name is stored in the same allocation, right after the struct. */
 	vm = malloc(sizeof(struct vmctx) + strlen(name) + 1);
 	if (vm == NULL)
 		return (NULL);
 
 	vm->fd = -1;
 	vm->memflags = 0;
 	vm->lowmem_limit = 3 * GB;
 	/* No guest memory is mapped until vm_setup_memory() runs. */
 	vm->lowmem = 0;
 	vm->highmem = 0;
 	vm->baseaddr = NULL;
 	vm->name = (char *)(vm + 1);
 	strcpy(vm->name, name);
 
 	vm->fd = vm_device_open(vm->name);
 	if (vm->fd < 0) {
 		/* Report the open failure, not whatever free() leaves behind. */
 		saved_errno = errno;
 		free(vm);
 		errno = saved_errno;
 		return (NULL);
 	}
 
 	return (vm);
 }
 
 /*
  * Close the device descriptor (if one is open), issue DESTROY() for the
  * VM name and free the context.  'vm' must not be NULL.
  */
 void
 vm_destroy(struct vmctx *vm)
 {
 	assert(vm != NULL);
 
 	if (!(vm->fd < 0))
 		close(vm->fd);
 
 	DESTROY(vm->name);
 	free(vm);
 }
 
 /*
  * Parse a memory size string into '*ret_memsize'.
  *
  * A string that strtoul() consumes completely is taken as a plain number;
  * anything else is handed to expand_number(3).  Returns 0 on the plain
  * number path, otherwise the expand_number() result.
  */
 int
 vm_parse_memsize(const char *optarg, size_t *ret_memsize)
 {
 	char *end;
 	size_t val;
 
 	val = strtoul(optarg, &end, 0);
 
 	/* Empty input or trailing characters: not a plain number. */
 	if (*optarg == '\0' || *end != '\0')
 		return (expand_number(optarg, ret_memsize));
 
 	/*
 	 * For the sake of backward compatibility a value below one megabyte
 	 * is interpreted as being in units of MB.
 	 */
 	*ret_memsize = (val < MB) ? val * MB : val;
 	return (0);
 }
 
 /* Return the lowmem limit currently stored in 'ctx'. */
 uint32_t
 vm_get_lowmem_limit(struct vmctx *ctx)
 {
 	uint32_t limit;
 
 	limit = ctx->lowmem_limit;
 	return (limit);
 }
 
 /* Store 'limit' as the lowmem limit of 'ctx'. */
 void
 vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit)
 {
 	ctx->lowmem_limit = limit;
 	return;
 }
 
 /* Replace the memory flags of 'ctx' with 'flags'. */
 void
 vm_set_memflags(struct vmctx *ctx, int flags)
 {
 	ctx->memflags = flags;
 	return;
 }
 
 /* Return the memory flags currently stored in 'ctx'. */
 int
 vm_get_memflags(struct vmctx *ctx)
 {
 	int flags;
 
 	flags = ctx->memflags;
 	return (flags);
 }
 
 /*
  * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
  *
  * Returns 0 if an identical mapping is already reported at 'gpa', -1 with
  * errno set to EEXIST if a different mapping is reported there, and the
  * VM_MMAP_MEMSEG ioctl result otherwise.
  */
 int
 vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
     size_t len, int prot)
 {
 	struct vm_memmap memmap;
 	int error, flags;
 
 	/* Capture the request before the arguments are reused below. */
 	memmap.gpa = gpa;
 	memmap.segid = segid;
 	memmap.segoff = off;
 	memmap.len = len;
 	memmap.prot = prot;
 	memmap.flags = (ctx->memflags & VM_MEM_F_WIRED) ? VM_MEMMAP_F_WIRED : 0;
 
 	/*
 	 * If this mapping already exists then don't create it again. This
 	 * is the common case for SYSMEM mappings created by bhyveload(8).
 	 * vm_mmap_getnext() overwrites the locals with what it found.
 	 */
 	error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
 	if (error != 0 || gpa != memmap.gpa)
 		return (ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap));
 
 	if (segid == memmap.segid && off == memmap.segoff &&
 	    prot == memmap.prot && flags == memmap.flags)
 		return (0);
 
 	errno = EEXIST;
 	return (-1);
 }
 
 /*
  * Issue VM_MMAP_GETNEXT with '*gpa' as input.  On success every output
  * parameter is filled in from the reply; on failure they are left alone.
  * Returns the ioctl result.
  */
 int
 vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
     vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
 {
 	struct vm_memmap reply;
 	int error;
 
 	memset(&reply, 0, sizeof(reply));
 	reply.gpa = *gpa;
 
 	error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &reply);
 	if (error != 0)
 		return (error);
 
 	*gpa = reply.gpa;
 	*segid = reply.segid;
 	*segoff = reply.segoff;
 	*len = reply.len;
 	*prot = reply.prot;
 	*flags = reply.flags;
 	return (0);
 }
 
 /*
  * Compare two memory segments described by (length, name).
  *
  * Returns 0 when the lengths match and the names are either both NULL or
  * equal strings; returns -1 otherwise.  Names may be NULL because only
  * device memory segments are named.
  */
 static int
 cmpseg(size_t len, const char *str, size_t len2, const char *str2)
 {
 
 	if (len != len2)
 		return (-1);
 
 	/* Two unnamed segments of equal length are identical. */
 	if (str == NULL && str2 == NULL)
 		return (0);
 
 	/* A named segment never matches an unnamed one. */
 	if (str == NULL || str2 == NULL)
 		return (-1);
 
 	return (strcmp(str, str2) == 0 ? 0 : -1);
 }
 
 /*
  * Create memory segment 'segid' of 'len' bytes, optionally named 'name'.
  *
  * Returns 0 if an identical segment already exists, -1 with errno EINVAL
  * if a different one exists, -1 with errno ENAMETOOLONG if 'name' does not
  * fit, and otherwise the result of vm_get_memseg() or the VM_ALLOC_MEMSEG
  * ioctl.
  */
 static int
 vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
 {
 	struct vm_memseg memseg;
 	int error;
 
 	/*
 	 * If the memory segment has already been created then just return.
 	 * This is the usual case for the SYSMEM segment created by userspace
 	 * loaders like bhyveload(8).
 	 */
 	error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
 	    sizeof(memseg.name));
 	if (error)
 		return (error);
 
 	if (memseg.len != 0) {
 		if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg)) == 0)
 			return (0);
 		errno = EINVAL;
 		return (-1);
 	}
 
 	/* Nothing there yet: build a fresh request. */
 	memset(&memseg, 0, sizeof(memseg));
 	memseg.segid = segid;
 	memseg.len = len;
 	if (name != NULL &&
 	    strlcpy(memseg.name, name, sizeof(memseg.name)) >=
 	    sizeof(memseg.name)) {
 		errno = ENAMETOOLONG;
 		return (-1);
 	}
 
 	return (ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg));
 }
 
 /*
  * Query memory segment 'segid' with VM_GET_MEMSEG.
  *
  * On success '*lenp' receives the segment length and the name is copied
  * into 'namebuf'.  Returns the ioctl result, or -1 with errno set to
  * ENAMETOOLONG when the name does not fit in 'bufsize' bytes.
  */
 int
 vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
     size_t bufsize)
 {
 	struct vm_memseg memseg;
 	int error;
 
 	memseg.segid = segid;
 	error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
 	if (error != 0)
 		return (error);
 
 	*lenp = memseg.len;
 	if (strlcpy(namebuf, memseg.name, bufsize) >= bufsize) {
 		errno = ENAMETOOLONG;
 		return (-1);
 	}
 	return (0);
 }
 
 /*
  * Map 'len' bytes of VM_SYSMEM at guest address 'gpa' and then map the same
  * range into this process at 'base + gpa'.
  *
  * Returns the vm_mmap_memseg() error if that step fails, -1 if mmap(2)
  * fails, and 0 on success.
  */
 static int
 setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
 {
 	int error, mapflags;
 
 	/* Map 'len' bytes starting at 'gpa' in the guest address space */
 	error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
 	if (error)
 		return (error);
 
 	mapflags = MAP_SHARED | MAP_FIXED;
 	if (!(ctx->memflags & VM_MEM_F_INCORE))
 		mapflags |= MAP_NOCORE;
 
 	/* mmap into the process address space on the host */
 	if (mmap(base + gpa, len, PROT_RW, mapflags, ctx->fd, gpa) ==
 	    MAP_FAILED)
 		return (-1);
 
 	return (0);
 }
 
 int
 vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
 {
 	size_t objsize, len;
 	vm_paddr_t gpa;
 	char *baseaddr, *ptr;
 	int error, flags;
 
 	assert(vms == VM_MMAP_ALL);
 
 	/*
 	 * If 'memsize' cannot fit entirely in the 'lowmem' segment then
 	 * create another 'highmem' segment above 4GB for the remainder.
 	 */
 	if (memsize > ctx->lowmem_limit) {
 		ctx->lowmem = ctx->lowmem_limit;
 		ctx->highmem = memsize - ctx->lowmem_limit;
 		objsize = 4*GB + ctx->highmem;
 	} else {
 		ctx->lowmem = memsize;
 		ctx->highmem = 0;
 		objsize = ctx->lowmem;
 	}
 
 	error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
 	if (error)
 		return (error);
 
 	/*
 	 * Stake out a contiguous region covering the guest physical memory
 	 * and the adjoining guard regions.
 	 */
 	len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
 	flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
 	ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0);
 	if (ptr == MAP_FAILED)
 		return (-1);
 
 	baseaddr = ptr + VM_MMAP_GUARD_SIZE;
 	if (ctx->highmem > 0) {
 		gpa = 4*GB;
 		len = ctx->highmem;
 		error = setup_memory_segment(ctx, gpa, len, baseaddr);
 		if (error)
 			return (error);
 	}
 
 	if (ctx->lowmem > 0) {
 		gpa = 0;
 		len = ctx->lowmem;
 		error = setup_memory_segment(ctx, gpa, len, baseaddr);
 		if (error)
 			return (error);
 	}
 
 	ctx->baseaddr = baseaddr;
 
 	return (0);
 }
 
 /*
  * Translate the guest physical range [gaddr, gaddr+len) to a host pointer.
  *
  * Returns a non-NULL pointer only if the range is entirely contained in
  * the lowmem region or entirely contained in the highmem region.
  *
  * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
  * The instruction emulation code depends on this behavior.
  */
 void *
 vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
 {
 
 	/* Lowmem: [0, lowmem) */
 	if (ctx->lowmem > 0 &&
 	    gaddr < ctx->lowmem && len <= ctx->lowmem &&
 	    gaddr + len <= ctx->lowmem)
 		return (ctx->baseaddr + gaddr);
 
 	/* Highmem: [4GB, 4GB + highmem) */
 	if (ctx->highmem > 0 && gaddr >= 4*GB &&
 	    gaddr < 4*GB + ctx->highmem &&
 	    len <= ctx->highmem &&
 	    gaddr + len <= 4*GB + ctx->highmem)
 		return (ctx->baseaddr + gaddr);
 
 	return (NULL);
 }
 
 /* Return the lowmem size recorded in 'ctx'. */
 size_t
 vm_get_lowmem_size(struct vmctx *ctx)
 {
 	size_t size;
 
 	size = ctx->lowmem;
 	return (size);
 }
 
 /* Return the highmem size recorded in 'ctx'. */
 size_t
 vm_get_highmem_size(struct vmctx *ctx)
 {
 	size_t size;
 
 	size = ctx->highmem;
 	return (size);
 }
 
 /*
  * Create the named device memory segment 'segid' of 'len' bytes and map it
  * into this process between two guard regions.
  *
  * The segment is opened through "/dev/vmm.io/<vm name>.<name>".  Returns
  * the host address of the mapping, or MAP_FAILED on any failure ('name'
  * NULL or empty sets errno to EINVAL).
  */
 void *
 vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
 {
 	char pathname[MAXPATHLEN];
 	char *region, *mapping;
 	size_t total;
 	int devfd, mapflags;
 
 	devfd = -1;
 	mapping = MAP_FAILED;
 
 	if (name == NULL || name[0] == '\0') {
 		errno = EINVAL;
 		goto done;
 	}
 
 	if (vm_alloc_memseg(ctx, segid, len, name) != 0)
 		goto done;
 
 	strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
 	strlcat(pathname, ctx->name, sizeof(pathname));
 	strlcat(pathname, ".", sizeof(pathname));
 	strlcat(pathname, name, sizeof(pathname));
 
 	devfd = open(pathname, O_RDWR);
 	if (devfd < 0)
 		goto done;
 
 	/*
 	 * Stake out a contiguous region covering the device memory and the
 	 * adjoining guard regions.
 	 */
 	total = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
 	mapflags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
 	region = mmap(NULL, total, PROT_NONE, mapflags, -1, 0);
 	if (region == MAP_FAILED)
 		goto done;
 
 	mapflags = MAP_SHARED | MAP_FIXED;
 	if (!(ctx->memflags & VM_MEM_F_INCORE))
 		mapflags |= MAP_NOCORE;
 
 	/* mmap the devmem region in the host address space */
 	mapping = mmap(region + VM_MMAP_GUARD_SIZE, len, PROT_RW, mapflags,
 	    devfd, 0);
 done:
 	/* The descriptor is not needed once the mapping attempt is over. */
 	if (devfd >= 0)
 		close(devfd);
 	return (mapping);
 }
 
 int
 vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
 	    uint64_t base, uint32_t limit, uint32_t access)
 {
 	int error;
 	struct vm_seg_desc vmsegdesc;
 
 	bzero(&vmsegdesc, sizeof(vmsegdesc));
 	vmsegdesc.cpuid = vcpu;
 	vmsegdesc.regnum = reg;
 	vmsegdesc.desc.base = base;
 	vmsegdesc.desc.limit = limit;
 	vmsegdesc.desc.access = access;
 
 	error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 	return (error);
 }
 
 int
 vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 	    uint64_t *base, uint32_t *limit, uint32_t *access)
 {
 	int error;
 	struct vm_seg_desc vmsegdesc;
 
 	bzero(&vmsegdesc, sizeof(vmsegdesc));
 	vmsegdesc.cpuid = vcpu;
 	vmsegdesc.regnum = reg;
 
 	error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc);
 	if (error == 0) {
 		*base = vmsegdesc.desc.base;
 		*limit = vmsegdesc.desc.limit;
 		*access = vmsegdesc.desc.access;
 	}
 	return (error);
 }
 
 /*
  * Same as vm_get_desc(), but stores the result in the fields of
  * '*seg_desc'.
  */
 int
 vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc)
 {
 
 	return (vm_get_desc(ctx, vcpu, reg, &seg_desc->base,
 	    &seg_desc->limit, &seg_desc->access));
 }
 
 int
 vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
 {
 	int error;
 	struct vm_register vmreg;
 
 	bzero(&vmreg, sizeof(vmreg));
 	vmreg.cpuid = vcpu;
 	vmreg.regnum = reg;
 	vmreg.regval = val;
 
 	error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg);
 	return (error);
 }
 
 int
 vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
 {
 	int error;
 	struct vm_register vmreg;
 
 	bzero(&vmreg, sizeof(vmreg));
 	vmreg.cpuid = vcpu;
 	vmreg.regnum = reg;
 
 	error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg);
 	*ret_val = vmreg.regval;
 	return (error);
 }
 
 int
 vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
     const int *regnums, uint64_t *regvals)
 {
 	int error;
 	struct vm_register_set vmregset;
 
 	bzero(&vmregset, sizeof(vmregset));
 	vmregset.cpuid = vcpu;
 	vmregset.count = count;
 	vmregset.regnums = regnums;
 	vmregset.regvals = regvals;
 
 	error = ioctl(ctx->fd, VM_SET_REGISTER_SET, &vmregset);
 	return (error);
 }
 
 int
 vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
     const int *regnums, uint64_t *regvals)
 {
 	int error;
 	struct vm_register_set vmregset;
 
 	bzero(&vmregset, sizeof(vmregset));
 	vmregset.cpuid = vcpu;
 	vmregset.count = count;
 	vmregset.regnums = regnums;
 	vmregset.regvals = regvals;
 
 	error = ioctl(ctx->fd, VM_GET_REGISTER_SET, &vmregset);
 	return (error);
 }
 
 int
 vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
 {
 	int error;
 	struct vm_run vmrun;
 
 	bzero(&vmrun, sizeof(vmrun));
 	vmrun.cpuid = vcpu;
 
 	error = ioctl(ctx->fd, VM_RUN, &vmrun);
 	bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
 	return (error);
 }
 
 int
 vm_suspend(struct vmctx *ctx, enum vm_suspend_how how)
 {
 	struct vm_suspend vmsuspend;
 
 	bzero(&vmsuspend, sizeof(vmsuspend));
 	vmsuspend.how = how;
 	return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend));
 }
 
 int
 vm_reinit(struct vmctx *ctx)
 {
 
 	return (ioctl(ctx->fd, VM_REINIT, 0));
 }
 
 /*
  * Queue exception 'vector' for 'vcpu' via VM_INJECT_EXCEPTION.
  *
  * 'errcode' is passed along with 'errcode_valid', and
  * 'restart_instruction' is forwarded unchanged.  Returns the ioctl result.
  */
 int
 vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
     uint32_t errcode, int restart_instruction)
 {
 	struct vm_exception exc;
 
 	/*
 	 * Zero the request like every other ioctl wrapper in this file so
 	 * that no uninitialized stack bytes are handed to the kernel.
 	 */
 	bzero(&exc, sizeof(exc));
 	exc.cpuid = vcpu;
 	exc.vector = vector;
 	exc.error_code = errcode;
 	exc.error_code_valid = errcode_valid;
 	exc.restart_instruction = restart_instruction;
 
 	return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
 }
 
 /*
  * Convert an APIC id to a vcpu number.  'ctx' is not consulted.
  */
 int
 vm_apicid2vcpu(struct vmctx *ctx, int apicid)
 {
 	int vcpu;
 
 	/*
 	 * The apic id associated with the 'vcpu' has the same numerical value
 	 * as the 'vcpu' itself.
 	 */
 	vcpu = apicid;
 	return (vcpu);
 }
 
 int
 vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector)
 {
 	struct vm_lapic_irq vmirq;
 
 	bzero(&vmirq, sizeof(vmirq));
 	vmirq.cpuid = vcpu;
 	vmirq.vector = vector;
 
 	return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq));
 }
 
 int
 vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector)
 {
 	struct vm_lapic_irq vmirq;
 
 	bzero(&vmirq, sizeof(vmirq));
 	vmirq.cpuid = vcpu;
 	vmirq.vector = vector;
 
 	return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq));
 }
 
 int
 vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg)
 {
 	struct vm_lapic_msi vmmsi;
 
 	bzero(&vmmsi, sizeof(vmmsi));
 	vmmsi.addr = addr;
 	vmmsi.msg = msg;
 
 	return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi));
 }
 
 int
 vm_ioapic_assert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq));
 }
 
 int
 vm_ioapic_deassert_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq));
 }
 
 int
 vm_ioapic_pulse_irq(struct vmctx *ctx, int irq)
 {
 	struct vm_ioapic_irq ioapic_irq;
 
 	bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq));
 	ioapic_irq.irq = irq;
 
 	return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq));
 }
 
 int
 vm_ioapic_pincount(struct vmctx *ctx, int *pincount)
 {
 
 	return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount));
 }
 
 int
 vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq));
 }
 
 int
 vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq));
 }
 
 int
 vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq)
 {
 	struct vm_isa_irq isa_irq;
 
 	bzero(&isa_irq, sizeof(struct vm_isa_irq));
 	isa_irq.atpic_irq = atpic_irq;
 	isa_irq.ioapic_irq = ioapic_irq;
 
 	return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq));
 }
 
 int
 vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
     enum vm_intr_trigger trigger)
 {
 	struct vm_isa_irq_trigger isa_irq_trigger;
 
 	bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger));
 	isa_irq_trigger.atpic_irq = atpic_irq;
 	isa_irq_trigger.trigger = trigger;
 
 	return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger));
 }
 
 int
 vm_inject_nmi(struct vmctx *ctx, int vcpu)
 {
 	struct vm_nmi vmnmi;
 
 	bzero(&vmnmi, sizeof(vmnmi));
 	vmnmi.cpuid = vcpu;
 
 	return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi));
 }
 
 /*
  * Table pairing capability names with their VM_CAP_* values.  It is used by
  * vm_capability_name2type() and vm_capability_type2name(), which stop at
  * the terminating entry whose 'name' is NULL.
  */
 static struct {
 	const char	*name;
 	int		type;
 } capstrmap[] = {
 	{ "hlt_exit",		VM_CAP_HALT_EXIT },
 	{ "mtrap_exit",		VM_CAP_MTRAP_EXIT },
 	{ "pause_exit",		VM_CAP_PAUSE_EXIT },
 	{ "unrestricted_guest",	VM_CAP_UNRESTRICTED_GUEST },
 	{ "enable_invpcid",	VM_CAP_ENABLE_INVPCID },
 	{ 0 }	/* terminator */
 };
 
 /*
  * Look up 'capname' in capstrmap.  Returns the matching capability type,
  * or -1 if 'capname' is NULL or not present in the table.
  */
 int
 vm_capability_name2type(const char *capname)
 {
 	int idx;
 
 	if (capname == NULL)
 		return (-1);
 
 	idx = 0;
 	while (capstrmap[idx].name != NULL) {
 		if (strcmp(capstrmap[idx].name, capname) == 0)
 			return (capstrmap[idx].type);
 		idx++;
 	}
 
 	return (-1);
 }
 
 /*
  * Look up capability 'type' in capstrmap.  Returns the matching name, or
  * NULL if the table has no entry for it.
  */
 const char *
 vm_capability_type2name(int type)
 {
 	int idx;
 
 	idx = 0;
 	while (capstrmap[idx].name != NULL) {
 		if (capstrmap[idx].type == type)
 			return (capstrmap[idx].name);
 		idx++;
 	}
 
 	return (NULL);
 }
 
 int
 vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 		  int *retval)
 {
 	int error;
 	struct vm_capability vmcap;
 
 	bzero(&vmcap, sizeof(vmcap));
 	vmcap.cpuid = vcpu;
 	vmcap.captype = cap;
 
 	error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap);
 	*retval = vmcap.capval;
 	return (error);
 }
 
 int
 vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val)
 {
 	struct vm_capability vmcap;
 
 	bzero(&vmcap, sizeof(vmcap));
 	vmcap.cpuid = vcpu;
 	vmcap.captype = cap;
 	vmcap.capval = val;
 	
 	return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap));
 }
 
 int
 vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
 {
 	struct vm_pptdev pptdev;
 
 	bzero(&pptdev, sizeof(pptdev));
 	pptdev.bus = bus;
 	pptdev.slot = slot;
 	pptdev.func = func;
 
 	return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev));
 }
 
 int
 vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func)
 {
 	struct vm_pptdev pptdev;
 
 	bzero(&pptdev, sizeof(pptdev));
 	pptdev.bus = bus;
 	pptdev.slot = slot;
 	pptdev.func = func;
 
 	return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev));
 }
 
 /*
  * Issue VM_MAP_PPTDEV_MMIO for the device at bus/slot/func, passing the
  * guest address 'gpa', length 'len' and host address 'hpa'.
  * Returns the ioctl result.
  */
 int
 vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 		   vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 	struct vm_pptdev_mmio req;
 	int error;
 
 	memset(&req, 0, sizeof(req));
 	req.bus = bus;
 	req.slot = slot;
 	req.func = func;
 	req.gpa = gpa;
 	req.len = len;
 	req.hpa = hpa;
 
 	error = ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &req);
 	return (error);
 }
 
 int
 vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
     uint64_t addr, uint64_t msg, int numvec)
 {
 	struct vm_pptdev_msi pptmsi;
 
 	bzero(&pptmsi, sizeof(pptmsi));
 	pptmsi.vcpu = vcpu;
 	pptmsi.bus = bus;
 	pptmsi.slot = slot;
 	pptmsi.func = func;
 	pptmsi.msg = msg;
 	pptmsi.addr = addr;
 	pptmsi.numvec = numvec;
 
 	return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi));
 }
 
 int	
 vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
     int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
 {
 	struct vm_pptdev_msix pptmsix;
 
 	bzero(&pptmsix, sizeof(pptmsix));
 	pptmsix.vcpu = vcpu;
 	pptmsix.bus = bus;
 	pptmsix.slot = slot;
 	pptmsix.func = func;
 	pptmsix.idx = idx;
 	pptmsix.msg = msg;
 	pptmsix.addr = addr;
 	pptmsix.vector_control = vector_control;
 
 	return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix);
 }
 
 /*
  * Fetch the statistics of 'vcpu' via VM_STATS.
  *
  * On success the optional outputs 'ret_tv' and 'ret_entries' are filled in
  * and a pointer to the statistics buffer is returned.  That buffer lives in
  * function-static storage, so the next call overwrites it.  Returns NULL
  * if the ioctl fails.
  */
 uint64_t *
 vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
 	     int *ret_entries)
 {
 	static struct vm_stats vmstats;
 
 	vmstats.cpuid = vcpu;
 	if (ioctl(ctx->fd, VM_STATS, &vmstats) != 0)
 		return (NULL);
 
 	if (ret_entries != NULL)
 		*ret_entries = vmstats.num_entries;
 	if (ret_tv != NULL)
 		*ret_tv = vmstats.tv;
 
 	return (vmstats.statbuf);
 }
 
 /*
  * Fetch the description of statistic 'index' via VM_STAT_DESC.
  *
  * Returns a pointer into function-static storage (overwritten by the next
  * call), or NULL if the ioctl fails.
  */
 const char *
 vm_get_stat_desc(struct vmctx *ctx, int index)
 {
 	static struct vm_stat_desc statdesc;
 
 	statdesc.index = index;
 	if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) != 0)
 		return (NULL);
 
 	return (statdesc.desc);
 }
 
 int
 vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state)
 {
 	int error;
 	struct vm_x2apic x2apic;
 
 	bzero(&x2apic, sizeof(x2apic));
 	x2apic.cpuid = vcpu;
 
 	error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic);
 	*state = x2apic.state;
 	return (error);
 }
 
 int
 vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state)
 {
 	int error;
 	struct vm_x2apic x2apic;
 
 	bzero(&x2apic, sizeof(x2apic));
 	x2apic.cpuid = vcpu;
 	x2apic.state = state;
 
 	error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic);
 
 	return (error);
 }
 
 /*
  * Program 'vcpu' with a fixed set of register and segment descriptor
  * values through vm_set_register() and vm_set_desc().
  *
  * From Intel Vol 3a:
  * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT
  *
  * The sequence stops at the first call that fails and that call's error
  * is returned; registers programmed before the failure stay programmed.
  * Returns 0 when every call succeeds.
  */
 int
 vcpu_reset(struct vmctx *vmctx, int vcpu)
 {
 	int error;
 	uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx;
 	uint32_t desc_access, desc_limit;
 	uint16_t sel;
 
 	zero = 0;
 
 	/* Flags, instruction pointer and control registers. */
 	rflags = 0x2;
 	error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags);
 	if (error)
 		goto done;
 
 	rip = 0xfff0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0)
 		goto done;
 
 	cr0 = CR0_NE;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0)
 		goto done;
 
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0)
 		goto done;
 	
 	cr4 = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0)
 		goto done;
 
 	/*
 	 * CS: present, r/w, accessed, 16-bit, byte granularity, usable
 	 */
 	desc_base = 0xffff0000;
 	desc_limit = 0xffff;
 	desc_access = 0x0093;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0xf000;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0)
 		goto done;
 
 	/*
 	 * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity
 	 */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x0093;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS,
 			    desc_base, desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	/* The data segment selectors are all zero. */
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0)
 		goto done;
 
 	/* General purpose registers */
 	/* Every one is zeroed except RDX, which gets 0xf00. */
 	rdx = 0xf00;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0)
 		goto done;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0)
 		goto done;
 
 	/* GDTR, IDTR */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR,
 			    desc_base, desc_limit, desc_access);
 	if (error != 0)
 		goto done;
 
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR,
 			    desc_base, desc_limit, desc_access);
 	if (error != 0)
 		goto done;
 
 	/* TR */
 	/*
 	 * NOTE(review): the call below passes literal 0 for both base and
 	 * limit, so the desc_limit of 0xffff assigned here is not used for
 	 * TR.  Confirm whether a zero limit is intended.
 	 */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x0000008b;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access);
 	if (error)
 		goto done;
 
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0)
 		goto done;
 
 	/* LDTR */
 	desc_base = 0;
 	desc_limit = 0xffff;
 	desc_access = 0x00000082;
 	error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base,
 			    desc_limit, desc_access);
 	if (error)
 		goto done;
 
 	/* 'sel' is assigned but the call passes a literal 0; same value. */
 	sel = 0;
 	if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0)
 		goto done;
 
 	/* XXX cr2, debug registers */
 
 	error = 0;
 done:
 	return (error);
 }
 
 int
 vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
 {
 	int error, i;
 	struct vm_gpa_pte gpapte;
 
 	bzero(&gpapte, sizeof(gpapte));
 	gpapte.gpa = gpa;
 
 	error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);
 
 	if (error == 0) {
 		*num = gpapte.ptenum;
 		for (i = 0; i < gpapte.ptenum; i++)
 			pte[i] = gpapte.pte[i];
 	}
 
 	return (error);
 }
 
 int
 vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities)
 {
 	int error;
 	struct vm_hpet_cap cap;
 
 	bzero(&cap, sizeof(struct vm_hpet_cap));
 	error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap);
 	if (capabilities != NULL)
 		*capabilities = cap.capabilities;
 	return (error);
 }
 
 int
 vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, int prot, uint64_t *gpa, int *fault)
 {
 	struct vm_gla2gpa gg;
 	int error;
 
 	bzero(&gg, sizeof(struct vm_gla2gpa));
 	gg.vcpuid = vcpu;
 	gg.prot = prot;
 	gg.gla = gla;
 	gg.paging = *paging;
 
 	error = ioctl(ctx->fd, VM_GLA2GPA, &gg);
 	if (error == 0) {
 		*fault = gg.fault;
 		*gpa = gg.gpa;
 	}
 	return (error);
 }
 
 int
 vm_gla2gpa_nofault(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, int prot, uint64_t *gpa, int *fault)
 {
 	struct vm_gla2gpa gg;
 	int error;
 
 	bzero(&gg, sizeof(struct vm_gla2gpa));
 	gg.vcpuid = vcpu;
 	gg.prot = prot;
 	gg.gla = gla;
 	gg.paging = *paging;
 
 	error = ioctl(ctx->fd, VM_GLA2GPA_NOFAULT, &gg);
 	if (error == 0) {
 		*fault = gg.fault;
 		*gpa = gg.gpa;
 	}
 	return (error);
 }
 
 /*
  * Fallback minimum macro, defined only if nothing else provides 'min'.
  * Each argument may be evaluated twice, so avoid side effects.
  */
 #ifndef min
 #define	min(a,b)	(((a) < (b)) ? (a) : (b))
 #endif
 
 /*
  * Build an iovec array describing the host memory behind the guest linear
  * range [gla, gla+len).
  *
  * All 'iovcnt' entries are cleared first.  The range is then translated in
  * pieces that do not cross a page boundary, each piece filling one iovec
  * entry; running out of entries trips an assertion.
  *
  * Returns 0 on success (also when vm_gla2gpa() reports a fault through
  * '*fault'), the vm_gla2gpa() error if translation fails, or EFAULT if a
  * piece cannot be mapped by vm_map_gpa().
  */
 int
 vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
     int *fault)
 {
 	void *hva;
 	uint64_t gpa;
 	int error, i, n, off;
 
 	/* Start from a fully cleared vector. */
 	i = 0;
 	while (i < iovcnt) {
 		iov[i].iov_base = 0;
 		iov[i].iov_len = 0;
 		i++;
 	}
 
 	for (; len != 0; gla += n, len -= n) {
 		assert(iovcnt > 0);
 		error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault);
 		if (error || *fault)
 			return (error);
 
 		/* Never let one piece run past the end of its page. */
 		off = gpa & PAGE_MASK;
 		n = min(len, PAGE_SIZE - off);
 
 		hva = vm_map_gpa(ctx, gpa, n);
 		if (hva == NULL)
 			return (EFAULT);
 
 		iov->iov_base = hva;
 		iov->iov_len = n;
 		iov++;
 		iovcnt--;
 	}
 	return (0);
 }
 
 /*
  * Counterpart of vm_copy_setup().  It currently does nothing; none of the
  * arguments are used.
  */
 void
 vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
 {
 	/* Nothing to release. */
 }
 
 /*
  * Gather 'len' bytes from the buffers described by 'iov' into 'vp'.
  *
  * Entries are consumed in order; an entry with a zero length trips an
  * assertion.  'ctx' and 'vcpu' are not used.
  */
 void
 vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
 {
 	char *out;
 	size_t chunk;
 
 	for (out = vp; len != 0; iov++, out += chunk, len -= chunk) {
 		assert(iov->iov_len);
 		chunk = min(len, iov->iov_len);
 		bcopy(iov->iov_base, out, chunk);
 	}
 }
 
 /*
  * Scatter 'len' bytes from 'vp' into the buffers described by 'iov'.
  *
  * Entries are consumed in order; an entry with a zero length trips an
  * assertion.  'ctx' and 'vcpu' are not used.
  */
 void
 vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
     size_t len)
 {
 	const char *in;
 	size_t chunk;
 
 	for (in = vp; len != 0; iov++, in += chunk, len -= chunk) {
 		assert(iov->iov_len);
 		chunk = min(len, iov->iov_len);
 		bcopy(in, iov->iov_base, chunk);
 	}
 }
 
 /*
  * Issue VM_GET_CPUS for the cpu set selected by 'which', handing the
  * kernel the caller's 'cpus' buffer and sizeof(cpuset_t) as its size.
  * Returns the ioctl result.
  */
 static int
 vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus)
 {
 	struct vm_cpuset req;
 
 	memset(&req, 0, sizeof(req));
 	req.which = which;
 	req.cpus = cpus;
 	req.cpusetsize = sizeof(cpuset_t);
 
 	return (ioctl(ctx->fd, VM_GET_CPUS, &req));
 }
 
 /* Fetch the VM_ACTIVE_CPUS set into '*cpus' through vm_get_cpus(). */
 int
 vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus)
 {
 	int error;
 
 	error = vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus);
 	return (error);
 }
 
 /* Fetch the VM_SUSPENDED_CPUS set into '*cpus' through vm_get_cpus(). */
 int
 vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus)
 {
 	int error;
 
 	error = vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus);
 	return (error);
 }
 
 /* Fetch the VM_DEBUG_CPUS set into '*cpus' through vm_get_cpus(). */
 int
 vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus)
 {
 	int error;
 
 	error = vm_get_cpus(ctx, VM_DEBUG_CPUS, cpus);
 	return (error);
 }
 
 int
 vm_activate_cpu(struct vmctx *ctx, int vcpu)
 {
 	struct vm_activate_cpu ac;
 	int error;
 
 	bzero(&ac, sizeof(struct vm_activate_cpu));
 	ac.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac);
 	return (error);
 }
 
 int
 vm_suspend_cpu(struct vmctx *ctx, int vcpu)
 {
 	struct vm_activate_cpu ac;
 	int error;
 
 	bzero(&ac, sizeof(struct vm_activate_cpu));
 	ac.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_SUSPEND_CPU, &ac);
 	return (error);
 }
 
 int
 vm_resume_cpu(struct vmctx *ctx, int vcpu)
 {
 	struct vm_activate_cpu ac;
 	int error;
 
 	bzero(&ac, sizeof(struct vm_activate_cpu));
 	ac.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_RESUME_CPU, &ac);
 	return (error);
 }
 
 /*
  * Fetch the two interrupt-info words for vcpu 'vcpu' with the
  * VM_GET_INTINFO ioctl.
  *
  * '*info1' and '*info2' are written only when the ioctl returns 0; on
  * failure they are left untouched.  Returns the ioctl(2) result.
  */
 int
 vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2)
 {
 	struct vm_intinfo vmii;
 	int error;
 
 	bzero(&vmii, sizeof(struct vm_intinfo));
 	vmii.vcpuid = vcpu;
 	error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii);
 	if (error == 0) {
 		*info1 = vmii.info1;
 		*info2 = vmii.info2;
 	}
 	return (error);
 }
 
 /*
  * Pass 'info1' for vcpu 'vcpu' to the VM_SET_INTINFO ioctl.
  *
  * Only 'vcpuid' and 'info1' are set in the request; 'info2' stays zero
  * from the initial bzero().  Returns the ioctl(2) result.
  */
 int
 vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
 {
 	struct vm_intinfo vmii;
 	int error;
 
 	bzero(&vmii, sizeof(struct vm_intinfo));
 	vmii.vcpuid = vcpu;
 	vmii.info1 = info1;
 	error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
 	return (error);
 }
 
 /*
  * Send 'value' for RTC offset 'offset' to the VM_RTC_WRITE ioctl.
  * Returns the ioctl(2) result.
  */
 int
 vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
 {
 	struct vm_rtc_data rtcdata;
 	int error;
 
 	bzero(&rtcdata, sizeof(struct vm_rtc_data));
 	rtcdata.offset = offset;
 	rtcdata.value = value;
 	error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
 	return (error);
 }
 
 /*
  * Request the byte at RTC offset 'offset' with the VM_RTC_READ ioctl.
  *
  * '*retval' is written only when the ioctl returns 0.
  * Returns the ioctl(2) result.
  */
 int
 vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
 {
 	struct vm_rtc_data rtcdata;
 	int error;
 
 	bzero(&rtcdata, sizeof(struct vm_rtc_data));
 	rtcdata.offset = offset;
 	error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
 	if (error == 0)
 		*retval = rtcdata.value;
 	return (error);
 }
 
 /*
  * Pass 'secs' to the VM_RTC_SETTIME ioctl.
  * Returns the ioctl(2) result.
  */
 int
 vm_rtc_settime(struct vmctx *ctx, time_t secs)
 {
 	struct vm_rtc_time rtctime;
 	int error;
 
 	bzero(&rtctime, sizeof(struct vm_rtc_time));
 	rtctime.secs = secs;
 	error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
 	return (error);
 }
 
 /*
  * Fetch the 'secs' value reported by the VM_RTC_GETTIME ioctl.
  *
  * '*secs' is written only when the ioctl returns 0.
  * Returns the ioctl(2) result.
  */
 int
 vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
 {
 	struct vm_rtc_time rtctime;
 	int error;
 
 	bzero(&rtctime, sizeof(struct vm_rtc_time));
 	error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
 	if (error == 0)
 		*secs = rtctime.secs;
 	return (error);
 }
 
 /*
  * Issue the VM_RESTART_INSTRUCTION ioctl for vcpu 'vcpu'.
  *
  * 'arg' is taken as 'void *' (matching the prototype in <machine/vmm.h>)
  * and is used here as a 'struct vmctx *'.  The ioctl argument is a pointer
  * to the local copy of 'vcpu'.  Returns the ioctl(2) result.
  */
 int
 vm_restart_instruction(void *arg, int vcpu)
 {
 	struct vmctx *ctx = arg;
 
 	return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
 }
 
 /*
  * Pass a CPU topology description (sockets, cores, threads, maxcpus) to
  * the VM_SET_TOPOLOGY ioctl.
  *
  * The request structure is zeroed first and the four values are copied in
  * unchanged; no validation is done here.  Returns the ioctl(2) result.
  */
 int
+vm_set_topology(struct vmctx *ctx,
+    uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
+{
+	struct vm_cpu_topology topology;
+
+	bzero(&topology, sizeof (struct vm_cpu_topology));
+	topology.sockets = sockets;
+	topology.cores = cores;
+	topology.threads = threads;
+	topology.maxcpus = maxcpus;
+	return (ioctl(ctx->fd, VM_SET_TOPOLOGY, &topology));
+}
+
 /*
  * Fetch the CPU topology description with the VM_GET_TOPOLOGY ioctl.
  *
  * '*sockets', '*cores', '*threads' and '*maxcpus' are written only when
  * the ioctl returns 0; all four pointers are dereferenced in that case, so
  * none may be NULL.  Returns the ioctl(2) result.
  */
+int
+vm_get_topology(struct vmctx *ctx,
+    uint16_t *sockets, uint16_t *cores, uint16_t *threads, uint16_t *maxcpus)
+{
+	struct vm_cpu_topology topology;
+	int error;
+
+	bzero(&topology, sizeof (struct vm_cpu_topology));
+	error = ioctl(ctx->fd, VM_GET_TOPOLOGY, &topology);
+	if (error == 0) {
+		*sockets = topology.sockets;
+		*cores = topology.cores;
+		*threads = topology.threads;
+		*maxcpus = topology.maxcpus;
+	}
+	return (error);
+}
+
 /*
  * Return the file descriptor stored in 'ctx' -- the same descriptor the
  * ioctl wrappers in this file operate on.
  */
+int
 vm_get_device_fd(struct vmctx *ctx)
 {
 
 	return (ctx->fd);
 }
 
 /*
  * Expose the static table of vmm ioctl commands.  The function has two
  * modes, selected by 'len':
  *
  *   len == NULL   return a malloc(3)'d copy of the whole table, or NULL if
  *                 the allocation fails.  The copy is not freed here, so the
  *                 caller owns it.
  *   len != NULL   store the number of table entries in '*len' and return
  *                 NULL.
  *
  * NOTE(review): VM_MMAP_MEMSEG appears twice in the table, so the count
  * reported through '*len' includes the duplicate entry.
  */
 const cap_ioctl_t *
 vm_get_ioctls(size_t *len)
 {
 	cap_ioctl_t *cmds;
 	/* keep in sync with machine/vmm_dev.h */
 	static const cap_ioctl_t vm_ioctl_cmds[] = { VM_RUN, VM_SUSPEND, VM_REINIT,
 	    VM_ALLOC_MEMSEG, VM_GET_MEMSEG, VM_MMAP_MEMSEG, VM_MMAP_MEMSEG,
 	    VM_MMAP_GETNEXT, VM_SET_REGISTER, VM_GET_REGISTER,
 	    VM_SET_SEGMENT_DESCRIPTOR, VM_GET_SEGMENT_DESCRIPTOR,
 	    VM_SET_REGISTER_SET, VM_GET_REGISTER_SET,
 	    VM_INJECT_EXCEPTION, VM_LAPIC_IRQ, VM_LAPIC_LOCAL_IRQ,
 	    VM_LAPIC_MSI, VM_IOAPIC_ASSERT_IRQ, VM_IOAPIC_DEASSERT_IRQ,
 	    VM_IOAPIC_PULSE_IRQ, VM_IOAPIC_PINCOUNT, VM_ISA_ASSERT_IRQ,
 	    VM_ISA_DEASSERT_IRQ, VM_ISA_PULSE_IRQ, VM_ISA_SET_IRQ_TRIGGER,
 	    VM_SET_CAPABILITY, VM_GET_CAPABILITY, VM_BIND_PPTDEV,
 	    VM_UNBIND_PPTDEV, VM_MAP_PPTDEV_MMIO, VM_PPTDEV_MSI,
 	    VM_PPTDEV_MSIX, VM_INJECT_NMI, VM_STATS, VM_STAT_DESC,
 	    VM_SET_X2APIC_STATE, VM_GET_X2APIC_STATE,
 	    VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
 	    VM_GLA2GPA_NOFAULT,
 	    VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
 	    VM_SET_INTINFO, VM_GET_INTINFO,
 	    VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
-	    VM_RESTART_INSTRUCTION };
+	    VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
 
 	/* No length pointer: hand back a heap copy of the table. */
 	if (len == NULL) {
 		cmds = malloc(sizeof(vm_ioctl_cmds));
 		if (cmds == NULL)
 			return (NULL);
 		bcopy(vm_ioctl_cmds, cmds, sizeof(vm_ioctl_cmds));
 		return (cmds);
 	}
 
 	/* Otherwise only report how many entries the table holds. */
 	*len = nitems(vm_ioctl_cmds);
 	return (NULL);
 }
 
Index: head/lib/libvmmapi/vmmapi.h
===================================================================
--- head/lib/libvmmapi/vmmapi.h	(revision 332297)
+++ head/lib/libvmmapi/vmmapi.h	(revision 332298)
@@ -1,234 +1,240 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _VMMAPI_H_
 #define	_VMMAPI_H_
 
 #include <sys/param.h>
 #include <sys/cpuset.h>
 
 /*
  * API version for out-of-tree consumers like grub-bhyve for making compile
  * time decisions.
  */
 #define	VMMAPI_VERSION	0103	/* 2 digit major followed by 2 digit minor */
 
 struct iovec;
 struct vmctx;
 enum x2apic_state;
 
 /*
  * Different styles of mapping the memory assigned to a VM into the address
  * space of the controlling process.
  */
 enum vm_mmap_style {
 	VM_MMAP_NONE,		/* no mapping */
 	VM_MMAP_ALL,		/* fully and statically mapped */
 	VM_MMAP_SPARSE,		/* mappings created on-demand */
 };
 
 /*
  * 'flags' value passed to 'vm_set_memflags()'.
  */
 #define	VM_MEM_F_INCORE	0x01	/* include guest memory in core file */
 #define	VM_MEM_F_WIRED	0x02	/* guest memory is wired */
 
 /*
  * Identifiers for memory segments:
  * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
  * - the remaining identifiers can be used to create devmem segments.
  */
 enum {
 	VM_SYSMEM,
 	VM_BOOTROM,
 	VM_FRAMEBUFFER,
 };
 
 /*
  * Get the length and name of the memory segment identified by 'ident'.
  * Note that system memory segments are identified with a nul name.
  *
  * Returns 0 on success and non-zero otherwise.
  */
 int	vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name,
 	    size_t namesiz);
 
 /*
  * Iterate over the guest address space. This function finds an address range
  * that starts at an address >= *gpa.
  *
  * Returns 0 if the next address range was found and non-zero otherwise.
  */
 int	vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
 	    vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
 /*
  * Create a device memory segment identified by 'segid'.
  *
  * Returns a pointer to the memory segment on success and MAP_FAILED otherwise.
  */
 void	*vm_create_devmem(struct vmctx *ctx, int segid, const char *name,
 	    size_t len);
 
 /*
  * Map the memory segment identified by 'segid' into the guest address space
  * at [gpa,gpa+len) with protection 'prot'.
  */
 int	vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
 	    vm_ooffset_t segoff, size_t len, int prot);
 
 int	vm_create(const char *name);
 int	vm_get_device_fd(struct vmctx *ctx);
 struct vmctx *vm_open(const char *name);
 void	vm_destroy(struct vmctx *ctx);
 int	vm_parse_memsize(const char *optarg, size_t *memsize);
 int	vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
 void	*vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
 int	vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
 int	vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
 		   uint64_t gla, int prot, uint64_t *gpa, int *fault);
 int	vm_gla2gpa_nofault(struct vmctx *, int vcpuid,
 		   struct vm_guest_paging *paging, uint64_t gla, int prot,
 		   uint64_t *gpa, int *fault);
 uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
 void	vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
 void	vm_set_memflags(struct vmctx *ctx, int flags);
 int	vm_get_memflags(struct vmctx *ctx);
 size_t	vm_get_lowmem_size(struct vmctx *ctx);
 size_t	vm_get_highmem_size(struct vmctx *ctx);
 int	vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
 		    uint64_t base, uint32_t limit, uint32_t access);
 int	vm_get_desc(struct vmctx *ctx, int vcpu, int reg,
 		    uint64_t *base, uint32_t *limit, uint32_t *access);
 int	vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg,
 			struct seg_desc *seg_desc);
 int	vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
 int	vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
 int	vm_set_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
     const int *regnums, uint64_t *regvals);
 int	vm_get_register_set(struct vmctx *ctx, int vcpu, unsigned int count,
     const int *regnums, uint64_t *regvals);
 int	vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit);
 int	vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
 int	vm_reinit(struct vmctx *ctx);
 int	vm_apicid2vcpu(struct vmctx *ctx, int apicid);
 int	vm_inject_exception(struct vmctx *ctx, int vcpu, int vector,
     int errcode_valid, uint32_t errcode, int restart_instruction);
 int	vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
 int	vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
 int	vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
 int	vm_ioapic_assert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_deassert_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pulse_irq(struct vmctx *ctx, int irq);
 int	vm_ioapic_pincount(struct vmctx *ctx, int *pincount);
 int	vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq);
 int	vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq,
 	    enum vm_intr_trigger trigger);
 int	vm_inject_nmi(struct vmctx *ctx, int vcpu);
 int	vm_capability_name2type(const char *capname);
 const char *vm_capability_type2name(int type);
 int	vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 			  int *retval);
 int	vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap,
 			  int val);
 int	vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func);
 int	vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
 			   vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int	vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot,
 	    int func, uint64_t addr, uint64_t msg, int numvec);
 int	vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
 	    int func, int idx, uint64_t addr, uint64_t msg,
 	    uint32_t vector_control);
 
 int	vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
 int	vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
 
 const cap_ioctl_t *vm_get_ioctls(size_t *len);
 
 /*
  * Return a pointer to the statistics buffer. Note that this is not MT-safe.
  */
 uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv,
 		       int *ret_entries);
 const char *vm_get_stat_desc(struct vmctx *ctx, int index);
 
 int	vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *s);
 int	vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s);
 
 int	vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities);
 
 /*
  * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'.
  * The 'iovcnt' should be big enough to accommodate all GPA segments.
  *
  * retval	fault		Interpretation
  *   0		  0		Success
  *   0		  1		An exception was injected into the guest
  * EFAULT	 N/A		Error
  */
 int	vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg,
 	    uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt,
 	    int *fault);
 void	vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
 	    void *host_dst, size_t len);
 void	vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
 	    struct iovec *guest_iov, size_t len);
 void	vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov,
 	    int iovcnt);
 
 /* RTC */
 int	vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value);
 int	vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval);
 int	vm_rtc_settime(struct vmctx *ctx, time_t secs);
 int	vm_rtc_gettime(struct vmctx *ctx, time_t *secs);
 
 /* Reset vcpu register state */
 int	vcpu_reset(struct vmctx *ctx, int vcpu);
 
 int	vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_debug_cpus(struct vmctx *ctx, cpuset_t *cpus);
 int	vm_activate_cpu(struct vmctx *ctx, int vcpu);
 int	vm_suspend_cpu(struct vmctx *ctx, int vcpu);
 int	vm_resume_cpu(struct vmctx *ctx, int vcpu);
 
+/* CPU topology */
+int	vm_set_topology(struct vmctx *ctx, uint16_t sockets, uint16_t cores,
+	    uint16_t threads, uint16_t maxcpus);
+int	vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
+	    uint16_t *threads, uint16_t *maxcpus);
+
 /*
  * FreeBSD specific APIs
  */
 int	vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu,
 				uint64_t rip, uint64_t cr3, uint64_t gdtbase,
 				uint64_t rsp);
 int	vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu,
 					uint32_t eip, uint32_t gdtbase,
 					uint32_t esp);
 void	vm_setup_freebsd_gdt(uint64_t *gdtr);
 #endif	/* _VMMAPI_H_ */
Index: head/sys/amd64/include/vmm.h
===================================================================
--- head/sys/amd64/include/vmm.h	(revision 332297)
+++ head/sys/amd64/include/vmm.h	(revision 332298)
@@ -1,690 +1,694 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _VMM_H_
 #define	_VMM_H_
 
 #include <x86/segments.h>
 
 enum vm_suspend_how {
 	VM_SUSPEND_NONE,
 	VM_SUSPEND_RESET,
 	VM_SUSPEND_POWEROFF,
 	VM_SUSPEND_HALT,
 	VM_SUSPEND_TRIPLEFAULT,
 	VM_SUSPEND_LAST
 };
 
 /*
  * Identifiers for architecturally defined registers.
  */
 enum vm_reg_name {
 	VM_REG_GUEST_RAX,
 	VM_REG_GUEST_RBX,
 	VM_REG_GUEST_RCX,
 	VM_REG_GUEST_RDX,
 	VM_REG_GUEST_RSI,
 	VM_REG_GUEST_RDI,
 	VM_REG_GUEST_RBP,
 	VM_REG_GUEST_R8,
 	VM_REG_GUEST_R9,
 	VM_REG_GUEST_R10,
 	VM_REG_GUEST_R11,
 	VM_REG_GUEST_R12,
 	VM_REG_GUEST_R13,
 	VM_REG_GUEST_R14,
 	VM_REG_GUEST_R15,
 	VM_REG_GUEST_CR0,
 	VM_REG_GUEST_CR3,
 	VM_REG_GUEST_CR4,
 	VM_REG_GUEST_DR7,
 	VM_REG_GUEST_RSP,
 	VM_REG_GUEST_RIP,
 	VM_REG_GUEST_RFLAGS,
 	VM_REG_GUEST_ES,
 	VM_REG_GUEST_CS,
 	VM_REG_GUEST_SS,
 	VM_REG_GUEST_DS,
 	VM_REG_GUEST_FS,
 	VM_REG_GUEST_GS,
 	VM_REG_GUEST_LDTR,
 	VM_REG_GUEST_TR,
 	VM_REG_GUEST_IDTR,
 	VM_REG_GUEST_GDTR,
 	VM_REG_GUEST_EFER,
 	VM_REG_GUEST_CR2,
 	VM_REG_GUEST_PDPTE0,
 	VM_REG_GUEST_PDPTE1,
 	VM_REG_GUEST_PDPTE2,
 	VM_REG_GUEST_PDPTE3,
 	VM_REG_GUEST_INTR_SHADOW,
 	VM_REG_GUEST_DR0,
 	VM_REG_GUEST_DR1,
 	VM_REG_GUEST_DR2,
 	VM_REG_GUEST_DR3,
 	VM_REG_GUEST_DR6,
 	VM_REG_LAST
 };
 
 enum x2apic_state {
 	X2APIC_DISABLED,
 	X2APIC_ENABLED,
 	X2APIC_STATE_LAST
 };
 
 #define	VM_INTINFO_VECTOR(info)	((info) & 0xff)
 #define	VM_INTINFO_DEL_ERRCODE	0x800
 #define	VM_INTINFO_RSVD		0x7ffff000
 #define	VM_INTINFO_VALID	0x80000000
 #define	VM_INTINFO_TYPE		0x700
 #define	VM_INTINFO_HWINTR	(0 << 8)
 #define	VM_INTINFO_NMI		(2 << 8)
 #define	VM_INTINFO_HWEXCEPTION	(3 << 8)
 #define	VM_INTINFO_SWINTR	(4 << 8)
 
 #ifdef _KERNEL
 
 #define	VM_MAX_NAMELEN	32
 
 struct vm;
 struct vm_exception;
 struct seg_desc;
 struct vm_exit;
 struct vm_run;
 struct vhpet;
 struct vioapic;
 struct vlapic;
 struct vmspace;
 struct vm_object;
 struct vm_guest_paging;
 struct pmap;
 
 /*
  * Bundle of event cookies passed to the 'vmrun' backend entry point (see
  * vmi_run_func_t).  The cookies are read through vcpu_rendezvous_pending(),
  * vcpu_suspended() and vcpu_reqidle().
  */
 struct vm_eventinfo {
 	void	*rptr;		/* rendezvous cookie */
 	int	*sptr;		/* suspend cookie */
 	int	*iptr;		/* reqidle cookie */
 };
 
 typedef int	(*vmm_init_func_t)(int ipinum);
 typedef int	(*vmm_cleanup_func_t)(void);
 typedef void	(*vmm_resume_func_t)(void);
 typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
 typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
 		    struct pmap *pmap, struct vm_eventinfo *info);
 typedef void	(*vmi_cleanup_func_t)(void *vmi);
 typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
 				      uint64_t *retval);
 typedef int	(*vmi_set_register_t)(void *vmi, int vcpu, int num,
 				      uint64_t val);
 typedef int	(*vmi_get_desc_t)(void *vmi, int vcpu, int num,
 				  struct seg_desc *desc);
 typedef int	(*vmi_set_desc_t)(void *vmi, int vcpu, int num,
 				  struct seg_desc *desc);
 typedef int	(*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval);
 typedef int	(*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
 typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
 typedef void	(*vmi_vmspace_free)(struct vmspace *vmspace);
 typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
 typedef void	(*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
 
 /*
  * Table of backend entry points.  Two instances are declared right after
  * this structure: vmm_ops_intel and vmm_ops_amd.
  */
 struct vmm_ops {
 	vmm_init_func_t		init;		/* module wide initialization */
 	vmm_cleanup_func_t	cleanup;
 	vmm_resume_func_t	resume;
 
 	vmi_init_func_t		vminit;		/* vm-specific initialization */
 	vmi_run_func_t		vmrun;
 	vmi_cleanup_func_t	vmcleanup;
 	vmi_get_register_t	vmgetreg;
 	vmi_set_register_t	vmsetreg;
 	vmi_get_desc_t		vmgetdesc;
 	vmi_set_desc_t		vmsetdesc;
 	vmi_get_cap_t		vmgetcap;
 	vmi_set_cap_t		vmsetcap;
 	vmi_vmspace_alloc	vmspace_alloc;
 	vmi_vmspace_free	vmspace_free;
 	vmi_vlapic_init		vlapic_init;
 	vmi_vlapic_cleanup	vlapic_cleanup;
 };
 
 extern struct vmm_ops vmm_ops_intel;
 extern struct vmm_ops vmm_ops_amd;
 
 int vm_create(const char *name, struct vm **retvm);
 void vm_destroy(struct vm *vm);
 int vm_reinit(struct vm *vm);
 const char *vm_name(struct vm *vm);
+void vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+    uint16_t *threads, uint16_t *maxcpus);
+int vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus);
 
 /*
  * APIs that modify the guest memory map require all vcpus to be frozen.
  */
 int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
     size_t len, int prot, int flags);
 int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
 void vm_free_memseg(struct vm *vm, int ident);
 int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
 int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
 int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
 int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
 
 /*
  * APIs that inspect the guest memory map require only a *single* vcpu to
  * be frozen. This acts like a read lock on the guest memory map since any
  * modification requires *all* vcpus to be frozen.
  */
 int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
     vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
 int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
     struct vm_object **objptr);
 void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
     int prot, void **cookie);
 void vm_gpa_release(void *cookie);
 bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
 
 int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
 int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
 int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *ret_desc);
 int vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 		    struct seg_desc *desc);
 int vm_run(struct vm *vm, struct vm_run *vmrun);
 int vm_suspend(struct vm *vm, enum vm_suspend_how how);
 int vm_inject_nmi(struct vm *vm, int vcpu);
 int vm_nmi_pending(struct vm *vm, int vcpuid);
 void vm_nmi_clear(struct vm *vm, int vcpuid);
 int vm_inject_extint(struct vm *vm, int vcpu);
 int vm_extint_pending(struct vm *vm, int vcpuid);
 void vm_extint_clear(struct vm *vm, int vcpuid);
 struct vlapic *vm_lapic(struct vm *vm, int cpu);
 struct vioapic *vm_ioapic(struct vm *vm);
 struct vhpet *vm_hpet(struct vm *vm);
 int vm_get_capability(struct vm *vm, int vcpu, int type, int *val);
 int vm_set_capability(struct vm *vm, int vcpu, int type, int val);
 int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state);
 int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state);
 int vm_apicid2vcpuid(struct vm *vm, int apicid);
 int vm_activate_cpu(struct vm *vm, int vcpu);
 int vm_suspend_cpu(struct vm *vm, int vcpu);
 int vm_resume_cpu(struct vm *vm, int vcpu);
 struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
 void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
 
 #ifdef _SYS__CPUSET_H_
 /*
  * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'.
  * The rendezvous 'func(arg)' is not allowed to do anything that will
  * cause the thread to be put to sleep.
  *
  * If the rendezvous is being initiated from a vcpu context then the
  * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1.
  *
  * The caller cannot hold any locks when initiating the rendezvous.
  *
  * The implementation of this API may cause vcpus other than those specified
  * by 'dest' to be stalled. The caller should not rely on any vcpus making
  * forward progress when the rendezvous is in progress.
  */
 typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg);
 void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
     vm_rendezvous_func_t func, void *arg);
 cpuset_t vm_active_cpus(struct vm *vm);
 cpuset_t vm_debug_cpus(struct vm *vm);
 cpuset_t vm_suspended_cpus(struct vm *vm);
 #endif	/* _SYS__CPUSET_H_ */
 
 /*
  * Return non-zero when the pointer-sized word addressed by the rendezvous
  * cookie 'info->rptr' is non-zero.
  */
 static __inline int
 vcpu_rendezvous_pending(struct vm_eventinfo *info)
 {
 
 	return (*((uintptr_t *)(info->rptr)) != 0);
 }
 
 /*
  * Return the current value of the int addressed by the suspend cookie
  * 'info->sptr'.
  */
 static __inline int
 vcpu_suspended(struct vm_eventinfo *info)
 {
 
 	return (*info->sptr);
 }
 
 /*
  * Return the current value of the int addressed by the reqidle cookie
  * 'info->iptr'.
  */
 static __inline int
 vcpu_reqidle(struct vm_eventinfo *info)
 {
 
 	return (*info->iptr);
 }
 
 int vcpu_debugged(struct vm *vm, int vcpuid);
 
 /*
  * Return 1 if device indicated by bus/slot/func is supposed to be a
  * pci passthrough device.
  *
  * Return 0 otherwise.
  */
 int vmm_is_pptdev(int bus, int slot, int func);
 
 void *vm_iommu_domain(struct vm *vm);
 
 enum vcpu_state {
 	VCPU_IDLE,
 	VCPU_FROZEN,
 	VCPU_RUNNING,
 	VCPU_SLEEPING,
 };
 
 int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state,
     bool from_idle);
 enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu);
 
 /*
  * Return non-zero when vcpu_get_state() reports VCPU_RUNNING for 'vcpu'.
  * 'hostcpu' is passed straight through to vcpu_get_state().
  */
 static int __inline
 vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
 {
 	return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING);
 }
 
 #ifdef _SYS_PROC_H_
 /*
  * Return 1 when the current thread has TDF_ASTPENDING or TDF_NEEDRESCHED
  * set in td_flags, or has a non-zero td_owepreempt; return 0 otherwise.
  *
  * The decision is based on 'curthread' only; 'vm' and 'vcpu' are not used.
  */
 static int __inline
 vcpu_should_yield(struct vm *vm, int vcpu)
 {
 
 	if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED))
 		return (1);
 	else if (curthread->td_owepreempt)
 		return (1);
 	else
 		return (0);
 }
 #endif
 
 void *vcpu_stats(struct vm *vm, int vcpu);
 void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
 struct vmspace *vm_get_vmspace(struct vm *vm);
 struct vatpic *vm_atpic(struct vm *vm);
 struct vatpit *vm_atpit(struct vm *vm);
 struct vpmtmr *vm_pmtmr(struct vm *vm);
 struct vrtc *vm_rtc(struct vm *vm);
 
 /*
  * Inject exception 'vector' into the guest vcpu. This function returns 0 on
  * success and non-zero on failure.
  *
  * Wrapper functions like 'vm_inject_gp()' should be preferred to calling
  * this function directly because they enforce the trap-like or fault-like
  * behavior of an exception.
  *
  * This function should only be called in the context of the thread that is
  * executing this vcpu.
  */
 int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid,
     uint32_t errcode, int restart_instruction);
 
 /*
  * This function is called after a VM-exit that occurred during exception or
  * interrupt delivery through the IDT. The format of 'intinfo' is described
  * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2.
  *
  * If a VM-exit handler completes the event delivery successfully then it
  * should call vm_exit_intinfo() to extinguish the pending event. For e.g.,
  * if the task switch emulation is triggered via a task gate then it should
  * call this function with 'intinfo=0' to indicate that the external event
  * is not pending anymore.
  *
  * Return value is 0 on success and non-zero on failure.
  */
 int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo);
 
 /*
  * This function is called before every VM-entry to retrieve a pending
  * event that should be injected into the guest. This function combines
  * nested events into a double or triple fault.
  *
  * Returns 0 if there are no events that need to be injected into the guest
  * and non-zero otherwise.
  */
 int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info);
 
 int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
 
 enum vm_reg_name vm_segment_name(int seg_encoding);
 
 struct vm_copyinfo {
 	uint64_t	gpa;
 	size_t		len;
 	void		*hva;
 	void		*cookie;
 };
 
 /*
  * Set up 'copyinfo[]' to copy to/from guest linear address space starting
  * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for
  * a copyin or PROT_WRITE for a copyout. 
  *
  * retval	is_fault	Interpretation
  *   0		   0		Success
  *   0		   1		An exception was injected into the guest
  * EFAULT	  N/A		Unrecoverable error
  *
  * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if
  * the return value is 0. The 'copyinfo[]' resources should be freed by calling
  * 'vm_copy_teardown()' after the copy is done.
  */
 int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
     int num_copyinfo, int *is_fault);
 void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
     int num_copyinfo);
 void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
     void *kaddr, size_t len);
 void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
     struct vm_copyinfo *copyinfo, size_t len);
 
 int vcpu_trace_exceptions(struct vm *vm, int vcpuid);
 #endif	/* _KERNEL */
 
 #define	VM_MAXCPU	16			/* maximum virtual cpus */
 
 /*
  * Identifiers for optional vmm capabilities
  */
 enum vm_cap_type {
 	VM_CAP_HALT_EXIT,
 	VM_CAP_MTRAP_EXIT,
 	VM_CAP_PAUSE_EXIT,
 	VM_CAP_UNRESTRICTED_GUEST,
 	VM_CAP_ENABLE_INVPCID,
 	VM_CAP_MAX
 };
 
 enum vm_intr_trigger {
 	EDGE_TRIGGER,
 	LEVEL_TRIGGER
 };
 	
 /*
  * The 'access' field has the format specified in Table 21-2 of the Intel
  * Architecture Manual vol 3b.
  *
  * XXX The contents of the 'access' field are architecturally defined except
  * bit 16 - Segment Unusable.
  */
 struct seg_desc {
 	uint64_t	base;
 	uint32_t	limit;
 	uint32_t	access;
 };
 #define	SEG_DESC_TYPE(access)		((access) & 0x001f)
 #define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)
 #define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)
 #define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)
 #define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
 #define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)
 
 enum vm_cpu_mode {
 	CPU_MODE_REAL,
 	CPU_MODE_PROTECTED,
 	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
 	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
 };
 
 enum vm_paging_mode {
 	PAGING_MODE_FLAT,
 	PAGING_MODE_32,
 	PAGING_MODE_PAE,
 	PAGING_MODE_64,
 };
 
 struct vm_guest_paging {
 	uint64_t	cr3;
 	int		cpl;
 	enum vm_cpu_mode cpu_mode;
 	enum vm_paging_mode paging_mode;
 };
 
 /*
  * The data structures 'vie' and 'vie_op' are meant to be opaque to the
  * consumers of instruction decoding. The only reason why their contents
  * need to be exposed is because they are part of the 'vm_exit' structure.
  */
 struct vie_op {
 	uint8_t		op_byte;	/* actual opcode byte */
 	uint8_t		op_type;	/* type of operation (e.g. MOV) */
 	uint16_t	op_flags;
 };
 
 #define	VIE_INST_SIZE	15
 struct vie {
 	uint8_t		inst[VIE_INST_SIZE];	/* instruction bytes */
 	uint8_t		num_valid;		/* size of the instruction */
 	uint8_t		num_processed;
 
 	uint8_t		addrsize:4, opsize:4;	/* address and operand sizes */
 	uint8_t		rex_w:1,		/* REX prefix */
 			rex_r:1,
 			rex_x:1,
 			rex_b:1,
 			rex_present:1,
 			repz_present:1,		/* REP/REPE/REPZ prefix */
 			repnz_present:1,	/* REPNE/REPNZ prefix */
 			opsize_override:1,	/* Operand size override */
 			addrsize_override:1,	/* Address size override */
 			segment_override:1;	/* Segment override */
 
 	uint8_t		mod:2,			/* ModRM byte */
 			reg:4,
 			rm:4;
 
 	uint8_t		ss:2,			/* SIB byte */
 			index:4,
 			base:4;
 
 	uint8_t		disp_bytes;
 	uint8_t		imm_bytes;
 
 	uint8_t		scale;
 	int		base_register;		/* VM_REG_GUEST_xyz */
 	int		index_register;		/* VM_REG_GUEST_xyz */
 	int		segment_register;	/* VM_REG_GUEST_xyz */
 
 	int64_t		displacement;		/* optional addr displacement */
 	int64_t		immediate;		/* optional immediate operand */
 
 	uint8_t		decoded;	/* set to 1 if successfully decoded */
 
 	struct vie_op	op;			/* opcode description */
 };
 
 enum vm_exitcode {
 	VM_EXITCODE_INOUT,
 	VM_EXITCODE_VMX,
 	VM_EXITCODE_BOGUS,
 	VM_EXITCODE_RDMSR,
 	VM_EXITCODE_WRMSR,
 	VM_EXITCODE_HLT,
 	VM_EXITCODE_MTRAP,
 	VM_EXITCODE_PAUSE,
 	VM_EXITCODE_PAGING,
 	VM_EXITCODE_INST_EMUL,
 	VM_EXITCODE_SPINUP_AP,
 	VM_EXITCODE_DEPRECATED1,	/* used to be SPINDOWN_CPU */
 	VM_EXITCODE_RENDEZVOUS,
 	VM_EXITCODE_IOAPIC_EOI,
 	VM_EXITCODE_SUSPENDED,
 	VM_EXITCODE_INOUT_STR,
 	VM_EXITCODE_TASK_SWITCH,
 	VM_EXITCODE_MONITOR,
 	VM_EXITCODE_MWAIT,
 	VM_EXITCODE_SVM,
 	VM_EXITCODE_REQIDLE,
 	VM_EXITCODE_DEBUG,
 	VM_EXITCODE_MAX
 };
 
 struct vm_inout {
 	uint16_t	bytes:3;	/* 1 or 2 or 4 */
 	uint16_t	in:1;
 	uint16_t	string:1;
 	uint16_t	rep:1;
 	uint16_t	port;
 	uint32_t	eax;		/* valid for out */
 };
 
 struct vm_inout_str {
 	struct vm_inout	inout;		/* must be the first element */
 	struct vm_guest_paging paging;
 	uint64_t	rflags;
 	uint64_t	cr0;
 	uint64_t	index;
 	uint64_t	count;		/* rep=1 (%rcx), rep=0 (1) */
 	int		addrsize;
 	enum vm_reg_name seg_name;
 	struct seg_desc seg_desc;
 };
 
 enum task_switch_reason {
 	TSR_CALL,
 	TSR_IRET,
 	TSR_JMP,
 	TSR_IDT_GATE,	/* task gate in IDT */
 };
 
 struct vm_task_switch {
 	uint16_t	tsssel;		/* new TSS selector */
 	int		ext;		/* task switch due to external event */
 	uint32_t	errcode;
 	int		errcode_valid;	/* push 'errcode' on the new stack */
 	enum task_switch_reason reason;
 	struct vm_guest_paging paging;
 };
 
 struct vm_exit {
 	enum vm_exitcode	exitcode;
 	int			inst_length;	/* 0 means unknown */
 	uint64_t		rip;
 	union {
 		struct vm_inout	inout;
 		struct vm_inout_str inout_str;
 		struct {
 			uint64_t	gpa;
 			int		fault_type;
 		} paging;
 		struct {
 			uint64_t	gpa;
 			uint64_t	gla;
 			uint64_t	cs_base;
 			int		cs_d;		/* CS.D */
 			struct vm_guest_paging paging;
 			struct vie	vie;
 		} inst_emul;
 		/*
 		 * VMX specific payload. Used when there is no "better"
 		 * exitcode to represent the VM-exit.
 		 */
 		struct {
 			int		status;		/* vmx inst status */
 			/*
 			 * 'exit_reason' and 'exit_qualification' are valid
 			 * only if 'status' is zero.
 			 */
 			uint32_t	exit_reason;
 			uint64_t	exit_qualification;
 			/*
 			 * 'inst_error' and 'inst_type' are valid
 			 * only if 'status' is non-zero.
 			 */
 			int		inst_type;
 			int		inst_error;
 		} vmx;
 		/*
 		 * SVM specific payload.
 		 */
 		struct {
 			uint64_t	exitcode;
 			uint64_t	exitinfo1;
 			uint64_t	exitinfo2;
 		} svm;
 		struct {
 			uint32_t	code;		/* ecx value */
 			uint64_t	wval;
 		} msr;
 		struct {
 			int		vcpu;
 			uint64_t	rip;
 		} spinup_ap;
 		struct {
 			uint64_t	rflags;
 			uint64_t	intr_status;
 		} hlt;
 		struct {
 			int		vector;
 		} ioapic_eoi;
 		struct {
 			enum vm_suspend_how how;
 		} suspended;
 		struct vm_task_switch task_switch;
 	} u;
 };
 
 /* APIs to inject faults into the guest */
 void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
     int errcode);
 
 /*
  * Inject vector IDT_UD into 'vcpuid' via vm_inject_fault(), with no error
  * code (errcode_valid = 0).
  */
 static __inline void
 vm_inject_ud(void *vm, int vcpuid)
 {
 	vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0);
 }
 
 /*
  * Inject vector IDT_GP into 'vcpuid' via vm_inject_fault(), with a valid
  * error code of 0.
  */
 static __inline void
 vm_inject_gp(void *vm, int vcpuid)
 {
 	vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0);
 }
 
 /*
  * Inject vector IDT_AC into 'vcpuid' via vm_inject_fault(), with the
  * caller-supplied 'errcode' marked valid.
  */
 static __inline void
 vm_inject_ac(void *vm, int vcpuid, int errcode)
 {
 	vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode);
 }
 
 /*
  * Inject vector IDT_SS into 'vcpuid' via vm_inject_fault(), with the
  * caller-supplied 'errcode' marked valid.
  */
 static __inline void
 vm_inject_ss(void *vm, int vcpuid, int errcode)
 {
 	vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode);
 }
 
 void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
 
 int vm_restart_instruction(void *vm, int vcpuid);
 
 #endif	/* _VMM_H_ */
Index: head/sys/amd64/include/vmm_dev.h
===================================================================
--- head/sys/amd64/include/vmm_dev.h	(revision 332297)
+++ head/sys/amd64/include/vmm_dev.h	(revision 332298)
@@ -1,410 +1,425 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_VMM_DEV_H_
 #define	_VMM_DEV_H_
 
 #ifdef _KERNEL
 void	vmmdev_init(void);
 int	vmmdev_cleanup(void);
 #endif
 
 struct vm_memmap {
 	vm_paddr_t	gpa;
 	int		segid;		/* memory segment */
 	vm_ooffset_t	segoff;		/* offset into memory segment */
 	size_t		len;		/* mmap length */
 	int		prot;		/* RWX */
 	int		flags;
 };
 #define	VM_MEMMAP_F_WIRED	0x01
 #define	VM_MEMMAP_F_IOMMU	0x02
 
 #define	VM_MEMSEG_NAME(m)	((m)->name[0] != '\0' ? (m)->name : NULL)
 struct vm_memseg {
 	int		segid;
 	size_t		len;
 	char		name[SPECNAMELEN + 1];
 };
 
 struct vm_register {
 	int		cpuid;
 	int		regnum;		/* enum vm_reg_name */
 	uint64_t	regval;
 };
 
 struct vm_seg_desc {			/* data or code segment */
 	int		cpuid;
 	int		regnum;		/* enum vm_reg_name */
 	struct seg_desc desc;
 };
 
 struct vm_register_set {
 	int		cpuid;
 	unsigned int	count;
 	const int	*regnums;	/* enum vm_reg_name */
 	uint64_t	*regvals;
 };
 
 struct vm_run {
 	int		cpuid;
 	struct vm_exit	vm_exit;
 };
 
 struct vm_exception {
 	int		cpuid;
 	int		vector;
 	uint32_t	error_code;
 	int		error_code_valid;
 	int		restart_instruction;
 };
 
 struct vm_lapic_msi {
 	uint64_t	msg;
 	uint64_t	addr;
 };
 
 struct vm_lapic_irq {
 	int		cpuid;
 	int		vector;
 };
 
 struct vm_ioapic_irq {
 	int		irq;
 };
 
 struct vm_isa_irq {
 	int		atpic_irq;
 	int		ioapic_irq;
 };
 
 struct vm_isa_irq_trigger {
 	int		atpic_irq;
 	enum vm_intr_trigger trigger;
 };
 
 struct vm_capability {
 	int		cpuid;
 	enum vm_cap_type captype;
 	int		capval;
 	int		allcpus;
 };
 
 struct vm_pptdev {
 	int		bus;
 	int		slot;
 	int		func;
 };
 
 struct vm_pptdev_mmio {
 	int		bus;
 	int		slot;
 	int		func;
 	vm_paddr_t	gpa;
 	vm_paddr_t	hpa;
 	size_t		len;
 };
 
 struct vm_pptdev_msi {
 	int		vcpu;
 	int		bus;
 	int		slot;
 	int		func;
 	int		numvec;		/* 0 means disabled */
 	uint64_t	msg;
 	uint64_t	addr;
 };
 
 struct vm_pptdev_msix {
 	int		vcpu;
 	int		bus;
 	int		slot;
 	int		func;
 	int		idx;
 	uint64_t	msg;
 	uint32_t	vector_control;
 	uint64_t	addr;
 };
 
 struct vm_nmi {
 	int		cpuid;
 };
 
 #define	MAX_VM_STATS	64
 struct vm_stats {
 	int		cpuid;				/* in */
 	int		num_entries;			/* out */
 	struct timeval	tv;
 	uint64_t	statbuf[MAX_VM_STATS];
 };
 
 struct vm_stat_desc {
 	int		index;				/* in */
 	char		desc[128];			/* out */
 };
 
 struct vm_x2apic {
 	int			cpuid;
 	enum x2apic_state	state;
 };
 
 struct vm_gpa_pte {
 	uint64_t	gpa;				/* in */
 	uint64_t	pte[4];				/* out */
 	int		ptenum;
 };
 
 struct vm_hpet_cap {
 	uint32_t	capabilities;	/* lower 32 bits of HPET capabilities */
 };
 
 struct vm_suspend {
 	enum vm_suspend_how how;
 };
 
 struct vm_gla2gpa {
 	int		vcpuid;		/* inputs */
 	int 		prot;		/* PROT_READ or PROT_WRITE */
 	uint64_t	gla;
 	struct vm_guest_paging paging;
 	int		fault;		/* outputs */
 	uint64_t	gpa;
 };
 
 struct vm_activate_cpu {
 	int		vcpuid;
 };
 
 struct vm_cpuset {
 	int		which;
 	int		cpusetsize;
 	cpuset_t	*cpus;
 };
 #define	VM_ACTIVE_CPUS		0
 #define	VM_SUSPENDED_CPUS	1
 #define	VM_DEBUG_CPUS		2
 
 struct vm_intinfo {
 	int		vcpuid;
 	uint64_t	info1;
 	uint64_t	info2;
 };
 
 struct vm_rtc_time {
 	time_t		secs;
 };
 
 struct vm_rtc_data {
 	int		offset;
 	uint8_t		value;
 };
 
+/*
+ * Guest CPU topology; the argument type of the VM_SET_TOPOLOGY and
+ * VM_GET_TOPOLOGY ioctls.
+ */
+struct vm_cpu_topology {
+	uint16_t	sockets;	/* num of sockets */
+	uint16_t	cores;		/* num of cores/socket */
+	uint16_t	threads;	/* num of threads/core */
+	uint16_t	maxcpus;	/* max pluggable cpus */
+};
+
 enum {
 	/* general routines */
 	IOCNUM_ABIVERS = 0,
 	IOCNUM_RUN = 1,
 	IOCNUM_SET_CAPABILITY = 2,
 	IOCNUM_GET_CAPABILITY = 3,
 	IOCNUM_SUSPEND = 4,
 	IOCNUM_REINIT = 5,
 
 	/* memory apis */
 	IOCNUM_MAP_MEMORY = 10,			/* deprecated */
 	IOCNUM_GET_MEMORY_SEG = 11,		/* deprecated */
 	IOCNUM_GET_GPA_PMAP = 12,
 	IOCNUM_GLA2GPA = 13,
 	IOCNUM_ALLOC_MEMSEG = 14,
 	IOCNUM_GET_MEMSEG = 15,
 	IOCNUM_MMAP_MEMSEG = 16,
 	IOCNUM_MMAP_GETNEXT = 17,
 	IOCNUM_GLA2GPA_NOFAULT = 18,
 
 	/* register/state accessors */
 	IOCNUM_SET_REGISTER = 20,
 	IOCNUM_GET_REGISTER = 21,
 	IOCNUM_SET_SEGMENT_DESCRIPTOR = 22,
 	IOCNUM_GET_SEGMENT_DESCRIPTOR = 23,
 	IOCNUM_SET_REGISTER_SET = 24,
 	IOCNUM_GET_REGISTER_SET = 25,
 
 	/* interrupt injection */
 	IOCNUM_GET_INTINFO = 28,
 	IOCNUM_SET_INTINFO = 29,
 	IOCNUM_INJECT_EXCEPTION = 30,
 	IOCNUM_LAPIC_IRQ = 31,
 	IOCNUM_INJECT_NMI = 32,
 	IOCNUM_IOAPIC_ASSERT_IRQ = 33,
 	IOCNUM_IOAPIC_DEASSERT_IRQ = 34,
 	IOCNUM_IOAPIC_PULSE_IRQ = 35,
 	IOCNUM_LAPIC_MSI = 36,
 	IOCNUM_LAPIC_LOCAL_IRQ = 37,
 	IOCNUM_IOAPIC_PINCOUNT = 38,
 	IOCNUM_RESTART_INSTRUCTION = 39,
 
 	/* PCI pass-thru */
 	IOCNUM_BIND_PPTDEV = 40,
 	IOCNUM_UNBIND_PPTDEV = 41,
 	IOCNUM_MAP_PPTDEV_MMIO = 42,
 	IOCNUM_PPTDEV_MSI = 43,
 	IOCNUM_PPTDEV_MSIX = 44,
 
 	/* statistics */
 	IOCNUM_VM_STATS = 50, 
 	IOCNUM_VM_STAT_DESC = 51,
 
 	/* kernel device state */
 	IOCNUM_SET_X2APIC_STATE = 60,
 	IOCNUM_GET_X2APIC_STATE = 61,
 	IOCNUM_GET_HPET_CAPABILITIES = 62,
 
+	/* CPU Topology */
+	IOCNUM_SET_TOPOLOGY = 63,
+	IOCNUM_GET_TOPOLOGY = 64,
+
 	/* legacy interrupt injection */
 	IOCNUM_ISA_ASSERT_IRQ = 80,
 	IOCNUM_ISA_DEASSERT_IRQ = 81,
 	IOCNUM_ISA_PULSE_IRQ = 82,
 	IOCNUM_ISA_SET_IRQ_TRIGGER = 83,
 
 	/* vm_cpuset */
 	IOCNUM_ACTIVATE_CPU = 90,
 	IOCNUM_GET_CPUSET = 91,
 	IOCNUM_SUSPEND_CPU = 92,
 	IOCNUM_RESUME_CPU = 93,
 
 	/* RTC */
 	IOCNUM_RTC_READ = 100,
 	IOCNUM_RTC_WRITE = 101,
 	IOCNUM_RTC_SETTIME = 102,
 	IOCNUM_RTC_GETTIME = 103,
 };
 
 #define	VM_RUN		\
 	_IOWR('v', IOCNUM_RUN, struct vm_run)
 #define	VM_SUSPEND	\
 	_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
 #define	VM_REINIT	\
 	_IO('v', IOCNUM_REINIT)
 #define	VM_ALLOC_MEMSEG	\
 	_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
 #define	VM_GET_MEMSEG	\
 	_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
 #define	VM_MMAP_MEMSEG	\
 	_IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
 #define	VM_MMAP_GETNEXT	\
 	_IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
 #define	VM_SET_REGISTER \
 	_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
 #define	VM_GET_REGISTER \
 	_IOWR('v', IOCNUM_GET_REGISTER, struct vm_register)
 #define	VM_SET_SEGMENT_DESCRIPTOR \
 	_IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
 #define	VM_GET_SEGMENT_DESCRIPTOR \
 	_IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc)
 #define	VM_SET_REGISTER_SET \
 	_IOW('v', IOCNUM_SET_REGISTER_SET, struct vm_register_set)
 #define	VM_GET_REGISTER_SET \
 	_IOWR('v', IOCNUM_GET_REGISTER_SET, struct vm_register_set)
 #define	VM_INJECT_EXCEPTION	\
 	_IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception)
 #define	VM_LAPIC_IRQ 		\
 	_IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq)
 #define	VM_LAPIC_LOCAL_IRQ 	\
 	_IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq)
 #define	VM_LAPIC_MSI		\
 	_IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi)
 #define	VM_IOAPIC_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_DEASSERT_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_PULSE_IRQ	\
 	_IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq)
 #define	VM_IOAPIC_PINCOUNT	\
 	_IOR('v', IOCNUM_IOAPIC_PINCOUNT, int)
 #define	VM_ISA_ASSERT_IRQ	\
 	_IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq)
 #define	VM_ISA_DEASSERT_IRQ	\
 	_IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq)
 #define	VM_ISA_PULSE_IRQ	\
 	_IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq)
 #define	VM_ISA_SET_IRQ_TRIGGER	\
 	_IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger)
 #define	VM_SET_CAPABILITY \
 	_IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability)
 #define	VM_GET_CAPABILITY \
 	_IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability)
 #define	VM_BIND_PPTDEV \
 	_IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev)
 #define	VM_UNBIND_PPTDEV \
 	_IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
 #define	VM_MAP_PPTDEV_MMIO \
 	_IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
 #define	VM_PPTDEV_MSI \
 	_IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
 #define	VM_PPTDEV_MSIX \
 	_IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix)
 #define VM_INJECT_NMI \
 	_IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi)
 #define	VM_STATS \
 	_IOWR('v', IOCNUM_VM_STATS, struct vm_stats)
 #define	VM_STAT_DESC \
 	_IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc)
 #define	VM_SET_X2APIC_STATE \
 	_IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic)
 #define	VM_GET_X2APIC_STATE \
 	_IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic)
 #define	VM_GET_HPET_CAPABILITIES \
 	_IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap)
+#define VM_SET_TOPOLOGY \
+	_IOW('v', IOCNUM_SET_TOPOLOGY, struct vm_cpu_topology)
+#define VM_GET_TOPOLOGY \
+	_IOR('v', IOCNUM_GET_TOPOLOGY, struct vm_cpu_topology)
 #define	VM_GET_GPA_PMAP \
 	_IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte)
 #define	VM_GLA2GPA	\
 	_IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa)
 #define	VM_GLA2GPA_NOFAULT \
 	_IOWR('v', IOCNUM_GLA2GPA_NOFAULT, struct vm_gla2gpa)
 #define	VM_ACTIVATE_CPU	\
 	_IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu)
 #define	VM_GET_CPUS	\
 	_IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset)
 #define	VM_SUSPEND_CPU \
 	_IOW('v', IOCNUM_SUSPEND_CPU, struct vm_activate_cpu)
 #define	VM_RESUME_CPU \
 	_IOW('v', IOCNUM_RESUME_CPU, struct vm_activate_cpu)
 #define	VM_SET_INTINFO	\
 	_IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
 #define	VM_GET_INTINFO	\
 	_IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo)
 #define VM_RTC_WRITE \
 	_IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data)
 #define VM_RTC_READ \
 	_IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data)
 #define VM_RTC_SETTIME	\
 	_IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time)
 #define VM_RTC_GETTIME	\
 	_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
 #define	VM_RESTART_INSTRUCTION \
 	_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
 #endif
Index: head/sys/amd64/vmm/vmm.c
===================================================================
--- head/sys/amd64/vmm/vmm.c	(revision 332297)
+++ head/sys/amd64/vmm/vmm.c	(revision 332298)
@@ -1,2666 +1,2708 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/pcpu.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/systm.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 #include <machine/md_var.h>
 #include <x86/psl.h>
 #include <x86/apicreg.h>
 
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 #include <machine/vmm_instruction_emul.h>
 
 #include "vmm_ioport.h"
 #include "vmm_ktr.h"
 #include "vmm_host.h"
 #include "vmm_mem.h"
 #include "vmm_util.h"
 #include "vatpic.h"
 #include "vatpit.h"
 #include "vhpet.h"
 #include "vioapic.h"
 #include "vlapic.h"
 #include "vpmtmr.h"
 #include "vrtc.h"
 #include "vmm_stat.h"
 #include "vmm_lapic.h"
 
 #include "io/ppt.h"
 #include "io/iommu.h"
 
 struct vlapic;
 
 /*
  * Initialization:
  * (a) allocated when vcpu is created
  * (i) initialized when vcpu is created and when it is reinitialized
  * (o) initialized the first time the vcpu is created
  * (x) initialized before use
  */
 struct vcpu {
 	struct mtx 	mtx;		/* (o) protects 'state' and 'hostcpu' */
 	enum vcpu_state	state;		/* (o) vcpu state */
 	int		hostcpu;	/* (o) vcpu's host cpu */
 	int		reqidle;	/* (i) request vcpu to idle */
 	struct vlapic	*vlapic;	/* (i) APIC device model */
 	enum x2apic_state x2apic_state;	/* (i) APIC mode */
 	uint64_t	exitintinfo;	/* (i) events pending at VM exit */
 	int		nmi_pending;	/* (i) NMI pending */
 	int		extint_pending;	/* (i) INTR pending */
 	int	exception_pending;	/* (i) exception pending */
 	int	exc_vector;		/* (x) exception collateral */
 	int	exc_errcode_valid;
 	uint32_t exc_errcode;
 	struct savefpu	*guestfpu;	/* (a,i) guest fpu state */
 	uint64_t	guest_xcr0;	/* (i) guest %xcr0 register */
 	void		*stats;		/* (a,i) statistics */
 	struct vm_exit	exitinfo;	/* (x) exit reason and collateral */
 	uint64_t	nextrip;	/* (x) next instruction to execute */
 };
 
 #define	vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
 #define	vcpu_lock_init(v)	mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
 #define	vcpu_lock(v)		mtx_lock_spin(&((v)->mtx))
 #define	vcpu_unlock(v)		mtx_unlock_spin(&((v)->mtx))
 #define	vcpu_assert_locked(v)	mtx_assert(&((v)->mtx), MA_OWNED)
 
 struct mem_seg {
 	size_t	len;
 	bool	sysmem;
 	struct vm_object *object;
 };
 #define	VM_MAX_MEMSEGS	3
 
 struct mem_map {
 	vm_paddr_t	gpa;
 	size_t		len;
 	vm_ooffset_t	segoff;
 	int		segid;
 	int		prot;
 	int		flags;
 };
 #define	VM_MAX_MEMMAPS	4
 
 /*
  * Initialization:
  * (o) initialized the first time the VM is created
  * (i) initialized when VM is created and when it is reinitialized
  * (x) initialized before use
  */
 struct vm {
 	void		*cookie;		/* (i) cpu-specific data */
 	void		*iommu;			/* (x) iommu-specific data */
 	struct vhpet	*vhpet;			/* (i) virtual HPET */
 	struct vioapic	*vioapic;		/* (i) virtual ioapic */
 	struct vatpic	*vatpic;		/* (i) virtual atpic */
 	struct vatpit	*vatpit;		/* (i) virtual atpit */
 	struct vpmtmr	*vpmtmr;		/* (i) virtual ACPI PM timer */
 	struct vrtc	*vrtc;			/* (o) virtual RTC */
 	volatile cpuset_t active_cpus;		/* (i) active vcpus */
 	volatile cpuset_t debug_cpus;		/* (i) vcpus stopped for debug */
 	int		suspend;		/* (i) stop VM execution */
 	volatile cpuset_t suspended_cpus; 	/* (i) suspended vcpus */
 	volatile cpuset_t halted_cpus;		/* (x) cpus in a hard halt */
 	cpuset_t	rendezvous_req_cpus;	/* (x) rendezvous requested */
 	cpuset_t	rendezvous_done_cpus;	/* (x) rendezvous finished */
 	void		*rendezvous_arg;	/* (x) rendezvous func/arg */
 	vm_rendezvous_func_t rendezvous_func;
 	struct mtx	rendezvous_mtx;		/* (o) rendezvous lock */
 	struct mem_map	mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
 	struct mem_seg	mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
 	struct vmspace	*vmspace;		/* (o) guest's address space */
 	char		name[VM_MAX_NAMELEN];	/* (o) virtual machine name */
 	struct vcpu	vcpu[VM_MAXCPU];	/* (i) guest vcpus */
+	/* The following describe the vm cpu topology */
+	uint16_t	sockets;		/* (o) num of sockets */
+	uint16_t	cores;			/* (o) num of cores/socket */
+	uint16_t	threads;		/* (o) num of threads/core */
+	uint16_t	maxcpus;		/* (o) max pluggable cpus */
 };
 
 static int vmm_initialized;
 
 static struct vmm_ops *ops;
 #define	VMM_INIT(num)	(ops != NULL ? (*ops->init)(num) : 0)
 #define	VMM_CLEANUP()	(ops != NULL ? (*ops->cleanup)() : 0)
 #define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
 
 #define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
 #define	VMRUN(vmi, vcpu, rip, pmap, evinfo) \
 	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO)
 #define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
 #define	VMSPACE_ALLOC(min, max) \
 	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
 #define	VMSPACE_FREE(vmspace) \
 	(ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO)
 #define	VMGETREG(vmi, vcpu, num, retval)		\
 	(ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO)
 #define	VMSETREG(vmi, vcpu, num, val)		\
 	(ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO)
 #define	VMGETDESC(vmi, vcpu, num, desc)		\
 	(ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO)
 #define	VMSETDESC(vmi, vcpu, num, desc)		\
 	(ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO)
 #define	VMGETCAP(vmi, vcpu, num, retval)	\
 	(ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
 #define	VMSETCAP(vmi, vcpu, num, val)		\
 	(ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
 #define	VLAPIC_INIT(vmi, vcpu)			\
 	(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
 #define	VLAPIC_CLEANUP(vmi, vlapic)		\
 	(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
 
 #define	fpu_start_emulating()	load_cr0(rcr0() | CR0_TS)
 #define	fpu_stop_emulating()	clts()
 
 static MALLOC_DEFINE(M_VM, "vm", "vm");
 
 /* statistics */
 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
 
 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL);
 
 /*
  * Halt the guest if all vcpus are executing a HLT instruction with
  * interrupts disabled.
  */
 static int halt_detection_enabled = 1;
 SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN,
     &halt_detection_enabled, 0,
     "Halt VM if all vcpus execute HLT with interrupts disabled");
 
 static int vmm_ipinum;
 SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0,
     "IPI vector used for vcpu notifications");
 
 static int trace_guest_exceptions;
 SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
     &trace_guest_exceptions, 0,
     "Trap into hypervisor on all guest exceptions and reflect them back");
 
 static void vm_free_memmap(struct vm *vm, int ident);
 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
 
 #ifdef KTR
 /*
  * Map a vcpu state to a constant, human-readable label.  Values outside
  * the known set are reported as "unknown".
  */
 static const char *
 vcpu_state2str(enum vcpu_state state)
 {
 	const char *label;
 
 	if (state == VCPU_IDLE)
 		label = "idle";
 	else if (state == VCPU_FROZEN)
 		label = "frozen";
 	else if (state == VCPU_RUNNING)
 		label = "running";
 	else if (state == VCPU_SLEEPING)
 		label = "sleeping";
 	else
 		label = "unknown";
 
 	return (label);
 }
 #endif
 
 /*
  * Tear down vcpu 'i' of 'vm'.  The vlapic is always cleaned up; the
  * statistics buffer and guest FPU save area are freed only when
  * 'destroy' is true.
  */
 static void
 vcpu_cleanup(struct vm *vm, int i, bool destroy)
 {
 	struct vcpu *vc;
 
 	vc = &vm->vcpu[i];
 	VLAPIC_CLEANUP(vm->cookie, vc->vlapic);
 
 	if (!destroy)
 		return;
 
 	vmm_stat_free(vc->stats);
 	fpu_save_area_free(vc->guestfpu);
 }
 
 /*
  * Initialize vcpu 'vcpu_id' of 'vm'.
  *
  * When 'create' is true the one-time state is set up as well: the vcpu
  * lock, the initial state/hostcpu values, the guest FPU save area and the
  * statistics buffer.  The remaining fields are (re)initialized on every
  * call, so the function is also used with 'create' false when the VM is
  * reinitialized.
  */
 static void
 vcpu_init(struct vm *vm, int vcpu_id, bool create)
 {
 	struct vcpu *vcpu;
 
 	KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU,
 	    ("vcpu_init: invalid vcpu %d", vcpu_id));
 	  
 	vcpu = &vm->vcpu[vcpu_id];
 
 	if (create) {
 		/* The lock must not have been initialized before. */
 		KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
 		    "initialized", vcpu_id));
 		vcpu_lock_init(vcpu);
 		vcpu->state = VCPU_IDLE;
 		vcpu->hostcpu = NOCPU;
 		vcpu->guestfpu = fpu_save_area_alloc();
 		vcpu->stats = vmm_stat_alloc();
 	}
 
 	/* State below is reset on both creation and reinitialization. */
 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
 	vcpu->reqidle = 0;
 	vcpu->exitintinfo = 0;
 	vcpu->nmi_pending = 0;
 	vcpu->extint_pending = 0;
 	vcpu->exception_pending = 0;
 	vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
 	fpu_save_area_reset(vcpu->guestfpu);
 	vmm_stat_init(vcpu->stats);
 }
 
 int
 vcpu_trace_exceptions(struct vm *vm, int vcpuid)
 {
 
 	return (trace_guest_exceptions);
 }
 
 /*
  * Return a pointer to the exit information of vcpu 'cpuid'.  Panics if
  * 'cpuid' is outside [0, VM_MAXCPU).
  */
 struct vm_exit *
 vm_exitinfo(struct vm *vm, int cpuid)
 {
 
 	if (cpuid < 0 || cpuid >= VM_MAXCPU)
 		panic("vm_exitinfo: invalid cpuid %d", cpuid);
 
 	return (&vm->vcpu[cpuid].exitinfo);
 }
 
 static void
 vmm_resume(void)
 {
 	VMM_RESUME();
 }
 
 /*
  * One-time vmm initialization.
  *
  * Returns 0 on success, the error from vmm_mem_init() if that fails,
  * ENXIO if the host is neither Intel nor AMD, or otherwise whatever the
  * selected backend's init routine returns.
  */
 static int
 vmm_init(void)
 {
 	int error;
 
 	vmm_host_state_init();
 
 	/*
 	 * Try to allocate a dedicated IPI vector for vcpu notifications;
 	 * fall back to IPI_AST if the allocation fails.
 	 */
 	vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) :
 	    &IDTVEC(justreturn));
 	if (vmm_ipinum < 0)
 		vmm_ipinum = IPI_AST;
 
 	error = vmm_mem_init();
 	if (error)
 		return (error);
 	
 	/* Select the hardware backend that VMM_INIT() and friends call. */
 	if (vmm_is_intel())
 		ops = &vmm_ops_intel;
 	else if (vmm_is_amd())
 		ops = &vmm_ops_amd;
 	else
 		return (ENXIO);
 
 	vmm_resume_p = vmm_resume;
 
 	return (VMM_INIT(vmm_ipinum));
 }
 
 /*
  * Module event handler for vmm.
  *
  * MOD_LOAD initializes the device layer and then vmm itself;
  * 'vmm_initialized' is set only if vmm_init() succeeds.  MOD_UNLOAD
  * proceeds with teardown only if vmmdev_cleanup() succeeds.  All other
  * events are accepted and ignored.
  */
 static int
 vmm_handler(module_t mod, int what, void *arg)
 {
 	int error;
 
 	switch (what) {
 	case MOD_LOAD:
 		vmmdev_init();
 		error = vmm_init();
 		if (error == 0)
 			vmm_initialized = 1;
 		break;
 	case MOD_UNLOAD:
 		error = vmmdev_cleanup();
 		if (error == 0) {
 			vmm_resume_p = NULL;
 			iommu_cleanup();
 			/* IPI_AST is the fallback vector, not an allocation. */
 			if (vmm_ipinum != IPI_AST)
 				lapic_ipi_free(vmm_ipinum);
 			error = VMM_CLEANUP();
 			/*
 			 * Something bad happened - prevent new
 			 * VMs from being created
 			 */
 			if (error)
 				vmm_initialized = 0;
 		}
 		break;
 	default:
 		error = 0;
 		break;
 	}
 	return (error);
 }
 
 static moduledata_t vmm_kmod = {
 	"vmm",
 	vmm_handler,
 	NULL
 };
 
 /*
  * vmm initialization has the following dependencies:
  *
  * - VT-x initialization requires smp_rendezvous() and therefore must happen
  *   after SMP is fully functional (after SI_SUB_SMP).
  */
 DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY);
 MODULE_VERSION(vmm, 1);
 
 /*
  * Initialize (create == true) or reinitialize (create == false) the
  * per-VM state: the backend cookie, the virtual device models, the cpu
  * sets and every vcpu slot.  The virtual RTC is set up only on creation.
  */
 static void
 vm_init(struct vm *vm, bool create)
 {
 	int i;
 
 	vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
 	vm->iommu = NULL;
 	vm->vioapic = vioapic_init(vm);
 	vm->vhpet = vhpet_init(vm);
 	vm->vatpic = vatpic_init(vm);
 	vm->vatpit = vatpit_init(vm);
 	vm->vpmtmr = vpmtmr_init(vm);
 	if (create)
 		vm->vrtc = vrtc_init(vm);
 
 	CPU_ZERO(&vm->active_cpus);
 	CPU_ZERO(&vm->debug_cpus);
 
 	vm->suspend = 0;
 	CPU_ZERO(&vm->suspended_cpus);
 
 	/* All VM_MAXCPU vcpu slots are initialized, not just active ones. */
 	for (i = 0; i < VM_MAXCPU; i++)
 		vcpu_init(vm, i, create);
 }
 
+/*
+ * The default CPU topology is a single thread per package.
+ */
+u_int cores_per_package = 1;
+u_int threads_per_core = 1;
+
 /*
  * Allocate and initialize a new virtual machine called 'name'.  On
  * success the new VM is stored in '*retvm' and 0 is returned.
  *
  * Returns ENXIO if vmm has not been initialized, EINVAL if 'name' is NULL
  * or does not fit in VM_MAX_NAMELEN (including the terminator), and
  * ENOMEM if no vmspace could be allocated.
  */
 int
 vm_create(const char *name, struct vm **retvm)
 {
 	struct vm *vm;
 	struct vmspace *vmspace;
 
 	/*
 	 * If vmm.ko could not be successfully initialized then don't attempt
 	 * to create the virtual machine.
 	 */
 	if (!vmm_initialized)
 		return (ENXIO);
 
 	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
 		return (EINVAL);
 
 	vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS);
 	if (vmspace == NULL)
 		return (ENOMEM);
 
 	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
 	/* The length was bounded above, so the copy fits in vm->name. */
 	strcpy(vm->name, name);
 	vm->vmspace = vmspace;
 	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
 
+	/* Default topology: one socket, sized by the global tunables. */
+	vm->sockets = 1;
+	vm->cores = cores_per_package;	/* XXX backwards compatibility */
+	vm->threads = threads_per_core;	/* XXX backwards compatibility */
+	vm->maxcpus = 0;		/* XXX not implemented */
+
 	vm_init(vm, true);
 
 	*retvm = vm;
 	return (0);
+}
+
+/*
+ * Copy the CPU topology recorded for 'vm' into the caller's variables.
+ * All four output pointers are dereferenced unconditionally and must be
+ * non-NULL.
+ */
+void
+vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
+    uint16_t *threads, uint16_t *maxcpus)
+{
+	*sockets = vm->sockets;
+	*cores = vm->cores;
+	*threads = vm->threads;
+	*maxcpus = vm->maxcpus;
+}
+
+/*
+ * Record the CPU topology for 'vm'.
+ *
+ * Returns 0 on success.  Returns EINVAL if 'maxcpus' is non-zero (not
+ * yet supported) or if sockets * cores * threads exceeds VM_MAXCPU.
+ */
+int
+vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
+    uint16_t threads, uint16_t maxcpus)
+{
+	uint64_t ncpus;
+
+	if (maxcpus != 0)
+		return (EINVAL);	/* XXX remove when supported */
+	/*
+	 * Compute the product in 64 bits.  The uint16_t operands would
+	 * otherwise be promoted to int, where the multiplication can
+	 * overflow and wrap to a small value that slips past the limit
+	 * check (e.g. 0x8000 * 0x8000 * 4).
+	 */
+	ncpus = (uint64_t)sockets * cores * threads;
+	if (ncpus > VM_MAXCPU)
+		return (EINVAL);
+	/* XXX need to check sockets * cores * threads == vCPU, how? */
+	vm->sockets = sockets;
+	vm->cores = cores;
+	vm->threads = threads;
+	vm->maxcpus = maxcpus;
+	return (0);
 }
 
 /*
  * Release the resources held by 'vm'.
  *
  * With 'destroy' false this is the reset path: the virtual RTC is reset
  * rather than freed, only non-system-memory mappings are removed, and
  * the memory segments and vmspace are kept.  With 'destroy' true
  * everything is torn down.
  */
 static void
 vm_cleanup(struct vm *vm, bool destroy)
 {
 	struct mem_map *mm;
 	int i;
 
 	ppt_unassign_all(vm);
 
 	if (vm->iommu != NULL)
 		iommu_destroy_domain(vm->iommu);
 
 	if (destroy)
 		vrtc_cleanup(vm->vrtc);
 	else
 		vrtc_reset(vm->vrtc);
 	vpmtmr_cleanup(vm->vpmtmr);
 	vatpit_cleanup(vm->vatpit);
 	vhpet_cleanup(vm->vhpet);
 	vatpic_cleanup(vm->vatpic);
 	vioapic_cleanup(vm->vioapic);
 
 	for (i = 0; i < VM_MAXCPU; i++)
 		vcpu_cleanup(vm, i, destroy);
 
 	VMCLEANUP(vm->cookie);
 
 	/*
 	 * System memory is removed from the guest address space only when
 	 * the VM is destroyed. This is because the mapping remains the same
 	 * across VM reset.
 	 *
 	 * Device memory can be relocated by the guest (e.g. using PCI BARs)
 	 * so those mappings are removed on a VM reset.
 	 */
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
 		mm = &vm->mem_maps[i];
 		if (destroy || !sysmem_mapping(vm, mm))
 			vm_free_memmap(vm, i);
 	}
 
 	if (destroy) {
 		/* Segments are freed only after all mappings are gone. */
 		for (i = 0; i < VM_MAX_MEMSEGS; i++)
 			vm_free_memseg(vm, i);
 
 		VMSPACE_FREE(vm->vmspace);
 		vm->vmspace = NULL;
 	}
 }
 
 void
 vm_destroy(struct vm *vm)
 {
 	vm_cleanup(vm, true);
 	free(vm, M_VM);
 }
 
 /*
  * Reset 'vm' to its initial state.  Returns 0 on success or EBUSY if the
  * set of suspended vcpus does not match the set of active vcpus.
  */
 int
 vm_reinit(struct vm *vm)
 {
 
 	/*
 	 * A virtual machine can be reset only if all vcpus are suspended.
 	 */
 	if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) != 0)
 		return (EBUSY);
 
 	vm_cleanup(vm, false);
 	vm_init(vm, false);
 	return (0);
 }
 
 const char *
 vm_name(struct vm *vm)
 {
 	return (vm->name);
 }
 
 /*
  * Map the host physical range [hpa, hpa + len) at guest physical address
  * 'gpa'.  Returns 0 on success or ENOMEM if vmm_mmio_alloc() fails.
  */
 int
 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
 {
 
 	if (vmm_mmio_alloc(vm->vmspace, gpa, len, hpa) == NULL)
 		return (ENOMEM);
 
 	return (0);
 }
 
 int
 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
 {
 
 	vmm_mmio_free(vm->vmspace, gpa, len);
 	return (0);
 }
 
 /*
  * Return 'true' if 'gpa' is allocated in the guest address space.
  *
  * This function is called in the context of a running vcpu which acts as
  * an implicit lock on 'vm->mem_maps[]'.
  */
 bool
 vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
 {
 	struct mem_map *map;
 	int idx;
 
 #ifdef INVARIANTS
 	int hostcpu, state;
 	state = vcpu_get_state(vm, vcpuid, &hostcpu);
 	KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
 	    ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
 #endif
 
 	/* 'gpa' is sysmem or devmem if it lies within any in-use mapping. */
 	for (idx = 0; idx < VM_MAX_MEMMAPS; idx++) {
 		map = &vm->mem_maps[idx];
 		if (map->len == 0)
 			continue;
 		if (gpa >= map->gpa && gpa < map->gpa + map->len)
 			return (true);
 	}
 
 	/* Otherwise it may still be pci passthru mmio. */
 	if (ppt_is_mmio(vm, gpa))
 		return (true);
 
 	return (false);
 }
 
 /*
  * Allocate memory segment 'ident' of 'len' bytes for 'vm'.
  *
  * Returns 0 on success.  Returns EINVAL for an out-of-range 'ident', a
  * zero or non-page-aligned 'len', or a segment that already exists with
  * different attributes.  Returns EEXIST if the segment already exists
  * with the same 'len' and 'sysmem', and ENOMEM if the backing object
  * could not be allocated.
  */
 int
 vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
 {
 	struct mem_seg *slot;
 	vm_object_t object;
 
 	if (ident < 0 || ident >= VM_MAX_MEMSEGS ||
 	    len == 0 || (len & PAGE_MASK))
 		return (EINVAL);
 
 	slot = &vm->mem_segs[ident];
 	if (slot->object != NULL) {
 		if (slot->len != len || slot->sysmem != sysmem)
 			return (EINVAL);
 		return (EEXIST);
 	}
 
 	object = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
 	if (object == NULL)
 		return (ENOMEM);
 
 	slot->object = object;
 	slot->len = len;
 	slot->sysmem = sysmem;
 	return (0);
 }
 
 /*
  * Look up memory segment 'ident' and copy its attributes into whichever
  * of 'len', 'sysmem' and 'objptr' are non-NULL.  Returns EINVAL if
  * 'ident' is out of range and 0 otherwise.
  */
 int
 vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
     vm_object_t *objptr)
 {
 	const struct mem_seg *slot;
 
 	if (ident < 0 || ident >= VM_MAX_MEMSEGS)
 		return (EINVAL);
 
 	slot = &vm->mem_segs[ident];
 	if (len != NULL)
 		*len = slot->len;
 	if (sysmem != NULL)
 		*sysmem = slot->sysmem;
 	if (objptr != NULL)
 		*objptr = slot->object;
 	return (0);
 }
 
 /*
  * Drop the backing object of memory segment 'ident' and clear the slot.
  * Does nothing if the segment has no object.
  */
 void
 vm_free_memseg(struct vm *vm, int ident)
 {
 	struct mem_seg *slot;
 
 	KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
 	    ("%s: invalid memseg ident %d", __func__, ident));
 
 	slot = &vm->mem_segs[ident];
 	if (slot->object == NULL)
 		return;
 
 	vm_object_deallocate(slot->object);
 	bzero(slot, sizeof(*slot));
 }
 
 /*
  * Map 'len' bytes of memory segment 'segid', starting at segment offset
  * 'first', into the guest address space at 'gpa'.
  *
  * Returns 0 on success.  Returns EINVAL for bad 'prot', 'flags' or
  * 'segid', a segment with no backing object, a range outside the
  * segment, or a misaligned 'gpa', 'first' or end offset.  Returns ENOSPC
  * if no free mem_maps[] slot is available, and EFAULT if mapping or
  * wiring fails.
  */
 int
 vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
     size_t len, int prot, int flags)
 {
 	struct mem_seg *seg;
 	struct mem_map *m, *map;
 	vm_ooffset_t last;
 	int i, error;
 
 	/* At least one protection bit is required; no unknown bits. */
 	if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
 		return (EINVAL);
 
 	/* VM_MEMMAP_F_WIRED is the only flag accepted from the caller. */
 	if (flags & ~VM_MEMMAP_F_WIRED)
 		return (EINVAL);
 
 	if (segid < 0 || segid >= VM_MAX_MEMSEGS)
 		return (EINVAL);
 
 	seg = &vm->mem_segs[segid];
 	if (seg->object == NULL)
 		return (EINVAL);
 
 	/* 'first >= last' also rejects a zero 'len'. */
 	last = first + len;
 	if (first < 0 || first >= last || last > seg->len)
 		return (EINVAL);
 
 	if ((gpa | first | last) & PAGE_MASK)
 		return (EINVAL);
 
 	/* A slot with a zero length is unused; take the first one. */
 	map = NULL;
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
 		m = &vm->mem_maps[i];
 		if (m->len == 0) {
 			map = m;
 			break;
 		}
 	}
 
 	if (map == NULL)
 		return (ENOSPC);
 
 	/*
 	 * NOTE(review): VMFS_NO_SPACE presumably requests the mapping at
 	 * exactly 'gpa' rather than searching for free space - confirm
 	 * against vm_map_find().
 	 */
 	error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
 	    len, 0, VMFS_NO_SPACE, prot, prot, 0);
 	if (error != KERN_SUCCESS)
 		return (EFAULT);
 
 	/*
 	 * NOTE(review): presumably the new map entry owns this reference,
 	 * which is why it is taken only after the mapping succeeds - verify.
 	 */
 	vm_object_reference(seg->object);
 
 	if (flags & VM_MEMMAP_F_WIRED) {
 		error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
 		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
 		if (error != KERN_SUCCESS) {
 			/* Undo the mapping before reporting the failure. */
 			vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
 			return (EFAULT);
 		}
 	}
 
 	/* Record the mapping; a non-zero 'len' marks the slot in use. */
 	map->gpa = gpa;
 	map->len = len;
 	map->segoff = first;
 	map->segid = segid;
 	map->prot = prot;
 	map->flags = flags;
 	return (0);
 }
 
 /*
  * Find the in-use mapping with the lowest guest physical address that is
  * greater than or equal to '*gpa'.  On success '*gpa' is updated to that
  * mapping's address, the remaining non-NULL output arguments are filled
  * in and 0 is returned.  Returns ENOENT if no such mapping exists.
  */
 int
 vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
     vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
 {
 	struct mem_map *cur, *best;
 	int idx;
 
 	best = NULL;
 	for (idx = 0; idx < VM_MAX_MEMMAPS; idx++) {
 		cur = &vm->mem_maps[idx];
 		if (cur->len != 0 && cur->gpa >= *gpa &&
 		    (best == NULL || cur->gpa < best->gpa))
 			best = cur;
 	}
 
 	if (best == NULL)
 		return (ENOENT);
 
 	*gpa = best->gpa;
 	if (segid != NULL)
 		*segid = best->segid;
 	if (segoff != NULL)
 		*segoff = best->segoff;
 	if (len != NULL)
 		*len = best->len;
 	if (prot != NULL)
 		*prot = best->prot;
 	if (flags != NULL)
 		*flags = best->flags;
 	return (0);
 }
 
 static void
 vm_free_memmap(struct vm *vm, int ident)
 {
 	struct mem_map *mm;
 	int error;
 
 	mm = &vm->mem_maps[ident];
 	if (mm->len) {
 		error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
 		    mm->gpa + mm->len);
 		KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
 		    __func__, error));
 		bzero(mm, sizeof(struct mem_map));
 	}
 }
 
 /*
  * Return 'true' if 'mm' is in use and refers to a memory segment that is
  * flagged as system memory.
  */
 static __inline bool
 sysmem_mapping(struct vm *vm, struct mem_map *mm)
 {
 
 	return (mm->len != 0 && vm->mem_segs[mm->segid].sysmem);
 }
 
 /*
  * Return the highest end address (gpa + len) over all system memory
  * mappings of 'vm', or 0 when there are none.
  */
 static vm_paddr_t
 sysmem_maxaddr(struct vm *vm)
 {
 	struct mem_map *cur;
 	vm_paddr_t highest;
 	int slot;
 
 	highest = 0;
 	for (slot = 0; slot < VM_MAX_MEMMAPS; slot++) {
 		cur = &vm->mem_maps[slot];
 		if (!sysmem_mapping(vm, cur))
 			continue;
 		if (cur->gpa + cur->len > highest)
 			highest = cur->gpa + cur->len;
 	}
 	return (highest);
 }
 
 /*
  * Move the pages of the VM's system memory mappings between the host IOMMU
  * domain and the VM's own IOMMU domain ('vm->iommu').
  *
  * map == TRUE:  every wired system memory mapping is tagged with
  *               VM_MEMMAP_F_IOMMU; each of its pages is mapped gpa -> hpa
  *               in 'vm->iommu' and removed from the host domain.
  * map == FALSE: every mapping tagged VM_MEMMAP_F_IOMMU has the tag cleared;
  *               each of its pages is removed from 'vm->iommu' and mapped
  *               hpa -> hpa in the host domain.
  *
  * Mappings are walked one PAGE_SIZE page at a time.
  */
 static void
 vm_iommu_modify(struct vm *vm, boolean_t map)
 {
 	int i, sz;
 	vm_paddr_t gpa, hpa;
 	struct mem_map *mm;
 	void *vp, *cookie, *host_domain;
 
 	sz = PAGE_SIZE;
 	host_domain = iommu_host_domain();
 
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
 		mm = &vm->mem_maps[i];
 		/* Only in-use system memory mappings are considered. */
 		if (!sysmem_mapping(vm, mm))
 			continue;
 
 		if (map) {
 			KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0,
 			    ("iommu map found invalid memmap %#lx/%#lx/%#x",
 			    mm->gpa, mm->len, mm->flags));
 			/* Mappings that are not wired are left alone. */
 			if ((mm->flags & VM_MEMMAP_F_WIRED) == 0)
 				continue;
 			mm->flags |= VM_MEMMAP_F_IOMMU;
 		} else {
 			/* Only undo mappings that were previously tagged. */
 			if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0)
 				continue;
 			mm->flags &= ~VM_MEMMAP_F_IOMMU;
 			KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0,
 			    ("iommu unmap found invalid memmap %#lx/%#lx/%#x",
 			    mm->gpa, mm->len, mm->flags));
 		}
 
 		gpa = mm->gpa;
 		while (gpa < mm->gpa + mm->len) {
 			/*
 			 * Resolve the host page backing this guest page.  A
 			 * vcpuid of -1 is passed instead of a specific vcpu.
 			 */
 			vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE,
 					 &cookie);
 			KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
 			    vm_name(vm), gpa));
 
 			/*
 			 * The hold is dropped right away; only the address is
 			 * used below.  NOTE(review): presumably safe because
 			 * the mapping is wired -- confirm.
 			 */
 			vm_gpa_release(cookie);
 
 			hpa = DMAP_TO_PHYS((uintptr_t)vp);
 			if (map) {
 				iommu_create_mapping(vm->iommu, gpa, hpa, sz);
 				iommu_remove_mapping(host_domain, hpa, sz);
 			} else {
 				iommu_remove_mapping(vm->iommu, gpa, sz);
 				iommu_create_mapping(host_domain, hpa, hpa, sz);
 			}
 
 			gpa += PAGE_SIZE;
 		}
 	}
 
 	/*
 	 * Invalidate the cached translations associated with the domain
 	 * from which pages were removed.
 	 */
 	if (map)
 		iommu_invalidate_tlb(host_domain);
 	else
 		iommu_invalidate_tlb(vm->iommu);
 }
 
 /* Convenience wrappers that select the direction of vm_iommu_modify(). */
 #define	vm_iommu_unmap(vm)	vm_iommu_modify((vm), FALSE)
 #define	vm_iommu_map(vm)	vm_iommu_modify((vm), TRUE)
 
 /*
  * Detach the passthrough device bus/slot/func from 'vm'.  When that was the
  * last assigned device the VM's memory is moved back out of its IOMMU
  * domain.  Returns 0 or the error from ppt_unassign_device().
  */
 int
 vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
 {
 	int rc;
 
 	rc = ppt_unassign_device(vm, bus, slot, func);
 	if (rc == 0 && ppt_assigned_devices(vm) == 0)
 		vm_iommu_unmap(vm);
 
 	return (rc);
 }
 
 /*
  * Attach the passthrough device bus/slot/func to 'vm'.
  *
  * Before the first device is assigned an IOMMU domain sized by
  * sysmem_maxaddr() is created and the VM's memory is mapped into it.
  * Returns ENXIO if the domain cannot be created, otherwise the result of
  * ppt_assign_device().
  *
  * NOTE(review): if ppt_assign_device() fails for the first device the
  * freshly created domain is left in place -- confirm this is intended.
  */
 int
 vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
 {
 
 	if (ppt_assigned_devices(vm) == 0) {
 		/* First device: set up the 'gpa' to 'hpa' translation. */
 		KASSERT(vm->iommu == NULL,
 		    ("vm_assign_pptdev: iommu must be NULL"));
 		vm->iommu = iommu_create_domain(sysmem_maxaddr(vm));
 		if (vm->iommu == NULL)
 			return (ENXIO);
 		vm_iommu_map(vm);
 	}
 
 	return (ppt_assign_device(vm, bus, slot, func));
 }
 
 /*
  * Hold the host page backing guest physical address 'gpa' and return a
  * direct-map pointer to the byte that corresponds to 'gpa'.
  *
  * The range [gpa, gpa + len) must not cross a page boundary; the function
  * panics otherwise.  Only addresses inside a system memory mapping are
  * resolved.  'vcpuid' may be -1 or a valid vcpu index; under INVARIANTS the
  * selected vcpu (or every vcpu when -1) is asserted to be VCPU_FROZEN.
  *
  * On success '*cookie' is set to the held page and must later be passed to
  * vm_gpa_release().  On failure NULL is returned and '*cookie' is NULL.
  */
 void *
 vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot,
 	    void **cookie)
 {
 	int i, count, pageoff;
 	struct mem_map *mm;
 	vm_page_t m;
 #ifdef INVARIANTS
 	/*
 	 * All vcpus are frozen by ioctls that modify the memory map
 	 * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->mem_maps[]' stability is
 	 * guaranteed if at least one vcpu is in the VCPU_FROZEN state.
 	 */
 	int state;
 	KASSERT(vcpuid >= -1 && vcpuid < VM_MAXCPU, ("%s: invalid vcpuid %d",
 	    __func__, vcpuid));
 	for (i = 0; i < VM_MAXCPU; i++) {
 		if (vcpuid != -1 && vcpuid != i)
 			continue;
 		state = vcpu_get_state(vm, i, NULL);
 		KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
 		    __func__, state));
 	}
 #endif
 	/* The request must fit in the page that contains 'gpa'. */
 	pageoff = gpa & PAGE_MASK;
 	if (len > PAGE_SIZE - pageoff)
 		panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
 
 	/* 'count' stays 0 unless a system memory mapping contains 'gpa'. */
 	count = 0;
 	for (i = 0; i < VM_MAX_MEMMAPS; i++) {
 		mm = &vm->mem_maps[i];
 		if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
 		    gpa < mm->gpa + mm->len) {
 			count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
 			    trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
 			break;
 		}
 	}
 
 	if (count == 1) {
 		*cookie = m;
 		return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
 	} else {
 		*cookie = NULL;
 		return (NULL);
 	}
 }
 
 /*
  * Drop the hold on the page identified by 'cookie'.  The unhold is
  * performed with the page lock taken.
  */
 void
 vm_gpa_release(void *cookie)
 {
 	vm_page_t held;
 
 	held = cookie;
 	vm_page_lock(held);
 	vm_page_unhold(held);
 	vm_page_unlock(held);
 }
 
 /*
  * Read guest register 'reg' of vcpu 'vcpu' into '*retval'.
  *
  * Returns EINVAL when 'vcpu' is out of range or 'reg' is at or beyond
  * VM_REG_LAST; otherwise the result of the VMGETREG backend call.
  */
 int
 vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU || reg >= VM_REG_LAST)
 		return (EINVAL);
 
 	return (VMGETREG(vm->cookie, vcpu, reg, retval));
 }
 
 /*
  * Write 'val' to guest register 'reg' of vcpu 'vcpuid'.
  *
  * Returns EINVAL for an out-of-range vcpu or a register at or beyond
  * VM_REG_LAST; otherwise the result of the VMSETREG backend call.  A
  * successful write to %rip also updates the vcpu's 'nextrip'.
  */
 int
 vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
 {
 	int rc;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU || reg >= VM_REG_LAST)
 		return (EINVAL);
 
 	rc = VMSETREG(vm->cookie, vcpuid, reg, val);
 	if (rc == 0 && reg == VM_REG_GUEST_RIP) {
 		/* Keep 'nextrip' in step with the new value of %rip */
 		VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
 		vm->vcpu[vcpuid].nextrip = val;
 	}
 	return (rc);
 }
 
 /* Return TRUE when 'reg' names the guest IDTR or GDTR. */
 static boolean_t
 is_descriptor_table(int reg)
 {
 
 	if (reg == VM_REG_GUEST_IDTR || reg == VM_REG_GUEST_GDTR)
 		return (TRUE);
 	return (FALSE);
 }
 
 /*
  * Return TRUE when 'reg' names one of the guest segment registers
  * (ES, CS, SS, DS, FS, GS, TR or LDTR).
  */
 static boolean_t
 is_segment_register(int reg)
 {
 	boolean_t found;
 
 	switch (reg) {
 	case VM_REG_GUEST_ES:
 	case VM_REG_GUEST_CS:
 	case VM_REG_GUEST_SS:
 	case VM_REG_GUEST_DS:
 	case VM_REG_GUEST_FS:
 	case VM_REG_GUEST_GS:
 	case VM_REG_GUEST_TR:
 	case VM_REG_GUEST_LDTR:
 		found = TRUE;
 		break;
 	default:
 		found = FALSE;
 		break;
 	}
 	return (found);
 }
 
 /*
  * Fetch the descriptor of segment register or descriptor table 'reg' for
  * vcpu 'vcpu' into '*desc'.  Returns EINVAL for an out-of-range vcpu or a
  * 'reg' that is neither kind; otherwise the VMGETDESC backend result.
  */
 int
 vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
 		struct seg_desc *desc)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (is_segment_register(reg) || is_descriptor_table(reg))
 		return (VMGETDESC(vm->cookie, vcpu, reg, desc));
 
 	return (EINVAL);
 }
 
 /*
  * Install '*desc' as the descriptor of segment register or descriptor table
  * 'reg' for vcpu 'vcpu'.  Returns EINVAL for an out-of-range vcpu or a 'reg'
  * that is neither kind; otherwise the VMSETDESC backend result.
  */
 int
 vm_set_seg_desc(struct vm *vm, int vcpu, int reg,
 		struct seg_desc *desc)
 {
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (is_segment_register(reg) || is_descriptor_table(reg))
 		return (VMSETDESC(vm->cookie, vcpu, reg, desc));
 
 	return (EINVAL);
 }
 
 /*
  * Load the guest FPU state saved in 'vcpu->guestfpu' (and the guest XCR0
  * when CR4.XSAVE is set in the host) and then re-enable FPU emulation.
  * The order of the steps below matters.
  */
 static void
 restore_guest_fpustate(struct vcpu *vcpu)
 {
 
 	/* flush host state to the pcb */
 	fpuexit(curthread);
 
 	/* restore guest FPU state */
 	fpu_stop_emulating();
 	fpurestore(vcpu->guestfpu);
 
 	/* restore guest XCR0 if XSAVE is enabled in the host */
 	if (rcr4() & CR4_XSAVE)
 		load_xcr(0, vcpu->guest_xcr0);
 
 	/*
 	 * The FPU is now "dirty" with the guest's state so turn on emulation
 	 * to trap any access to the FPU by the host.
 	 */
 	fpu_start_emulating();
 }
 
 /*
  * Save the guest FPU state into 'vcpu->guestfpu'.  When CR4.XSAVE is set in
  * the host the guest XCR0 is recorded in 'vcpu->guest_xcr0' and the host
  * XCR0 is reloaded first.  Panics if CR0.TS is clear on entry, i.e. FPU
  * emulation was not left enabled.
  */
 static void
 save_guest_fpustate(struct vcpu *vcpu)
 {
 
 	if ((rcr0() & CR0_TS) == 0)
 		panic("fpu emulation not enabled in host!");
 
 	/* save guest XCR0 and restore host XCR0 */
 	if (rcr4() & CR4_XSAVE) {
 		vcpu->guest_xcr0 = rxcr(0);
 		load_xcr(0, vmm_get_host_xcr0());
 	}
 
 	/* save guest FPU state */
 	fpu_stop_emulating();
 	fpusave(vcpu->guestfpu);
 	fpu_start_emulating();
 }
 
 /* Incremented by vm_handle_hlt() with the ticks elapsed across each sleep. */
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
 /*
  * Move vcpu 'vcpuid' to 'newstate'.  The vcpu lock must be held by the
  * caller (asserted via vcpu_assert_locked()).
  *
  * from_idle == true:  wait until the vcpu is VCPU_IDLE before attempting
  *                     the transition, requesting idle and re-checking after
  *                     each sleep (timeout 'hz').
  * from_idle == false: the vcpu must not currently be VCPU_IDLE.
  *
  * Returns 0 on success or EBUSY when the transition is not one of the
  * allowed ones listed below.  Sleepers on '&vcpu->state' are woken when the
  * new state is VCPU_IDLE.
  */
 static int
 vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
     bool from_idle)
 {
 	struct vcpu *vcpu;
 	int error;
 
 	vcpu = &vm->vcpu[vcpuid];
 	vcpu_assert_locked(vcpu);
 
 	/*
 	 * State transitions from the vmmdev_ioctl() must always begin from
 	 * the VCPU_IDLE state. This guarantees that there is only a single
 	 * ioctl() operating on a vcpu at any point.
 	 */
 	if (from_idle) {
 		while (vcpu->state != VCPU_IDLE) {
 			vcpu->reqidle = 1;
 			vcpu_notify_event_locked(vcpu, false);
 			VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
 			    "idle requested", vcpu_state2str(vcpu->state));
 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
 		}
 	} else {
 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
 		    "vcpu idle state"));
 	}
 
 	/* 'hostcpu' is only meaningful while the vcpu is running. */
 	if (vcpu->state == VCPU_RUNNING) {
 		KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
 		    "mismatch for running vcpu", curcpu, vcpu->hostcpu));
 	} else {
 		KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
 		    "vcpu that is not running", vcpu->hostcpu));
 	}
 
 	/*
 	 * The following state transitions are allowed:
 	 * IDLE -> FROZEN -> IDLE
 	 * FROZEN -> RUNNING -> FROZEN
 	 * FROZEN -> SLEEPING -> FROZEN
 	 */
 	switch (vcpu->state) {
 	case VCPU_IDLE:
 	case VCPU_RUNNING:
 	case VCPU_SLEEPING:
 		error = (newstate != VCPU_FROZEN);
 		break;
 	case VCPU_FROZEN:
 		error = (newstate == VCPU_FROZEN);
 		break;
 	default:
 		error = 1;
 		break;
 	}
 
 	if (error)
 		return (EBUSY);
 
 	VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
 	    vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
 
 	vcpu->state = newstate;
 	if (newstate == VCPU_RUNNING)
 		vcpu->hostcpu = curcpu;
 	else
 		vcpu->hostcpu = NOCPU;
 
 	/* Release anyone waiting in the from_idle loop above. */
 	if (newstate == VCPU_IDLE)
 		wakeup(&vcpu->state);
 
 	return (0);
 }
 
 static void
 vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 {
 	int error;
 
 	if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
 		panic("Error %d setting state to %d\n", error, newstate);
 }
 
 static void
 vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 {
 	int error;
 
 	if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
 		panic("Error %d setting state to %d", error, newstate);
 }
 
 /*
  * Publish 'func' as the VM's pending rendezvous function; NULL clears it.
  * The caller must own 'vm->rendezvous_mtx' (asserted).
  */
 static void
 vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func)
 {
 
 	KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked"));
 
 	/*
 	 * Update 'rendezvous_func' and execute a write memory barrier to
 	 * ensure that it is visible across all host cpus. This is not needed
 	 * for correctness but it does ensure that all the vcpus will notice
 	 * that the rendezvous is requested immediately.
 	 */
 	vm->rendezvous_func = func;
 	wmb();
 }
 
 /*
  * Emit a rendezvous trace record: per-vcpu when 'vcpuid' is non-negative,
  * VM-wide otherwise.  'vm' and 'vcpuid' are parenthesized so that
  * expression arguments expand as intended; 'fmt' is passed through
  * untouched.
  */
 #define	RENDEZVOUS_CTR0(vm, vcpuid, fmt)				\
 	do {								\
 		if ((vcpuid) >= 0)					\
 			VCPU_CTR0((vm), (vcpuid), fmt);			\
 		else							\
 			VM_CTR0((vm), fmt);				\
 	} while (0)
 
 /*
  * Take part in, or wait for the end of, a pending rendezvous.
  *
  * 'vcpuid' is a valid vcpu index or -1.  A vcpu that is named in
  * 'rendezvous_req_cpus' and has not yet run the rendezvous function runs it
  * once and is recorded in 'rendezvous_done_cpus'.  The caller that observes
  * the requested and done sets to be equal clears the rendezvous function
  * and wakes the other waiters; everyone else sleeps on
  * '&vm->rendezvous_func' until then.
  */
 static void
 vm_handle_rendezvous(struct vm *vm, int vcpuid)
 {
 
 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
 	    ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid));
 
 	mtx_lock(&vm->rendezvous_mtx);
 	while (vm->rendezvous_func != NULL) {
 		/* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */
 		CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus);
 
 		if (vcpuid != -1 &&
 		    CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) &&
 		    !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) {
 			VCPU_CTR0(vm, vcpuid, "Calling rendezvous func");
 			(*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg);
 			CPU_SET(vcpuid, &vm->rendezvous_done_cpus);
 		}
 		if (CPU_CMP(&vm->rendezvous_req_cpus,
 		    &vm->rendezvous_done_cpus) == 0) {
 			/*
 			 * 'vcpuid' may be -1 here, so use the tracing macro
 			 * that copes with it, as the wait path below does.
 			 */
 			RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous completed");
 			vm_set_rendezvous_func(vm, NULL);
 			wakeup(&vm->rendezvous_func);
 			break;
 		}
 		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
 		    "vmrndv", 0);
 	}
 	mtx_unlock(&vm->rendezvous_mtx);
 }
 
 /*
  * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
  *
  * 'intr_disabled' tells whether the guest executed HLT with interrupts
  * disabled; in that case pending ExtINT/local APIC interrupts do not end
  * the sleep and the vcpu takes part in halt detection.  If every active
  * vcpu ends up halted the VM is suspended with VM_SUSPEND_HALT.
  *
  * 'retu' is not referenced by this function.  Always returns 0.
  */
 static int
 vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 {
 	struct vcpu *vcpu;
 	const char *wmesg;
 	int t, vcpu_halted, vm_halted;
 
 	KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
 
 	vcpu = &vm->vcpu[vcpuid];
 	vcpu_halted = 0;
 	vm_halted = 0;
 
 	vcpu_lock(vcpu);
 	while (1) {
 		/*
 		 * Do a final check for pending NMI or interrupts before
 		 * really putting this thread to sleep. Also check for
 		 * software events that would cause this vcpu to wakeup.
 		 *
 		 * These interrupts/events could have happened after the
 		 * vcpu returned from VMRUN() and before it acquired the
 		 * vcpu lock above.
 		 */
 		if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle)
 			break;
 		if (vm_nmi_pending(vm, vcpuid))
 			break;
 		if (!intr_disabled) {
 			if (vm_extint_pending(vm, vcpuid) ||
 			    vlapic_pending_intr(vcpu->vlapic, NULL)) {
 				break;
 			}
 		}
 
 		/* Don't go to sleep if the vcpu thread needs to yield */
 		if (vcpu_should_yield(vm, vcpuid))
 			break;
 
 		if (vcpu_debugged(vm, vcpuid))
 			break;
 
 		/*
 		 * Some Linux guests implement "halt" by having all vcpus
 		 * execute HLT with interrupts disabled. 'halted_cpus' keeps
 		 * track of the vcpus that have entered this state. When all
 		 * vcpus enter the halted state the virtual machine is halted.
 		 */
 		if (intr_disabled) {
 			wmesg = "vmhalt";
 			VCPU_CTR0(vm, vcpuid, "Halted");
 			if (!vcpu_halted && halt_detection_enabled) {
 				vcpu_halted = 1;
 				CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
 			}
 			if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
 				vm_halted = 1;
 				break;
 			}
 		} else {
 			wmesg = "vmidle";
 		}
 
 		/* Sleep in the SLEEPING state and account the idle ticks. */
 		t = ticks;
 		vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
 		/*
 		 * XXX msleep_spin() cannot be interrupted by signals so
 		 * wake up periodically to check pending signals.
 		 */
 		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
 		vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 	}
 
 	/* Undo this vcpu's entry in 'halted_cpus' before leaving. */
 	if (vcpu_halted)
 		CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);
 
 	vcpu_unlock(vcpu);
 
 	/* The suspend request is issued after the vcpu lock is dropped. */
 	if (vm_halted)
 		vm_suspend(vm, VM_SUSPEND_HALT);
 
 	return (0);
 }
 
 /*
  * Resolve a nested paging fault recorded in the vcpu's exit information.
  *
  * Read and write faults are first offered to
  * pmap_emulate_accessed_dirty(); when that does not handle the fault, or
  * for execute faults, vm_fault() is invoked on the VM's address space map.
  * Returns 0 when the fault was resolved and EFAULT otherwise.  'retu' is
  * not referenced.
  */
 static int
 vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
 {
 	struct vm_exit *vme;
 	int fault_type, rv;
 
 	vme = &vm->vcpu[vcpuid].exitinfo;
 
 	KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
 	    __func__, vme->inst_length));
 
 	fault_type = vme->u.paging.fault_type;
 	KASSERT(fault_type == VM_PROT_READ ||
 	    fault_type == VM_PROT_WRITE || fault_type == VM_PROT_EXECUTE,
 	    ("vm_handle_paging: invalid fault_type %d", fault_type));
 
 	if (fault_type == VM_PROT_READ || fault_type == VM_PROT_WRITE) {
 		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
 		    vme->u.paging.gpa, fault_type);
 		if (rv == 0) {
 			VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
 			    fault_type == VM_PROT_READ ? "accessed" : "dirty",
 			    vme->u.paging.gpa);
 			return (0);
 		}
 	}
 
 	rv = vm_fault(&vm->vmspace->vm_map, vme->u.paging.gpa, fault_type,
 	    VM_FAULT_NORMAL);
 
 	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
 	    "ftype = %d", rv, vme->u.paging.gpa, fault_type);
 
 	return (rv == KERN_SUCCESS ? 0 : EFAULT);
 }
 
 /*
  * Handle an instruction emulation exit for vcpu 'vcpuid'.
  *
  * The faulting instruction is fetched (unless its bytes are already in
  * 'vie'), decoded, and 'nextrip' is advanced past it.  Accesses that fall
  * in the local APIC, I/O APIC or HPET ranges are emulated here; any other
  * address, or a decode failure, sets '*retu' so the exit is passed to
  * userspace.
  *
  * Returns 0 or the error from the fetch or the emulation.  A fault reported
  * by the fetch returns 'error' without setting '*retu'.
  */
 static int
 vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
 {
 	struct vie *vie;
 	struct vcpu *vcpu;
 	struct vm_exit *vme;
 	uint64_t gla, gpa, cs_base;
 	struct vm_guest_paging *paging;
 	mem_region_read_t mread;
 	mem_region_write_t mwrite;
 	enum vm_cpu_mode cpu_mode;
 	int cs_d, error, fault;
 
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 
 	KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
 	    __func__, vme->inst_length));
 
 	gla = vme->u.inst_emul.gla;
 	gpa = vme->u.inst_emul.gpa;
 	cs_base = vme->u.inst_emul.cs_base;
 	cs_d = vme->u.inst_emul.cs_d;
 	vie = &vme->u.inst_emul.vie;
 	paging = &vme->u.inst_emul.paging;
 	cpu_mode = paging->cpu_mode;
 
 	VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);
 
 	/* Fetch, decode and emulate the faulting instruction */
 	if (vie->num_valid == 0) {
 		error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip +
 		    cs_base, VIE_INST_SIZE, vie, &fault);
 	} else {
 		/*
 		 * The instruction bytes have already been copied into 'vie'
 		 */
 		error = fault = 0;
 	}
 	if (error || fault)
 		return (error);
 
 	if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) {
 		VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx",
 		    vme->rip + cs_base);
 		*retu = true;	    /* dump instruction bytes in userspace */
 		return (0);
 	}
 
 	/*
 	 * Update 'nextrip' based on the length of the emulated instruction.
 	 */
 	vme->inst_length = vie->num_processed;
 	vcpu->nextrip += vie->num_processed;
 	VCPU_CTR1(vm, vcpuid, "nextrip updated to %#lx after instruction "
 	    "decoding", vcpu->nextrip);
  
 	/* return to userland unless this is an in-kernel emulated device */
 	if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
 		mread = lapic_mmio_read;
 		mwrite = lapic_mmio_write;
 	} else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
 		mread = vioapic_mmio_read;
 		mwrite = vioapic_mmio_write;
 	} else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
 		mread = vhpet_mmio_read;
 		mwrite = vhpet_mmio_write;
 	} else {
 		*retu = true;
 		return (0);
 	}
 
 	/* Emulate against the handlers of the device selected above. */
 	error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging,
 	    mread, mwrite, retu);
 
 	return (error);
 }
 
 /*
  * Park vcpu 'vcpuid' after the VM has been suspended.
  *
  * The vcpu records itself in 'suspended_cpus' and waits until that set
  * equals 'active_cpus', servicing any rendezvous that is requested in the
  * meantime.  It then notifies every suspended vcpu and sets '*retu' so that
  * the exit is returned to userspace.  Always returns 0.
  */
 static int
 vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
 {
 	int i;
 	struct vcpu *vcpu;
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);
 
 	/*
 	 * Wait until all 'active_cpus' have suspended themselves.
 	 *
 	 * Since a VM may be suspended at any time including when one or
 	 * more vcpus are doing a rendezvous we need to call the rendezvous
 	 * handler while we are waiting to prevent a deadlock.
 	 */
 	vcpu_lock(vcpu);
 	while (1) {
 		if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
 			VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
 			break;
 		}
 
 		if (vm->rendezvous_func == NULL) {
 			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
 			vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
 			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
 			vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
 		} else {
 			/* The vcpu lock is dropped around the rendezvous. */
 			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
 			vcpu_unlock(vcpu);
 			vm_handle_rendezvous(vm, vcpuid);
 			vcpu_lock(vcpu);
 		}
 	}
 	vcpu_unlock(vcpu);
 
 	/*
 	 * Wakeup the other sleeping vcpus and return to userspace.
 	 */
 	for (i = 0; i < VM_MAXCPU; i++) {
 		if (CPU_ISSET(i, &vm->suspended_cpus)) {
 			vcpu_notify_event(vm, i, false);
 		}
 	}
 
 	*retu = true;
 	return (0);
 }
 
 /*
  * Acknowledge an idle request for vcpu 'vcpuid': clear 'reqidle' under the
  * vcpu lock and set '*retu' so the exit goes back to userspace.  Always
  * returns 0.
  */
 static int
 vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu)
 {
 	struct vcpu *target;
 
 	target = &vm->vcpu[vcpuid];
 	*retu = true;
 
 	vcpu_lock(target);
 	KASSERT(target->reqidle, ("invalid vcpu reqidle %d", target->reqidle));
 	target->reqidle = 0;
 	vcpu_unlock(target);
 
 	return (0);
 }
 
 /*
  * Request suspension of 'vm' for reason 'how'.
  *
  * Returns EINVAL when 'how' is outside the open interval
  * (VM_SUSPEND_NONE, VM_SUSPEND_LAST), EALREADY when a suspend reason is
  * already recorded, and 0 after the reason has been recorded and every
  * active vcpu has been notified.
  */
 int
 vm_suspend(struct vm *vm, enum vm_suspend_how how)
 {
 	int cpu;
 
 	if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
 		return (EINVAL);
 
 	/* Only the first requester gets to record its reason. */
 	if (!atomic_cmpset_int(&vm->suspend, 0, how)) {
 		VM_CTR2(vm, "virtual machine already suspended %d/%d",
 		    vm->suspend, how);
 		return (EALREADY);
 	}
 
 	VM_CTR1(vm, "virtual machine successfully suspended %d", how);
 
 	/* Let every active vcpu know that the VM is now suspended. */
 	for (cpu = 0; cpu < VM_MAXCPU; cpu++) {
 		if (!CPU_ISSET(cpu, &vm->active_cpus))
 			continue;
 		vcpu_notify_event(vm, cpu, false);
 	}
 
 	return (0);
 }
 
 /*
  * Fill in the exit information of vcpu 'vcpuid' for a
  * VM_EXITCODE_SUSPENDED exit at 'rip', recording the VM's suspend reason.
  */
 void
 vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vme;
 
 	KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
 	    ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
 
 	vme = vm_exitinfo(vm, vcpuid);
 	vme->exitcode = VM_EXITCODE_SUSPENDED;
 	vme->rip = rip;
 	vme->inst_length = 0;
 	vme->u.suspended.how = vm->suspend;
 }
 
 /*
  * Fill in the exit information of vcpu 'vcpuid' for a VM_EXITCODE_DEBUG
  * exit at 'rip'.
  */
 void
 vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vme;
 
 	vme = vm_exitinfo(vm, vcpuid);
 	vme->exitcode = VM_EXITCODE_DEBUG;
 	vme->rip = rip;
 	vme->inst_length = 0;
 }
 
 /*
  * Fill in the exit information of vcpu 'vcpuid' for a
  * VM_EXITCODE_RENDEZVOUS exit at 'rip' and count the exit.  A rendezvous
  * must be in progress.
  */
 void
 vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vme;
 
 	KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress"));
 
 	vme = vm_exitinfo(vm, vcpuid);
 	vme->exitcode = VM_EXITCODE_RENDEZVOUS;
 	vme->rip = rip;
 	vme->inst_length = 0;
 	vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1);
 }
 
 /*
  * Fill in the exit information of vcpu 'vcpuid' for a VM_EXITCODE_REQIDLE
  * exit at 'rip' and count the exit.
  */
 void
 vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vme;
 
 	vme = vm_exitinfo(vm, vcpuid);
 	vme->exitcode = VM_EXITCODE_REQIDLE;
 	vme->rip = rip;
 	vme->inst_length = 0;
 	vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
 }
 
 /*
  * Fill in the exit information of vcpu 'vcpuid' for an exit at 'rip' taken
  * because of a pending AST.  The exit is reported as VM_EXITCODE_BOGUS and
  * counted under VMEXIT_ASTPENDING.
  */
 void
 vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vme;
 
 	vme = vm_exitinfo(vm, vcpuid);
 	vme->exitcode = VM_EXITCODE_BOGUS;
 	vme->rip = rip;
 	vme->inst_length = 0;
 	vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
 }
 
 /*
  * Run vcpu 'vmrun->cpuid' until an exit has to be handled by userspace or
  * an error occurs.
  *
  * Returns EINVAL when the vcpu index is out of range, the vcpu is not
  * active, or it is already marked suspended.  Otherwise the guest is
  * entered repeatedly: exits that are handled in the kernel without error
  * and without setting 'retu' restart the guest.  Before returning, the
  * vcpu's exit information is copied into 'vmrun->vm_exit'.
  */
 int
 vm_run(struct vm *vm, struct vm_run *vmrun)
 {
 	struct vm_eventinfo evinfo;
 	int error, vcpuid;
 	struct vcpu *vcpu;
 	struct pcb *pcb;
 	uint64_t tscval;
 	struct vm_exit *vme;
 	bool retu, intr_disabled;
 	pmap_t pmap;
 
 	vcpuid = vmrun->cpuid;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (!CPU_ISSET(vcpuid, &vm->active_cpus))
 		return (EINVAL);
 
 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
 		return (EINVAL);
 
 	pmap = vmspace_pmap(vm->vmspace);
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
 	/* Pointers to the rendezvous, suspend and reqidle indicators. */
 	evinfo.rptr = &vm->rendezvous_func;
 	evinfo.sptr = &vm->suspend;
 	evinfo.iptr = &vcpu->reqidle;
 restart:
 	critical_enter();
 
 	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
 	    ("vm_run: absurd pm_active"));
 
 	tscval = rdtsc();
 
 	pcb = PCPU_GET(curpcb);
 	set_pcb_flags(pcb, PCB_FULL_IRET);
 
 	restore_guest_fpustate(vcpu);
 
 	/* The vcpu is RUNNING only for the duration of VMRUN(). */
 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
 	error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
 
 	save_guest_fpustate(vcpu);
 
 	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
 
 	critical_exit();
 
 	if (error == 0) {
 		retu = false;
 		/* By default resume after the instruction that exited. */
 		vcpu->nextrip = vme->rip + vme->inst_length;
 		switch (vme->exitcode) {
 		case VM_EXITCODE_REQIDLE:
 			error = vm_handle_reqidle(vm, vcpuid, &retu);
 			break;
 		case VM_EXITCODE_SUSPENDED:
 			error = vm_handle_suspend(vm, vcpuid, &retu);
 			break;
 		case VM_EXITCODE_IOAPIC_EOI:
 			vioapic_process_eoi(vm, vcpuid,
 			    vme->u.ioapic_eoi.vector);
 			break;
 		case VM_EXITCODE_RENDEZVOUS:
 			vm_handle_rendezvous(vm, vcpuid);
 			error = 0;
 			break;
 		case VM_EXITCODE_HLT:
 			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
 			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
 			break;
 		case VM_EXITCODE_PAGING:
 			error = vm_handle_paging(vm, vcpuid, &retu);
 			break;
 		case VM_EXITCODE_INST_EMUL:
 			error = vm_handle_inst_emul(vm, vcpuid, &retu);
 			break;
 		case VM_EXITCODE_INOUT:
 		case VM_EXITCODE_INOUT_STR:
 			error = vm_handle_inout(vm, vcpuid, vme, &retu);
 			break;
 		case VM_EXITCODE_MONITOR:
 		case VM_EXITCODE_MWAIT:
 			vm_inject_ud(vm, vcpuid);
 			break;
 		default:
 			retu = true;	/* handled in userland */
 			break;
 		}
 	}
 
 	/* 'retu' is only examined when 'error' is 0, i.e. after it was set. */
 	if (error == 0 && retu == false)
 		goto restart;
 
 	VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode);
 
 	/* copy the exit information */
 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
 	return (error);
 }
 
 /*
  * Arrange for vcpu 'vcpuid' of the VM passed in 'arg' to re-execute its
  * current instruction.
  *
  * Returns EINVAL for an out-of-range vcpu and 0 otherwise.  Panics when the
  * vcpu is neither running nor frozen.
  */
 int
 vm_restart_instruction(void *arg, int vcpuid)
 {
 	struct vm *vm;
 	struct vcpu *vcpu;
 	uint64_t rip;
 	int error;
 
 	vm = arg;
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	vcpu = &vm->vcpu[vcpuid];
 	switch (vcpu_get_state(vm, vcpuid, NULL)) {
 	case VCPU_RUNNING:
 		/*
 		 * While "running", the next instruction is computed as
 		 * 'rip' + 'inst_length' from the vcpu's 'exitinfo', so a
 		 * zero 'inst_length' restarts the current instruction.
 		 */
 		vcpu->exitinfo.inst_length = 0;
 		VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by "
 		    "setting inst_length to zero", vcpu->exitinfo.rip);
 		break;
 	case VCPU_FROZEN:
 		/*
 		 * While "frozen", the vcpu is outside the critical section
 		 * around VMRUN() and 'nextrip' already names the next
 		 * instruction, so it is reset to the vcpu's %rip.
 		 */
 		error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
 		KASSERT(!error, ("%s: error %d getting rip", __func__, error));
 		VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
 		    "nextrip from %#lx to %#lx", vcpu->nextrip, rip);
 		vcpu->nextrip = rip;
 		break;
 	default:
 		panic("%s: invalid state %d", __func__,
 		    vcpu_get_state(vm, vcpuid, NULL));
 	}
 	return (0);
 }
 
 /*
  * Record 'info' as the event that was being delivered when vcpu 'vcpuid'
  * exited.
  *
  * An 'info' without VM_INTINFO_VALID is stored as 0.  A valid 'info' is
  * rejected with EINVAL when it is an NMI with a vector other than IDT_NMI,
  * a hardware exception with a vector of 32 or above, or has reserved bits
  * set.  EINVAL is also returned for an out-of-range vcpu.
  */
 int
 vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
 {
 	int kind, vec;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	if ((info & VM_INTINFO_VALID) == 0) {
 		info = 0;
 	} else {
 		kind = info & VM_INTINFO_TYPE;
 		vec = info & 0xff;
 		if ((kind == VM_INTINFO_NMI && vec != IDT_NMI) ||
 		    (kind == VM_INTINFO_HWEXCEPTION && vec >= 32) ||
 		    (info & VM_INTINFO_RSVD) != 0)
 			return (EINVAL);
 	}
 
 	VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info);
 	vm->vcpu[vcpuid].exitintinfo = info;
 	return (0);
 }
 
 /*
  * Classification of an event as computed by exception_class(); used by
  * nested_fault() to decide whether two events combine into a double fault.
  */
 enum exc_class {
 	EXC_BENIGN,		/* interrupts, NMI and all other vectors */
 	EXC_CONTRIBUTORY,	/* #DE, #TS, #NP, #SS, #GP */
 	EXC_PAGEFAULT		/* #PF, #VE */
 };
 
 /* Classified together with IDT_PF as EXC_PAGEFAULT by exception_class(). */
 #define	IDT_VE	20	/* Virtualization Exception (Intel specific) */
 
 /*
  * Classify the valid event described by 'info' as benign, contributory or
  * page-fault class (Table 6-4, "Interrupt and Exception Classes", Intel
  * SDM, Vol 3).
  */
 static enum exc_class
 exception_class(uint64_t info)
 {
 	enum exc_class result;
 	int kind, vec;
 
 	KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info));
 	kind = info & VM_INTINFO_TYPE;
 	vec = info & 0xff;
 
 	/* Interrupts of any flavor and NMIs are always benign. */
 	if (kind == VM_INTINFO_HWINTR || kind == VM_INTINFO_SWINTR ||
 	    kind == VM_INTINFO_NMI)
 		return (EXC_BENIGN);
 
 	/*
 	 * Anything else is treated as a hardware exception.
 	 *
 	 * SVM and VT-x agree on the type values for NMI, hardware interrupt
 	 * and software interrupt.  SVM uses type '3' for all exceptions while
 	 * VT-x uses type '3' for exceptions other than #BP and #OF, which use
 	 * a type value of '5' or '6'.  For that reason the type is not
 	 * compared against explicit exception values here.
 	 */
 	switch (vec) {
 	case IDT_PF:
 	case IDT_VE:
 		result = EXC_PAGEFAULT;
 		break;
 	case IDT_DE:
 	case IDT_TS:
 	case IDT_NP:
 	case IDT_SS:
 	case IDT_GP:
 		result = EXC_CONTRIBUTORY;
 		break;
 	default:
 		result = EXC_BENIGN;
 		break;
 	}
 	return (result);
 }
 
 /*
  * Combine two valid events: 'info1' was being delivered when 'info2' was
  * raised.
  *
  * Returns 0 with '*retinfo' cleared after requesting a triple-fault suspend
  * when 'info1' is a double fault.  Otherwise returns 1 with '*retinfo' set
  * either to a double fault or to 'info2'.
  */
 static int
 nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
     uint64_t *retinfo)
 {
 	enum exc_class first, second;
 	uint64_t df;
 	int kind1, vec1;
 
 	KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1));
 	KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2));
 
 	/*
 	 * An exception raised while calling the double-fault handler puts
 	 * the processor into shutdown mode (aka triple fault).
 	 */
 	kind1 = info1 & VM_INTINFO_TYPE;
 	vec1 = info1 & 0xff;
 	if (kind1 == VM_INTINFO_HWEXCEPTION && vec1 == IDT_DF) {
 		VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)",
 		    info1, info2);
 		vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
 		*retinfo = 0;
 		return (0);
 	}
 
 	/*
 	 * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
 	 */
 	first = exception_class(info1);
 	second = exception_class(info2);
 	if (!((first == EXC_CONTRIBUTORY && second == EXC_CONTRIBUTORY) ||
 	    (first == EXC_PAGEFAULT && second != EXC_BENIGN))) {
 		/* No double fault: deliver the second event on its own. */
 		*retinfo = info2;
 		return (1);
 	}
 
 	/* The pair escalates to a double fault. */
 	df = IDT_DF;
 	df |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
 	df |= VM_INTINFO_DEL_ERRCODE;
 	*retinfo = df;
 	return (1);
 }
 
 /*
  * Encode the exception pending on 'vcpu' as an 'intinfo' value: vector in
  * the low byte, marked valid and as a hardware exception, with the error
  * code in the upper 32 bits when one is to be delivered.  Returns 0 when no
  * exception is pending.
  */
 static uint64_t
 vcpu_exception_intinfo(struct vcpu *vcpu)
 {
 	uint64_t encoded;
 
 	if (!vcpu->exception_pending)
 		return (0);
 
 	encoded = vcpu->exc_vector & 0xff;
 	encoded |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
 	if (vcpu->exc_errcode_valid) {
 		encoded |= VM_INTINFO_DEL_ERRCODE;
 		encoded |= (uint64_t)vcpu->exc_errcode << 32;
 	}
 	return (encoded);
 }
 
 /*
  * Work out which event, if any, is to be injected on the next entry of vcpu
  * 'vcpuid' and consume the recorded state.
  *
  * The saved exit-time event and the pending exception are both cleared.
  * When both were valid they are merged by nested_fault(); when only one
  * was valid it is used as is.  Returns non-zero with '*retinfo' filled in
  * when there is an event to inject, and 0 otherwise.
  */
 int
 vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
 {
 	struct vcpu *vcpu;
 	uint64_t exit_info, exc_info;
 	int have_exit, have_exc, valid;
 
 	KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid));
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	/* Take the event that was in flight at the last exit. */
 	exit_info = vcpu->exitintinfo;
 	vcpu->exitintinfo = 0;
 
 	/* Take the exception queued for injection, if any. */
 	exc_info = 0;
 	if (vcpu->exception_pending) {
 		exc_info = vcpu_exception_intinfo(vcpu);
 		vcpu->exception_pending = 0;
 		VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
 		    vcpu->exc_vector, exc_info);
 	}
 
 	have_exit = (exit_info & VM_INTINFO_VALID) != 0;
 	have_exc = (exc_info & VM_INTINFO_VALID) != 0;
 
 	if (have_exit && have_exc) {
 		valid = nested_fault(vm, vcpuid, exit_info, exc_info, retinfo);
 	} else if (have_exit || have_exc) {
 		*retinfo = have_exit ? exit_info : exc_info;
 		valid = 1;
 	} else {
 		valid = 0;
 	}
 
 	if (valid) {
 		VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), "
 		    "retinfo(%#lx)", __func__, exit_info, exc_info, *retinfo);
 	}
 
 	return (valid);
 }
 
 /*
  * Report, without consuming them, the saved exit-time event ('*info1') and
  * the encoded pending exception ('*info2') of vcpu 'vcpuid'.  Returns
  * EINVAL for an out-of-range vcpu and 0 otherwise.
  */
 int
 vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
 {
 	struct vcpu *target;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	target = &vm->vcpu[vcpuid];
 	*info1 = target->exitintinfo;
 	*info2 = vcpu_exception_intinfo(target);
 
 	return (0);
 }
 
 /*
  * Queue exception 'vector' for injection into vcpu 'vcpuid'.
  *
  * Returns EINVAL for an out-of-range vcpu, a vector outside [0, 32) or a
  * double fault, and EBUSY when another exception is already pending.  The
  * error code is dropped when the guest's CR0.PE is clear.  The guest's
  * interrupt shadow is cleared, and the current instruction is restarted
  * when 'restart_instruction' is non-zero.
  */
 int
 vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid,
     uint32_t errcode, int restart_instruction)
 {
 	struct vcpu *vcpu;
 	uint64_t cr0;
 	int rc;
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU ||
 	    vector < 0 || vector >= 32)
 		return (EINVAL);
 
 	/*
 	 * A double fault is a derived exception produced by specific
 	 * combinations of nested faults; it is never injected directly.
 	 */
 	if (vector == IDT_DF)
 		return (EINVAL);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	if (vcpu->exception_pending) {
 		VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
 		    "pending exception %d", vector, vcpu->exc_vector);
 		return (EBUSY);
 	}
 
 	if (errcode_valid) {
 		/* No error code is delivered with exceptions in real mode. */
 		rc = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
 		KASSERT(!rc, ("%s: error %d getting CR0", __func__, rc));
 		if ((cr0 & CR0_PE) == 0)
 			errcode_valid = 0;
 	}
 
 	/*
 	 * From section 26.6.1 "Interruptibility State" in Intel SDM:
 	 *
 	 * Event blocking by "STI" or "MOV SS" is cleared after guest executes
 	 * one instruction or incurs an exception.
 	 */
 	rc = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
 	KASSERT(rc == 0, ("%s: error %d clearing interrupt shadow",
 	    __func__, rc));
 
 	if (restart_instruction)
 		vm_restart_instruction(vm, vcpuid);
 
 	vcpu->exception_pending = 1;
 	vcpu->exc_vector = vector;
 	vcpu->exc_errcode = errcode;
 	vcpu->exc_errcode_valid = errcode_valid;
 	VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector);
 	return (0);
 }
 
 /*
  * Inject fault 'vector' into vcpu 'vcpuid' of the VM passed in 'vmarg',
  * always asking for the current instruction to be restarted.  Failure of
  * the injection is asserted against.
  */
 void
 vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
     int errcode)
 {
 	struct vm *vm;
 	int error;
 
 	vm = vmarg;
 
 	/* The final argument requests an instruction restart. */
 	error = vm_inject_exception(vm, vcpuid, vector, errcode_valid,
 	    errcode, 1);
 	KASSERT(error == 0, ("vm_inject_exception error %d", error));
 }
 
 /*
  * Inject a page fault into vcpu 'vcpuid' of the VM passed in 'vmarg': the
  * guest's %cr2 is set to 'cr2' and then #PF is injected with 'error_code'.
  */
 void
 vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2)
 {
 	struct vm *guest;
 	int error;
 
 	guest = vmarg;
 	VCPU_CTR2(guest, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx",
 	    error_code, cr2);
 
 	/* %cr2 has to hold the faulting address before the #PF is queued. */
 	error = vm_set_register(guest, vcpuid, VM_REG_GUEST_CR2, cr2);
 	KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));
 
 	vm_inject_fault(guest, vcpuid, IDT_PF, 1, error_code);
 }
 
 /* Incremented by one each time vm_nmi_clear() consumes a pending NMI. */
 static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
 
 /*
  * Mark an NMI as pending for vcpu 'vcpuid' and notify the vcpu.  Returns
  * EINVAL for an out-of-range vcpu and 0 otherwise.
  */
 int
 vm_inject_nmi(struct vm *vm, int vcpuid)
 {
 
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	vm->vcpu[vcpuid].nmi_pending = 1;
 	vcpu_notify_event(vm, vcpuid, false);
 
 	return (0);
 }
 
 /*
  * Return the nmi_pending flag of 'vcpuid'.  Panics on an out-of-range
  * vcpuid.
  */
 int
 vm_nmi_pending(struct vm *vm, int vcpuid)
 {
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
 
 	return (vm->vcpu[vcpuid].nmi_pending);
 }
 
 /*
  * Clear the pending NMI on 'vcpuid' and account for it in VCPU_NMI_COUNT.
  *
  * Panics if 'vcpuid' is out of range or if no NMI is currently pending,
  * since clearing a non-pending NMI indicates inconsistent state.
  */
 void
 vm_nmi_clear(struct vm *vm, int vcpuid)
 {
 	struct vcpu *vcpu;
 
 	/* Report this function's own name, not vm_nmi_pending's. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_nmi_clear: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	if (vcpu->nmi_pending == 0)
 		panic("vm_nmi_clear: inconsistent nmi_pending state");
 
 	vcpu->nmi_pending = 0;
 	vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
 }
 
 /* Per-vcpu counter; vm_extint_clear() adds 1 each time a pending ExtINT is cleared. */
 static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
 
 /*
  * Mark an ExtINT as pending on 'vcpuid' and notify the vcpu so that it
  * notices the event.
  *
  * Returns EINVAL for an out-of-range vcpuid, 0 otherwise.
  */
 int
 vm_inject_extint(struct vm *vm, int vcpuid)
 {
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	vm->vcpu[vcpuid].extint_pending = 1;
 	vcpu_notify_event(vm, vcpuid, false);
 	return (0);
 }
 
 /*
  * Return the extint_pending flag of 'vcpuid'.  Panics on an out-of-range
  * vcpuid.
  */
 int
 vm_extint_pending(struct vm *vm, int vcpuid)
 {
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
 
 	return (vm->vcpu[vcpuid].extint_pending);
 }
 
 /*
  * Clear the pending ExtINT on 'vcpuid' and account for it in
  * VCPU_EXTINT_COUNT.
  *
  * Panics if 'vcpuid' is out of range or if no ExtINT is currently pending,
  * since clearing a non-pending ExtINT indicates inconsistent state.
  */
 void
 vm_extint_clear(struct vm *vm, int vcpuid)
 {
 	struct vcpu *vcpu;
 
 	/* Report this function's own name, not vm_extint_pending's. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("vm_extint_clear: invalid vcpuid %d", vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	if (vcpu->extint_pending == 0)
 		panic("vm_extint_clear: inconsistent extint_pending state");
 
 	vcpu->extint_pending = 0;
 	vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
 }
 
 /*
  * Read capability 'type' of 'vcpu' into '*retval' through VMGETCAP().
  *
  * Returns EINVAL if either the vcpu index or the capability type is out
  * of range; otherwise returns whatever VMGETCAP() returns.
  */
 int
 vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
 {
 	if (vcpu < 0 || vcpu >= VM_MAXCPU || type < 0 || type >= VM_CAP_MAX)
 		return (EINVAL);
 
 	return (VMGETCAP(vm->cookie, vcpu, type, retval));
 }
 
 /*
  * Set capability 'type' of 'vcpu' to 'val' through VMSETCAP().
  *
  * Returns EINVAL if either the vcpu index or the capability type is out
  * of range; otherwise returns whatever VMSETCAP() returns.
  */
 int
 vm_set_capability(struct vm *vm, int vcpu, int type, int val)
 {
 	if (vcpu < 0 || vcpu >= VM_MAXCPU || type < 0 || type >= VM_CAP_MAX)
 		return (EINVAL);
 
 	return (VMSETCAP(vm->cookie, vcpu, type, val));
 }
 
 struct vlapic *
 vm_lapic(struct vm *vm, int cpu)
 {
 	return (vm->vcpu[cpu].vlapic);
 }
 
 /* Accessor for the VM's vioapic pointer. */
 struct vioapic *
 vm_ioapic(struct vm *vm)
 {
 	struct vioapic *ioapic;
 
 	ioapic = vm->vioapic;
 	return (ioapic);
 }
 
 /* Accessor for the VM's vhpet pointer. */
 struct vhpet *
 vm_hpet(struct vm *vm)
 {
 	struct vhpet *hpet;
 
 	hpet = vm->vhpet;
 	return (hpet);
 }
 
 /*
  * Report whether the PCI device 'bus'/'slot'/'func' is listed in one of the
  * kernel environment variables "pptdevs", "pptdevs2" or "pptdevs3".
  *
  * Each variable holds space-separated "bus/slot/func" triples.  Returns 1
  * if a triple matches all three arguments, 0 otherwise (including when
  * none of the variables is set).
  */
 boolean_t
 vmm_is_pptdev(int bus, int slot, int func)
 {
 	int found, i, n;
 	int b, s, f;
 	char *val, *cp, *cp2;
 
 	/*
 	 * XXX
 	 * The length of an environment variable is limited to 128 bytes which
 	 * puts an upper limit on the number of passthru devices that may be
 	 * specified using a single environment variable.
 	 *
 	 * Work around this by scanning multiple environment variable
 	 * names instead of a single one - yuck!
 	 */
 	const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
 
 	/* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
 	found = 0;
 	for (i = 0; names[i] != NULL && !found; i++) {
 		cp = val = kern_getenv(names[i]);
 		while (cp != NULL && *cp != '\0') {
 			/* Temporarily terminate the current entry at the space. */
 			if ((cp2 = strchr(cp, ' ')) != NULL)
 				*cp2 = '\0';
 
 			n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
 			if (n == 3 && bus == b && slot == s && func == f) {
 				found = 1;
 				break;
 			}
 		
 			/* Restore the space and step past it to the next entry. */
 			if (cp2 != NULL)
 				*cp2++ = ' ';
 
 			/* cp2 is NULL after the last entry, which ends the loop. */
 			cp = cp2;
 		}
 		freeenv(val);
 	}
 	return (found);
 }
 
 /* Accessor for the VM's opaque iommu pointer. */
 void *
 vm_iommu_domain(struct vm *vm)
 {
 	void *domain;
 
 	domain = vm->iommu;
 	return (domain);
 }
 
 /*
  * Change the state of 'vcpuid' to 'newstate' by calling
  * vcpu_set_state_locked() with the vcpu lock held.
  *
  * 'from_idle' is passed through unchanged.  Panics on an out-of-range
  * vcpuid; otherwise returns the result of vcpu_set_state_locked().
  */
 int
 vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
     bool from_idle)
 {
 	int error;
 	struct vcpu *vcpu;
 
 	/* Use __func__ so the message always names this function. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("%s: invalid vcpuid %d", __func__, vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
 	error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
 	vcpu_unlock(vcpu);
 
 	return (error);
 }
 
 /*
  * Return the current state of 'vcpuid', read under the vcpu lock.
  *
  * If 'hostcpu' is not NULL, the vcpu's hostcpu field is stored through it
  * under the same lock acquisition.  Panics on an out-of-range vcpuid.
  */
 enum vcpu_state
 vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
 {
 	struct vcpu *vcpu;
 	enum vcpu_state state;
 
 	/* Use __func__ so the message always names this function. */
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		panic("%s: invalid vcpuid %d", __func__, vcpuid);
 
 	vcpu = &vm->vcpu[vcpuid];
 
 	vcpu_lock(vcpu);
 	state = vcpu->state;
 	if (hostcpu != NULL)
 		*hostcpu = vcpu->hostcpu;
 	vcpu_unlock(vcpu);
 
 	return (state);
 }
 
 /*
  * Add 'vcpuid' to the VM's set of active cpus.
  *
  * Returns EINVAL for an out-of-range vcpuid, EBUSY if the vcpu is already
  * in the active set, and 0 on success.
  */
 int
 vm_activate_cpu(struct vm *vm, int vcpuid)
 {
 	if (!(vcpuid >= 0 && vcpuid < VM_MAXCPU))
 		return (EINVAL);
 	if (CPU_ISSET(vcpuid, &vm->active_cpus))
 		return (EBUSY);
 
 	VCPU_CTR0(vm, vcpuid, "activated");
 	CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
 	return (0);
 }
 
 /*
  * Place vcpus in the VM's debug_cpus set and notify them.
  *
  * A 'vcpuid' of -1 selects every active vcpu: debug_cpus is overwritten
  * with active_cpus and each active vcpu is notified.  Otherwise only
  * 'vcpuid' is added, and it must already be active.
  *
  * Returns EINVAL for an out-of-range or inactive vcpuid, 0 on success.
  */
 int
 vm_suspend_cpu(struct vm *vm, int vcpuid)
 {
 	int cpu;
 
 	if (vcpuid < -1 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (vcpuid != -1) {
 		/* Single vcpu: it has to be active to be suspended. */
 		if (!CPU_ISSET(vcpuid, &vm->active_cpus))
 			return (EINVAL);
 
 		CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus);
 		vcpu_notify_event(vm, vcpuid, false);
 		return (0);
 	}
 
 	/* All vcpus: suspend everything that is currently active. */
 	vm->debug_cpus = vm->active_cpus;
 	for (cpu = 0; cpu < VM_MAXCPU; cpu++) {
 		if (CPU_ISSET(cpu, &vm->active_cpus))
 			vcpu_notify_event(vm, cpu, false);
 	}
 	return (0);
 }
 
 /*
  * Remove vcpus from the VM's debug_cpus set.
  *
  * A 'vcpuid' of -1 empties the whole set.  Otherwise only 'vcpuid' is
  * removed, and it must currently be in the set.
  *
  * Returns EINVAL for an out-of-range vcpuid or one that is not in
  * debug_cpus, 0 on success.
  */
 int
 vm_resume_cpu(struct vm *vm, int vcpuid)
 {
 	if (vcpuid < -1 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	if (vcpuid == -1) {
 		CPU_ZERO(&vm->debug_cpus);
 		return (0);
 	}
 
 	if (!CPU_ISSET(vcpuid, &vm->debug_cpus))
 		return (EINVAL);
 
 	CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
 	return (0);
 }
 
 /*
  * Return the result of testing 'vcpuid' for membership in the VM's
  * debug_cpus set.  The index is not range-checked here.
  */
 int
 vcpu_debugged(struct vm *vm, int vcpuid)
 {
 	int member;
 
 	member = CPU_ISSET(vcpuid, &vm->debug_cpus);
 	return (member);
 }
 
 /* Return a by-value copy of the VM's active_cpus set. */
 cpuset_t
 vm_active_cpus(struct vm *vm)
 {
 	cpuset_t set;
 
 	set = vm->active_cpus;
 	return (set);
 }
 
 /* Return a by-value copy of the VM's debug_cpus set. */
 cpuset_t
 vm_debug_cpus(struct vm *vm)
 {
 	cpuset_t set;
 
 	set = vm->debug_cpus;
 	return (set);
 }
 
 /* Return a by-value copy of the VM's suspended_cpus set. */
 cpuset_t
 vm_suspended_cpus(struct vm *vm)
 {
 	cpuset_t set;
 
 	set = vm->suspended_cpus;
 	return (set);
 }
 
 void *
 vcpu_stats(struct vm *vm, int vcpuid)
 {
 
 	return (vm->vcpu[vcpuid].stats);
 }
 
 int
 vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
 {
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
 		return (EINVAL);
 
 	*state = vm->vcpu[vcpuid].x2apic_state;
 
 	return (0);
 }
 
 /*
  * Record 'state' as the x2apic state of 'vcpuid' and pass it on to
  * vlapic_set_x2apic_state().
  *
  * Returns EINVAL if the vcpuid is out of range or 'state' is not below
  * X2APIC_STATE_LAST, 0 otherwise.
  */
 int
 vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
 {
 	if (vcpuid < 0 || vcpuid >= VM_MAXCPU || state >= X2APIC_STATE_LAST)
 		return (EINVAL);
 
 	vm->vcpu[vcpuid].x2apic_state = state;
 	vlapic_set_x2apic_state(vm, vcpuid, state);
 	return (0);
 }
 
 /*
  * Make sure a vcpu "sees" a pending event as soon as possible:
  * - A sleeping vcpu thread is woken up.
  * - A vcpu running on another host cpu has an IPI directed at that host
  *   cpu, which causes the vcpu to trap into the hypervisor.  When
  *   'lapic_intr' is true the IPI is posted through the vlapic instead.
  *
  * The "_locked" suffix reflects the caller, vcpu_notify_event(), which
  * holds the vcpu lock around this call.
  */
 static void
 vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
 {
 	int hostcpu;
 
 	hostcpu = vcpu->hostcpu;
 
 	if (vcpu->state != VCPU_RUNNING) {
 		KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
 		    "with hostcpu %d", vcpu->state, hostcpu));
 		if (vcpu->state == VCPU_SLEEPING)
 			wakeup_one(vcpu);
 		return;
 	}
 
 	KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
 	if (hostcpu == curcpu) {
 		/*
 		 * If the 'vcpu' is running on 'curcpu' then it must
 		 * be sending a notification to itself (e.g. SELF_IPI).
 		 * The pending event will be picked up when the vcpu
 		 * transitions back to guest context.
 		 */
 		return;
 	}
 
 	if (lapic_intr)
 		vlapic_post_intr(vcpu->vlapic, hostcpu, vmm_ipinum);
 	else
 		ipi_cpu(hostcpu, vmm_ipinum);
 }
 
 /*
  * Locking wrapper around vcpu_notify_event_locked(): takes the vcpu lock
  * of 'vcpuid', delivers the notification, and drops the lock.  The index
  * is not range-checked here.
  */
 void
 vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
 {
 	struct vcpu *target;
 
 	target = &vm->vcpu[vcpuid];
 
 	vcpu_lock(target);
 	vcpu_notify_event_locked(target, lapic_intr);
 	vcpu_unlock(target);
 }
 
 /* Accessor for the VM's vmspace pointer. */
 struct vmspace *
 vm_get_vmspace(struct vm *vm)
 {
 	struct vmspace *space;
 
 	space = vm->vmspace;
 	return (space);
 }
 
 /*
  * Map an APIC id to a vcpu id.
  *
  * XXX the two are assumed to be numerically identical, so 'apicid' is
  * returned unchanged and 'vm' is not consulted.
  */
 int
 vm_apicid2vcpuid(struct vm *vm, int apicid)
 {
 	int vcpuid;
 
 	vcpuid = apicid;
 	return (vcpuid);
 }
 
 /*
  * Start a rendezvous that runs 'func' with 'arg' on the vcpus in 'dest',
  * then take part in it on behalf of 'vcpuid' (which may be -1).
  *
  * If another rendezvous is already in progress, it is serviced through
  * vm_handle_rendezvous() first and the attempt is retried until the
  * rendezvous slot is free.
  */
 void
 vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest,
     vm_rendezvous_func_t func, void *arg)
 {
 	int cpu;
 
 	/*
 	 * This function must be called without any locks held.
 	 */
 	WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous");
 	KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU),
 	    ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid));
 
 	/* The loop is left with rendezvous_mtx held and the slot free. */
 	for (;;) {
 		mtx_lock(&vm->rendezvous_mtx);
 		if (vm->rendezvous_func == NULL)
 			break;
 
 		/*
 		 * A rendezvous is already in progress, so call the
 		 * rendezvous handler in case this 'vcpuid' is one of
 		 * its targets, then try again.
 		 */
 		RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress");
 		mtx_unlock(&vm->rendezvous_mtx);
 		vm_handle_rendezvous(vm, vcpuid);
 	}
 	KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous "
 	    "rendezvous is still in progress"));
 
 	RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous");
 	vm->rendezvous_req_cpus = dest;
 	CPU_ZERO(&vm->rendezvous_done_cpus);
 	vm->rendezvous_arg = arg;
 	vm_set_rendezvous_func(vm, func);
 	mtx_unlock(&vm->rendezvous_mtx);
 
 	/*
 	 * Wake up any sleeping vcpus and trigger a VM-exit in any running
 	 * vcpus so they handle the rendezvous as soon as possible.
 	 */
 	for (cpu = 0; cpu < VM_MAXCPU; cpu++) {
 		if (CPU_ISSET(cpu, &dest))
 			vcpu_notify_event(vm, cpu, false);
 	}
 
 	vm_handle_rendezvous(vm, vcpuid);
 }
 
 /* Accessor for the VM's vatpic pointer. */
 struct vatpic *
 vm_atpic(struct vm *vm)
 {
 	struct vatpic *atpic;
 
 	atpic = vm->vatpic;
 	return (atpic);
 }
 
 /* Accessor for the VM's vatpit pointer. */
 struct vatpit *
 vm_atpit(struct vm *vm)
 {
 	struct vatpit *atpit;
 
 	atpit = vm->vatpit;
 	return (atpit);
 }
 
 /* Accessor for the VM's vpmtmr pointer. */
 struct vpmtmr *
 vm_pmtmr(struct vm *vm)
 {
 	struct vpmtmr *pmtmr;
 
 	pmtmr = vm->vpmtmr;
 	return (pmtmr);
 }
 
 /* Accessor for the VM's vrtc pointer. */
 struct vrtc *
 vm_rtc(struct vm *vm)
 {
 	struct vrtc *rtc;
 
 	rtc = vm->vrtc;
 	return (rtc);
 }
 
 enum vm_reg_name
 vm_segment_name(int seg)
 {
 	static enum vm_reg_name seg_names[] = {
 		VM_REG_GUEST_ES,
 		VM_REG_GUEST_CS,
 		VM_REG_GUEST_SS,
 		VM_REG_GUEST_DS,
 		VM_REG_GUEST_FS,
 		VM_REG_GUEST_GS
 	};
 
 	KASSERT(seg >= 0 && seg < nitems(seg_names),
 	    ("%s: invalid segment encoding %d", __func__, seg));
 	return (seg_names[seg]);
 }
 
 void
 vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
     int num_copyinfo)
 {
 	int idx;
 
 	for (idx = 0; idx < num_copyinfo; idx++) {
 		if (copyinfo[idx].cookie != NULL)
 			vm_gpa_release(copyinfo[idx].cookie);
 	}
 	bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo));
 }
 
 /*
  * Prepare 'copyinfo' for copying 'len' bytes at guest linear address
  * 'gla'.
  *
  * The range is split at page boundaries.  Each piece is translated with
  * vm_gla2gpa() and then held with vm_gpa_hold(), filling in one copyinfo
  * entry per piece.
  *
  * Returns:
  *   0 with *fault == 0  on success;
  *   the vm_gla2gpa() result if translation fails or sets *fault (nothing
  *     is held in that case);
  *   EFAULT if a piece cannot be held, after tearing down all entries.
  */
 int
 vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
     uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
     int num_copyinfo, int *fault)
 {
 	struct vm_copyinfo *ci;
 	uint64_t gpa;
 	size_t chunk, left, pgoff;
 	void *cookie, *hva;
 	int error, i, nused;
 
 	bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo);
 
 	/* Pass 1: translate the range one page-bounded piece at a time. */
 	for (nused = 0, left = len; left > 0; nused++) {
 		KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
 		error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault);
 		if (error || *fault)
 			return (error);
 
 		pgoff = gpa & PAGE_MASK;
 		chunk = min(left, PAGE_SIZE - pgoff);
 
 		ci = &copyinfo[nused];
 		ci->gpa = gpa;
 		ci->len = chunk;
 
 		left -= chunk;
 		gla += chunk;
 	}
 
 	/* Pass 2: hold each translated piece; undo everything on failure. */
 	for (i = 0; i < nused; i++) {
 		ci = &copyinfo[i];
 		hva = vm_gpa_hold(vm, vcpuid, ci->gpa, ci->len, prot, &cookie);
 		if (hva == NULL) {
 			vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
 			return (EFAULT);
 		}
 		ci->hva = hva;
 		ci->cookie = cookie;
 	}
 
 	*fault = 0;
 	return (0);
 }
 
 /*
  * Copy 'len' bytes from the regions described by 'copyinfo' into the
  * buffer at 'kaddr', consuming the entries in order.
  *
  * Each entry contributes its full 'len', so the caller's 'len' has to
  * equal the sum of the entry lengths consumed.
  */
 void
 vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
     size_t len)
 {
 	struct vm_copyinfo *ci;
 	char *out;
 
 	out = kaddr;
 	for (ci = copyinfo; len > 0; ci++) {
 		bcopy(ci->hva, out, ci->len);
 		len -= ci->len;
 		out += ci->len;
 	}
 }
 
 /*
  * Copy 'len' bytes from the buffer at 'kaddr' into the regions described
  * by 'copyinfo', consuming the entries in order.
  *
  * Each entry receives its full 'len', so the caller's 'len' has to equal
  * the sum of the entry lengths consumed.
  */
 void
 vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
     struct vm_copyinfo *copyinfo, size_t len)
 {
 	struct vm_copyinfo *ci;
 	const char *in;
 
 	in = kaddr;
 	for (ci = copyinfo; len > 0; ci++) {
 		bcopy(in, ci->hva, ci->len);
 		len -= ci->len;
 		in += ci->len;
 	}
 }
 
 /*
  * Return the amount of in-use and wired memory for the VM. Since
  * these are global stats, only return the values for vCPU 0.
  */
 VMM_STAT_DECLARE(VMM_MEM_RESIDENT);
 VMM_STAT_DECLARE(VMM_MEM_WIRED);
 
 /*
  * Stat callback: set VMM_MEM_RESIDENT to the vmspace's resident page count
  * scaled by PAGE_SIZE.  Only vcpu 0 is updated; calls for any other vcpu
  * do nothing.
  */
 static void
 vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
 {
 	if (vcpu != 0)
 		return;
 
 	vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT,
 	    PAGE_SIZE * vmspace_resident_count(vm->vmspace));
 }
 
 /*
  * Stat callback: set VMM_MEM_WIRED to the wired page count of the
  * vmspace's pmap scaled by PAGE_SIZE.  Only vcpu 0 is updated; calls for
  * any other vcpu do nothing.
  */
 static void
 vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
 {
 	if (vcpu != 0)
 		return;
 
 	vmm_stat_set(vm, vcpu, VMM_MEM_WIRED,
 	    PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
 }
 
 VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
 VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
Index: head/sys/amd64/vmm/vmm_dev.c
===================================================================
--- head/sys/amd64/vmm/vmm_dev.c	(revision 332297)
+++ head/sys/amd64/vmm/vmm_dev.c	(revision 332298)
@@ -1,1080 +1,1092 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/conf.h>
 #include <sys/sysctl.h>
 #include <sys/libkern.h>
 #include <sys/ioccom.h>
 #include <sys/mman.h>
 #include <sys/uio.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 
 #include <machine/vmparam.h>
 #include <machine/vmm.h>
 #include <machine/vmm_instruction_emul.h>
 #include <machine/vmm_dev.h>
 
 #include "vmm_lapic.h"
 #include "vmm_stat.h"
 #include "vmm_mem.h"
 #include "io/ppt.h"
 #include "io/vatpic.h"
 #include "io/vioapic.h"
 #include "io/vhpet.h"
 #include "io/vrtc.h"
 
 /*
  * State for one device memory segment.  Instances hang off the 'devmem'
  * list of a vmmdev_softc and are searched by segment id in get_memseg().
  */
 struct devmem_softc {
 	int	segid;		/* compared against vm_memseg.segid */
 	char	*name;		/* copied out to vm_memseg.name by get_memseg() */
 	struct cdev *cdev;	/* presumably the segment's device node - confirm */
 	struct vmmdev_softc *sc; /* presumably the owning VM's softc - confirm */
 	SLIST_ENTRY(devmem_softc) link;	/* linkage on vmmdev_softc.devmem */
 };
 
 /*
  * Per-VM device state.  Instances are linked on the file-scope 'head'
  * list, which vmmdev_lookup() searches by VM name.
  */
 struct vmmdev_softc {
 	struct vm	*vm;		/* vm instance cookie */
 	struct cdev	*cdev;
 	SLIST_ENTRY(vmmdev_softc) link;	/* linkage on 'head' */
 	SLIST_HEAD(, devmem_softc) devmem; /* this VM's devmem segments */
 	int		flags;		/* presumably VSC_* bits - confirm */
 };
 /* NOTE(review): looks like a bit for 'flags' above; its users are not visible here. */
 #define	VSC_LINKED		0x01
 
 /* List of all vmmdev_softc instances; searched by vmmdev_lookup(). */
 static SLIST_HEAD(, vmmdev_softc) head;
 
 /*
  * vmmdev_lookup() is meant to run with this mutex owned (see its disabled
  * mtx_assert), so it presumably protects 'head' - confirm.
  */
 static struct mtx vmmdev_mtx;
 
 /* malloc(9) type used for allocations made by this file. */
 static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
 
 SYSCTL_DECL(_hw_vmm);
 
 /* Forward declarations for the devmem helpers used by alloc_memseg(). */
 static int devmem_create_cdev(const char *vmname, int id, char *devmem);
 static void devmem_destroy(void *arg);
 
 /*
  * "Lock" a single vcpu by moving it to the VCPU_FROZEN state.
  *
  * Returns EINVAL for an out-of-range vcpu, otherwise the result of
  * vcpu_set_state().
  */
 static int
 vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
 {
 	if (vcpu < 0 || vcpu >= VM_MAXCPU)
 		return (EINVAL);
 
 	return (vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true));
 }
 
 static void
 vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
 {
 	enum vcpu_state state;
 
 	state = vcpu_get_state(sc->vm, vcpu, NULL);
 	if (state != VCPU_FROZEN) {
 		panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
 		    vcpu, state);
 	}
 
 	vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
 }
 
 /*
  * Freeze every vcpu of the VM, in ascending order.
  *
  * If any vcpu cannot be frozen, the vcpus frozen so far are released
  * again in descending order and the error is returned.  Returns 0 once
  * all vcpus are frozen.
  */
 static int
 vcpu_lock_all(struct vmmdev_softc *sc)
 {
 	int error, vcpu;
 
 	for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
 		error = vcpu_lock_one(sc, vcpu);
 		if (error == 0)
 			continue;
 
 		/* Roll back: 'vcpu' itself was not locked. */
 		while (--vcpu >= 0)
 			vcpu_unlock_one(sc, vcpu);
 		return (error);
 	}
 
 	return (0);
 }
 
 /* Release every vcpu of the VM, in ascending order. */
 static void
 vcpu_unlock_all(struct vmmdev_softc *sc)
 {
 	int vcpu;
 
 	vcpu = 0;
 	while (vcpu < VM_MAXCPU) {
 		vcpu_unlock_one(sc, vcpu);
 		vcpu++;
 	}
 }
 
 /*
  * Find the softc whose VM is called 'name' on the global list.
  * Returns NULL if there is no such VM.
  */
 static struct vmmdev_softc *
 vmmdev_lookup(const char *name)
 {
 	struct vmmdev_softc *sc;
 
 #ifdef notyet	/* XXX kernel is not compiled with invariants */
 	mtx_assert(&vmmdev_mtx, MA_OWNED);
 #endif
 
 	SLIST_FOREACH(sc, &head, link) {
 		if (strcmp(name, vm_name(sc->vm)) == 0)
 			return (sc);
 	}
 
 	return (NULL);
 }
 
 /* Return the softc stored in the cdev's si_drv1 field (may be NULL). */
 static struct vmmdev_softc *
 vmmdev_lookup2(struct cdev *cdev)
 {
 	struct vmmdev_softc *sc;
 
 	sc = cdev->si_drv1;
 	return (sc);
 }
 
 static int
 vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
 {
 	int error, off, c, prot;
 	vm_paddr_t gpa;
 	void *hpa, *cookie;
 	struct vmmdev_softc *sc;
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL)
 		return (ENXIO);
 
 	/*
 	 * Get a read lock on the guest memory map by freezing any vcpu.
 	 */
 	error = vcpu_lock_one(sc, VM_MAXCPU - 1);
 	if (error)
 		return (error);
 
 	prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
 	while (uio->uio_resid > 0 && error == 0) {
 		gpa = uio->uio_offset;
 		off = gpa & PAGE_MASK;
 		c = min(uio->uio_resid, PAGE_SIZE - off);
 
 		/*
 		 * The VM has a hole in its physical memory map. If we want to
 		 * use 'dd' to inspect memory beyond the hole we need to
 		 * provide bogus data for memory that lies in the hole.
 		 *
 		 * Since this device does not support lseek(2), dd(1) will
 		 * read(2) blocks of data to simulate the lseek(2).
 		 */
 		hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie);
 		if (hpa == NULL) {
 			if (uio->uio_rw == UIO_READ)
 				error = uiomove(__DECONST(void *, zero_region),
 				    c, uio);
 			else
 				error = EFAULT;
 		} else {
 			error = uiomove(hpa, c, uio);
 			vm_gpa_release(cookie);
 		}
 	}
 	vcpu_unlock_one(sc, VM_MAXCPU - 1);
 	return (error);
 }
 
 /*
  * get_memseg() and alloc_memseg() move up to SPECNAMELEN + 1 bytes into
  * and out of vm_memseg.name, so the field must be at least that large.
  */
 CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);
 
 /*
  * Fill in the length and name of memory segment 'mseg->segid'.
  *
  * A system memory segment gets an all-zero name.  A devmem segment gets
  * the name recorded in its devmem_softc.  When the lookup fails or the
  * segment has zero length, 'mseg->name' is left untouched.
  *
  * Returns the vm_get_memseg() error, or the copystr() result for devmem
  * segments, or 0.
  */
 static int
 get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
 {
 	struct devmem_softc *dsc;
 	bool sysmem;
 	int error;
 
 	error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
 	if (error || mseg->len == 0)
 		return (error);
 
 	if (sysmem) {
 		bzero(mseg->name, sizeof(mseg->name));
 		return (error);
 	}
 
 	/* A devmem segment must have a matching devmem_softc. */
 	SLIST_FOREACH(dsc, &sc->devmem, link) {
 		if (dsc->segid == mseg->segid)
 			break;
 	}
 	KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
 	    __func__, mseg->segid));
 
 	return (copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL));
 }
 
 static int
 alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
 {
 	char *name;
 	int error;
 	bool sysmem;
 
 	error = 0;
 	name = NULL;
 	sysmem = true;
 
 	if (VM_MEMSEG_NAME(mseg)) {
 		sysmem = false;
 		name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
 		error = copystr(mseg->name, name, SPECNAMELEN + 1, 0);
 		if (error)
 			goto done;
 	}
 
 	error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
 	if (error)
 		goto done;
 
 	if (VM_MEMSEG_NAME(mseg)) {
 		error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
 		if (error)
 			vm_free_memseg(sc->vm, mseg->segid);
 		else
 			name = NULL;	/* freed when 'cdev' is destroyed */
 	}
 done:
 	free(name, M_VMMDEV);
 	return (error);
 }
 
 /*
  * Read 'count' registers of 'vcpu': regval[i] receives the value of
  * register regnum[i].
  *
  * Stops at the first failing vm_get_register() call and returns its
  * error; returns 0 if every read succeeds (or 'count' is 0).
  */
 static int
 vm_get_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
     uint64_t *regval)
 {
 	int i, rc;
 
 	for (i = 0; i < count; i++) {
 		rc = vm_get_register(vm, vcpu, regnum[i], &regval[i]);
 		if (rc != 0)
 			return (rc);
 	}
 	return (0);
 }
 
 /*
  * Write 'count' registers of 'vcpu': register regnum[i] is set to
  * regval[i].
  *
  * Stops at the first failing vm_set_register() call and returns its
  * error, leaving earlier registers modified; returns 0 if every write
  * succeeds (or 'count' is 0).
  */
 static int
 vm_set_register_set(struct vm *vm, int vcpu, unsigned int count, int *regnum,
     uint64_t *regval)
 {
 	int i, rc;
 
 	for (i = 0; i < count; i++) {
 		rc = vm_set_register(vm, vcpu, regnum[i], regval[i]);
 		if (rc != 0)
 			return (rc);
 	}
 	return (0);
 }
 
 static int
 vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
 	     struct thread *td)
 {
 	int error, vcpu, state_changed, size;
 	cpuset_t *cpuset;
 	struct vmmdev_softc *sc;
 	struct vm_register *vmreg;
 	struct vm_seg_desc *vmsegdesc;
 	struct vm_register_set *vmregset;
 	struct vm_run *vmrun;
 	struct vm_exception *vmexc;
 	struct vm_lapic_irq *vmirq;
 	struct vm_lapic_msi *vmmsi;
 	struct vm_ioapic_irq *ioapic_irq;
 	struct vm_isa_irq *isa_irq;
 	struct vm_isa_irq_trigger *isa_irq_trigger;
 	struct vm_capability *vmcap;
 	struct vm_pptdev *pptdev;
 	struct vm_pptdev_mmio *pptmmio;
 	struct vm_pptdev_msi *pptmsi;
 	struct vm_pptdev_msix *pptmsix;
 	struct vm_nmi *vmnmi;
 	struct vm_stats *vmstats;
 	struct vm_stat_desc *statdesc;
 	struct vm_x2apic *x2apic;
 	struct vm_gpa_pte *gpapte;
 	struct vm_suspend *vmsuspend;
 	struct vm_gla2gpa *gg;
 	struct vm_activate_cpu *vac;
 	struct vm_cpuset *vm_cpuset;
 	struct vm_intinfo *vmii;
 	struct vm_rtc_time *rtctime;
 	struct vm_rtc_data *rtcdata;
 	struct vm_memmap *mm;
+	struct vm_cpu_topology *topology;
 	uint64_t *regvals;
 	int *regnums;
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL)
 		return (ENXIO);
 
 	error = 0;
 	vcpu = -1;
 	state_changed = 0;
 
 	/*
 	 * Some VMM ioctls can operate only on vcpus that are not running.
 	 */
 	switch (cmd) {
 	case VM_RUN:
 	case VM_GET_REGISTER:
 	case VM_SET_REGISTER:
 	case VM_GET_SEGMENT_DESCRIPTOR:
 	case VM_SET_SEGMENT_DESCRIPTOR:
 	case VM_GET_REGISTER_SET:
 	case VM_SET_REGISTER_SET:
 	case VM_INJECT_EXCEPTION:
 	case VM_GET_CAPABILITY:
 	case VM_SET_CAPABILITY:
 	case VM_PPTDEV_MSI:
 	case VM_PPTDEV_MSIX:
 	case VM_SET_X2APIC_STATE:
 	case VM_GLA2GPA:
 	case VM_GLA2GPA_NOFAULT:
 	case VM_ACTIVATE_CPU:
 	case VM_SET_INTINFO:
 	case VM_GET_INTINFO:
 	case VM_RESTART_INSTRUCTION:
 		/*
 		 * XXX fragile, handle with care
 		 * Assumes that the first field of the ioctl data is the vcpu.
 		 */
 		vcpu = *(int *)data;
 		error = vcpu_lock_one(sc, vcpu);
 		if (error)
 			goto done;
 		state_changed = 1;
 		break;
 
 	case VM_MAP_PPTDEV_MMIO:
 	case VM_BIND_PPTDEV:
 	case VM_UNBIND_PPTDEV:
 	case VM_ALLOC_MEMSEG:
 	case VM_MMAP_MEMSEG:
 	case VM_REINIT:
 		/*
 		 * ioctls that operate on the entire virtual machine must
 		 * prevent all vcpus from running.
 		 */
 		error = vcpu_lock_all(sc);
 		if (error)
 			goto done;
 		state_changed = 2;
 		break;
 
 	case VM_GET_MEMSEG:
 	case VM_MMAP_GETNEXT:
 		/*
 		 * Lock a vcpu to make sure that the memory map cannot be
 		 * modified while it is being inspected.
 		 */
 		vcpu = VM_MAXCPU - 1;
 		error = vcpu_lock_one(sc, vcpu);
 		if (error)
 			goto done;
 		state_changed = 1;
 		break;
 
 	default:
 		break;
 	}
 
 	switch(cmd) {
 	case VM_RUN:
 		vmrun = (struct vm_run *)data;
 		error = vm_run(sc->vm, vmrun);
 		break;
 	case VM_SUSPEND:
 		vmsuspend = (struct vm_suspend *)data;
 		error = vm_suspend(sc->vm, vmsuspend->how);
 		break;
 	case VM_REINIT:
 		error = vm_reinit(sc->vm);
 		break;
 	case VM_STAT_DESC: {
 		statdesc = (struct vm_stat_desc *)data;
 		error = vmm_stat_desc_copy(statdesc->index,
 					statdesc->desc, sizeof(statdesc->desc));
 		break;
 	}
 	case VM_STATS: {
 		CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
 		vmstats = (struct vm_stats *)data;
 		getmicrotime(&vmstats->tv);
 		error = vmm_stat_copy(sc->vm, vmstats->cpuid,
 				      &vmstats->num_entries, vmstats->statbuf);
 		break;
 	}
 	case VM_PPTDEV_MSI:
 		pptmsi = (struct vm_pptdev_msi *)data;
 		error = ppt_setup_msi(sc->vm, pptmsi->vcpu,
 				      pptmsi->bus, pptmsi->slot, pptmsi->func,
 				      pptmsi->addr, pptmsi->msg,
 				      pptmsi->numvec);
 		break;
 	case VM_PPTDEV_MSIX:
 		pptmsix = (struct vm_pptdev_msix *)data;
 		error = ppt_setup_msix(sc->vm, pptmsix->vcpu,
 				       pptmsix->bus, pptmsix->slot, 
 				       pptmsix->func, pptmsix->idx,
 				       pptmsix->addr, pptmsix->msg,
 				       pptmsix->vector_control);
 		break;
 	case VM_MAP_PPTDEV_MMIO:
 		pptmmio = (struct vm_pptdev_mmio *)data;
 		error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot,
 				     pptmmio->func, pptmmio->gpa, pptmmio->len,
 				     pptmmio->hpa);
 		break;
 	case VM_BIND_PPTDEV:
 		pptdev = (struct vm_pptdev *)data;
 		error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
 					 pptdev->func);
 		break;
 	case VM_UNBIND_PPTDEV:
 		pptdev = (struct vm_pptdev *)data;
 		error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
 					   pptdev->func);
 		break;
 	case VM_INJECT_EXCEPTION:
 		vmexc = (struct vm_exception *)data;
 		error = vm_inject_exception(sc->vm, vmexc->cpuid,
 		    vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
 		    vmexc->restart_instruction);
 		break;
 	case VM_INJECT_NMI:
 		vmnmi = (struct vm_nmi *)data;
 		error = vm_inject_nmi(sc->vm, vmnmi->cpuid);
 		break;
 	case VM_LAPIC_IRQ:
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector);
 		break;
 	case VM_LAPIC_LOCAL_IRQ:
 		vmirq = (struct vm_lapic_irq *)data;
 		error = lapic_set_local_intr(sc->vm, vmirq->cpuid,
 		    vmirq->vector);
 		break;
 	case VM_LAPIC_MSI:
 		vmmsi = (struct vm_lapic_msi *)data;
 		error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg);
 		break;
 	case VM_IOAPIC_ASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_assert_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_DEASSERT_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_PULSE_IRQ:
 		ioapic_irq = (struct vm_ioapic_irq *)data;
 		error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq);
 		break;
 	case VM_IOAPIC_PINCOUNT:
 		*(int *)data = vioapic_pincount(sc->vm);
 		break;
 	case VM_ISA_ASSERT_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_assert_irq(sc->vm,
 			    isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_DEASSERT_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_deassert_irq(sc->vm,
 			    isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_PULSE_IRQ:
 		isa_irq = (struct vm_isa_irq *)data;
 		error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq);
 		if (error == 0 && isa_irq->ioapic_irq != -1)
 			error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq);
 		break;
 	case VM_ISA_SET_IRQ_TRIGGER:
 		isa_irq_trigger = (struct vm_isa_irq_trigger *)data;
 		error = vatpic_set_irq_trigger(sc->vm,
 		    isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
 		break;
 	case VM_MMAP_GETNEXT:
 		mm = (struct vm_memmap *)data;
 		error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
 		    &mm->segoff, &mm->len, &mm->prot, &mm->flags);
 		break;
 	case VM_MMAP_MEMSEG:
 		mm = (struct vm_memmap *)data;
 		error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
 		    mm->len, mm->prot, mm->flags);
 		break;
 	case VM_ALLOC_MEMSEG:
 		error = alloc_memseg(sc, (struct vm_memseg *)data);
 		break;
 	case VM_GET_MEMSEG:
 		error = get_memseg(sc, (struct vm_memseg *)data);
 		break;
 	case VM_GET_REGISTER:
 		vmreg = (struct vm_register *)data;
 		error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum,
 					&vmreg->regval);
 		break;
 	case VM_SET_REGISTER:
 		vmreg = (struct vm_register *)data;
 		error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum,
 					vmreg->regval);
 		break;
 	case VM_SET_SEGMENT_DESCRIPTOR:
 		vmsegdesc = (struct vm_seg_desc *)data;
 		error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid,
 					vmsegdesc->regnum,
 					&vmsegdesc->desc);
 		break;
 	case VM_GET_SEGMENT_DESCRIPTOR:
 		vmsegdesc = (struct vm_seg_desc *)data;
 		error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid,
 					vmsegdesc->regnum,
 					&vmsegdesc->desc);
 		break;
 	case VM_GET_REGISTER_SET:
 		vmregset = (struct vm_register_set *)data;
 		if (vmregset->count > VM_REG_LAST) {
 			error = EINVAL;
 			break;
 		}
 		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
 		    vmregset->count);
 		if (error == 0)
 			error = vm_get_register_set(sc->vm, vmregset->cpuid,
 			    vmregset->count, regnums, regvals);
 		if (error == 0)
 			error = copyout(regvals, vmregset->regvals,
 			    sizeof(regvals[0]) * vmregset->count);
 		free(regvals, M_VMMDEV);
 		free(regnums, M_VMMDEV);
 		break;
 	case VM_SET_REGISTER_SET:
 		vmregset = (struct vm_register_set *)data;
 		if (vmregset->count > VM_REG_LAST) {
 			error = EINVAL;
 			break;
 		}
 		regvals = malloc(sizeof(regvals[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		regnums = malloc(sizeof(regnums[0]) * vmregset->count, M_VMMDEV,
 		    M_WAITOK);
 		error = copyin(vmregset->regnums, regnums, sizeof(regnums[0]) *
 		    vmregset->count);
 		if (error == 0)
 			error = copyin(vmregset->regvals, regvals,
 			    sizeof(regvals[0]) * vmregset->count);
 		if (error == 0)
 			error = vm_set_register_set(sc->vm, vmregset->cpuid,
 			    vmregset->count, regnums, regvals);
 		free(regvals, M_VMMDEV);
 		free(regnums, M_VMMDEV);
 		break;
 	case VM_GET_CAPABILITY:
 		vmcap = (struct vm_capability *)data;
 		error = vm_get_capability(sc->vm, vmcap->cpuid,
 					  vmcap->captype,
 					  &vmcap->capval);
 		break;
 	case VM_SET_CAPABILITY:
 		vmcap = (struct vm_capability *)data;
 		error = vm_set_capability(sc->vm, vmcap->cpuid,
 					  vmcap->captype,
 					  vmcap->capval);
 		break;
 	case VM_SET_X2APIC_STATE:
 		x2apic = (struct vm_x2apic *)data;
 		error = vm_set_x2apic_state(sc->vm,
 					    x2apic->cpuid, x2apic->state);
 		break;
 	case VM_GET_X2APIC_STATE:
 		x2apic = (struct vm_x2apic *)data;
 		error = vm_get_x2apic_state(sc->vm,
 					    x2apic->cpuid, &x2apic->state);
 		break;
 	case VM_GET_GPA_PMAP:
 		gpapte = (struct vm_gpa_pte *)data;
 		pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)),
 				 gpapte->gpa, gpapte->pte, &gpapte->ptenum);
 		error = 0;
 		break;
 	case VM_GET_HPET_CAPABILITIES:
 		error = vhpet_getcap((struct vm_hpet_cap *)data);
 		break;
 	case VM_GLA2GPA: {
 		CTASSERT(PROT_READ == VM_PROT_READ);
 		CTASSERT(PROT_WRITE == VM_PROT_WRITE);
 		CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
 		gg = (struct vm_gla2gpa *)data;
 		error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla,
 		    gg->prot, &gg->gpa, &gg->fault);
 		KASSERT(error == 0 || error == EFAULT,
 		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
 		break;
 	}
 	case VM_GLA2GPA_NOFAULT:
 		gg = (struct vm_gla2gpa *)data;
 		error = vm_gla2gpa_nofault(sc->vm, gg->vcpuid, &gg->paging,
 		    gg->gla, gg->prot, &gg->gpa, &gg->fault);
 		KASSERT(error == 0 || error == EFAULT,
 		    ("%s: vm_gla2gpa unknown error %d", __func__, error));
 		break;
 	case VM_ACTIVATE_CPU:
 		vac = (struct vm_activate_cpu *)data;
 		error = vm_activate_cpu(sc->vm, vac->vcpuid);
 		break;
 	case VM_GET_CPUS:
 		error = 0;
 		vm_cpuset = (struct vm_cpuset *)data;
 		size = vm_cpuset->cpusetsize;
 		if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) {
 			error = ERANGE;
 			break;
 		}
 		cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO);
 		if (vm_cpuset->which == VM_ACTIVE_CPUS)
 			*cpuset = vm_active_cpus(sc->vm);
 		else if (vm_cpuset->which == VM_SUSPENDED_CPUS)
 			*cpuset = vm_suspended_cpus(sc->vm);
 		else if (vm_cpuset->which == VM_DEBUG_CPUS)
 			*cpuset = vm_debug_cpus(sc->vm);
 		else
 			error = EINVAL;
 		if (error == 0)
 			error = copyout(cpuset, vm_cpuset->cpus, size);
 		free(cpuset, M_TEMP);
 		break;
 	case VM_SUSPEND_CPU:
 		vac = (struct vm_activate_cpu *)data;
 		error = vm_suspend_cpu(sc->vm, vac->vcpuid);
 		break;
 	case VM_RESUME_CPU:
 		vac = (struct vm_activate_cpu *)data;
 		error = vm_resume_cpu(sc->vm, vac->vcpuid);
 		break;
 	case VM_SET_INTINFO:
 		vmii = (struct vm_intinfo *)data;
 		error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1);
 		break;
 	case VM_GET_INTINFO:
 		vmii = (struct vm_intinfo *)data;
 		error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
 		    &vmii->info2);
 		break;
 	case VM_RTC_WRITE:
 		rtcdata = (struct vm_rtc_data *)data;
 		error = vrtc_nvram_write(sc->vm, rtcdata->offset,
 		    rtcdata->value);
 		break;
 	case VM_RTC_READ:
 		rtcdata = (struct vm_rtc_data *)data;
 		error = vrtc_nvram_read(sc->vm, rtcdata->offset,
 		    &rtcdata->value);
 		break;
 	case VM_RTC_SETTIME:
 		rtctime = (struct vm_rtc_time *)data;
 		error = vrtc_set_time(sc->vm, rtctime->secs);
 		break;
 	case VM_RTC_GETTIME:
 		error = 0;
 		rtctime = (struct vm_rtc_time *)data;
 		rtctime->secs = vrtc_get_time(sc->vm);
 		break;
 	case VM_RESTART_INSTRUCTION:
 		error = vm_restart_instruction(sc->vm, vcpu);
+		break;
+	case VM_SET_TOPOLOGY:
+		topology = (struct vm_cpu_topology *)data;
+		error = vm_set_topology(sc->vm, topology->sockets,
+		    topology->cores, topology->threads, topology->maxcpus);
+		break;
+	case VM_GET_TOPOLOGY:
+		topology = (struct vm_cpu_topology *)data;
+		vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
+		    &topology->threads, &topology->maxcpus);
+		error = 0;
 		break;
 	default:
 		error = ENOTTY;
 		break;
 	}
 
 	if (state_changed == 1)
 		vcpu_unlock_one(sc, vcpu);
 	else if (state_changed == 2)
 		vcpu_unlock_all(sc);
 
 done:
 	/* Make sure that no handler returns a bogus value like ERESTART */
 	KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error));
 	return (error);
 }
 
 /*
  * d_mmap_single handler for the per-VM character device.
  *
  * Translates the requested range [*offset, *offset + mapsize) of guest
  * physical address space into a VM object and an offset within it.  The
  * range must lie entirely inside one guest mapping that is backed by a
  * system memory segment.  On success '*objp' holds a new reference to the
  * backing object and '*offset' is rewritten to the offset inside it.
  *
  * Returns EINVAL for PROT_EXEC requests, negative offsets, empty or
  * wrapping ranges, a device without a softc, or a non-sysmem segment;
  * otherwise the error reported by vcpu_lock_one() or vm_mmap_getnext().
  */
 static int
 vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
     struct vm_object **objp, int nprot)
 {
 	struct vmmdev_softc *sc;
 	vm_ooffset_t map_start, map_end, seg_off;
 	vm_paddr_t seg_gpa;
 	size_t seg_len;
 	int error, segid;
 	bool sysmem;
 
 	map_start = *offset;
 	map_end = map_start + mapsize;
 	if ((nprot & PROT_EXEC) || map_start < 0 || map_start >= map_end)
 		return (EINVAL);
 
 	sc = vmmdev_lookup2(cdev);
 	if (sc == NULL) {
 		/* virtual machine is in the process of being created */
 		return (EINVAL);
 	}
 
 	/*
 	 * Freezing any one vcpu serves as a read lock on the guest
 	 * memory map.
 	 */
 	error = vcpu_lock_one(sc, VM_MAXCPU - 1);
 	if (error)
 		return (error);
 
 	/*
 	 * Step through the guest mappings until one fully contains the
 	 * requested range; a lookup error ends the search unsuccessfully.
 	 */
 	for (seg_gpa = 0; ; seg_gpa += seg_len) {
 		error = vm_mmap_getnext(sc->vm, &seg_gpa, &segid, &seg_off,
 		    &seg_len, NULL, NULL);
 		if (error)
 			goto unlock;
 		if (map_start >= seg_gpa && map_end <= seg_gpa + seg_len)
 			break;
 	}
 
 	error = vm_get_memseg(sc->vm, segid, &seg_len, &sysmem, objp);
 	KASSERT(error == 0 && *objp != NULL,
 	    ("%s: invalid memory segment %d", __func__, segid));
 	if (!sysmem) {
 		/* Only system memory may be mapped through this device. */
 		error = EINVAL;
 		goto unlock;
 	}
 	vm_object_reference(*objp);
 	*offset = seg_off + (map_start - seg_gpa);
 unlock:
 	vcpu_unlock_one(sc, VM_MAXCPU - 1);
 	return (error);
 }
 
 /*
  * Tear down a VM softc: release every devmem record, destroy the VM's
  * cdev if it is still attached, destroy the VM itself, unlink the softc
  * from the global list if it was linked, and free it.
  *
  * Called directly on the error paths of sysctl_vmm_create() and as the
  * destroy_dev_sched_cb() callback scheduled by sysctl_vmm_destroy().
  */
 static void
 vmmdev_destroy(void *arg)
 {
 	struct vmmdev_softc *sc;
 	struct devmem_softc *dsc;
 	int error;
 
 	sc = arg;
 
 	error = vcpu_lock_all(sc);
 	KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
 
 	/* Every devmem cdev must already be gone; only the records remain. */
 	while (!SLIST_EMPTY(&sc->devmem)) {
 		dsc = SLIST_FIRST(&sc->devmem);
 		KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
 		SLIST_REMOVE_HEAD(&sc->devmem, link);
 		free(dsc->name, M_VMMDEV);
 		free(dsc, M_VMMDEV);
 	}
 
 	if (sc->cdev != NULL)
 		destroy_dev(sc->cdev);
 
 	if (sc->vm != NULL)
 		vm_destroy(sc->vm);
 
 	/* Only a softc that made it onto the global list needs unlinking. */
 	if ((sc->flags & VSC_LINKED) != 0) {
 		mtx_lock(&vmmdev_mtx);
 		SLIST_REMOVE(&head, sc, vmmdev_softc, link);
 		mtx_unlock(&vmmdev_mtx);
 	}
 
 	free(sc, M_VMMDEV);
 }
 
 static int
 sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	char buf[VM_MAX_NAMELEN];
 	struct devmem_softc *dsc;
 	struct vmmdev_softc *sc;
 	struct cdev *cdev;
 
 	strlcpy(buf, "beavis", sizeof(buf));
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(buf);
 	if (sc == NULL || sc->cdev == NULL) {
 		mtx_unlock(&vmmdev_mtx);
 		return (EINVAL);
 	}
 
 	/*
 	 * The 'cdev' will be destroyed asynchronously when 'si_threadcount'
 	 * goes down to 0 so we should not do it again in the callback.
 	 *
 	 * Setting 'sc->cdev' to NULL is also used to indicate that the VM
 	 * is scheduled for destruction.
 	 */
 	cdev = sc->cdev;
 	sc->cdev = NULL;		
 	mtx_unlock(&vmmdev_mtx);
 
 	/*
 	 * Schedule all cdevs to be destroyed:
 	 *
 	 * - any new operations on the 'cdev' will return an error (ENXIO).
 	 *
 	 * - when the 'si_threadcount' dwindles down to zero the 'cdev' will
 	 *   be destroyed and the callback will be invoked in a taskqueue
 	 *   context.
 	 *
 	 * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
 	 */
 	SLIST_FOREACH(dsc, &sc->devmem, link) {
 		KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
 		destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
 	}
 	destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
 	return (0);
 }
 SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
 	    NULL, 0, sysctl_vmm_destroy, "A", NULL);
 
 /*
  * Character device switch for the per-VM device that sysctl_vmm_create()
  * creates as "vmm/<name>".  ioctl and mmap requests go to vmmdev_ioctl()
  * and vmmdev_mmap_single(); read and write share the vmmdev_rw() handler.
  */
 static struct cdevsw vmmdevsw = {
 	.d_name		= "vmmdev",
 	.d_version	= D_VERSION,
 	.d_ioctl	= vmmdev_ioctl,
 	.d_mmap_single	= vmmdev_mmap_single,
 	.d_read		= vmmdev_rw,
 	.d_write	= vmmdev_rw,
 };
 
 static int
 sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct vm *vm;
 	struct cdev *cdev;
 	struct vmmdev_softc *sc, *sc2;
 	char buf[VM_MAX_NAMELEN];
 
 	strlcpy(buf, "beavis", sizeof(buf));
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(buf);
 	mtx_unlock(&vmmdev_mtx);
 	if (sc != NULL)
 		return (EEXIST);
 
 	error = vm_create(buf, &vm);
 	if (error != 0)
 		return (error);
 
 	sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
 	sc->vm = vm;
 	SLIST_INIT(&sc->devmem);
 
 	/*
 	 * Lookup the name again just in case somebody sneaked in when we
 	 * dropped the lock.
 	 */
 	mtx_lock(&vmmdev_mtx);
 	sc2 = vmmdev_lookup(buf);
 	if (sc2 == NULL) {
 		SLIST_INSERT_HEAD(&head, sc, link);
 		sc->flags |= VSC_LINKED;
 	}
 	mtx_unlock(&vmmdev_mtx);
 
 	if (sc2 != NULL) {
 		vmmdev_destroy(sc);
 		return (EEXIST);
 	}
 
 	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
 			   UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
 	if (error != 0) {
 		vmmdev_destroy(sc);
 		return (error);
 	}
 
 	mtx_lock(&vmmdev_mtx);
 	sc->cdev = cdev;
 	sc->cdev->si_drv1 = sc;
 	mtx_unlock(&vmmdev_mtx);
 
 	return (0);
 }
 SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW,
 	    NULL, 0, sysctl_vmm_create, "A", NULL);
 
 /*
  * Initialize 'vmmdev_mtx', the mutex the create/destroy handlers in this
  * file hold around vmmdev_lookup() calls and around updates of the softc
  * list and of 'sc->cdev'.
  */
 void
 vmmdev_init(void)
 {
 	mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF);
 }
 
 /*
  * Report whether the vmm device layer can be torn down.
  *
  * Returns 0 when no VM softc is left on the global list and EBUSY while
  * at least one is still linked.
  */
 int
 vmmdev_cleanup(void)
 {
 
 	return (SLIST_EMPTY(&head) ? 0 : EBUSY);
 }
 
 /*
  * d_mmap_single handler for a devmem character device.
  *
  * Maps [*offset, *offset + len) of the device memory segment identified
  * by the devmem softc.  On success '*objp' holds a new reference to the
  * segment's VM object; '*offset' is left unchanged because the device
  * offset is used directly as the offset into the segment.
  *
  * Returns ENXIO if the cdev is not fully initialized yet, EINVAL for
  * PROT_EXEC requests, negative offsets, empty or wrapping ranges, or a
  * range that extends past the end of the segment, and otherwise any
  * error from vcpu_lock_one() or vm_get_memseg().
  */
 static int
 devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
     struct vm_object **objp, int nprot)
 {
 	struct devmem_softc *dsc;
 	vm_ooffset_t first, last;
 	size_t seglen;
 	int error;
 	bool sysmem;
 
 	dsc = cdev->si_drv1;
 	if (dsc == NULL) {
 		/* 'cdev' has been created but is not ready for use */
 		return (ENXIO);
 	}
 
 	first = *offset;
 	last = *offset + len;
 	if ((nprot & PROT_EXEC) || first < 0 || first >= last)
 		return (EINVAL);
 
 	/*
 	 * Get a read lock on the guest memory map by freezing any vcpu.
 	 */
 	error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1);
 	if (error)
 		return (error);
 
 	error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
 	KASSERT(error == 0 && !sysmem && *objp != NULL,
 	    ("%s: invalid devmem segment %d", __func__, dsc->segid));
 
 	/*
 	 * Take the object reference while the memory map is still frozen,
 	 * as vmmdev_mmap_single() does, so the segment cannot be released
 	 * between the lookup and the reference.  Skip the size check if the
 	 * lookup failed, since 'seglen' is not valid in that case.
 	 */
 	if (error == 0) {
 		if (seglen >= last)
 			vm_object_reference(*objp);
 		else
 			error = EINVAL;
 	}
 
 	vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1);
 	return (error);
 }
 
 /*
  * Character device switch for the devmem devices that devmem_create_cdev()
  * creates as "vmm.io/<vmname>.<devname>".  Only d_mmap_single is set,
  * pointing at devmem_mmap_single().
  */
 static struct cdevsw devmemsw = {
 	.d_name		= "devmem",
 	.d_version	= D_VERSION,
 	.d_mmap_single	= devmem_mmap_single,
 };
 
 /*
  * Create the "vmm.io/<vmname>.<devname>" character device for device
  * memory segment 'segid' and record it on the owning VM's devmem list.
  *
  * On success the devmem record keeps the 'devname' pointer (it is freed
  * together with the record in vmmdev_destroy()).  Returns the error from
  * make_dev_p() if the cdev cannot be created, ENODEV if the VM is being
  * created or destroyed, and 0 on success.
  */
 static int
 devmem_create_cdev(const char *vmname, int segid, char *devname)
 {
 	struct vmmdev_softc *sc;
 	struct devmem_softc *dsc;
 	struct cdev *cdev;
 	int error;
 
 	error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
 	    UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
 	if (error)
 		return (error);
 
 	/*
 	 * The record is private until it is put on the list, so the fields
 	 * that do not depend on the VM softc can be set before locking.
 	 */
 	dsc = malloc(sizeof(*dsc), M_VMMDEV, M_WAITOK | M_ZERO);
 	dsc->segid = segid;
 	dsc->name = devname;
 	dsc->cdev = cdev;
 
 	mtx_lock(&vmmdev_mtx);
 	sc = vmmdev_lookup(vmname);
 	KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
 	if (sc->cdev != NULL) {
 		dsc->sc = sc;
 		SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
 		mtx_unlock(&vmmdev_mtx);
 
 		/* The 'cdev' is ready for use after 'si_drv1' is initialized */
 		cdev->si_drv1 = dsc;
 		return (0);
 	}
 
 	/* virtual machine is being created or destroyed */
 	mtx_unlock(&vmmdev_mtx);
 	free(dsc, M_VMMDEV);
 	destroy_dev_sched_cb(cdev, NULL, 0);
 	return (ENODEV);
 }
 
 /*
  * Callback passed to destroy_dev_sched_cb() for each devmem cdev by
  * sysctl_vmm_destroy().  It only marks the record as detached; the
  * record itself is freed later by vmmdev_destroy(), which asserts that
  * 'dsc->cdev' has been cleared here.
  */
 static void
 devmem_destroy(void *arg)
 {
 	struct devmem_softc *dsc = arg;
 
 	KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
 	/* Clear the links to the cdev and to the owning VM softc. */
 	dsc->cdev = NULL;
 	dsc->sc = NULL;
 }
Index: head/sys/amd64/vmm/x86.c
===================================================================
--- head/sys/amd64/vmm/x86.c	(revision 332297)
+++ head/sys/amd64/vmm/x86.c	(revision 332298)
@@ -1,526 +1,531 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/pcpu.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 
 #include <machine/clock.h>
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
 #include <machine/segments.h>
 #include <machine/specialreg.h>
 
 #include <machine/vmm.h>
 
 #include "vmm_host.h"
 #include "vmm_ktr.h"
 #include "vmm_util.h"
 #include "x86.h"
 
 SYSCTL_DECL(_hw_vmm);
 static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL);
 
 #define	CPUID_VM_HIGH		0x40000000
 
 static const char bhyve_id[12] = "bhyve bhyve ";
 
 static uint64_t bhyve_xcpuids;
 SYSCTL_ULONG(_hw_vmm, OID_AUTO, bhyve_xcpuids, CTLFLAG_RW, &bhyve_xcpuids, 0,
     "Number of times an unknown cpuid leaf was accessed");
 
-/*
- * The default CPU topology is a single thread per package.
- */
-static u_int threads_per_core = 1;
+#if __FreeBSD_version < 1200060	/* Remove after 11 EOL helps MFCing */
+extern u_int threads_per_core;
 SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
     &threads_per_core, 0, NULL);
 
-static u_int cores_per_package = 1;
+extern u_int cores_per_package;
 SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
     &cores_per_package, 0, NULL);
+#endif
 
 static int cpuid_leaf_b = 1;
 SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
     &cpuid_leaf_b, 0, NULL);
 
 /*
  * Round up to the next power of two, if necessary, and then take log2.
  * Returns -1 if argument is zero.
  */
 static __inline int
 log2(u_int x)
 {
 	u_int rounded;
 
 	/*
 	 * powerof2() also accepts zero, so zero and exact powers of two
 	 * are used as-is.  Any other value is doubled, which moves its
 	 * highest set bit up by one and so yields the rounded-up result.
 	 */
 	rounded = powerof2(x) ? x : x << 1;
 	return (fls(rounded) - 1);
 }
 
 int
 x86_emulate_cpuid(struct vm *vm, int vcpu_id,
 		  uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
 {
 	const struct xsave_limits *limits;
 	uint64_t cr4;
 	int error, enable_invpcid, level, width, x2apic_id;
 	unsigned int func, regs[4], logical_cpus;
 	enum x2apic_state x2apic_state;
+	uint16_t cores, maxcpus, sockets, threads;
 
 	VCPU_CTR2(vm, vcpu_id, "cpuid %#x,%#x", *eax, *ecx);
 
 	/*
 	 * Requests for invalid CPUID levels should map to the highest
 	 * available level instead.
 	 */
 	if (cpu_exthigh != 0 && *eax >= 0x80000000) {
 		if (*eax > cpu_exthigh)
 			*eax = cpu_exthigh;
 	} else if (*eax >= 0x40000000) {
 		if (*eax > CPUID_VM_HIGH)
 			*eax = CPUID_VM_HIGH;
 	} else if (*eax > cpu_high) {
 		*eax = cpu_high;
 	}
 
 	func = *eax;
 
 	/*
 	 * The CPU topology advertised to the guest (the logical CPU and
 	 * core counts in the leaves below) is derived from the per-VM
 	 * sockets, cores and threads values returned by vm_get_topology().
 	 */
 	switch (func) {
 		/*
 		 * Pass these through to the guest
 		 */
 		case CPUID_0000_0000:
 		case CPUID_0000_0002:
 		case CPUID_0000_0003:
 		case CPUID_8000_0000:
 		case CPUID_8000_0002:
 		case CPUID_8000_0003:
 		case CPUID_8000_0004:
 		case CPUID_8000_0006:
 			cpuid_count(*eax, *ecx, regs);
 			break;
 		case CPUID_8000_0008:
 			cpuid_count(*eax, *ecx, regs);
 			if (vmm_is_amd()) {
 				/*
 				 * XXX this might appear silly because AMD
 				 * cpus don't have threads.
 				 *
 				 * However this matches the logical cpus as
 				 * advertised by leaf 0x1 and will work even
-				 * if the 'threads_per_core' tunable is set
-				 * incorrectly on an AMD host.
+				 * if threads is set incorrectly on an AMD host.
 				 */
-				logical_cpus = threads_per_core *
-				    cores_per_package;
+				vm_get_topology(vm, &sockets, &cores, &threads,
+				    &maxcpus);
+				logical_cpus = threads * cores;
 				regs[2] = logical_cpus - 1;
 			}
 			break;
 
 		case CPUID_8000_0001:
 			cpuid_count(*eax, *ecx, regs);
 
 			/*
 			 * Hide SVM and Topology Extension features from guest.
 			 */
 			regs[2] &= ~(AMDID2_SVM | AMDID2_TOPOLOGY);
 
 			/*
 			 * Don't advertise extended performance counter MSRs
 			 * to the guest.
 			 */
 			regs[2] &= ~AMDID2_PCXC;
 			regs[2] &= ~AMDID2_PNXC;
 			regs[2] &= ~AMDID2_PTSCEL2I;
 
 			/*
 			 * Don't advertise Instruction Based Sampling feature.
 			 */
 			regs[2] &= ~AMDID2_IBS;
 
 			/* NodeID MSR not available */
 			regs[2] &= ~AMDID2_NODE_ID;
 
 			/* Don't advertise the OS visible workaround feature */
 			regs[2] &= ~AMDID2_OSVW;
 
 			/* Hide mwaitx/monitorx capability from the guest */
 			regs[2] &= ~AMDID2_MWAITX;
 
 			/*
 			 * Hide rdtscp/ia32_tsc_aux until we know how
 			 * to deal with them.
 			 */
 			regs[3] &= ~AMDID_RDTSCP;
 			break;
 
 		case CPUID_8000_0007:
 			/*
 			 * AMD uses this leaf to advertise the processor's
 			 * power monitoring and RAS capabilities. These
 			 * features are hardware-specific and exposing
 			 * them to a guest doesn't make a lot of sense.
 			 *
 			 * Intel uses this leaf only to advertise the
 			 * "Invariant TSC" feature with all other bits
 			 * being reserved (set to zero).
 			 */
 			regs[0] = 0;
 			regs[1] = 0;
 			regs[2] = 0;
 			regs[3] = 0;
 
 			/*
 			 * "Invariant TSC" can be advertised to the guest if:
 			 * - host TSC frequency is invariant
 			 * - host TSCs are synchronized across physical cpus
 			 *
 			 * XXX This still falls short because the vcpu
 			 * can observe the TSC moving backwards as it
 			 * migrates across physical cpus. But at least
 			 * it should discourage the guest from using the
 			 * TSC to keep track of time.
 			 */
 			if (tsc_is_invariant && smp_tsc)
 				regs[3] |= AMDPM_TSC_INVARIANT;
 			break;
 
 		case CPUID_0000_0001:
 			do_cpuid(1, regs);
 
 			error = vm_get_x2apic_state(vm, vcpu_id, &x2apic_state);
 			if (error) {
 				panic("x86_emulate_cpuid: error %d "
 				      "fetching x2apic state", error);
 			}
 
 			/*
 			 * Override the APIC ID only in ebx
 			 */
 			regs[1] &= ~(CPUID_LOCAL_APIC_ID);
 			regs[1] |= (vcpu_id << CPUID_0000_0001_APICID_SHIFT);
 
 			/*
 			 * Don't expose VMX, SpeedStep, TM2 or SMX capability.
 			 * Advertise x2APIC capability and Hypervisor guest.
 			 */
 			regs[2] &= ~(CPUID2_VMX | CPUID2_EST | CPUID2_TM2);
 			regs[2] &= ~(CPUID2_SMX);
 
 			regs[2] |= CPUID2_HV;
 
 			if (x2apic_state != X2APIC_DISABLED)
 				regs[2] |= CPUID2_X2APIC;
 			else
 				regs[2] &= ~CPUID2_X2APIC;
 
 			/*
 			 * Only advertise CPUID2_XSAVE in the guest if
 			 * the host is using XSAVE.
 			 */
 			if (!(regs[2] & CPUID2_OSXSAVE))
 				regs[2] &= ~CPUID2_XSAVE;
 
 			/*
 			 * If CPUID2_XSAVE is being advertised and the
 			 * guest has set CR4_XSAVE, set
 			 * CPUID2_OSXSAVE.
 			 */
 			regs[2] &= ~CPUID2_OSXSAVE;
 			if (regs[2] & CPUID2_XSAVE) {
 				error = vm_get_register(vm, vcpu_id,
 				    VM_REG_GUEST_CR4, &cr4);
 				if (error)
 					panic("x86_emulate_cpuid: error %d "
 					      "fetching %%cr4", error);
 				if (cr4 & CR4_XSAVE)
 					regs[2] |= CPUID2_OSXSAVE;
 			}
 
 			/*
 			 * Hide monitor/mwait until we know how to deal with
 			 * these instructions.
 			 */
 			regs[2] &= ~CPUID2_MON;
 
                         /*
 			 * Hide the performance and debug features.
 			 */
 			regs[2] &= ~CPUID2_PDCM;
 
 			/*
 			 * No TSC deadline support in the APIC yet
 			 */
 			regs[2] &= ~CPUID2_TSCDLT;
 
 			/*
 			 * Hide thermal monitoring
 			 */
 			regs[3] &= ~(CPUID_ACPI | CPUID_TM);
 
 			/*
 			 * Hide the debug store capability.
 			 */
 			regs[3] &= ~CPUID_DS;
 
 			/*
 			 * Advertise the Machine Check and MTRR capability.
 			 *
 			 * Some guest OSes (e.g. Windows) will not boot if
 			 * these features are absent.
 			 */
 			regs[3] |= (CPUID_MCA | CPUID_MCE | CPUID_MTRR);
 
-			logical_cpus = threads_per_core * cores_per_package;
+			vm_get_topology(vm, &sockets, &cores, &threads,
+			    &maxcpus);
+			logical_cpus = threads * cores;
 			regs[1] &= ~CPUID_HTT_CORES;
 			regs[1] |= (logical_cpus & 0xff) << 16;
 			regs[3] |= CPUID_HTT;
 			break;
 
 		case CPUID_0000_0004:
 			cpuid_count(*eax, *ecx, regs);
 
 			if (regs[0] || regs[1] || regs[2] || regs[3]) {
+				vm_get_topology(vm, &sockets, &cores, &threads,
+				    &maxcpus);
 				regs[0] &= 0x3ff;
-				regs[0] |= (cores_per_package - 1) << 26;
+				regs[0] |= (cores - 1) << 26;
 				/*
 				 * Cache topology:
 				 * - L1 and L2 are shared only by the logical
 				 *   processors in a single core.
 				 * - L3 and above are shared by all logical
 				 *   processors in the package.
 				 */
-				logical_cpus = threads_per_core;
+				logical_cpus = threads;
 				level = (regs[0] >> 5) & 0x7;
 				if (level >= 3)
-					logical_cpus *= cores_per_package;
+					logical_cpus *= cores;
 				regs[0] |= (logical_cpus - 1) << 14;
 			}
 			break;
 
 		case CPUID_0000_0007:
 			regs[0] = 0;
 			regs[1] = 0;
 			regs[2] = 0;
 			regs[3] = 0;
 
 			/* leaf 0 */
 			if (*ecx == 0) {
 				cpuid_count(*eax, *ecx, regs);
 
 				/* Only leaf 0 is supported */
 				regs[0] = 0;
 
 				/*
 				 * Expose known-safe features.
 				 */
 				regs[1] &= (CPUID_STDEXT_FSGSBASE |
 				    CPUID_STDEXT_BMI1 | CPUID_STDEXT_HLE |
 				    CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2 |
 				    CPUID_STDEXT_ERMS | CPUID_STDEXT_RTM |
 				    CPUID_STDEXT_AVX512F |
 				    CPUID_STDEXT_AVX512PF |
 				    CPUID_STDEXT_AVX512ER |
 				    CPUID_STDEXT_AVX512CD);
 				regs[2] = 0;
 				regs[3] = 0;
 
 				/* Advertise INVPCID if it is enabled. */
 				error = vm_get_capability(vm, vcpu_id,
 				    VM_CAP_ENABLE_INVPCID, &enable_invpcid);
 				if (error == 0 && enable_invpcid)
 					regs[1] |= CPUID_STDEXT_INVPCID;
 			}
 			break;
 
 		case CPUID_0000_0006:
 			regs[0] = CPUTPM1_ARAT;
 			regs[1] = 0;
 			regs[2] = 0;
 			regs[3] = 0;
 			break;
 
 		case CPUID_0000_000A:
 			/*
 			 * Handle the access, but report 0 for
 			 * all options
 			 */
 			regs[0] = 0;
 			regs[1] = 0;
 			regs[2] = 0;
 			regs[3] = 0;
 			break;
 
 		case CPUID_0000_000B:
 			/*
 			 * Processor topology enumeration
 			 */
+			vm_get_topology(vm, &sockets, &cores, &threads,
+			    &maxcpus);
 			if (*ecx == 0) {
-				logical_cpus = threads_per_core;
+				logical_cpus = threads;
 				width = log2(logical_cpus);
 				level = CPUID_TYPE_SMT;
 				x2apic_id = vcpu_id;
 			}
 
 			if (*ecx == 1) {
-				logical_cpus = threads_per_core *
-				    cores_per_package;
+				logical_cpus = threads * cores;
 				width = log2(logical_cpus);
 				level = CPUID_TYPE_CORE;
 				x2apic_id = vcpu_id;
 			}
 
 			if (!cpuid_leaf_b || *ecx >= 2) {
 				width = 0;
 				logical_cpus = 0;
 				level = 0;
 				x2apic_id = 0;
 			}
 
 			regs[0] = width & 0x1f;
 			regs[1] = logical_cpus & 0xffff;
 			regs[2] = (level << 8) | (*ecx & 0xff);
 			regs[3] = x2apic_id;
 			break;
 
 		case CPUID_0000_000D:
 			limits = vmm_get_xsave_limits();
 			if (!limits->xsave_enabled) {
 				regs[0] = 0;
 				regs[1] = 0;
 				regs[2] = 0;
 				regs[3] = 0;
 				break;
 			}
 
 			cpuid_count(*eax, *ecx, regs);
 			switch (*ecx) {
 			case 0:
 				/*
 				 * Only permit the guest to use bits
 				 * that are active in the host in
 				 * %xcr0.  Also, claim that the
 				 * maximum save area size is
 				 * equivalent to the host's current
 				 * save area size.  Since this runs
 				 * "inside" of vmrun(), it runs with
 				 * the guest's xcr0, so the current
 				 * save area size is correct as-is.
 				 */
 				regs[0] &= limits->xcr0_allowed;
 				regs[2] = limits->xsave_max_size;
 				regs[3] &= (limits->xcr0_allowed >> 32);
 				break;
 			case 1:
 				/* Only permit XSAVEOPT. */
 				regs[0] &= CPUID_EXTSTATE_XSAVEOPT;
 				regs[1] = 0;
 				regs[2] = 0;
 				regs[3] = 0;
 				break;
 			default:
 				/*
 				 * If the leaf is for a permitted feature,
 				 * pass through as-is, otherwise return
 				 * all zeroes.
 				 */
 				if (!(limits->xcr0_allowed & (1ul << *ecx))) {
 					regs[0] = 0;
 					regs[1] = 0;
 					regs[2] = 0;
 					regs[3] = 0;
 				}
 				break;
 			}
 			break;
 
 		case 0x40000000:
 			regs[0] = CPUID_VM_HIGH;
 			bcopy(bhyve_id, &regs[1], 4);
 			bcopy(bhyve_id + 4, &regs[2], 4);
 			bcopy(bhyve_id + 8, &regs[3], 4);
 			break;
 
 		default:
 			/*
 			 * The leaf value has already been clamped so
 			 * simply pass this through, keeping count of
 			 * how many unhandled leaf values have been seen.
 			 */
 			atomic_add_long(&bhyve_xcpuids, 1);
 			cpuid_count(*eax, *ecx, regs);
 			break;
 	}
 
 	*eax = regs[0];
 	*ebx = regs[1];
 	*ecx = regs[2];
 	*edx = regs[3];
 
 	return (1);
 }
 
 /*
  * Report whether CPUID capability 'cap' is available to the guest.
  *
  * For now this simply passes through the host CPU's feature bits; the
  * 'vm' and 'vcpuid' arguments are not consulted.  An unknown capability
  * value panics.
  */
 bool
 vm_cpuid_capability(struct vm *vm, int vcpuid, enum vm_cpuid_capability cap)
 {
 	bool supported;
 
 	KASSERT(cap > 0 && cap < VCC_LAST, ("%s: invalid vm_cpu_capability %d",
 	    __func__, cap));
 
 	supported = false;
 	switch (cap) {
 	case VCC_NO_EXECUTE:
 		supported = (amd_feature & AMDID_NX) != 0;
 		break;
 	case VCC_FFXSR:
 		supported = (amd_feature & AMDID_FFXSR) != 0;
 		break;
 	case VCC_TCE:
 		supported = (amd_feature2 & AMDID2_TCE) != 0;
 		break;
 	default:
 		panic("%s: unknown vm_cpu_capability %d", __func__, cap);
 	}
 	return (supported);
 }
Index: head/usr.sbin/bhyve/bhyve.8
===================================================================
--- head/usr.sbin/bhyve/bhyve.8	(revision 332297)
+++ head/usr.sbin/bhyve/bhyve.8	(revision 332298)
@@ -1,504 +1,534 @@
 .\" Copyright (c) 2013 Peter Grehan
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd June 2, 2017
+.Dd April 6, 2018
 .Dt BHYVE 8
 .Os
 .Sh NAME
 .Nm bhyve
 .Nd "run a guest operating system inside a virtual machine"
 .Sh SYNOPSIS
 .Nm
 .Op Fl abehuwxACHPSWY
-.Op Fl c Ar numcpus
+.Oo
+.Fl c\~ Ns
+.Oo
+.Op Ar cpus= Ns
+.Ar numcpus Ns
+.Oc Ns
+.Op Ar ,sockets=n Ns
+.Op Ar ,cores=n Ns
+.Op Ar ,threads=n
+.Oc
 .Op Fl g Ar gdbport
 .Op Fl l Ar lpcdev Ns Op , Ns Ar conf
 .Op Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t
 .Op Fl p Ar vcpu:hostcpu
 .Op Fl s Ar slot,emulation Ns Op , Ns Ar conf
 .Op Fl U Ar uuid
 .Ar vmname
 .Sh DESCRIPTION
 .Nm
 is a hypervisor that runs guest operating systems inside a
 virtual machine.
 .Pp
 Parameters such as the number of virtual CPUs, amount of guest memory, and
 I/O connectivity can be specified with command-line parameters.
 .Pp
 If not using a boot ROM, the guest operating system must be loaded with
 .Xr bhyveload 8
 or a similar boot loader before running
 .Nm ,
 otherwise, it is enough to run
 .Nm
 with a boot ROM of choice.
 .Pp
 .Nm
 runs until the guest operating system reboots or an unhandled hypervisor
 exit is detected.
 .Sh OPTIONS
 .Bl -tag -width 10n
 .It Fl a
 The guest's local APIC is configured in xAPIC mode.
 The xAPIC mode is the default setting so this option is redundant.
 It will be deprecated in a future version.
 .It Fl A
 Generate ACPI tables.
 Required for
 .Fx Ns /amd64
 guests.
 .It Fl b
 Enable a low-level console device supported by
 .Fx
 kernels compiled with
 .Cd "device bvmconsole" .
 This option will be deprecated in a future version.
-.It Fl c Ar numcpus
-Number of guest virtual CPUs.
-The default is 1 and the maximum is 16.
+.It Fl c Op Ar setting ...
+Number of guest virtual CPUs
+and/or the CPU topology.
+The default value for each of
+.Ar numcpus ,
+.Ar sockets ,
+.Ar cores ,
+and
+.Ar threads
+is 1.
+The current maximum number of guest virtual CPUs is 16.
+If
+.Ar numcpus
+is not specified then it will be calculated from the other arguments.
+The topology must be consistent in that the
+.Ar numcpus
+must equal the product of
+.Ar sockets ,
+.Ar cores ,
+and
+.Ar threads .
+If a
+.Ar setting
+is specified more than once the last one has precedence.
 .It Fl C
 Include guest memory in core file.
 .It Fl e
 Force
 .Nm
 to exit when a guest issues an access to an I/O port that is not emulated.
 This is intended for debug purposes.
 .It Fl g Ar gdbport
 For
 .Fx
 kernels compiled with
 .Cd "device bvmdebug" ,
 allow a remote kernel kgdb to be relayed to the guest kernel gdb stub
 via a local IPv4 address and this port.
 This option will be deprecated in a future version.
 .It Fl h
 Print help message and exit.
 .It Fl H
 Yield the virtual CPU thread when a HLT instruction is detected.
 If this option is not specified, virtual CPUs will use 100% of a host CPU.
 .It Fl l Ar lpcdev Ns Op , Ns Ar conf
 Allow devices behind the LPC PCI-ISA bridge to be configured.
 The only supported devices are the TTY-class devices
 .Ar com1
 and
 .Ar com2
 and the boot ROM device
 .Ar bootrom .
 .It Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t
 Guest physical memory size in bytes.
 This must be the same size that was given to
 .Xr bhyveload 8 .
 .Pp
 The size argument may be suffixed with one of K, M, G or T (either upper
 or lower case) to indicate a multiple of kilobytes, megabytes, gigabytes,
 or terabytes.
 If no suffix is given, the value is assumed to be in megabytes.
 .Pp
 .Ar memsize
 defaults to 256M.
 .It Fl p Ar vcpu:hostcpu
 Pin guest's virtual CPU
 .Em vcpu
 to
 .Em hostcpu .
 .It Fl P
 Force the guest virtual CPU to exit when a PAUSE instruction is detected.
 .It Fl s Ar slot,emulation Ns Op , Ns Ar conf
 Configure a virtual PCI slot and function.
 .Pp
 .Nm
 provides PCI bus emulation and virtual devices that can be attached to
 slots on the bus.
 There are 32 available slots, with the option of providing up to 8 functions
 per slot.
 .Bl -tag -width 10n
 .It Ar slot
 .Ar pcislot[:function]
 .Ar bus:pcislot:function
 .Pp
 The
 .Ar pcislot
 value is 0 to 31.
 The optional
 .Ar function
 value is 0 to 7.
 The optional
 .Ar bus
 value is 0 to 255.
 If not specified, the
 .Ar function
 value defaults to 0.
 If not specified, the
 .Ar bus
 value defaults to 0.
 .It Ar emulation
 .Bl -tag -width 10n
 .It Li hostbridge | Li amd_hostbridge
 .Pp
 Provide a simple host bridge.
 This is usually configured at slot 0, and is required by most guest
 operating systems.
 The
 .Li amd_hostbridge
 emulation is identical but uses a PCI vendor ID of
 .Li AMD .
 .It Li passthru
 PCI pass-through device.
 .It Li virtio-net
 Virtio network interface.
 .It Li virtio-blk
 Virtio block storage interface.
 .It Li virtio-rnd
 Virtio RNG interface.
 .It Li virtio-console
 Virtio console interface, which exposes multiple ports
 to the guest in the form of simple char devices for simple IO
 between the guest and host userspaces.
 .It Li ahci
 AHCI controller attached to arbitrary devices.
 .It Li ahci-cd
 AHCI controller attached to an ATAPI CD/DVD.
 .It Li ahci-hd
 AHCI controller attached to a SATA hard-drive.
 .It Li e1000
 Intel e82545 network interface.
 .It Li uart
 PCI 16550 serial device.
 .It Li lpc
 LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM.
 The LPC bridge emulation can only be configured on bus 0.
 .It Li fbuf
 Raw framebuffer device attached to VNC server.
 .It Li xhci
 eXtensible Host Controller Interface (xHCI) USB controller.
 .El
 .It Op Ar conf
 This optional parameter describes the backend for device emulations.
 If
 .Ar conf
 is not specified, the device emulation has no backend and can be
 considered unconnected.
 .Pp
 Network devices:
 .Bl -tag -width 10n
 .It Ar tapN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx
 .It Ar vmnetN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx
 .Pp
 If
 .Ar mac
 is not specified, the MAC address is derived from a fixed OUI and the
 remaining bytes from an MD5 hash of the slot and function numbers and
 the device name.
 .Pp
 The MAC address is an ASCII string in
 .Xr ethers 5
 format.
 .El
 .Pp
 Block storage devices:
 .Bl -tag -width 10n
 .It Pa /filename Ns Oo , Ns Ar block-device-options Oc
 .It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc
 .El
 .Pp
 The
 .Ar block-device-options
 are:
 .Bl -tag -width 8n
 .It Li nocache
 Open the file with
 .Dv O_DIRECT .
 .It Li direct
 Open the file using
 .Dv O_SYNC .
 .It Li ro
 Force the file to be opened read-only.
 .It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc
 Specify the logical and physical sector sizes of the emulated disk.
 The physical sector size is optional and is equal to the logical sector size
 if not explicitly specified.
 .El
 .Pp
 TTY devices:
 .Bl -tag -width 10n
 .It Li stdio
 Connect the serial port to the standard input and output of
 the
 .Nm
 process.
 .It Pa /dev/xxx
 Use the host TTY device for serial port I/O.
 .El
 .Pp
 Boot ROM device:
 .Bl -tag -width 10n
 .It Pa romfile
 Map
 .Ar romfile
 in the guest address space reserved for boot firmware.
 .El
 .Pp
 Pass-through devices:
 .Bl -tag -width 10n
 .It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function
 Connect to a PCI device on the host at the selector described by
 .Ar slot ,
 .Ar bus ,
 and
 .Ar function
 numbers.
 .El
 .Pp
 Guest memory must be wired using the
 .Fl S
 option when a pass-through device is configured.
 .Pp
 The host device must have been reserved at boot-time using the
 .Va pptdev
 loader variable as described in
 .Xr vmm 4 .
 .Pp
 Virtio console devices:
 .Bl -tag -width 10n
 .It Li port1= Ns Pa /path/to/port1.sock Ns ,anotherport= Ns Pa ...
 A maximum of 16 ports per device can be created.
 Every port is named and corresponds to a Unix domain socket created by
 .Nm .
 .Nm
 accepts at most one connection per port at a time.
 .Pp
 Limitations:
 .Bl -bullet -offset 2n
 .It
 Due to lack of destructors in
 .Nm ,
 sockets on the filesystem must be cleaned up manually after
 .Nm
 exits.
 .It
 There is no way to use the "console port" feature, nor the console port
 resize at present.
 .It
 Emergency write is advertised, but no-op at present.
 .El
 .El
 .Pp
 Framebuffer devices:
 .Bl -tag -width 10n
 .It Oo rfb= Ns Oo Ar IP: Oc Ns Ar port Oc Ns Oo ,w= Ns Ar width Oc Ns Oo ,h= Ns Ar height Oc Ns Oo ,vga= Ns Ar vgaconf Oc Ns Oo Ns ,wait Oc Ns Oo ,password= Ns Ar password Oc
 .Bl -tag -width 8n
 .It Ar IP:port
 An
 .Ar IP
 address and a
 .Ar port
 VNC should listen on.
 The default is to listen on localhost IPv4 address and default VNC port 5900.
 Listening on an IPv6 address is not supported.
 .It Ar width No and Ar height
 A display resolution, width and height, respectively.
 If not specified, a default resolution of 1024x768 pixels will be used.
 Minimal supported resolution is 640x480 pixels,
 and maximum is 1920x1200 pixels.
 .It Ar vgaconf
 Possible values for this option are
 .Dq io
 (default),
 .Dq on
 , and
 .Dq off .
 PCI graphics cards have a dual personality in that they are
 standard PCI devices with BAR addressing, but may also
 implicitly decode legacy VGA I/O space
 .Pq Ad 0x3c0-3df
 and memory space
 .Pq 64KB at Ad 0xA0000 .
 The default
 .Dq io
 option should be used for guests that attempt to issue BIOS
 calls which result in I/O port queries, and fail to boot if I/O decode is disabled.
 .Pp
 The
 .Dq on
 option should be used along with the CSM BIOS capability in UEFI
 to boot traditional BIOS guests that require the legacy VGA I/O and
 memory regions to be available.
 .Pp
 The
 .Dq off
 option should be used for the UEFI guests that assume that
 VGA adapter is present if they detect the I/O ports.
 An example of such a guest is
 .Ox
 in UEFI mode.
 .Pp
 Please refer to the
 .Nm
 .Fx
 wiki page
 .Pq Lk https://wiki.freebsd.org/bhyve
 for configuration notes of particular guests.
 .It wait
 Instruct
 .Nm
 to only boot upon the initiation of a VNC connection, simplifying the installation
 of operating systems that require immediate keyboard input.
 This can be removed for post-installation use.
 .It password
 This type of authentication is known to be cryptographically weak and is not
 intended for use on untrusted networks.
 Many implementations will want to use stronger security, such as running
 the session over an encrypted channel provided by IPsec or SSH.
 .El
 .El
 .Pp
 xHCI USB devices:
 .Bl -tag -width 10n
 .It Li tablet
 A USB tablet device which provides precise cursor synchronization
 when using VNC.
 .El
 .El
 .It Fl S
 Wire guest memory.
 .It Fl u
 RTC keeps UTC time.
 .It Fl U Ar uuid
 Set the universally unique identifier
 .Pq UUID
 in the guest's System Management BIOS System Information structure.
 By default a UUID is generated from the host's hostname and
 .Ar vmname .
 .It Fl w
 Ignore accesses to unimplemented Model Specific Registers (MSRs).
 This is intended for debug purposes.
 .It Fl W
 Force virtio PCI device emulations to use MSI interrupts instead of MSI-X
 interrupts.
 .It Fl x
 The guest's local APIC is configured in x2APIC mode.
 .It Fl Y
 Disable MPtable generation.
 .It Ar vmname
 Alphanumeric name of the guest.
 This should be the same as that created by
 .Xr bhyveload 8 .
 .El
 .Sh SIGNAL HANDLING
 .Nm
 deals with the following signals:
 .Pp
 .Bl -tag -width indent -compact
 .It SIGTERM
 Trigger ACPI poweroff for a VM
 .El
 .Sh EXIT STATUS
 Exit status indicates how the VM was terminated:
 .Pp
 .Bl -tag -width indent -compact
 .It 0
 rebooted
 .It 1
 powered off
 .It 2
 halted
 .It 3
 triple fault
 .El
 .Sh EXAMPLES
 If not using a boot ROM, the guest operating system must have been loaded with
 .Xr bhyveload 8
 or a similar boot loader before
 .Xr bhyve 4
 can be run.
 Otherwise, the boot loader is not needed.
 .Pp
 To run a virtual machine with 1GB of memory, two virtual CPUs, a virtio
 block device backed by the
 .Pa /my/image
 filesystem image, and a serial port for the console:
 .Bd -literal -offset indent
 bhyve -c 2 -s 0,hostbridge -s 1,lpc -s 2,virtio-blk,/my/image \\
   -l com1,stdio -A -H -P -m 1G vm1
 .Ed
 .Pp
 Run a 24GB single-CPU virtual machine with three network ports, one of which
 has a MAC address specified:
 .Bd -literal -offset indent
 bhyve -s 0,hostbridge -s 1,lpc -s 2:0,virtio-net,tap0 \\
   -s 2:1,virtio-net,tap1 \\
   -s 2:2,virtio-net,tap2,mac=00:be:fa:76:45:00 \\
   -s 3,virtio-blk,/my/image -l com1,stdio \\
   -A -H -P -m 24G bigvm
 .Ed
 .Pp
 Run an 8GB quad-CPU virtual machine with 8 AHCI SATA disks, an AHCI ATAPI
 CD-ROM, a single virtio network port, an AMD hostbridge, and the console
 port connected to an
 .Xr nmdm 4
 null-modem device.
 .Bd -literal -offset indent
 bhyve -c 4 \\
   -s 0,amd_hostbridge -s 1,lpc \\
   -s 1:0,ahci,hd:/images/disk.1,hd:/images/disk.2,\\
 hd:/images/disk.3,hd:/images/disk.4,\\
 hd:/images/disk.5,hd:/images/disk.6,\\
 hd:/images/disk.7,hd:/images/disk.8,\\
 cd:/images/install.iso \\
   -s 3,virtio-net,tap0 \\
   -l com1,/dev/nmdm0A \\
   -A -H -P -m 8G
 .Ed
 .Pp
 Run a UEFI virtual machine with a display resolution of 800 by 600 pixels
 that can be accessed via VNC at: 0.0.0.0:5900.
 .Bd -literal -offset indent
 bhyve -c 2 -m 4G -w -H \\
   -s 0,hostbridge \\
   -s 3,ahci-cd,/path/to/uefi-OS-install.iso \\
   -s 4,ahci-hd,disk.img \\
   -s 5,virtio-net,tap0 \\
   -s 29,fbuf,tcp=0.0.0.0:5900,w=800,h=600,wait \\
   -s 30,xhci,tablet \\
   -s 31,lpc -l com1,stdio \\
   -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \\
    uefivm
 .Ed
 .Sh SEE ALSO
 .Xr bhyve 4 ,
 .Xr nmdm 4 ,
 .Xr vmm 4 ,
 .Xr ethers 5 ,
 .Xr bhyvectl 8 ,
 .Xr bhyveload 8
 .Sh HISTORY
 .Nm
 first appeared in
 .Fx 10.0 .
 .Sh AUTHORS
 .An Neel Natu Aq Mt neel@freebsd.org
 .An Peter Grehan Aq Mt grehan@freebsd.org
Index: head/usr.sbin/bhyve/bhyverun.c
===================================================================
--- head/usr.sbin/bhyve/bhyverun.c	(revision 332297)
+++ head/usr.sbin/bhyve/bhyverun.c	(revision 332298)
@@ -1,1013 +1,1101 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #ifndef WITHOUT_CAPSICUM
 #include <sys/capsicum.h>
 #endif
 #include <sys/mman.h>
 #include <sys/time.h>
 
 #include <machine/atomic.h>
 #include <machine/segments.h>
 
 #ifndef WITHOUT_CAPSICUM
 #include <capsicum_helpers.h>
 #endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <err.h>
 #include <errno.h>
 #include <libgen.h>
 #include <unistd.h>
 #include <assert.h>
 #include <errno.h>
 #include <pthread.h>
 #include <pthread_np.h>
 #include <sysexits.h>
 #include <stdbool.h>
+#include <stdint.h>
 
 #include <machine/vmm.h>
 #ifndef WITHOUT_CAPSICUM
 #include <machine/vmm_dev.h>
 #endif
 #include <vmmapi.h>
 
 #include "bhyverun.h"
 #include "acpi.h"
 #include "atkbdc.h"
 #include "inout.h"
 #include "dbgport.h"
 #include "fwctl.h"
 #include "ioapic.h"
 #include "mem.h"
 #include "mevent.h"
 #include "mptbl.h"
 #include "pci_emul.h"
 #include "pci_irq.h"
 #include "pci_lpc.h"
 #include "smbiostbl.h"
 #include "xmsr.h"
 #include "spinup_ap.h"
 #include "rtc.h"
 
 #define GUEST_NIO_PORT		0x488	/* guest upcalls via i/o port */
 
 #define MB		(1024UL * 1024)
 #define GB		(1024UL * MB)
 
 typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
 extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
 
 char *vmname;
 
 int guest_ncpus;
+uint16_t cores, maxcpus, sockets, threads;
+
 char *guest_uuid_str;
 
 static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
 static int virtio_msix = 1;
 static int x2apic_mode = 0;	/* default is xAPIC */
 
 static int strictio;
 static int strictmsr = 1;
 
 static int acpi;
 
 static char *progname;
 static const int BSP = 0;
 
 static cpuset_t cpumask;
 
 static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip);
 
 static struct vm_exit vmexit[VM_MAXCPU];
 
 struct bhyvestats {
         uint64_t        vmexit_bogus;
 	uint64_t	vmexit_reqidle;
         uint64_t        vmexit_hlt;
         uint64_t        vmexit_pause;
         uint64_t        vmexit_mtrap;
         uint64_t        vmexit_inst_emul;
         uint64_t        cpu_switch_rotate;
         uint64_t        cpu_switch_direct;
 } stats;
 
 struct mt_vmm_info {
 	pthread_t	mt_thr;
 	struct vmctx	*mt_ctx;
 	int		mt_vcpu;	
 } mt_vmm_info[VM_MAXCPU];
 
 static cpuset_t *vcpumap[VM_MAXCPU] = { NULL };
 
 static void
 usage(int code)
 {
 
         fprintf(stderr,
-                "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
+		"Usage: %s [-abehuwxACHPSWY]\n"
+		"       %*s [-c [[cpus=]numcpus][,sockets=n][,cores=n][,threads=n]]\n"
+		"       %*s [-g <gdb port>] [-l <lpc>]\n"
 		"       %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
 		"       -a: local apic is in xAPIC mode (deprecated)\n"
 		"       -A: create ACPI tables\n"
-		"       -c: # cpus (default 1)\n"
+		"       -c: number of cpus and/or topology specification\n"
 		"       -C: include guest memory in core file\n"
 		"       -e: exit on unhandled I/O access\n"
 		"       -g: gdb port\n"
 		"       -h: help\n"
 		"       -H: vmexit from the guest on hlt\n"
 		"       -l: LPC device configuration\n"
 		"       -m: memory size in MB\n"
 		"       -p: pin 'vcpu' to 'hostcpu'\n"
 		"       -P: vmexit from the guest on pause\n"
 		"       -s: <slot,driver,configinfo> PCI slot config\n"
 		"       -S: guest memory cannot be swapped\n"
 		"       -u: RTC keeps UTC time\n"
 		"       -U: uuid\n"
 		"       -w: ignore unimplemented MSRs\n"
 		"       -W: force virtio to use single-vector MSI\n"
 		"       -x: local apic is in x2APIC mode\n"
 		"       -Y: disable MPtable generation\n",
-		progname, (int)strlen(progname), "");
+		progname, (int)strlen(progname), "", (int)strlen(progname), "",
+		(int)strlen(progname), "");
 
 	exit(code);
 }
 
+/*
+ * Parse the -c argument: returns 0 after setting guest_ncpus, sockets,
+ * cores and threads, or -1 (globals untouched) if it is invalid.
+ * XXX This parser is known to have the following issues:
+ * 1.  It accepts null key=value tokens ",,".
+ * 2.  It accepts whitespace after = and before value.
+ * 3.  Values out of range of INT are silently wrapped.
+ * 4.  It doesn't check non-final values.
+ * 5.  The apparently bogus limits of UINT16_MAX are for future expansion.
+ *
+ * The acceptance of a null specification ('-c ""') is by design to match the
+ * manual page syntax specification, this results in a topology of 1 vCPU.
+ */
 static int
+topology_parse(const char *opt)
+{
+	uint64_t ncpus;
+	int c, chk, n, s, t, tmp;
+	char *buf, *cp, *str;
+	bool bad, ns, scts;
+
+	c = 1, n = 1, s = 1, t = 1;
+	bad = false, ns = false, scts = false;
+	/* strsep() advances str, so keep buf to free the copy later */
+	if ((buf = str = strdup(opt)) == NULL)
+		return (-1);
+
+	while (!bad && (cp = strsep(&str, ",")) != NULL) {
+		if (sscanf(cp, "%i%n", &tmp, &chk) == 1) {
+			n = tmp;
+			ns = true;
+		} else if (sscanf(cp, "cpus=%i%n", &tmp, &chk) == 1) {
+			n = tmp;
+			ns = true;
+		} else if (sscanf(cp, "sockets=%i%n", &tmp, &chk) == 1) {
+			s = tmp;
+			scts = true;
+		} else if (sscanf(cp, "cores=%i%n", &tmp, &chk) == 1) {
+			c = tmp;
+			scts = true;
+		} else if (sscanf(cp, "threads=%i%n", &tmp, &chk) == 1) {
+			t = tmp;
+			scts = true;
+#ifdef notyet  /* Do not expose this until vmm.ko implements it */
+		} else if (sscanf(cp, "maxcpus=%i%n", &tmp, &chk) == 1) {
+			m = tmp;
+#endif
+		/* Skip the empty argument case from -c "" */
+		} else if (cp[0] == '\0')
+			continue;
+		else
+			bad = true;
+		/* Any trailing garbage causes an error */
+		if (!bad && cp[chk] != '\0')
+			bad = true;
+	}
+	free(buf);
+	if (bad)
+		return (-1);
+	/* Range check: every value must be in 1..UINT16_MAX */
+	if (n < 1 || s < 1 || c < 1 || t < 1 ||
+	    n > UINT16_MAX || s > UINT16_MAX || c > UINT16_MAX  ||
+	    t > UINT16_MAX)
+		return (-1);
+	/* If only the cpus was specified, use that as sockets */
+	if (!scts)
+		s = n;
+	/* 16-bit factors cannot overflow uint64_t; a given n must match */
+	ncpus = (uint64_t)s * c * t;
+	if (ncpus > UINT16_MAX || (ns && n != ncpus))
+		return (-1);
+
+	guest_ncpus = ncpus;
+	sockets = s;
+	cores = c;
+	threads = t;
+	return(0);
+}
+
+static int
 pincpu_parse(const char *opt)
 {
 	int vcpu, pcpu;
 
 	if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) {
 		fprintf(stderr, "invalid format: %s\n", opt);
 		return (-1);
 	}
 
 	if (vcpu < 0 || vcpu >= VM_MAXCPU) {
 		fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n",
 		    vcpu, VM_MAXCPU - 1);
 		return (-1);
 	}
 
 	if (pcpu < 0 || pcpu >= CPU_SETSIZE) {
 		fprintf(stderr, "hostcpu '%d' outside valid range from "
 		    "0 to %d\n", pcpu, CPU_SETSIZE - 1);
 		return (-1);
 	}
 
 	if (vcpumap[vcpu] == NULL) {
 		if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) {
 			perror("malloc");
 			return (-1);
 		}
 		CPU_ZERO(vcpumap[vcpu]);
 	}
 	CPU_SET(pcpu, vcpumap[vcpu]);
 	return (0);
 }
 
 void
 vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
     int errcode)
 {
 	struct vmctx *ctx;
 	int error, restart_instruction;
 
 	ctx = arg;
 	restart_instruction = 1;
 
 	error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
 	    restart_instruction);
 	assert(error == 0);
 }
 
 void *
 paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
 {
 
 	return (vm_map_gpa(ctx, gaddr, len));
 }
 
 int
 fbsdrun_vmexit_on_pause(void)
 {
 
 	return (guest_vmexit_on_pause);
 }
 
 int
 fbsdrun_vmexit_on_hlt(void)
 {
 
 	return (guest_vmexit_on_hlt);
 }
 
 int
 fbsdrun_virtio_msix(void)
 {
 
 	return (virtio_msix);
 }
 
 static void *
 fbsdrun_start_thread(void *param)
 {
 	char tname[MAXCOMLEN + 1];
 	struct mt_vmm_info *mtp;
 	int vcpu;
 
 	mtp = param;
 	vcpu = mtp->mt_vcpu;
 
 	snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
 	pthread_set_name_np(mtp->mt_thr, tname);
 
 	vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
 
 	/* not reached */
 	exit(1);
 	return (NULL);
 }
 
 void
 fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
 {
 	int error;
 
 	assert(fromcpu == BSP);
 
 	/*
 	 * The 'newcpu' must be activated in the context of 'fromcpu'. If
 	 * vm_activate_cpu() is delayed until newcpu's pthread starts running
 	 * then vmm.ko is out-of-sync with bhyve and this can create a race
 	 * with vm_suspend().
 	 */
 	error = vm_activate_cpu(ctx, newcpu);
 	if (error != 0)
 		err(EX_OSERR, "could not activate CPU %d", newcpu);
 
 	CPU_SET_ATOMIC(newcpu, &cpumask);
 
 	/*
 	 * Set up the vmexit struct to allow execution to start
 	 * at the given RIP
 	 */
 	vmexit[newcpu].rip = rip;
 	vmexit[newcpu].inst_length = 0;
 
 	mt_vmm_info[newcpu].mt_ctx = ctx;
 	mt_vmm_info[newcpu].mt_vcpu = newcpu;
 
 	error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL,
 	    fbsdrun_start_thread, &mt_vmm_info[newcpu]);
 	assert(error == 0);
 }
 
 static int
 fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
 {
 
 	if (!CPU_ISSET(vcpu, &cpumask)) {
 		fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
 		exit(1);
 	}
 
 	CPU_CLR_ATOMIC(vcpu, &cpumask);
 	return (CPU_EMPTY(&cpumask));
 }
 
 static int
 vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
 		     uint32_t eax)
 {
 #if BHYVE_DEBUG
 	/*
 	 * put guest-driven debug here
 	 */
 #endif
         return (VMEXIT_CONTINUE);
 }
 
 static int
 vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	int error;
 	int bytes, port, in, out;
 	int vcpu;
 
 	vcpu = *pvcpu;
 
 	port = vme->u.inout.port;
 	bytes = vme->u.inout.bytes;
 	in = vme->u.inout.in;
 	out = !in;
 
         /* Extra-special case of host notifications */
         if (out && port == GUEST_NIO_PORT) {
                 error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax);
 		return (error);
 	}
 
 	error = emulate_inout(ctx, vcpu, vme, strictio);
 	if (error) {
 		fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n",
 		    in ? "in" : "out",
 		    bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'),
 		    port, vmexit->rip);
 		return (VMEXIT_ABORT);
 	} else {
 		return (VMEXIT_CONTINUE);
 	}
 }
 
 static int
 vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	uint64_t val;
 	uint32_t eax, edx;
 	int error;
 
 	val = 0;
 	error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val);
 	if (error != 0) {
 		fprintf(stderr, "rdmsr to register %#x on vcpu %d\n",
 		    vme->u.msr.code, *pvcpu);
 		if (strictmsr) {
 			vm_inject_gp(ctx, *pvcpu);
 			return (VMEXIT_CONTINUE);
 		}
 	}
 
 	eax = val;
 	error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax);
 	assert(error == 0);
 
 	edx = val >> 32;
 	error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx);
 	assert(error == 0);
 
 	return (VMEXIT_CONTINUE);
 }
 
 static int
 vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 	int error;
 
 	error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
 	if (error != 0) {
 		fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
 		    vme->u.msr.code, vme->u.msr.wval, *pvcpu);
 		if (strictmsr) {
 			vm_inject_gp(ctx, *pvcpu);
 			return (VMEXIT_CONTINUE);
 		}
 	}
 	return (VMEXIT_CONTINUE);
 }
 
 static int
 vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 
 	(void)spinup_ap(ctx, *pvcpu,
 		    vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
 
 	return (VMEXIT_CONTINUE);
 }
 
 #define	DEBUG_EPT_MISCONFIG
 #ifdef DEBUG_EPT_MISCONFIG
 #define	EXIT_REASON_EPT_MISCONFIG	49
 #define	VMCS_GUEST_PHYSICAL_ADDRESS	0x00002400
 #define	VMCS_IDENT(x)			((x) | 0x80000000)
 
 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
 static int ept_misconfig_ptenum;
 #endif
 
 static int
 vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
 	fprintf(stderr, "\treason\t\tVMX\n");
 	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
 	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
 	fprintf(stderr, "\tstatus\t\t%d\n", vmexit->u.vmx.status);
 	fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason);
 	fprintf(stderr, "\tqualification\t0x%016lx\n",
 	    vmexit->u.vmx.exit_qualification);
 	fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
 	fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
 #ifdef DEBUG_EPT_MISCONFIG
 	if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) {
 		vm_get_register(ctx, *pvcpu,
 		    VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS),
 		    &ept_misconfig_gpa);
 		vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte,
 		    &ept_misconfig_ptenum);
 		fprintf(stderr, "\tEPT misconfiguration:\n");
 		fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa);
 		fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n",
 		    ept_misconfig_ptenum, ept_misconfig_pte[0],
 		    ept_misconfig_pte[1], ept_misconfig_pte[2],
 		    ept_misconfig_pte[3]);
 	}
 #endif	/* DEBUG_EPT_MISCONFIG */
 	return (VMEXIT_ABORT);
 }
 
 static int
 vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	fprintf(stderr, "vm exit[%d]\n", *pvcpu);
 	fprintf(stderr, "\treason\t\tSVM\n");
 	fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip);
 	fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length);
 	fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode);
 	fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1);
 	fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2);
 	return (VMEXIT_ABORT);
 }
 
 static int
 vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	assert(vmexit->inst_length == 0);
 
 	stats.vmexit_bogus++;
 
 	return (VMEXIT_CONTINUE);
 }
 
 static int
 vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	assert(vmexit->inst_length == 0);
 
 	stats.vmexit_reqidle++;
 
 	return (VMEXIT_CONTINUE);
 }
 
 static int
 vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	stats.vmexit_hlt++;
 
 	/*
 	 * Just continue execution with the next instruction. We use
 	 * the HLT VM exit as a way to be friendly with the host
 	 * scheduler.
 	 */
 	return (VMEXIT_CONTINUE);
 }
 
 static int
 vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	stats.vmexit_pause++;
 
 	return (VMEXIT_CONTINUE);
 }
 
 static int
 vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 
 	assert(vmexit->inst_length == 0);
 
 	stats.vmexit_mtrap++;
 
 	return (VMEXIT_CONTINUE);
 }
 
 static int
 vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	int err, i;
 	struct vie *vie;
 
 	stats.vmexit_inst_emul++;
 
 	vie = &vmexit->u.inst_emul.vie;
 	err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa,
 	    vie, &vmexit->u.inst_emul.paging);
 
 	if (err) {
 		if (err == ESRCH) {
 			fprintf(stderr, "Unhandled memory access to 0x%lx\n",
 			    vmexit->u.inst_emul.gpa);
 		}
 
 		fprintf(stderr, "Failed to emulate instruction [");
 		for (i = 0; i < vie->num_valid; i++) {
 			fprintf(stderr, "0x%02x%s", vie->inst[i],
 			    i != (vie->num_valid - 1) ? " " : "");
 		}
 		fprintf(stderr, "] at 0x%lx\n", vmexit->rip);
 		return (VMEXIT_ABORT);
 	}
 
 	return (VMEXIT_CONTINUE);
 }
 
 static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER;
 
 static int
 vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
 {
 	enum vm_suspend_how how;
 
 	how = vmexit->u.suspended.how;
 
 	fbsdrun_deletecpu(ctx, *pvcpu);
 
 	if (*pvcpu != BSP) {
 		pthread_mutex_lock(&resetcpu_mtx);
 		pthread_cond_signal(&resetcpu_cond);
 		pthread_mutex_unlock(&resetcpu_mtx);
 		pthread_exit(NULL);
 	}
 
 	pthread_mutex_lock(&resetcpu_mtx);
 	while (!CPU_EMPTY(&cpumask)) {
 		pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx);
 	}
 	pthread_mutex_unlock(&resetcpu_mtx);
 
 	switch (how) {
 	case VM_SUSPEND_RESET:
 		exit(0);
 	case VM_SUSPEND_POWEROFF:
 		exit(1);
 	case VM_SUSPEND_HALT:
 		exit(2);
 	case VM_SUSPEND_TRIPLEFAULT:
 		exit(3);
 	default:
 		fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how);
 		exit(100);
 	}
 	return (0);	/* NOTREACHED */
 }
 
 static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
 	[VM_EXITCODE_INOUT]  = vmexit_inout,
 	[VM_EXITCODE_INOUT_STR]  = vmexit_inout,
 	[VM_EXITCODE_VMX]    = vmexit_vmx,
 	[VM_EXITCODE_SVM]    = vmexit_svm,
 	[VM_EXITCODE_BOGUS]  = vmexit_bogus,
 	[VM_EXITCODE_REQIDLE] = vmexit_reqidle,
 	[VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
 	[VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
 	[VM_EXITCODE_MTRAP]  = vmexit_mtrap,
 	[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
 	[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
 	[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
 	[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
 };
 
 static void
 vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
 {
 	int error, rc;
 	enum vm_exitcode exitcode;
 	cpuset_t active_cpus;
 
 	if (vcpumap[vcpu] != NULL) {
 		error = pthread_setaffinity_np(pthread_self(),
 		    sizeof(cpuset_t), vcpumap[vcpu]);
 		assert(error == 0);
 	}
 
 	error = vm_active_cpus(ctx, &active_cpus);
 	assert(CPU_ISSET(vcpu, &active_cpus));
 
 	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
 	assert(error == 0);
 
 	while (1) {
 		error = vm_run(ctx, vcpu, &vmexit[vcpu]);
 		if (error != 0)
 			break;
 
 		exitcode = vmexit[vcpu].exitcode;
 		if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) {
 			fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n",
 			    exitcode);
 			exit(1);
 		}
 
 		rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu);
 
 		switch (rc) {
 		case VMEXIT_CONTINUE:
 			break;
 		case VMEXIT_ABORT:
 			abort();
 		default:
 			exit(1);
 		}
 	}
 	fprintf(stderr, "vm_run error %d, errno %d\n", error, errno);
 }
 
 static int
 num_vcpus_allowed(struct vmctx *ctx)
 {
 	int tmp, error;
 
 	error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp);
 
 	/*
 	 * The guest is allowed to spinup more than one processor only if the
 	 * UNRESTRICTED_GUEST capability is available.
 	 */
 	if (error == 0)
 		return (VM_MAXCPU);
 	else
 		return (1);
 }
 
 void
 fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
 {
 	int err, tmp;
 
 	if (fbsdrun_vmexit_on_hlt()) {
 		err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp);
 		if (err < 0) {
 			fprintf(stderr, "VM exit on HLT not supported\n");
 			exit(1);
 		}
 		vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1);
 		if (cpu == BSP)
 			handler[VM_EXITCODE_HLT] = vmexit_hlt;
 	}
 
         if (fbsdrun_vmexit_on_pause()) {
 		/*
 		 * pause exit support required for this mode
 		 */
 		err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp);
 		if (err < 0) {
 			fprintf(stderr,
 			    "SMP mux requested, no pause support\n");
 			exit(1);
 		}
 		vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1);
 		if (cpu == BSP)
 			handler[VM_EXITCODE_PAUSE] = vmexit_pause;
         }
 
 	if (x2apic_mode)
 		err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED);
 	else
 		err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED);
 
 	if (err) {
 		fprintf(stderr, "Unable to set x2apic state (%d)\n", err);
 		exit(1);
 	}
 
 	vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
 }
 
 static struct vmctx *
 do_open(const char *vmname)
 {
 	struct vmctx *ctx;
 	int error;
 	bool reinit, romboot;
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_t rights;
 	const cap_ioctl_t *cmds;	
 	size_t ncmds;
 #endif
 
 	reinit = romboot = false;
 
 	if (lpc_bootrom())
 		romboot = true;
 
 	error = vm_create(vmname);
 	if (error) {
 		if (errno == EEXIST) {
 			if (romboot) {
 				reinit = true;
 			} else {
 				/*
 				 * The virtual machine has been setup by the
 				 * userspace bootloader.
 				 */
 			}
 		} else {
 			perror("vm_create");
 			exit(1);
 		}
 	} else {
 		if (!romboot) {
 			/*
 			 * If the virtual machine was just created then a
 			 * bootrom must be configured to boot it.
 			 */
 			fprintf(stderr, "virtual machine cannot be booted\n");
 			exit(1);
 		}
 	}
 
 	ctx = vm_open(vmname);
 	if (ctx == NULL) {
 		perror("vm_open");
 		exit(1);
 	}
 
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_init(&rights, CAP_IOCTL, CAP_MMAP_RW);
 	if (cap_rights_limit(vm_get_device_fd(ctx), &rights) == -1 &&
 	    errno != ENOSYS)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 	vm_get_ioctls(&ncmds);
 	cmds = vm_get_ioctls(NULL);
 	if (cmds == NULL)
 		errx(EX_OSERR, "out of memory");
 	if (cap_ioctls_limit(vm_get_device_fd(ctx), cmds, ncmds) == -1 &&
 	    errno != ENOSYS)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 	free((cap_ioctl_t *)cmds);
 #endif
  
 	if (reinit) {
 		error = vm_reinit(ctx);
 		if (error) {
 			perror("vm_reinit");
 			exit(1);
 		}
 	}
+	error = vm_set_topology(ctx, sockets, cores, threads, maxcpus);
+	if (error)
+		errx(EX_OSERR, "vm_set_topology");
 	return (ctx);
 }
 
 int
 main(int argc, char *argv[])
 {
 	int c, error, gdb_port, err, bvmcons;
 	int max_vcpus, mptgen, memflags;
 	int rtc_localtime;
 	struct vmctx *ctx;
 	uint64_t rip;
 	size_t memsize;
 	char *optstr;
 
 	bvmcons = 0;
 	progname = basename(argv[0]);
 	gdb_port = 0;
 	guest_ncpus = 1;
+	sockets = cores = threads = 1;
+	maxcpus = 0;
 	memsize = 256 * MB;
 	mptgen = 1;
 	rtc_localtime = 1;
 	memflags = 0;
 
 	optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:";
 	while ((c = getopt(argc, argv, optstr)) != -1) {
 		switch (c) {
 		case 'a':
 			x2apic_mode = 0;
 			break;
 		case 'A':
 			acpi = 1;
 			break;
 		case 'b':
 			bvmcons = 1;
 			break;
 		case 'p':
                         if (pincpu_parse(optarg) != 0) {
                             errx(EX_USAGE, "invalid vcpu pinning "
                                  "configuration '%s'", optarg);
                         }
 			break;
                 case 'c':
-			guest_ncpus = atoi(optarg);
+			if (topology_parse(optarg) != 0) {
+			    errx(EX_USAGE, "invalid cpu topology "
+				"'%s'", optarg);
+			}
 			break;
 		case 'C':
 			memflags |= VM_MEM_F_INCORE;
 			break;
 		case 'g':
 			gdb_port = atoi(optarg);
 			break;
 		case 'l':
 			if (lpc_device_parse(optarg) != 0) {
 				errx(EX_USAGE, "invalid lpc device "
 				    "configuration '%s'", optarg);
 			}
 			break;
 		case 's':
 			if (pci_parse_slot(optarg) != 0)
 				exit(1);
 			else
 				break;
 		case 'S':
 			memflags |= VM_MEM_F_WIRED;
 			break;
                 case 'm':
 			error = vm_parse_memsize(optarg, &memsize);
 			if (error)
 				errx(EX_USAGE, "invalid memsize '%s'", optarg);
 			break;
 		case 'H':
 			guest_vmexit_on_hlt = 1;
 			break;
 		case 'I':
 			/*
 			 * The "-I" option was used to add an ioapic to the
 			 * virtual machine.
 			 *
 			 * An ioapic is now provided unconditionally for each
 			 * virtual machine and this option is now deprecated.
 			 */
 			break;
 		case 'P':
 			guest_vmexit_on_pause = 1;
 			break;
 		case 'e':
 			strictio = 1;
 			break;
 		case 'u':
 			rtc_localtime = 0;
 			break;
 		case 'U':
 			guest_uuid_str = optarg;
 			break;
 		case 'w':
 			strictmsr = 0;
 			break;
 		case 'W':
 			virtio_msix = 0;
 			break;
 		case 'x':
 			x2apic_mode = 1;
 			break;
 		case 'Y':
 			mptgen = 0;
 			break;
 		case 'h':
 			usage(0);			
 		default:
 			usage(1);
 		}
 	}
 	argc -= optind;
 	argv += optind;
 
 	if (argc != 1)
 		usage(1);
 
 	vmname = argv[0];
 	ctx = do_open(vmname);
-
-	if (guest_ncpus < 1) {
-		fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
-		exit(1);
-	}
 
 	max_vcpus = num_vcpus_allowed(ctx);
 	if (guest_ncpus > max_vcpus) {
 		fprintf(stderr, "%d vCPUs requested but only %d available\n",
 			guest_ncpus, max_vcpus);
 		exit(1);
 	}
 
 	fbsdrun_set_capabilities(ctx, BSP);
 
 	vm_set_memflags(ctx, memflags);
 	err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
 	if (err) {
 		fprintf(stderr, "Unable to setup memory (%d)\n", errno);
 		exit(1);
 	}
 
 	error = init_msr();
 	if (error) {
 		fprintf(stderr, "init_msr error %d", error);
 		exit(1);
 	}
 
 	init_mem();
 	init_inout();
 	atkbdc_init(ctx);
 	pci_irq_init(ctx);
 	ioapic_init(ctx);
 
 	rtc_init(ctx, rtc_localtime);
 	sci_init(ctx);
 
 	/*
 	 * Exit if a device emulation finds an error in its initilization
 	 */
 	if (init_pci(ctx) != 0)
 		exit(1);
 
 	if (gdb_port != 0)
 		init_dbgport(gdb_port);
 
 	if (bvmcons)
 		init_bvmcons();
 
 	if (lpc_bootrom()) {
 		if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
 			fprintf(stderr, "ROM boot failed: unrestricted guest "
 			    "capability not available\n");
 			exit(1);
 		}
 		error = vcpu_reset(ctx, BSP);
 		assert(error == 0);
 	}
 
 	error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
 	assert(error == 0);
 
 	/*
 	 * build the guest tables, MP etc.
 	 */
 	if (mptgen) {
 		error = mptable_build(ctx, guest_ncpus);
 		if (error)
 			exit(1);
 	}
 
 	error = smbios_build(ctx);
 	assert(error == 0);
 
 	if (acpi) {
 		error = acpi_build(ctx, guest_ncpus);
 		assert(error == 0);
 	}
 
 	if (lpc_bootrom())
 		fwctl_init();
 
 #ifndef WITHOUT_CAPSICUM
 	caph_cache_catpages();
 
 	if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 
 	if (cap_enter() == -1 && errno != ENOSYS)
 		errx(EX_OSERR, "cap_enter() failed");
 #endif
 
 	/*
 	 * Change the proc title to include the VM name.
 	 */
 	setproctitle("%s", vmname); 
 	
 	/*
 	 * Add CPU 0
 	 */
 	fbsdrun_addcpu(ctx, BSP, BSP, rip);
 
 	/*
 	 * Head off to the main event dispatch loop
 	 */
 	mevent_dispatch();
 
 	exit(1);
 }
Index: head/usr.sbin/bhyvectl/bhyvectl.c
===================================================================
--- head/usr.sbin/bhyvectl/bhyvectl.c	(revision 332297)
+++ head/usr.sbin/bhyvectl/bhyvectl.c	(revision 332298)
@@ -1,2337 +1,2348 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/sysctl.h>
 #include <sys/errno.h>
 #include <sys/mman.h>
 #include <sys/cpuset.h>
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
 #include <libutil.h>
 #include <fcntl.h>
 #include <getopt.h>
 #include <time.h>
 #include <assert.h>
 #include <libutil.h>
 
 #include <machine/cpufunc.h>
 #include <machine/specialreg.h>
 #include <machine/vmm.h>
 #include <machine/vmm_dev.h>
 #include <vmmapi.h>
 
 #include "amd/vmcb.h"
 #include "intel/vmcs.h"
 
 /* Byte counts for one megabyte and one gigabyte. */
 #define	MB	(1UL << 20)
 #define	GB	(1UL << 30)
 
 /* Short aliases for the <getopt.h> argument-requirement constants. */
 #define	REQ_ARG		required_argument
 #define	NO_ARG		no_argument
 #define	OPT_ARG		optional_argument
 
 /* Program name printed at the top of the usage message. */
 static const char *progname;
 
 /*
  * Print the option summary to stderr and terminate with exit status 1.
  *
  * The vendor-independent options are printed first, followed by the
  * VMCS options when 'cpu_intel' is true or the VMCB/AVIC options
  * otherwise.  This function does not return.
  */
 static void
 usage(bool cpu_intel)
 {
 
 	(void)fprintf(stderr,
 	"Usage: %s --vm=<vmname>\n"
 	"       [--cpu=<vcpu_number>]\n"
 	"       [--create]\n"
 	"       [--destroy]\n"
 	"       [--get-all]\n"
 	"       [--get-stats]\n"
 	"       [--set-desc-ds]\n"
 	"       [--get-desc-ds]\n"
 	"       [--set-desc-es]\n"
 	"       [--get-desc-es]\n"
 	"       [--set-desc-gs]\n"
 	"       [--get-desc-gs]\n"
 	"       [--set-desc-fs]\n"
 	"       [--get-desc-fs]\n"
 	"       [--set-desc-cs]\n"
 	"       [--get-desc-cs]\n"
 	"       [--set-desc-ss]\n"
 	"       [--get-desc-ss]\n"
 	"       [--set-desc-tr]\n"
 	"       [--get-desc-tr]\n"
 	"       [--set-desc-ldtr]\n"
 	"       [--get-desc-ldtr]\n"
 	"       [--set-desc-gdtr]\n"
 	"       [--get-desc-gdtr]\n"
 	"       [--set-desc-idtr]\n"
 	"       [--get-desc-idtr]\n"
 	"       [--run]\n"
 	"       [--capname=<capname>]\n"
 	"       [--getcap]\n"
 	"       [--setcap=<0|1>]\n"
 	"       [--desc-base=<BASE>]\n"
 	"       [--desc-limit=<LIMIT>]\n"
 	"       [--desc-access=<ACCESS>]\n"
 	"       [--set-cr0=<CR0>]\n"
 	"       [--get-cr0]\n"
 	"       [--set-cr2=<CR2>]\n"
 	"       [--get-cr2]\n"
 	"       [--set-cr3=<CR3>]\n"
 	"       [--get-cr3]\n"
 	"       [--set-cr4=<CR4>]\n"
 	"       [--get-cr4]\n"
 	"       [--set-dr0=<DR0>]\n"
 	"       [--get-dr0]\n"
 	"       [--set-dr1=<DR1>]\n"
 	"       [--get-dr1]\n"
 	"       [--set-dr2=<DR2>]\n"
 	"       [--get-dr2]\n"
 	"       [--set-dr3=<DR3>]\n"
 	"       [--get-dr3]\n"
 	"       [--set-dr6=<DR6>]\n"
 	"       [--get-dr6]\n"
 	"       [--set-dr7=<DR7>]\n"
 	"       [--get-dr7]\n"
 	"       [--set-rsp=<RSP>]\n"
 	"       [--get-rsp]\n"
 	"       [--set-rip=<RIP>]\n"
 	"       [--get-rip]\n"
 	"       [--get-rax]\n"
 	"       [--set-rax=<RAX>]\n"
 	"       [--get-rbx]\n"
 	"       [--get-rcx]\n"
 	"       [--get-rdx]\n"
 	"       [--get-rsi]\n"
 	"       [--get-rdi]\n"
 	"       [--get-rbp]\n"
 	"       [--get-r8]\n"
 	"       [--get-r9]\n"
 	"       [--get-r10]\n"
 	"       [--get-r11]\n"
 	"       [--get-r12]\n"
 	"       [--get-r13]\n"
 	"       [--get-r14]\n"
 	"       [--get-r15]\n"
 	"       [--set-rflags=<RFLAGS>]\n"
 	"       [--get-rflags]\n"
 	"       [--set-cs]\n"
 	"       [--get-cs]\n"
 	"       [--set-ds]\n"
 	"       [--get-ds]\n"
 	"       [--set-es]\n"
 	"       [--get-es]\n"
 	"       [--set-fs]\n"
 	"       [--get-fs]\n"
 	"       [--set-gs]\n"
 	"       [--get-gs]\n"
 	"       [--set-ss]\n"
 	"       [--get-ss]\n"
 	"       [--get-tr]\n"
 	"       [--get-ldtr]\n"
 	"       [--set-x2apic-state=<state>]\n"
 	"       [--get-x2apic-state]\n"
 	"       [--unassign-pptdev=<bus/slot/func>]\n"
 	"       [--set-mem=<memory in units of MB>]\n"
 	"       [--get-lowmem]\n"
 	"       [--get-highmem]\n"
 	"       [--get-gpa-pmap]\n"
 	"       [--assert-lapic-lvt=<pin>]\n"
 	"       [--inject-nmi]\n"
 	"       [--force-reset]\n"
 	"       [--force-poweroff]\n"
 	"       [--get-rtc-time]\n"
 	"       [--set-rtc-time=<secs>]\n"
 	"       [--get-rtc-nvram]\n"
 	"       [--set-rtc-nvram=<val>]\n"
 	"       [--rtc-nvram-offset=<offset>]\n"
 	"       [--get-active-cpus]\n"
 	"       [--get-suspended-cpus]\n"
 	"       [--get-intinfo]\n"
 	"       [--get-eptp]\n"
 	"       [--set-exception-bitmap]\n"
 	"       [--get-exception-bitmap]\n"
 	"       [--get-tsc-offset]\n"
 	"       [--get-guest-pat]\n"
 	"       [--get-io-bitmap-address]\n"
 	"       [--get-msr-bitmap]\n"
 	"       [--get-msr-bitmap-address]\n"
 	"       [--get-guest-sysenter]\n"
-	"       [--get-exit-reason]\n",
+	"       [--get-exit-reason]\n"
+	"       [--get-cpu-topology]\n",
 	progname);
 
 	if (cpu_intel) {
 		/* Intel-only options. */
 		(void)fprintf(stderr,
 		"       [--get-vmcs-pinbased-ctls]\n"
 		"       [--get-vmcs-procbased-ctls]\n"
 		"       [--get-vmcs-procbased-ctls2]\n"
 		"       [--get-vmcs-entry-interruption-info]\n"
 		"       [--set-vmcs-entry-interruption-info=<info>]\n"
 		"       [--get-vmcs-guest-physical-address]\n"
 		"       [--get-vmcs-guest-linear-address]\n"
 		"       [--get-vmcs-host-pat]\n"
 		"       [--get-vmcs-host-cr0]\n"
 		"       [--get-vmcs-host-cr3]\n"
 		"       [--get-vmcs-host-cr4]\n"
 		"       [--get-vmcs-host-rip]\n"
 		"       [--get-vmcs-host-rsp]\n"
 		"       [--get-vmcs-cr0-mask]\n"
 		"       [--get-vmcs-cr0-shadow]\n"
 		"       [--get-vmcs-cr4-mask]\n"
 		"       [--get-vmcs-cr4-shadow]\n"
 		"       [--get-vmcs-cr3-targets]\n"
 		"       [--get-vmcs-apic-access-address]\n"
 		"       [--get-vmcs-virtual-apic-address]\n"
 		"       [--get-vmcs-tpr-threshold]\n"
 		"       [--get-vmcs-vpid]\n"
 		"       [--get-vmcs-instruction-error]\n"
 		"       [--get-vmcs-exit-ctls]\n"
 		"       [--get-vmcs-entry-ctls]\n"
 		"       [--get-vmcs-link]\n"
 		"       [--get-vmcs-exit-qualification]\n"
 		"       [--get-vmcs-exit-interruption-info]\n"
 		"       [--get-vmcs-exit-interruption-error]\n"
 		"       [--get-vmcs-interruptibility]\n"
 		);
 	} else {
 		/* Options printed for every non-Intel (AMD) host. */
 		(void)fprintf(stderr,
 		"       [--get-vmcb-intercepts]\n"
 		"       [--get-vmcb-asid]\n"
 		"       [--get-vmcb-exit-details]\n"
 		"       [--get-vmcb-tlb-ctrl]\n"
 		"       [--get-vmcb-virq]\n"
 		"       [--get-avic-apic-bar]\n"
 		"       [--get-avic-backing-page]\n"
 		"       [--get-avic-table]\n"
 		);
 	}
 	exit(1);
 }
 
 /*
  * File-scope request flags and argument values for the operations this
  * tool can perform.  The get_* flags are tested by the get_all_*() helpers
  * further down.
  * NOTE(review): presumably these are filled in while the command line is
  * parsed; the parser is outside this part of the file -- confirm.
  */
 
 /* RTC time and NVRAM access. */
 static int get_rtc_time, set_rtc_time;
 static int get_rtc_nvram, set_rtc_nvram;
 static int rtc_nvram_offset;
 static uint8_t rtc_nvram_value;
 static time_t rtc_secs;
 
 /* VM-wide and miscellaneous operations. */
 static int get_stats, getcap, setcap, capval, get_gpa_pmap;
 static int inject_nmi, assert_lapic_lvt;
 static int force_reset, force_poweroff;
 static const char *capname;
 static int create, destroy, get_memmap, get_memseg;
 static int get_intinfo;
 static int get_active_cpus, get_suspended_cpus;
 static uint64_t memsize;
 /* Control, debug and general-purpose registers. */
 static int set_cr0, get_cr0, set_cr2, get_cr2, set_cr3, get_cr3;
 static int set_cr4, get_cr4;
 static int set_efer, get_efer;
 static int set_dr0, get_dr0;
 static int set_dr1, get_dr1;
 static int set_dr2, get_dr2;
 static int set_dr3, get_dr3;
 static int set_dr6, get_dr6;
 static int set_dr7, get_dr7;
 static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, get_rflags;
 static int set_rax, get_rax;
 static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp;
 static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15;
 /* Segment descriptors (base/limit/access). */
 static int set_desc_ds, get_desc_ds;
 static int set_desc_es, get_desc_es;
 static int set_desc_fs, get_desc_fs;
 static int set_desc_gs, get_desc_gs;
 static int set_desc_cs, get_desc_cs;
 static int set_desc_ss, get_desc_ss;
 static int set_desc_gdtr, get_desc_gdtr;
 static int set_desc_idtr, get_desc_idtr;
 static int set_desc_tr, get_desc_tr;
 static int set_desc_ldtr, get_desc_ldtr;
 /* Segment selector values. */
 static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr;
 static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr;
 static int set_x2apic_state, get_x2apic_state;
 enum x2apic_state x2apic_state;
 /* PCI passthru device to unassign, as bus/slot/func. */
 static int unassign_pptdev, bus, slot, func;
 static int run;
+static int get_cpu_topology;
 
 /*
  * VMCB specific.
  */
 static int get_vmcb_intercept, get_vmcb_exit_details, get_vmcb_tlb_ctrl;
 static int get_vmcb_virq, get_avic_table;
 
 /*
  * VMCS-specific fields
  */
 static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2;
 static int get_eptp, get_io_bitmap, get_tsc_offset;
 static int get_vmcs_entry_interruption_info, set_vmcs_entry_interruption_info;
 static int get_vmcs_interruptibility;
 uint32_t vmcs_entry_interruption_info;
 static int get_vmcs_gpa, get_vmcs_gla;
 static int get_exception_bitmap, set_exception_bitmap, exception_bitmap;
 static int get_cr0_mask, get_cr0_shadow;
 static int get_cr4_mask, get_cr4_shadow;
 static int get_cr3_targets;
 static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold;
 static int get_msr_bitmap, get_msr_bitmap_address;
 static int get_vpid_asid;
 static int get_inst_err, get_exit_ctls, get_entry_ctls;
 static int get_host_cr0, get_host_cr3, get_host_cr4;
 static int get_host_rip, get_host_rsp;
 static int get_guest_pat, get_host_pat;
 static int get_guest_sysenter, get_vmcs_link;
 static int get_exit_reason, get_vmcs_exit_qualification;
 static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error;
 static int get_vmcs_exit_inst_length;
 
 /*
  * Segment descriptor fields.  get_all_segments() stores the results of
  * vm_get_desc() here before printing them.
  */
 static uint64_t desc_base;
 static uint32_t desc_limit, desc_access;
 
 /*
  * When nonzero, get_all_registers() and get_all_segments() act as if every
  * individual get_* flag were set.
  */
 static int get_all;
 
 /*
  * Print a summary of the VM exit record 'vmexit' for 'vcpu' to stdout.
  *
  * The rip and instruction length are always printed.  The remaining lines
  * depend on vmexit->exitcode: INOUT, VMX and SVM exits have dedicated
  * output, anything else is reported as unknown.
  */
 static void
 dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu)
 {
 	const char *direction, *strflag, *repflag;
 
 	printf("vm exit[%d]\n", vcpu);
 	printf("\trip\t\t0x%016lx\n", vmexit->rip);
 	printf("\tinst_length\t%d\n", vmexit->inst_length);
 
 	if (vmexit->exitcode == VM_EXITCODE_INOUT) {
 		direction = vmexit->u.inout.in ? "IN" : "OUT";
 		strflag = vmexit->u.inout.string ? "STRING " : "";
 		repflag = vmexit->u.inout.rep ? "REP " : "";
 
 		printf("\treason\t\tINOUT\n");
 		printf("\tdirection\t%s\n", direction);
 		printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes);
 		printf("\tflags\t\t%s%s\n", strflag, repflag);
 		printf("\tport\t\t0x%04x\n", vmexit->u.inout.port);
 		printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax);
 	} else if (vmexit->exitcode == VM_EXITCODE_VMX) {
 		printf("\treason\t\tVMX\n");
 		printf("\tstatus\t\t%d\n", vmexit->u.vmx.status);
 		/* The exit reason is shown in both hex and decimal. */
 		printf("\texit_reason\t0x%08x (%u)\n",
 		    vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason);
 		printf("\tqualification\t0x%016lx\n",
 		    vmexit->u.vmx.exit_qualification);
 		printf("\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type);
 		printf("\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error);
 	} else if (vmexit->exitcode == VM_EXITCODE_SVM) {
 		printf("\treason\t\tSVM\n");
 		printf("\texit_reason\t\t%#lx\n", vmexit->u.svm.exitcode);
 		printf("\texitinfo1\t\t%#lx\n", vmexit->u.svm.exitinfo1);
 		printf("\texitinfo2\t\t%#lx\n", vmexit->u.svm.exitinfo2);
 	} else {
 		printf("*** unknown vm run exitcode %d\n", vmexit->exitcode);
 	}
 }
 
 /*
  * First and last MSR numbers of the two upper MSR ranges.  The *_START
  * values are added to the loop index in dump_amd_msr_pm() and
  * dump_intel_msr_pm() to form the MSR number that is printed.
  */
 /* AMD 6th generation and Intel compatible MSRs */
 #define MSR_AMD6TH_START	0xC0000000
 #define MSR_AMD6TH_END		0xC0001FFF
 /* AMD 7th and 8th generation compatible MSRs */
 #define MSR_AMD7TH_START	0xC0010000
 #define MSR_AMD7TH_END		0xC0011FFF
 
 /*
  * Return a printable name for MSR number 'msr'.
  *
  * Well-known MSRs map to their symbolic name.  Any other number is
  * formatted into a static buffer, so the result of such a call is only
  * valid until the next call that takes the same path.
  */
 static const char *
 msr_name(uint32_t msr)
 {
 	static const struct {
 		uint32_t	num;
 		const char	*name;
 	} known[] = {
 		{ MSR_TSC,		"MSR_TSC" },
 		{ MSR_EFER,		"MSR_EFER" },
 		{ MSR_STAR,		"MSR_STAR" },
 		{ MSR_LSTAR,		"MSR_LSTAR" },
 		{ MSR_CSTAR,		"MSR_CSTAR" },
 		{ MSR_SF_MASK,		"MSR_SF_MASK" },
 		{ MSR_FSBASE,		"MSR_FSBASE" },
 		{ MSR_GSBASE,		"MSR_GSBASE" },
 		{ MSR_KGSBASE,		"MSR_KGSBASE" },
 		{ MSR_SYSENTER_CS_MSR,	"MSR_SYSENTER_CS_MSR" },
 		{ MSR_SYSENTER_ESP_MSR,	"MSR_SYSENTER_ESP_MSR" },
 		{ MSR_SYSENTER_EIP_MSR,	"MSR_SYSENTER_EIP_MSR" },
 		{ MSR_PAT,		"MSR_PAT" },
 	};
 	static char fallback[32];
 	size_t idx;
 
 	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
 		if (known[idx].num == msr)
 			return (known[idx].name);
 	}
 
 	/* No symbolic name: show the raw number instead. */
 	snprintf(fallback, sizeof(fallback), "MSR       %#08x", msr);
 	return (fallback);
 }
 
 /*
  * Print one line with the read/write permission of 'msr' for 'vcpu'.
  * Nothing is printed when the MSR is neither readable nor writeable.
  */
 static inline void
 print_msr_pm(uint64_t msr, int vcpu, int readable, int writeable)
 {
 	char rd, wr;
 
 	if (!readable && !writeable)
 		return;
 
 	rd = readable ? 'R' : '-';
 	wr = writeable ? 'W' : '-';
 	printf("%-20s[%d]\t\t%c%c\n", msr_name(msr), vcpu, rd, wr);
 }
 
 /*
  * Reference APM vol2, section 15.11 MSR Intercepts.
  *
  * Walk the AMD MSR permission map 'bitmap' and print, for 'vcpu', every MSR
  * that is readable and/or writeable.  Each MSR uses two adjacent bits (read
  * intercept, then write intercept); a clear bit means the access is
  * permitted.  The map holds three 2KB regions, one per MSR range:
  *
  *	byte offset 0x0000: MSRs 0x00000000 - 0x00001FFF
  *	byte offset 0x0800: MSRs 0xC0000000 - 0xC0001FFF
  *	byte offset 0x1000: MSRs 0xC0010000 - 0xC0011FFF
  *
  * 'bitmap' must therefore cover at least 0x1800 bytes.
  */
 static void
 dump_amd_msr_pm(const char *bitmap, int vcpu)
 {
 	int byte, bit, readable, writeable;
 	uint32_t msr;
 
 	for (msr = 0; msr < 0x2000; msr++) {
 		/* Four MSRs per byte, two bits per MSR. */
 		byte = msr / 4;
 		bit = (msr % 4) * 2;
 
 		/* Look at MSRs in the range 0x00000000 to 0x00001FFF */
 		readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
 		writeable = (bitmap[byte] & (2 << bit)) ?  0 : 1;
 		print_msr_pm(msr, vcpu, readable, writeable);
 
 		/* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */
 		byte += 2048;
 		readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
 		writeable = (bitmap[byte] & (2 << bit)) ?  0 : 1;
 		print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable,
 				writeable);
 
 		/*
 		 * MSR 0xC0010000 to 0xC0011FFF is only for AMD.  This region
 		 * directly follows the previous one, so advance by another
 		 * 2KB; a larger step would land in the reserved tail of the
 		 * permission map.
 		 */
 		byte += 2048;
 		readable = (bitmap[byte] & (1 << bit)) ? 0 : 1;
 		writeable = (bitmap[byte] & (2 << bit)) ?  0 : 1;
 		print_msr_pm(msr + MSR_AMD7TH_START, vcpu, readable,
 				writeable);
 	}
 }
 
 /*
  * Reference Intel SDM Vol3 Section 24.6.9 MSR-Bitmap Address
  *
  * Walk the Intel MSR bitmap 'bitmap' and print, for 'vcpu', every MSR that
  * is readable and/or writeable.  Each MSR has one bit in each of four 1KB
  * regions as indexed below: low-range reads, high-range reads, low-range
  * writes and high-range writes.  A clear bit means the access is permitted.
  */
 static void
 dump_intel_msr_pm(const char *bitmap, int vcpu)
 {
 	const char *rd_low, *rd_high, *wr_low, *wr_high;
 	int idx, mask;
 	uint32_t msr;
 
 	rd_low = bitmap;
 	rd_high = bitmap + 1024;
 	wr_low = bitmap + 2048;
 	wr_high = bitmap + 2048 + 1024;
 
 	for (msr = 0; msr < 0x2000; msr++) {
 		/* Eight MSRs per byte, one bit per MSR. */
 		idx = msr / 8;
 		mask = 1 << (msr & 0x7);
 
 		/* Look at MSRs in the range 0x00000000 to 0x00001FFF */
 		print_msr_pm(msr, vcpu, (rd_low[idx] & mask) == 0,
 		    (wr_low[idx] & mask) == 0);
 
 		/* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */
 		print_msr_pm(msr + MSR_AMD6TH_START, vcpu,
 		    (rd_high[idx] & mask) == 0, (wr_high[idx] & mask) == 0);
 	}
 }
 
 /*
  * Map the MSR bitmap located at offset 'addr' of /dev/mem and print its
  * contents for 'vcpu'.
  *
  * One page is mapped and decoded with dump_intel_msr_pm() when 'cpu_intel'
  * is true; otherwise two pages are mapped and decoded with
  * dump_amd_msr_pm().  Returns 0 on success, or -1 after reporting the
  * failure with perror() when /dev/mem cannot be opened or mapped.
  */
 static int
 dump_msr_bitmap(int vcpu, uint64_t addr, bool cpu_intel)
 {
 	const char *msrpm;
 	int memfd, len;
 
 	memfd = open("/dev/mem", O_RDONLY, 0);
 	if (memfd < 0) {
 		perror("Couldn't open /dev/mem");
 		return (-1);
 	}
 
 	len = cpu_intel ? PAGE_SIZE : 2 * PAGE_SIZE;
 
 	msrpm = mmap(NULL, len, PROT_READ, MAP_SHARED, memfd, addr);
 	if (msrpm == MAP_FAILED) {
 		perror("mmap failed");
 		close(memfd);
 		return (-1);
 	}
 
 	if (cpu_intel)
 		dump_intel_msr_pm(msrpm, vcpu);
 	else
 		dump_amd_msr_pm(msrpm, vcpu);
 
 	/* The cast only drops the const qualifier for munmap(). */
 	munmap((void *)msrpm, len);
 	close(memfd);
 
 	return (0);
 }
 
 /*
  * Fetch 'field' for 'vcpu' into *ret_val.  The field number is converted
  * with VMCS_IDENT() and read through vm_get_register(), whose result is
  * returned unchanged.
  */
 static int
 vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val)
 {
 	int error;
 
 	error = vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val);
 	return (error);
 }
 
 /*
  * Store 'val' into 'field' for 'vcpu'.  The field number is converted with
  * VMCS_IDENT() and written through vm_set_register(), whose result is
  * returned unchanged.
  */
 static int
 vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val)
 {
 	int error;
 
 	error = vm_set_register(ctx, vcpu, VMCS_IDENT(field), val);
 	return (error);
 }
 
 /*
  * Fetch the item described by 'off' and 'bytes' for 'vcpu' into *ret_val.
  * The pair is encoded with VMCB_ACCESS() and read through
  * vm_get_register(), whose result is returned unchanged.
  */
 static int
 vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
 	uint64_t *ret_val)
 {
 	int error;
 
 	error = vm_get_register(ctx, vcpu, VMCB_ACCESS(off, bytes), ret_val);
 	return (error);
 }
 
 /*
  * Store 'val' into the item described by 'off' and 'bytes' for 'vcpu'.
  * The pair is encoded with VMCB_ACCESS() and written through
  * vm_set_register(), whose result is returned unchanged.
  */
 static int
 vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes,
 	uint64_t val)
 {
 	int error;
 
 	error = vm_set_register(ctx, vcpu, VMCB_ACCESS(off, bytes), val);
 	return (error);
 }
 
 /*
  * Numeric identifiers for command-line options, numbered upward from 1000.
  * NOTE(review): presumably used as the 'val' members of a getopt_long(3)
  * option table defined outside this part of the file -- confirm.
  */
 enum {
 	VMNAME = 1000,	/* avoid collision with return values from getopt */
 	VCPU,
 	SET_MEM,
 	SET_EFER,
 	SET_CR0,
 	SET_CR2,
 	SET_CR3,
 	SET_CR4,
 	SET_DR0,
 	SET_DR1,
 	SET_DR2,
 	SET_DR3,
 	SET_DR6,
 	SET_DR7,
 	SET_RSP,
 	SET_RIP,
 	SET_RAX,
 	SET_RFLAGS,
 	DESC_BASE,
 	DESC_LIMIT,
 	DESC_ACCESS,
 	SET_CS,
 	SET_DS,
 	SET_ES,
 	SET_FS,
 	SET_GS,
 	SET_SS,
 	SET_TR,
 	SET_LDTR,
 	SET_X2APIC_STATE,
 	SET_EXCEPTION_BITMAP,
 	SET_VMCS_ENTRY_INTERRUPTION_INFO,
 	SET_CAP,
 	CAPNAME,
 	UNASSIGN_PPTDEV,
 	GET_GPA_PMAP,
 	ASSERT_LAPIC_LVT,
 	SET_RTC_TIME,
 	SET_RTC_NVRAM,
 	RTC_NVRAM_OFFSET,
 };
 
 /*
  * Print "<banner>:" followed by the comma-separated ids of the CPUs that
  * are members of 'cpus', or " (none)" when the set is empty.
  */
 static void
 print_cpus(const char *banner, const cpuset_t *cpus)
 {
 	const char *sep;
 	int cpu;
 
 	printf("%s:\t", banner);
 	if (CPU_EMPTY(cpus)) {
 		printf(" (none)");
 	} else {
 		/* A space precedes the first id, ", " every later one. */
 		sep = " ";
 		for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
 			if (!CPU_ISSET(cpu, cpus))
 				continue;
 			printf("%s%d", sep, cpu);
 			sep = ", ";
 		}
 	}
 	printf("\n");
 }
 
 /*
  * Print "<banner>:" followed by a decoded form of the interrupt info word
  * 'info': the event kind, its vector and, when VM_INTINFO_DEL_ERRCODE is
  * set, the error code taken from the upper 32 bits.  "n/a" is printed when
  * VM_INTINFO_VALID is clear.
  */
 static void
 print_intinfo(const char *banner, uint64_t info)
 {
 	const char *kind;
 	int type;
 
 	printf("%s:\t", banner);
 	if ((info & VM_INTINFO_VALID) == 0) {
 		printf("n/a");
 		printf("\n");
 		return;
 	}
 
 	type = info & VM_INTINFO_TYPE;
 	if (type == VM_INTINFO_HWINTR)
 		kind = "extint";
 	else if (type == VM_INTINFO_NMI)
 		kind = "nmi";
 	else if (type == VM_INTINFO_SWINTR)
 		kind = "swint";
 	else
 		kind = "exception";
 
 	printf("%s", kind);
 	printf(" vector %d", (int)VM_INTINFO_VECTOR(info));
 	if (info & VM_INTINFO_DEL_ERRCODE)
 		printf(" errcode %#x", (u_int)(info >> 32));
 	printf("\n");
 }
 
 /*
  * Identify the host CPU vendor from CPUID leaf 0.
  *
  * Returns true for "GenuineIntel" and false for "AuthenticAMD".  Any other
  * vendor string is reported on stderr and the program exits with status 1.
  */
 static bool
 cpu_vendor_intel(void)
 {
 	u_int regs[4];
 	char cpu_vendor[13];
 
 	do_cpuid(0, regs);
 
 	/*
 	 * The vendor string is assembled from regs[1], regs[3] and regs[2],
 	 * in that order.  Copy the bytes with memcpy() rather than storing
 	 * through a u_int pointer into the char array, which would break the
 	 * aliasing rules and assume the array is suitably aligned.
 	 */
 	memcpy(&cpu_vendor[0], &regs[1], sizeof(regs[1]));
 	memcpy(&cpu_vendor[4], &regs[3], sizeof(regs[3]));
 	memcpy(&cpu_vendor[8], &regs[2], sizeof(regs[2]));
 	cpu_vendor[12] = '\0';
 
 	if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
 		return (false);
 	} else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
 		return (true);
 	} else {
 		fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor);
 		exit(1);
 	}
 }
 
 static int
 get_all_registers(struct vmctx *ctx, int vcpu)
 {
 	uint64_t cr0, cr2, cr3, cr4, dr0, dr1, dr2, dr3, dr6, dr7;
 	uint64_t rsp, rip, rflags, efer;
 	uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp;
 	uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
 	int error = 0;
 
 	if (!error && (get_efer || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer);
 		if (error == 0)
 			printf("efer[%d]\t\t0x%016lx\n", vcpu, efer);
 	}
 
 	if (!error && (get_cr0 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0);
 		if (error == 0)
 			printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0);
 	}
 
 	if (!error && (get_cr2 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR2, &cr2);
 		if (error == 0)
 			printf("cr2[%d]\t\t0x%016lx\n", vcpu, cr2);
 	}
 
 	if (!error && (get_cr3 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3);
 		if (error == 0)
 			printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3);
 	}
 
 	if (!error && (get_cr4 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4);
 		if (error == 0)
 			printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4);
 	}
 
 	if (!error && (get_dr0 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR0, &dr0);
 		if (error == 0)
 			printf("dr0[%d]\t\t0x%016lx\n", vcpu, dr0);
 	}
 
 	if (!error && (get_dr1 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR1, &dr1);
 		if (error == 0)
 			printf("dr1[%d]\t\t0x%016lx\n", vcpu, dr1);
 	}
 
 	if (!error && (get_dr2 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR2, &dr2);
 		if (error == 0)
 			printf("dr2[%d]\t\t0x%016lx\n", vcpu, dr2);
 	}
 
 	if (!error && (get_dr3 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR3, &dr3);
 		if (error == 0)
 			printf("dr3[%d]\t\t0x%016lx\n", vcpu, dr3);
 	}
 
 	if (!error && (get_dr6 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR6, &dr6);
 		if (error == 0)
 			printf("dr6[%d]\t\t0x%016lx\n", vcpu, dr6);
 	}
 
 	if (!error && (get_dr7 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7);
 		if (error == 0)
 			printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7);
 	}
 
 	if (!error && (get_rsp || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp);
 		if (error == 0)
 			printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp);
 	}
 
 	if (!error && (get_rip || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
 		if (error == 0)
 			printf("rip[%d]\t\t0x%016lx\n", vcpu, rip);
 	}
 
 	if (!error && (get_rax || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax);
 		if (error == 0)
 			printf("rax[%d]\t\t0x%016lx\n", vcpu, rax);
 	}
 
 	if (!error && (get_rbx || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx);
 		if (error == 0)
 			printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx);
 	}
 
 	if (!error && (get_rcx || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx);
 		if (error == 0)
 			printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx);
 	}
 
 	if (!error && (get_rdx || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx);
 		if (error == 0)
 			printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx);
 	}
 
 	if (!error && (get_rsi || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi);
 		if (error == 0)
 			printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi);
 	}
 
 	if (!error && (get_rdi || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi);
 		if (error == 0)
 			printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi);
 	}
 
 	if (!error && (get_rbp || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp);
 		if (error == 0)
 			printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp);
 	}
 
 	if (!error && (get_r8 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8);
 		if (error == 0)
 			printf("r8[%d]\t\t0x%016lx\n", vcpu, r8);
 	}
 
 	if (!error && (get_r9 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9);
 		if (error == 0)
 			printf("r9[%d]\t\t0x%016lx\n", vcpu, r9);
 	}
 
 	if (!error && (get_r10 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10);
 		if (error == 0)
 			printf("r10[%d]\t\t0x%016lx\n", vcpu, r10);
 	}
 
 	if (!error && (get_r11 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11);
 		if (error == 0)
 			printf("r11[%d]\t\t0x%016lx\n", vcpu, r11);
 	}
 
 	if (!error && (get_r12 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12);
 		if (error == 0)
 			printf("r12[%d]\t\t0x%016lx\n", vcpu, r12);
 	}
 
 	if (!error && (get_r13 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13);
 		if (error == 0)
 			printf("r13[%d]\t\t0x%016lx\n", vcpu, r13);
 	}
 
 	if (!error && (get_r14 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14);
 		if (error == 0)
 			printf("r14[%d]\t\t0x%016lx\n", vcpu, r14);
 	}
 
 	if (!error && (get_r15 || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15);
 		if (error == 0)
 			printf("r15[%d]\t\t0x%016lx\n", vcpu, r15);
 	}
 
 	if (!error && (get_rflags || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS,
 					&rflags);
 		if (error == 0)
 			printf("rflags[%d]\t0x%016lx\n", vcpu, rflags);
 	}
 	
 	return (error);
 }
 
 static int
 get_all_segments(struct vmctx *ctx, int vcpu)
 {
 	uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
 	int error = 0;
 
 	if (!error && (get_desc_ds || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS,
 				   &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			      vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_es || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_fs || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_gs || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_ss || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_cs || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_tr || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_ldtr || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n",
 			       vcpu, desc_base, desc_limit, desc_access);
 		}
 	}
 
 	if (!error && (get_desc_gdtr || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("gdtr[%d]\t\t0x%016lx/0x%08x\n",
 			       vcpu, desc_base, desc_limit);
 		}
 	}
 
 	if (!error && (get_desc_idtr || get_all)) {
 		error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR,
 				    &desc_base, &desc_limit, &desc_access);
 		if (error == 0) {
 			printf("idtr[%d]\t\t0x%016lx/0x%08x\n",
 			       vcpu, desc_base, desc_limit);
 		}
 	}
 
 	if (!error && (get_cs || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs);
 		if (error == 0)
 			printf("cs[%d]\t\t0x%04lx\n", vcpu, cs);
 	}
 
 	if (!error && (get_ds || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds);
 		if (error == 0)
 			printf("ds[%d]\t\t0x%04lx\n", vcpu, ds);
 	}
 
 	if (!error && (get_es || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es);
 		if (error == 0)
 			printf("es[%d]\t\t0x%04lx\n", vcpu, es);
 	}
 
 	if (!error && (get_fs || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_FS, &fs);
 		if (error == 0)
 			printf("fs[%d]\t\t0x%04lx\n", vcpu, fs);
 	}
 
 	if (!error && (get_gs || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs);
 		if (error == 0)
 			printf("gs[%d]\t\t0x%04lx\n", vcpu, gs);
 	}
 
 	if (!error && (get_ss || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss);
 		if (error == 0)
 			printf("ss[%d]\t\t0x%04lx\n", vcpu, ss);
 	}
 
 	if (!error && (get_tr || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr);
 		if (error == 0)
 			printf("tr[%d]\t\t0x%04lx\n", vcpu, tr);
 	}
 
 	if (!error && (get_ldtr || get_all)) {
 		error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr);
 		if (error == 0)
 			printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr);
 	}
 
 	return (error);
 }
 
 static int
 get_misc_vmcs(struct vmctx *ctx, int vcpu)
 {
 	uint64_t ctl, cr0, cr3, cr4, rsp, rip, pat, addr, u64;
 	int error = 0;
 
 	if (!error && (get_cr0_mask || get_all)) {
 		uint64_t cr0mask;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask);
 		if (error == 0)
 			printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask);
 	}
 
 	if (!error && (get_cr0_shadow || get_all)) {
 		uint64_t cr0shadow;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW,
 					  &cr0shadow);
 		if (error == 0)
 			printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow);
 	}
 
 	if (!error && (get_cr4_mask || get_all)) {
 		uint64_t cr4mask;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask);
 		if (error == 0)
 			printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask);
 	}
 
 	if (!error && (get_cr4_shadow || get_all)) {
 		uint64_t cr4shadow;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW,
 					  &cr4shadow);
 		if (error == 0)
 			printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow);
 	}
 	
 	if (!error && (get_cr3_targets || get_all)) {
 		uint64_t target_count, target_addr;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT,
 					  &target_count);
 		if (error == 0) {
 			printf("cr3_target_count[%d]\t0x%016lx\n",
 				vcpu, target_count);
 		}
 
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0,
 					  &target_addr);
 		if (error == 0) {
 			printf("cr3_target0[%d]\t\t0x%016lx\n",
 				vcpu, target_addr);
 		}
 
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1,
 					  &target_addr);
 		if (error == 0) {
 			printf("cr3_target1[%d]\t\t0x%016lx\n",
 				vcpu, target_addr);
 		}
 
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2,
 					  &target_addr);
 		if (error == 0) {
 			printf("cr3_target2[%d]\t\t0x%016lx\n",
 				vcpu, target_addr);
 		}
 
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3,
 					  &target_addr);
 		if (error == 0) {
 			printf("cr3_target3[%d]\t\t0x%016lx\n",
 				vcpu, target_addr);
 		}
 	}
 
 	if (!error && (get_pinbased_ctls || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl);
 		if (error == 0)
 			printf("pinbased_ctls[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_procbased_ctls || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_PRI_PROC_BASED_CTLS, &ctl);
 		if (error == 0)
 			printf("procbased_ctls[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_procbased_ctls2 || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_SEC_PROC_BASED_CTLS, &ctl);
 		if (error == 0)
 			printf("procbased_ctls2[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_vmcs_gla || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_GUEST_LINEAR_ADDRESS, &u64);
 		if (error == 0)
 			printf("gla[%d]\t\t0x%016lx\n", vcpu, u64);
 	}
 
 	if (!error && (get_vmcs_gpa || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_GUEST_PHYSICAL_ADDRESS, &u64);
 		if (error == 0)
 			printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64);
 	}
 
 	if (!error && (get_vmcs_entry_interruption_info || 
 		get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64);
 		if (error == 0) {
 			printf("entry_interruption_info[%d]\t0x%016lx\n",
 				vcpu, u64);
 		}
 	}
 	
 	if (!error && (get_tpr_threshold || get_all)) {
 		uint64_t threshold;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD,
 					  &threshold);
 		if (error == 0)
 			printf("tpr_threshold[%d]\t0x%016lx\n", vcpu, threshold);
 	}
 
 	if (!error && (get_inst_err || get_all)) {
 		uint64_t insterr;
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR,
 					  &insterr);
 		if (error == 0) {
 			printf("instruction_error[%d]\t0x%016lx\n",
 				vcpu, insterr);
 		}
 	}
 	
 	if (!error && (get_exit_ctls || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl);
 		if (error == 0)
 			printf("exit_ctls[%d]\t\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_entry_ctls || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl);
 		if (error == 0)
 			printf("entry_ctls[%d]\t\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_host_pat || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat);
 		if (error == 0)
 			printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat);
 	}
 
 	if (!error && (get_host_cr0 || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0);
 		if (error == 0)
 			printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0);
 	}
 
 	if (!error && (get_host_cr3 || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3);
 		if (error == 0)
 			printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3);
 	}
 
 	if (!error && (get_host_cr4 || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4);
 		if (error == 0)
 			printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4);
 	}
 
 	if (!error && (get_host_rip || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip);
 		if (error == 0)
 			printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip);
 	}
 
 	if (!error && (get_host_rsp || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp);
 		if (error == 0)
 			printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp);
 	}
 	
 	if (!error && (get_vmcs_link || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr);
 		if (error == 0)
 			printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr);
 	}
 
 	if (!error && (get_vmcs_exit_interruption_info || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_INFO, &u64);
 		if (error == 0) {
 			printf("vmcs_exit_interruption_info[%d]\t0x%016lx\n",
 				vcpu, u64);
 		}
 	}
 
 	if (!error && (get_vmcs_exit_interruption_error || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_ERRCODE,
 		    			  &u64);
 		if (error == 0) {
 			printf("vmcs_exit_interruption_error[%d]\t0x%016lx\n",
 				vcpu, u64);
 		}
 	}
 
 	if (!error && (get_vmcs_interruptibility || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 					  VMCS_GUEST_INTERRUPTIBILITY, &u64);
 		if (error == 0) {
 			printf("vmcs_guest_interruptibility[%d]\t0x%016lx\n",
 				vcpu, u64);
 		}
 	}
 
 	if (!error && (get_vmcs_exit_inst_length || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu,
 		    VMCS_EXIT_INSTRUCTION_LENGTH, &u64);
 		if (error == 0)
 			printf("vmcs_exit_inst_length[%d]\t0x%08x\n", vcpu,
 			    (uint32_t)u64);
 	}
 
 	if (!error && (get_vmcs_exit_qualification || get_all)) {
 		error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION,
 					  &u64);
 		if (error == 0)
 			printf("vmcs_exit_qualification[%d]\t0x%016lx\n",
 				vcpu, u64);
 	}
 	
 	return (error);
 }
 
 static int
 get_misc_vmcb(struct vmctx *ctx, int vcpu)
 {
 	uint64_t ctl, addr;
 	int error = 0;
 
 	if (!error && (get_vmcb_intercept || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_CR_INTERCEPT, 4,
 		    &ctl);
 		if (error == 0)
 			printf("cr_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_DR_INTERCEPT, 4,
 		    &ctl);
 		if (error == 0)
 			printf("dr_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXC_INTERCEPT, 4,
 		    &ctl);
 		if (error == 0)
 			printf("exc_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_INST1_INTERCEPT,
 		    4, &ctl);
 		if (error == 0)
 			printf("inst1_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_INST2_INTERCEPT,
 		    4, &ctl);
 		if (error == 0)
 			printf("inst2_intercept[%d]\t0x%08x\n", vcpu, (int)ctl);
 	}
 
 	if (!error && (get_vmcb_tlb_ctrl || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_TLB_CTRL,
 					  4, &ctl);
 		if (error == 0)
 			printf("TLB ctrl[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_vmcb_exit_details || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINFO1,
 					  8, &ctl);
 		if (error == 0)
 			printf("exitinfo1[%d]\t0x%016lx\n", vcpu, ctl);
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINFO2,
 					  8, &ctl);
 		if (error == 0)
 			printf("exitinfo2[%d]\t0x%016lx\n", vcpu, ctl);
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINTINFO,
 					  8, &ctl);
 		if (error == 0)
 			printf("exitintinfo[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_vmcb_virq || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_VIRQ,
 					  8, &ctl);
 		if (error == 0)
 			printf("v_irq/tpr[%d]\t0x%016lx\n", vcpu, ctl);
 	}
 
 	if (!error && (get_apic_access_addr || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_BAR, 8,
 					  &addr);
 		if (error == 0)
 			printf("AVIC apic_bar[%d]\t0x%016lx\n", vcpu, addr);
 	}
 
 	if (!error && (get_virtual_apic_addr || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_PAGE, 8,
 					  &addr);
 		if (error == 0)
 			printf("AVIC backing page[%d]\t0x%016lx\n", vcpu, addr);
 	}
 
 	if (!error && (get_avic_table || get_all)) {
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_LT, 8,
 					  &addr);
 		if (error == 0)
 			printf("AVIC logical table[%d]\t0x%016lx\n",
 				vcpu, addr);
 		error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_PT, 8,
 					  &addr);
 		if (error == 0)
 			printf("AVIC physical table[%d]\t0x%016lx\n",
 				vcpu, addr);
 	}
 
 	return (error);
 }
 
 /*
  * Build the getopt_long() option table for this host.
  *
  * The result is the common options, followed by the Intel-specific
  * options when 'cpu_intel' is true or the AMD-specific ones otherwise,
  * followed by an all-zero terminating entry.
  *
  * Returns a malloc()ed array that the caller releases with free().  If
  * the allocation fails the error is reported and the program exits, so
  * the return value is never NULL.
  */
 static struct option *
 setup_options(bool cpu_intel)
 {
 	const struct option common_opts[] = {
 		{ "vm",		REQ_ARG,	0,	VMNAME },
 		{ "cpu",	REQ_ARG,	0,	VCPU },
 		{ "set-mem",	REQ_ARG,	0,	SET_MEM },
 		{ "set-efer",	REQ_ARG,	0,	SET_EFER },
 		{ "set-cr0",	REQ_ARG,	0,	SET_CR0 },
 		{ "set-cr2",	REQ_ARG,	0,	SET_CR2 },
 		{ "set-cr3",	REQ_ARG,	0,	SET_CR3 },
 		{ "set-cr4",	REQ_ARG,	0,	SET_CR4 },
 		{ "set-dr0",	REQ_ARG,	0,	SET_DR0 },
 		{ "set-dr1",	REQ_ARG,	0,	SET_DR1 },
 		{ "set-dr2",	REQ_ARG,	0,	SET_DR2 },
 		{ "set-dr3",	REQ_ARG,	0,	SET_DR3 },
 		{ "set-dr6",	REQ_ARG,	0,	SET_DR6 },
 		{ "set-dr7",	REQ_ARG,	0,	SET_DR7 },
 		{ "set-rsp",	REQ_ARG,	0,	SET_RSP },
 		{ "set-rip",	REQ_ARG,	0,	SET_RIP },
 		{ "set-rax",	REQ_ARG,	0,	SET_RAX },
 		{ "set-rflags",	REQ_ARG,	0,	SET_RFLAGS },
 		{ "desc-base",	REQ_ARG,	0,	DESC_BASE },
 		{ "desc-limit",	REQ_ARG,	0,	DESC_LIMIT },
 		{ "desc-access",REQ_ARG,	0,	DESC_ACCESS },
 		{ "set-cs",	REQ_ARG,	0,	SET_CS },
 		{ "set-ds",	REQ_ARG,	0,	SET_DS },
 		{ "set-es",	REQ_ARG,	0,	SET_ES },
 		{ "set-fs",	REQ_ARG,	0,	SET_FS },
 		{ "set-gs",	REQ_ARG,	0,	SET_GS },
 		{ "set-ss",	REQ_ARG,	0,	SET_SS },
 		{ "set-tr",	REQ_ARG,	0,	SET_TR },
 		{ "set-ldtr",	REQ_ARG,	0,	SET_LDTR },
 		{ "set-x2apic-state",REQ_ARG,	0,	SET_X2APIC_STATE },
 		{ "set-exception-bitmap",
 				REQ_ARG,	0, SET_EXCEPTION_BITMAP },
 		{ "capname",	REQ_ARG,	0,	CAPNAME },
 		{ "unassign-pptdev", REQ_ARG,	0,	UNASSIGN_PPTDEV },
 		{ "setcap",	REQ_ARG,	0,	SET_CAP },
 		{ "get-gpa-pmap", REQ_ARG,	0,	GET_GPA_PMAP },
 		{ "assert-lapic-lvt", REQ_ARG,	0,	ASSERT_LAPIC_LVT },
 		{ "get-rtc-time", NO_ARG,	&get_rtc_time,	1 },
 		{ "set-rtc-time", REQ_ARG,	0,	SET_RTC_TIME },
 		{ "rtc-nvram-offset", REQ_ARG,	0,	RTC_NVRAM_OFFSET },
 		{ "get-rtc-nvram", NO_ARG,	&get_rtc_nvram,	1 },
 		{ "set-rtc-nvram", REQ_ARG,	0,	SET_RTC_NVRAM },
 		{ "getcap",	NO_ARG,		&getcap,	1 },
 		{ "get-stats",	NO_ARG,		&get_stats,	1 },
 		{ "get-desc-ds",NO_ARG,		&get_desc_ds,	1 },
 		{ "set-desc-ds",NO_ARG,		&set_desc_ds,	1 },
 		{ "get-desc-es",NO_ARG,		&get_desc_es,	1 },
 		{ "set-desc-es",NO_ARG,		&set_desc_es,	1 },
 		{ "get-desc-ss",NO_ARG,		&get_desc_ss,	1 },
 		{ "set-desc-ss",NO_ARG,		&set_desc_ss,	1 },
 		{ "get-desc-cs",NO_ARG,		&get_desc_cs,	1 },
 		{ "set-desc-cs",NO_ARG,		&set_desc_cs,	1 },
 		{ "get-desc-fs",NO_ARG,		&get_desc_fs,	1 },
 		{ "set-desc-fs",NO_ARG,		&set_desc_fs,	1 },
 		{ "get-desc-gs",NO_ARG,		&get_desc_gs,	1 },
 		{ "set-desc-gs",NO_ARG,		&set_desc_gs,	1 },
 		{ "get-desc-tr",NO_ARG,		&get_desc_tr,	1 },
 		{ "set-desc-tr",NO_ARG,		&set_desc_tr,	1 },
 		{ "set-desc-ldtr", NO_ARG,	&set_desc_ldtr,	1 },
 		{ "get-desc-ldtr", NO_ARG,	&get_desc_ldtr,	1 },
 		{ "set-desc-gdtr", NO_ARG,	&set_desc_gdtr, 1 },
 		{ "get-desc-gdtr", NO_ARG,	&get_desc_gdtr, 1 },
 		{ "set-desc-idtr", NO_ARG,	&set_desc_idtr, 1 },
 		{ "get-desc-idtr", NO_ARG,	&get_desc_idtr, 1 },
 		{ "get-memmap",	NO_ARG,		&get_memmap,	1 },
 		{ "get-memseg", NO_ARG,		&get_memseg,	1 },
 		{ "get-efer",	NO_ARG,		&get_efer,	1 },
 		{ "get-cr0",	NO_ARG,		&get_cr0,	1 },
 		{ "get-cr2",	NO_ARG,		&get_cr2,	1 },
 		{ "get-cr3",	NO_ARG,		&get_cr3,	1 },
 		{ "get-cr4",	NO_ARG,		&get_cr4,	1 },
 		{ "get-dr0",	NO_ARG,		&get_dr0,	1 },
 		{ "get-dr1",	NO_ARG,		&get_dr1,	1 },
 		{ "get-dr2",	NO_ARG,		&get_dr2,	1 },
 		{ "get-dr3",	NO_ARG,		&get_dr3,	1 },
 		{ "get-dr6",	NO_ARG,		&get_dr6,	1 },
 		{ "get-dr7",	NO_ARG,		&get_dr7,	1 },
 		{ "get-rsp",	NO_ARG,		&get_rsp,	1 },
 		{ "get-rip",	NO_ARG,		&get_rip,	1 },
 		{ "get-rax",	NO_ARG,		&get_rax,	1 },
 		{ "get-rbx",	NO_ARG,		&get_rbx,	1 },
 		{ "get-rcx",	NO_ARG,		&get_rcx,	1 },
 		{ "get-rdx",	NO_ARG,		&get_rdx,	1 },
 		{ "get-rsi",	NO_ARG,		&get_rsi,	1 },
 		{ "get-rdi",	NO_ARG,		&get_rdi,	1 },
 		{ "get-rbp",	NO_ARG,		&get_rbp,	1 },
 		{ "get-r8",	NO_ARG,		&get_r8,	1 },
 		{ "get-r9",	NO_ARG,		&get_r9,	1 },
 		{ "get-r10",	NO_ARG,		&get_r10,	1 },
 		{ "get-r11",	NO_ARG,		&get_r11,	1 },
 		{ "get-r12",	NO_ARG,		&get_r12,	1 },
 		{ "get-r13",	NO_ARG,		&get_r13,	1 },
 		{ "get-r14",	NO_ARG,		&get_r14,	1 },
 		{ "get-r15",	NO_ARG,		&get_r15,	1 },
 		{ "get-rflags",	NO_ARG,		&get_rflags,	1 },
 		{ "get-cs",	NO_ARG,		&get_cs,	1 },
 		{ "get-ds",	NO_ARG,		&get_ds,	1 },
 		{ "get-es",	NO_ARG,		&get_es,	1 },
 		{ "get-fs",	NO_ARG,		&get_fs,	1 },
 		{ "get-gs",	NO_ARG,		&get_gs,	1 },
 		{ "get-ss",	NO_ARG,		&get_ss,	1 },
 		{ "get-tr",	NO_ARG,		&get_tr,	1 },
 		{ "get-ldtr",	NO_ARG,		&get_ldtr,	1 },
 		{ "get-eptp", 	NO_ARG,		&get_eptp,	1 },
 		{ "get-exception-bitmap",
 					NO_ARG,	&get_exception_bitmap,  1 },
 		{ "get-io-bitmap-address",
 					NO_ARG,	&get_io_bitmap,		1 },
 		{ "get-tsc-offset", 	NO_ARG, &get_tsc_offset, 	1 },
 		{ "get-msr-bitmap",
 					NO_ARG,	&get_msr_bitmap, 	1 },
 		{ "get-msr-bitmap-address",
 					NO_ARG,	&get_msr_bitmap_address, 1 },
 		{ "get-guest-pat",	NO_ARG,	&get_guest_pat,		1 },
 		{ "get-guest-sysenter",
 					NO_ARG,	&get_guest_sysenter, 	1 },
 		{ "get-exit-reason",
 					NO_ARG,	&get_exit_reason, 	1 },
 		{ "get-x2apic-state",	NO_ARG,	&get_x2apic_state, 	1 },
 		{ "get-all",		NO_ARG,	&get_all,		1 },
 		{ "run",		NO_ARG,	&run,			1 },
 		{ "create",		NO_ARG,	&create,		1 },
 		{ "destroy",		NO_ARG,	&destroy,		1 },
 		{ "inject-nmi",		NO_ARG,	&inject_nmi,		1 },
 		{ "force-reset",	NO_ARG,	&force_reset,		1 },
 		{ "force-poweroff", 	NO_ARG,	&force_poweroff, 	1 },
 		{ "get-active-cpus", 	NO_ARG,	&get_active_cpus, 	1 },
 		{ "get-suspended-cpus", NO_ARG,	&get_suspended_cpus, 	1 },
 		{ "get-intinfo", 	NO_ARG,	&get_intinfo,		1 },
+		{ "get-cpu-topology",	NO_ARG, &get_cpu_topology,	1 },
 	};
 
 	const struct option intel_opts[] = {
 		{ "get-vmcs-pinbased-ctls",
 				NO_ARG,		&get_pinbased_ctls, 1 },
 		{ "get-vmcs-procbased-ctls",
 				NO_ARG,		&get_procbased_ctls, 1 },
 		{ "get-vmcs-procbased-ctls2",
 				NO_ARG,		&get_procbased_ctls2, 1 },
 		{ "get-vmcs-guest-linear-address",
 				NO_ARG,		&get_vmcs_gla,	1 },
 		{ "get-vmcs-guest-physical-address",
 				NO_ARG,		&get_vmcs_gpa,	1 },
 		{ "get-vmcs-entry-interruption-info",
 				NO_ARG, &get_vmcs_entry_interruption_info, 1},
 		{ "get-vmcs-cr0-mask", NO_ARG,	&get_cr0_mask,	1 },
 		{ "get-vmcs-cr0-shadow", NO_ARG,&get_cr0_shadow, 1 },
 		{ "get-vmcs-cr4-mask", 		NO_ARG,	&get_cr4_mask,	  1 },
 		{ "get-vmcs-cr4-shadow", 	NO_ARG, &get_cr4_shadow,  1 },
 		{ "get-vmcs-cr3-targets", 	NO_ARG, &get_cr3_targets, 1 },
 		{ "get-vmcs-tpr-threshold",
 					NO_ARG,	&get_tpr_threshold, 1 },
 		{ "get-vmcs-vpid", 	NO_ARG,	&get_vpid_asid,	    1 },
 		{ "get-vmcs-exit-ctls", NO_ARG,	&get_exit_ctls,	    1 },
 		{ "get-vmcs-entry-ctls",
 					NO_ARG,	&get_entry_ctls, 1 },
 		{ "get-vmcs-instruction-error",
 					NO_ARG,	&get_inst_err,	1 },
 		{ "get-vmcs-host-pat",	NO_ARG,	&get_host_pat,	1 },
 		{ "get-vmcs-host-cr0",
 					NO_ARG,	&get_host_cr0,	1 },
 		{ "set-vmcs-entry-interruption-info",
 				REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO },
 		{ "get-vmcs-exit-qualification",
 				NO_ARG,	&get_vmcs_exit_qualification, 1 },
 		{ "get-vmcs-exit-inst-length",
 				NO_ARG,	&get_vmcs_exit_inst_length, 1 },
 		{ "get-vmcs-interruptibility",
 				NO_ARG, &get_vmcs_interruptibility, 1 },
 		{ "get-vmcs-exit-interruption-error",
 				NO_ARG,	&get_vmcs_exit_interruption_error, 1 },
 		{ "get-vmcs-exit-interruption-info",
 				NO_ARG,	&get_vmcs_exit_interruption_info, 1 },
 		{ "get-vmcs-link", 	NO_ARG,		&get_vmcs_link, 1 },
 		{ "get-vmcs-host-cr3",
 					NO_ARG,		&get_host_cr3,	1 },
 		{ "get-vmcs-host-cr4",
 				NO_ARG,		&get_host_cr4,	1 },
 		{ "get-vmcs-host-rip",
 				NO_ARG,		&get_host_rip,	1 },
 		{ "get-vmcs-host-rsp",
 				NO_ARG,		&get_host_rsp,	1 },
 		{ "get-apic-access-address",
 				NO_ARG,		&get_apic_access_addr, 1},
 		{ "get-virtual-apic-address",
 				NO_ARG,		&get_virtual_apic_addr, 1}
 	};
 
 	const struct option amd_opts[] = {
 		{ "get-vmcb-intercepts",
 				NO_ARG,	&get_vmcb_intercept, 	1 },
 		{ "get-vmcb-asid", 
 				NO_ARG,	&get_vpid_asid,	     	1 },
 		{ "get-vmcb-exit-details",
 				NO_ARG, &get_vmcb_exit_details,	1 },
 		{ "get-vmcb-tlb-ctrl",
 				NO_ARG, &get_vmcb_tlb_ctrl, 	1 },
 		{ "get-vmcb-virq",
 				NO_ARG, &get_vmcb_virq, 	1 },
 		{ "get-avic-apic-bar",
 				NO_ARG,	&get_apic_access_addr, 	1 },
 		{ "get-avic-backing-page",
 				NO_ARG,	&get_virtual_apic_addr, 1 },
 		{ "get-avic-table",
 				NO_ARG,	&get_avic_table, 	1 }
 	};
 
 	/* All-zero entry that marks the end of the table for getopt_long(). */
 	const struct option null_opt = {
 		NULL, 0, NULL, 0
 	};
 
 	struct option *all_opts;
 	char *cp;
 	size_t optlen;
 
 	/* Total size in bytes: common + vendor-specific + terminator. */
 	optlen = sizeof(common_opts);
 	optlen += cpu_intel ? sizeof(intel_opts) : sizeof(amd_opts);
 	optlen += sizeof(null_opt);
 
 	all_opts = malloc(optlen);
 	if (all_opts == NULL) {
 		/* Without the option table nothing can be parsed. */
 		perror("malloc");
 		exit(1);
 	}
 
 	/* Lay the three pieces out back to back. */
 	cp = (char *)all_opts;
 	memcpy(cp, common_opts, sizeof(common_opts));
 	cp += sizeof(common_opts);
 
 	if (cpu_intel) {
 		memcpy(cp, intel_opts, sizeof(intel_opts));
 		cp += sizeof(intel_opts);
 	} else {
 		memcpy(cp, amd_opts, sizeof(amd_opts));
 		cp += sizeof(amd_opts);
 	}
 
 	memcpy(cp, &null_opt, sizeof(null_opt));
 
 	return (all_opts);
 }
 
 /*
  * Return the three-letter English abbreviation for weekday index 'idx',
  * where 0 is "Sun" and 6 is "Sat".  Any other index yields "UNK".
  */
 static const char *
 wday_str(int idx)
 {
 	static const char *names[] = {
 		"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
 	};

 	if (idx < 0 || idx > 6)
 		return ("UNK");

 	return (names[idx]);
 }
 
 /*
  * Return the three-letter English abbreviation for month index 'idx',
  * where 0 is "Jan" and 11 is "Dec".  Any other index yields "UNK".
  */
 static const char *
 mon_str(int idx)
 {
 	static const char *names[] = {
 		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
 		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
 	};

 	if (idx < 0 || idx > 11)
 		return ("UNK");

 	return (names[idx]);
 }
 
 /*
  * Print the guest physical memory map as a table: address, mapping
  * length, backing segment name, offset into the segment, protection
  * bits and mapping flags.
  *
  * Mappings are walked with vm_mmap_getnext(), starting at guest physical
  * address 0 and resuming just past each mapping returned.  A failure
  * with errno == ENOENT ends the walk successfully; any other failure,
  * or a failure of vm_get_memseg(), is returned to the caller.
  */
 static int
 show_memmap(struct vmctx *ctx)
 {
 	char name[SPECNAMELEN + 1], numbuf[8];
 	vm_ooffset_t segoff;
 	vm_paddr_t gpa;
 	size_t maplen, seglen;
 	int error, flags, prot, segid;
 	const char *delim;
 
 	printf("Address     Length      Segment     Offset      ");
 	printf("Prot  Flags\n");
 
 	gpa = 0;
 	while (1) {
 		error = vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &maplen,
 		    &prot, &flags);
 		if (error)
 			return (errno == ENOENT ? 0 : error);
 
 		/* Only the segment's name is used below; seglen is unused. */
 		error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name));
 		if (error)
 			return (error);
 
 		printf("%-12lX", gpa);
 		humanize_number(numbuf, sizeof(numbuf), maplen, "B",
 		    HN_AUTOSCALE, HN_NOSPACE);
 		printf("%-12s", numbuf);
 
 		/* A segment with an empty name is shown as "sysmem". */
 		printf("%-12s", name[0] ? name : "sysmem");
 		printf("%-12lX", segoff);
 		printf("%c%c%c   ", prot & PROT_READ ? 'R' : '-',
 		    prot & PROT_WRITE ? 'W' : '-',
 		    prot & PROT_EXEC ? 'X' : '-');
 
 		/*
 		 * Flag names are joined with '/'.  The separator is kept as
 		 * a string that starts out empty so that nothing, rather
 		 * than a NUL byte, is written before the first name.
 		 */
 		delim = "";
 		if (flags & VM_MEMMAP_F_WIRED) {
 			printf("%swired", delim);
 			delim = "/";
 		}
 		if (flags & VM_MEMMAP_F_IOMMU) {
 			printf("%siommu", delim);
 			delim = "/";
 		}
 		printf("\n");
 
 		gpa += maplen;
 	}
 }
 
 /*
  * List the VM's memory segments: ID, human-readable length and name.
  *
  * Segment IDs are probed from 0 upward until vm_get_memseg() fails.  A
  * failure with errno == EINVAL ends the listing successfully; any other
  * failure is returned.  Segments whose length is 0 are not printed, and
  * a segment with an empty name is shown as "sysmem".
  */
 static int
 show_memseg(struct vmctx *ctx)
 {
 	char segname[SPECNAMELEN + 1], lenbuf[8];
 	size_t len;
 	int id, rc;

 	printf("ID  Length      Name\n");

 	for (id = 0; ; id++) {
 		rc = vm_get_memseg(ctx, id, &len, segname, sizeof(segname));
 		if (rc != 0)
 			return (errno == EINVAL ? 0 : rc);

 		if (len == 0)
 			continue;

 		printf("%-4d", id);
 		humanize_number(lenbuf, sizeof(lenbuf), len, "B",
 		    HN_AUTOSCALE, HN_NOSPACE);
 		printf("%-12s", lenbuf);
 		printf("%s", segname[0] ? segname : "sysmem");
 		printf("\n");
 	}
 }
 
 /*
  * Program entry point.
  *
  * Parses the command line, optionally creates the VM named by --vm,
  * opens it, applies the requested "set" operations, prints the requested
  * "get" information, and finally handles --run, --force-reset,
  * --force-poweroff and --destroy.
  *
  * Steps are chained through 'error': once one step fails, every later
  * step is skipped.  The process exits with the final value of 'error'.
  */
 int
 main(int argc, char *argv[])
 {
 	char *vmname;
 	int error, ch, vcpu, ptenum;
 	vm_paddr_t gpa_pmap;
 	struct vm_exit vmexit;
 	uint64_t rax, cr0, cr2, cr3, cr4, dr0, dr1, dr2, dr3, dr6, dr7;
 	uint64_t rsp, rip, rflags, efer, pat;
 	uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2];
 	struct vmctx *ctx;
 	cpuset_t cpus;
 	bool cpu_intel;
 	uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
 	struct tm tm;
 	struct option *opts;
 
 	/* The option table depends on the host CPU vendor. */
 	cpu_intel = cpu_vendor_intel();
 	opts = setup_options(cpu_intel);
 
 	vcpu = 0;
 	vmname = NULL;
 	assert_lapic_lvt = -1;	/* -1 means "not requested" */
 	progname = basename(argv[0]);
 
 	/*
 	 * Options with a flag pointer in the table set their flag directly
 	 * and come back as 0; options taking a value come back as their
 	 * own code and are decoded here.
 	 */
 	while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) {
 		switch (ch) {
 		case 0:
 			break;
 		case VMNAME:
 			vmname = optarg;
 			break;
 		case VCPU:
 			vcpu = atoi(optarg);
 			break;
 		case SET_MEM:
 			/* Given in MB, rounded up to a multiple of 2MB. */
 			memsize = atoi(optarg) * MB;
 			memsize = roundup(memsize, 2 * MB);
 			break;
 		case SET_EFER:
 			efer = strtoul(optarg, NULL, 0);
 			set_efer = 1;
 			break;
 		case SET_CR0:
 			cr0 = strtoul(optarg, NULL, 0);
 			set_cr0 = 1;
 			break;
 		case SET_CR2:
 			cr2 = strtoul(optarg, NULL, 0);
 			set_cr2 = 1;
 			break;
 		case SET_CR3:
 			cr3 = strtoul(optarg, NULL, 0);
 			set_cr3 = 1;
 			break;
 		case SET_CR4:
 			cr4 = strtoul(optarg, NULL, 0);
 			set_cr4 = 1;
 			break;
 		case SET_DR0:
 			dr0 = strtoul(optarg, NULL, 0);
 			set_dr0 = 1;
 			break;
 		case SET_DR1:
 			dr1 = strtoul(optarg, NULL, 0);
 			set_dr1 = 1;
 			break;
 		case SET_DR2:
 			dr2 = strtoul(optarg, NULL, 0);
 			set_dr2 = 1;
 			break;
 		case SET_DR3:
 			dr3 = strtoul(optarg, NULL, 0);
 			set_dr3 = 1;
 			break;
 		case SET_DR6:
 			dr6 = strtoul(optarg, NULL, 0);
 			set_dr6 = 1;
 			break;
 		case SET_DR7:
 			dr7 = strtoul(optarg, NULL, 0);
 			set_dr7 = 1;
 			break;
 		case SET_RSP:
 			rsp = strtoul(optarg, NULL, 0);
 			set_rsp = 1;
 			break;
 		case SET_RIP:
 			rip = strtoul(optarg, NULL, 0);
 			set_rip = 1;
 			break;
 		case SET_RAX:
 			rax = strtoul(optarg, NULL, 0);
 			set_rax = 1;
 			break;
 		case SET_RFLAGS:
 			rflags = strtoul(optarg, NULL, 0);
 			set_rflags = 1;
 			break;
 		case DESC_BASE:
 			desc_base = strtoul(optarg, NULL, 0);
 			break;
 		case DESC_LIMIT:
 			desc_limit = strtoul(optarg, NULL, 0);
 			break;
 		case DESC_ACCESS:
 			desc_access = strtoul(optarg, NULL, 0);
 			break;
 		case SET_CS:
 			cs = strtoul(optarg, NULL, 0);
 			set_cs = 1;
 			break;
 		case SET_DS:
 			ds = strtoul(optarg, NULL, 0);
 			set_ds = 1;
 			break;
 		case SET_ES:
 			es = strtoul(optarg, NULL, 0);
 			set_es = 1;
 			break;
 		case SET_FS:
 			fs = strtoul(optarg, NULL, 0);
 			set_fs = 1;
 			break;
 		case SET_GS:
 			gs = strtoul(optarg, NULL, 0);
 			set_gs = 1;
 			break;
 		case SET_SS:
 			ss = strtoul(optarg, NULL, 0);
 			set_ss = 1;
 			break;
 		case SET_TR:
 			tr = strtoul(optarg, NULL, 0);
 			set_tr = 1;
 			break;
 		case SET_LDTR:
 			ldtr = strtoul(optarg, NULL, 0);
 			set_ldtr = 1;
 			break;
 		case SET_X2APIC_STATE:
 			x2apic_state = strtol(optarg, NULL, 0);
 			set_x2apic_state = 1;
 			break;
 		case SET_EXCEPTION_BITMAP:
 			exception_bitmap = strtoul(optarg, NULL, 0);
 			set_exception_bitmap = 1;
 			break;
 		case SET_VMCS_ENTRY_INTERRUPTION_INFO:
 			vmcs_entry_interruption_info = strtoul(optarg, NULL, 0);
 			set_vmcs_entry_interruption_info = 1;
 			break;
 		case SET_CAP:
 			capval = strtoul(optarg, NULL, 0);
 			setcap = 1;
 			break;
 		case SET_RTC_TIME:
 			rtc_secs = strtoul(optarg, NULL, 0);
 			set_rtc_time = 1;
 			break;
 		case SET_RTC_NVRAM:
 			rtc_nvram_value = (uint8_t)strtoul(optarg, NULL, 0);
 			set_rtc_nvram = 1;
 			break;
 		case RTC_NVRAM_OFFSET:
 			rtc_nvram_offset = strtoul(optarg, NULL, 0);
 			break;
 		case GET_GPA_PMAP:
 			gpa_pmap = strtoul(optarg, NULL, 0);
 			get_gpa_pmap = 1;
 			break;
 		case CAPNAME:
 			capname = optarg;
 			break;
 		case UNASSIGN_PPTDEV:
 			/* The argument has the form bus/slot/func. */
 			unassign_pptdev = 1;
 			if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3)
 				usage(cpu_intel);
 			break;
 		case ASSERT_LAPIC_LVT:
 			assert_lapic_lvt = atoi(optarg);
 			break;
 		default:
 			usage(cpu_intel);
 		}
 	}
 	argc -= optind;
 	argv += optind;
 
 	/* A VM name is mandatory. */
 	if (vmname == NULL)
 		usage(cpu_intel);
 
 	error = 0;
 
 	if (!error && create)
 		error = vm_create(vmname);
 
 	if (!error) {
 		ctx = vm_open(vmname);
 		if (ctx == NULL) {
 			printf("VM:%s is not created.\n", vmname);
 			exit (1);
 		}
 	}
 
 	/*
 	 * "Set" operations.  'ctx' is only valid when 'error' is 0, which
 	 * every use below checks first.
 	 */
 	if (!error && memsize)
 		error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
 
 	if (!error && set_efer)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer);
 
 	if (!error && set_cr0)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0);
 
 	if (!error && set_cr2)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR2, cr2);
 
 	if (!error && set_cr3)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3);
 
 	if (!error && set_cr4)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4);
 
 	if (!error && set_dr0)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR0, dr0);
 
 	if (!error && set_dr1)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR1, dr1);
 
 	if (!error && set_dr2)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR2, dr2);
 
 	if (!error && set_dr3)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR3, dr3);
 
 	if (!error && set_dr6)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR6, dr6);
 
 	if (!error && set_dr7)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7);
 
 	if (!error && set_rsp)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp);
 
 	if (!error && set_rip)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip);
 
 	if (!error && set_rax)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax);
 
 	if (!error && set_rflags) {
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS,
 					rflags);
 	}
 
 	/*
 	 * Every descriptor "set" uses the same --desc-base, --desc-limit
 	 * and --desc-access values.
 	 */
 	if (!error && set_desc_ds) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_es) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_ss) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_cs) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_fs) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_gs) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_tr) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	if (!error && set_desc_ldtr) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR,
 				    desc_base, desc_limit, desc_access);
 	}
 
 	/* GDTR and IDTR are set with an access value of 0. */
 	if (!error && set_desc_gdtr) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR,
 				    desc_base, desc_limit, 0);
 	}
 
 	if (!error && set_desc_idtr) {
 		error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR,
 				    desc_base, desc_limit, 0);
 	}
 
 	if (!error && set_cs)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs);
 
 	if (!error && set_ds)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds);
 
 	if (!error && set_es)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es);
 
 	if (!error && set_fs)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs);
 
 	if (!error && set_gs)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs);
 
 	if (!error && set_ss)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss);
 
 	if (!error && set_tr)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr);
 
 	if (!error && set_ldtr)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr);
 
 	if (!error && set_x2apic_state)
 		error = vm_set_x2apic_state(ctx, vcpu, x2apic_state);
 
 	if (!error && unassign_pptdev)
 		error = vm_unassign_pptdev(ctx, bus, slot, func);
 
 	if (!error && set_exception_bitmap) {
 		if (cpu_intel)
 			error = vm_set_vmcs_field(ctx, vcpu,
 						  VMCS_EXCEPTION_BITMAP,
 						  exception_bitmap);
 		else
 			error = vm_set_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_EXC_INTERCEPT,
 						  4, exception_bitmap);
 	}
 
 	if (!error && cpu_intel && set_vmcs_entry_interruption_info) {
 		error = vm_set_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,
 					  vmcs_entry_interruption_info);
 	}
 
 	if (!error && inject_nmi) {
 		error = vm_inject_nmi(ctx, vcpu);
 	}
 
 	if (!error && assert_lapic_lvt != -1) {
 		error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt);
 	}
 
 	/* "Get" operations; --get-all enables each of them. */
 	if (!error && (get_memseg || get_all))
 		error = show_memseg(ctx);
 
 	if (!error && (get_memmap || get_all))
 		error = show_memmap(ctx);
 
 	if (!error)
 		error = get_all_registers(ctx, vcpu);
 
 	if (!error)
 		error = get_all_segments(ctx, vcpu);
 
 	if (!error) {
 		if (cpu_intel)
 			error = get_misc_vmcs(ctx, vcpu);
 		else
 			error = get_misc_vmcb(ctx, vcpu);
 	}
 
 	if (!error && (get_x2apic_state || get_all)) {
 		error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state);
 		if (error == 0)
 			printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state);
 	}
 
 	if (!error && (get_eptp || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_NPT_BASE,
 						   8, &eptp);
 		if (error == 0)
 			printf("%s[%d]\t\t0x%016lx\n",
 				cpu_intel ? "eptp" : "rvi/npt", vcpu, eptp);
 	}
 
 	if (!error && (get_exception_bitmap || get_all)) {
 		if(cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						VMCS_EXCEPTION_BITMAP, &bm);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_EXC_INTERCEPT,
 						  4, &bm);
 		if (error == 0)
 			printf("exception_bitmap[%d]\t%#lx\n", vcpu, bm);
 	}
 
 	if (!error && (get_io_bitmap || get_all)) {
 		if (cpu_intel) {
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A,
 						  &bm);
 			if (error == 0)
 				printf("io_bitmap_a[%d]\t%#lx\n", vcpu, bm);
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B,
 						  &bm);
 			if (error == 0)
 				printf("io_bitmap_b[%d]\t%#lx\n", vcpu, bm);
 		} else {
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_IO_PERM, 8, &bm);
 			if (error == 0)
 				printf("io_bitmap[%d]\t%#lx\n", vcpu, bm);
 		}
 	}
 
 	if (!error && (get_tsc_offset || get_all)) {
 		uint64_t tscoff;
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET,
 						  &tscoff);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_TSC_OFFSET,
 						  8, &tscoff);
 		if (error == 0)
 			printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff);
 	}
 
 	if (!error && (get_msr_bitmap_address || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP,
 						  &addr);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_MSR_PERM, 8, &addr);
 		if (error == 0)
 			printf("msr_bitmap[%d]\t\t%#lx\n", vcpu, addr);
 	}
 
 	/* Same address lookup as above, but the bitmap itself is dumped. */
 	if (!error && (get_msr_bitmap || get_all)) {
 		if (cpu_intel) {
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_MSR_BITMAP, &addr);
 		} else {
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_MSR_PERM, 8,
 						  &addr);
 		}
 
 		if (error == 0)
 			error = dump_msr_bitmap(vcpu, addr, cpu_intel);
 	}
 
 	if (!error && (get_vpid_asid || get_all)) {
 		uint64_t vpid;
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID,
 						  4, &vpid);
 		if (error == 0)
 			printf("%s[%d]\t\t0x%04lx\n",
 				cpu_intel ? "vpid" : "asid", vcpu, vpid);
 	}
 
 	if (!error && (get_guest_pat || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_GUEST_IA32_PAT, &pat);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_GUEST_PAT, 8, &pat);
 		if (error == 0)
 			printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat);
 	}
 
 	/*
 	 * The three sysenter fields are each attempted; only the status
 	 * of the last read carries forward.
 	 */
 	if (!error && (get_guest_sysenter || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_GUEST_IA32_SYSENTER_CS,
 						  &cs);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_SYSENTER_CS, 8,
 						  &cs);
 
 		if (error == 0)
 			printf("guest_sysenter_cs[%d]\t%#lx\n", vcpu, cs);
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_GUEST_IA32_SYSENTER_ESP,
 						  &rsp);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_SYSENTER_ESP, 8,
 						  &rsp);
 
 		if (error == 0)
 			printf("guest_sysenter_sp[%d]\t%#lx\n", vcpu, rsp);
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu,
 						  VMCS_GUEST_IA32_SYSENTER_EIP,
 						  &rip);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_SYSENTER_EIP, 8,
 						  &rip);
 		if (error == 0)
 			printf("guest_sysenter_ip[%d]\t%#lx\n", vcpu, rip);
 	}
 
 	if (!error && (get_exit_reason || get_all)) {
 		if (cpu_intel)
 			error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON,
 						  &u64);
 		else
 			error = vm_get_vmcb_field(ctx, vcpu,
 						  VMCB_OFF_EXIT_REASON, 8,
 						  &u64);
 		if (error == 0)
 			printf("exit_reason[%d]\t%#lx\n", vcpu, u64);
 	}
 
 	if (!error && setcap) {
 		int captype;
 		captype = vm_capability_name2type(capname);
 		error = vm_set_capability(ctx, vcpu, captype, capval);
 		if (error != 0 && errno == ENOENT)
 			printf("Capability \"%s\" is not available\n", capname);
 	}
 
 	if (!error && get_gpa_pmap) {
 		error = vm_get_gpa_pmap(ctx, gpa_pmap, pteval, &ptenum);
 		if (error == 0) {
 			printf("gpa %#lx:", gpa_pmap);
 			pte = &pteval[0];
 			while (ptenum-- > 0)
 				printf(" %#lx", *pte++);
 			printf("\n");
 		}
 	}
 
 	if (!error && set_rtc_nvram)
 		error = vm_rtc_write(ctx, rtc_nvram_offset, rtc_nvram_value);
 
 	if (!error && (get_rtc_nvram || get_all)) {
 		error = vm_rtc_read(ctx, rtc_nvram_offset, &rtc_nvram_value);
 		if (error == 0) {
 			printf("rtc nvram[%03d]: 0x%02x\n", rtc_nvram_offset,
 			    rtc_nvram_value);
 		}
 	}
 
 	if (!error && set_rtc_time)
 		error = vm_rtc_settime(ctx, rtc_secs);
 
 	if (!error && (get_rtc_time || get_all)) {
 		error = vm_rtc_gettime(ctx, &rtc_secs);
 		if (error == 0) {
 			gmtime_r(&rtc_secs, &tm);
 			printf("rtc time %#lx: %s %s %02d %02d:%02d:%02d %d\n",
 			    rtc_secs, wday_str(tm.tm_wday), mon_str(tm.tm_mon),
 			    tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec,
 			    1900 + tm.tm_year);
 		}
 	}
 
 	if (!error && (getcap || get_all)) {
 		int captype, val, getcaptype;
 
 		/* With --getcap and --capname only that capability is shown. */
 		if (getcap && capname)
 			getcaptype = vm_capability_name2type(capname);
 		else
 			getcaptype = -1;
 
 		for (captype = 0; captype < VM_CAP_MAX; captype++) {
 			if (getcaptype >= 0 && captype != getcaptype)
 				continue;
 			error = vm_get_capability(ctx, vcpu, captype, &val);
 			if (error == 0) {
 				printf("Capability \"%s\" is %s on vcpu %d\n",
 					vm_capability_type2name(captype),
 					val ? "set" : "not set", vcpu);
 			} else if (errno == ENOENT) {
 				/* An unavailable capability is not an error. */
 				error = 0;
 				printf("Capability \"%s\" is not available\n",
 					vm_capability_type2name(captype));
 			} else {
 				break;
 			}
 		}
 	}
 
 	if (!error && (get_active_cpus || get_all)) {
 		error = vm_active_cpus(ctx, &cpus);
 		if (!error)
 			print_cpus("active cpus", &cpus);
 	}
 
 	if (!error && (get_suspended_cpus || get_all)) {
 		error = vm_suspended_cpus(ctx, &cpus);
 		if (!error)
 			print_cpus("suspended cpus", &cpus);
 	}
 
 	if (!error && (get_intinfo || get_all)) {
 		error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]);
 		if (!error) {
 			print_intinfo("pending", info[0]);
 			print_intinfo("current", info[1]);
 		}
 	}
 
 	if (!error && (get_stats || get_all)) {
 		int i, num_stats;
 		uint64_t *stats;
 		struct timeval tv;
 		const char *desc;
 
 		stats = vm_get_stats(ctx, vcpu, &tv, &num_stats);
 		if (stats != NULL) {
 			printf("vcpu%d stats:\n", vcpu);
 			for (i = 0; i < num_stats; i++) {
 				desc = vm_get_stat_desc(ctx, i);
 				printf("%-40s\t%ld\n", desc, stats[i]);
 			}
 		}
+	}
+
+	if (!error && (get_cpu_topology || get_all)) {
+		uint16_t sockets, cores, threads, maxcpus;
+
+		/*
+		 * Print only when the query succeeded: the locals above
+		 * are uninitialized unless the call filled them in.
+		 */
+		error = vm_get_topology(ctx, &sockets, &cores, &threads,
+		    &maxcpus);
+		if (error == 0) {
+			printf("cpu_topology:\tsockets=%hu, cores=%hu, "
+			    "threads=%hu, maxcpus=%hu\n", sockets, cores,
+			    threads, maxcpus);
+		}
 	}
 
 	if (!error && run) {
 		error = vm_run(ctx, vcpu, &vmexit);
 		if (error == 0)
 			dump_vm_run_exitcode(&vmexit, vcpu);
 		else
 			printf("vm_run error %d\n", error);
 	}
 
 	if (!error && force_reset)
 		error = vm_suspend(ctx, VM_SUSPEND_RESET);
 
 	if (!error && force_poweroff)
 		error = vm_suspend(ctx, VM_SUSPEND_POWEROFF);
 
 	if (error)
 		printf("errno = %d\n", errno);
 
 	/* The VM is destroyed only if everything above succeeded. */
 	if (!error && destroy)
 		vm_destroy(ctx);
 
 	free (opts);
 	exit(error);
 }