diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 463ae7e5ec38..6fec469ac92f 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -1,852 +1,864 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmmapi.h" #define MB (1024 * 1024UL) #define GB (1024 * 1024 * 1024UL) struct vmctx { int fd; uint32_t lowmem_limit; enum vm_mmap_style vms; size_t lowmem; char *lowmem_addr; size_t highmem; char *highmem_addr; char *name; }; #define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) #define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) static int vm_device_open(const char *name) { int fd, len; char *vmfile; len = strlen("/dev/vmm/") + strlen(name) + 1; vmfile = malloc(len); assert(vmfile != NULL); snprintf(vmfile, len, "/dev/vmm/%s", name); /* Open the device file */ fd = open(vmfile, O_RDWR, 0); free(vmfile); return (fd); } int vm_create(const char *name) { return (CREATE((char *)name)); } struct vmctx * vm_open(const char *name) { struct vmctx *vm; vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); assert(vm != NULL); vm->fd = -1; vm->lowmem_limit = 3 * GB; vm->name = (char *)(vm + 1); strcpy(vm->name, name); if ((vm->fd = vm_device_open(vm->name)) < 0) goto err; return (vm); err: vm_destroy(vm); return (NULL); } void vm_destroy(struct vmctx *vm) { assert(vm != NULL); if (vm->fd >= 0) close(vm->fd); DESTROY(vm->name); free(vm); } int vm_parse_memsize(const char *optarg, size_t *ret_memsize) { char *endptr; size_t optval; int error; optval = strtoul(optarg, &endptr, 0); if (*optarg != '\0' && *endptr == '\0') { /* * For the sake of backward compatibility if the memory size * specified on the command line is less than a megabyte then * it is interpreted as being in units of MB. */ if (optval < MB) optval *= MB; *ret_memsize = optval; error = 0; } else error = expand_number(optarg, ret_memsize); return (error); } int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len, int *wired) { int error; struct vm_memory_segment seg; bzero(&seg, sizeof(seg)); seg.gpa = gpa; error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); *ret_len = seg.len; if (wired != NULL) *wired = seg.wired; return (error); } uint32_t vm_get_lowmem_limit(struct vmctx *ctx) { return (ctx->lowmem_limit); } void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit) { ctx->lowmem_limit = limit; } static int setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr) { int error; struct vm_memory_segment seg; /* * Create and optionally map 'len' bytes of memory at guest * physical address 'gpa' */ bzero(&seg, sizeof(seg)); seg.gpa = gpa; seg.len = len; error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); if (error == 0 && addr != NULL) { *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, ctx->fd, gpa); } return (error); } int vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) { char **addr; int error; /* XXX VM_MMAP_SPARSE not implemented yet */ assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL); ctx->vms = vms; /* * If 'memsize' cannot fit entirely in the 'lowmem' segment then * create another 'highmem' segment above 4GB for the remainder. */ if (memsize > ctx->lowmem_limit) { ctx->lowmem = ctx->lowmem_limit; ctx->highmem = memsize - ctx->lowmem; } else { ctx->lowmem = memsize; ctx->highmem = 0; } if (ctx->lowmem > 0) { addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL; error = setup_memory_segment(ctx, 0, ctx->lowmem, addr); if (error) return (error); } if (ctx->highmem > 0) { addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL; error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr); if (error) return (error); } return (0); } void * vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) { /* XXX VM_MMAP_SPARSE not implemented yet */ assert(ctx->vms == VM_MMAP_ALL); if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem) return ((void *)(ctx->lowmem_addr + gaddr)); if (gaddr >= 4*GB) { gaddr -= 4*GB; if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem) return ((void *)(ctx->highmem_addr + gaddr)); } return (NULL); } int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access) { int error; struct vm_seg_desc vmsegdesc; bzero(&vmsegdesc, sizeof(vmsegdesc)); vmsegdesc.cpuid = vcpu; vmsegdesc.regnum = reg; vmsegdesc.desc.base = base; vmsegdesc.desc.limit = limit; vmsegdesc.desc.access = access; error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); return (error); } int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t *base, uint32_t *limit, uint32_t *access) { int error; struct vm_seg_desc vmsegdesc; bzero(&vmsegdesc, sizeof(vmsegdesc)); vmsegdesc.cpuid = vcpu; vmsegdesc.regnum = reg; error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); if (error == 0) { *base = vmsegdesc.desc.base; *limit = vmsegdesc.desc.limit; *access = vmsegdesc.desc.access; } return (error); } int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) { int error; struct vm_register vmreg; bzero(&vmreg, sizeof(vmreg)); vmreg.cpuid = vcpu; vmreg.regnum = reg; vmreg.regval = val; error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); return (error); } int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) { int error; struct vm_register vmreg; bzero(&vmreg, sizeof(vmreg)); vmreg.cpuid = vcpu; vmreg.regnum = reg; error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); *ret_val = vmreg.regval; return (error); } int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) { int error; struct vm_run vmrun; bzero(&vmrun, sizeof(vmrun)); vmrun.cpuid = vcpu; vmrun.rip = rip; error = ioctl(ctx->fd, VM_RUN, &vmrun); bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); return (error); } static int vm_inject_event_real(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector, int error_code, int error_code_valid) { struct vm_event ev; bzero(&ev, sizeof(ev)); ev.cpuid = vcpu; ev.type = type; ev.vector = vector; ev.error_code = error_code; ev.error_code_valid = error_code_valid; return (ioctl(ctx->fd, VM_INJECT_EVENT, &ev)); } int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector) { return (vm_inject_event_real(ctx, vcpu, type, vector, 0, 0)); } int vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector, int error_code) { return (vm_inject_event_real(ctx, vcpu, type, vector, error_code, 1)); } int vm_apicid2vcpu(struct vmctx *ctx, int apicid) { /* * The apic id associated with the 'vcpu' has the same numerical value * as the 'vcpu' itself. */ return (apicid); } int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) { struct vm_lapic_irq vmirq; bzero(&vmirq, sizeof(vmirq)); vmirq.cpuid = vcpu; vmirq.vector = vector; return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); } +int +vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector) +{ + struct vm_lapic_irq vmirq; + + bzero(&vmirq, sizeof(vmirq)); + vmirq.cpuid = vcpu; + vmirq.vector = vector; + + return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq)); +} + int vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg) { struct vm_lapic_msi vmmsi; bzero(&vmmsi, sizeof(vmmsi)); vmmsi.addr = addr; vmmsi.msg = msg; return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi)); } int vm_ioapic_assert_irq(struct vmctx *ctx, int irq) { struct vm_ioapic_irq ioapic_irq; bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); ioapic_irq.irq = irq; return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq)); } int vm_ioapic_deassert_irq(struct vmctx *ctx, int irq) { struct vm_ioapic_irq ioapic_irq; bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); ioapic_irq.irq = irq; return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq)); } int vm_ioapic_pulse_irq(struct vmctx *ctx, int irq) { struct vm_ioapic_irq ioapic_irq; bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); ioapic_irq.irq = irq; return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq)); } int vm_inject_nmi(struct vmctx *ctx, int vcpu) { struct vm_nmi vmnmi; bzero(&vmnmi, sizeof(vmnmi)); vmnmi.cpuid = vcpu; return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); } static struct { const char *name; int type; } capstrmap[] = { { "hlt_exit", VM_CAP_HALT_EXIT }, { "mtrap_exit", VM_CAP_MTRAP_EXIT }, { "pause_exit", VM_CAP_PAUSE_EXIT }, { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, { "enable_invpcid", VM_CAP_ENABLE_INVPCID }, { 0 } }; int vm_capability_name2type(const char *capname) { int i; for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { if (strcmp(capstrmap[i].name, capname) == 0) return (capstrmap[i].type); } return (-1); } const char * vm_capability_type2name(int type) { int i; for (i = 0; capstrmap[i].name != NULL; i++) { if (capstrmap[i].type == type) return (capstrmap[i].name); } return (NULL); } int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int *retval) { int error; struct vm_capability vmcap; bzero(&vmcap, sizeof(vmcap)); vmcap.cpuid = vcpu; vmcap.captype = cap; error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); *retval = vmcap.capval; return (error); } int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) { struct vm_capability vmcap; bzero(&vmcap, sizeof(vmcap)); vmcap.cpuid = vcpu; vmcap.captype = cap; vmcap.capval = val; return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); } int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) { struct vm_pptdev pptdev; bzero(&pptdev, sizeof(pptdev)); pptdev.bus = bus; pptdev.slot = slot; pptdev.func = func; return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); } int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) { struct vm_pptdev pptdev; bzero(&pptdev, sizeof(pptdev)); pptdev.bus = bus; pptdev.slot = slot; pptdev.func = func; return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); } int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { struct vm_pptdev_mmio pptmmio; bzero(&pptmmio, sizeof(pptmmio)); pptmmio.bus = bus; pptmmio.slot = slot; pptmmio.func = func; pptmmio.gpa = gpa; pptmmio.len = len; pptmmio.hpa = hpa; return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); } int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, uint64_t addr, uint64_t msg, int numvec) { struct vm_pptdev_msi pptmsi; bzero(&pptmsi, sizeof(pptmsi)); pptmsi.vcpu = vcpu; pptmsi.bus = bus; pptmsi.slot = slot; pptmsi.func = func; pptmsi.msg = msg; pptmsi.addr = addr; pptmsi.numvec = numvec; return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); } int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) { struct vm_pptdev_msix pptmsix; bzero(&pptmsix, sizeof(pptmsix)); pptmsix.vcpu = vcpu; pptmsix.bus = bus; pptmsix.slot = slot; pptmsix.func = func; pptmsix.idx = idx; pptmsix.msg = msg; pptmsix.addr = addr; pptmsix.vector_control = vector_control; return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); } uint64_t * vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, int *ret_entries) { int error; static struct vm_stats vmstats; vmstats.cpuid = vcpu; error = ioctl(ctx->fd, VM_STATS, &vmstats); if (error == 0) { if (ret_entries) *ret_entries = vmstats.num_entries; if (ret_tv) *ret_tv = vmstats.tv; return (vmstats.statbuf); } else return (NULL); } const char * vm_get_stat_desc(struct vmctx *ctx, int index) { static struct vm_stat_desc statdesc; statdesc.index = index; if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) return (statdesc.desc); else return (NULL); } int vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state) { int error; struct vm_x2apic x2apic; bzero(&x2apic, sizeof(x2apic)); x2apic.cpuid = vcpu; error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic); *state = x2apic.state; return (error); } int vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state) { int error; struct vm_x2apic x2apic; bzero(&x2apic, sizeof(x2apic)); x2apic.cpuid = vcpu; x2apic.state = state; error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic); return (error); } /* * From Intel Vol 3a: * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT */ int vcpu_reset(struct vmctx *vmctx, int vcpu) { int error; uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; uint32_t desc_access, desc_limit; uint16_t sel; zero = 0; rflags = 0x2; error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); if (error) goto done; rip = 0xfff0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) goto done; cr0 = CR0_NE; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) goto done; cr4 = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) goto done; /* * CS: present, r/w, accessed, 16-bit, byte granularity, usable */ desc_base = 0xffff0000; desc_limit = 0xffff; desc_access = 0x0093; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0xf000; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) goto done; /* * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x0093; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) goto done; /* General purpose registers */ rdx = 0xf00; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) goto done; /* GDTR, IDTR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, desc_base, desc_limit, desc_access); if (error != 0) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, desc_base, desc_limit, desc_access); if (error != 0) goto done; /* TR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x0000008b; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) goto done; /* LDTR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x00000082; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) goto done; /* XXX cr2, debug registers */ error = 0; done: return (error); } int vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num) { int error, i; struct vm_gpa_pte gpapte; bzero(&gpapte, sizeof(gpapte)); gpapte.gpa = gpa; error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte); if (error == 0) { *num = gpapte.ptenum; for (i = 0; i < gpapte.ptenum; i++) pte[i] = gpapte.pte[i]; } return (error); } int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities) { int error; struct vm_hpet_cap cap; bzero(&cap, sizeof(struct vm_hpet_cap)); error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap); if (capabilities != NULL) *capabilities = cap.capabilities; return (error); } diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 52e7e92bebbd..69762c7cf995 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -1,113 +1,114 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMMAPI_H_ #define _VMMAPI_H_ struct vmctx; enum x2apic_state; /* * Different styles of mapping the memory assigned to a VM into the address * space of the controlling process. */ enum vm_mmap_style { VM_MMAP_NONE, /* no mapping */ VM_MMAP_ALL, /* fully and statically mapped */ VM_MMAP_SPARSE, /* mappings created on-demand */ }; int vm_create(const char *name); struct vmctx *vm_open(const char *name); void vm_destroy(struct vmctx *ctx); int vm_parse_memsize(const char *optarg, size_t *memsize); int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len, int *wired); int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s); void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len); int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num); uint32_t vm_get_lowmem_limit(struct vmctx *ctx); void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit); int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access); int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t *base, uint32_t *limit, uint32_t *access); int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *ret_vmexit); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); int vm_inject_event(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector); int vm_inject_event2(struct vmctx *ctx, int vcpu, enum vm_event_type type, int vector, int error_code); int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector); +int vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector); int vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg); int vm_ioapic_assert_irq(struct vmctx *ctx, int irq); int vm_ioapic_deassert_irq(struct vmctx *ctx, int irq); int vm_ioapic_pulse_irq(struct vmctx *ctx, int irq); int vm_inject_nmi(struct vmctx *ctx, int vcpu); int vm_capability_name2type(const char *capname); const char *vm_capability_type2name(int type); int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int *retval); int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val); int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func); int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func); int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, uint64_t addr, uint64_t msg, int numvec); int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); /* * Return a pointer to the statistics buffer. Note that this is not MT-safe. */ uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, int *ret_entries); const char *vm_get_stat_desc(struct vmctx *ctx, int index); int vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *s); int vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s); int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities); /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); /* * FreeBSD specific APIs */ int vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t cr3, uint64_t gdtbase, uint64_t rsp); void vm_setup_freebsd_gdt(uint64_t *gdtr); #endif /* _VMMAPI_H_ */ diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index b71b745795e9..454c411e5fae 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -1,256 +1,259 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ #ifdef _KERNEL void vmmdev_init(void); int vmmdev_cleanup(void); #endif struct vm_memory_segment { vm_paddr_t gpa; /* in */ size_t len; int wired; }; struct vm_register { int cpuid; int regnum; /* enum vm_reg_name */ uint64_t regval; }; struct vm_seg_desc { /* data or code segment */ int cpuid; int regnum; /* enum vm_reg_name */ struct seg_desc desc; }; struct vm_run { int cpuid; uint64_t rip; /* start running here */ struct vm_exit vm_exit; }; struct vm_event { int cpuid; enum vm_event_type type; int vector; uint32_t error_code; int error_code_valid; }; struct vm_lapic_msi { uint64_t msg; uint64_t addr; }; struct vm_lapic_irq { int cpuid; int vector; }; struct vm_ioapic_irq { int irq; }; struct vm_capability { int cpuid; enum vm_cap_type captype; int capval; int allcpus; }; struct vm_pptdev { int bus; int slot; int func; }; struct vm_pptdev_mmio { int bus; int slot; int func; vm_paddr_t gpa; vm_paddr_t hpa; size_t len; }; struct vm_pptdev_msi { int vcpu; int bus; int slot; int func; int numvec; /* 0 means disabled */ uint64_t msg; uint64_t addr; }; struct vm_pptdev_msix { int vcpu; int bus; int slot; int func; int idx; uint64_t msg; uint32_t vector_control; uint64_t addr; }; struct vm_nmi { int cpuid; }; #define MAX_VM_STATS 64 struct vm_stats { int cpuid; /* in */ int num_entries; /* out */ struct timeval tv; uint64_t statbuf[MAX_VM_STATS]; }; struct vm_stat_desc { int index; /* in */ char desc[128]; /* out */ }; struct vm_x2apic { int cpuid; enum x2apic_state state; }; struct vm_gpa_pte { uint64_t gpa; /* in */ uint64_t pte[4]; /* out */ int ptenum; }; struct vm_hpet_cap { uint32_t capabilities; /* lower 32 bits of HPET capabilities */ }; enum { /* general routines */ IOCNUM_ABIVERS = 0, IOCNUM_RUN = 1, IOCNUM_SET_CAPABILITY = 2, IOCNUM_GET_CAPABILITY = 3, /* memory apis */ IOCNUM_MAP_MEMORY = 10, IOCNUM_GET_MEMORY_SEG = 11, IOCNUM_GET_GPA_PMAP = 12, /* register/state accessors */ IOCNUM_SET_REGISTER = 20, IOCNUM_GET_REGISTER = 21, IOCNUM_SET_SEGMENT_DESCRIPTOR = 22, IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, /* interrupt injection */ IOCNUM_INJECT_EVENT = 30, IOCNUM_LAPIC_IRQ = 31, IOCNUM_INJECT_NMI = 32, IOCNUM_IOAPIC_ASSERT_IRQ = 33, IOCNUM_IOAPIC_DEASSERT_IRQ = 34, IOCNUM_IOAPIC_PULSE_IRQ = 35, IOCNUM_LAPIC_MSI = 36, + IOCNUM_LAPIC_LOCAL_IRQ = 37, /* PCI pass-thru */ IOCNUM_BIND_PPTDEV = 40, IOCNUM_UNBIND_PPTDEV = 41, IOCNUM_MAP_PPTDEV_MMIO = 42, IOCNUM_PPTDEV_MSI = 43, IOCNUM_PPTDEV_MSIX = 44, /* statistics */ IOCNUM_VM_STATS = 50, IOCNUM_VM_STAT_DESC = 51, /* kernel device state */ IOCNUM_SET_X2APIC_STATE = 60, IOCNUM_GET_X2APIC_STATE = 61, IOCNUM_GET_HPET_CAPABILITIES = 62, }; #define VM_RUN \ _IOWR('v', IOCNUM_RUN, struct vm_run) #define VM_MAP_MEMORY \ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) #define VM_GET_MEMORY_SEG \ _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment) #define VM_SET_REGISTER \ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) #define VM_GET_REGISTER \ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) #define VM_SET_SEGMENT_DESCRIPTOR \ _IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_GET_SEGMENT_DESCRIPTOR \ _IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_INJECT_EVENT \ _IOW('v', IOCNUM_INJECT_EVENT, struct vm_event) #define VM_LAPIC_IRQ \ _IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq) +#define VM_LAPIC_LOCAL_IRQ \ + _IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq) #define VM_LAPIC_MSI \ _IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi) #define VM_IOAPIC_ASSERT_IRQ \ _IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_DEASSERT_IRQ \ _IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_PULSE_IRQ \ _IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq) #define VM_SET_CAPABILITY \ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) #define VM_GET_CAPABILITY \ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) #define VM_BIND_PPTDEV \ _IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev) #define VM_UNBIND_PPTDEV \ _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev) #define VM_MAP_PPTDEV_MMIO \ _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio) #define VM_PPTDEV_MSI \ _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi) #define VM_PPTDEV_MSIX \ _IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix) #define VM_INJECT_NMI \ _IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi) #define VM_STATS \ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) #define VM_STAT_DESC \ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) #define VM_SET_X2APIC_STATE \ _IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_X2APIC_STATE \ _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_HPET_CAPABILITIES \ _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) #define VM_GET_GPA_PMAP \ _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) #endif diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 39db9c9f1b65..695040dc2118 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -1,1255 +1,1391 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include "vmm_stat.h" #include "vmm_lapic.h" #include "vmm_ktr.h" #include "vlapic.h" #include "vioapic.h" #define VLAPIC_CTR0(vlapic, format) \ VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format) #define VLAPIC_CTR1(vlapic, format, p1) \ VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1) #define VLAPIC_CTR2(vlapic, format, p1, p2) \ VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2) #define VLAPIC_CTR_IRR(vlapic, msg) \ do { \ uint32_t *irrptr = &(vlapic)->apic.irr0; \ irrptr[0] = irrptr[0]; /* silence compiler */ \ VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \ VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \ VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \ VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \ VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \ VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \ VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \ VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \ } while (0) #define VLAPIC_CTR_ISR(vlapic, msg) \ do { \ uint32_t *isrptr = &(vlapic)->apic.isr0; \ isrptr[0] = isrptr[0]; /* silence compiler */ \ VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \ VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \ VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \ VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \ VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \ VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \ VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \ VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \ } while (0) static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); #define PRIO(x) ((x) >> 4) #define VLAPIC_VERSION (16) -#define VLAPIC_MAXLVT_ENTRIES (5) +#define VLAPIC_MAXLVT_ENTRIES (APIC_LVT_CMCI) #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) enum boot_state { BS_INIT, BS_SIPI, BS_RUNNING }; struct vlapic { struct vm *vm; int vcpuid; struct LAPIC apic; - int esr_update; + uint32_t esr_pending; + int esr_firing; struct callout callout; /* vlapic timer */ struct bintime timer_fire_bt; /* callout expiry time */ struct bintime timer_freq_bt; /* timer frequency */ struct bintime timer_period_bt; /* timer period */ struct mtx timer_mtx; /* * The 'isrvec_stk' is a stack of vectors injected by the local apic. * A vector is popped from the stack when the processor does an EOI. * The vector on the top of the stack is used to compute the * Processor Priority in conjunction with the TPR. */ uint8_t isrvec_stk[ISRVEC_STK_SIZE]; int isrvec_stk_top; uint64_t msr_apicbase; enum boot_state boot_state; }; /* * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the * vlapic_callout_handler() and vcpu accesses to the following registers: * - initial count register aka icr_timer * - current count register aka ccr_timer * - divide config register aka dcr_timer * - timer LVT register * * Note that the vlapic_callout_handler() does not write to any of these * registers so they can be safely read from the vcpu context without locking. */ #define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx)) #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) #define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx)) #define VLAPIC_BUS_FREQ tsc_freq static __inline uint32_t vlapic_get_id(struct vlapic *vlapic) { if (x2apic(vlapic)) return (vlapic->vcpuid); else return (vlapic->vcpuid << 24); } static __inline uint32_t vlapic_get_ldr(struct vlapic *vlapic) { struct LAPIC *lapic; int apicid; uint32_t ldr; lapic = &vlapic->apic; if (x2apic(vlapic)) { apicid = vlapic_get_id(vlapic); ldr = 1 << (apicid & 0xf); ldr |= (apicid & 0xffff0) << 12; return (ldr); } else return (lapic->ldr); } static __inline uint32_t vlapic_get_dfr(struct vlapic *vlapic) { struct LAPIC *lapic; lapic = &vlapic->apic; if (x2apic(vlapic)) return (0); else return (lapic->dfr); } static void vlapic_set_dfr(struct vlapic *vlapic, uint32_t data) { uint32_t dfr; struct LAPIC *lapic; if (x2apic(vlapic)) { VM_CTR1(vlapic->vm, "write to DFR in x2apic mode: %#x", data); return; } lapic = &vlapic->apic; dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK); if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); else VLAPIC_CTR1(vlapic, "vlapic DFR in Unknown Model %#x", dfr); lapic->dfr = dfr; } static void vlapic_set_ldr(struct vlapic *vlapic, uint32_t data) { struct LAPIC *lapic; /* LDR is read-only in x2apic mode */ if (x2apic(vlapic)) { VLAPIC_CTR1(vlapic, "write to LDR in x2apic mode: %#x", data); return; } lapic = &vlapic->apic; lapic->ldr = data & ~APIC_LDR_RESERVED; VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); } static int vlapic_timer_divisor(uint32_t dcr) { switch (dcr & 0xB) { case APIC_TDCR_1: return (1); case APIC_TDCR_2: return (2); case APIC_TDCR_4: return (4); case APIC_TDCR_8: return (8); case APIC_TDCR_16: return (16); case APIC_TDCR_32: return (32); case APIC_TDCR_64: return (64); case APIC_TDCR_128: return (128); default: panic("vlapic_timer_divisor: invalid dcr 0x%08x", dcr); } } static void vlapic_mask_lvts(uint32_t *lvts, int num_lvt) { int i; for (i = 0; i < num_lvt; i++) { *lvts |= APIC_LVT_M; lvts += 4; } } #if 0 static inline void vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) { printf("Offset %x: lvt %08x (V:%02x DS:%x M:%x)\n", offset, *lvt, *lvt & APIC_LVTT_VECTOR, *lvt & APIC_LVTT_DS, *lvt & APIC_LVTT_M); } #endif static uint32_t vlapic_get_ccr(struct vlapic *vlapic) { struct bintime bt_now, bt_rem; struct LAPIC *lapic; uint32_t ccr; ccr = 0; lapic = &vlapic->apic; VLAPIC_TIMER_LOCK(vlapic); if (callout_active(&vlapic->callout)) { /* * If the timer is scheduled to expire in the future then * compute the value of 'ccr' based on the remaining time. */ binuptime(&bt_now); if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) { bt_rem = vlapic->timer_fire_bt; bintime_sub(&bt_rem, &bt_now); ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; } } KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, " "icr_timer is %#x", ccr, lapic->icr_timer)); VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x", ccr, lapic->icr_timer); VLAPIC_TIMER_UNLOCK(vlapic); return (ccr); } static void vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr) { struct LAPIC *lapic; int divisor; lapic = &vlapic->apic; VLAPIC_TIMER_LOCK(vlapic); lapic->dcr_timer = dcr; divisor = vlapic_timer_divisor(dcr); VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", dcr, divisor); /* * Update the timer frequency and the timer period. * * XXX changes to the frequency divider will not take effect until * the timer is reloaded. */ FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); vlapic->timer_period_bt = vlapic->timer_freq_bt; bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); VLAPIC_TIMER_UNLOCK(vlapic); } static void vlapic_update_errors(struct vlapic *vlapic) { struct LAPIC *lapic = &vlapic->apic; - lapic->esr = 0; // XXX + lapic->esr = vlapic->esr_pending; + vlapic->esr_pending = 0; } static void vlapic_reset(struct vlapic *vlapic) { struct LAPIC *lapic; lapic = &vlapic->apic; bzero(lapic, sizeof(struct LAPIC)); lapic->version = VLAPIC_VERSION; lapic->version |= (VLAPIC_MAXLVT_ENTRIES << MAXLVTSHIFT); lapic->dfr = 0xffffffff; lapic->svr = APIC_SVR_VECTOR; - vlapic_mask_lvts(&lapic->lvt_timer, VLAPIC_MAXLVT_ENTRIES+1); + vlapic_mask_lvts(&lapic->lvt_timer, 6); + vlapic_mask_lvts(&lapic->lvt_cmci, 1); vlapic_set_dcr(vlapic, 0); if (vlapic->vcpuid == 0) vlapic->boot_state = BS_RUNNING; /* BSP */ else vlapic->boot_state = BS_INIT; /* AP */ } void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) { struct LAPIC *lapic = &vlapic->apic; uint32_t *irrptr, *tmrptr, mask; int idx; if (vector < 0 || vector >= 256) panic("vlapic_set_intr_ready: invalid vector %d\n", vector); if (!(lapic->svr & APIC_SVR_ENABLE)) { VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring " "interrupt %d", vector); return; } + if (vector < 16) { + vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR); + return; + } + idx = (vector / 32) * 4; mask = 1 << (vector % 32); irrptr = &lapic->irr0; atomic_set_int(&irrptr[idx], mask); /* * Upon acceptance of an interrupt into the IRR the corresponding * TMR bit is cleared for edge-triggered interrupts and set for * level-triggered interrupts. */ tmrptr = &lapic->tmr0; if (level) atomic_set_int(&tmrptr[idx], mask); else atomic_clear_int(&tmrptr[idx], mask); VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); } static __inline uint32_t * vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) { struct LAPIC *lapic = &vlapic->apic; int i; - if (offset < APIC_OFFSET_TIMER_LVT || offset > APIC_OFFSET_ERROR_LVT) { + switch (offset) { + case APIC_OFFSET_CMCI_LVT: + return (&lapic->lvt_cmci); + case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: + i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; + return ((&lapic->lvt_timer) + i);; + default: panic("vlapic_get_lvt: invalid LVT\n"); } - i = (offset - APIC_OFFSET_TIMER_LVT) >> 2; - return ((&lapic->lvt_timer) + i);; } static __inline uint32_t vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) { return (*vlapic_get_lvtptr(vlapic, offset)); } static void vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val) { - uint32_t *lvtptr; + uint32_t *lvtptr, mask; struct LAPIC *lapic; lapic = &vlapic->apic; lvtptr = vlapic_get_lvtptr(vlapic, offset); if (offset == APIC_OFFSET_TIMER_LVT) VLAPIC_TIMER_LOCK(vlapic); if (!(lapic->svr & APIC_SVR_ENABLE)) val |= APIC_LVT_M; - *lvtptr = val; + mask = APIC_LVT_M | APIC_LVT_DS | APIC_LVT_VECTOR; + switch (offset) { + case APIC_OFFSET_TIMER_LVT: + mask |= APIC_LVTT_TM; + break; + case APIC_OFFSET_ERROR_LVT: + break; + case APIC_OFFSET_LINT0_LVT: + case APIC_OFFSET_LINT1_LVT: + mask |= APIC_LVT_TM | APIC_LVT_RIRR | APIC_LVT_IIPP; + /* FALLTHROUGH */ + default: + mask |= APIC_LVT_DM; + break; + } + *lvtptr = val & mask; if (offset == APIC_OFFSET_TIMER_LVT) VLAPIC_TIMER_UNLOCK(vlapic); } +static int +vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt) +{ + uint32_t vec, mode; + + if (lvt & APIC_LVT_M) + return (0); + + vec = lvt & APIC_LVT_VECTOR; + mode = lvt & APIC_LVT_DM; + + switch (mode) { + case APIC_LVT_DM_FIXED: + if (vec < 16) { + vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); + return (0); + } + vlapic_set_intr_ready(vlapic, vec, false); + vcpu_notify_event(vlapic->vm, vlapic->vcpuid); + break; + case APIC_LVT_DM_NMI: + vm_inject_nmi(vlapic->vm, vlapic->vcpuid); + break; + default: + // Other modes ignored + return (0); + } + return (1); +} + #if 1 static void dump_isrvec_stk(struct vlapic *vlapic) { int i; uint32_t *isrptr; isrptr = &vlapic->apic.isr0; for (i = 0; i < 8; i++) printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); for (i = 0; i <= vlapic->isrvec_stk_top; i++) printf("isrvec_stk[%d] = %d\n", i, vlapic->isrvec_stk[i]); } #endif /* * Algorithm adopted from section "Interrupt, Task and Processor Priority" * in Intel Architecture Manual Vol 3a. */ static void vlapic_update_ppr(struct vlapic *vlapic) { int isrvec, tpr, ppr; /* * Note that the value on the stack at index 0 is always 0. * * This is a placeholder for the value of ISRV when none of the * bits is set in the ISRx registers. */ isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; tpr = vlapic->apic.tpr; #if 1 { int i, lastprio, curprio, vector, idx; uint32_t *isrptr; if (vlapic->isrvec_stk_top == 0 && isrvec != 0) panic("isrvec_stk is corrupted: %d", isrvec); /* * Make sure that the priority of the nested interrupts is * always increasing. */ lastprio = -1; for (i = 1; i <= vlapic->isrvec_stk_top; i++) { curprio = PRIO(vlapic->isrvec_stk[i]); if (curprio <= lastprio) { dump_isrvec_stk(vlapic); panic("isrvec_stk does not satisfy invariant"); } lastprio = curprio; } /* * Make sure that each bit set in the ISRx registers has a * corresponding entry on the isrvec stack. */ i = 1; isrptr = &vlapic->apic.isr0; for (vector = 0; vector < 256; vector++) { idx = (vector / 32) * 4; if (isrptr[idx] & (1 << (vector % 32))) { if (i > vlapic->isrvec_stk_top || vlapic->isrvec_stk[i] != vector) { dump_isrvec_stk(vlapic); panic("ISR and isrvec_stk out of sync"); } i++; } } } #endif if (PRIO(tpr) >= PRIO(isrvec)) ppr = tpr; else ppr = isrvec & 0xf0; vlapic->apic.ppr = ppr; VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); } static void vlapic_process_eoi(struct vlapic *vlapic) { struct LAPIC *lapic = &vlapic->apic; uint32_t *isrptr, *tmrptr; int i, idx, bitpos, vector; isrptr = &lapic->isr0; tmrptr = &lapic->tmr0; /* * The x86 architecture reserves the the first 32 vectors for use * by the processor. */ for (i = 7; i > 0; i--) { idx = i * 4; bitpos = fls(isrptr[idx]); if (bitpos-- != 0) { if (vlapic->isrvec_stk_top <= 0) { panic("invalid vlapic isrvec_stk_top %d", vlapic->isrvec_stk_top); } isrptr[idx] &= ~(1 << bitpos); VLAPIC_CTR_ISR(vlapic, "vlapic_process_eoi"); vlapic->isrvec_stk_top--; vlapic_update_ppr(vlapic); if ((tmrptr[idx] & (1 << bitpos)) != 0) { vector = i * 32 + bitpos; vioapic_process_eoi(vlapic->vm, vlapic->vcpuid, vector); } return; } } } static __inline int vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) { return (lvt & mask); } static __inline int vlapic_periodic_timer(struct vlapic *vlapic) { uint32_t lvt; lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); return (vlapic_get_lvt_field(lvt, APIC_LVTT_TM_PERIODIC)); } +static VMM_STAT(VLAPIC_INTR_ERROR, "error interrupts generated by vlapic"); + +void +vlapic_set_error(struct vlapic *vlapic, uint32_t mask) +{ + uint32_t lvt; + + vlapic->esr_pending |= mask; + if (vlapic->esr_firing) + return; + vlapic->esr_firing = 1; + + // The error LVT always uses the fixed delivery mode. + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT); + if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) { + vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_ERROR, 1); + } + vlapic->esr_firing = 0; +} + static VMM_STAT(VLAPIC_INTR_TIMER, "timer interrupts generated by vlapic"); static void vlapic_fire_timer(struct vlapic *vlapic) { - int vector; uint32_t lvt; KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked")); + // The timer LVT always uses the fixed delivery mode. lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); - - if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) { + if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) { vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1); - vector = vlapic_get_lvt_field(lvt, APIC_LVTT_VECTOR); - vlapic_set_intr_ready(vlapic, vector, false); - vcpu_notify_event(vlapic->vm, vlapic->vcpuid); } } +static VMM_STAT(VLAPIC_INTR_CMC, + "corrected machine check interrupts generated by vlapic"); + +void +vlapic_fire_cmci(struct vlapic *vlapic) +{ + uint32_t lvt; + + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT); + if (vlapic_fire_lvt(vlapic, lvt)) { + vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_CMC, 1); + } +} + +static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_ENTRIES, + "lvts triggered"); + +int +vlapic_trigger_lvt(struct vlapic *vlapic, int vector) +{ + uint32_t lvt; + + switch (vector) { + case APIC_LVT_LINT0: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT0_LVT); + break; + case APIC_LVT_LINT1: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_LINT1_LVT); + break; + case APIC_LVT_TIMER: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); + lvt |= APIC_LVT_DM_FIXED; + break; + case APIC_LVT_ERROR: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_ERROR_LVT); + lvt |= APIC_LVT_DM_FIXED; + break; + case APIC_LVT_PMC: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_PERF_LVT); + break; + case APIC_LVT_THERMAL: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_THERM_LVT); + break; + case APIC_LVT_CMCI: + lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_CMCI_LVT); + break; + default: + return (EINVAL); + } + if (vlapic_fire_lvt(vlapic, lvt)) { + vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, + LVTS_TRIGGERRED, vector, 1); + } + return (0); +} + static void vlapic_callout_handler(void *arg) { struct vlapic *vlapic; struct bintime bt, btnow; sbintime_t rem_sbt; vlapic = arg; VLAPIC_TIMER_LOCK(vlapic); if (callout_pending(&vlapic->callout)) /* callout was reset */ goto done; if (!callout_active(&vlapic->callout)) /* callout was stopped */ goto done; callout_deactivate(&vlapic->callout); KASSERT(vlapic->apic.icr_timer != 0, ("vlapic timer is disabled")); vlapic_fire_timer(vlapic); if (vlapic_periodic_timer(vlapic)) { binuptime(&btnow); KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, vlapic->timer_fire_bt.frac)); /* * Compute the delta between when the timer was supposed to * fire and the present time. */ bt = btnow; bintime_sub(&bt, &vlapic->timer_fire_bt); rem_sbt = bttosbt(vlapic->timer_period_bt); if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { /* * Adjust the time until the next countdown downward * to account for the lost time. */ rem_sbt -= bttosbt(bt); } else { /* * If the delta is greater than the timer period then * just reset our time base instead of trying to catch * up. */ vlapic->timer_fire_bt = btnow; VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " "usecs, period is %lu usecs - resetting time base", bttosbt(bt) / SBT_1US, bttosbt(vlapic->timer_period_bt) / SBT_1US); } bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); callout_reset_sbt(&vlapic->callout, rem_sbt, 0, vlapic_callout_handler, vlapic, 0); } done: VLAPIC_TIMER_UNLOCK(vlapic); } static void vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer) { struct LAPIC *lapic; sbintime_t sbt; VLAPIC_TIMER_LOCK(vlapic); lapic = &vlapic->apic; lapic->icr_timer = icr_timer; vlapic->timer_period_bt = vlapic->timer_freq_bt; bintime_mul(&vlapic->timer_period_bt, icr_timer); if (icr_timer != 0) { binuptime(&vlapic->timer_fire_bt); bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); sbt = bttosbt(vlapic->timer_period_bt); callout_reset_sbt(&vlapic->callout, sbt, 0, vlapic_callout_handler, vlapic, 0); } else callout_stop(&vlapic->callout); VLAPIC_TIMER_UNLOCK(vlapic); } /* * This function populates 'dmask' with the set of vcpus that match the * addressing specified by the (dest, phys, lowprio) tuple. * * 'x2apic_dest' specifies whether 'dest' is interpreted as x2APIC (32-bit) * or xAPIC (8-bit) destination field. */ static void vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, bool lowprio, bool x2apic_dest) { struct vlapic *vlapic; uint32_t dfr, ldr, ldest, cluster; uint32_t mda_flat_ldest, mda_cluster_ldest, mda_ldest, mda_cluster_id; cpuset_t amask; int vcpuid; if ((x2apic_dest && dest == 0xffffffff) || (!x2apic_dest && dest == 0xff)) { /* * Broadcast in both logical and physical modes. */ *dmask = vm_active_cpus(vm); return; } if (phys) { /* * Physical mode: destination is APIC ID. */ CPU_ZERO(dmask); vcpuid = vm_apicid2vcpuid(vm, dest); if (vcpuid < VM_MAXCPU) CPU_SET(vcpuid, dmask); } else { /* * In the "Flat Model" the MDA is interpreted as an 8-bit wide * bitmask. This model is only avilable in the xAPIC mode. */ mda_flat_ldest = dest & 0xff; /* * In the "Cluster Model" the MDA is used to identify a * specific cluster and a set of APICs in that cluster. */ if (x2apic_dest) { mda_cluster_id = dest >> 16; mda_cluster_ldest = dest & 0xffff; } else { mda_cluster_id = (dest >> 4) & 0xf; mda_cluster_ldest = dest & 0xf; } /* * Logical mode: match each APIC that has a bit set * in it's LDR that matches a bit in the ldest. */ CPU_ZERO(dmask); amask = vm_active_cpus(vm); while ((vcpuid = CPU_FFS(&amask)) != 0) { vcpuid--; CPU_CLR(vcpuid, &amask); vlapic = vm_lapic(vm, vcpuid); dfr = vlapic_get_dfr(vlapic); ldr = vlapic_get_ldr(vlapic); if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) { ldest = ldr >> 24; mda_ldest = mda_flat_ldest; } else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) { if (x2apic(vlapic)) { cluster = ldr >> 16; ldest = ldr & 0xffff; } else { cluster = ldr >> 28; ldest = (ldr >> 24) & 0xf; } if (cluster != mda_cluster_id) continue; mda_ldest = mda_cluster_ldest; } else { /* * Guest has configured a bad logical * model for this vcpu - skip it. */ VLAPIC_CTR1(vlapic, "vlapic has bad logical " "model %x - cannot deliver interrupt", dfr); continue; } if ((mda_ldest & ldest) != 0) { CPU_SET(vcpuid, dmask); if (lowprio) break; } } } } static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); static int lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) { int i; bool phys; cpuset_t dmask; uint32_t dest, vec, mode; struct vlapic *vlapic2; struct vm_exit *vmexit; if (x2apic(vlapic)) dest = icrval >> 32; else dest = icrval >> (32 + 24); vec = icrval & APIC_VECTOR_MASK; mode = icrval & APIC_DELMODE_MASK; + if (mode == APIC_DELMODE_FIXED && vec < 16) { + vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); + return (0); + } + if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { switch (icrval & APIC_DEST_MASK) { case APIC_DEST_DESTFLD: phys = ((icrval & APIC_DESTMODE_LOG) == 0); vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false, x2apic(vlapic)); break; case APIC_DEST_SELF: CPU_SETOF(vlapic->vcpuid, &dmask); break; case APIC_DEST_ALLISELF: dmask = vm_active_cpus(vlapic->vm); break; case APIC_DEST_ALLESELF: dmask = vm_active_cpus(vlapic->vm); CPU_CLR(vlapic->vcpuid, &dmask); break; default: CPU_ZERO(&dmask); /* satisfy gcc */ break; } while ((i = CPU_FFS(&dmask)) != 0) { i--; CPU_CLR(i, &dmask); if (mode == APIC_DELMODE_FIXED) { lapic_intr_edge(vlapic->vm, i, vec); vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT, i, 1); } else vm_inject_nmi(vlapic->vm, i); } return (0); /* handled completely in the kernel */ } if (mode == APIC_DELMODE_INIT) { if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) return (0); if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) { vlapic2 = vm_lapic(vlapic->vm, dest); /* move from INIT to waiting-for-SIPI state */ if (vlapic2->boot_state == BS_INIT) { vlapic2->boot_state = BS_SIPI; } return (0); } } if (mode == APIC_DELMODE_STARTUP) { if (vlapic->vcpuid == 0 && dest != 0 && dest < VM_MAXCPU) { vlapic2 = vm_lapic(vlapic->vm, dest); /* * Ignore SIPIs in any state other than wait-for-SIPI */ if (vlapic2->boot_state != BS_SIPI) return (0); /* * XXX this assumes that the startup IPI always succeeds */ vlapic2->boot_state = BS_RUNNING; vm_activate_cpu(vlapic2->vm, dest); *retu = true; vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid); vmexit->exitcode = VM_EXITCODE_SPINUP_AP; vmexit->u.spinup_ap.vcpu = dest; vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT; return (0); } } /* * This will cause a return to userland. */ return (1); } int vlapic_pending_intr(struct vlapic *vlapic) { struct LAPIC *lapic = &vlapic->apic; int idx, i, bitpos, vector; uint32_t *irrptr, val; irrptr = &lapic->irr0; /* * The x86 architecture reserves the the first 32 vectors for use * by the processor. */ for (i = 7; i > 0; i--) { idx = i * 4; val = atomic_load_acq_int(&irrptr[idx]); bitpos = fls(val); if (bitpos != 0) { vector = i * 32 + (bitpos - 1); if (PRIO(vector) > PRIO(lapic->ppr)) { VLAPIC_CTR1(vlapic, "pending intr %d", vector); return (vector); } else break; } } return (-1); } void vlapic_intr_accepted(struct vlapic *vlapic, int vector) { struct LAPIC *lapic = &vlapic->apic; uint32_t *irrptr, *isrptr; int idx, stk_top; /* * clear the ready bit for vector being accepted in irr * and set the vector as in service in isr. */ idx = (vector / 32) * 4; irrptr = &lapic->irr0; atomic_clear_int(&irrptr[idx], 1 << (vector % 32)); VLAPIC_CTR_IRR(vlapic, "vlapic_intr_accepted"); isrptr = &lapic->isr0; isrptr[idx] |= 1 << (vector % 32); VLAPIC_CTR_ISR(vlapic, "vlapic_intr_accepted"); /* * Update the PPR */ vlapic->isrvec_stk_top++; stk_top = vlapic->isrvec_stk_top; if (stk_top >= ISRVEC_STK_SIZE) panic("isrvec_stk_top overflow %d", stk_top); vlapic->isrvec_stk[stk_top] = vector; vlapic_update_ppr(vlapic); } static void lapic_set_svr(struct vlapic *vlapic, uint32_t new) { struct LAPIC *lapic; uint32_t old, changed; lapic = &vlapic->apic; old = lapic->svr; changed = old ^ new; if ((changed & APIC_SVR_ENABLE) != 0) { if ((new & APIC_SVR_ENABLE) == 0) { /* * The apic is now disabled so stop the apic timer. */ VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); VLAPIC_TIMER_LOCK(vlapic); callout_stop(&vlapic->callout); VLAPIC_TIMER_UNLOCK(vlapic); } else { /* * The apic is now enabled so restart the apic timer * if it is configured in periodic mode. */ VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); if (vlapic_periodic_timer(vlapic)) vlapic_set_icr_timer(vlapic, lapic->icr_timer); } } lapic->svr = new; } int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) { struct LAPIC *lapic = &vlapic->apic; uint32_t *reg; int i; if (offset > sizeof(*lapic)) { *data = 0; goto done; } offset &= ~3; switch(offset) { case APIC_OFFSET_ID: *data = vlapic_get_id(vlapic); break; case APIC_OFFSET_VER: *data = lapic->version; break; case APIC_OFFSET_TPR: *data = lapic->tpr; break; case APIC_OFFSET_APR: *data = lapic->apr; break; case APIC_OFFSET_PPR: *data = lapic->ppr; break; case APIC_OFFSET_EOI: *data = lapic->eoi; break; case APIC_OFFSET_LDR: *data = vlapic_get_ldr(vlapic); break; case APIC_OFFSET_DFR: *data = vlapic_get_dfr(vlapic); break; case APIC_OFFSET_SVR: *data = lapic->svr; break; case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: i = (offset - APIC_OFFSET_ISR0) >> 2; reg = &lapic->isr0; *data = *(reg + i); break; case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: i = (offset - APIC_OFFSET_TMR0) >> 2; reg = &lapic->tmr0; *data = *(reg + i); break; case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: i = (offset - APIC_OFFSET_IRR0) >> 2; reg = &lapic->irr0; *data = atomic_load_acq_int(reg + i); break; case APIC_OFFSET_ESR: *data = lapic->esr; break; case APIC_OFFSET_ICR_LOW: *data = lapic->icr_lo; break; case APIC_OFFSET_ICR_HI: *data = lapic->icr_hi; break; + case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: *data = vlapic_get_lvt(vlapic, offset); break; case APIC_OFFSET_ICR: *data = lapic->icr_timer; break; case APIC_OFFSET_CCR: *data = vlapic_get_ccr(vlapic); break; case APIC_OFFSET_DCR: *data = lapic->dcr_timer; break; case APIC_OFFSET_RRR: default: *data = 0; break; } done: VLAPIC_CTR2(vlapic, "vlapic read offset %#x, data %#lx", offset, *data); return 0; } int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) { struct LAPIC *lapic = &vlapic->apic; int retval; VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data); if (offset > sizeof(*lapic)) { return 0; } retval = 0; offset &= ~3; switch(offset) { case APIC_OFFSET_ID: break; case APIC_OFFSET_TPR: lapic->tpr = data & 0xff; vlapic_update_ppr(vlapic); break; case APIC_OFFSET_EOI: vlapic_process_eoi(vlapic); break; case APIC_OFFSET_LDR: vlapic_set_ldr(vlapic, data); break; case APIC_OFFSET_DFR: vlapic_set_dfr(vlapic, data); break; case APIC_OFFSET_SVR: lapic_set_svr(vlapic, data); break; case APIC_OFFSET_ICR_LOW: if (!x2apic(vlapic)) { data &= 0xffffffff; data |= (uint64_t)lapic->icr_hi << 32; } retval = lapic_process_icr(vlapic, data, retu); break; case APIC_OFFSET_ICR_HI: if (!x2apic(vlapic)) { retval = 0; lapic->icr_hi = data; } break; + case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: vlapic_set_lvt(vlapic, offset, data); break; case APIC_OFFSET_ICR: vlapic_set_icr_timer(vlapic, data); break; case APIC_OFFSET_DCR: vlapic_set_dcr(vlapic, data); break; case APIC_OFFSET_ESR: vlapic_update_errors(vlapic); break; case APIC_OFFSET_VER: case APIC_OFFSET_APR: case APIC_OFFSET_PPR: case APIC_OFFSET_RRR: case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: case APIC_OFFSET_CCR: default: // Read only. break; } return (retval); } struct vlapic * vlapic_init(struct vm *vm, int vcpuid) { struct vlapic *vlapic; vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO); vlapic->vm = vm; vlapic->vcpuid = vcpuid; /* * If the vlapic is configured in x2apic mode then it will be * accessed in the critical section via the MSR emulation code. * * Therefore the timer mutex must be a spinlock because blockable * mutexes cannot be acquired in a critical section. */ mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN); callout_init(&vlapic->callout, 1); vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; if (vcpuid == 0) vlapic->msr_apicbase |= APICBASE_BSP; vlapic_reset(vlapic); return (vlapic); } void vlapic_cleanup(struct vlapic *vlapic) { callout_drain(&vlapic->callout); free(vlapic, M_VLAPIC); } uint64_t vlapic_get_apicbase(struct vlapic *vlapic) { return (vlapic->msr_apicbase); } void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val) { int err; enum x2apic_state state; err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state); if (err) panic("vlapic_set_apicbase: err %d fetching x2apic state", err); if (state == X2APIC_DISABLED) val &= ~APICBASE_X2APIC; vlapic->msr_apicbase = val; } void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) { struct vlapic *vlapic; vlapic = vm_lapic(vm, vcpuid); if (state == X2APIC_DISABLED) vlapic->msr_apicbase &= ~APICBASE_X2APIC; } void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, int delmode, int vec) { bool lowprio; int vcpuid; cpuset_t dmask; if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { VM_CTR1(vm, "vlapic intr invalid delmode %#x", delmode); return; } lowprio = (delmode == APIC_DELMODE_LOWPRIO); /* * We don't provide any virtual interrupt redirection hardware so * all interrupts originating from the ioapic or MSI specify the * 'dest' in the legacy xAPIC format. */ vlapic_calcdest(vm, &dmask, dest, phys, lowprio, false); while ((vcpuid = CPU_FFS(&dmask)) != 0) { vcpuid--; CPU_CLR(vcpuid, &dmask); lapic_set_intr(vm, vcpuid, vec, level); } } bool vlapic_enabled(struct vlapic *vlapic) { struct LAPIC *lapic = &vlapic->apic; if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && (lapic->svr & APIC_SVR_ENABLE) != 0) return (true); else return (false); } diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h index f66994074c35..98f377eeec05 100644 --- a/sys/amd64/vmm/io/vlapic.h +++ b/sys/amd64/vmm/io/vlapic.h @@ -1,108 +1,112 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VLAPIC_H_ #define _VLAPIC_H_ struct vm; /* * Map of APIC Registers: Offset Description Access */ #define APIC_OFFSET_ID 0x20 // Local APIC ID R/W #define APIC_OFFSET_VER 0x30 // Local APIC Version R #define APIC_OFFSET_TPR 0x80 // Task Priority Register R/W #define APIC_OFFSET_APR 0x90 // Arbitration Priority Register R #define APIC_OFFSET_PPR 0xA0 // Processor Priority Register R #define APIC_OFFSET_EOI 0xB0 // EOI Register W #define APIC_OFFSET_RRR 0xC0 // Remote read R #define APIC_OFFSET_LDR 0xD0 // Logical Destination R/W #define APIC_OFFSET_DFR 0xE0 // Destination Format Register 0..27 R; 28..31 R/W #define APIC_OFFSET_SVR 0xF0 // Spurious Interrupt Vector Reg. 0..3 R; 4..9 R/W #define APIC_OFFSET_ISR0 0x100 // ISR 000-031 R #define APIC_OFFSET_ISR1 0x110 // ISR 032-063 R #define APIC_OFFSET_ISR2 0x120 // ISR 064-095 R #define APIC_OFFSET_ISR3 0x130 // ISR 095-128 R #define APIC_OFFSET_ISR4 0x140 // ISR 128-159 R #define APIC_OFFSET_ISR5 0x150 // ISR 160-191 R #define APIC_OFFSET_ISR6 0x160 // ISR 192-223 R #define APIC_OFFSET_ISR7 0x170 // ISR 224-255 R #define APIC_OFFSET_TMR0 0x180 // TMR 000-031 R #define APIC_OFFSET_TMR1 0x190 // TMR 032-063 R #define APIC_OFFSET_TMR2 0x1A0 // TMR 064-095 R #define APIC_OFFSET_TMR3 0x1B0 // TMR 095-128 R #define APIC_OFFSET_TMR4 0x1C0 // TMR 128-159 R #define APIC_OFFSET_TMR5 0x1D0 // TMR 160-191 R #define APIC_OFFSET_TMR6 0x1E0 // TMR 192-223 R #define APIC_OFFSET_TMR7 0x1F0 // TMR 224-255 R #define APIC_OFFSET_IRR0 0x200 // IRR 000-031 R #define APIC_OFFSET_IRR1 0x210 // IRR 032-063 R #define APIC_OFFSET_IRR2 0x220 // IRR 064-095 R #define APIC_OFFSET_IRR3 0x230 // IRR 095-128 R #define APIC_OFFSET_IRR4 0x240 // IRR 128-159 R #define APIC_OFFSET_IRR5 0x250 // IRR 160-191 R #define APIC_OFFSET_IRR6 0x260 // IRR 192-223 R #define APIC_OFFSET_IRR7 0x270 // IRR 224-255 R #define APIC_OFFSET_ESR 0x280 // Error Status Register R +#define APIC_OFFSET_CMCI_LVT 0x2F0 // Local Vector Table (CMCI) R/W #define APIC_OFFSET_ICR_LOW 0x300 // Interrupt Command Reg. (0-31) R/W #define APIC_OFFSET_ICR_HI 0x310 // Interrupt Command Reg. (32-63) R/W #define APIC_OFFSET_TIMER_LVT 0x320 // Local Vector Table (Timer) R/W #define APIC_OFFSET_THERM_LVT 0x330 // Local Vector Table (Thermal) R/W (PIV+) #define APIC_OFFSET_PERF_LVT 0x340 // Local Vector Table (Performance) R/W (P6+) #define APIC_OFFSET_LINT0_LVT 0x350 // Local Vector Table (LINT0) R/W #define APIC_OFFSET_LINT1_LVT 0x360 // Local Vector Table (LINT1) R/W #define APIC_OFFSET_ERROR_LVT 0x370 // Local Vector Table (ERROR) R/W #define APIC_OFFSET_ICR 0x380 // Initial Count Reg. for Timer R/W #define APIC_OFFSET_CCR 0x390 // Current Count of Timer R #define APIC_OFFSET_DCR 0x3E0 // Timer Divide Configuration Reg. R/W /* * 16 priority levels with at most one vector injected per level. */ #define ISRVEC_STK_SIZE (16 + 1) enum x2apic_state; struct vlapic *vlapic_init(struct vm *vm, int vcpuid); void vlapic_cleanup(struct vlapic *vlapic); int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu); int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu); int vlapic_pending_intr(struct vlapic *vlapic); void vlapic_intr_accepted(struct vlapic *vlapic, int vector); void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level); +void vlapic_set_error(struct vlapic *vlapic, uint32_t mask); +void vlapic_fire_cmci(struct vlapic *vlapic); +int vlapic_trigger_lvt(struct vlapic *vlapic, int vector); uint64_t vlapic_get_apicbase(struct vlapic *vlapic); void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val); void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s); bool vlapic_enabled(struct vlapic *vlapic); void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, int delmode, int vec); #endif /* _VLAPIC_H_ */ diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 829d8d5c18c3..4b5f691021f5 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -1,565 +1,570 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vmm_stat.h" #include "vmm_mem.h" #include "io/ppt.h" #include "io/vioapic.h" #include "io/vhpet.h" struct vmmdev_softc { struct vm *vm; /* vm instance cookie */ struct cdev *cdev; SLIST_ENTRY(vmmdev_softc) link; int flags; }; #define VSC_LINKED 0x01 static SLIST_HEAD(, vmmdev_softc) head; static struct mtx vmmdev_mtx; static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); static struct vmmdev_softc * vmmdev_lookup(const char *name) { struct vmmdev_softc *sc; #ifdef notyet /* XXX kernel is not compiled with invariants */ mtx_assert(&vmmdev_mtx, MA_OWNED); #endif SLIST_FOREACH(sc, &head, link) { if (strcmp(name, vm_name(sc->vm)) == 0) break; } return (sc); } static struct vmmdev_softc * vmmdev_lookup2(struct cdev *cdev) { return (cdev->si_drv1); } static int vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) { int error, off, c, prot; vm_paddr_t gpa; void *hpa, *cookie; struct vmmdev_softc *sc; static char zerobuf[PAGE_SIZE]; error = 0; sc = vmmdev_lookup2(cdev); if (sc == NULL) error = ENXIO; prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); while (uio->uio_resid > 0 && error == 0) { gpa = uio->uio_offset; off = gpa & PAGE_MASK; c = min(uio->uio_resid, PAGE_SIZE - off); /* * The VM has a hole in its physical memory map. If we want to * use 'dd' to inspect memory beyond the hole we need to * provide bogus data for memory that lies in the hole. * * Since this device does not support lseek(2), dd(1) will * read(2) blocks of data to simulate the lseek(2). */ hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie); if (hpa == NULL) { if (uio->uio_rw == UIO_READ) error = uiomove(zerobuf, c, uio); else error = EFAULT; } else { error = uiomove(hpa, c, uio); vm_gpa_release(cookie); } } return (error); } static int vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct thread *td) { int error, vcpu, state_changed; struct vmmdev_softc *sc; struct vm_memory_segment *seg; struct vm_register *vmreg; struct vm_seg_desc *vmsegdesc; struct vm_run *vmrun; struct vm_event *vmevent; struct vm_lapic_irq *vmirq; struct vm_lapic_msi *vmmsi; struct vm_ioapic_irq *ioapic_irq; struct vm_capability *vmcap; struct vm_pptdev *pptdev; struct vm_pptdev_mmio *pptmmio; struct vm_pptdev_msi *pptmsi; struct vm_pptdev_msix *pptmsix; struct vm_nmi *vmnmi; struct vm_stats *vmstats; struct vm_stat_desc *statdesc; struct vm_x2apic *x2apic; struct vm_gpa_pte *gpapte; sc = vmmdev_lookup2(cdev); if (sc == NULL) return (ENXIO); vcpu = -1; state_changed = 0; /* * Some VMM ioctls can operate only on vcpus that are not running. */ switch (cmd) { case VM_RUN: case VM_GET_REGISTER: case VM_SET_REGISTER: case VM_GET_SEGMENT_DESCRIPTOR: case VM_SET_SEGMENT_DESCRIPTOR: case VM_INJECT_EVENT: case VM_GET_CAPABILITY: case VM_SET_CAPABILITY: case VM_PPTDEV_MSI: case VM_PPTDEV_MSIX: case VM_SET_X2APIC_STATE: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. */ vcpu = *(int *)data; if (vcpu < 0 || vcpu >= VM_MAXCPU) { error = EINVAL; goto done; } error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); if (error) goto done; state_changed = 1; break; case VM_MAP_PPTDEV_MMIO: case VM_BIND_PPTDEV: case VM_UNBIND_PPTDEV: case VM_MAP_MEMORY: /* * ioctls that operate on the entire virtual machine must * prevent all vcpus from running. */ error = 0; for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) { error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); if (error) break; } if (error) { while (--vcpu >= 0) vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); goto done; } state_changed = 2; break; default: break; } switch(cmd) { case VM_RUN: vmrun = (struct vm_run *)data; error = vm_run(sc->vm, vmrun); break; case VM_STAT_DESC: { statdesc = (struct vm_stat_desc *)data; error = vmm_stat_desc_copy(statdesc->index, statdesc->desc, sizeof(statdesc->desc)); break; } case VM_STATS: { CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); vmstats = (struct vm_stats *)data; getmicrotime(&vmstats->tv); error = vmm_stat_copy(sc->vm, vmstats->cpuid, &vmstats->num_entries, vmstats->statbuf); break; } case VM_PPTDEV_MSI: pptmsi = (struct vm_pptdev_msi *)data; error = ppt_setup_msi(sc->vm, pptmsi->vcpu, pptmsi->bus, pptmsi->slot, pptmsi->func, pptmsi->addr, pptmsi->msg, pptmsi->numvec); break; case VM_PPTDEV_MSIX: pptmsix = (struct vm_pptdev_msix *)data; error = ppt_setup_msix(sc->vm, pptmsix->vcpu, pptmsix->bus, pptmsix->slot, pptmsix->func, pptmsix->idx, pptmsix->addr, pptmsix->msg, pptmsix->vector_control); break; case VM_MAP_PPTDEV_MMIO: pptmmio = (struct vm_pptdev_mmio *)data; error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, pptmmio->func, pptmmio->gpa, pptmmio->len, pptmmio->hpa); break; case VM_BIND_PPTDEV: pptdev = (struct vm_pptdev *)data; error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, pptdev->func); break; case VM_UNBIND_PPTDEV: pptdev = (struct vm_pptdev *)data; error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, pptdev->func); break; case VM_INJECT_EVENT: vmevent = (struct vm_event *)data; error = vm_inject_event(sc->vm, vmevent->cpuid, vmevent->type, vmevent->vector, vmevent->error_code, vmevent->error_code_valid); break; case VM_INJECT_NMI: vmnmi = (struct vm_nmi *)data; error = vm_inject_nmi(sc->vm, vmnmi->cpuid); break; case VM_LAPIC_IRQ: vmirq = (struct vm_lapic_irq *)data; error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector); break; + case VM_LAPIC_LOCAL_IRQ: + vmirq = (struct vm_lapic_irq *)data; + error = lapic_set_local_intr(sc->vm, vmirq->cpuid, + vmirq->vector); + break; case VM_LAPIC_MSI: vmmsi = (struct vm_lapic_msi *)data; error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); break; case VM_IOAPIC_ASSERT_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); break; case VM_IOAPIC_DEASSERT_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); break; case VM_IOAPIC_PULSE_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); break; case VM_MAP_MEMORY: seg = (struct vm_memory_segment *)data; error = vm_malloc(sc->vm, seg->gpa, seg->len); break; case VM_GET_MEMORY_SEG: seg = (struct vm_memory_segment *)data; seg->len = 0; (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg); error = 0; break; case VM_GET_REGISTER: vmreg = (struct vm_register *)data; error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, &vmreg->regval); break; case VM_SET_REGISTER: vmreg = (struct vm_register *)data; error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, vmreg->regval); break; case VM_SET_SEGMENT_DESCRIPTOR: vmsegdesc = (struct vm_seg_desc *)data; error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, vmsegdesc->regnum, &vmsegdesc->desc); break; case VM_GET_SEGMENT_DESCRIPTOR: vmsegdesc = (struct vm_seg_desc *)data; error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, vmsegdesc->regnum, &vmsegdesc->desc); break; case VM_GET_CAPABILITY: vmcap = (struct vm_capability *)data; error = vm_get_capability(sc->vm, vmcap->cpuid, vmcap->captype, &vmcap->capval); break; case VM_SET_CAPABILITY: vmcap = (struct vm_capability *)data; error = vm_set_capability(sc->vm, vmcap->cpuid, vmcap->captype, vmcap->capval); break; case VM_SET_X2APIC_STATE: x2apic = (struct vm_x2apic *)data; error = vm_set_x2apic_state(sc->vm, x2apic->cpuid, x2apic->state); break; case VM_GET_X2APIC_STATE: x2apic = (struct vm_x2apic *)data; error = vm_get_x2apic_state(sc->vm, x2apic->cpuid, &x2apic->state); break; case VM_GET_GPA_PMAP: gpapte = (struct vm_gpa_pte *)data; pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), gpapte->gpa, gpapte->pte, &gpapte->ptenum); error = 0; break; case VM_GET_HPET_CAPABILITIES: error = vhpet_getcap((struct vm_hpet_cap *)data); break; default: error = ENOTTY; break; } if (state_changed == 1) { vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); } else if (state_changed == 2) { for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); } done: /* Make sure that no handler returns a bogus value like ERESTART */ KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error)); return (error); } static int vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) { int error; struct vmmdev_softc *sc; sc = vmmdev_lookup2(cdev); if (sc != NULL && (nprot & PROT_EXEC) == 0) error = vm_get_memobj(sc->vm, *offset, size, offset, object); else error = EINVAL; return (error); } static void vmmdev_destroy(void *arg) { struct vmmdev_softc *sc = arg; if (sc->cdev != NULL) destroy_dev(sc->cdev); if (sc->vm != NULL) vm_destroy(sc->vm); if ((sc->flags & VSC_LINKED) != 0) { mtx_lock(&vmmdev_mtx); SLIST_REMOVE(&head, sc, vmmdev_softc, link); mtx_unlock(&vmmdev_mtx); } free(sc, M_VMMDEV); } static int sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) { int error; char buf[VM_MAX_NAMELEN]; struct vmmdev_softc *sc; struct cdev *cdev; strlcpy(buf, "beavis", sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(buf); if (sc == NULL || sc->cdev == NULL) { mtx_unlock(&vmmdev_mtx); return (EINVAL); } /* * The 'cdev' will be destroyed asynchronously when 'si_threadcount' * goes down to 0 so we should not do it again in the callback. */ cdev = sc->cdev; sc->cdev = NULL; mtx_unlock(&vmmdev_mtx); /* * Schedule the 'cdev' to be destroyed: * * - any new operations on this 'cdev' will return an error (ENXIO). * * - when the 'si_threadcount' dwindles down to zero the 'cdev' will * be destroyed and the callback will be invoked in a taskqueue * context. */ destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); return (0); } SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_vmm_destroy, "A", NULL); static struct cdevsw vmmdevsw = { .d_name = "vmmdev", .d_version = D_VERSION, .d_ioctl = vmmdev_ioctl, .d_mmap_single = vmmdev_mmap_single, .d_read = vmmdev_rw, .d_write = vmmdev_rw, }; static int sysctl_vmm_create(SYSCTL_HANDLER_ARGS) { int error; struct vm *vm; struct cdev *cdev; struct vmmdev_softc *sc, *sc2; char buf[VM_MAX_NAMELEN]; strlcpy(buf, "beavis", sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(buf); mtx_unlock(&vmmdev_mtx); if (sc != NULL) return (EEXIST); error = vm_create(buf, &vm); if (error != 0) return (error); sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); sc->vm = vm; /* * Lookup the name again just in case somebody sneaked in when we * dropped the lock. */ mtx_lock(&vmmdev_mtx); sc2 = vmmdev_lookup(buf); if (sc2 == NULL) { SLIST_INSERT_HEAD(&head, sc, link); sc->flags |= VSC_LINKED; } mtx_unlock(&vmmdev_mtx); if (sc2 != NULL) { vmmdev_destroy(sc); return (EEXIST); } error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); if (error != 0) { vmmdev_destroy(sc); return (error); } mtx_lock(&vmmdev_mtx); sc->cdev = cdev; sc->cdev->si_drv1 = sc; mtx_unlock(&vmmdev_mtx); return (0); } SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_vmm_create, "A", NULL); void vmmdev_init(void) { mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); } int vmmdev_cleanup(void) { int error; if (SLIST_EMPTY(&head)) error = 0; else error = EBUSY; return (error); } diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c index 96234a186ed1..8d915cd156d4 100644 --- a/sys/amd64/vmm/vmm_lapic.c +++ b/sys/amd64/vmm/vmm_lapic.c @@ -1,238 +1,265 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include "vmm_ipi.h" #include "vmm_ktr.h" #include "vmm_lapic.h" #include "vlapic.h" /* * Some MSI message definitions */ #define MSI_X86_ADDR_MASK 0xfff00000 #define MSI_X86_ADDR_BASE 0xfee00000 #define MSI_X86_ADDR_RH 0x00000008 /* Redirection Hint */ #define MSI_X86_ADDR_LOG 0x00000004 /* Destination Mode */ int lapic_pending_intr(struct vm *vm, int cpu) { struct vlapic *vlapic; vlapic = vm_lapic(vm, cpu); return (vlapic_pending_intr(vlapic)); } void lapic_intr_accepted(struct vm *vm, int cpu, int vector) { struct vlapic *vlapic; vlapic = vm_lapic(vm, cpu); vlapic_intr_accepted(vlapic, vector); } int lapic_set_intr(struct vm *vm, int cpu, int vector, bool level) { struct vlapic *vlapic; if (cpu < 0 || cpu >= VM_MAXCPU) return (EINVAL); if (vector < 32 || vector > 255) return (EINVAL); vlapic = vm_lapic(vm, cpu); vlapic_set_intr_ready(vlapic, vector, level); vcpu_notify_event(vm, cpu); return (0); } +int +lapic_set_local_intr(struct vm *vm, int cpu, int vector) +{ + struct vlapic *vlapic; + cpuset_t dmask; + int error; + + if (cpu < -1 || cpu >= VM_MAXCPU) + return (EINVAL); + + if (cpu == -1) + dmask = vm_active_cpus(vm); + else + CPU_SETOF(cpu, &dmask); + error = 0; + while ((cpu = CPU_FFS(&dmask)) != 0) { + cpu--; + CPU_CLR(cpu, &dmask); + vlapic = vm_lapic(vm, cpu); + error = vlapic_trigger_lvt(vlapic, vector); + if (error) + break; + } + + return (error); +} + int lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg) { int delmode, vec; uint32_t dest; bool phys; VM_CTR2(vm, "lapic MSI addr: %#lx msg: %#lx", addr, msg); if ((addr & MSI_X86_ADDR_MASK) != MSI_X86_ADDR_BASE) { VM_CTR1(vm, "lapic MSI invalid addr %#lx", addr); return (-1); } /* * Extract the x86-specific fields from the MSI addr/msg * params according to the Intel Arch spec, Vol3 Ch 10. * * The PCI specification does not support level triggered * MSI/MSI-X so ignore trigger level in 'msg'. * * The 'dest' is interpreted as a logical APIC ID if both * the Redirection Hint and Destination Mode are '1' and * physical otherwise. */ dest = (addr >> 12) & 0xff; phys = ((addr & (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)) != (MSI_X86_ADDR_RH | MSI_X86_ADDR_LOG)); delmode = msg & APIC_DELMODE_MASK; vec = msg & 0xff; VM_CTR3(vm, "lapic MSI %s dest %#x, vec %d", phys ? "physical" : "logical", dest, vec); vlapic_deliver_intr(vm, LAPIC_TRIG_EDGE, dest, phys, delmode, vec); return (0); } static boolean_t x2apic_msr(u_int msr) { if (msr >= 0x800 && msr <= 0xBFF) return (TRUE); else return (FALSE); } static u_int x2apic_msr_to_regoff(u_int msr) { return ((msr - 0x800) << 4); } boolean_t lapic_msr(u_int msr) { if (x2apic_msr(msr) || (msr == MSR_APICBASE)) return (TRUE); else return (FALSE); } int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu) { int error; u_int offset; struct vlapic *vlapic; vlapic = vm_lapic(vm, cpu); if (msr == MSR_APICBASE) { *rval = vlapic_get_apicbase(vlapic); error = 0; } else { offset = x2apic_msr_to_regoff(msr); error = vlapic_read(vlapic, offset, rval, retu); } return (error); } int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu) { int error; u_int offset; struct vlapic *vlapic; vlapic = vm_lapic(vm, cpu); if (msr == MSR_APICBASE) { vlapic_set_apicbase(vlapic, val); error = 0; } else { offset = x2apic_msr_to_regoff(msr); error = vlapic_write(vlapic, offset, val, retu); } return (error); } int lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size, void *arg) { int error; uint64_t off; struct vlapic *vlapic; off = gpa - DEFAULT_APIC_BASE; /* * Memory mapped local apic accesses must be 4 bytes wide and * aligned on a 16-byte boundary. */ if (size != 4 || off & 0xf) return (EINVAL); vlapic = vm_lapic(vm, cpu); error = vlapic_write(vlapic, off, wval, arg); return (error); } int lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size, void *arg) { int error; uint64_t off; struct vlapic *vlapic; off = gpa - DEFAULT_APIC_BASE; /* * Memory mapped local apic accesses must be 4 bytes wide and * aligned on a 16-byte boundary. */ if (size != 4 || off & 0xf) return (EINVAL); vlapic = vm_lapic(vm, cpu); error = vlapic_read(vlapic, off, rval, arg); return (error); } diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h index d7e75a960c03..c5c95aa69ff0 100644 --- a/sys/amd64/vmm/vmm_lapic.h +++ b/sys/amd64/vmm/vmm_lapic.h @@ -1,88 +1,95 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMM_LAPIC_H_ #define _VMM_LAPIC_H_ struct vm; boolean_t lapic_msr(u_int num); int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu); int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval, bool *retu); int lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size, void *arg); int lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size, void *arg); /* * Returns a vector between 32 and 255 if an interrupt is pending in the * IRR that can be delivered based on the current state of ISR and TPR. * * Note that the vector does not automatically transition to the ISR as a * result of calling this function. * * Returns -1 if there is no eligible vector that can be delivered to the * guest at this time. */ int lapic_pending_intr(struct vm *vm, int cpu); /* * Transition 'vector' from IRR to ISR. This function is called with the * vector returned by 'lapic_pending_intr()' when the guest is able to * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that * block interrupt delivery). */ void lapic_intr_accepted(struct vm *vm, int cpu, int vector); /* * Signals to the LAPIC that an interrupt at 'vector' needs to be generated * to the 'cpu', the state is recorded in IRR. */ int lapic_set_intr(struct vm *vm, int cpu, int vector, bool trig); #define LAPIC_TRIG_LEVEL true #define LAPIC_TRIG_EDGE false static __inline int lapic_intr_level(struct vm *vm, int cpu, int vector) { return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_LEVEL)); } static __inline int lapic_intr_edge(struct vm *vm, int cpu, int vector) { return (lapic_set_intr(vm, cpu, vector, LAPIC_TRIG_EDGE)); } +/* + * Triggers the LAPIC local interrupt (LVT) 'vector' on 'cpu'. 'cpu' can + * be set to -1 to trigger the interrupt on all CPUs. + */ +int lapic_set_local_intr(struct vm *vm, int cpu, int vector); + int lapic_intr_msi(struct vm *vm, uint64_t addr, uint64_t msg); + #endif diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index 32dd84243035..71c7bbdf0191 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -1,936 +1,946 @@ /*- * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * bhyve ACPI table generator. * * Create the minimal set of ACPI tables required to boot FreeBSD (and * hopefully other o/s's) by writing out ASL template files for each of * the tables and the compiling them to AML with the Intel iasl compiler. * The AML files are then read into guest memory. * * The tables are placed in the guest's ROM area just below 1MB physical, * above the MPTable. * * Layout * ------ * RSDP -> 0xf0400 (36 bytes fixed) * RSDT -> 0xf0440 (36 bytes + 4*N table addrs, 2 used) * XSDT -> 0xf0480 (36 bytes + 8*N table addrs, 2 used) * MADT -> 0xf0500 (depends on #CPUs) * FADT -> 0xf0600 (268 bytes) * HPET -> 0xf0740 (56 bytes) * FACS -> 0xf0780 (64 bytes) * DSDT -> 0xf0800 (variable - can go up to 0x100000) */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "acpi.h" /* * Define the base address of the ACPI tables, and the offsets to * the individual tables */ #define BHYVE_ACPI_BASE 0xf0400 #define RSDT_OFFSET 0x040 #define XSDT_OFFSET 0x080 #define MADT_OFFSET 0x100 #define FADT_OFFSET 0x200 #define HPET_OFFSET 0x340 #define FACS_OFFSET 0x380 #define DSDT_OFFSET 0x400 #define BHYVE_ASL_TEMPLATE "bhyve.XXXXXXX" #define BHYVE_ASL_SUFFIX ".aml" #define BHYVE_ASL_COMPILER "/usr/sbin/iasl" #define BHYVE_PM_TIMER_ADDR 0x408 static int basl_keep_temps; static int basl_verbose_iasl; static int basl_ncpu; static uint32_t basl_acpi_base = BHYVE_ACPI_BASE; static uint32_t hpet_capabilities; /* * Contains the full pathname of the template to be passed * to mkstemp/mktemps(3) */ static char basl_template[MAXPATHLEN]; static char basl_stemplate[MAXPATHLEN]; struct basl_fio { int fd; FILE *fp; char f_name[MAXPATHLEN]; }; #define EFPRINTF(...) \ err = fprintf(__VA_ARGS__); if (err < 0) goto err_exit; #define EFFLUSH(x) \ err = fflush(x); if (err != 0) goto err_exit; static int basl_fwrite_rsdp(FILE *fp) { int err; err = 0; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve RSDP template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0008]\t\tSignature : \"RSD PTR \"\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 43\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0001]\t\tRevision : 02\n"); EFPRINTF(fp, "[0004]\t\tRSDT Address : %08X\n", basl_acpi_base + RSDT_OFFSET); EFPRINTF(fp, "[0004]\t\tLength : 00000024\n"); EFPRINTF(fp, "[0008]\t\tXSDT Address : 00000000%08X\n", basl_acpi_base + XSDT_OFFSET); EFPRINTF(fp, "[0001]\t\tExtended Checksum : 00\n"); EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_rsdt(FILE *fp) { int err; err = 0; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve RSDT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"RSDT\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVRSDT \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); /* Add in pointers to the MADT, FADT and HPET */ EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : %08X\n", basl_acpi_base + MADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : %08X\n", basl_acpi_base + FADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : %08X\n", basl_acpi_base + HPET_OFFSET); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_xsdt(FILE *fp) { int err; err = 0; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve XSDT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"XSDT\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVXSDT \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); /* Add in pointers to the MADT, FADT and HPET */ EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : 00000000%08X\n", basl_acpi_base + MADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : 00000000%08X\n", basl_acpi_base + FADT_OFFSET); EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : 00000000%08X\n", basl_acpi_base + HPET_OFFSET); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_madt(FILE *fp) { int err; int i; err = 0; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve MADT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"APIC\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMADT \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0004]\t\tLocal Apic Address : FEE00000\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); EFPRINTF(fp, "\t\t\tPC-AT Compatibility : 1\n"); EFPRINTF(fp, "\n"); /* Add a Processor Local APIC entry for each CPU */ for (i = 0; i < basl_ncpu; i++) { EFPRINTF(fp, "[0001]\t\tSubtable Type : 00\n"); EFPRINTF(fp, "[0001]\t\tLength : 08\n"); /* iasl expects hex values for the proc and apic id's */ EFPRINTF(fp, "[0001]\t\tProcessor ID : %02x\n", i); EFPRINTF(fp, "[0001]\t\tLocal Apic ID : %02x\n", i); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); EFPRINTF(fp, "\t\t\tProcessor Enabled : 1\n"); EFPRINTF(fp, "\n"); } /* Always a single IOAPIC entry, with ID 0 */ EFPRINTF(fp, "[0001]\t\tSubtable Type : 01\n"); EFPRINTF(fp, "[0001]\t\tLength : 0C\n"); /* iasl expects a hex value for the i/o apic id */ EFPRINTF(fp, "[0001]\t\tI/O Apic ID : %02x\n", 0); EFPRINTF(fp, "[0001]\t\tReserved : 00\n"); EFPRINTF(fp, "[0004]\t\tAddress : fec00000\n"); EFPRINTF(fp, "[0004]\t\tInterrupt : 00000000\n"); EFPRINTF(fp, "\n"); /* Legacy IRQ0 is connected to pin 2 of the IOAPIC */ EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n"); EFPRINTF(fp, "[0001]\t\tLength : 0A\n"); EFPRINTF(fp, "[0001]\t\tBus : 00\n"); EFPRINTF(fp, "[0001]\t\tSource : 00\n"); EFPRINTF(fp, "[0004]\t\tInterrupt : 00000002\n"); EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n"); EFPRINTF(fp, "\t\t\tPolarity : 1\n"); EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n"); EFPRINTF(fp, "[0001]\t\tLength : 0A\n"); EFPRINTF(fp, "[0001]\t\tBus : 00\n"); EFPRINTF(fp, "[0001]\t\tSource : 09\n"); EFPRINTF(fp, "[0004]\t\tInterrupt : 00000009\n"); EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0000\n"); EFPRINTF(fp, "\t\t\tPolarity : 0\n"); EFPRINTF(fp, "\t\t\tTrigger Mode : 0\n"); EFPRINTF(fp, "\n"); + /* Local APIC NMI is connected to LINT 1 on all CPUs */ + EFPRINTF(fp, "[0001]\t\tSubtable Type : 04\n"); + EFPRINTF(fp, "[0001]\t\tLength : 06\n"); + EFPRINTF(fp, "[0001]\t\tProcessorId : FF\n"); + EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n"); + EFPRINTF(fp, "\t\t\tPolarity : 1\n"); + EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n"); + EFPRINTF(fp, "[0001]\t\tInterrupt : 01\n"); + EFPRINTF(fp, "\n"); + EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_fadt(FILE *fp) { int err; err = 0; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve FADT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"FACP\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 0000010C\n"); EFPRINTF(fp, "[0001]\t\tRevision : 05\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVFACP \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0004]\t\tFACS Address : %08X\n", basl_acpi_base + FACS_OFFSET); EFPRINTF(fp, "[0004]\t\tDSDT Address : %08X\n", basl_acpi_base + DSDT_OFFSET); EFPRINTF(fp, "[0001]\t\tModel : 00\n"); EFPRINTF(fp, "[0001]\t\tPM Profile : 00 [Unspecified]\n"); EFPRINTF(fp, "[0002]\t\tSCI Interrupt : 0009\n"); EFPRINTF(fp, "[0004]\t\tSMI Command Port : 00000000\n"); EFPRINTF(fp, "[0001]\t\tACPI Enable Value : 00\n"); EFPRINTF(fp, "[0001]\t\tACPI Disable Value : 00\n"); EFPRINTF(fp, "[0001]\t\tS4BIOS Command : 00\n"); EFPRINTF(fp, "[0001]\t\tP-State Control : 00\n"); EFPRINTF(fp, "[0004]\t\tPM1A Event Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM1B Event Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM1A Control Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM1B Control Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n", BHYVE_PM_TIMER_ADDR); EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : 00000000\n"); EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n"); EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n"); EFPRINTF(fp, "[0001]\t\tPM1 Control Block Length : 02\n"); EFPRINTF(fp, "[0001]\t\tPM2 Control Block Length : 00\n"); EFPRINTF(fp, "[0001]\t\tPM Timer Block Length : 04\n"); EFPRINTF(fp, "[0001]\t\tGPE0 Block Length : 00\n"); EFPRINTF(fp, "[0001]\t\tGPE1 Block Length : 00\n"); EFPRINTF(fp, "[0001]\t\tGPE1 Base Offset : 00\n"); EFPRINTF(fp, "[0001]\t\t_CST Support : 00\n"); EFPRINTF(fp, "[0002]\t\tC2 Latency : 0000\n"); EFPRINTF(fp, "[0002]\t\tC3 Latency : 0000\n"); EFPRINTF(fp, "[0002]\t\tCPU Cache Size : 0000\n"); EFPRINTF(fp, "[0002]\t\tCache Flush Stride : 0000\n"); EFPRINTF(fp, "[0001]\t\tDuty Cycle Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n"); EFPRINTF(fp, "[0001]\t\tRTC Century Index : 00\n"); EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n"); EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n"); EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n"); EFPRINTF(fp, "\t\t\tVGA Not Present (V4) : 1\n"); EFPRINTF(fp, "\t\t\tMSI Not Supported (V4) : 0\n"); EFPRINTF(fp, "\t\t\tPCIe ASPM Not Supported (V4) : 1\n"); EFPRINTF(fp, "\t\t\tCMOS RTC Not Present (V5) : 0\n"); EFPRINTF(fp, "[0001]\t\tReserved : 00\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n"); EFPRINTF(fp, "\t\t\tWBINVD instruction is operational (V1) : 1\n"); EFPRINTF(fp, "\t\t\tWBINVD flushes all caches (V1) : 0\n"); EFPRINTF(fp, "\t\t\tAll CPUs support C1 (V1) : 0\n"); EFPRINTF(fp, "\t\t\tC2 works on MP system (V1) : 0\n"); EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 1\n"); EFPRINTF(fp, "\t\t\tControl Method Sleep Button (V1) : 1\n"); EFPRINTF(fp, "\t\t\tRTC wake not in fixed reg space (V1) : 0\n"); EFPRINTF(fp, "\t\t\tRTC can wake system from S4 (V1) : 0\n"); EFPRINTF(fp, "\t\t\t32-bit PM Timer (V1) : 1\n"); EFPRINTF(fp, "\t\t\tDocking Supported (V1) : 0\n"); EFPRINTF(fp, "\t\t\tReset Register Supported (V2) : 0\n"); EFPRINTF(fp, "\t\t\tSealed Case (V3) : 0\n"); EFPRINTF(fp, "\t\t\tHeadless - No Video (V3) : 1\n"); EFPRINTF(fp, "\t\t\tUse native instr after SLP_TYPx (V3) : 0\n"); EFPRINTF(fp, "\t\t\tPCIEXP_WAK Bits Supported (V4) : 0\n"); EFPRINTF(fp, "\t\t\tUse Platform Timer (V4) : 0\n"); EFPRINTF(fp, "\t\t\tRTC_STS valid on S4 wake (V4) : 0\n"); EFPRINTF(fp, "\t\t\tRemote Power-on capable (V4) : 0\n"); EFPRINTF(fp, "\t\t\tUse APIC Cluster Model (V4) : 0\n"); EFPRINTF(fp, "\t\t\tUse APIC Physical Destination Mode (V4) : 1\n"); EFPRINTF(fp, "\t\t\tHardware Reduced (V5) : 0\n"); EFPRINTF(fp, "\t\t\tLow Power S0 Idle (V5) : 0\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tReset Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0001]\t\tValue to cause reset : 00\n"); EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); EFPRINTF(fp, "[0008]\t\tFACS Address : 00000000%08X\n", basl_acpi_base + FACS_OFFSET); EFPRINTF(fp, "[0008]\t\tDSDT Address : 00000000%08X\n", basl_acpi_base + DSDT_OFFSET); EFPRINTF(fp, "[0012]\t\tPM1A Event Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 20\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tPM1B Event Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tPM1A Control Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 10\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000001\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tPM1B Control Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tPM2 Control Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); /* Valid for bhyve */ EFPRINTF(fp, "[0012]\t\tPM Timer Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 32\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n", BHYVE_PM_TIMER_ADDR); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 80\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tGPE1 Block : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tSleep Control Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0012]\t\tSleep Status Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_hpet(FILE *fp) { int err; err = 0; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve HPET template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"HPET\"\n"); EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVHPET \"\n"); EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); /* iasl will fill in the compiler ID/revision fields */ EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0004]\t\tTimer Block ID : %08X\n", hpet_capabilities); EFPRINTF(fp, "[0012]\t\tTimer Block Register : [Generic Address Structure]\n"); EFPRINTF(fp, "[0001]\t\tSpace ID : 00 [SystemMemory]\n"); EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); EFPRINTF(fp, "[0008]\t\tAddress : 00000000FED00000\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, "[0001]\t\tHPET Number : 00\n"); EFPRINTF(fp, "[0002]\t\tMinimum Clock Ticks : 0000\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); EFPRINTF(fp, "\t\t\t4K Page Protect : 1\n"); EFPRINTF(fp, "\t\t\t64K Page Protect : 0\n"); EFPRINTF(fp, "\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_facs(FILE *fp) { int err; err = 0; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve FACS template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "[0004]\t\tSignature : \"FACS\"\n"); EFPRINTF(fp, "[0004]\t\tLength : 00000040\n"); EFPRINTF(fp, "[0004]\t\tHardware Signature : 00000000\n"); EFPRINTF(fp, "[0004]\t\t32 Firmware Waking Vector : 00000000\n"); EFPRINTF(fp, "[0004]\t\tGlobal Lock : 00000000\n"); EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n"); EFPRINTF(fp, "\t\t\tS4BIOS Support Present : 0\n"); EFPRINTF(fp, "\t\t\t64-bit Wake Supported (V2) : 0\n"); EFPRINTF(fp, "[0008]\t\t64 Firmware Waking Vector : 0000000000000000\n"); EFPRINTF(fp, "[0001]\t\tVersion : 02\n"); EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); EFPRINTF(fp, "[0004]\t\tOspmFlags (decoded below) : 00000000\n"); EFPRINTF(fp, "\t\t\t64-bit Wake Env Required (V2) : 0\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_fwrite_dsdt(FILE *fp) { int err; err = 0; EFPRINTF(fp, "/*\n"); EFPRINTF(fp, " * bhyve DSDT template\n"); EFPRINTF(fp, " */\n"); EFPRINTF(fp, "DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2," "\"BHYVE \", \"BVDSDT \", 0x00000001)\n"); EFPRINTF(fp, "{\n"); EFPRINTF(fp, " Scope (_SB)\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " Device (PCI0)\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " Name (_HID, EisaId (\"PNP0A03\"))\n"); EFPRINTF(fp, " Name (_ADR, Zero)\n"); EFPRINTF(fp, " Name (_UID, One)\n"); EFPRINTF(fp, " Name (_CRS, ResourceTemplate ()\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " WordBusNumber (ResourceProducer, MinFixed," "MaxFixed, PosDecode,\n"); EFPRINTF(fp, " 0x0000, // Granularity\n"); EFPRINTF(fp, " 0x0000, // Range Minimum\n"); EFPRINTF(fp, " 0x00FF, // Range Maximum\n"); EFPRINTF(fp, " 0x0000, // Transl Offset\n"); EFPRINTF(fp, " 0x0100, // Length\n"); EFPRINTF(fp, " ,, )\n"); EFPRINTF(fp, " IO (Decode16,\n"); EFPRINTF(fp, " 0x0CF8, // Range Minimum\n"); EFPRINTF(fp, " 0x0CF8, // Range Maximum\n"); EFPRINTF(fp, " 0x01, // Alignment\n"); EFPRINTF(fp, " 0x08, // Length\n"); EFPRINTF(fp, " )\n"); EFPRINTF(fp, " WordIO (ResourceProducer, MinFixed, MaxFixed," "PosDecode, EntireRange,\n"); EFPRINTF(fp, " 0x0000, // Granularity\n"); EFPRINTF(fp, " 0x0000, // Range Minimum\n"); EFPRINTF(fp, " 0x0CF7, // Range Maximum\n"); EFPRINTF(fp, " 0x0000, // Transl Offset\n"); EFPRINTF(fp, " 0x0CF8, // Length\n"); EFPRINTF(fp, " ,, , TypeStatic)\n"); EFPRINTF(fp, " WordIO (ResourceProducer, MinFixed, MaxFixed," "PosDecode, EntireRange,\n"); EFPRINTF(fp, " 0x0000, // Granularity\n"); EFPRINTF(fp, " 0x0D00, // Range Minimum\n"); EFPRINTF(fp, " 0xFFFF, // Range Maximum\n"); EFPRINTF(fp, " 0x0000, // Transl Offset\n"); EFPRINTF(fp, " 0xF300, // Length\n"); EFPRINTF(fp, " ,, , TypeStatic)\n"); EFPRINTF(fp, " })\n"); EFPRINTF(fp, " }\n"); EFPRINTF(fp, " }\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, " Scope (_SB.PCI0)\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " Device (ISA)\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " Name (_ADR, 0x00010000)\n"); EFPRINTF(fp, " OperationRegion (P40C, PCI_Config, 0x60, 0x04)\n"); EFPRINTF(fp, " }\n"); EFPRINTF(fp, " Device (HPET)\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " Name (_HID, EISAID(\"PNP0103\"))\n"); EFPRINTF(fp, " Name (_UID, 0)\n"); EFPRINTF(fp, " Name (_CRS, ResourceTemplate ()\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " DWordMemory (ResourceConsumer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,\n"); EFPRINTF(fp, " 0x00000000,\n"); EFPRINTF(fp, " 0xFED00000,\n"); EFPRINTF(fp, " 0xFED003FF,\n"); EFPRINTF(fp, " 0x00000000,\n"); EFPRINTF(fp, " 0x00000400\n"); EFPRINTF(fp, " )\n"); EFPRINTF(fp, " })\n"); EFPRINTF(fp, " }\n"); EFPRINTF(fp, " }\n"); EFPRINTF(fp, "\n"); EFPRINTF(fp, " Scope (_SB.PCI0.ISA)\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " Device (RTC)\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " Name (_HID, EisaId (\"PNP0B00\"))\n"); EFPRINTF(fp, " Name (_CRS, ResourceTemplate ()\n"); EFPRINTF(fp, " {\n"); EFPRINTF(fp, " IO (Decode16,\n"); EFPRINTF(fp, " 0x0070, // Range Minimum\n"); EFPRINTF(fp, " 0x0070, // Range Maximum\n"); EFPRINTF(fp, " 0x10, // Alignment\n"); EFPRINTF(fp, " 0x02, // Length\n"); EFPRINTF(fp, " )\n"); EFPRINTF(fp, " IRQNoFlags ()\n"); EFPRINTF(fp, " {8}\n"); EFPRINTF(fp, " IO (Decode16,\n"); EFPRINTF(fp, " 0x0072, // Range Minimum\n"); EFPRINTF(fp, " 0x0072, // Range Maximum\n"); EFPRINTF(fp, " 0x02, // Alignment\n"); EFPRINTF(fp, " 0x06, // Length\n"); EFPRINTF(fp, " )\n"); EFPRINTF(fp, " })\n"); EFPRINTF(fp, " }\n"); EFPRINTF(fp, " }\n"); EFPRINTF(fp, "}\n"); EFFLUSH(fp); return (0); err_exit: return (errno); } static int basl_open(struct basl_fio *bf, int suffix) { int err; err = 0; if (suffix) { strncpy(bf->f_name, basl_stemplate, MAXPATHLEN); bf->fd = mkstemps(bf->f_name, strlen(BHYVE_ASL_SUFFIX)); } else { strncpy(bf->f_name, basl_template, MAXPATHLEN); bf->fd = mkstemp(bf->f_name); } if (bf->fd > 0) { bf->fp = fdopen(bf->fd, "w+"); if (bf->fp == NULL) { unlink(bf->f_name); close(bf->fd); } } else { err = 1; } return (err); } static void basl_close(struct basl_fio *bf) { if (!basl_keep_temps) unlink(bf->f_name); fclose(bf->fp); } static int basl_start(struct basl_fio *in, struct basl_fio *out) { int err; err = basl_open(in, 0); if (!err) { err = basl_open(out, 1); if (err) { basl_close(in); } } return (err); } static void basl_end(struct basl_fio *in, struct basl_fio *out) { basl_close(in); basl_close(out); } static int basl_load(struct vmctx *ctx, int fd, uint64_t off) { struct stat sb; void *gaddr; if (fstat(fd, &sb) < 0) return (errno); gaddr = paddr_guest2host(ctx, basl_acpi_base + off, sb.st_size); if (gaddr == NULL) return (EFAULT); if (read(fd, gaddr, sb.st_size) < 0) return (errno); return (0); } static int basl_compile(struct vmctx *ctx, int (*fwrite_section)(FILE *), uint64_t offset) { struct basl_fio io[2]; static char iaslbuf[3*MAXPATHLEN + 10]; char *fmt; int err; err = basl_start(&io[0], &io[1]); if (!err) { err = (*fwrite_section)(io[0].fp); if (!err) { /* * iasl sends the results of the compilation to * stdout. Shut this down by using the shell to * redirect stdout to /dev/null, unless the user * has requested verbose output for debugging * purposes */ fmt = basl_verbose_iasl ? "%s -p %s %s" : "/bin/sh -c \"%s -p %s %s\" 1> /dev/null"; snprintf(iaslbuf, sizeof(iaslbuf), fmt, BHYVE_ASL_COMPILER, io[1].f_name, io[0].f_name); err = system(iaslbuf); if (!err) { /* * Copy the aml output file into guest * memory at the specified location */ err = basl_load(ctx, io[1].fd, offset); } } basl_end(&io[0], &io[1]); } return (err); } static int basl_make_templates(void) { const char *tmpdir; int err; int len; err = 0; /* * */ if ((tmpdir = getenv("BHYVE_TMPDIR")) == NULL || *tmpdir == '\0' || (tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') { tmpdir = _PATH_TMP; } len = strlen(tmpdir); if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1) < MAXPATHLEN) { strcpy(basl_template, tmpdir); while (len > 0 && basl_template[len - 1] == '/') len--; basl_template[len] = '/'; strcpy(&basl_template[len + 1], BHYVE_ASL_TEMPLATE); } else err = E2BIG; if (!err) { /* * len has been intialized (and maybe adjusted) above */ if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1 + sizeof(BHYVE_ASL_SUFFIX)) < MAXPATHLEN) { strcpy(basl_stemplate, tmpdir); basl_stemplate[len] = '/'; strcpy(&basl_stemplate[len + 1], BHYVE_ASL_TEMPLATE); len = strlen(basl_stemplate); strcpy(&basl_stemplate[len], BHYVE_ASL_SUFFIX); } else err = E2BIG; } return (err); } static struct { int (*wsect)(FILE *fp); uint64_t offset; } basl_ftables[] = { { basl_fwrite_rsdp, 0}, { basl_fwrite_rsdt, RSDT_OFFSET }, { basl_fwrite_xsdt, XSDT_OFFSET }, { basl_fwrite_madt, MADT_OFFSET }, { basl_fwrite_fadt, FADT_OFFSET }, { basl_fwrite_hpet, HPET_OFFSET }, { basl_fwrite_facs, FACS_OFFSET }, { basl_fwrite_dsdt, DSDT_OFFSET }, { NULL } }; int acpi_build(struct vmctx *ctx, int ncpu) { int err; int i; basl_ncpu = ncpu; err = vm_get_hpet_capabilities(ctx, &hpet_capabilities); if (err != 0) return (err); /* * For debug, allow the user to have iasl compiler output sent * to stdout rather than /dev/null */ if (getenv("BHYVE_ACPI_VERBOSE_IASL")) basl_verbose_iasl = 1; /* * Allow the user to keep the generated ASL files for debugging * instead of deleting them following use */ if (getenv("BHYVE_ACPI_KEEPTMPS")) basl_keep_temps = 1; i = 0; err = basl_make_templates(); /* * Run through all the ASL files, compiling them and * copying them into guest memory */ while (!err && basl_ftables[i].wsect != NULL) { err = basl_compile(ctx, basl_ftables[i].wsect, basl_ftables[i].offset); i++; } return (err); } diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c index 3d45cc6cd77b..50e4e203807b 100644 --- a/usr.sbin/bhyve/mptbl.c +++ b/usr.sbin/bhyve/mptbl.c @@ -1,288 +1,320 @@ /*- * Copyright (c) 2012 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include "bhyverun.h" #include "mptbl.h" #define MPTABLE_BASE 0xF0000 /* floating pointer length + maximum length of configuration table */ #define MPTABLE_MAX_LENGTH (65536 + 16) #define LAPIC_PADDR 0xFEE00000 #define LAPIC_VERSION 16 #define IOAPIC_PADDR 0xFEC00000 #define IOAPIC_VERSION 0x11 #define MP_SPECREV 4 #define MPFP_SIG "_MP_" /* Configuration header defines */ #define MPCH_SIG "PCMP" #define MPCH_OEMID "BHyVe " #define MPCH_OEMID_LEN 8 #define MPCH_PRODID "Hypervisor " #define MPCH_PRODID_LEN 12 /* Processor entry defines */ #define MPEP_SIG_FAMILY 6 /* XXX bhyve should supply this */ #define MPEP_SIG_MODEL 26 #define MPEP_SIG_STEPPING 5 #define MPEP_SIG \ ((MPEP_SIG_FAMILY << 8) | \ (MPEP_SIG_MODEL << 4) | \ (MPEP_SIG_STEPPING)) #define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */ +/* Number of local intr entries */ +#define MPEII_NUM_LOCAL_IRQ 2 + /* Number of i/o intr entries */ #define MPEII_MAX_IRQ 24 /* Bus entry defines */ #define MPE_NUM_BUSES 2 #define MPE_BUSNAME_LEN 6 #define MPE_BUSNAME_ISA "ISA " #define MPE_BUSNAME_PCI "PCI " static void *oem_tbl_start; static int oem_tbl_size; static uint8_t mpt_compute_checksum(void *base, size_t len) { uint8_t *bytes; uint8_t sum; for(bytes = base, sum = 0; len > 0; len--) { sum += *bytes++; } return (256 - sum); } static void mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa) { memset(mpfp, 0, sizeof(*mpfp)); memcpy(mpfp->signature, MPFP_SIG, 4); mpfp->pap = gpa + sizeof(*mpfp); mpfp->length = 1; mpfp->spec_rev = MP_SPECREV; mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp)); } static void mpt_build_mpch(mpcth_t mpch) { memset(mpch, 0, sizeof(*mpch)); memcpy(mpch->signature, MPCH_SIG, 4); mpch->spec_rev = MP_SPECREV; memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN); memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN); mpch->apic_address = LAPIC_PADDR; } static void mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu) { int i; for (i = 0; i < ncpu; i++) { memset(mpep, 0, sizeof(*mpep)); mpep->type = MPCT_ENTRY_PROCESSOR; mpep->apic_id = i; // XXX mpep->apic_version = LAPIC_VERSION; mpep->cpu_flags = PROCENTRY_FLAG_EN; if (i == 0) mpep->cpu_flags |= PROCENTRY_FLAG_BP; mpep->cpu_signature = MPEP_SIG; mpep->feature_flags = MPEP_FEATURES; mpep++; } } +static void +mpt_build_localint_entries(int_entry_ptr mpie) +{ + + /* Hardcode LINT0 as ExtINT on all CPUs. */ + memset(mpie, 0, sizeof(*mpie)); + mpie->type = MPCT_ENTRY_LOCAL_INT; + mpie->int_type = INTENTRY_TYPE_EXTINT; + mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | + INTENTRY_FLAGS_TRIGGER_CONFORM; + mpie->dst_apic_id = 0xff; + mpie->dst_apic_int = 0; + mpie++; + + /* Hardcode LINT1 as NMI on all CPUs. */ + memset(mpie, 0, sizeof(*mpie)); + mpie->type = MPCT_ENTRY_LOCAL_INT; + mpie->int_type = INTENTRY_TYPE_NMI; + mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | + INTENTRY_FLAGS_TRIGGER_CONFORM; + mpie->dst_apic_id = 0xff; + mpie->dst_apic_int = 1; +} + static void mpt_build_bus_entries(bus_entry_ptr mpeb) { memset(mpeb, 0, sizeof(*mpeb)); mpeb->type = MPCT_ENTRY_BUS; mpeb->bus_id = 0; memcpy(mpeb->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN); mpeb++; memset(mpeb, 0, sizeof(*mpeb)); mpeb->type = MPCT_ENTRY_BUS; mpeb->bus_id = 1; memcpy(mpeb->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN); } static void mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id) { memset(mpei, 0, sizeof(*mpei)); mpei->type = MPCT_ENTRY_IOAPIC; mpei->apic_id = id; mpei->apic_version = IOAPIC_VERSION; mpei->apic_flags = IOAPICENTRY_FLAG_EN; mpei->apic_address = IOAPIC_PADDR; } static void mpt_build_ioint_entries(int_entry_ptr mpie, int num_pins, int id) { int pin; /* * The following config is taken from kernel mptable.c * mptable_parse_default_config_ints(...), for now * just use the default config, tweek later if needed. */ /* Run through all 16 pins. */ for (pin = 0; pin < num_pins; pin++) { memset(mpie, 0, sizeof(*mpie)); mpie->type = MPCT_ENTRY_INT; mpie->src_bus_id = 1; mpie->dst_apic_id = id; /* * All default configs route IRQs from bus 0 to the first 16 * pins of the first I/O APIC with an APIC ID of 2. */ mpie->dst_apic_int = pin; switch (pin) { case 0: /* Pin 0 is an ExtINT pin. */ mpie->int_type = INTENTRY_TYPE_EXTINT; break; case 2: /* IRQ 0 is routed to pin 2. */ mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_irq = 0; break; case 5: case 10: case 11: /* * PCI Irqs set to level triggered. */ mpie->int_flags = INTENTRY_FLAGS_TRIGGER_LEVEL; mpie->src_bus_id = 0; /* fall through.. */ default: /* All other pins are identity mapped. */ mpie->int_type = INTENTRY_TYPE_INT; mpie->src_bus_irq = pin; break; } mpie++; } } void mptable_add_oemtbl(void *tbl, int tblsz) { oem_tbl_start = tbl; oem_tbl_size = tblsz; } int mptable_build(struct vmctx *ctx, int ncpu) { mpcth_t mpch; bus_entry_ptr mpeb; io_apic_entry_ptr mpei; proc_entry_ptr mpep; mpfps_t mpfp; int_entry_ptr mpie; char *curraddr; char *startaddr; startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH); if (startaddr == NULL) { printf("mptable requires mapped mem\n"); return (ENOMEM); } curraddr = startaddr; mpfp = (mpfps_t)curraddr; mpt_build_mpfp(mpfp, MPTABLE_BASE); curraddr += sizeof(*mpfp); mpch = (mpcth_t)curraddr; mpt_build_mpch(mpch); curraddr += sizeof(*mpch); mpep = (proc_entry_ptr)curraddr; mpt_build_proc_entries(mpep, ncpu); curraddr += sizeof(*mpep) * ncpu; mpch->entry_count += ncpu; mpeb = (bus_entry_ptr) curraddr; mpt_build_bus_entries(mpeb); curraddr += sizeof(*mpeb) * MPE_NUM_BUSES; mpch->entry_count += MPE_NUM_BUSES; mpei = (io_apic_entry_ptr)curraddr; mpt_build_ioapic_entries(mpei, 0); curraddr += sizeof(*mpei); mpch->entry_count++; mpie = (int_entry_ptr) curraddr; mpt_build_ioint_entries(mpie, MPEII_MAX_IRQ, 0); curraddr += sizeof(*mpie) * MPEII_MAX_IRQ; mpch->entry_count += MPEII_MAX_IRQ; + mpie = (int_entry_ptr)curraddr; + mpt_build_localint_entries(mpie); + curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ; + mpch->entry_count += MPEII_NUM_LOCAL_IRQ; + if (oem_tbl_start) { mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE; mpch->oem_table_size = oem_tbl_size; memcpy(curraddr, oem_tbl_start, oem_tbl_size); } mpch->base_table_length = curraddr - (char *)mpch; mpch->checksum = mpt_compute_checksum(mpch, mpch->base_table_length); return (0); } diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c index 1b68776dca2e..c697492230de 100644 --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -1,1532 +1,1543 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "intel/vmcs.h" #define MB (1UL << 20) #define GB (1UL << 30) #define REQ_ARG required_argument #define NO_ARG no_argument #define OPT_ARG optional_argument static const char *progname; static void usage(void) { (void)fprintf(stderr, "Usage: %s --vm=\n" " [--cpu=]\n" " [--create]\n" " [--destroy]\n" " [--get-all]\n" " [--get-stats]\n" " [--set-desc-ds]\n" " [--get-desc-ds]\n" " [--set-desc-es]\n" " [--get-desc-es]\n" " [--set-desc-gs]\n" " [--get-desc-gs]\n" " [--set-desc-fs]\n" " [--get-desc-fs]\n" " [--set-desc-cs]\n" " [--get-desc-cs]\n" " [--set-desc-ss]\n" " [--get-desc-ss]\n" " [--set-desc-tr]\n" " [--get-desc-tr]\n" " [--set-desc-ldtr]\n" " [--get-desc-ldtr]\n" " [--set-desc-gdtr]\n" " [--get-desc-gdtr]\n" " [--set-desc-idtr]\n" " [--get-desc-idtr]\n" " [--run]\n" " [--capname=]\n" " [--getcap]\n" " [--setcap=<0|1>]\n" " [--desc-base=]\n" " [--desc-limit=]\n" " [--desc-access=]\n" " [--set-cr0=]\n" " [--get-cr0]\n" " [--set-cr3=]\n" " [--get-cr3]\n" " [--set-cr4=]\n" " [--get-cr4]\n" " [--set-dr7=]\n" " [--get-dr7]\n" " [--set-rsp=]\n" " [--get-rsp]\n" " [--set-rip=]\n" " [--get-rip]\n" " [--get-rax]\n" " [--set-rax=]\n" " [--get-rbx]\n" " [--get-rcx]\n" " [--get-rdx]\n" " [--get-rsi]\n" " [--get-rdi]\n" " [--get-rbp]\n" " [--get-r8]\n" " [--get-r9]\n" " [--get-r10]\n" " [--get-r11]\n" " [--get-r12]\n" " [--get-r13]\n" " [--get-r14]\n" " [--get-r15]\n" " [--set-rflags=]\n" " [--get-rflags]\n" " [--set-cs]\n" " [--get-cs]\n" " [--set-ds]\n" " [--get-ds]\n" " [--set-es]\n" " [--get-es]\n" " [--set-fs]\n" " [--get-fs]\n" " [--set-gs]\n" " [--get-gs]\n" " [--set-ss]\n" " [--get-ss]\n" " [--get-tr]\n" " [--get-ldtr]\n" " [--get-vmcs-pinbased-ctls]\n" " [--get-vmcs-procbased-ctls]\n" " [--get-vmcs-procbased-ctls2]\n" " [--get-vmcs-entry-interruption-info]\n" " [--set-vmcs-entry-interruption-info=]\n" " [--get-vmcs-eptp]\n" " [--get-vmcs-guest-physical-address\n" " [--get-vmcs-guest-linear-address\n" " [--set-vmcs-exception-bitmap]\n" " [--get-vmcs-exception-bitmap]\n" " [--get-vmcs-io-bitmap-address]\n" " [--get-vmcs-tsc-offset]\n" " [--get-vmcs-guest-pat]\n" " [--get-vmcs-host-pat]\n" " [--get-vmcs-host-cr0]\n" " [--get-vmcs-host-cr3]\n" " [--get-vmcs-host-cr4]\n" " [--get-vmcs-host-rip]\n" " [--get-vmcs-host-rsp]\n" " [--get-vmcs-cr0-mask]\n" " [--get-vmcs-cr0-shadow]\n" " [--get-vmcs-cr4-mask]\n" " [--get-vmcs-cr4-shadow]\n" " [--get-vmcs-cr3-targets]\n" " [--get-vmcs-apic-access-address]\n" " [--get-vmcs-virtual-apic-address]\n" " [--get-vmcs-tpr-threshold]\n" " [--get-vmcs-msr-bitmap]\n" " [--get-vmcs-msr-bitmap-address]\n" " [--get-vmcs-vpid]\n" " [--get-vmcs-ple-gap]\n" " [--get-vmcs-ple-window]\n" " [--get-vmcs-instruction-error]\n" " [--get-vmcs-exit-ctls]\n" " [--get-vmcs-entry-ctls]\n" " [--get-vmcs-guest-sysenter]\n" " [--get-vmcs-link]\n" " [--get-vmcs-exit-reason]\n" " [--get-vmcs-exit-qualification]\n" " [--get-vmcs-exit-interruption-info]\n" " [--get-vmcs-exit-interruption-error]\n" " [--get-vmcs-interruptibility]\n" " [--set-x2apic-state=]\n" " [--get-x2apic-state]\n" " [--unassign-pptdev=]\n" " [--set-mem=]\n" " [--get-lowmem]\n" " [--get-highmem]\n" " [--get-gpa-pmap]\n" + " [--assert-lapic-lvt=]\n" " [--inject-nmi]\n", progname); exit(1); } static int get_stats, getcap, setcap, capval, get_gpa_pmap; -static int inject_nmi; +static int inject_nmi, assert_lapic_lvt; static const char *capname; static int create, destroy, get_lowmem, get_highmem; static uint64_t memsize; static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; static int set_efer, get_efer; static int set_dr7, get_dr7; static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, get_rflags; static int set_rax, get_rax; static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp; static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15; static int set_desc_ds, get_desc_ds; static int set_desc_es, get_desc_es; static int set_desc_fs, get_desc_fs; static int set_desc_gs, get_desc_gs; static int set_desc_cs, get_desc_cs; static int set_desc_ss, get_desc_ss; static int set_desc_gdtr, get_desc_gdtr; static int set_desc_idtr, get_desc_idtr; static int set_desc_tr, get_desc_tr; static int set_desc_ldtr, get_desc_ldtr; static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; static int set_x2apic_state, get_x2apic_state; enum x2apic_state x2apic_state; static int unassign_pptdev, bus, slot, func; static int run; /* * VMCS-specific fields */ static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2; static int get_eptp, get_io_bitmap, get_tsc_offset; static int get_vmcs_entry_interruption_info, set_vmcs_entry_interruption_info; static int get_vmcs_interruptibility; uint32_t vmcs_entry_interruption_info; static int get_vmcs_gpa, get_vmcs_gla; static int get_exception_bitmap, set_exception_bitmap, exception_bitmap; static int get_cr0_mask, get_cr0_shadow; static int get_cr4_mask, get_cr4_shadow; static int get_cr3_targets; static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold; static int get_msr_bitmap, get_msr_bitmap_address; static int get_vpid, get_ple_gap, get_ple_window; static int get_inst_err, get_exit_ctls, get_entry_ctls; static int get_host_cr0, get_host_cr3, get_host_cr4; static int get_host_rip, get_host_rsp; static int get_guest_pat, get_host_pat; static int get_guest_sysenter, get_vmcs_link; static int get_vmcs_exit_reason, get_vmcs_exit_qualification; static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; static uint64_t desc_base; static uint32_t desc_limit, desc_access; static int get_all; static void dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) { printf("vm exit[%d]\n", vcpu); printf("\trip\t\t0x%016lx\n", vmexit->rip); printf("\tinst_length\t%d\n", vmexit->inst_length); switch (vmexit->exitcode) { case VM_EXITCODE_INOUT: printf("\treason\t\tINOUT\n"); printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT"); printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes); printf("\tflags\t\t%s%s\n", vmexit->u.inout.string ? "STRING " : "", vmexit->u.inout.rep ? "REP " : ""); printf("\tport\t\t0x%04x\n", vmexit->u.inout.port); printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax); break; case VM_EXITCODE_VMX: printf("\treason\t\tVMX\n"); printf("\terror\t\t%d\n", vmexit->u.vmx.error); printf("\texit_reason\t0x%08x (%u)\n", vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason); printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); break; default: printf("*** unknown vm run exitcode %d\n", vmexit->exitcode); break; } } static int dump_vmcs_msr_bitmap(int vcpu, u_long addr) { int error, fd, byte, bit, readable, writeable; u_int msr; const char *bitmap; error = -1; bitmap = MAP_FAILED; fd = open("/dev/mem", O_RDONLY, 0); if (fd < 0) goto done; bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, 0, fd, addr); if (bitmap == MAP_FAILED) goto done; for (msr = 0; msr < 0x2000; msr++) { byte = msr / 8; bit = msr & 0x7; /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; if (readable || writeable) { printf("msr 0x%08x[%d]\t\t%c%c\n", msr, vcpu, readable ? 'R' : '-', writeable ? 'W' : '-'); } /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ byte += 1024; readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; if (readable || writeable) { printf("msr 0x%08x[%d]\t\t%c%c\n", 0xc0000000 + msr, vcpu, readable ? 'R' : '-', writeable ? 'W' : '-'); } } error = 0; done: if (bitmap != MAP_FAILED) munmap((void *)bitmap, PAGE_SIZE); if (fd >= 0) close(fd); return (error); } static int vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) { return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val)); } static int vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) { return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val)); } enum { VMNAME = 1000, /* avoid collision with return values from getopt */ VCPU, SET_MEM, SET_EFER, SET_CR0, SET_CR3, SET_CR4, SET_DR7, SET_RSP, SET_RIP, SET_RAX, SET_RFLAGS, DESC_BASE, DESC_LIMIT, DESC_ACCESS, SET_CS, SET_DS, SET_ES, SET_FS, SET_GS, SET_SS, SET_TR, SET_LDTR, SET_X2APIC_STATE, SET_VMCS_EXCEPTION_BITMAP, SET_VMCS_ENTRY_INTERRUPTION_INFO, SET_CAP, CAPNAME, UNASSIGN_PPTDEV, GET_GPA_PMAP, + ASSERT_LAPIC_LVT, }; int main(int argc, char *argv[]) { char *vmname; int error, ch, vcpu, ptenum; vm_paddr_t gpa, gpa_pmap; size_t len; struct vm_exit vmexit; uint64_t ctl, eptp, bm, addr, u64, pteval[4], *pte; struct vmctx *ctx; int wired; uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat; uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp; uint64_t r8, r9, r10, r11, r12, r13, r14, r15; uint64_t cs, ds, es, fs, gs, ss, tr, ldtr; struct option opts[] = { { "vm", REQ_ARG, 0, VMNAME }, { "cpu", REQ_ARG, 0, VCPU }, { "set-mem", REQ_ARG, 0, SET_MEM }, { "set-efer", REQ_ARG, 0, SET_EFER }, { "set-cr0", REQ_ARG, 0, SET_CR0 }, { "set-cr3", REQ_ARG, 0, SET_CR3 }, { "set-cr4", REQ_ARG, 0, SET_CR4 }, { "set-dr7", REQ_ARG, 0, SET_DR7 }, { "set-rsp", REQ_ARG, 0, SET_RSP }, { "set-rip", REQ_ARG, 0, SET_RIP }, { "set-rax", REQ_ARG, 0, SET_RAX }, { "set-rflags", REQ_ARG, 0, SET_RFLAGS }, { "desc-base", REQ_ARG, 0, DESC_BASE }, { "desc-limit", REQ_ARG, 0, DESC_LIMIT }, { "desc-access",REQ_ARG, 0, DESC_ACCESS }, { "set-cs", REQ_ARG, 0, SET_CS }, { "set-ds", REQ_ARG, 0, SET_DS }, { "set-es", REQ_ARG, 0, SET_ES }, { "set-fs", REQ_ARG, 0, SET_FS }, { "set-gs", REQ_ARG, 0, SET_GS }, { "set-ss", REQ_ARG, 0, SET_SS }, { "set-tr", REQ_ARG, 0, SET_TR }, { "set-ldtr", REQ_ARG, 0, SET_LDTR }, { "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE }, { "set-vmcs-exception-bitmap", REQ_ARG, 0, SET_VMCS_EXCEPTION_BITMAP }, { "set-vmcs-entry-interruption-info", REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO }, { "capname", REQ_ARG, 0, CAPNAME }, { "unassign-pptdev", REQ_ARG, 0, UNASSIGN_PPTDEV }, { "setcap", REQ_ARG, 0, SET_CAP }, { "get-gpa-pmap", REQ_ARG, 0, GET_GPA_PMAP }, + { "assert-lapic-lvt", REQ_ARG, 0, ASSERT_LAPIC_LVT }, { "getcap", NO_ARG, &getcap, 1 }, { "get-stats", NO_ARG, &get_stats, 1 }, { "get-desc-ds",NO_ARG, &get_desc_ds, 1 }, { "set-desc-ds",NO_ARG, &set_desc_ds, 1 }, { "get-desc-es",NO_ARG, &get_desc_es, 1 }, { "set-desc-es",NO_ARG, &set_desc_es, 1 }, { "get-desc-ss",NO_ARG, &get_desc_ss, 1 }, { "set-desc-ss",NO_ARG, &set_desc_ss, 1 }, { "get-desc-cs",NO_ARG, &get_desc_cs, 1 }, { "set-desc-cs",NO_ARG, &set_desc_cs, 1 }, { "get-desc-fs",NO_ARG, &get_desc_fs, 1 }, { "set-desc-fs",NO_ARG, &set_desc_fs, 1 }, { "get-desc-gs",NO_ARG, &get_desc_gs, 1 }, { "set-desc-gs",NO_ARG, &set_desc_gs, 1 }, { "get-desc-tr",NO_ARG, &get_desc_tr, 1 }, { "set-desc-tr",NO_ARG, &set_desc_tr, 1 }, { "set-desc-ldtr", NO_ARG, &set_desc_ldtr, 1 }, { "get-desc-ldtr", NO_ARG, &get_desc_ldtr, 1 }, { "set-desc-gdtr", NO_ARG, &set_desc_gdtr, 1 }, { "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 }, { "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 }, { "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 }, { "get-lowmem", NO_ARG, &get_lowmem, 1 }, { "get-highmem",NO_ARG, &get_highmem, 1 }, { "get-efer", NO_ARG, &get_efer, 1 }, { "get-cr0", NO_ARG, &get_cr0, 1 }, { "get-cr3", NO_ARG, &get_cr3, 1 }, { "get-cr4", NO_ARG, &get_cr4, 1 }, { "get-dr7", NO_ARG, &get_dr7, 1 }, { "get-rsp", NO_ARG, &get_rsp, 1 }, { "get-rip", NO_ARG, &get_rip, 1 }, { "get-rax", NO_ARG, &get_rax, 1 }, { "get-rbx", NO_ARG, &get_rbx, 1 }, { "get-rcx", NO_ARG, &get_rcx, 1 }, { "get-rdx", NO_ARG, &get_rdx, 1 }, { "get-rsi", NO_ARG, &get_rsi, 1 }, { "get-rdi", NO_ARG, &get_rdi, 1 }, { "get-rbp", NO_ARG, &get_rbp, 1 }, { "get-r8", NO_ARG, &get_r8, 1 }, { "get-r9", NO_ARG, &get_r9, 1 }, { "get-r10", NO_ARG, &get_r10, 1 }, { "get-r11", NO_ARG, &get_r11, 1 }, { "get-r12", NO_ARG, &get_r12, 1 }, { "get-r13", NO_ARG, &get_r13, 1 }, { "get-r14", NO_ARG, &get_r14, 1 }, { "get-r15", NO_ARG, &get_r15, 1 }, { "get-rflags", NO_ARG, &get_rflags, 1 }, { "get-cs", NO_ARG, &get_cs, 1 }, { "get-ds", NO_ARG, &get_ds, 1 }, { "get-es", NO_ARG, &get_es, 1 }, { "get-fs", NO_ARG, &get_fs, 1 }, { "get-gs", NO_ARG, &get_gs, 1 }, { "get-ss", NO_ARG, &get_ss, 1 }, { "get-tr", NO_ARG, &get_tr, 1 }, { "get-ldtr", NO_ARG, &get_ldtr, 1 }, { "get-vmcs-pinbased-ctls", NO_ARG, &get_pinbased_ctls, 1 }, { "get-vmcs-procbased-ctls", NO_ARG, &get_procbased_ctls, 1 }, { "get-vmcs-procbased-ctls2", NO_ARG, &get_procbased_ctls2, 1 }, { "get-vmcs-guest-linear-address", NO_ARG, &get_vmcs_gla, 1 }, { "get-vmcs-guest-physical-address", NO_ARG, &get_vmcs_gpa, 1 }, { "get-vmcs-entry-interruption-info", NO_ARG, &get_vmcs_entry_interruption_info, 1}, { "get-vmcs-eptp", NO_ARG, &get_eptp, 1 }, { "get-vmcs-exception-bitmap", NO_ARG, &get_exception_bitmap, 1 }, { "get-vmcs-io-bitmap-address", NO_ARG, &get_io_bitmap, 1 }, { "get-vmcs-tsc-offset", NO_ARG,&get_tsc_offset, 1 }, { "get-vmcs-cr0-mask", NO_ARG, &get_cr0_mask, 1 }, { "get-vmcs-cr0-shadow", NO_ARG,&get_cr0_shadow, 1 }, { "get-vmcs-cr4-mask", NO_ARG, &get_cr4_mask, 1 }, { "get-vmcs-cr4-shadow", NO_ARG,&get_cr4_shadow, 1 }, { "get-vmcs-cr3-targets", NO_ARG, &get_cr3_targets, 1}, { "get-vmcs-apic-access-address", NO_ARG, &get_apic_access_addr, 1}, { "get-vmcs-virtual-apic-address", NO_ARG, &get_virtual_apic_addr, 1}, { "get-vmcs-tpr-threshold", NO_ARG, &get_tpr_threshold, 1 }, { "get-vmcs-msr-bitmap", NO_ARG, &get_msr_bitmap, 1 }, { "get-vmcs-msr-bitmap-address", NO_ARG, &get_msr_bitmap_address, 1 }, { "get-vmcs-vpid", NO_ARG, &get_vpid, 1 }, { "get-vmcs-ple-gap", NO_ARG, &get_ple_gap, 1 }, { "get-vmcs-ple-window", NO_ARG,&get_ple_window,1 }, { "get-vmcs-instruction-error", NO_ARG, &get_inst_err, 1 }, { "get-vmcs-exit-ctls", NO_ARG, &get_exit_ctls, 1 }, { "get-vmcs-entry-ctls", NO_ARG, &get_entry_ctls, 1 }, { "get-vmcs-guest-pat", NO_ARG, &get_guest_pat, 1 }, { "get-vmcs-host-pat", NO_ARG, &get_host_pat, 1 }, { "get-vmcs-host-cr0", NO_ARG, &get_host_cr0, 1 }, { "get-vmcs-host-cr3", NO_ARG, &get_host_cr3, 1 }, { "get-vmcs-host-cr4", NO_ARG, &get_host_cr4, 1 }, { "get-vmcs-host-rip", NO_ARG, &get_host_rip, 1 }, { "get-vmcs-host-rsp", NO_ARG, &get_host_rsp, 1 }, { "get-vmcs-guest-sysenter", NO_ARG, &get_guest_sysenter, 1 }, { "get-vmcs-link", NO_ARG, &get_vmcs_link, 1 }, { "get-vmcs-exit-reason", NO_ARG, &get_vmcs_exit_reason, 1 }, { "get-vmcs-exit-qualification", NO_ARG, &get_vmcs_exit_qualification, 1 }, { "get-vmcs-exit-interruption-info", NO_ARG, &get_vmcs_exit_interruption_info, 1}, { "get-vmcs-exit-interruption-error", NO_ARG, &get_vmcs_exit_interruption_error, 1}, { "get-vmcs-interruptibility", NO_ARG, &get_vmcs_interruptibility, 1 }, { "get-x2apic-state",NO_ARG, &get_x2apic_state, 1 }, { "get-all", NO_ARG, &get_all, 1 }, { "run", NO_ARG, &run, 1 }, { "create", NO_ARG, &create, 1 }, { "destroy", NO_ARG, &destroy, 1 }, { "inject-nmi", NO_ARG, &inject_nmi, 1 }, { NULL, 0, NULL, 0 } }; vcpu = 0; + assert_lapic_lvt = -1; progname = basename(argv[0]); while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) { switch (ch) { case 0: break; case VMNAME: vmname = optarg; break; case VCPU: vcpu = atoi(optarg); break; case SET_MEM: memsize = atoi(optarg) * MB; memsize = roundup(memsize, 2 * MB); break; case SET_EFER: efer = strtoul(optarg, NULL, 0); set_efer = 1; break; case SET_CR0: cr0 = strtoul(optarg, NULL, 0); set_cr0 = 1; break; case SET_CR3: cr3 = strtoul(optarg, NULL, 0); set_cr3 = 1; break; case SET_CR4: cr4 = strtoul(optarg, NULL, 0); set_cr4 = 1; break; case SET_DR7: dr7 = strtoul(optarg, NULL, 0); set_dr7 = 1; break; case SET_RSP: rsp = strtoul(optarg, NULL, 0); set_rsp = 1; break; case SET_RIP: rip = strtoul(optarg, NULL, 0); set_rip = 1; break; case SET_RAX: rax = strtoul(optarg, NULL, 0); set_rax = 1; break; case SET_RFLAGS: rflags = strtoul(optarg, NULL, 0); set_rflags = 1; break; case DESC_BASE: desc_base = strtoul(optarg, NULL, 0); break; case DESC_LIMIT: desc_limit = strtoul(optarg, NULL, 0); break; case DESC_ACCESS: desc_access = strtoul(optarg, NULL, 0); break; case SET_CS: cs = strtoul(optarg, NULL, 0); set_cs = 1; break; case SET_DS: ds = strtoul(optarg, NULL, 0); set_ds = 1; break; case SET_ES: es = strtoul(optarg, NULL, 0); set_es = 1; break; case SET_FS: fs = strtoul(optarg, NULL, 0); set_fs = 1; break; case SET_GS: gs = strtoul(optarg, NULL, 0); set_gs = 1; break; case SET_SS: ss = strtoul(optarg, NULL, 0); set_ss = 1; break; case SET_TR: tr = strtoul(optarg, NULL, 0); set_tr = 1; break; case SET_LDTR: ldtr = strtoul(optarg, NULL, 0); set_ldtr = 1; break; case SET_X2APIC_STATE: x2apic_state = strtol(optarg, NULL, 0); set_x2apic_state = 1; break; case SET_VMCS_EXCEPTION_BITMAP: exception_bitmap = strtoul(optarg, NULL, 0); set_exception_bitmap = 1; break; case SET_VMCS_ENTRY_INTERRUPTION_INFO: vmcs_entry_interruption_info = strtoul(optarg, NULL, 0); set_vmcs_entry_interruption_info = 1; break; case SET_CAP: capval = strtoul(optarg, NULL, 0); setcap = 1; break; case GET_GPA_PMAP: gpa_pmap = strtoul(optarg, NULL, 0); get_gpa_pmap = 1; break; case CAPNAME: capname = optarg; break; case UNASSIGN_PPTDEV: unassign_pptdev = 1; if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3) usage(); break; + case ASSERT_LAPIC_LVT: + assert_lapic_lvt = atoi(optarg); + break; default: usage(); } } argc -= optind; argv += optind; if (vmname == NULL) usage(); error = 0; if (!error && create) error = vm_create(vmname); if (!error) { ctx = vm_open(vmname); if (ctx == NULL) error = -1; } if (!error && memsize) error = vm_setup_memory(ctx, memsize, VM_MMAP_NONE); if (!error && set_efer) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer); if (!error && set_cr0) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0); if (!error && set_cr3) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3); if (!error && set_cr4) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4); if (!error && set_dr7) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); if (!error && set_rsp) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp); if (!error && set_rip) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip); if (!error && set_rax) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax); if (!error && set_rflags) { error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); } if (!error && set_desc_ds) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, desc_base, desc_limit, desc_access); } if (!error && set_desc_es) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES, desc_base, desc_limit, desc_access); } if (!error && set_desc_ss) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, desc_base, desc_limit, desc_access); } if (!error && set_desc_cs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, desc_base, desc_limit, desc_access); } if (!error && set_desc_fs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS, desc_base, desc_limit, desc_access); } if (!error && set_desc_gs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS, desc_base, desc_limit, desc_access); } if (!error && set_desc_tr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, desc_base, desc_limit, desc_access); } if (!error && set_desc_ldtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR, desc_base, desc_limit, desc_access); } if (!error && set_desc_gdtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, desc_base, desc_limit, 0); } if (!error && set_desc_idtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, desc_base, desc_limit, 0); } if (!error && set_cs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs); if (!error && set_ds) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds); if (!error && set_es) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es); if (!error && set_fs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs); if (!error && set_gs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs); if (!error && set_ss) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss); if (!error && set_tr) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr); if (!error && set_ldtr) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr); if (!error && set_x2apic_state) error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); if (!error && unassign_pptdev) error = vm_unassign_pptdev(ctx, bus, slot, func); if (!error && set_exception_bitmap) { error = vm_set_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, exception_bitmap); } if (!error && set_vmcs_entry_interruption_info) { error = vm_set_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO, vmcs_entry_interruption_info); } if (!error && inject_nmi) { error = vm_inject_nmi(ctx, vcpu); } + if (!error && assert_lapic_lvt != -1) { + error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt); + } + if (!error && (get_lowmem || get_all)) { gpa = 0; error = vm_get_memory_seg(ctx, gpa, &len, &wired); if (error == 0) printf("lowmem\t\t0x%016lx/%ld%s\n", gpa, len, wired ? " wired" : ""); } if (!error && (get_highmem || get_all)) { gpa = 4 * GB; error = vm_get_memory_seg(ctx, gpa, &len, &wired); if (error == 0) printf("highmem\t\t0x%016lx/%ld%s\n", gpa, len, wired ? " wired" : ""); } if (!error && (get_efer || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer); if (error == 0) printf("efer[%d]\t\t0x%016lx\n", vcpu, efer); } if (!error && (get_cr0 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0); if (error == 0) printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0); } if (!error && (get_cr3 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3); if (error == 0) printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3); } if (!error && (get_cr4 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4); if (error == 0) printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4); } if (!error && (get_dr7 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); if (error == 0) printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7); } if (!error && (get_rsp || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp); if (error == 0) printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp); } if (!error && (get_rip || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); if (error == 0) printf("rip[%d]\t\t0x%016lx\n", vcpu, rip); } if (!error && (get_rax || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax); if (error == 0) printf("rax[%d]\t\t0x%016lx\n", vcpu, rax); } if (!error && (get_rbx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx); if (error == 0) printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx); } if (!error && (get_rcx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx); if (error == 0) printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx); } if (!error && (get_rdx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx); if (error == 0) printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx); } if (!error && (get_rsi || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi); if (error == 0) printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi); } if (!error && (get_rdi || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi); if (error == 0) printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi); } if (!error && (get_rbp || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp); if (error == 0) printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp); } if (!error && (get_r8 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8); if (error == 0) printf("r8[%d]\t\t0x%016lx\n", vcpu, r8); } if (!error && (get_r9 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9); if (error == 0) printf("r9[%d]\t\t0x%016lx\n", vcpu, r9); } if (!error && (get_r10 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10); if (error == 0) printf("r10[%d]\t\t0x%016lx\n", vcpu, r10); } if (!error && (get_r11 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11); if (error == 0) printf("r11[%d]\t\t0x%016lx\n", vcpu, r11); } if (!error && (get_r12 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12); if (error == 0) printf("r12[%d]\t\t0x%016lx\n", vcpu, r12); } if (!error && (get_r13 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13); if (error == 0) printf("r13[%d]\t\t0x%016lx\n", vcpu, r13); } if (!error && (get_r14 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14); if (error == 0) printf("r14[%d]\t\t0x%016lx\n", vcpu, r14); } if (!error && (get_r15 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15); if (error == 0) printf("r15[%d]\t\t0x%016lx\n", vcpu, r15); } if (!error && (get_rflags || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error == 0) printf("rflags[%d]\t0x%016lx\n", vcpu, rflags); } if (!error && (get_stats || get_all)) { int i, num_stats; uint64_t *stats; struct timeval tv; const char *desc; stats = vm_get_stats(ctx, vcpu, &tv, &num_stats); if (stats != NULL) { printf("vcpu%d\n", vcpu); for (i = 0; i < num_stats; i++) { desc = vm_get_stat_desc(ctx, i); printf("%-40s\t%ld\n", desc, stats[i]); } } } if (!error && (get_desc_ds || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_es || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_fs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_gs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_ss || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_cs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_tr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_ldtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_gdtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("gdtr[%d]\t\t0x%016lx/0x%08x\n", vcpu, desc_base, desc_limit); } } if (!error && (get_desc_idtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("idtr[%d]\t\t0x%016lx/0x%08x\n", vcpu, desc_base, desc_limit); } } if (!error && (get_cs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs); if (error == 0) printf("cs[%d]\t\t0x%04lx\n", vcpu, cs); } if (!error && (get_ds || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds); if (error == 0) printf("ds[%d]\t\t0x%04lx\n", vcpu, ds); } if (!error && (get_es || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es); if (error == 0) printf("es[%d]\t\t0x%04lx\n", vcpu, es); } if (!error && (get_fs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_FS, &fs); if (error == 0) printf("fs[%d]\t\t0x%04lx\n", vcpu, fs); } if (!error && (get_gs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs); if (error == 0) printf("gs[%d]\t\t0x%04lx\n", vcpu, gs); } if (!error && (get_ss || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss); if (error == 0) printf("ss[%d]\t\t0x%04lx\n", vcpu, ss); } if (!error && (get_tr || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr); if (error == 0) printf("tr[%d]\t\t0x%04lx\n", vcpu, tr); } if (!error && (get_ldtr || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr); if (error == 0) printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr); } if (!error && (get_x2apic_state || get_all)) { error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); if (error == 0) printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state); } if (!error && (get_pinbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl); if (error == 0) printf("pinbased_ctls[%d]\t0x%08lx\n", vcpu, ctl); } if (!error && (get_procbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PRI_PROC_BASED_CTLS, &ctl); if (error == 0) printf("procbased_ctls[%d]\t0x%08lx\n", vcpu, ctl); } if (!error && (get_procbased_ctls2 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_SEC_PROC_BASED_CTLS, &ctl); if (error == 0) printf("procbased_ctls2[%d]\t0x%08lx\n", vcpu, ctl); } if (!error && (get_vmcs_gla || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_LINEAR_ADDRESS, &u64); if (error == 0) printf("gla[%d]\t\t0x%016lx\n", vcpu, u64); } if (!error && (get_vmcs_gpa || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_PHYSICAL_ADDRESS, &u64); if (error == 0) printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); } if (!error && (get_vmcs_entry_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); if (error == 0) { printf("entry_interruption_info[%d]\t0x%08lx\n", vcpu, u64); } } if (!error && (get_eptp || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp); if (error == 0) printf("eptp[%d]\t\t0x%016lx\n", vcpu, eptp); } if (!error && (get_exception_bitmap || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, &bm); if (error == 0) printf("exception_bitmap[%d]\t0x%08lx\n", vcpu, bm); } if (!error && (get_io_bitmap || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A, &bm); if (error == 0) printf("io_bitmap_a[%d]\t0x%08lx\n", vcpu, bm); error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B, &bm); if (error == 0) printf("io_bitmap_b[%d]\t0x%08lx\n", vcpu, bm); } if (!error && (get_tsc_offset || get_all)) { uint64_t tscoff; error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET, &tscoff); if (error == 0) printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff); } if (!error && (get_cr0_mask || get_all)) { uint64_t cr0mask; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask); if (error == 0) printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask); } if (!error && (get_cr0_shadow || get_all)) { uint64_t cr0shadow; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW, &cr0shadow); if (error == 0) printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow); } if (!error && (get_cr4_mask || get_all)) { uint64_t cr4mask; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask); if (error == 0) printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask); } if (!error && (get_cr4_shadow || get_all)) { uint64_t cr4shadow; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW, &cr4shadow); if (error == 0) printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow); } if (!error && (get_cr3_targets || get_all)) { uint64_t target_count, target_addr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, &target_count); if (error == 0) { printf("cr3_target_count[%d]\t0x%08lx\n", vcpu, target_count); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0, &target_addr); if (error == 0) { printf("cr3_target0[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1, &target_addr); if (error == 0) { printf("cr3_target1[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2, &target_addr); if (error == 0) { printf("cr3_target2[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3, &target_addr); if (error == 0) { printf("cr3_target3[%d]\t\t0x%016lx\n", vcpu, target_addr); } } if (!error && (get_apic_access_addr || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_APIC_ACCESS, &addr); if (error == 0) printf("apic_access_addr[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_virtual_apic_addr || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_VIRTUAL_APIC, &addr); if (error == 0) printf("virtual_apic_addr[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_tpr_threshold || get_all)) { uint64_t threshold; error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD, &threshold); if (error == 0) printf("tpr_threshold[%d]\t0x%08lx\n", vcpu, threshold); } if (!error && (get_msr_bitmap_address || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); if (error == 0) printf("msr_bitmap[%d]\t\t0x%016lx\n", vcpu, addr); } if (!error && (get_msr_bitmap || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); if (error == 0) error = dump_vmcs_msr_bitmap(vcpu, addr); } if (!error && (get_vpid || get_all)) { uint64_t vpid; error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); if (error == 0) printf("vpid[%d]\t\t0x%04lx\n", vcpu, vpid); } if (!error && (get_ple_window || get_all)) { uint64_t window; error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_WINDOW, &window); if (error == 0) printf("ple_window[%d]\t\t0x%08lx\n", vcpu, window); } if (!error && (get_ple_gap || get_all)) { uint64_t gap; error = vm_get_vmcs_field(ctx, vcpu, VMCS_PLE_GAP, &gap); if (error == 0) printf("ple_gap[%d]\t\t0x%08lx\n", vcpu, gap); } if (!error && (get_inst_err || get_all)) { uint64_t insterr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR, &insterr); if (error == 0) { printf("instruction_error[%d]\t0x%08lx\n", vcpu, insterr); } } if (!error && (get_exit_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl); if (error == 0) printf("exit_ctls[%d]\t\t0x%08lx\n", vcpu, ctl); } if (!error && (get_entry_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl); if (error == 0) printf("entry_ctls[%d]\t\t0x%08lx\n", vcpu, ctl); } if (!error && (get_host_pat || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat); if (error == 0) printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat); } if (!error && (get_guest_pat || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_PAT, &pat); if (error == 0) printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); } if (!error && (get_host_cr0 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0); if (error == 0) printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0); } if (!error && (get_host_cr3 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3); if (error == 0) printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3); } if (!error && (get_host_cr4 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4); if (error == 0) printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4); } if (!error && (get_host_rip || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip); if (error == 0) printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip); } if (!error && (get_host_rsp || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp); if (error == 0) printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp); } if (!error && (get_guest_sysenter || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_CS, &cs); if (error == 0) printf("guest_sysenter_cs[%d]\t0x%08lx\n", vcpu, cs); error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_ESP, &rsp); if (error == 0) printf("guest_sysenter_sp[%d]\t0x%016lx\n", vcpu, rsp); error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_EIP, &rip); if (error == 0) printf("guest_sysenter_ip[%d]\t0x%016lx\n", vcpu, rip); } if (!error && (get_vmcs_link || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr); if (error == 0) printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_vmcs_exit_reason || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); if (error == 0) printf("vmcs_exit_reason[%d]\t0x%016lx\n", vcpu, u64); } if (!error && (get_vmcs_exit_qualification || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION, &u64); if (error == 0) printf("vmcs_exit_qualification[%d]\t0x%016lx\n", vcpu, u64); } if (!error && (get_vmcs_exit_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTERRUPTION_INFO, &u64); if (error == 0) { printf("vmcs_exit_interruption_info[%d]\t0x%08lx\n", vcpu, u64); } } if (!error && (get_vmcs_exit_interruption_error || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTERRUPTION_ERROR, &u64); if (error == 0) { printf("vmcs_exit_interruption_error[%d]\t0x%08lx\n", vcpu, u64); } } if (!error && (get_vmcs_interruptibility || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_INTERRUPTIBILITY, &u64); if (error == 0) { printf("vmcs_guest_interruptibility[%d]\t0x%08lx\n", vcpu, u64); } } if (!error && setcap) { int captype; captype = vm_capability_name2type(capname); error = vm_set_capability(ctx, vcpu, captype, capval); if (error != 0 && errno == ENOENT) printf("Capability \"%s\" is not available\n", capname); } if (!error && get_gpa_pmap) { error = vm_get_gpa_pmap(ctx, gpa_pmap, pteval, &ptenum); if (error == 0) { printf("gpa %#lx:", gpa_pmap); pte = &pteval[0]; while (ptenum-- > 0) printf(" %#lx", *pte++); printf("\n"); } } if (!error && (getcap || get_all)) { int captype, val, getcaptype; if (getcap && capname) getcaptype = vm_capability_name2type(capname); else getcaptype = -1; for (captype = 0; captype < VM_CAP_MAX; captype++) { if (getcaptype >= 0 && captype != getcaptype) continue; error = vm_get_capability(ctx, vcpu, captype, &val); if (error == 0) { printf("Capability \"%s\" is %s on vcpu %d\n", vm_capability_type2name(captype), val ? "set" : "not set", vcpu); } else if (errno == ENOENT) { error = 0; printf("Capability \"%s\" is not available\n", vm_capability_type2name(captype)); } else { break; } } } if (!error && run) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); assert(error == 0); error = vm_run(ctx, vcpu, rip, &vmexit); if (error == 0) dump_vm_run_exitcode(&vmexit, vcpu); else printf("vm_run error %d\n", error); } if (error) printf("errno = %d\n", errno); if (!error && destroy) vm_destroy(ctx); exit(error); }