Index: head/lib/libvmmapi/vmmapi.c =================================================================== --- head/lib/libvmmapi/vmmapi.c (revision 276427) +++ head/lib/libvmmapi/vmmapi.c (revision 276428) @@ -1,1148 +1,1200 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmmapi.h" #define MB (1024 * 1024UL) #define GB (1024 * 1024 * 1024UL) struct vmctx { int fd; uint32_t lowmem_limit; enum vm_mmap_style vms; int memflags; size_t lowmem; char *lowmem_addr; size_t highmem; char *highmem_addr; char *name; }; #define CREATE(x) sysctlbyname("hw.vmm.create", NULL, NULL, (x), strlen((x))) #define DESTROY(x) sysctlbyname("hw.vmm.destroy", NULL, NULL, (x), strlen((x))) static int vm_device_open(const char *name) { int fd, len; char *vmfile; len = strlen("/dev/vmm/") + strlen(name) + 1; vmfile = malloc(len); assert(vmfile != NULL); snprintf(vmfile, len, "/dev/vmm/%s", name); /* Open the device file */ fd = open(vmfile, O_RDWR, 0); free(vmfile); return (fd); } int vm_create(const char *name) { return (CREATE((char *)name)); } struct vmctx * vm_open(const char *name) { struct vmctx *vm; vm = malloc(sizeof(struct vmctx) + strlen(name) + 1); assert(vm != NULL); vm->fd = -1; vm->memflags = 0; vm->lowmem_limit = 3 * GB; vm->name = (char *)(vm + 1); strcpy(vm->name, name); if ((vm->fd = vm_device_open(vm->name)) < 0) goto err; return (vm); err: vm_destroy(vm); return (NULL); } void vm_destroy(struct vmctx *vm) { assert(vm != NULL); if (vm->fd >= 0) close(vm->fd); DESTROY(vm->name); free(vm); } int vm_parse_memsize(const char *optarg, size_t *ret_memsize) { char *endptr; size_t optval; int error; optval = strtoul(optarg, &endptr, 0); if (*optarg != '\0' && *endptr == '\0') { /* * For the sake of backward compatibility if the memory size * specified on the command line is less than a megabyte then * it is interpreted as being in units of MB. */ if (optval < MB) optval *= MB; *ret_memsize = optval; error = 0; } else error = expand_number(optarg, ret_memsize); return (error); } int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len, int *wired) { int error; struct vm_memory_segment seg; bzero(&seg, sizeof(seg)); seg.gpa = gpa; error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); *ret_len = seg.len; if (wired != NULL) *wired = seg.wired; return (error); } uint32_t vm_get_lowmem_limit(struct vmctx *ctx) { return (ctx->lowmem_limit); } void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit) { ctx->lowmem_limit = limit; } void vm_set_memflags(struct vmctx *ctx, int flags) { ctx->memflags = flags; } static int setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr) { int error, mmap_flags; struct vm_memory_segment seg; /* * Create and optionally map 'len' bytes of memory at guest * physical address 'gpa' */ bzero(&seg, sizeof(seg)); seg.gpa = gpa; seg.len = len; error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); if (error == 0 && addr != NULL) { mmap_flags = MAP_SHARED; if ((ctx->memflags & VM_MEM_F_INCORE) == 0) mmap_flags |= MAP_NOCORE; *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags, ctx->fd, gpa); } return (error); } int vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) { char **addr; int error; /* XXX VM_MMAP_SPARSE not implemented yet */ assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL); ctx->vms = vms; /* * If 'memsize' cannot fit entirely in the 'lowmem' segment then * create another 'highmem' segment above 4GB for the remainder. */ if (memsize > ctx->lowmem_limit) { ctx->lowmem = ctx->lowmem_limit; ctx->highmem = memsize - ctx->lowmem; } else { ctx->lowmem = memsize; ctx->highmem = 0; } if (ctx->lowmem > 0) { addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL; error = setup_memory_segment(ctx, 0, ctx->lowmem, addr); if (error) return (error); } if (ctx->highmem > 0) { addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL; error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr); if (error) return (error); } return (0); } void * vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) { /* XXX VM_MMAP_SPARSE not implemented yet */ assert(ctx->vms == VM_MMAP_ALL); if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem) return ((void *)(ctx->lowmem_addr + gaddr)); if (gaddr >= 4*GB) { gaddr -= 4*GB; if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem) return ((void *)(ctx->highmem_addr + gaddr)); } return (NULL); } size_t vm_get_lowmem_size(struct vmctx *ctx) { return (ctx->lowmem); } size_t vm_get_highmem_size(struct vmctx *ctx) { return (ctx->highmem); } int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access) { int error; struct vm_seg_desc vmsegdesc; bzero(&vmsegdesc, sizeof(vmsegdesc)); vmsegdesc.cpuid = vcpu; vmsegdesc.regnum = reg; vmsegdesc.desc.base = base; vmsegdesc.desc.limit = limit; vmsegdesc.desc.access = access; error = ioctl(ctx->fd, VM_SET_SEGMENT_DESCRIPTOR, &vmsegdesc); return (error); } int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t *base, uint32_t *limit, uint32_t *access) { int error; struct vm_seg_desc vmsegdesc; bzero(&vmsegdesc, sizeof(vmsegdesc)); vmsegdesc.cpuid = vcpu; vmsegdesc.regnum = reg; error = ioctl(ctx->fd, VM_GET_SEGMENT_DESCRIPTOR, &vmsegdesc); if (error == 0) { *base = vmsegdesc.desc.base; *limit = vmsegdesc.desc.limit; *access = vmsegdesc.desc.access; } return (error); } int vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc) { int error; error = vm_get_desc(ctx, vcpu, reg, &seg_desc->base, &seg_desc->limit, &seg_desc->access); return (error); } int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val) { int error; struct vm_register vmreg; bzero(&vmreg, sizeof(vmreg)); vmreg.cpuid = vcpu; vmreg.regnum = reg; vmreg.regval = val; error = ioctl(ctx->fd, VM_SET_REGISTER, &vmreg); return (error); } int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val) { int error; struct vm_register vmreg; bzero(&vmreg, sizeof(vmreg)); vmreg.cpuid = vcpu; vmreg.regnum = reg; error = ioctl(ctx->fd, VM_GET_REGISTER, &vmreg); *ret_val = vmreg.regval; return (error); } int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) { int error; struct vm_run vmrun; bzero(&vmrun, sizeof(vmrun)); vmrun.cpuid = vcpu; vmrun.rip = rip; error = ioctl(ctx->fd, VM_RUN, &vmrun); bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit)); return (error); } int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how) { struct vm_suspend vmsuspend; bzero(&vmsuspend, sizeof(vmsuspend)); vmsuspend.how = how; return (ioctl(ctx->fd, VM_SUSPEND, &vmsuspend)); } int vm_reinit(struct vmctx *ctx) { return (ioctl(ctx->fd, VM_REINIT, 0)); } static int vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector, int error_code, int error_code_valid) { struct vm_exception exc; bzero(&exc, sizeof(exc)); exc.cpuid = vcpu; exc.vector = vector; exc.error_code = error_code; exc.error_code_valid = error_code_valid; return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc)); } int vm_inject_exception(struct vmctx *ctx, int vcpu, int vector) { return (vm_inject_exception_real(ctx, vcpu, vector, 0, 0)); } int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vector, int errcode) { return (vm_inject_exception_real(ctx, vcpu, vector, errcode, 1)); } int vm_apicid2vcpu(struct vmctx *ctx, int apicid) { /* * The apic id associated with the 'vcpu' has the same numerical value * as the 'vcpu' itself. */ return (apicid); } int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector) { struct vm_lapic_irq vmirq; bzero(&vmirq, sizeof(vmirq)); vmirq.cpuid = vcpu; vmirq.vector = vector; return (ioctl(ctx->fd, VM_LAPIC_IRQ, &vmirq)); } int vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector) { struct vm_lapic_irq vmirq; bzero(&vmirq, sizeof(vmirq)); vmirq.cpuid = vcpu; vmirq.vector = vector; return (ioctl(ctx->fd, VM_LAPIC_LOCAL_IRQ, &vmirq)); } int vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg) { struct vm_lapic_msi vmmsi; bzero(&vmmsi, sizeof(vmmsi)); vmmsi.addr = addr; vmmsi.msg = msg; return (ioctl(ctx->fd, VM_LAPIC_MSI, &vmmsi)); } int vm_ioapic_assert_irq(struct vmctx *ctx, int irq) { struct vm_ioapic_irq ioapic_irq; bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); ioapic_irq.irq = irq; return (ioctl(ctx->fd, VM_IOAPIC_ASSERT_IRQ, &ioapic_irq)); } int vm_ioapic_deassert_irq(struct vmctx *ctx, int irq) { struct vm_ioapic_irq ioapic_irq; bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); ioapic_irq.irq = irq; return (ioctl(ctx->fd, VM_IOAPIC_DEASSERT_IRQ, &ioapic_irq)); } int vm_ioapic_pulse_irq(struct vmctx *ctx, int irq) { struct vm_ioapic_irq ioapic_irq; bzero(&ioapic_irq, sizeof(struct vm_ioapic_irq)); ioapic_irq.irq = irq; return (ioctl(ctx->fd, VM_IOAPIC_PULSE_IRQ, &ioapic_irq)); } int vm_ioapic_pincount(struct vmctx *ctx, int *pincount) { return (ioctl(ctx->fd, VM_IOAPIC_PINCOUNT, pincount)); } int vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) { struct vm_isa_irq isa_irq; bzero(&isa_irq, sizeof(struct vm_isa_irq)); isa_irq.atpic_irq = atpic_irq; isa_irq.ioapic_irq = ioapic_irq; return (ioctl(ctx->fd, VM_ISA_ASSERT_IRQ, &isa_irq)); } int vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) { struct vm_isa_irq isa_irq; bzero(&isa_irq, sizeof(struct vm_isa_irq)); isa_irq.atpic_irq = atpic_irq; isa_irq.ioapic_irq = ioapic_irq; return (ioctl(ctx->fd, VM_ISA_DEASSERT_IRQ, &isa_irq)); } int vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq) { struct vm_isa_irq isa_irq; bzero(&isa_irq, sizeof(struct vm_isa_irq)); isa_irq.atpic_irq = atpic_irq; isa_irq.ioapic_irq = ioapic_irq; return (ioctl(ctx->fd, VM_ISA_PULSE_IRQ, &isa_irq)); } int vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq, enum vm_intr_trigger trigger) { struct vm_isa_irq_trigger isa_irq_trigger; bzero(&isa_irq_trigger, sizeof(struct vm_isa_irq_trigger)); isa_irq_trigger.atpic_irq = atpic_irq; isa_irq_trigger.trigger = trigger; return (ioctl(ctx->fd, VM_ISA_SET_IRQ_TRIGGER, &isa_irq_trigger)); } int vm_inject_nmi(struct vmctx *ctx, int vcpu) { struct vm_nmi vmnmi; bzero(&vmnmi, sizeof(vmnmi)); vmnmi.cpuid = vcpu; return (ioctl(ctx->fd, VM_INJECT_NMI, &vmnmi)); } static struct { const char *name; int type; } capstrmap[] = { { "hlt_exit", VM_CAP_HALT_EXIT }, { "mtrap_exit", VM_CAP_MTRAP_EXIT }, { "pause_exit", VM_CAP_PAUSE_EXIT }, { "unrestricted_guest", VM_CAP_UNRESTRICTED_GUEST }, { "enable_invpcid", VM_CAP_ENABLE_INVPCID }, { 0 } }; int vm_capability_name2type(const char *capname) { int i; for (i = 0; capstrmap[i].name != NULL && capname != NULL; i++) { if (strcmp(capstrmap[i].name, capname) == 0) return (capstrmap[i].type); } return (-1); } const char * vm_capability_type2name(int type) { int i; for (i = 0; capstrmap[i].name != NULL; i++) { if (capstrmap[i].type == type) return (capstrmap[i].name); } return (NULL); } int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int *retval) { int error; struct vm_capability vmcap; bzero(&vmcap, sizeof(vmcap)); vmcap.cpuid = vcpu; vmcap.captype = cap; error = ioctl(ctx->fd, VM_GET_CAPABILITY, &vmcap); *retval = vmcap.capval; return (error); } int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val) { struct vm_capability vmcap; bzero(&vmcap, sizeof(vmcap)); vmcap.cpuid = vcpu; vmcap.captype = cap; vmcap.capval = val; return (ioctl(ctx->fd, VM_SET_CAPABILITY, &vmcap)); } int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func) { struct vm_pptdev pptdev; bzero(&pptdev, sizeof(pptdev)); pptdev.bus = bus; pptdev.slot = slot; pptdev.func = func; return (ioctl(ctx->fd, VM_BIND_PPTDEV, &pptdev)); } int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func) { struct vm_pptdev pptdev; bzero(&pptdev, sizeof(pptdev)); pptdev.bus = bus; pptdev.slot = slot; pptdev.func = func; return (ioctl(ctx->fd, VM_UNBIND_PPTDEV, &pptdev)); } int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { struct vm_pptdev_mmio pptmmio; bzero(&pptmmio, sizeof(pptmmio)); pptmmio.bus = bus; pptmmio.slot = slot; pptmmio.func = func; pptmmio.gpa = gpa; pptmmio.len = len; pptmmio.hpa = hpa; return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio)); } int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, uint64_t addr, uint64_t msg, int numvec) { struct vm_pptdev_msi pptmsi; bzero(&pptmsi, sizeof(pptmsi)); pptmsi.vcpu = vcpu; pptmsi.bus = bus; pptmsi.slot = slot; pptmsi.func = func; pptmsi.msg = msg; pptmsi.addr = addr; pptmsi.numvec = numvec; return (ioctl(ctx->fd, VM_PPTDEV_MSI, &pptmsi)); } int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) { struct vm_pptdev_msix pptmsix; bzero(&pptmsix, sizeof(pptmsix)); pptmsix.vcpu = vcpu; pptmsix.bus = bus; pptmsix.slot = slot; pptmsix.func = func; pptmsix.idx = idx; pptmsix.msg = msg; pptmsix.addr = addr; pptmsix.vector_control = vector_control; return ioctl(ctx->fd, VM_PPTDEV_MSIX, &pptmsix); } uint64_t * vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, int *ret_entries) { int error; static struct vm_stats vmstats; vmstats.cpuid = vcpu; error = ioctl(ctx->fd, VM_STATS, &vmstats); if (error == 0) { if (ret_entries) *ret_entries = vmstats.num_entries; if (ret_tv) *ret_tv = vmstats.tv; return (vmstats.statbuf); } else return (NULL); } const char * vm_get_stat_desc(struct vmctx *ctx, int index) { static struct vm_stat_desc statdesc; statdesc.index = index; if (ioctl(ctx->fd, VM_STAT_DESC, &statdesc) == 0) return (statdesc.desc); else return (NULL); } int vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *state) { int error; struct vm_x2apic x2apic; bzero(&x2apic, sizeof(x2apic)); x2apic.cpuid = vcpu; error = ioctl(ctx->fd, VM_GET_X2APIC_STATE, &x2apic); *state = x2apic.state; return (error); } int vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state state) { int error; struct vm_x2apic x2apic; bzero(&x2apic, sizeof(x2apic)); x2apic.cpuid = vcpu; x2apic.state = state; error = ioctl(ctx->fd, VM_SET_X2APIC_STATE, &x2apic); return (error); } /* * From Intel Vol 3a: * Table 9-1. IA-32 Processor States Following Power-up, Reset or INIT */ int vcpu_reset(struct vmctx *vmctx, int vcpu) { int error; uint64_t rflags, rip, cr0, cr4, zero, desc_base, rdx; uint32_t desc_access, desc_limit; uint16_t sel; zero = 0; rflags = 0x2; error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); if (error) goto done; rip = 0xfff0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RIP, rip)) != 0) goto done; cr0 = CR0_NE; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR0, cr0)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR3, zero)) != 0) goto done; cr4 = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CR4, cr4)) != 0) goto done; /* * CS: present, r/w, accessed, 16-bit, byte granularity, usable */ desc_base = 0xffff0000; desc_limit = 0xffff; desc_access = 0x0093; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_CS, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0xf000; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_CS, sel)) != 0) goto done; /* * SS,DS,ES,FS,GS: present, r/w, accessed, 16-bit, byte granularity */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x0093; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_SS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_DS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_ES, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_FS, desc_base, desc_limit, desc_access); if (error) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GS, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_SS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_DS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_ES, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_FS, sel)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_GS, sel)) != 0) goto done; /* General purpose registers */ rdx = 0xf00; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RAX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RCX, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDX, rdx)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSI, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RDI, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RBP, zero)) != 0) goto done; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_RSP, zero)) != 0) goto done; /* GDTR, IDTR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_GDTR, desc_base, desc_limit, desc_access); if (error != 0) goto done; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_IDTR, desc_base, desc_limit, desc_access); if (error != 0) goto done; /* TR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x0000008b; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_TR, 0, 0, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_TR, sel)) != 0) goto done; /* LDTR */ desc_base = 0; desc_limit = 0xffff; desc_access = 0x00000082; error = vm_set_desc(vmctx, vcpu, VM_REG_GUEST_LDTR, desc_base, desc_limit, desc_access); if (error) goto done; sel = 0; if ((error = vm_set_register(vmctx, vcpu, VM_REG_GUEST_LDTR, 0)) != 0) goto done; /* XXX cr2, debug registers */ error = 0; done: return (error); } int vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num) { int error, i; struct vm_gpa_pte gpapte; bzero(&gpapte, sizeof(gpapte)); gpapte.gpa = gpa; error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte); if (error == 0) { *num = gpapte.ptenum; for (i = 0; i < gpapte.ptenum; i++) pte[i] = gpapte.pte[i]; } return (error); } int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities) { int error; struct vm_hpet_cap cap; bzero(&cap, sizeof(struct vm_hpet_cap)); error = ioctl(ctx->fd, VM_GET_HPET_CAPABILITIES, &cap); if (capabilities != NULL) *capabilities = cap.capabilities; return (error); } static int gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint64_t gla, int prot, int *fault, uint64_t *gpa) { struct vm_gla2gpa gg; int error; bzero(&gg, sizeof(struct vm_gla2gpa)); gg.vcpuid = vcpu; gg.prot = prot; gg.gla = gla; gg.paging = *paging; error = ioctl(ctx->fd, VM_GLA2GPA, &gg); if (error == 0) { *fault = gg.fault; *gpa = gg.gpa; } return (error); } #ifndef min #define min(a,b) (((a) < (b)) ? (a) : (b)) #endif int vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt) { uint64_t gpa; int error, fault, i, n, off; for (i = 0; i < iovcnt; i++) { iov[i].iov_base = 0; iov[i].iov_len = 0; } while (len) { assert(iovcnt > 0); error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, &gpa); if (error) return (-1); if (fault) return (1); off = gpa & PAGE_MASK; n = min(len, PAGE_SIZE - off); iov->iov_base = (void *)gpa; iov->iov_len = n; iov++; iovcnt--; gla += n; len -= n; } return (0); } void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len) { const char *src; char *dst; uint64_t gpa; size_t n; dst = vp; while (len) { assert(iov->iov_len); gpa = (uint64_t)iov->iov_base; n = min(len, iov->iov_len); src = vm_map_gpa(ctx, gpa, n); bcopy(src, dst, n); iov++; dst += n; len -= n; } } void vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov, size_t len) { const char *src; char *dst; uint64_t gpa; size_t n; src = vp; while (len) { assert(iov->iov_len); gpa = (uint64_t)iov->iov_base; n = min(len, iov->iov_len); dst = vm_map_gpa(ctx, gpa, n); bcopy(src, dst, n); iov++; src += n; len -= n; } } static int vm_get_cpus(struct vmctx *ctx, int which, cpuset_t *cpus) { struct vm_cpuset vm_cpuset; int error; bzero(&vm_cpuset, sizeof(struct vm_cpuset)); vm_cpuset.which = which; vm_cpuset.cpusetsize = sizeof(cpuset_t); vm_cpuset.cpus = cpus; error = ioctl(ctx->fd, VM_GET_CPUS, &vm_cpuset); return (error); } int vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus) { return (vm_get_cpus(ctx, VM_ACTIVE_CPUS, cpus)); } int vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus) { return (vm_get_cpus(ctx, VM_SUSPENDED_CPUS, cpus)); } int vm_activate_cpu(struct vmctx *ctx, int vcpu) { struct vm_activate_cpu ac; int error; bzero(&ac, sizeof(struct vm_activate_cpu)); ac.vcpuid = vcpu; error = ioctl(ctx->fd, VM_ACTIVATE_CPU, &ac); return (error); } int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *info1, uint64_t *info2) { struct vm_intinfo vmii; int error; bzero(&vmii, sizeof(struct vm_intinfo)); vmii.vcpuid = vcpu; error = ioctl(ctx->fd, VM_GET_INTINFO, &vmii); if (error == 0) { *info1 = vmii.info1; *info2 = vmii.info2; } return (error); } int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1) { struct vm_intinfo vmii; int error; bzero(&vmii, sizeof(struct vm_intinfo)); vmii.vcpuid = vcpu; vmii.info1 = info1; error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii); return (error); } + +int +vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value) +{ + struct vm_rtc_data rtcdata; + int error; + + bzero(&rtcdata, sizeof(struct vm_rtc_data)); + rtcdata.offset = offset; + rtcdata.value = value; + error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata); + return (error); +} + +int +vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval) +{ + struct vm_rtc_data rtcdata; + int error; + + bzero(&rtcdata, sizeof(struct vm_rtc_data)); + rtcdata.offset = offset; + error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata); + if (error == 0) + *retval = rtcdata.value; + return (error); +} + +int +vm_rtc_settime(struct vmctx *ctx, time_t secs) +{ + struct vm_rtc_time rtctime; + int error; + + bzero(&rtctime, sizeof(struct vm_rtc_time)); + rtctime.secs = secs; + error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime); + return (error); +} + +int +vm_rtc_gettime(struct vmctx *ctx, time_t *secs) +{ + struct vm_rtc_time rtctime; + int error; + + bzero(&rtctime, sizeof(struct vm_rtc_time)); + error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime); + if (error == 0) + *secs = rtctime.secs; + return (error); +} Index: head/lib/libvmmapi/vmmapi.h =================================================================== --- head/lib/libvmmapi/vmmapi.h (revision 276427) +++ head/lib/libvmmapi/vmmapi.h (revision 276428) @@ -1,153 +1,159 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMMAPI_H_ #define _VMMAPI_H_ #include #include struct iovec; struct vmctx; enum x2apic_state; /* * Different styles of mapping the memory assigned to a VM into the address * space of the controlling process. */ enum vm_mmap_style { VM_MMAP_NONE, /* no mapping */ VM_MMAP_ALL, /* fully and statically mapped */ VM_MMAP_SPARSE, /* mappings created on-demand */ }; #define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */ int vm_create(const char *name); struct vmctx *vm_open(const char *name); void vm_destroy(struct vmctx *ctx); int vm_parse_memsize(const char *optarg, size_t *memsize); int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len, int *wired); int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s); void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len); int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num); uint32_t vm_get_lowmem_limit(struct vmctx *ctx); void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit); void vm_set_memflags(struct vmctx *ctx, int flags); size_t vm_get_lowmem_size(struct vmctx *ctx); size_t vm_get_highmem_size(struct vmctx *ctx); int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access); int vm_get_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t *base, uint32_t *limit, uint32_t *access); int vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *seg_desc); int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *ret_vmexit); int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how); int vm_reinit(struct vmctx *ctx); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); int vm_inject_exception(struct vmctx *ctx, int vcpu, int vec); int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode); int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector); int vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector); int vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg); int vm_ioapic_assert_irq(struct vmctx *ctx, int irq); int vm_ioapic_deassert_irq(struct vmctx *ctx, int irq); int vm_ioapic_pulse_irq(struct vmctx *ctx, int irq); int vm_ioapic_pincount(struct vmctx *ctx, int *pincount); int vm_isa_assert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); int vm_isa_deassert_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); int vm_isa_pulse_irq(struct vmctx *ctx, int atpic_irq, int ioapic_irq); int vm_isa_set_irq_trigger(struct vmctx *ctx, int atpic_irq, enum vm_intr_trigger trigger); int vm_inject_nmi(struct vmctx *ctx, int vcpu); int vm_capability_name2type(const char *capname); const char *vm_capability_type2name(int type); int vm_get_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int *retval); int vm_set_capability(struct vmctx *ctx, int vcpu, enum vm_cap_type cap, int val); int vm_assign_pptdev(struct vmctx *ctx, int bus, int slot, int func); int vm_unassign_pptdev(struct vmctx *ctx, int bus, int slot, int func); int vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func, uint64_t addr, uint64_t msg, int numvec); int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control); int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2); int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo); /* * Return a pointer to the statistics buffer. Note that this is not MT-safe. */ uint64_t *vm_get_stats(struct vmctx *ctx, int vcpu, struct timeval *ret_tv, int *ret_entries); const char *vm_get_stat_desc(struct vmctx *ctx, int index); int vm_get_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state *s); int vm_set_x2apic_state(struct vmctx *ctx, int vcpu, enum x2apic_state s); int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities); /* * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'. * The 'iovcnt' should be big enough to accomodate all GPA segments. * Returns 0 on success, 1 on a guest fault condition and -1 otherwise. */ int vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg, uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt); void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov, void *host_dst, size_t len); void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src, struct iovec *guest_iov, size_t len); +/* RTC */ +int vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value); +int vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval); +int vm_rtc_settime(struct vmctx *ctx, time_t secs); +int vm_rtc_gettime(struct vmctx *ctx, time_t *secs); + /* Reset vcpu register state */ int vcpu_reset(struct vmctx *ctx, int vcpu); int vm_active_cpus(struct vmctx *ctx, cpuset_t *cpus); int vm_suspended_cpus(struct vmctx *ctx, cpuset_t *cpus); int vm_activate_cpu(struct vmctx *ctx, int vcpu); /* * FreeBSD specific APIs */ int vm_setup_freebsd_registers(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t cr3, uint64_t gdtbase, uint64_t rsp); int vm_setup_freebsd_registers_i386(struct vmctx *vmctx, int vcpu, uint32_t eip, uint32_t gdtbase, uint32_t esp); void vm_setup_freebsd_gdt(uint64_t *gdtr); #endif /* _VMMAPI_H_ */ Index: head/sys/amd64/include/vmm.h =================================================================== --- head/sys/amd64/include/vmm.h (revision 276427) +++ head/sys/amd64/include/vmm.h (revision 276428) @@ -1,630 +1,631 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMM_H_ #define _VMM_H_ #include enum vm_suspend_how { VM_SUSPEND_NONE, VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, VM_SUSPEND_TRIPLEFAULT, VM_SUSPEND_LAST }; /* * Identifiers for architecturally defined registers. */ enum vm_reg_name { VM_REG_GUEST_RAX, VM_REG_GUEST_RBX, VM_REG_GUEST_RCX, VM_REG_GUEST_RDX, VM_REG_GUEST_RSI, VM_REG_GUEST_RDI, VM_REG_GUEST_RBP, VM_REG_GUEST_R8, VM_REG_GUEST_R9, VM_REG_GUEST_R10, VM_REG_GUEST_R11, VM_REG_GUEST_R12, VM_REG_GUEST_R13, VM_REG_GUEST_R14, VM_REG_GUEST_R15, VM_REG_GUEST_CR0, VM_REG_GUEST_CR3, VM_REG_GUEST_CR4, VM_REG_GUEST_DR7, VM_REG_GUEST_RSP, VM_REG_GUEST_RIP, VM_REG_GUEST_RFLAGS, VM_REG_GUEST_ES, VM_REG_GUEST_CS, VM_REG_GUEST_SS, VM_REG_GUEST_DS, VM_REG_GUEST_FS, VM_REG_GUEST_GS, VM_REG_GUEST_LDTR, VM_REG_GUEST_TR, VM_REG_GUEST_IDTR, VM_REG_GUEST_GDTR, VM_REG_GUEST_EFER, VM_REG_GUEST_CR2, VM_REG_GUEST_PDPTE0, VM_REG_GUEST_PDPTE1, VM_REG_GUEST_PDPTE2, VM_REG_GUEST_PDPTE3, VM_REG_GUEST_INTR_SHADOW, VM_REG_LAST }; enum x2apic_state { X2APIC_DISABLED, X2APIC_ENABLED, X2APIC_STATE_LAST }; #define VM_INTINFO_VECTOR(info) ((info) & 0xff) #define VM_INTINFO_DEL_ERRCODE 0x800 #define VM_INTINFO_RSVD 0x7ffff000 #define VM_INTINFO_VALID 0x80000000 #define VM_INTINFO_TYPE 0x700 #define VM_INTINFO_HWINTR (0 << 8) #define VM_INTINFO_NMI (2 << 8) #define VM_INTINFO_HWEXCEPTION (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) #ifdef _KERNEL #define VM_MAX_NAMELEN 32 struct vm; struct vm_exception; struct vm_memory_segment; struct seg_desc; struct vm_exit; struct vm_run; struct vhpet; struct vioapic; struct vlapic; struct vmspace; struct vm_object; struct vm_guest_paging; struct pmap; typedef int (*vmm_init_func_t)(int ipinum); typedef int (*vmm_cleanup_func_t)(void); typedef void (*vmm_resume_func_t)(void); typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap); typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip, struct pmap *pmap, void *rendezvous_cookie, void *suspend_cookie); typedef void (*vmi_cleanup_func_t)(void *vmi); typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num, uint64_t *retval); typedef int (*vmi_set_register_t)(void *vmi, int vcpu, int num, uint64_t val); typedef int (*vmi_get_desc_t)(void *vmi, int vcpu, int num, struct seg_desc *desc); typedef int (*vmi_set_desc_t)(void *vmi, int vcpu, int num, struct seg_desc *desc); typedef int (*vmi_get_cap_t)(void *vmi, int vcpu, int num, int *retval); typedef int (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val); typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max); typedef void (*vmi_vmspace_free)(struct vmspace *vmspace); typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu); typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic); struct vmm_ops { vmm_init_func_t init; /* module wide initialization */ vmm_cleanup_func_t cleanup; vmm_resume_func_t resume; vmi_init_func_t vminit; /* vm-specific initialization */ vmi_run_func_t vmrun; vmi_cleanup_func_t vmcleanup; vmi_get_register_t vmgetreg; vmi_set_register_t vmsetreg; vmi_get_desc_t vmgetdesc; vmi_set_desc_t vmsetdesc; vmi_get_cap_t vmgetcap; vmi_set_cap_t vmsetcap; vmi_vmspace_alloc vmspace_alloc; vmi_vmspace_free vmspace_free; vmi_vlapic_init vlapic_init; vmi_vlapic_cleanup vlapic_cleanup; }; extern struct vmm_ops vmm_ops_intel; extern struct vmm_ops vmm_ops_amd; int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len); int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len); void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot, void **cookie); void vm_gpa_release(void *cookie); int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, struct vm_memory_segment *seg); int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, vm_offset_t *offset, struct vm_object **object); boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa); int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval); int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val); int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *ret_desc); int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc); int vm_run(struct vm *vm, struct vm_run *vmrun); int vm_suspend(struct vm *vm, enum vm_suspend_how how); int vm_inject_nmi(struct vm *vm, int vcpu); int vm_nmi_pending(struct vm *vm, int vcpuid); void vm_nmi_clear(struct vm *vm, int vcpuid); int vm_inject_extint(struct vm *vm, int vcpu); int vm_extint_pending(struct vm *vm, int vcpuid); void vm_extint_clear(struct vm *vm, int vcpuid); struct vlapic *vm_lapic(struct vm *vm, int cpu); struct vioapic *vm_ioapic(struct vm *vm); struct vhpet *vm_hpet(struct vm *vm); int vm_get_capability(struct vm *vm, int vcpu, int type, int *val); int vm_set_capability(struct vm *vm, int vcpu, int type, int val); int vm_get_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state *state); int vm_set_x2apic_state(struct vm *vm, int vcpu, enum x2apic_state state); int vm_apicid2vcpuid(struct vm *vm, int apicid); int vm_activate_cpu(struct vm *vm, int vcpu); cpuset_t vm_active_cpus(struct vm *vm); cpuset_t vm_suspended_cpus(struct vm *vm); struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid); void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip); void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip); /* * Rendezvous all vcpus specified in 'dest' and execute 'func(arg)'. * The rendezvous 'func(arg)' is not allowed to do anything that will * cause the thread to be put to sleep. * * If the rendezvous is being initiated from a vcpu context then the * 'vcpuid' must refer to that vcpu, otherwise it should be set to -1. * * The caller cannot hold any locks when initiating the rendezvous. * * The implementation of this API may cause vcpus other than those specified * by 'dest' to be stalled. The caller should not rely on any vcpus making * forward progress when the rendezvous is in progress. */ typedef void (*vm_rendezvous_func_t)(struct vm *vm, int vcpuid, void *arg); void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, vm_rendezvous_func_t func, void *arg); static __inline int vcpu_rendezvous_pending(void *rendezvous_cookie) { return (*(uintptr_t *)rendezvous_cookie != 0); } static __inline int vcpu_suspended(void *suspend_cookie) { return (*(int *)suspend_cookie); } /* * Return 1 if device indicated by bus/slot/func is supposed to be a * pci passthrough device. * * Return 0 otherwise. */ int vmm_is_pptdev(int bus, int slot, int func); void *vm_iommu_domain(struct vm *vm); enum vcpu_state { VCPU_IDLE, VCPU_FROZEN, VCPU_RUNNING, VCPU_SLEEPING, }; int vcpu_set_state(struct vm *vm, int vcpu, enum vcpu_state state, bool from_idle); enum vcpu_state vcpu_get_state(struct vm *vm, int vcpu, int *hostcpu); static int __inline vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu) { return (vcpu_get_state(vm, vcpu, hostcpu) == VCPU_RUNNING); } #ifdef _SYS_PROC_H_ static int __inline vcpu_should_yield(struct vm *vm, int vcpu) { return (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)); } #endif void *vcpu_stats(struct vm *vm, int vcpu); void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr); struct vmspace *vm_get_vmspace(struct vm *vm); int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); struct vpmtmr *vm_pmtmr(struct vm *vm); +struct vrtc *vm_rtc(struct vm *vm); /* * Inject exception 'vme' into the guest vcpu. This function returns 0 on * success and non-zero on failure. * * Wrapper functions like 'vm_inject_gp()' should be preferred to calling * this function directly because they enforce the trap-like or fault-like * behavior of an exception. * * This function should only be called in the context of the thread that is * executing this vcpu. */ int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme); /* * This function is called after a VM-exit that occurred during exception or * interrupt delivery through the IDT. The format of 'intinfo' is described * in Figure 15-1, "EXITINTINFO for All Intercepts", APM, Vol 2. * * If a VM-exit handler completes the event delivery successfully then it * should call vm_exit_intinfo() to extinguish the pending event. For e.g., * if the task switch emulation is triggered via a task gate then it should * call this function with 'intinfo=0' to indicate that the external event * is not pending anymore. * * Return value is 0 on success and non-zero on failure. */ int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t intinfo); /* * This function is called before every VM-entry to retrieve a pending * event that should be injected into the guest. This function combines * nested events into a double or triple fault. * * Returns 0 if there are no events that need to be injected into the guest * and non-zero otherwise. */ int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *info); int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2); enum vm_reg_name vm_segment_name(int seg_encoding); struct vm_copyinfo { uint64_t gpa; size_t len; void *hva; void *cookie; }; /* * Set up 'copyinfo[]' to copy to/from guest linear address space starting * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for * a copyin or PROT_WRITE for a copyout. * * Returns 0 on success. * Returns 1 if an exception was injected into the guest. * Returns -1 otherwise. * * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if * the return value is 0. The 'copyinfo[]' resources should be freed by calling * 'vm_copy_teardown()' after the copy is done. */ int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, int num_copyinfo); void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, int num_copyinfo); void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, size_t len); void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, struct vm_copyinfo *copyinfo, size_t len); int vcpu_trace_exceptions(struct vm *vm, int vcpuid); #endif /* KERNEL */ #define VM_MAXCPU 16 /* maximum virtual cpus */ /* * Identifiers for optional vmm capabilities */ enum vm_cap_type { VM_CAP_HALT_EXIT, VM_CAP_MTRAP_EXIT, VM_CAP_PAUSE_EXIT, VM_CAP_UNRESTRICTED_GUEST, VM_CAP_ENABLE_INVPCID, VM_CAP_MAX }; enum vm_intr_trigger { EDGE_TRIGGER, LEVEL_TRIGGER }; /* * The 'access' field has the format specified in Table 21-2 of the Intel * Architecture Manual vol 3b. * * XXX The contents of the 'access' field are architecturally defined except * bit 16 - Segment Unusable. */ struct seg_desc { uint64_t base; uint32_t limit; uint32_t access; }; #define SEG_DESC_TYPE(access) ((access) & 0x001f) #define SEG_DESC_DPL(access) (((access) >> 5) & 0x3) #define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0) #define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0) #define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0) #define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0) enum vm_cpu_mode { CPU_MODE_REAL, CPU_MODE_PROTECTED, CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ }; enum vm_paging_mode { PAGING_MODE_FLAT, PAGING_MODE_32, PAGING_MODE_PAE, PAGING_MODE_64, }; struct vm_guest_paging { uint64_t cr3; int cpl; enum vm_cpu_mode cpu_mode; enum vm_paging_mode paging_mode; }; /* * The data structures 'vie' and 'vie_op' are meant to be opaque to the * consumers of instruction decoding. The only reason why their contents * need to be exposed is because they are part of the 'vm_exit' structure. */ struct vie_op { uint8_t op_byte; /* actual opcode byte */ uint8_t op_type; /* type of operation (e.g. MOV) */ uint16_t op_flags; }; #define VIE_INST_SIZE 15 struct vie { uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */ uint8_t num_valid; /* size of the instruction */ uint8_t num_processed; uint8_t addrsize:4, opsize:4; /* address and operand sizes */ uint8_t rex_w:1, /* REX prefix */ rex_r:1, rex_x:1, rex_b:1, rex_present:1, opsize_override:1, /* Operand size override */ addrsize_override:1; /* Address size override */ uint8_t mod:2, /* ModRM byte */ reg:4, rm:4; uint8_t ss:2, /* SIB byte */ index:4, base:4; uint8_t disp_bytes; uint8_t imm_bytes; uint8_t scale; int base_register; /* VM_REG_GUEST_xyz */ int index_register; /* VM_REG_GUEST_xyz */ int64_t displacement; /* optional addr displacement */ int64_t immediate; /* optional immediate operand */ uint8_t decoded; /* set to 1 if successfully decoded */ struct vie_op op; /* opcode description */ }; enum vm_exitcode { VM_EXITCODE_INOUT, VM_EXITCODE_VMX, VM_EXITCODE_BOGUS, VM_EXITCODE_RDMSR, VM_EXITCODE_WRMSR, VM_EXITCODE_HLT, VM_EXITCODE_MTRAP, VM_EXITCODE_PAUSE, VM_EXITCODE_PAGING, VM_EXITCODE_INST_EMUL, VM_EXITCODE_SPINUP_AP, VM_EXITCODE_DEPRECATED1, /* used to be SPINDOWN_CPU */ VM_EXITCODE_RENDEZVOUS, VM_EXITCODE_IOAPIC_EOI, VM_EXITCODE_SUSPENDED, VM_EXITCODE_INOUT_STR, VM_EXITCODE_TASK_SWITCH, VM_EXITCODE_MONITOR, VM_EXITCODE_MWAIT, VM_EXITCODE_SVM, VM_EXITCODE_MAX }; struct vm_inout { uint16_t bytes:3; /* 1 or 2 or 4 */ uint16_t in:1; uint16_t string:1; uint16_t rep:1; uint16_t port; uint32_t eax; /* valid for out */ }; struct vm_inout_str { struct vm_inout inout; /* must be the first element */ struct vm_guest_paging paging; uint64_t rflags; uint64_t cr0; uint64_t index; uint64_t count; /* rep=1 (%rcx), rep=0 (1) */ int addrsize; enum vm_reg_name seg_name; struct seg_desc seg_desc; }; enum task_switch_reason { TSR_CALL, TSR_IRET, TSR_JMP, TSR_IDT_GATE, /* task gate in IDT */ }; struct vm_task_switch { uint16_t tsssel; /* new TSS selector */ int ext; /* task switch due to external event */ uint32_t errcode; int errcode_valid; /* push 'errcode' on the new stack */ enum task_switch_reason reason; struct vm_guest_paging paging; }; struct vm_exit { enum vm_exitcode exitcode; int inst_length; /* 0 means unknown */ uint64_t rip; union { struct vm_inout inout; struct vm_inout_str inout_str; struct { uint64_t gpa; int fault_type; } paging; struct { uint64_t gpa; uint64_t gla; int cs_d; /* CS.D */ struct vm_guest_paging paging; struct vie vie; } inst_emul; /* * VMX specific payload. Used when there is no "better" * exitcode to represent the VM-exit. */ struct { int status; /* vmx inst status */ /* * 'exit_reason' and 'exit_qualification' are valid * only if 'status' is zero. */ uint32_t exit_reason; uint64_t exit_qualification; /* * 'inst_error' and 'inst_type' are valid * only if 'status' is non-zero. */ int inst_type; int inst_error; } vmx; /* * SVM specific payload. */ struct { uint64_t exitcode; uint64_t exitinfo1; uint64_t exitinfo2; } svm; struct { uint32_t code; /* ecx value */ uint64_t wval; } msr; struct { int vcpu; uint64_t rip; } spinup_ap; struct { uint64_t rflags; } hlt; struct { int vector; } ioapic_eoi; struct { enum vm_suspend_how how; } suspended; struct vm_task_switch task_switch; } u; }; /* APIs to inject faults into the guest */ void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid, int errcode); static __inline void vm_inject_ud(void *vm, int vcpuid) { vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0); } static __inline void vm_inject_gp(void *vm, int vcpuid) { vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0); } static __inline void vm_inject_ac(void *vm, int vcpuid, int errcode) { vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode); } static __inline void vm_inject_ss(void *vm, int vcpuid, int errcode) { vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode); } void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2); #endif /* _VMM_H_ */ Index: head/sys/amd64/include/vmm_dev.h =================================================================== --- head/sys/amd64/include/vmm_dev.h (revision 276427) +++ head/sys/amd64/include/vmm_dev.h (revision 276428) @@ -1,339 +1,362 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ #ifdef _KERNEL void vmmdev_init(void); int vmmdev_cleanup(void); #endif struct vm_memory_segment { vm_paddr_t gpa; /* in */ size_t len; int wired; }; struct vm_register { int cpuid; int regnum; /* enum vm_reg_name */ uint64_t regval; }; struct vm_seg_desc { /* data or code segment */ int cpuid; int regnum; /* enum vm_reg_name */ struct seg_desc desc; }; struct vm_run { int cpuid; uint64_t rip; /* start running here */ struct vm_exit vm_exit; }; struct vm_exception { int cpuid; int vector; uint32_t error_code; int error_code_valid; }; struct vm_lapic_msi { uint64_t msg; uint64_t addr; }; struct vm_lapic_irq { int cpuid; int vector; }; struct vm_ioapic_irq { int irq; }; struct vm_isa_irq { int atpic_irq; int ioapic_irq; }; struct vm_isa_irq_trigger { int atpic_irq; enum vm_intr_trigger trigger; }; struct vm_capability { int cpuid; enum vm_cap_type captype; int capval; int allcpus; }; struct vm_pptdev { int bus; int slot; int func; }; struct vm_pptdev_mmio { int bus; int slot; int func; vm_paddr_t gpa; vm_paddr_t hpa; size_t len; }; struct vm_pptdev_msi { int vcpu; int bus; int slot; int func; int numvec; /* 0 means disabled */ uint64_t msg; uint64_t addr; }; struct vm_pptdev_msix { int vcpu; int bus; int slot; int func; int idx; uint64_t msg; uint32_t vector_control; uint64_t addr; }; struct vm_nmi { int cpuid; }; #define MAX_VM_STATS 64 struct vm_stats { int cpuid; /* in */ int num_entries; /* out */ struct timeval tv; uint64_t statbuf[MAX_VM_STATS]; }; struct vm_stat_desc { int index; /* in */ char desc[128]; /* out */ }; struct vm_x2apic { int cpuid; enum x2apic_state state; }; struct vm_gpa_pte { uint64_t gpa; /* in */ uint64_t pte[4]; /* out */ int ptenum; }; struct vm_hpet_cap { uint32_t capabilities; /* lower 32 bits of HPET capabilities */ }; struct vm_suspend { enum vm_suspend_how how; }; struct vm_gla2gpa { int vcpuid; /* inputs */ int prot; /* PROT_READ or PROT_WRITE */ uint64_t gla; struct vm_guest_paging paging; int fault; /* outputs */ uint64_t gpa; }; struct vm_activate_cpu { int vcpuid; }; struct vm_cpuset { int which; int cpusetsize; cpuset_t *cpus; }; #define VM_ACTIVE_CPUS 0 #define VM_SUSPENDED_CPUS 1 struct vm_intinfo { int vcpuid; uint64_t info1; uint64_t info2; }; +struct vm_rtc_time { + time_t secs; +}; + +struct vm_rtc_data { + int offset; + uint8_t value; +}; + enum { /* general routines */ IOCNUM_ABIVERS = 0, IOCNUM_RUN = 1, IOCNUM_SET_CAPABILITY = 2, IOCNUM_GET_CAPABILITY = 3, IOCNUM_SUSPEND = 4, IOCNUM_REINIT = 5, /* memory apis */ IOCNUM_MAP_MEMORY = 10, IOCNUM_GET_MEMORY_SEG = 11, IOCNUM_GET_GPA_PMAP = 12, IOCNUM_GLA2GPA = 13, /* register/state accessors */ IOCNUM_SET_REGISTER = 20, IOCNUM_GET_REGISTER = 21, IOCNUM_SET_SEGMENT_DESCRIPTOR = 22, IOCNUM_GET_SEGMENT_DESCRIPTOR = 23, /* interrupt injection */ IOCNUM_GET_INTINFO = 28, IOCNUM_SET_INTINFO = 29, IOCNUM_INJECT_EXCEPTION = 30, IOCNUM_LAPIC_IRQ = 31, IOCNUM_INJECT_NMI = 32, IOCNUM_IOAPIC_ASSERT_IRQ = 33, IOCNUM_IOAPIC_DEASSERT_IRQ = 34, IOCNUM_IOAPIC_PULSE_IRQ = 35, IOCNUM_LAPIC_MSI = 36, IOCNUM_LAPIC_LOCAL_IRQ = 37, IOCNUM_IOAPIC_PINCOUNT = 38, /* PCI pass-thru */ IOCNUM_BIND_PPTDEV = 40, IOCNUM_UNBIND_PPTDEV = 41, IOCNUM_MAP_PPTDEV_MMIO = 42, IOCNUM_PPTDEV_MSI = 43, IOCNUM_PPTDEV_MSIX = 44, /* statistics */ IOCNUM_VM_STATS = 50, IOCNUM_VM_STAT_DESC = 51, /* kernel device state */ IOCNUM_SET_X2APIC_STATE = 60, IOCNUM_GET_X2APIC_STATE = 61, IOCNUM_GET_HPET_CAPABILITIES = 62, /* legacy interrupt injection */ IOCNUM_ISA_ASSERT_IRQ = 80, IOCNUM_ISA_DEASSERT_IRQ = 81, IOCNUM_ISA_PULSE_IRQ = 82, IOCNUM_ISA_SET_IRQ_TRIGGER = 83, /* vm_cpuset */ IOCNUM_ACTIVATE_CPU = 90, IOCNUM_GET_CPUSET = 91, + + /* RTC */ + IOCNUM_RTC_READ = 100, + IOCNUM_RTC_WRITE = 101, + IOCNUM_RTC_SETTIME = 102, + IOCNUM_RTC_GETTIME = 103, }; #define VM_RUN \ _IOWR('v', IOCNUM_RUN, struct vm_run) #define VM_SUSPEND \ _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) #define VM_REINIT \ _IO('v', IOCNUM_REINIT) #define VM_MAP_MEMORY \ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) #define VM_GET_MEMORY_SEG \ _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment) #define VM_SET_REGISTER \ _IOW('v', IOCNUM_SET_REGISTER, struct vm_register) #define VM_GET_REGISTER \ _IOWR('v', IOCNUM_GET_REGISTER, struct vm_register) #define VM_SET_SEGMENT_DESCRIPTOR \ _IOW('v', IOCNUM_SET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_GET_SEGMENT_DESCRIPTOR \ _IOWR('v', IOCNUM_GET_SEGMENT_DESCRIPTOR, struct vm_seg_desc) #define VM_INJECT_EXCEPTION \ _IOW('v', IOCNUM_INJECT_EXCEPTION, struct vm_exception) #define VM_LAPIC_IRQ \ _IOW('v', IOCNUM_LAPIC_IRQ, struct vm_lapic_irq) #define VM_LAPIC_LOCAL_IRQ \ _IOW('v', IOCNUM_LAPIC_LOCAL_IRQ, struct vm_lapic_irq) #define VM_LAPIC_MSI \ _IOW('v', IOCNUM_LAPIC_MSI, struct vm_lapic_msi) #define VM_IOAPIC_ASSERT_IRQ \ _IOW('v', IOCNUM_IOAPIC_ASSERT_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_DEASSERT_IRQ \ _IOW('v', IOCNUM_IOAPIC_DEASSERT_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_PULSE_IRQ \ _IOW('v', IOCNUM_IOAPIC_PULSE_IRQ, struct vm_ioapic_irq) #define VM_IOAPIC_PINCOUNT \ _IOR('v', IOCNUM_IOAPIC_PINCOUNT, int) #define VM_ISA_ASSERT_IRQ \ _IOW('v', IOCNUM_ISA_ASSERT_IRQ, struct vm_isa_irq) #define VM_ISA_DEASSERT_IRQ \ _IOW('v', IOCNUM_ISA_DEASSERT_IRQ, struct vm_isa_irq) #define VM_ISA_PULSE_IRQ \ _IOW('v', IOCNUM_ISA_PULSE_IRQ, struct vm_isa_irq) #define VM_ISA_SET_IRQ_TRIGGER \ _IOW('v', IOCNUM_ISA_SET_IRQ_TRIGGER, struct vm_isa_irq_trigger) #define VM_SET_CAPABILITY \ _IOW('v', IOCNUM_SET_CAPABILITY, struct vm_capability) #define VM_GET_CAPABILITY \ _IOWR('v', IOCNUM_GET_CAPABILITY, struct vm_capability) #define VM_BIND_PPTDEV \ _IOW('v', IOCNUM_BIND_PPTDEV, struct vm_pptdev) #define VM_UNBIND_PPTDEV \ _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev) #define VM_MAP_PPTDEV_MMIO \ _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio) #define VM_PPTDEV_MSI \ _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi) #define VM_PPTDEV_MSIX \ _IOW('v', IOCNUM_PPTDEV_MSIX, struct vm_pptdev_msix) #define VM_INJECT_NMI \ _IOW('v', IOCNUM_INJECT_NMI, struct vm_nmi) #define VM_STATS \ _IOWR('v', IOCNUM_VM_STATS, struct vm_stats) #define VM_STAT_DESC \ _IOWR('v', IOCNUM_VM_STAT_DESC, struct vm_stat_desc) #define VM_SET_X2APIC_STATE \ _IOW('v', IOCNUM_SET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_X2APIC_STATE \ _IOWR('v', IOCNUM_GET_X2APIC_STATE, struct vm_x2apic) #define VM_GET_HPET_CAPABILITIES \ _IOR('v', IOCNUM_GET_HPET_CAPABILITIES, struct vm_hpet_cap) #define VM_GET_GPA_PMAP \ _IOWR('v', IOCNUM_GET_GPA_PMAP, struct vm_gpa_pte) #define VM_GLA2GPA \ _IOWR('v', IOCNUM_GLA2GPA, struct vm_gla2gpa) #define VM_ACTIVATE_CPU \ _IOW('v', IOCNUM_ACTIVATE_CPU, struct vm_activate_cpu) #define VM_GET_CPUS \ _IOW('v', IOCNUM_GET_CPUSET, struct vm_cpuset) #define VM_SET_INTINFO \ _IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo) #define VM_GET_INTINFO \ _IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo) +#define VM_RTC_WRITE \ + _IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data) +#define VM_RTC_READ \ + _IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data) +#define VM_RTC_SETTIME \ + _IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time) +#define VM_RTC_GETTIME \ + _IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time) #endif Index: head/sys/amd64/vmm/io/vhpet.c =================================================================== --- head/sys/amd64/vmm/io/vhpet.c (revision 276427) +++ head/sys/amd64/vmm/io/vhpet.c (revision 276428) @@ -1,812 +1,760 @@ /*- * Copyright (c) 2013 Tycho Nightingale * Copyright (c) 2013 Neel Natu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vatpic.h" #include "vioapic.h" #include "vhpet.h" #include "vmm_ktr.h" static MALLOC_DEFINE(M_VHPET, "vhpet", "bhyve virtual hpet"); #define HPET_FREQ 10000000 /* 10.0 Mhz */ #define FS_PER_S 1000000000000000ul /* Timer N Configuration and Capabilities Register */ #define HPET_TCAP_RO_MASK (HPET_TCAP_INT_ROUTE | \ HPET_TCAP_FSB_INT_DEL | \ HPET_TCAP_SIZE | \ HPET_TCAP_PER_INT) /* * HPET requires at least 3 timers and up to 32 timers per block. */ #define VHPET_NUM_TIMERS 8 CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32); struct vhpet_callout_arg { struct vhpet *vhpet; int timer_num; }; struct vhpet { struct vm *vm; struct mtx mtx; sbintime_t freq_sbt; uint64_t config; /* Configuration */ uint64_t isr; /* Interrupt Status */ uint32_t countbase; /* HPET counter base value */ sbintime_t countbase_sbt; /* uptime corresponding to base value */ struct { uint64_t cap_config; /* Configuration */ uint64_t msireg; /* FSB interrupt routing */ uint32_t compval; /* Comparator */ uint32_t comprate; struct callout callout; sbintime_t callout_sbt; /* time when counter==compval */ struct vhpet_callout_arg arg; } timer[VHPET_NUM_TIMERS]; }; #define VHPET_LOCK(vhp) mtx_lock(&((vhp)->mtx)) #define VHPET_UNLOCK(vhp) mtx_unlock(&((vhp)->mtx)) static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now); static uint64_t vhpet_capabilities(void) { uint64_t cap = 0; cap |= 0x8086 << 16; /* vendor id */ - cap |= HPET_CAP_LEG_RT; /* legacy routing capable */ cap |= (VHPET_NUM_TIMERS - 1) << 8; /* number of timers */ cap |= 1; /* revision */ cap &= ~HPET_CAP_COUNT_SIZE; /* 32-bit timer */ cap &= 0xffffffff; cap |= (FS_PER_S / HPET_FREQ) << 32; /* tick period in fs */ return (cap); } static __inline bool vhpet_counter_enabled(struct vhpet *vhpet) { return ((vhpet->config & HPET_CNF_ENABLE) ? true : false); } static __inline bool vhpet_timer_msi_enabled(struct vhpet *vhpet, int n) { const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN; - /* - * LegacyReplacement Route configuration takes precedence over MSI - * for timers 0 and 1. - */ - if (n == 0 || n == 1) { - if (vhpet->config & HPET_CNF_LEG_RT) - return (false); - } - if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable) return (true); else return (false); } static __inline int vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n) { /* * If the timer is configured to use MSI then treat it as if the * timer is not connected to the ioapic. */ if (vhpet_timer_msi_enabled(vhpet, n)) return (0); - if (vhpet->config & HPET_CNF_LEG_RT) { - /* - * In "legacy routing" timers 0 and 1 are connected to - * ioapic pins 2 and 8 respectively. - */ - switch (n) { - case 0: - return (2); - case 1: - return (8); - } - } - return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9); } -static __inline int -vhpet_timer_atpic_pin(struct vhpet *vhpet, int n) -{ - if (vhpet->config & HPET_CNF_LEG_RT) { - /* - * In "legacy routing" timers 0 and 1 are connected to - * 8259 master pin 0 and slave pin 0 respectively. - */ - switch (n) { - case 0: - return (0); - case 1: - return (8); - } - } - - return (-1); -} - static uint32_t vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr) { uint32_t val; sbintime_t now, delta; val = vhpet->countbase; if (vhpet_counter_enabled(vhpet)) { now = sbinuptime(); delta = now - vhpet->countbase_sbt; KASSERT(delta >= 0, ("vhpet_counter: uptime went backwards: " "%#lx to %#lx", vhpet->countbase_sbt, now)); val += delta / vhpet->freq_sbt; if (nowptr != NULL) *nowptr = now; } else { /* * The sbinuptime corresponding to the 'countbase' is * meaningless when the counter is disabled. Make sure * that the the caller doesn't want to use it. */ KASSERT(nowptr == NULL, ("vhpet_counter: nowptr must be NULL")); } return (val); } static void vhpet_timer_clear_isr(struct vhpet *vhpet, int n) { - int pin, legacy_pin; + int pin; if (vhpet->isr & (1 << n)) { pin = vhpet_timer_ioapic_pin(vhpet, n); KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n)); vioapic_deassert_irq(vhpet->vm, pin); - - legacy_pin = vhpet_timer_atpic_pin(vhpet, n); - if (legacy_pin != -1) - vatpic_deassert_irq(vhpet->vm, legacy_pin); - vhpet->isr &= ~(1 << n); } } static __inline bool vhpet_periodic_timer(struct vhpet *vhpet, int n) { return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0); } static __inline bool vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n) { return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0); } static __inline bool vhpet_timer_edge_trig(struct vhpet *vhpet, int n) { KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: " "timer %d is using MSI", n)); - /* The legacy replacement interrupts are always edge triggered */ - if (vhpet->config & HPET_CNF_LEG_RT) { - if (n == 0 || n == 1) - return (true); - } - if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0) return (true); else return (false); } static void vhpet_timer_interrupt(struct vhpet *vhpet, int n) { - int pin, legacy_pin; + int pin; /* If interrupts are not enabled for this timer then just return. */ if (!vhpet_timer_interrupt_enabled(vhpet, n)) return; /* * If a level triggered interrupt is already asserted then just return. */ if ((vhpet->isr & (1 << n)) != 0) { VM_CTR1(vhpet->vm, "hpet t%d intr is already asserted", n); return; } if (vhpet_timer_msi_enabled(vhpet, n)) { lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32, vhpet->timer[n].msireg & 0xffffffff); return; } pin = vhpet_timer_ioapic_pin(vhpet, n); if (pin == 0) { VM_CTR1(vhpet->vm, "hpet t%d intr is not routed to ioapic", n); return; } - legacy_pin = vhpet_timer_atpic_pin(vhpet, n); - if (vhpet_timer_edge_trig(vhpet, n)) { vioapic_pulse_irq(vhpet->vm, pin); - if (legacy_pin != -1) - vatpic_pulse_irq(vhpet->vm, legacy_pin); } else { vhpet->isr |= 1 << n; vioapic_assert_irq(vhpet->vm, pin); - if (legacy_pin != -1) - vatpic_assert_irq(vhpet->vm, legacy_pin); } } static void vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter) { uint32_t compval, comprate, compnext; KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n)); compval = vhpet->timer[n].compval; comprate = vhpet->timer[n].comprate; /* * Calculate the comparator value to be used for the next periodic * interrupt. * * This function is commonly called from the callout handler. * In this scenario the 'counter' is ahead of 'compval'. To find * the next value to program into the accumulator we divide the * number space between 'compval' and 'counter' into 'comprate' * sized units. The 'compval' is rounded up such that is "ahead" * of 'counter'. */ compnext = compval + ((counter - compval) / comprate + 1) * comprate; vhpet->timer[n].compval = compnext; } static void vhpet_handler(void *a) { int n; uint32_t counter; sbintime_t now; struct vhpet *vhpet; struct callout *callout; struct vhpet_callout_arg *arg; arg = a; vhpet = arg->vhpet; n = arg->timer_num; callout = &vhpet->timer[n].callout; VM_CTR1(vhpet->vm, "hpet t%d fired", n); VHPET_LOCK(vhpet); if (callout_pending(callout)) /* callout was reset */ goto done; if (!callout_active(callout)) /* callout was stopped */ goto done; callout_deactivate(callout); if (!vhpet_counter_enabled(vhpet)) panic("vhpet(%p) callout with counter disabled", vhpet); counter = vhpet_counter(vhpet, &now); vhpet_start_timer(vhpet, n, counter, now); vhpet_timer_interrupt(vhpet, n); done: VHPET_UNLOCK(vhpet); return; } static void vhpet_stop_timer(struct vhpet *vhpet, int n, sbintime_t now) { VM_CTR1(vhpet->vm, "hpet t%d stopped", n); callout_stop(&vhpet->timer[n].callout); /* * If the callout was scheduled to expire in the past but hasn't * had a chance to execute yet then trigger the timer interrupt * here. Failing to do so will result in a missed timer interrupt * in the guest. This is especially bad in one-shot mode because * the next interrupt has to wait for the counter to wrap around. */ if (vhpet->timer[n].callout_sbt < now) { VM_CTR1(vhpet->vm, "hpet t%d interrupt triggered after " "stopping timer", n); vhpet_timer_interrupt(vhpet, n); } } static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, sbintime_t now) { sbintime_t delta, precision; if (vhpet->timer[n].comprate != 0) vhpet_adjust_compval(vhpet, n, counter); else { /* * In one-shot mode it is the guest's responsibility to make * sure that the comparator value is not in the "past". The * hardware doesn't have any belt-and-suspenders to deal with * this so we don't either. */ } delta = (vhpet->timer[n].compval - counter) * vhpet->freq_sbt; precision = delta >> tc_precexp; vhpet->timer[n].callout_sbt = now + delta; callout_reset_sbt(&vhpet->timer[n].callout, vhpet->timer[n].callout_sbt, precision, vhpet_handler, &vhpet->timer[n].arg, C_ABSOLUTE); } static void vhpet_start_counting(struct vhpet *vhpet) { int i; vhpet->countbase_sbt = sbinuptime(); for (i = 0; i < VHPET_NUM_TIMERS; i++) { /* * Restart the timers based on the value of the main counter * when it stopped counting. */ vhpet_start_timer(vhpet, i, vhpet->countbase, vhpet->countbase_sbt); } } static void vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, sbintime_t now) { int i; vhpet->countbase = counter; for (i = 0; i < VHPET_NUM_TIMERS; i++) vhpet_stop_timer(vhpet, i, now); } static __inline void update_register(uint64_t *regptr, uint64_t data, uint64_t mask) { *regptr &= ~mask; *regptr |= (data & mask); } static void vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data, uint64_t mask) { bool clear_isr; int old_pin, new_pin; uint32_t allowed_irqs; uint64_t oldval, newval; if (vhpet_timer_msi_enabled(vhpet, n) || vhpet_timer_edge_trig(vhpet, n)) { if (vhpet->isr & (1 << n)) panic("vhpet timer %d isr should not be asserted", n); } old_pin = vhpet_timer_ioapic_pin(vhpet, n); oldval = vhpet->timer[n].cap_config; newval = oldval; update_register(&newval, data, mask); newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE); newval |= oldval & HPET_TCAP_RO_MASK; if (newval == oldval) return; vhpet->timer[n].cap_config = newval; VM_CTR2(vhpet->vm, "hpet t%d cap_config set to 0x%016x", n, newval); /* * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field. * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set * it to the default value of 0. */ allowed_irqs = vhpet->timer[n].cap_config >> 32; new_pin = vhpet_timer_ioapic_pin(vhpet, n); if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) { VM_CTR3(vhpet->vm, "hpet t%d configured invalid irq %d, " "allowed_irqs 0x%08x", n, new_pin, allowed_irqs); new_pin = 0; vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE; } if (!vhpet_periodic_timer(vhpet, n)) vhpet->timer[n].comprate = 0; /* * If the timer's ISR bit is set then clear it in the following cases: * - interrupt is disabled * - interrupt type is changed from level to edge or fsb. * - interrupt routing is changed * * This is to ensure that this timer's level triggered interrupt does * not remain asserted forever. */ if (vhpet->isr & (1 << n)) { KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d", n, old_pin)); if (!vhpet_timer_interrupt_enabled(vhpet, n)) clear_isr = true; else if (vhpet_timer_msi_enabled(vhpet, n)) clear_isr = true; else if (vhpet_timer_edge_trig(vhpet, n)) clear_isr = true; else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin) clear_isr = true; else clear_isr = false; if (clear_isr) { VM_CTR1(vhpet->vm, "hpet t%d isr cleared due to " "configuration change", n); vioapic_deassert_irq(vhpet->vm, old_pin); vhpet->isr &= ~(1 << n); } } } int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size, void *arg) { struct vhpet *vhpet; uint64_t data, mask, oldval, val64; uint32_t isr_clear_mask, old_compval, old_comprate, counter; sbintime_t now, *nowptr; int i, offset; vhpet = vm_hpet(vm); offset = gpa - VHPET_BASE; VHPET_LOCK(vhpet); /* Accesses to the HPET should be 4 or 8 bytes wide */ switch (size) { case 8: mask = 0xffffffffffffffff; data = val; break; case 4: mask = 0xffffffff; data = val; if ((offset & 0x4) != 0) { mask <<= 32; data <<= 32; } break; default: VM_CTR2(vhpet->vm, "hpet invalid mmio write: " "offset 0x%08x, size %d", offset, size); goto done; } /* Access to the HPET should be naturally aligned to its width */ if (offset & (size - 1)) { VM_CTR2(vhpet->vm, "hpet invalid mmio write: " "offset 0x%08x, size %d", offset, size); goto done; } if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) { /* * Get the most recent value of the counter before updating * the 'config' register. If the HPET is going to be disabled * then we need to update 'countbase' with the value right * before it is disabled. */ nowptr = vhpet_counter_enabled(vhpet) ? &now : NULL; counter = vhpet_counter(vhpet, nowptr); oldval = vhpet->config; update_register(&vhpet->config, data, mask); + + /* + * LegacyReplacement Routing is not supported so clear the + * bit explicitly. + */ + vhpet->config &= ~HPET_CNF_LEG_RT; + if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) { if (vhpet_counter_enabled(vhpet)) { vhpet_start_counting(vhpet); VM_CTR0(vhpet->vm, "hpet enabled"); } else { vhpet_stop_counting(vhpet, counter, now); VM_CTR0(vhpet->vm, "hpet disabled"); } } goto done; } if (offset == HPET_ISR || offset == HPET_ISR + 4) { isr_clear_mask = vhpet->isr & data; for (i = 0; i < VHPET_NUM_TIMERS; i++) { if ((isr_clear_mask & (1 << i)) != 0) { VM_CTR1(vhpet->vm, "hpet t%d isr cleared", i); vhpet_timer_clear_isr(vhpet, i); } } goto done; } if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) { /* Zero-extend the counter to 64-bits before updating it */ val64 = vhpet_counter(vhpet, NULL); update_register(&val64, data, mask); vhpet->countbase = val64; if (vhpet_counter_enabled(vhpet)) vhpet_start_counting(vhpet); goto done; } for (i = 0; i < VHPET_NUM_TIMERS; i++) { if (offset == HPET_TIMER_CAP_CNF(i) || offset == HPET_TIMER_CAP_CNF(i) + 4) { vhpet_timer_update_config(vhpet, i, data, mask); break; } if (offset == HPET_TIMER_COMPARATOR(i) || offset == HPET_TIMER_COMPARATOR(i) + 4) { old_compval = vhpet->timer[i].compval; old_comprate = vhpet->timer[i].comprate; if (vhpet_periodic_timer(vhpet, i)) { /* * In periodic mode writes to the comparator * change the 'compval' register only if the * HPET_TCNF_VAL_SET bit is set in the config * register. */ val64 = vhpet->timer[i].comprate; update_register(&val64, data, mask); vhpet->timer[i].comprate = val64; if ((vhpet->timer[i].cap_config & HPET_TCNF_VAL_SET) != 0) { vhpet->timer[i].compval = val64; } } else { KASSERT(vhpet->timer[i].comprate == 0, ("vhpet one-shot timer %d has invalid " "rate %u", i, vhpet->timer[i].comprate)); val64 = vhpet->timer[i].compval; update_register(&val64, data, mask); vhpet->timer[i].compval = val64; } vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET; if (vhpet->timer[i].compval != old_compval || vhpet->timer[i].comprate != old_comprate) { if (vhpet_counter_enabled(vhpet)) { counter = vhpet_counter(vhpet, &now); vhpet_start_timer(vhpet, i, counter, now); } } break; } if (offset == HPET_TIMER_FSB_VAL(i) || offset == HPET_TIMER_FSB_ADDR(i)) { update_register(&vhpet->timer[i].msireg, data, mask); break; } } done: VHPET_UNLOCK(vhpet); return (0); } int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *rval, int size, void *arg) { int i, offset; struct vhpet *vhpet; uint64_t data; vhpet = vm_hpet(vm); offset = gpa - VHPET_BASE; VHPET_LOCK(vhpet); /* Accesses to the HPET should be 4 or 8 bytes wide */ if (size != 4 && size != 8) { VM_CTR2(vhpet->vm, "hpet invalid mmio read: " "offset 0x%08x, size %d", offset, size); data = 0; goto done; } /* Access to the HPET should be naturally aligned to its width */ if (offset & (size - 1)) { VM_CTR2(vhpet->vm, "hpet invalid mmio read: " "offset 0x%08x, size %d", offset, size); data = 0; goto done; } if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) { data = vhpet_capabilities(); goto done; } if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) { data = vhpet->config; goto done; } if (offset == HPET_ISR || offset == HPET_ISR + 4) { data = vhpet->isr; goto done; } if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) { data = vhpet_counter(vhpet, NULL); goto done; } for (i = 0; i < VHPET_NUM_TIMERS; i++) { if (offset == HPET_TIMER_CAP_CNF(i) || offset == HPET_TIMER_CAP_CNF(i) + 4) { data = vhpet->timer[i].cap_config; break; } if (offset == HPET_TIMER_COMPARATOR(i) || offset == HPET_TIMER_COMPARATOR(i) + 4) { data = vhpet->timer[i].compval; break; } if (offset == HPET_TIMER_FSB_VAL(i) || offset == HPET_TIMER_FSB_ADDR(i)) { data = vhpet->timer[i].msireg; break; } } if (i >= VHPET_NUM_TIMERS) data = 0; done: VHPET_UNLOCK(vhpet); if (size == 4) { if (offset & 0x4) data >>= 32; } *rval = data; return (0); } struct vhpet * vhpet_init(struct vm *vm) { int i, pincount; struct vhpet *vhpet; uint64_t allowed_irqs; struct vhpet_callout_arg *arg; struct bintime bt; vhpet = malloc(sizeof(struct vhpet), M_VHPET, M_WAITOK | M_ZERO); vhpet->vm = vm; mtx_init(&vhpet->mtx, "vhpet lock", NULL, MTX_DEF); FREQ2BT(HPET_FREQ, &bt); vhpet->freq_sbt = bttosbt(bt); pincount = vioapic_pincount(vm); if (pincount >= 24) allowed_irqs = 0x00f00000; /* irqs 20, 21, 22 and 23 */ else allowed_irqs = 0; /* * Initialize HPET timer hardware state. */ for (i = 0; i < VHPET_NUM_TIMERS; i++) { vhpet->timer[i].cap_config = allowed_irqs << 32; vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT; vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL; vhpet->timer[i].compval = 0xffffffff; callout_init(&vhpet->timer[i].callout, 1); arg = &vhpet->timer[i].arg; arg->vhpet = vhpet; arg->timer_num = i; } return (vhpet); } void vhpet_cleanup(struct vhpet *vhpet) { int i; for (i = 0; i < VHPET_NUM_TIMERS; i++) callout_drain(&vhpet->timer[i].callout); free(vhpet, M_VHPET); } int vhpet_getcap(struct vm_hpet_cap *cap) { cap->capabilities = vhpet_capabilities(); return (0); } Index: head/sys/amd64/vmm/io/vrtc.c =================================================================== --- head/sys/amd64/vmm/io/vrtc.c (nonexistent) +++ head/sys/amd64/vmm/io/vrtc.c (revision 276428) @@ -0,0 +1,952 @@ +/*- + * Copyright (c) 2014, Neel Natu (neel@freebsd.org) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "vmm_ktr.h" +#include "vatpic.h" +#include "vioapic.h" +#include "vrtc.h" + +/* Register layout of the RTC */ +struct rtcdev { + uint8_t sec; + uint8_t alarm_sec; + uint8_t min; + uint8_t alarm_min; + uint8_t hour; + uint8_t alarm_hour; + uint8_t day_of_week; + uint8_t day_of_month; + uint8_t month; + uint8_t year; + uint8_t reg_a; + uint8_t reg_b; + uint8_t reg_c; + uint8_t reg_d; + uint8_t nvram[128 - 14]; +} __packed; +CTASSERT(sizeof(struct rtcdev) == 128); + +struct vrtc { + struct vm *vm; + struct mtx mtx; + struct callout callout; + u_int addr; /* RTC register to read or write */ + sbintime_t base_uptime; + time_t base_rtctime; + struct rtcdev rtcdev; +}; + +#define VRTC_LOCK(vrtc) mtx_lock(&((vrtc)->mtx)) +#define VRTC_UNLOCK(vrtc) mtx_unlock(&((vrtc)->mtx)) +#define VRTC_LOCKED(vrtc) mtx_owned(&((vrtc)->mtx)) + +/* + * RTC time is considered "broken" if: + * - RTC updates are halted by the guest + * - RTC date/time fields have invalid values + */ +#define VRTC_BROKEN_TIME ((time_t)-1) + +#define RTC_IRQ 8 +#define RTCSB_BIN 0x04 +#define RTCSB_ALL_INTRS (RTCSB_UINTR | RTCSB_AINTR | RTCSB_PINTR) +#define rtc_halted(vrtc) ((vrtc->rtcdev.reg_b & RTCSB_HALT) != 0) +#define aintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_AINTR) != 0) +#define pintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_PINTR) != 0) +#define uintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_UINTR) != 0) + +static void vrtc_callout_handler(void *arg); +static void vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval); + +static MALLOC_DEFINE(M_VRTC, "vrtc", "bhyve virtual rtc"); + +SYSCTL_DECL(_hw_vmm); +SYSCTL_NODE(_hw_vmm, OID_AUTO, vrtc, CTLFLAG_RW, NULL, NULL); + +static int rtc_flag_broken_time = 1; +SYSCTL_INT(_hw_vmm_vrtc, OID_AUTO, flag_broken_time, CTLFLAG_RDTUN, + &rtc_flag_broken_time, 0, "Stop guest when invalid RTC time is detected"); + +static __inline bool +divider_enabled(int reg_a) +{ + /* + * The RTC is counting only when dividers are not held in reset. + */ + return ((reg_a & 0x70) == 0x20); +} + +static __inline bool +update_enabled(struct vrtc *vrtc) +{ + /* + * RTC date/time can be updated only if: + * - divider is not held in reset + * - guest has not disabled updates + * - the date/time fields have valid contents + */ + if (!divider_enabled(vrtc->rtcdev.reg_a)) + return (false); + + if (rtc_halted(vrtc)) + return (false); + + if (vrtc->base_rtctime == VRTC_BROKEN_TIME) + return (false); + + return (true); +} + +static time_t +vrtc_curtime(struct vrtc *vrtc) +{ + sbintime_t now, delta; + time_t t; + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + t = vrtc->base_rtctime; + if (update_enabled(vrtc)) { + now = sbinuptime(); + delta = now - vrtc->base_uptime; + KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: " + "%#lx to %#lx", vrtc->base_uptime, now)); + t += delta / SBT_1S; + } + return (t); +} + +static __inline uint8_t +rtcset(struct rtcdev *rtc, int val) +{ + + KASSERT(val >= 0 && val < 100, ("%s: invalid bin2bcd index %d", + __func__, val)); + + return ((rtc->reg_b & RTCSB_BIN) ? val : bin2bcd_data[val]); +} + +static void +secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update) +{ + struct clocktime ct; + struct timespec ts; + struct rtcdev *rtc; + int hour; + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + if (rtctime < 0) { + KASSERT(rtctime == VRTC_BROKEN_TIME, + ("%s: invalid vrtc time %#lx", __func__, rtctime)); + return; + } + + /* + * If the RTC is halted then the guest has "ownership" of the + * date/time fields. Don't update the RTC date/time fields in + * this case (unless forced). + */ + if (rtc_halted(vrtc) && !force_update) + return; + + ts.tv_sec = rtctime; + ts.tv_nsec = 0; + clock_ts_to_ct(&ts, &ct); + + KASSERT(ct.sec >= 0 && ct.sec <= 59, ("invalid clocktime sec %d", + ct.sec)); + KASSERT(ct.min >= 0 && ct.min <= 59, ("invalid clocktime min %d", + ct.min)); + KASSERT(ct.hour >= 0 && ct.hour <= 23, ("invalid clocktime hour %d", + ct.hour)); + KASSERT(ct.dow >= 0 && ct.dow <= 6, ("invalid clocktime wday %d", + ct.dow)); + KASSERT(ct.day >= 1 && ct.day <= 31, ("invalid clocktime mday %d", + ct.day)); + KASSERT(ct.mon >= 1 && ct.mon <= 12, ("invalid clocktime month %d", + ct.mon)); + KASSERT(ct.year >= POSIX_BASE_YEAR, ("invalid clocktime year %d", + ct.year)); + + rtc = &vrtc->rtcdev; + rtc->sec = rtcset(rtc, ct.sec); + rtc->min = rtcset(rtc, ct.min); + + hour = ct.hour; + if ((rtc->reg_b & RTCSB_24HR) == 0) + hour = (hour % 12) + 1; /* convert to a 12-hour format */ + + rtc->hour = rtcset(rtc, hour); + + if ((rtc->reg_b & RTCSB_24HR) == 0 && ct.hour >= 12) + rtc->hour |= 0x80; /* set MSB to indicate PM */ + + rtc->day_of_week = rtcset(rtc, ct.dow + 1); + rtc->day_of_month = rtcset(rtc, ct.day); + rtc->month = rtcset(rtc, ct.mon); + rtc->year = rtcset(rtc, ct.year % 100); +} + +static int +rtcget(struct rtcdev *rtc, int val, int *retval) +{ + uint8_t upper, lower; + + if (rtc->reg_b & RTCSB_BIN) { + *retval = val; + return (0); + } + + lower = val & 0xf; + upper = (val >> 4) & 0xf; + + if (lower > 9 || upper > 9) + return (-1); + + *retval = upper * 10 + lower; + return (0); +} + +static time_t +rtc_to_secs(struct vrtc *vrtc) +{ + struct clocktime ct; + struct timespec ts; + struct rtcdev *rtc; + struct vm *vm; + int error, hour, pm, year; + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + vm = vrtc->vm; + rtc = &vrtc->rtcdev; + + bzero(&ct, sizeof(struct clocktime)); + + error = rtcget(rtc, rtc->sec, &ct.sec); + if (error || ct.sec < 0 || ct.sec > 59) { + VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec); + goto fail; + } + + error = rtcget(rtc, rtc->min, &ct.min); + if (error || ct.min < 0 || ct.min > 59) { + VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min); + goto fail; + } + + pm = 0; + hour = rtc->hour; + if ((rtc->reg_b & RTCSB_24HR) == 0) { + if (hour & 0x80) { + hour &= ~0x80; + pm = 1; + } + } + error = rtcget(rtc, hour, &ct.hour); + if ((rtc->reg_b & RTCSB_24HR) == 0) { + ct.hour -= 1; + if (pm) + ct.hour += 12; + } + + if (error || ct.hour < 0 || ct.hour > 23) { + VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour); + goto fail; + } + + /* + * Ignore 'rtc->dow' because some guests like Linux don't bother + * setting it at all while others like OpenBSD/i386 set it incorrectly. + * + * clock_ct_to_ts() does not depend on 'ct.dow' anyways so ignore it. + */ + ct.dow = -1; + + error = rtcget(rtc, rtc->day_of_month, &ct.day); + if (error || ct.day < 1 || ct.day > 31) { + VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month, + ct.day); + goto fail; + } + + error = rtcget(rtc, rtc->month, &ct.mon); + if (error || ct.mon < 1 || ct.mon > 12) { + VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon); + goto fail; + } + + error = rtcget(rtc, rtc->year, &year); + if (error || year < 0 || year > 99) { + VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year); + goto fail; + } + if (year >= 70) + ct.year = 1900 + year; + else + ct.year = 2000 + year; + + error = clock_ct_to_ts(&ct, &ts); + if (error || ts.tv_sec < 0) { + VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d", + ct.year, ct.mon, ct.day); + VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d", + ct.hour, ct.min, ct.sec); + goto fail; + } + return (ts.tv_sec); /* success */ +fail: + return (VRTC_BROKEN_TIME); /* failure */ +} + +static int +vrtc_time_update(struct vrtc *vrtc, time_t newtime) +{ + struct rtcdev *rtc; + time_t oldtime; + uint8_t alarm_sec, alarm_min, alarm_hour; + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + rtc = &vrtc->rtcdev; + alarm_sec = rtc->alarm_sec; + alarm_min = rtc->alarm_min; + alarm_hour = rtc->alarm_hour; + + oldtime = vrtc->base_rtctime; + VM_CTR2(vrtc->vm, "Updating RTC time from %#lx to %#lx", + oldtime, newtime); + + if (newtime == oldtime) + return (0); + + /* + * If 'newtime' indicates that RTC updates are disabled then just + * record that and return. There is no need to do alarm interrupt + * processing or update 'base_uptime' in this case. + */ + if (newtime == VRTC_BROKEN_TIME) { + vrtc->base_rtctime = VRTC_BROKEN_TIME; + return (0); + } + + /* + * Return an error if RTC updates are halted by the guest. + */ + if (rtc_halted(vrtc)) { + VM_CTR0(vrtc->vm, "RTC update halted by guest"); + return (EBUSY); + } + + do { + /* + * If the alarm interrupt is enabled and 'oldtime' is valid + * then visit all the seconds between 'oldtime' and 'newtime' + * to check for the alarm condition. + * + * Otherwise move the RTC time forward directly to 'newtime'. + */ + if (aintr_enabled(vrtc) && oldtime != VRTC_BROKEN_TIME) + vrtc->base_rtctime++; + else + vrtc->base_rtctime = newtime; + + if (aintr_enabled(vrtc)) { + /* + * Update the RTC date/time fields before checking + * if the alarm conditions are satisfied. + */ + secs_to_rtc(vrtc->base_rtctime, vrtc, 0); + + if ((alarm_sec >= 0xC0 || alarm_sec == rtc->sec) && + (alarm_min >= 0xC0 || alarm_min == rtc->min) && + (alarm_hour >= 0xC0 || alarm_hour == rtc->hour)) { + vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_ALARM); + } + } + } while (vrtc->base_rtctime != newtime); + + if (uintr_enabled(vrtc)) + vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE); + + vrtc->base_uptime = sbinuptime(); + + return (0); +} + +static sbintime_t +vrtc_freq(struct vrtc *vrtc) +{ + int ratesel; + + static sbintime_t pf[16] = { + 0, + SBT_1S / 256, + SBT_1S / 128, + SBT_1S / 8192, + SBT_1S / 4096, + SBT_1S / 2048, + SBT_1S / 1024, + SBT_1S / 512, + SBT_1S / 256, + SBT_1S / 128, + SBT_1S / 64, + SBT_1S / 32, + SBT_1S / 16, + SBT_1S / 8, + SBT_1S / 4, + SBT_1S / 2, + }; + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + /* + * If both periodic and alarm interrupts are enabled then use the + * periodic frequency to drive the callout. The minimum periodic + * frequency (2 Hz) is higher than the alarm frequency (1 Hz) so + * piggyback the alarm on top of it. The same argument applies to + * the update interrupt. + */ + if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) { + ratesel = vrtc->rtcdev.reg_a & 0xf; + return (pf[ratesel]); + } else if (aintr_enabled(vrtc) && update_enabled(vrtc)) { + return (SBT_1S); + } else if (uintr_enabled(vrtc) && update_enabled(vrtc)) { + return (SBT_1S); + } else { + return (0); + } +} + +static void +vrtc_callout_reset(struct vrtc *vrtc, sbintime_t freqsbt) +{ + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + if (freqsbt == 0) { + if (callout_active(&vrtc->callout)) { + VM_CTR0(vrtc->vm, "RTC callout stopped"); + callout_stop(&vrtc->callout); + } + return; + } + VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", SBT_1S / freqsbt); + callout_reset_sbt(&vrtc->callout, freqsbt, 0, vrtc_callout_handler, + vrtc, 0); +} + +static void +vrtc_callout_handler(void *arg) +{ + struct vrtc *vrtc = arg; + sbintime_t freqsbt; + time_t rtctime; + int error; + + VM_CTR0(vrtc->vm, "vrtc callout fired"); + + VRTC_LOCK(vrtc); + if (callout_pending(&vrtc->callout)) /* callout was reset */ + goto done; + + if (!callout_active(&vrtc->callout)) /* callout was stopped */ + goto done; + + callout_deactivate(&vrtc->callout); + + KASSERT((vrtc->rtcdev.reg_b & RTCSB_ALL_INTRS) != 0, + ("gratuitous vrtc callout")); + + if (pintr_enabled(vrtc)) + vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD); + + if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) { + rtctime = vrtc_curtime(vrtc); + error = vrtc_time_update(vrtc, rtctime); + KASSERT(error == 0, ("%s: vrtc_time_update error %d", + __func__, error)); + } + + freqsbt = vrtc_freq(vrtc); + KASSERT(freqsbt != 0, ("%s: vrtc frequency cannot be zero", __func__)); + vrtc_callout_reset(vrtc, freqsbt); +done: + VRTC_UNLOCK(vrtc); +} + +static __inline void +vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq) +{ + int active; + + active = callout_active(&vrtc->callout) ? 1 : 0; + KASSERT((freq == 0 && !active) || (freq != 0 && active), + ("vrtc callout %s with frequency %#lx", + active ? "active" : "inactive", freq)); +} + +static void +vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval) +{ + struct rtcdev *rtc; + int oldirqf, newirqf; + uint8_t oldval, changed; + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + rtc = &vrtc->rtcdev; + newval &= RTCIR_ALARM | RTCIR_PERIOD | RTCIR_UPDATE; + + oldirqf = rtc->reg_c & RTCIR_INT; + if ((aintr_enabled(vrtc) && (newval & RTCIR_ALARM) != 0) || + (pintr_enabled(vrtc) && (newval & RTCIR_PERIOD) != 0) || + (uintr_enabled(vrtc) && (newval & RTCIR_UPDATE) != 0)) { + newirqf = RTCIR_INT; + } else { + newirqf = 0; + } + + oldval = rtc->reg_c; + rtc->reg_c = newirqf | newval; + changed = oldval ^ rtc->reg_c; + if (changed) { + VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x", + oldval, rtc->reg_c); + } + + if (!oldirqf && newirqf) { + VM_CTR1(vrtc->vm, "RTC irq %d asserted", RTC_IRQ); + vatpic_pulse_irq(vrtc->vm, RTC_IRQ); + vioapic_pulse_irq(vrtc->vm, RTC_IRQ); + } else if (oldirqf && !newirqf) { + VM_CTR1(vrtc->vm, "RTC irq %d deasserted", RTC_IRQ); + } +} + +static int +vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval) +{ + struct rtcdev *rtc; + sbintime_t oldfreq, newfreq; + time_t curtime, rtctime; + int error; + uint8_t oldval, changed; + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + rtc = &vrtc->rtcdev; + oldval = rtc->reg_b; + oldfreq = vrtc_freq(vrtc); + + rtc->reg_b = newval; + changed = oldval ^ newval; + if (changed) { + VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x", + oldval, newval); + } + + if (changed & RTCSB_HALT) { + if ((newval & RTCSB_HALT) == 0) { + rtctime = rtc_to_secs(vrtc); + if (rtctime == VRTC_BROKEN_TIME) { + /* + * Stop updating the RTC if the date/time + * programmed by the guest is not correct. + */ + VM_CTR0(vrtc->vm, "Invalid RTC date/time " + "programming detected"); + + if (rtc_flag_broken_time) + return (-1); + } + } else { + curtime = vrtc_curtime(vrtc); + KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch " + "between vrtc basetime (%#lx) and curtime (%#lx)", + __func__, vrtc->base_rtctime, curtime)); + + /* + * Force a refresh of the RTC date/time fields so + * they reflect the time right before the guest set + * the HALT bit. + */ + secs_to_rtc(curtime, vrtc, 1); + + /* + * Updates are halted so mark 'base_rtctime' to denote + * that the RTC date/time is in flux. + */ + rtctime = VRTC_BROKEN_TIME; + rtc->reg_b &= ~RTCSB_UINTR; + } + error = vrtc_time_update(vrtc, rtctime); + KASSERT(error == 0, ("vrtc_time_update error %d", error)); + } + + /* + * Side effect of changes to the interrupt enable bits. + */ + if (changed & RTCSB_ALL_INTRS) + vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c); + + /* + * Change the callout frequency if it has changed. + */ + newfreq = vrtc_freq(vrtc); + if (newfreq != oldfreq) + vrtc_callout_reset(vrtc, newfreq); + else + vrtc_callout_check(vrtc, newfreq); + + /* + * The side effect of bits that control the RTC date/time format + * is handled lazily when those fields are actually read. + */ + return (0); +} + +static void +vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval) +{ + sbintime_t oldfreq, newfreq; + uint8_t oldval, changed; + + KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__)); + + newval &= ~RTCSA_TUP; + oldval = vrtc->rtcdev.reg_a; + oldfreq = vrtc_freq(vrtc); + + if (divider_enabled(oldval) && !divider_enabled(newval)) { + VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx", + vrtc->base_rtctime, vrtc->base_uptime); + } else if (!divider_enabled(oldval) && divider_enabled(newval)) { + /* + * If the dividers are coming out of reset then update + * 'base_uptime' before this happens. This is done to + * maintain the illusion that the RTC date/time was frozen + * while the dividers were disabled. + */ + vrtc->base_uptime = sbinuptime(); + VM_CTR2(vrtc->vm, "RTC divider out of reset at %#lx/%#lx", + vrtc->base_rtctime, vrtc->base_uptime); + } else { + /* NOTHING */ + } + + vrtc->rtcdev.reg_a = newval; + changed = oldval ^ newval; + if (changed) { + VM_CTR2(vrtc->vm, "RTC reg_a changed from %#x to %#x", + oldval, newval); + } + + /* + * Side effect of changes to rate select and divider enable bits. + */ + newfreq = vrtc_freq(vrtc); + if (newfreq != oldfreq) + vrtc_callout_reset(vrtc, newfreq); + else + vrtc_callout_check(vrtc, newfreq); +} + +int +vrtc_set_time(struct vm *vm, time_t secs) +{ + struct vrtc *vrtc; + int error; + + vrtc = vm_rtc(vm); + VRTC_LOCK(vrtc); + error = vrtc_time_update(vrtc, secs); + VRTC_UNLOCK(vrtc); + + if (error) { + VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", error, + secs); + } else { + VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs); + } + + return (error); +} + +time_t +vrtc_get_time(struct vm *vm) +{ + struct vrtc *vrtc; + time_t t; + + vrtc = vm_rtc(vm); + VRTC_LOCK(vrtc); + t = vrtc_curtime(vrtc); + VRTC_UNLOCK(vrtc); + + return (t); +} + +int +vrtc_nvram_write(struct vm *vm, int offset, uint8_t value) +{ + struct vrtc *vrtc; + uint8_t *ptr; + + vrtc = vm_rtc(vm); + + /* + * Don't allow writes to RTC control registers or the date/time fields. + */ + if (offset < offsetof(struct rtcdev, nvram[0]) || + offset >= sizeof(struct rtcdev)) { + VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d", + offset); + return (EINVAL); + } + + VRTC_LOCK(vrtc); + ptr = (uint8_t *)(&vrtc->rtcdev); + ptr[offset] = value; + VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset); + VRTC_UNLOCK(vrtc); + + return (0); +} + +int +vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval) +{ + struct vrtc *vrtc; + time_t curtime; + uint8_t *ptr; + + /* + * Allow all offsets in the RTC to be read. + */ + if (offset < 0 || offset >= sizeof(struct rtcdev)) + return (EINVAL); + + vrtc = vm_rtc(vm); + VRTC_LOCK(vrtc); + + /* + * Update RTC date/time fields if necessary. + */ + if (offset < 10) { + curtime = vrtc_curtime(vrtc); + secs_to_rtc(curtime, vrtc, 0); + } + + ptr = (uint8_t *)(&vrtc->rtcdev); + *retval = ptr[offset]; + + VRTC_UNLOCK(vrtc); + return (0); +} + +int +vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, + uint32_t *val) +{ + struct vrtc *vrtc; + + vrtc = vm_rtc(vm); + + if (bytes != 1) + return (-1); + + if (in) { + *val = 0xff; + return (0); + } + + VRTC_LOCK(vrtc); + vrtc->addr = *val & 0x7f; + VRTC_UNLOCK(vrtc); + + return (0); +} + +int +vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, + uint32_t *val) +{ + struct vrtc *vrtc; + struct rtcdev *rtc; + time_t curtime; + int error, offset; + + vrtc = vm_rtc(vm); + rtc = &vrtc->rtcdev; + + if (bytes != 1) + return (-1); + + VRTC_LOCK(vrtc); + offset = vrtc->addr; + if (offset >= sizeof(struct rtcdev)) { + VRTC_UNLOCK(vrtc); + return (-1); + } + + error = 0; + curtime = vrtc_curtime(vrtc); + vrtc_time_update(vrtc, curtime); + + if (in) { + /* + * Update RTC date/time fields if necessary. + */ + if (offset < 10) + secs_to_rtc(curtime, vrtc, 0); + + if (offset == 12) { + /* + * XXX + * reg_c interrupt flags are updated only if the + * corresponding interrupt enable bit in reg_b is set. + */ + *val = vrtc->rtcdev.reg_c; + vrtc_set_reg_c(vrtc, 0); + } else { + *val = *((uint8_t *)rtc + offset); + } + VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x", + *val, offset); + } else { + switch (offset) { + case 10: + VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val); + vrtc_set_reg_a(vrtc, *val); + break; + case 11: + VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val); + error = vrtc_set_reg_b(vrtc, *val); + break; + case 12: + VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)", + *val); + break; + case 13: + VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)", + *val); + break; + case 0: + /* + * High order bit of 'seconds' is readonly. + */ + *val &= 0x7f; + /* FALLTHRU */ + default: + VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x", + offset, *val); + *((uint8_t *)rtc + offset) = *val; + break; + } + } + VRTC_UNLOCK(vrtc); + return (error); +} + +void +vrtc_reset(struct vrtc *vrtc) +{ + struct rtcdev *rtc; + + VRTC_LOCK(vrtc); + + rtc = &vrtc->rtcdev; + vrtc_set_reg_b(vrtc, rtc->reg_b & ~(RTCSB_ALL_INTRS | RTCSB_SQWE)); + vrtc_set_reg_c(vrtc, 0); + KASSERT(!callout_active(&vrtc->callout), ("rtc callout still active")); + + VRTC_UNLOCK(vrtc); +} + +struct vrtc * +vrtc_init(struct vm *vm) +{ + struct vrtc *vrtc; + struct rtcdev *rtc; + time_t curtime; + + vrtc = malloc(sizeof(struct vrtc), M_VRTC, M_WAITOK | M_ZERO); + vrtc->vm = vm; + mtx_init(&vrtc->mtx, "vrtc lock", NULL, MTX_DEF); + callout_init(&vrtc->callout, 1); + + /* Allow dividers to keep time but disable everything else */ + rtc = &vrtc->rtcdev; + rtc->reg_a = 0x20; + rtc->reg_b = RTCSB_24HR; + rtc->reg_c = 0; + rtc->reg_d = RTCSD_PWR; + + /* Reset the index register to a safe value. */ + vrtc->addr = RTC_STATUSD; + + /* + * Initialize RTC time to 00:00:00 Jan 1, 1970. + */ + curtime = 0; + + VRTC_LOCK(vrtc); + vrtc->base_rtctime = VRTC_BROKEN_TIME; + vrtc_time_update(vrtc, curtime); + secs_to_rtc(curtime, vrtc, 0); + VRTC_UNLOCK(vrtc); + + return (vrtc); +} + +void +vrtc_cleanup(struct vrtc *vrtc) +{ + + callout_drain(&vrtc->callout); + free(vrtc, M_VRTC); +} Property changes on: head/sys/amd64/vmm/io/vrtc.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/amd64/vmm/io/vrtc.h =================================================================== --- head/sys/amd64/vmm/io/vrtc.h (nonexistent) +++ head/sys/amd64/vmm/io/vrtc.h (revision 276428) @@ -0,0 +1,50 @@ +/*- + * Copyright (c) 2014 Neel Natu (neel@freebsd.org) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VRTC_H_ +#define _VRTC_H_ + +#include + +struct vrtc; + +struct vrtc *vrtc_init(struct vm *vm); +void vrtc_cleanup(struct vrtc *vrtc); +void vrtc_reset(struct vrtc *vrtc); + +time_t vrtc_get_time(struct vm *vm); +int vrtc_set_time(struct vm *vm, time_t secs); +int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value); +int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval); + +int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, + uint32_t *val); +int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes, + uint32_t *val); + +#endif Property changes on: head/sys/amd64/vmm/io/vrtc.h ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/amd64/vmm/vmm.c =================================================================== --- head/sys/amd64/vmm/vmm.c (revision 276427) +++ head/sys/amd64/vmm/vmm.c (revision 276428) @@ -1,2362 +1,2377 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_ioport.h" #include "vmm_ktr.h" #include "vmm_host.h" #include "vmm_mem.h" #include "vmm_util.h" #include "vatpic.h" #include "vatpit.h" #include "vhpet.h" #include "vioapic.h" #include "vlapic.h" #include "vpmtmr.h" +#include "vrtc.h" #include "vmm_ipi.h" #include "vmm_stat.h" #include "vmm_lapic.h" #include "io/ppt.h" #include "io/iommu.h" struct vlapic; /* * Initialization: * (a) allocated when vcpu is created * (i) initialized when vcpu is created and when it is reinitialized * (o) initialized the first time the vcpu is created * (x) initialized before use */ struct vcpu { struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */ enum vcpu_state state; /* (o) vcpu state */ int hostcpu; /* (o) vcpu's host cpu */ struct vlapic *vlapic; /* (i) APIC device model */ enum x2apic_state x2apic_state; /* (i) APIC mode */ uint64_t exitintinfo; /* (i) events pending at VM exit */ int nmi_pending; /* (i) NMI pending */ int extint_pending; /* (i) INTR pending */ struct vm_exception exception; /* (x) exception collateral */ int exception_pending; /* (i) exception pending */ struct savefpu *guestfpu; /* (a,i) guest fpu state */ uint64_t guest_xcr0; /* (i) guest %xcr0 register */ void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ }; #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx)) #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) struct mem_seg { vm_paddr_t gpa; size_t len; boolean_t wired; vm_object_t object; }; #define VM_MAX_MEMORY_SEGMENTS 2 /* * Initialization: * (o) initialized the first time the VM is created * (i) initialized when VM is created and when it is reinitialized * (x) initialized before use */ struct vm { void *cookie; /* (i) cpu-specific data */ void *iommu; /* (x) iommu-specific data */ struct vhpet *vhpet; /* (i) virtual HPET */ struct vioapic *vioapic; /* (i) virtual ioapic */ struct vatpic *vatpic; /* (i) virtual atpic */ struct vatpit *vatpit; /* (i) virtual atpit */ struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */ + struct vrtc *vrtc; /* (o) virtual RTC */ volatile cpuset_t active_cpus; /* (i) active vcpus */ int suspend; /* (i) stop VM execution */ volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */ volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */ cpuset_t rendezvous_req_cpus; /* (x) rendezvous requested */ cpuset_t rendezvous_done_cpus; /* (x) rendezvous finished */ void *rendezvous_arg; /* (x) rendezvous func/arg */ vm_rendezvous_func_t rendezvous_func; struct mtx rendezvous_mtx; /* (o) rendezvous lock */ int num_mem_segs; /* (o) guest memory segments */ struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ }; static int vmm_initialized; static struct vmm_ops *ops; #define VMM_INIT(num) (ops != NULL ? (*ops->init)(num) : 0) #define VMM_CLEANUP() (ops != NULL ? (*ops->cleanup)() : 0) #define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0) #define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL) #define VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO) #define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL) #define VMSPACE_ALLOC(min, max) \ (ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL) #define VMSPACE_FREE(vmspace) \ (ops != NULL ? (*ops->vmspace_free)(vmspace) : ENXIO) #define VMGETREG(vmi, vcpu, num, retval) \ (ops != NULL ? (*ops->vmgetreg)(vmi, vcpu, num, retval) : ENXIO) #define VMSETREG(vmi, vcpu, num, val) \ (ops != NULL ? (*ops->vmsetreg)(vmi, vcpu, num, val) : ENXIO) #define VMGETDESC(vmi, vcpu, num, desc) \ (ops != NULL ? (*ops->vmgetdesc)(vmi, vcpu, num, desc) : ENXIO) #define VMSETDESC(vmi, vcpu, num, desc) \ (ops != NULL ? (*ops->vmsetdesc)(vmi, vcpu, num, desc) : ENXIO) #define VMGETCAP(vmi, vcpu, num, retval) \ (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO) #define VMSETCAP(vmi, vcpu, num, val) \ (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO) #define VLAPIC_INIT(vmi, vcpu) \ (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL) #define VLAPIC_CLEANUP(vmi, vlapic) \ (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL) #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS) #define fpu_stop_emulating() clts() static MALLOC_DEFINE(M_VM, "vm", "vm"); /* statistics */ static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime"); SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW, NULL, NULL); /* * Halt the guest if all vcpus are executing a HLT instruction with * interrupts disabled. */ static int halt_detection_enabled = 1; SYSCTL_INT(_hw_vmm, OID_AUTO, halt_detection, CTLFLAG_RDTUN, &halt_detection_enabled, 0, "Halt VM if all vcpus execute HLT with interrupts disabled"); static int vmm_ipinum; SYSCTL_INT(_hw_vmm, OID_AUTO, ipinum, CTLFLAG_RD, &vmm_ipinum, 0, "IPI vector used for vcpu notifications"); static int trace_guest_exceptions; SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, &trace_guest_exceptions, 0, "Trap into hypervisor on all guest exceptions and reflect them back"); static void vcpu_cleanup(struct vm *vm, int i, bool destroy) { struct vcpu *vcpu = &vm->vcpu[i]; VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic); if (destroy) { vmm_stat_free(vcpu->stats); fpu_save_area_free(vcpu->guestfpu); } } static void vcpu_init(struct vm *vm, int vcpu_id, bool create) { struct vcpu *vcpu; KASSERT(vcpu_id >= 0 && vcpu_id < VM_MAXCPU, ("vcpu_init: invalid vcpu %d", vcpu_id)); vcpu = &vm->vcpu[vcpu_id]; if (create) { KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " "initialized", vcpu_id)); vcpu_lock_init(vcpu); vcpu->state = VCPU_IDLE; vcpu->hostcpu = NOCPU; vcpu->guestfpu = fpu_save_area_alloc(); vcpu->stats = vmm_stat_alloc(); } vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id); vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED); vcpu->exitintinfo = 0; vcpu->nmi_pending = 0; vcpu->extint_pending = 0; vcpu->exception_pending = 0; vcpu->guest_xcr0 = XFEATURE_ENABLED_X87; fpu_save_area_reset(vcpu->guestfpu); vmm_stat_init(vcpu->stats); } int vcpu_trace_exceptions(struct vm *vm, int vcpuid) { return (trace_guest_exceptions); } struct vm_exit * vm_exitinfo(struct vm *vm, int cpuid) { struct vcpu *vcpu; if (cpuid < 0 || cpuid >= VM_MAXCPU) panic("vm_exitinfo: invalid cpuid %d", cpuid); vcpu = &vm->vcpu[cpuid]; return (&vcpu->exitinfo); } static void vmm_resume(void) { VMM_RESUME(); } static int vmm_init(void) { int error; vmm_host_state_init(); vmm_ipinum = vmm_ipi_alloc(); if (vmm_ipinum == 0) vmm_ipinum = IPI_AST; error = vmm_mem_init(); if (error) return (error); if (vmm_is_intel()) ops = &vmm_ops_intel; else if (vmm_is_amd()) ops = &vmm_ops_amd; else return (ENXIO); vmm_resume_p = vmm_resume; return (VMM_INIT(vmm_ipinum)); } static int vmm_handler(module_t mod, int what, void *arg) { int error; switch (what) { case MOD_LOAD: vmmdev_init(); if (ppt_avail_devices() > 0) iommu_init(); error = vmm_init(); if (error == 0) vmm_initialized = 1; break; case MOD_UNLOAD: error = vmmdev_cleanup(); if (error == 0) { vmm_resume_p = NULL; iommu_cleanup(); if (vmm_ipinum != IPI_AST) vmm_ipi_free(vmm_ipinum); error = VMM_CLEANUP(); /* * Something bad happened - prevent new * VMs from being created */ if (error) vmm_initialized = 0; } break; default: error = 0; break; } return (error); } static moduledata_t vmm_kmod = { "vmm", vmm_handler, NULL }; /* * vmm initialization has the following dependencies: * * - iommu initialization must happen after the pci passthru driver has had * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). * * - VT-x initialization requires smp_rendezvous() and therefore must happen * after SMP is fully functional (after SI_SUB_SMP). */ DECLARE_MODULE(vmm, vmm_kmod, SI_SUB_SMP + 1, SI_ORDER_ANY); MODULE_VERSION(vmm, 1); static void vm_init(struct vm *vm, bool create) { int i; vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace)); vm->iommu = NULL; vm->vioapic = vioapic_init(vm); vm->vhpet = vhpet_init(vm); vm->vatpic = vatpic_init(vm); vm->vatpit = vatpit_init(vm); vm->vpmtmr = vpmtmr_init(vm); + if (create) + vm->vrtc = vrtc_init(vm); CPU_ZERO(&vm->active_cpus); vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); for (i = 0; i < VM_MAXCPU; i++) vcpu_init(vm, i, create); } int vm_create(const char *name, struct vm **retvm) { struct vm *vm; struct vmspace *vmspace; /* * If vmm.ko could not be successfully initialized then don't attempt * to create the virtual machine. */ if (!vmm_initialized) return (ENXIO); if (name == NULL || strlen(name) >= VM_MAX_NAMELEN) return (EINVAL); vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS); if (vmspace == NULL) return (ENOMEM); vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); vm->num_mem_segs = 0; vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); vm_init(vm, true); *retvm = vm; return (0); } static void vm_free_mem_seg(struct vm *vm, struct mem_seg *seg) { if (seg->object != NULL) vmm_mem_free(vm->vmspace, seg->gpa, seg->len); bzero(seg, sizeof(*seg)); } static void vm_cleanup(struct vm *vm, bool destroy) { int i; ppt_unassign_all(vm); if (vm->iommu != NULL) iommu_destroy_domain(vm->iommu); + if (destroy) + vrtc_cleanup(vm->vrtc); + else + vrtc_reset(vm->vrtc); vpmtmr_cleanup(vm->vpmtmr); vatpit_cleanup(vm->vatpit); vhpet_cleanup(vm->vhpet); vatpic_cleanup(vm->vatpic); vioapic_cleanup(vm->vioapic); for (i = 0; i < VM_MAXCPU; i++) vcpu_cleanup(vm, i, destroy); VMCLEANUP(vm->cookie); if (destroy) { for (i = 0; i < vm->num_mem_segs; i++) vm_free_mem_seg(vm, &vm->mem_segs[i]); vm->num_mem_segs = 0; VMSPACE_FREE(vm->vmspace); vm->vmspace = NULL; } } void vm_destroy(struct vm *vm) { vm_cleanup(vm, true); free(vm, M_VM); } int vm_reinit(struct vm *vm) { int error; /* * A virtual machine can be reset only if all vcpus are suspended. */ if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { vm_cleanup(vm, false); vm_init(vm, false); error = 0; } else { error = EBUSY; } return (error); } const char * vm_name(struct vm *vm) { return (vm->name); } int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { vm_object_t obj; if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL) return (ENOMEM); else return (0); } int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) { vmm_mmio_free(vm->vmspace, gpa, len); return (0); } boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa) { int i; vm_paddr_t gpabase, gpalimit; for (i = 0; i < vm->num_mem_segs; i++) { gpabase = vm->mem_segs[i].gpa; gpalimit = gpabase + vm->mem_segs[i].len; if (gpa >= gpabase && gpa < gpalimit) return (TRUE); /* 'gpa' is regular memory */ } if (ppt_is_mmio(vm, gpa)) return (TRUE); /* 'gpa' is pci passthru mmio */ return (FALSE); } int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) { int available, allocated; struct mem_seg *seg; vm_object_t object; vm_paddr_t g; if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) return (EINVAL); available = allocated = 0; g = gpa; while (g < gpa + len) { if (vm_mem_allocated(vm, g)) allocated++; else available++; g += PAGE_SIZE; } /* * If there are some allocated and some available pages in the address * range then it is an error. */ if (allocated && available) return (EINVAL); /* * If the entire address range being requested has already been * allocated then there isn't anything more to do. */ if (allocated && available == 0) return (0); if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) return (E2BIG); seg = &vm->mem_segs[vm->num_mem_segs]; if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL) return (ENOMEM); seg->gpa = gpa; seg->len = len; seg->object = object; seg->wired = FALSE; vm->num_mem_segs++; return (0); } static vm_paddr_t vm_maxmem(struct vm *vm) { int i; vm_paddr_t gpa, maxmem; maxmem = 0; for (i = 0; i < vm->num_mem_segs; i++) { gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len; if (gpa > maxmem) maxmem = gpa; } return (maxmem); } static void vm_gpa_unwire(struct vm *vm) { int i, rv; struct mem_seg *seg; for (i = 0; i < vm->num_mem_segs; i++) { seg = &vm->mem_segs[i]; if (!seg->wired) continue; rv = vm_map_unwire(&vm->vmspace->vm_map, seg->gpa, seg->gpa + seg->len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment " "%#lx/%ld could not be unwired: %d", vm_name(vm), seg->gpa, seg->len, rv)); seg->wired = FALSE; } } static int vm_gpa_wire(struct vm *vm) { int i, rv; struct mem_seg *seg; for (i = 0; i < vm->num_mem_segs; i++) { seg = &vm->mem_segs[i]; if (seg->wired) continue; /* XXX rlimits? */ rv = vm_map_wire(&vm->vmspace->vm_map, seg->gpa, seg->gpa + seg->len, VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); if (rv != KERN_SUCCESS) break; seg->wired = TRUE; } if (i < vm->num_mem_segs) { /* * Undo the wiring before returning an error. */ vm_gpa_unwire(vm); return (EAGAIN); } return (0); } static void vm_iommu_modify(struct vm *vm, boolean_t map) { int i, sz; vm_paddr_t gpa, hpa; struct mem_seg *seg; void *vp, *cookie, *host_domain; sz = PAGE_SIZE; host_domain = iommu_host_domain(); for (i = 0; i < vm->num_mem_segs; i++) { seg = &vm->mem_segs[i]; KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired", vm_name(vm), seg->gpa, seg->len)); gpa = seg->gpa; while (gpa < seg->gpa + seg->len) { vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE, &cookie); KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", vm_name(vm), gpa)); vm_gpa_release(cookie); hpa = DMAP_TO_PHYS((uintptr_t)vp); if (map) { iommu_create_mapping(vm->iommu, gpa, hpa, sz); iommu_remove_mapping(host_domain, hpa, sz); } else { iommu_remove_mapping(vm->iommu, gpa, sz); iommu_create_mapping(host_domain, hpa, hpa, sz); } gpa += PAGE_SIZE; } } /* * Invalidate the cached translations associated with the domain * from which pages were removed. */ if (map) iommu_invalidate_tlb(host_domain); else iommu_invalidate_tlb(vm->iommu); } #define vm_iommu_unmap(vm) vm_iommu_modify((vm), FALSE) #define vm_iommu_map(vm) vm_iommu_modify((vm), TRUE) int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) { int error; error = ppt_unassign_device(vm, bus, slot, func); if (error) return (error); if (ppt_assigned_devices(vm) == 0) { vm_iommu_unmap(vm); vm_gpa_unwire(vm); } return (0); } int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) { int error; vm_paddr_t maxaddr; /* * Virtual machines with pci passthru devices get special treatment: * - the guest physical memory is wired * - the iommu is programmed to do the 'gpa' to 'hpa' translation * * We need to do this before the first pci passthru device is attached. */ if (ppt_assigned_devices(vm) == 0) { KASSERT(vm->iommu == NULL, ("vm_assign_pptdev: iommu must be NULL")); maxaddr = vm_maxmem(vm); vm->iommu = iommu_create_domain(maxaddr); error = vm_gpa_wire(vm); if (error) return (error); vm_iommu_map(vm); } error = ppt_assign_device(vm, bus, slot, func); return (error); } void * vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { int count, pageoff; vm_page_t m; pageoff = gpa & PAGE_MASK; if (len > PAGE_SIZE - pageoff) panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); if (count == 1) { *cookie = m; return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff)); } else { *cookie = NULL; return (NULL); } } void vm_gpa_release(void *cookie) { vm_page_t m = cookie; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, struct vm_memory_segment *seg) { int i; for (i = 0; i < vm->num_mem_segs; i++) { if (gpabase == vm->mem_segs[i].gpa) { seg->gpa = vm->mem_segs[i].gpa; seg->len = vm->mem_segs[i].len; seg->wired = vm->mem_segs[i].wired; return (0); } } return (-1); } int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, vm_offset_t *offset, struct vm_object **object) { int i; size_t seg_len; vm_paddr_t seg_gpa; vm_object_t seg_obj; for (i = 0; i < vm->num_mem_segs; i++) { if ((seg_obj = vm->mem_segs[i].object) == NULL) continue; seg_gpa = vm->mem_segs[i].gpa; seg_len = vm->mem_segs[i].len; if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) { *offset = gpa - seg_gpa; *object = seg_obj; vm_object_reference(seg_obj); return (0); } } return (EINVAL); } int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); return (VMGETREG(vm->cookie, vcpu, reg, retval)); } int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (reg >= VM_REG_LAST) return (EINVAL); return (VMSETREG(vm->cookie, vcpu, reg, val)); } static boolean_t is_descriptor_table(int reg) { switch (reg) { case VM_REG_GUEST_IDTR: case VM_REG_GUEST_GDTR: return (TRUE); default: return (FALSE); } } static boolean_t is_segment_register(int reg) { switch (reg) { case VM_REG_GUEST_ES: case VM_REG_GUEST_CS: case VM_REG_GUEST_SS: case VM_REG_GUEST_DS: case VM_REG_GUEST_FS: case VM_REG_GUEST_GS: case VM_REG_GUEST_TR: case VM_REG_GUEST_LDTR: return (TRUE); default: return (FALSE); } } int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (!is_segment_register(reg) && !is_descriptor_table(reg)) return (EINVAL); return (VMGETDESC(vm->cookie, vcpu, reg, desc)); } int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (!is_segment_register(reg) && !is_descriptor_table(reg)) return (EINVAL); return (VMSETDESC(vm->cookie, vcpu, reg, desc)); } static void restore_guest_fpustate(struct vcpu *vcpu) { /* flush host state to the pcb */ fpuexit(curthread); /* restore guest FPU state */ fpu_stop_emulating(); fpurestore(vcpu->guestfpu); /* restore guest XCR0 if XSAVE is enabled in the host */ if (rcr4() & CR4_XSAVE) load_xcr(0, vcpu->guest_xcr0); /* * The FPU is now "dirty" with the guest's state so turn on emulation * to trap any access to the FPU by the host. */ fpu_start_emulating(); } static void save_guest_fpustate(struct vcpu *vcpu) { if ((rcr0() & CR0_TS) == 0) panic("fpu emulation not enabled in host!"); /* save guest XCR0 and restore host XCR0 */ if (rcr4() & CR4_XSAVE) { vcpu->guest_xcr0 = rxcr(0); load_xcr(0, vmm_get_host_xcr0()); } /* save guest FPU state */ fpu_stop_emulating(); fpusave(vcpu->guestfpu); fpu_start_emulating(); } static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle"); static int vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate, bool from_idle) { int error; vcpu_assert_locked(vcpu); /* * State transitions from the vmmdev_ioctl() must always begin from * the VCPU_IDLE state. This guarantees that there is only a single * ioctl() operating on a vcpu at any point. */ if (from_idle) { while (vcpu->state != VCPU_IDLE) msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz); } else { KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from " "vcpu idle state")); } if (vcpu->state == VCPU_RUNNING) { KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d " "mismatch for running vcpu", curcpu, vcpu->hostcpu)); } else { KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a " "vcpu that is not running", vcpu->hostcpu)); } /* * The following state transitions are allowed: * IDLE -> FROZEN -> IDLE * FROZEN -> RUNNING -> FROZEN * FROZEN -> SLEEPING -> FROZEN */ switch (vcpu->state) { case VCPU_IDLE: case VCPU_RUNNING: case VCPU_SLEEPING: error = (newstate != VCPU_FROZEN); break; case VCPU_FROZEN: error = (newstate == VCPU_FROZEN); break; default: error = 1; break; } if (error) return (EBUSY); vcpu->state = newstate; if (newstate == VCPU_RUNNING) vcpu->hostcpu = curcpu; else vcpu->hostcpu = NOCPU; if (newstate == VCPU_IDLE) wakeup(&vcpu->state); return (0); } static void vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0) panic("Error %d setting state to %d\n", error, newstate); } static void vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate) { int error; if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0) panic("Error %d setting state to %d", error, newstate); } static void vm_set_rendezvous_func(struct vm *vm, vm_rendezvous_func_t func) { KASSERT(mtx_owned(&vm->rendezvous_mtx), ("rendezvous_mtx not locked")); /* * Update 'rendezvous_func' and execute a write memory barrier to * ensure that it is visible across all host cpus. This is not needed * for correctness but it does ensure that all the vcpus will notice * that the rendezvous is requested immediately. */ vm->rendezvous_func = func; wmb(); } #define RENDEZVOUS_CTR0(vm, vcpuid, fmt) \ do { \ if (vcpuid >= 0) \ VCPU_CTR0(vm, vcpuid, fmt); \ else \ VM_CTR0(vm, fmt); \ } while (0) static void vm_handle_rendezvous(struct vm *vm, int vcpuid) { KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), ("vm_handle_rendezvous: invalid vcpuid %d", vcpuid)); mtx_lock(&vm->rendezvous_mtx); while (vm->rendezvous_func != NULL) { /* 'rendezvous_req_cpus' must be a subset of 'active_cpus' */ CPU_AND(&vm->rendezvous_req_cpus, &vm->active_cpus); if (vcpuid != -1 && CPU_ISSET(vcpuid, &vm->rendezvous_req_cpus) && !CPU_ISSET(vcpuid, &vm->rendezvous_done_cpus)) { VCPU_CTR0(vm, vcpuid, "Calling rendezvous func"); (*vm->rendezvous_func)(vm, vcpuid, vm->rendezvous_arg); CPU_SET(vcpuid, &vm->rendezvous_done_cpus); } if (CPU_CMP(&vm->rendezvous_req_cpus, &vm->rendezvous_done_cpus) == 0) { VCPU_CTR0(vm, vcpuid, "Rendezvous completed"); vm_set_rendezvous_func(vm, NULL); wakeup(&vm->rendezvous_func); break; } RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion"); mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0, "vmrndv", 0); } mtx_unlock(&vm->rendezvous_mtx); } /* * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run. */ static int vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu) { struct vcpu *vcpu; const char *wmesg; int error, t, vcpu_halted, vm_halted; KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted")); vcpu = &vm->vcpu[vcpuid]; vcpu_halted = 0; vm_halted = 0; /* * The typical way to halt a cpu is to execute: "sti; hlt" * * STI sets RFLAGS.IF to enable interrupts. However, the processor * remains in an "interrupt shadow" for an additional instruction * following the STI. This guarantees that "sti; hlt" sequence is * atomic and a pending interrupt will be recognized after the HLT. * * After the HLT emulation is done the vcpu is no longer in an * interrupt shadow and a pending interrupt can be injected on * the next entry into the guest. */ error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0); KASSERT(error == 0, ("%s: error %d clearing interrupt shadow", __func__, error)); vcpu_lock(vcpu); while (1) { /* * Do a final check for pending NMI or interrupts before * really putting this thread to sleep. Also check for * software events that would cause this vcpu to wakeup. * * These interrupts/events could have happened after the * vcpu returned from VMRUN() and before it acquired the * vcpu lock above. */ if (vm->rendezvous_func != NULL || vm->suspend) break; if (vm_nmi_pending(vm, vcpuid)) break; if (!intr_disabled) { if (vm_extint_pending(vm, vcpuid) || vlapic_pending_intr(vcpu->vlapic, NULL)) { break; } } /* Don't go to sleep if the vcpu thread needs to yield */ if (vcpu_should_yield(vm, vcpuid)) break; /* * Some Linux guests implement "halt" by having all vcpus * execute HLT with interrupts disabled. 'halted_cpus' keeps * track of the vcpus that have entered this state. When all * vcpus enter the halted state the virtual machine is halted. */ if (intr_disabled) { wmesg = "vmhalt"; VCPU_CTR0(vm, vcpuid, "Halted"); if (!vcpu_halted && halt_detection_enabled) { vcpu_halted = 1; CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus); } if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) { vm_halted = 1; break; } } else { wmesg = "vmidle"; } t = ticks; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); /* * XXX msleep_spin() cannot be interrupted by signals so * wake up periodically to check pending signals. */ msleep_spin(vcpu, &vcpu->mtx, wmesg, hz); vcpu_require_state_locked(vcpu, VCPU_FROZEN); vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t); } if (vcpu_halted) CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus); vcpu_unlock(vcpu); if (vm_halted) vm_suspend(vm, VM_SUSPEND_HALT); return (0); } static int vm_handle_paging(struct vm *vm, int vcpuid, bool *retu) { int rv, ftype; struct vm_map *map; struct vcpu *vcpu; struct vm_exit *vme; vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; ftype = vme->u.paging.fault_type; KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE, ("vm_handle_paging: invalid fault_type %d", ftype)); if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace), vme->u.paging.gpa, ftype); if (rv == 0) { VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx", ftype == VM_PROT_READ ? "accessed" : "dirty", vme->u.paging.gpa); goto done; } } map = &vm->vmspace->vm_map; rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " "ftype = %d", rv, vme->u.paging.gpa, ftype); if (rv != KERN_SUCCESS) return (EFAULT); done: /* restart execution at the faulting instruction */ vme->inst_length = 0; return (0); } static int vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) { struct vie *vie; struct vcpu *vcpu; struct vm_exit *vme; uint64_t gla, gpa; struct vm_guest_paging *paging; mem_region_read_t mread; mem_region_write_t mwrite; enum vm_cpu_mode cpu_mode; int cs_d, error, length; vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; gla = vme->u.inst_emul.gla; gpa = vme->u.inst_emul.gpa; cs_d = vme->u.inst_emul.cs_d; vie = &vme->u.inst_emul.vie; paging = &vme->u.inst_emul.paging; cpu_mode = paging->cpu_mode; VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa); /* Fetch, decode and emulate the faulting instruction */ if (vie->num_valid == 0) { /* * If the instruction length is not known then assume a * maximum size instruction. */ length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE; error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip, length, vie); } else { /* * The instruction bytes have already been copied into 'vie' */ error = 0; } if (error == 1) return (0); /* Resume guest to handle page fault */ else if (error == -1) return (EFAULT); else if (error != 0) panic("%s: vmm_fetch_instruction error %d", __func__, error); if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) return (EFAULT); /* * If the instruction length is not specified the update it now. */ if (vme->inst_length == 0) vme->inst_length = vie->num_processed; /* return to userland unless this is an in-kernel emulated device */ if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) { mread = lapic_mmio_read; mwrite = lapic_mmio_write; } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) { mread = vioapic_mmio_read; mwrite = vioapic_mmio_write; } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) { mread = vhpet_mmio_read; mwrite = vhpet_mmio_write; } else { *retu = true; return (0); } error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, paging, mread, mwrite, retu); return (error); } static int vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu) { int i, done; struct vcpu *vcpu; done = 0; vcpu = &vm->vcpu[vcpuid]; CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus); /* * Wait until all 'active_cpus' have suspended themselves. * * Since a VM may be suspended at any time including when one or * more vcpus are doing a rendezvous we need to call the rendezvous * handler while we are waiting to prevent a deadlock. */ vcpu_lock(vcpu); while (1) { if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) { VCPU_CTR0(vm, vcpuid, "All vcpus suspended"); break; } if (vm->rendezvous_func == NULL) { VCPU_CTR0(vm, vcpuid, "Sleeping during suspend"); vcpu_require_state_locked(vcpu, VCPU_SLEEPING); msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz); vcpu_require_state_locked(vcpu, VCPU_FROZEN); } else { VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend"); vcpu_unlock(vcpu); vm_handle_rendezvous(vm, vcpuid); vcpu_lock(vcpu); } } vcpu_unlock(vcpu); /* * Wakeup the other sleeping vcpus and return to userspace. */ for (i = 0; i < VM_MAXCPU; i++) { if (CPU_ISSET(i, &vm->suspended_cpus)) { vcpu_notify_event(vm, i, false); } } *retu = true; return (0); } int vm_suspend(struct vm *vm, enum vm_suspend_how how) { int i; if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST) return (EINVAL); if (atomic_cmpset_int(&vm->suspend, 0, how) == 0) { VM_CTR2(vm, "virtual machine already suspended %d/%d", vm->suspend, how); return (EALREADY); } VM_CTR1(vm, "virtual machine successfully suspended %d", how); /* * Notify all active vcpus that they are now suspended. */ for (i = 0; i < VM_MAXCPU; i++) { if (CPU_ISSET(i, &vm->active_cpus)) vcpu_notify_event(vm, i, false); } return (0); } void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip) { struct vm_exit *vmexit; KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST, ("vm_exit_suspended: invalid suspend type %d", vm->suspend)); vmexit = vm_exitinfo(vm, vcpuid); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_SUSPENDED; vmexit->u.suspended.how = vm->suspend; } void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip) { struct vm_exit *vmexit; KASSERT(vm->rendezvous_func != NULL, ("rendezvous not in progress")); vmexit = vm_exitinfo(vm, vcpuid); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_RENDEZVOUS; vmm_stat_incr(vm, vcpuid, VMEXIT_RENDEZVOUS, 1); } void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip) { struct vm_exit *vmexit; vmexit = vm_exitinfo(vm, vcpuid); vmexit->rip = rip; vmexit->inst_length = 0; vmexit->exitcode = VM_EXITCODE_BOGUS; vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1); } int vm_run(struct vm *vm, struct vm_run *vmrun) { int error, vcpuid; struct vcpu *vcpu; struct pcb *pcb; uint64_t tscval, rip; struct vm_exit *vme; bool retu, intr_disabled; pmap_t pmap; void *rptr, *sptr; vcpuid = vmrun->cpuid; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); if (!CPU_ISSET(vcpuid, &vm->active_cpus)) return (EINVAL); if (CPU_ISSET(vcpuid, &vm->suspended_cpus)) return (EINVAL); rptr = &vm->rendezvous_func; sptr = &vm->suspend; pmap = vmspace_pmap(vm->vmspace); vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; rip = vmrun->rip; restart: critical_enter(); KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), ("vm_run: absurd pm_active")); tscval = rdtsc(); pcb = PCPU_GET(curpcb); set_pcb_flags(pcb, PCB_FULL_IRET); restore_guest_fpustate(vcpu); vcpu_require_state(vm, vcpuid, VCPU_RUNNING); error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr); vcpu_require_state(vm, vcpuid, VCPU_FROZEN); save_guest_fpustate(vcpu); vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval); critical_exit(); if (error == 0) { retu = false; switch (vme->exitcode) { case VM_EXITCODE_SUSPENDED: error = vm_handle_suspend(vm, vcpuid, &retu); break; case VM_EXITCODE_IOAPIC_EOI: vioapic_process_eoi(vm, vcpuid, vme->u.ioapic_eoi.vector); break; case VM_EXITCODE_RENDEZVOUS: vm_handle_rendezvous(vm, vcpuid); error = 0; break; case VM_EXITCODE_HLT: intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0); error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu); break; case VM_EXITCODE_PAGING: error = vm_handle_paging(vm, vcpuid, &retu); break; case VM_EXITCODE_INST_EMUL: error = vm_handle_inst_emul(vm, vcpuid, &retu); break; case VM_EXITCODE_INOUT: case VM_EXITCODE_INOUT_STR: error = vm_handle_inout(vm, vcpuid, vme, &retu); break; case VM_EXITCODE_MONITOR: case VM_EXITCODE_MWAIT: vm_inject_ud(vm, vcpuid); break; default: retu = true; /* handled in userland */ break; } } if (error == 0 && retu == false) { rip = vme->rip + vme->inst_length; goto restart; } /* copy the exit information */ bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit)); return (error); } int vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info) { struct vcpu *vcpu; int type, vector; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; if (info & VM_INTINFO_VALID) { type = info & VM_INTINFO_TYPE; vector = info & 0xff; if (type == VM_INTINFO_NMI && vector != IDT_NMI) return (EINVAL); if (type == VM_INTINFO_HWEXCEPTION && vector >= 32) return (EINVAL); if (info & VM_INTINFO_RSVD) return (EINVAL); } else { info = 0; } VCPU_CTR2(vm, vcpuid, "%s: info1(%#lx)", __func__, info); vcpu->exitintinfo = info; return (0); } enum exc_class { EXC_BENIGN, EXC_CONTRIBUTORY, EXC_PAGEFAULT }; #define IDT_VE 20 /* Virtualization Exception (Intel specific) */ static enum exc_class exception_class(uint64_t info) { int type, vector; KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %#lx", info)); type = info & VM_INTINFO_TYPE; vector = info & 0xff; /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */ switch (type) { case VM_INTINFO_HWINTR: case VM_INTINFO_SWINTR: case VM_INTINFO_NMI: return (EXC_BENIGN); default: /* * Hardware exception. * * SVM and VT-x use identical type values to represent NMI, * hardware interrupt and software interrupt. * * SVM uses type '3' for all exceptions. VT-x uses type '3' * for exceptions except #BP and #OF. #BP and #OF use a type * value of '5' or '6'. Therefore we don't check for explicit * values of 'type' to classify 'intinfo' into a hardware * exception. */ break; } switch (vector) { case IDT_PF: case IDT_VE: return (EXC_PAGEFAULT); case IDT_DE: case IDT_TS: case IDT_NP: case IDT_SS: case IDT_GP: return (EXC_CONTRIBUTORY); default: return (EXC_BENIGN); } } static int nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2, uint64_t *retinfo) { enum exc_class exc1, exc2; int type1, vector1; KASSERT(info1 & VM_INTINFO_VALID, ("info1 %#lx is not valid", info1)); KASSERT(info2 & VM_INTINFO_VALID, ("info2 %#lx is not valid", info2)); /* * If an exception occurs while attempting to call the double-fault * handler the processor enters shutdown mode (aka triple fault). */ type1 = info1 & VM_INTINFO_TYPE; vector1 = info1 & 0xff; if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) { VCPU_CTR2(vm, vcpuid, "triple fault: info1(%#lx), info2(%#lx)", info1, info2); vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT); *retinfo = 0; return (0); } /* * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3 */ exc1 = exception_class(info1); exc2 = exception_class(info2); if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) || (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) { /* Convert nested fault into a double fault. */ *retinfo = IDT_DF; *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; *retinfo |= VM_INTINFO_DEL_ERRCODE; } else { /* Handle exceptions serially */ *retinfo = info2; } return (1); } static uint64_t vcpu_exception_intinfo(struct vcpu *vcpu) { uint64_t info = 0; if (vcpu->exception_pending) { info = vcpu->exception.vector & 0xff; info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION; if (vcpu->exception.error_code_valid) { info |= VM_INTINFO_DEL_ERRCODE; info |= (uint64_t)vcpu->exception.error_code << 32; } } return (info); } int vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo) { struct vcpu *vcpu; uint64_t info1, info2; int valid; KASSERT(vcpuid >= 0 && vcpuid < VM_MAXCPU, ("invalid vcpu %d", vcpuid)); vcpu = &vm->vcpu[vcpuid]; info1 = vcpu->exitintinfo; vcpu->exitintinfo = 0; info2 = 0; if (vcpu->exception_pending) { info2 = vcpu_exception_intinfo(vcpu); vcpu->exception_pending = 0; VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx", vcpu->exception.vector, info2); } if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) { valid = nested_fault(vm, vcpuid, info1, info2, retinfo); } else if (info1 & VM_INTINFO_VALID) { *retinfo = info1; valid = 1; } else if (info2 & VM_INTINFO_VALID) { *retinfo = info2; valid = 1; } else { valid = 0; } if (valid) { VCPU_CTR4(vm, vcpuid, "%s: info1(%#lx), info2(%#lx), " "retinfo(%#lx)", __func__, info1, info2, *retinfo); } return (valid); } int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; *info1 = vcpu->exitintinfo; *info2 = vcpu_exception_intinfo(vcpu); return (0); } int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); if (exception->vector < 0 || exception->vector >= 32) return (EINVAL); /* * A double fault exception should never be injected directly into * the guest. It is a derived exception that results from specific * combinations of nested faults. */ if (exception->vector == IDT_DF) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; if (vcpu->exception_pending) { VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to " "pending exception %d", exception->vector, vcpu->exception.vector); return (EBUSY); } vcpu->exception_pending = 1; vcpu->exception = *exception; VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector); return (0); } void vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid, int errcode) { struct vm_exception exception; struct vm_exit *vmexit; struct vm *vm; int error; vm = vmarg; exception.vector = vector; exception.error_code = errcode; exception.error_code_valid = errcode_valid; error = vm_inject_exception(vm, vcpuid, &exception); KASSERT(error == 0, ("vm_inject_exception error %d", error)); /* * A fault-like exception allows the instruction to be restarted * after the exception handler returns. * * By setting the inst_length to 0 we ensure that the instruction * pointer remains at the faulting instruction. */ vmexit = vm_exitinfo(vm, vcpuid); vmexit->inst_length = 0; } void vm_inject_pf(void *vmarg, int vcpuid, int error_code, uint64_t cr2) { struct vm *vm; int error; vm = vmarg; VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %#x, cr2 %#lx", error_code, cr2); error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2); KASSERT(error == 0, ("vm_set_register(cr2) error %d", error)); vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code); } static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu"); int vm_inject_nmi(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; vcpu->nmi_pending = 1; vcpu_notify_event(vm, vcpuid, false); return (0); } int vm_nmi_pending(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; return (vcpu->nmi_pending); } void vm_nmi_clear(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) panic("vm_nmi_pending: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; if (vcpu->nmi_pending == 0) panic("vm_nmi_clear: inconsistent nmi_pending state"); vcpu->nmi_pending = 0; vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1); } static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu"); int vm_inject_extint(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); vcpu = &vm->vcpu[vcpuid]; vcpu->extint_pending = 1; vcpu_notify_event(vm, vcpuid, false); return (0); } int vm_extint_pending(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) panic("vm_extint_pending: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; return (vcpu->extint_pending); } void vm_extint_clear(struct vm *vm, int vcpuid) { struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) panic("vm_extint_pending: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; if (vcpu->extint_pending == 0) panic("vm_extint_clear: inconsistent extint_pending state"); vcpu->extint_pending = 0; vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1); } int vm_get_capability(struct vm *vm, int vcpu, int type, int *retval) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (VMGETCAP(vm->cookie, vcpu, type, retval)); } int vm_set_capability(struct vm *vm, int vcpu, int type, int val) { if (vcpu < 0 || vcpu >= VM_MAXCPU) return (EINVAL); if (type < 0 || type >= VM_CAP_MAX) return (EINVAL); return (VMSETCAP(vm->cookie, vcpu, type, val)); } struct vlapic * vm_lapic(struct vm *vm, int cpu) { return (vm->vcpu[cpu].vlapic); } struct vioapic * vm_ioapic(struct vm *vm) { return (vm->vioapic); } struct vhpet * vm_hpet(struct vm *vm) { return (vm->vhpet); } boolean_t vmm_is_pptdev(int bus, int slot, int func) { int found, i, n; int b, s, f; char *val, *cp, *cp2; /* * XXX * The length of an environment variable is limited to 128 bytes which * puts an upper limit on the number of passthru devices that may be * specified using a single environment variable. * * Work around this by scanning multiple environment variable * names instead of a single one - yuck! */ const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL }; /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */ found = 0; for (i = 0; names[i] != NULL && !found; i++) { cp = val = kern_getenv(names[i]); while (cp != NULL && *cp != '\0') { if ((cp2 = strchr(cp, ' ')) != NULL) *cp2 = '\0'; n = sscanf(cp, "%d/%d/%d", &b, &s, &f); if (n == 3 && bus == b && slot == s && func == f) { found = 1; break; } if (cp2 != NULL) *cp2++ = ' '; cp = cp2; } freeenv(val); } return (found); } void * vm_iommu_domain(struct vm *vm) { return (vm->iommu); } int vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate, bool from_idle) { int error; struct vcpu *vcpu; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) panic("vm_set_run_state: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; vcpu_lock(vcpu); error = vcpu_set_state_locked(vcpu, newstate, from_idle); vcpu_unlock(vcpu); return (error); } enum vcpu_state vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu) { struct vcpu *vcpu; enum vcpu_state state; if (vcpuid < 0 || vcpuid >= VM_MAXCPU) panic("vm_get_run_state: invalid vcpuid %d", vcpuid); vcpu = &vm->vcpu[vcpuid]; vcpu_lock(vcpu); state = vcpu->state; if (hostcpu != NULL) *hostcpu = vcpu->hostcpu; vcpu_unlock(vcpu); return (state); } int vm_activate_cpu(struct vm *vm, int vcpuid) { if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); if (CPU_ISSET(vcpuid, &vm->active_cpus)) return (EBUSY); VCPU_CTR0(vm, vcpuid, "activated"); CPU_SET_ATOMIC(vcpuid, &vm->active_cpus); return (0); } cpuset_t vm_active_cpus(struct vm *vm) { return (vm->active_cpus); } cpuset_t vm_suspended_cpus(struct vm *vm) { return (vm->suspended_cpus); } void * vcpu_stats(struct vm *vm, int vcpuid) { return (vm->vcpu[vcpuid].stats); } int vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state) { if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); *state = vm->vcpu[vcpuid].x2apic_state; return (0); } int vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state) { if (vcpuid < 0 || vcpuid >= VM_MAXCPU) return (EINVAL); if (state >= X2APIC_STATE_LAST) return (EINVAL); vm->vcpu[vcpuid].x2apic_state = state; vlapic_set_x2apic_state(vm, vcpuid, state); return (0); } /* * This function is called to ensure that a vcpu "sees" a pending event * as soon as possible: * - If the vcpu thread is sleeping then it is woken up. * - If the vcpu is running on a different host_cpu then an IPI will be directed * to the host_cpu to cause the vcpu to trap into the hypervisor. */ void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr) { int hostcpu; struct vcpu *vcpu; vcpu = &vm->vcpu[vcpuid]; vcpu_lock(vcpu); hostcpu = vcpu->hostcpu; if (vcpu->state == VCPU_RUNNING) { KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu")); if (hostcpu != curcpu) { if (lapic_intr) { vlapic_post_intr(vcpu->vlapic, hostcpu, vmm_ipinum); } else { ipi_cpu(hostcpu, vmm_ipinum); } } else { /* * If the 'vcpu' is running on 'curcpu' then it must * be sending a notification to itself (e.g. SELF_IPI). * The pending event will be picked up when the vcpu * transitions back to guest context. */ } } else { KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent " "with hostcpu %d", vcpu->state, hostcpu)); if (vcpu->state == VCPU_SLEEPING) wakeup_one(vcpu); } vcpu_unlock(vcpu); } struct vmspace * vm_get_vmspace(struct vm *vm) { return (vm->vmspace); } int vm_apicid2vcpuid(struct vm *vm, int apicid) { /* * XXX apic id is assumed to be numerically identical to vcpu id */ return (apicid); } void vm_smp_rendezvous(struct vm *vm, int vcpuid, cpuset_t dest, vm_rendezvous_func_t func, void *arg) { int i; /* * Enforce that this function is called without any locks */ WITNESS_WARN(WARN_PANIC, NULL, "vm_smp_rendezvous"); KASSERT(vcpuid == -1 || (vcpuid >= 0 && vcpuid < VM_MAXCPU), ("vm_smp_rendezvous: invalid vcpuid %d", vcpuid)); restart: mtx_lock(&vm->rendezvous_mtx); if (vm->rendezvous_func != NULL) { /* * If a rendezvous is already in progress then we need to * call the rendezvous handler in case this 'vcpuid' is one * of the targets of the rendezvous. */ RENDEZVOUS_CTR0(vm, vcpuid, "Rendezvous already in progress"); mtx_unlock(&vm->rendezvous_mtx); vm_handle_rendezvous(vm, vcpuid); goto restart; } KASSERT(vm->rendezvous_func == NULL, ("vm_smp_rendezvous: previous " "rendezvous is still in progress")); RENDEZVOUS_CTR0(vm, vcpuid, "Initiating rendezvous"); vm->rendezvous_req_cpus = dest; CPU_ZERO(&vm->rendezvous_done_cpus); vm->rendezvous_arg = arg; vm_set_rendezvous_func(vm, func); mtx_unlock(&vm->rendezvous_mtx); /* * Wake up any sleeping vcpus and trigger a VM-exit in any running * vcpus so they handle the rendezvous as soon as possible. */ for (i = 0; i < VM_MAXCPU; i++) { if (CPU_ISSET(i, &dest)) vcpu_notify_event(vm, i, false); } vm_handle_rendezvous(vm, vcpuid); } struct vatpic * vm_atpic(struct vm *vm) { return (vm->vatpic); } struct vatpit * vm_atpit(struct vm *vm) { return (vm->vatpit); } struct vpmtmr * vm_pmtmr(struct vm *vm) { return (vm->vpmtmr); +} + +struct vrtc * +vm_rtc(struct vm *vm) +{ + + return (vm->vrtc); } enum vm_reg_name vm_segment_name(int seg) { static enum vm_reg_name seg_names[] = { VM_REG_GUEST_ES, VM_REG_GUEST_CS, VM_REG_GUEST_SS, VM_REG_GUEST_DS, VM_REG_GUEST_FS, VM_REG_GUEST_GS }; KASSERT(seg >= 0 && seg < nitems(seg_names), ("%s: invalid segment encoding %d", __func__, seg)); return (seg_names[seg]); } void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, int num_copyinfo) { int idx; for (idx = 0; idx < num_copyinfo; idx++) { if (copyinfo[idx].cookie != NULL) vm_gpa_release(copyinfo[idx].cookie); } bzero(copyinfo, num_copyinfo * sizeof(struct vm_copyinfo)); } int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, int num_copyinfo) { int error, idx, nused; size_t n, off, remaining; void *hva, *cookie; uint64_t gpa; bzero(copyinfo, sizeof(struct vm_copyinfo) * num_copyinfo); nused = 0; remaining = len; while (remaining > 0) { KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); error = vmm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa); if (error) return (error); off = gpa & PAGE_MASK; n = min(remaining, PAGE_SIZE - off); copyinfo[nused].gpa = gpa; copyinfo[nused].len = n; remaining -= n; gla += n; nused++; } for (idx = 0; idx < nused; idx++) { hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len, prot, &cookie); if (hva == NULL) break; copyinfo[idx].hva = hva; copyinfo[idx].cookie = cookie; } if (idx != nused) { vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); return (-1); } else { return (0); } } void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr, size_t len) { char *dst; int idx; dst = kaddr; idx = 0; while (len > 0) { bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len); len -= copyinfo[idx].len; dst += copyinfo[idx].len; idx++; } } void vm_copyout(struct vm *vm, int vcpuid, const void *kaddr, struct vm_copyinfo *copyinfo, size_t len) { const char *src; int idx; src = kaddr; idx = 0; while (len > 0) { bcopy(src, copyinfo[idx].hva, copyinfo[idx].len); len -= copyinfo[idx].len; src += copyinfo[idx].len; idx++; } } /* * Return the amount of in-use and wired memory for the VM. Since * these are global stats, only return the values with for vCPU 0 */ VMM_STAT_DECLARE(VMM_MEM_RESIDENT); VMM_STAT_DECLARE(VMM_MEM_WIRED); static void vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) { if (vcpu == 0) { vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT, PAGE_SIZE * vmspace_resident_count(vm->vmspace)); } } static void vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat) { if (vcpu == 0) { vmm_stat_set(vm, vcpu, VMM_MEM_WIRED, PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace))); } } VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt); VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt); Index: head/sys/amd64/vmm/vmm_dev.c =================================================================== --- head/sys/amd64/vmm/vmm_dev.c (revision 276427) +++ head/sys/amd64/vmm/vmm_dev.c (revision 276428) @@ -1,671 +1,693 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vmm_stat.h" #include "vmm_mem.h" #include "io/ppt.h" #include "io/vatpic.h" #include "io/vioapic.h" #include "io/vhpet.h" +#include "io/vrtc.h" struct vmmdev_softc { struct vm *vm; /* vm instance cookie */ struct cdev *cdev; SLIST_ENTRY(vmmdev_softc) link; int flags; }; #define VSC_LINKED 0x01 static SLIST_HEAD(, vmmdev_softc) head; static struct mtx vmmdev_mtx; static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); static struct vmmdev_softc * vmmdev_lookup(const char *name) { struct vmmdev_softc *sc; #ifdef notyet /* XXX kernel is not compiled with invariants */ mtx_assert(&vmmdev_mtx, MA_OWNED); #endif SLIST_FOREACH(sc, &head, link) { if (strcmp(name, vm_name(sc->vm)) == 0) break; } return (sc); } static struct vmmdev_softc * vmmdev_lookup2(struct cdev *cdev) { return (cdev->si_drv1); } static int vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) { int error, off, c, prot; vm_paddr_t gpa; void *hpa, *cookie; struct vmmdev_softc *sc; static char zerobuf[PAGE_SIZE]; error = 0; sc = vmmdev_lookup2(cdev); if (sc == NULL) error = ENXIO; prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); while (uio->uio_resid > 0 && error == 0) { gpa = uio->uio_offset; off = gpa & PAGE_MASK; c = min(uio->uio_resid, PAGE_SIZE - off); /* * The VM has a hole in its physical memory map. If we want to * use 'dd' to inspect memory beyond the hole we need to * provide bogus data for memory that lies in the hole. * * Since this device does not support lseek(2), dd(1) will * read(2) blocks of data to simulate the lseek(2). */ hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie); if (hpa == NULL) { if (uio->uio_rw == UIO_READ) error = uiomove(zerobuf, c, uio); else error = EFAULT; } else { error = uiomove(hpa, c, uio); vm_gpa_release(cookie); } } return (error); } static int vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct thread *td) { int error, vcpu, state_changed, size; cpuset_t *cpuset; struct vmmdev_softc *sc; struct vm_memory_segment *seg; struct vm_register *vmreg; struct vm_seg_desc *vmsegdesc; struct vm_run *vmrun; struct vm_exception *vmexc; struct vm_lapic_irq *vmirq; struct vm_lapic_msi *vmmsi; struct vm_ioapic_irq *ioapic_irq; struct vm_isa_irq *isa_irq; struct vm_isa_irq_trigger *isa_irq_trigger; struct vm_capability *vmcap; struct vm_pptdev *pptdev; struct vm_pptdev_mmio *pptmmio; struct vm_pptdev_msi *pptmsi; struct vm_pptdev_msix *pptmsix; struct vm_nmi *vmnmi; struct vm_stats *vmstats; struct vm_stat_desc *statdesc; struct vm_x2apic *x2apic; struct vm_gpa_pte *gpapte; struct vm_suspend *vmsuspend; struct vm_gla2gpa *gg; struct vm_activate_cpu *vac; struct vm_cpuset *vm_cpuset; struct vm_intinfo *vmii; + struct vm_rtc_time *rtctime; + struct vm_rtc_data *rtcdata; sc = vmmdev_lookup2(cdev); if (sc == NULL) return (ENXIO); error = 0; vcpu = -1; state_changed = 0; /* * Some VMM ioctls can operate only on vcpus that are not running. */ switch (cmd) { case VM_RUN: case VM_GET_REGISTER: case VM_SET_REGISTER: case VM_GET_SEGMENT_DESCRIPTOR: case VM_SET_SEGMENT_DESCRIPTOR: case VM_INJECT_EXCEPTION: case VM_GET_CAPABILITY: case VM_SET_CAPABILITY: case VM_PPTDEV_MSI: case VM_PPTDEV_MSIX: case VM_SET_X2APIC_STATE: case VM_GLA2GPA: case VM_ACTIVATE_CPU: case VM_SET_INTINFO: case VM_GET_INTINFO: /* * XXX fragile, handle with care * Assumes that the first field of the ioctl data is the vcpu. */ vcpu = *(int *)data; if (vcpu < 0 || vcpu >= VM_MAXCPU) { error = EINVAL; goto done; } error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); if (error) goto done; state_changed = 1; break; case VM_MAP_PPTDEV_MMIO: case VM_BIND_PPTDEV: case VM_UNBIND_PPTDEV: case VM_MAP_MEMORY: case VM_REINIT: /* * ioctls that operate on the entire virtual machine must * prevent all vcpus from running. */ error = 0; for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) { error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); if (error) break; } if (error) { while (--vcpu >= 0) vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); goto done; } state_changed = 2; break; default: break; } switch(cmd) { case VM_RUN: vmrun = (struct vm_run *)data; error = vm_run(sc->vm, vmrun); break; case VM_SUSPEND: vmsuspend = (struct vm_suspend *)data; error = vm_suspend(sc->vm, vmsuspend->how); break; case VM_REINIT: error = vm_reinit(sc->vm); break; case VM_STAT_DESC: { statdesc = (struct vm_stat_desc *)data; error = vmm_stat_desc_copy(statdesc->index, statdesc->desc, sizeof(statdesc->desc)); break; } case VM_STATS: { CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS); vmstats = (struct vm_stats *)data; getmicrotime(&vmstats->tv); error = vmm_stat_copy(sc->vm, vmstats->cpuid, &vmstats->num_entries, vmstats->statbuf); break; } case VM_PPTDEV_MSI: pptmsi = (struct vm_pptdev_msi *)data; error = ppt_setup_msi(sc->vm, pptmsi->vcpu, pptmsi->bus, pptmsi->slot, pptmsi->func, pptmsi->addr, pptmsi->msg, pptmsi->numvec); break; case VM_PPTDEV_MSIX: pptmsix = (struct vm_pptdev_msix *)data; error = ppt_setup_msix(sc->vm, pptmsix->vcpu, pptmsix->bus, pptmsix->slot, pptmsix->func, pptmsix->idx, pptmsix->addr, pptmsix->msg, pptmsix->vector_control); break; case VM_MAP_PPTDEV_MMIO: pptmmio = (struct vm_pptdev_mmio *)data; error = ppt_map_mmio(sc->vm, pptmmio->bus, pptmmio->slot, pptmmio->func, pptmmio->gpa, pptmmio->len, pptmmio->hpa); break; case VM_BIND_PPTDEV: pptdev = (struct vm_pptdev *)data; error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot, pptdev->func); break; case VM_UNBIND_PPTDEV: pptdev = (struct vm_pptdev *)data; error = vm_unassign_pptdev(sc->vm, pptdev->bus, pptdev->slot, pptdev->func); break; case VM_INJECT_EXCEPTION: vmexc = (struct vm_exception *)data; error = vm_inject_exception(sc->vm, vmexc->cpuid, vmexc); break; case VM_INJECT_NMI: vmnmi = (struct vm_nmi *)data; error = vm_inject_nmi(sc->vm, vmnmi->cpuid); break; case VM_LAPIC_IRQ: vmirq = (struct vm_lapic_irq *)data; error = lapic_intr_edge(sc->vm, vmirq->cpuid, vmirq->vector); break; case VM_LAPIC_LOCAL_IRQ: vmirq = (struct vm_lapic_irq *)data; error = lapic_set_local_intr(sc->vm, vmirq->cpuid, vmirq->vector); break; case VM_LAPIC_MSI: vmmsi = (struct vm_lapic_msi *)data; error = lapic_intr_msi(sc->vm, vmmsi->addr, vmmsi->msg); break; case VM_IOAPIC_ASSERT_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_assert_irq(sc->vm, ioapic_irq->irq); break; case VM_IOAPIC_DEASSERT_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_deassert_irq(sc->vm, ioapic_irq->irq); break; case VM_IOAPIC_PULSE_IRQ: ioapic_irq = (struct vm_ioapic_irq *)data; error = vioapic_pulse_irq(sc->vm, ioapic_irq->irq); break; case VM_IOAPIC_PINCOUNT: *(int *)data = vioapic_pincount(sc->vm); break; case VM_ISA_ASSERT_IRQ: isa_irq = (struct vm_isa_irq *)data; error = vatpic_assert_irq(sc->vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_assert_irq(sc->vm, isa_irq->ioapic_irq); break; case VM_ISA_DEASSERT_IRQ: isa_irq = (struct vm_isa_irq *)data; error = vatpic_deassert_irq(sc->vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_deassert_irq(sc->vm, isa_irq->ioapic_irq); break; case VM_ISA_PULSE_IRQ: isa_irq = (struct vm_isa_irq *)data; error = vatpic_pulse_irq(sc->vm, isa_irq->atpic_irq); if (error == 0 && isa_irq->ioapic_irq != -1) error = vioapic_pulse_irq(sc->vm, isa_irq->ioapic_irq); break; case VM_ISA_SET_IRQ_TRIGGER: isa_irq_trigger = (struct vm_isa_irq_trigger *)data; error = vatpic_set_irq_trigger(sc->vm, isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); break; case VM_MAP_MEMORY: seg = (struct vm_memory_segment *)data; error = vm_malloc(sc->vm, seg->gpa, seg->len); break; case VM_GET_MEMORY_SEG: seg = (struct vm_memory_segment *)data; seg->len = 0; (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg); error = 0; break; case VM_GET_REGISTER: vmreg = (struct vm_register *)data; error = vm_get_register(sc->vm, vmreg->cpuid, vmreg->regnum, &vmreg->regval); break; case VM_SET_REGISTER: vmreg = (struct vm_register *)data; error = vm_set_register(sc->vm, vmreg->cpuid, vmreg->regnum, vmreg->regval); break; case VM_SET_SEGMENT_DESCRIPTOR: vmsegdesc = (struct vm_seg_desc *)data; error = vm_set_seg_desc(sc->vm, vmsegdesc->cpuid, vmsegdesc->regnum, &vmsegdesc->desc); break; case VM_GET_SEGMENT_DESCRIPTOR: vmsegdesc = (struct vm_seg_desc *)data; error = vm_get_seg_desc(sc->vm, vmsegdesc->cpuid, vmsegdesc->regnum, &vmsegdesc->desc); break; case VM_GET_CAPABILITY: vmcap = (struct vm_capability *)data; error = vm_get_capability(sc->vm, vmcap->cpuid, vmcap->captype, &vmcap->capval); break; case VM_SET_CAPABILITY: vmcap = (struct vm_capability *)data; error = vm_set_capability(sc->vm, vmcap->cpuid, vmcap->captype, vmcap->capval); break; case VM_SET_X2APIC_STATE: x2apic = (struct vm_x2apic *)data; error = vm_set_x2apic_state(sc->vm, x2apic->cpuid, x2apic->state); break; case VM_GET_X2APIC_STATE: x2apic = (struct vm_x2apic *)data; error = vm_get_x2apic_state(sc->vm, x2apic->cpuid, &x2apic->state); break; case VM_GET_GPA_PMAP: gpapte = (struct vm_gpa_pte *)data; pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vm)), gpapte->gpa, gpapte->pte, &gpapte->ptenum); error = 0; break; case VM_GET_HPET_CAPABILITIES: error = vhpet_getcap((struct vm_hpet_cap *)data); break; case VM_GLA2GPA: { CTASSERT(PROT_READ == VM_PROT_READ); CTASSERT(PROT_WRITE == VM_PROT_WRITE); CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); gg = (struct vm_gla2gpa *)data; error = vmm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla, gg->prot, &gg->gpa); KASSERT(error == 0 || error == 1 || error == -1, ("%s: vmm_gla2gpa unknown error %d", __func__, error)); if (error >= 0) { /* * error = 0: the translation was successful * error = 1: a fault was injected into the guest */ gg->fault = error; error = 0; } else { error = EFAULT; } break; } case VM_ACTIVATE_CPU: vac = (struct vm_activate_cpu *)data; error = vm_activate_cpu(sc->vm, vac->vcpuid); break; case VM_GET_CPUS: error = 0; vm_cpuset = (struct vm_cpuset *)data; size = vm_cpuset->cpusetsize; if (size < sizeof(cpuset_t) || size > CPU_MAXSIZE / NBBY) { error = ERANGE; break; } cpuset = malloc(size, M_TEMP, M_WAITOK | M_ZERO); if (vm_cpuset->which == VM_ACTIVE_CPUS) *cpuset = vm_active_cpus(sc->vm); else if (vm_cpuset->which == VM_SUSPENDED_CPUS) *cpuset = vm_suspended_cpus(sc->vm); else error = EINVAL; if (error == 0) error = copyout(cpuset, vm_cpuset->cpus, size); free(cpuset, M_TEMP); break; case VM_SET_INTINFO: vmii = (struct vm_intinfo *)data; error = vm_exit_intinfo(sc->vm, vmii->vcpuid, vmii->info1); break; case VM_GET_INTINFO: vmii = (struct vm_intinfo *)data; error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1, &vmii->info2); + break; + case VM_RTC_WRITE: + rtcdata = (struct vm_rtc_data *)data; + error = vrtc_nvram_write(sc->vm, rtcdata->offset, + rtcdata->value); + break; + case VM_RTC_READ: + rtcdata = (struct vm_rtc_data *)data; + error = vrtc_nvram_read(sc->vm, rtcdata->offset, + &rtcdata->value); + break; + case VM_RTC_SETTIME: + rtctime = (struct vm_rtc_time *)data; + error = vrtc_set_time(sc->vm, rtctime->secs); + break; + case VM_RTC_GETTIME: + error = 0; + rtctime = (struct vm_rtc_time *)data; + rtctime->secs = vrtc_get_time(sc->vm); break; default: error = ENOTTY; break; } if (state_changed == 1) { vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); } else if (state_changed == 2) { for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); } done: /* Make sure that no handler returns a bogus value like ERESTART */ KASSERT(error >= 0, ("vmmdev_ioctl: invalid error return %d", error)); return (error); } static int vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, struct vm_object **object, int nprot) { int error; struct vmmdev_softc *sc; sc = vmmdev_lookup2(cdev); if (sc != NULL && (nprot & PROT_EXEC) == 0) error = vm_get_memobj(sc->vm, *offset, size, offset, object); else error = EINVAL; return (error); } static void vmmdev_destroy(void *arg) { struct vmmdev_softc *sc = arg; if (sc->cdev != NULL) destroy_dev(sc->cdev); if (sc->vm != NULL) vm_destroy(sc->vm); if ((sc->flags & VSC_LINKED) != 0) { mtx_lock(&vmmdev_mtx); SLIST_REMOVE(&head, sc, vmmdev_softc, link); mtx_unlock(&vmmdev_mtx); } free(sc, M_VMMDEV); } static int sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) { int error; char buf[VM_MAX_NAMELEN]; struct vmmdev_softc *sc; struct cdev *cdev; strlcpy(buf, "beavis", sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(buf); if (sc == NULL || sc->cdev == NULL) { mtx_unlock(&vmmdev_mtx); return (EINVAL); } /* * The 'cdev' will be destroyed asynchronously when 'si_threadcount' * goes down to 0 so we should not do it again in the callback. */ cdev = sc->cdev; sc->cdev = NULL; mtx_unlock(&vmmdev_mtx); /* * Schedule the 'cdev' to be destroyed: * * - any new operations on this 'cdev' will return an error (ENXIO). * * - when the 'si_threadcount' dwindles down to zero the 'cdev' will * be destroyed and the callback will be invoked in a taskqueue * context. */ destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); return (0); } SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_vmm_destroy, "A", NULL); static struct cdevsw vmmdevsw = { .d_name = "vmmdev", .d_version = D_VERSION, .d_ioctl = vmmdev_ioctl, .d_mmap_single = vmmdev_mmap_single, .d_read = vmmdev_rw, .d_write = vmmdev_rw, }; static int sysctl_vmm_create(SYSCTL_HANDLER_ARGS) { int error; struct vm *vm; struct cdev *cdev; struct vmmdev_softc *sc, *sc2; char buf[VM_MAX_NAMELEN]; strlcpy(buf, "beavis", sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); mtx_lock(&vmmdev_mtx); sc = vmmdev_lookup(buf); mtx_unlock(&vmmdev_mtx); if (sc != NULL) return (EEXIST); error = vm_create(buf, &vm); if (error != 0) return (error); sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); sc->vm = vm; /* * Lookup the name again just in case somebody sneaked in when we * dropped the lock. */ mtx_lock(&vmmdev_mtx); sc2 = vmmdev_lookup(buf); if (sc2 == NULL) { SLIST_INSERT_HEAD(&head, sc, link); sc->flags |= VSC_LINKED; } mtx_unlock(&vmmdev_mtx); if (sc2 != NULL) { vmmdev_destroy(sc); return (EEXIST); } error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL, UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf); if (error != 0) { vmmdev_destroy(sc); return (error); } mtx_lock(&vmmdev_mtx); sc->cdev = cdev; sc->cdev->si_drv1 = sc; mtx_unlock(&vmmdev_mtx); return (0); } SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW, NULL, 0, sysctl_vmm_create, "A", NULL); void vmmdev_init(void) { mtx_init(&vmmdev_mtx, "vmm device mutex", NULL, MTX_DEF); } int vmmdev_cleanup(void) { int error; if (SLIST_EMPTY(&head)) error = 0; else error = EBUSY; return (error); } Index: head/sys/amd64/vmm/vmm_ioport.c =================================================================== --- head/sys/amd64/vmm/vmm_ioport.c (revision 276427) +++ head/sys/amd64/vmm/vmm_ioport.c (revision 276428) @@ -1,179 +1,182 @@ /*- * Copyright (c) 2014 Tycho Nightingale * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "vatpic.h" #include "vatpit.h" #include "vpmtmr.h" +#include "vrtc.h" #include "vmm_ioport.h" #include "vmm_ktr.h" #define MAX_IOPORTS 1280 ioport_handler_func_t ioport_handler[MAX_IOPORTS] = { [TIMER_MODE] = vatpit_handler, [TIMER_CNTR0] = vatpit_handler, [TIMER_CNTR1] = vatpit_handler, [TIMER_CNTR2] = vatpit_handler, [NMISC_PORT] = vatpit_nmisc_handler, [IO_ICU1] = vatpic_master_handler, [IO_ICU1 + ICU_IMR_OFFSET] = vatpic_master_handler, [IO_ICU2] = vatpic_slave_handler, [IO_ICU2 + ICU_IMR_OFFSET] = vatpic_slave_handler, [IO_ELCR1] = vatpic_elc_handler, [IO_ELCR2] = vatpic_elc_handler, [IO_PMTMR] = vpmtmr_handler, + [IO_RTC] = vrtc_addr_handler, + [IO_RTC + 1] = vrtc_data_handler, }; #ifdef KTR static const char * inout_instruction(struct vm_exit *vmexit) { int index; static const char *iodesc[] = { "outb", "outw", "outl", "inb", "inw", "inl", "outsb", "outsw", "outsd" "insb", "insw", "insd", }; switch (vmexit->u.inout.bytes) { case 1: index = 0; break; case 2: index = 1; break; default: index = 2; break; } if (vmexit->u.inout.in) index += 3; if (vmexit->u.inout.string) index += 6; KASSERT(index < nitems(iodesc), ("%s: invalid index %d", __func__, index)); return (iodesc[index]); } #endif /* KTR */ static int emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) { ioport_handler_func_t handler; uint32_t mask, val; int error; /* * If there is no handler for the I/O port then punt to userspace. */ if (vmexit->u.inout.port >= MAX_IOPORTS || (handler = ioport_handler[vmexit->u.inout.port]) == NULL) { *retu = true; return (0); } mask = vie_size2mask(vmexit->u.inout.bytes); if (!vmexit->u.inout.in) { val = vmexit->u.inout.eax & mask; } error = (*handler)(vm, vcpuid, vmexit->u.inout.in, vmexit->u.inout.port, vmexit->u.inout.bytes, &val); if (error) { /* * The value returned by this function is also the return value * of vm_run(). This needs to be a positive number otherwise it * can be interpreted as a "pseudo-error" like ERESTART. * * Enforce this by mapping all errors to EIO. */ return (EIO); } if (vmexit->u.inout.in) { vmexit->u.inout.eax &= ~mask; vmexit->u.inout.eax |= val & mask; error = vm_set_register(vm, vcpuid, VM_REG_GUEST_RAX, vmexit->u.inout.eax); KASSERT(error == 0, ("emulate_ioport: error %d setting guest " "rax register", error)); } *retu = false; return (0); } static int emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) { *retu = true; return (0); /* Return to userspace to finish emulation */ } int vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu) { int bytes, error; bytes = vmexit->u.inout.bytes; KASSERT(bytes == 1 || bytes == 2 || bytes == 4, ("vm_handle_inout: invalid operand size %d", bytes)); if (vmexit->u.inout.string) error = emulate_inout_str(vm, vcpuid, vmexit, retu); else error = emulate_inout_port(vm, vcpuid, vmexit, retu); VCPU_CTR4(vm, vcpuid, "%s%s 0x%04x: %s", vmexit->u.inout.rep ? "rep " : "", inout_instruction(vmexit), vmexit->u.inout.port, error ? "error" : (*retu ? "userspace" : "handled")); return (error); } Index: head/sys/modules/vmm/Makefile =================================================================== --- head/sys/modules/vmm/Makefile (revision 276427) +++ head/sys/modules/vmm/Makefile (revision 276428) @@ -1,80 +1,81 @@ # $FreeBSD$ KMOD= vmm SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h SRCS+= vmx_assym.h svm_assym.h DPSRCS= vmx_genassym.c svm_genassym.c CFLAGS+= -DVMM_KEEP_STATS -DSMP CFLAGS+= -I${.CURDIR}/../../amd64/vmm CFLAGS+= -I${.CURDIR}/../../amd64/vmm/io CFLAGS+= -I${.CURDIR}/../../amd64/vmm/intel CFLAGS+= -I${.CURDIR}/../../amd64/vmm/amd # generic vmm support .PATH: ${.CURDIR}/../../amd64/vmm SRCS+= vmm.c \ vmm_dev.c \ vmm_host.c \ vmm_instruction_emul.c \ vmm_ioport.c \ vmm_ipi.c \ vmm_lapic.c \ vmm_mem.c \ vmm_stat.c \ vmm_util.c \ x86.c \ vmm_support.S .PATH: ${.CURDIR}/../../amd64/vmm/io SRCS+= iommu.c \ ppt.c \ vatpic.c \ vatpit.c \ vhpet.c \ vioapic.c \ vlapic.c \ - vpmtmr.c + vpmtmr.c \ + vrtc.c # intel-specific files .PATH: ${.CURDIR}/../../amd64/vmm/intel SRCS+= ept.c \ vmcs.c \ vmx_msr.c \ vmx_support.S \ vmx.c \ vtd.c # amd-specific files .PATH: ${.CURDIR}/../../amd64/vmm/amd SRCS+= vmcb.c \ svm.c \ svm_support.S \ npt.c \ amdv.c \ svm_msr.c CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o vmx_assym.h: vmx_genassym.o sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET} svm_assym.h: svm_genassym.o sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET} vmx_support.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} svm_support.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} vmx_genassym.o: ${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC} svm_genassym.o: ${CC} -c ${CFLAGS:N-fno-common} ${.IMPSRC} .include Index: head/usr.sbin/bhyve/rtc.c =================================================================== --- head/usr.sbin/bhyve/rtc.c (revision 276427) +++ head/usr.sbin/bhyve/rtc.c (revision 276428) @@ -1,382 +1,129 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include -#include -#include -#include #include #include #include #include #include "acpi.h" -#include "inout.h" #include "pci_lpc.h" #include "rtc.h" -#define IO_RTC 0x70 +#define IO_RTC 0x70 -#define RTC_SEC 0x00 /* seconds */ -#define RTC_SEC_ALARM 0x01 -#define RTC_MIN 0x02 -#define RTC_MIN_ALARM 0x03 -#define RTC_HRS 0x04 -#define RTC_HRS_ALARM 0x05 -#define RTC_WDAY 0x06 -#define RTC_DAY 0x07 -#define RTC_MONTH 0x08 -#define RTC_YEAR 0x09 -#define RTC_CENTURY 0x32 /* current century */ - -#define RTC_STATUSA 0xA -#define RTCSA_TUP 0x80 /* time update, don't look now */ - -#define RTC_STATUSB 0xB -#define RTCSB_DST 0x01 -#define RTCSB_24HR 0x02 -#define RTCSB_BIN 0x04 /* 0 = BCD, 1 = Binary */ -#define RTCSB_PINTR 0x40 /* 1 = enable periodic clock interrupt */ -#define RTCSB_HALT 0x80 /* stop clock updates */ - -#define RTC_INTR 0x0c /* status register C (R) interrupt source */ - -#define RTC_STATUSD 0x0d /* status register D (R) Lost Power */ -#define RTCSD_PWR 0x80 /* clock power OK */ - -#define RTC_NVRAM_START 0x0e -#define RTC_NVRAM_END 0x7f -#define RTC_NVRAM_SZ (128 - RTC_NVRAM_START) -#define nvoff(x) ((x) - RTC_NVRAM_START) - -#define RTC_DIAG 0x0e -#define RTC_RSTCODE 0x0f -#define RTC_EQUIPMENT 0x14 #define RTC_LMEM_LSB 0x34 #define RTC_LMEM_MSB 0x35 #define RTC_HMEM_LSB 0x5b #define RTC_HMEM_SB 0x5c #define RTC_HMEM_MSB 0x5d #define m_64KB (64*1024) #define m_16MB (16*1024*1024) #define m_4GB (4ULL*1024*1024*1024) -static int addr; - -static uint8_t rtc_nvram[RTC_NVRAM_SZ]; - -/* XXX initialize these to default values as they would be from BIOS */ -static uint8_t status_a, status_b; - -static struct { - uint8_t hours; - uint8_t mins; - uint8_t secs; -} rtc_alarm; - -static u_char const bin2bcd_data[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, - 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, - 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, - 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, - 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, - 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99 -}; -#define bin2bcd(bin) (bin2bcd_data[bin]) - -#define rtcout(val) ((status_b & RTCSB_BIN) ? (val) : bin2bcd((val))) - -static void -timevalfix(struct timeval *t1) +/* + * Returns the current RTC time as number of seconds since 00:00:00 Jan 1, 1970 + * + * XXX this always returns localtime to maintain compatibility with the + * original device model. + */ +static time_t +rtc_time(struct vmctx *ctx) { - - if (t1->tv_usec < 0) { - t1->tv_sec--; - t1->tv_usec += 1000000; - } - if (t1->tv_usec >= 1000000) { - t1->tv_sec++; - t1->tv_usec -= 1000000; - } -} - -static void -timevalsub(struct timeval *t1, const struct timeval *t2) -{ - - t1->tv_sec -= t2->tv_sec; - t1->tv_usec -= t2->tv_usec; - timevalfix(t1); -} - -static int -rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - if (bytes != 1) - return (-1); - - if (in) { - /* straight read of this register will return 0xFF */ - *eax = 0xff; - return (0); - } - - switch (*eax & 0x7f) { - case RTC_SEC: - case RTC_SEC_ALARM: - case RTC_MIN: - case RTC_MIN_ALARM: - case RTC_HRS: - case RTC_HRS_ALARM: - case RTC_WDAY: - case RTC_DAY: - case RTC_MONTH: - case RTC_YEAR: - case RTC_STATUSA: - case RTC_STATUSB: - case RTC_INTR: - case RTC_STATUSD: - case RTC_NVRAM_START ... RTC_NVRAM_END: - break; - default: - return (-1); - } - - addr = *eax & 0x7f; - return (0); -} - -static int -rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - int hour; + struct tm tm; time_t t; - struct timeval cur, delta; - static struct timeval last; - static struct tm tm; - - if (bytes != 1) - return (-1); - - gettimeofday(&cur, NULL); - - /* - * Increment the cached time only once per second so we can guarantee - * that the guest has at least one second to read the hour:min:sec - * separately and still get a coherent view of the time. - */ - delta = cur; - timevalsub(&delta, &last); - if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) { - t = cur.tv_sec; - localtime_r(&t, &tm); - last = cur; - } - - if (in) { - switch (addr) { - case RTC_SEC_ALARM: - *eax = rtc_alarm.secs; - break; - case RTC_MIN_ALARM: - *eax = rtc_alarm.mins; - break; - case RTC_HRS_ALARM: - *eax = rtc_alarm.hours; - break; - case RTC_SEC: - *eax = rtcout(tm.tm_sec); - return (0); - case RTC_MIN: - *eax = rtcout(tm.tm_min); - return (0); - case RTC_HRS: - if (status_b & RTCSB_24HR) - hour = tm.tm_hour; - else - hour = (tm.tm_hour % 12) + 1; - - *eax = rtcout(hour); - - /* - * If we are representing time in the 12-hour format - * then set the MSB to indicate PM. - */ - if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12) - *eax |= 0x80; - - return (0); - case RTC_WDAY: - *eax = rtcout(tm.tm_wday + 1); - return (0); - case RTC_DAY: - *eax = rtcout(tm.tm_mday); - return (0); - case RTC_MONTH: - *eax = rtcout(tm.tm_mon + 1); - return (0); - case RTC_YEAR: - *eax = rtcout(tm.tm_year % 100); - return (0); - case RTC_STATUSA: - *eax = status_a; - return (0); - case RTC_STATUSB: - *eax = status_b; - return (0); - case RTC_INTR: - *eax = 0; - return (0); - case RTC_STATUSD: - *eax = RTCSD_PWR; - return (0); - case RTC_NVRAM_START ... RTC_NVRAM_END: - *eax = rtc_nvram[addr - RTC_NVRAM_START]; - return (0); - default: - return (-1); - } - } - - switch (addr) { - case RTC_STATUSA: - status_a = *eax & ~RTCSA_TUP; - break; - case RTC_STATUSB: - /* XXX not implemented yet XXX */ - if (*eax & RTCSB_PINTR) - return (-1); - status_b = *eax; - break; - case RTC_STATUSD: - /* ignore write */ - break; - case RTC_SEC_ALARM: - rtc_alarm.secs = *eax; - break; - case RTC_MIN_ALARM: - rtc_alarm.mins = *eax; - break; - case RTC_HRS_ALARM: - rtc_alarm.hours = *eax; - break; - case RTC_SEC: - case RTC_MIN: - case RTC_HRS: - case RTC_WDAY: - case RTC_DAY: - case RTC_MONTH: - case RTC_YEAR: - /* - * Ignore writes to the time of day registers - */ - break; - case RTC_NVRAM_START ... RTC_NVRAM_END: - rtc_nvram[addr - RTC_NVRAM_START] = *eax; - break; - default: - return (-1); - } - return (0); + time(&t); + localtime_r(&t, &tm); + return (timegm(&tm)); } void rtc_init(struct vmctx *ctx) { - struct timeval cur; - struct tm tm; size_t himem; size_t lomem; int err; - err = gettimeofday(&cur, NULL); - assert(err == 0); - (void) localtime_r(&cur.tv_sec, &tm); - - memset(rtc_nvram, 0, sizeof(rtc_nvram)); - - rtc_nvram[nvoff(RTC_CENTURY)] = bin2bcd((tm.tm_year + 1900) / 100); - /* XXX init diag/reset code/equipment/checksum ? */ /* * Report guest memory size in nvram cells as required by UEFI. * Little-endian encoding. * 0x34/0x35 - 64KB chunks above 16MB, below 4GB * 0x5b/0x5c/0x5d - 64KB chunks above 4GB */ lomem = (vm_get_lowmem_size(ctx) - m_16MB) / m_64KB; - rtc_nvram[nvoff(RTC_LMEM_LSB)] = lomem; - rtc_nvram[nvoff(RTC_LMEM_MSB)] = lomem >> 8; + err = vm_rtc_write(ctx, RTC_LMEM_LSB, lomem); + assert(err == 0); + err = vm_rtc_write(ctx, RTC_LMEM_MSB, lomem >> 8); + assert(err == 0); himem = vm_get_highmem_size(ctx) / m_64KB; - rtc_nvram[nvoff(RTC_HMEM_LSB)] = himem; - rtc_nvram[nvoff(RTC_HMEM_SB)] = himem >> 8; - rtc_nvram[nvoff(RTC_HMEM_MSB)] = himem >> 16; -} + err = vm_rtc_write(ctx, RTC_HMEM_LSB, himem); + assert(err == 0); + err = vm_rtc_write(ctx, RTC_HMEM_SB, himem >> 8); + assert(err == 0); + err = vm_rtc_write(ctx, RTC_HMEM_MSB, himem >> 16); + assert(err == 0); -INOUT_PORT(rtc, IO_RTC, IOPORT_F_INOUT, rtc_addr_handler); -INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler); + err = vm_rtc_settime(ctx, rtc_time(ctx)); + assert(err == 0); +} static void rtc_dsdt(void) { dsdt_line(""); dsdt_line("Device (RTC)"); dsdt_line("{"); dsdt_line(" Name (_HID, EisaId (\"PNP0B00\"))"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_indent(2); dsdt_fixed_ioport(IO_RTC, 2); dsdt_fixed_irq(8); dsdt_unindent(2); dsdt_line(" })"); dsdt_line("}"); } LPC_DSDT(rtc_dsdt); /* * Reserve the extended RTC I/O ports although they are not emulated at this * time. */ SYSRES_IO(0x72, 6); Index: head/usr.sbin/bhyvectl/bhyvectl.c =================================================================== --- head/usr.sbin/bhyvectl/bhyvectl.c (revision 276427) +++ head/usr.sbin/bhyvectl/bhyvectl.c (revision 276428) @@ -1,2061 +1,2145 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include "amd/vmcb.h" #include "intel/vmcs.h" #define MB (1UL << 20) #define GB (1UL << 30) #define REQ_ARG required_argument #define NO_ARG no_argument #define OPT_ARG optional_argument static const char *progname; static void usage(bool cpu_intel) { (void)fprintf(stderr, "Usage: %s --vm=\n" " [--cpu=]\n" " [--create]\n" " [--destroy]\n" " [--get-all]\n" " [--get-stats]\n" " [--set-desc-ds]\n" " [--get-desc-ds]\n" " [--set-desc-es]\n" " [--get-desc-es]\n" " [--set-desc-gs]\n" " [--get-desc-gs]\n" " [--set-desc-fs]\n" " [--get-desc-fs]\n" " [--set-desc-cs]\n" " [--get-desc-cs]\n" " [--set-desc-ss]\n" " [--get-desc-ss]\n" " [--set-desc-tr]\n" " [--get-desc-tr]\n" " [--set-desc-ldtr]\n" " [--get-desc-ldtr]\n" " [--set-desc-gdtr]\n" " [--get-desc-gdtr]\n" " [--set-desc-idtr]\n" " [--get-desc-idtr]\n" " [--run]\n" " [--capname=]\n" " [--getcap]\n" " [--setcap=<0|1>]\n" " [--desc-base=]\n" " [--desc-limit=]\n" " [--desc-access=]\n" " [--set-cr0=]\n" " [--get-cr0]\n" " [--set-cr3=]\n" " [--get-cr3]\n" " [--set-cr4=]\n" " [--get-cr4]\n" " [--set-dr7=]\n" " [--get-dr7]\n" " [--set-rsp=]\n" " [--get-rsp]\n" " [--set-rip=]\n" " [--get-rip]\n" " [--get-rax]\n" " [--set-rax=]\n" " [--get-rbx]\n" " [--get-rcx]\n" " [--get-rdx]\n" " [--get-rsi]\n" " [--get-rdi]\n" " [--get-rbp]\n" " [--get-r8]\n" " [--get-r9]\n" " [--get-r10]\n" " [--get-r11]\n" " [--get-r12]\n" " [--get-r13]\n" " [--get-r14]\n" " [--get-r15]\n" " [--set-rflags=]\n" " [--get-rflags]\n" " [--set-cs]\n" " [--get-cs]\n" " [--set-ds]\n" " [--get-ds]\n" " [--set-es]\n" " [--get-es]\n" " [--set-fs]\n" " [--get-fs]\n" " [--set-gs]\n" " [--get-gs]\n" " [--set-ss]\n" " [--get-ss]\n" " [--get-tr]\n" " [--get-ldtr]\n" " [--set-x2apic-state=]\n" " [--get-x2apic-state]\n" " [--unassign-pptdev=]\n" " [--set-mem=]\n" " [--get-lowmem]\n" " [--get-highmem]\n" " [--get-gpa-pmap]\n" " [--assert-lapic-lvt=]\n" " [--inject-nmi]\n" " [--force-reset]\n" " [--force-poweroff]\n" + " [--get-rtc-time]\n" + " [--set-rtc-time=]\n" + " [--get-rtc-nvram]\n" + " [--set-rtc-nvram=]\n" + " [--rtc-nvram-offset=]\n" " [--get-active-cpus]\n" " [--get-suspended-cpus]\n" " [--get-intinfo]\n" " [--get-eptp]\n" " [--set-exception-bitmap]\n" " [--get-exception-bitmap]\n" " [--get-tsc-offset]\n" " [--get-guest-pat]\n" " [--get-io-bitmap-address]\n" " [--get-msr-bitmap]\n" " [--get-msr-bitmap-address]\n" " [--get-guest-sysenter]\n" " [--get-exit-reason]\n", progname); if (cpu_intel) { (void)fprintf(stderr, " [--get-vmcs-pinbased-ctls]\n" " [--get-vmcs-procbased-ctls]\n" " [--get-vmcs-procbased-ctls2]\n" " [--get-vmcs-entry-interruption-info]\n" " [--set-vmcs-entry-interruption-info=]\n" " [--get-vmcs-guest-physical-address\n" " [--get-vmcs-guest-linear-address\n" " [--get-vmcs-host-pat]\n" " [--get-vmcs-host-cr0]\n" " [--get-vmcs-host-cr3]\n" " [--get-vmcs-host-cr4]\n" " [--get-vmcs-host-rip]\n" " [--get-vmcs-host-rsp]\n" " [--get-vmcs-cr0-mask]\n" " [--get-vmcs-cr0-shadow]\n" " [--get-vmcs-cr4-mask]\n" " [--get-vmcs-cr4-shadow]\n" " [--get-vmcs-cr3-targets]\n" " [--get-vmcs-apic-access-address]\n" " [--get-vmcs-virtual-apic-address]\n" " [--get-vmcs-tpr-threshold]\n" " [--get-vmcs-vpid]\n" " [--get-vmcs-instruction-error]\n" " [--get-vmcs-exit-ctls]\n" " [--get-vmcs-entry-ctls]\n" " [--get-vmcs-link]\n" " [--get-vmcs-exit-qualification]\n" " [--get-vmcs-exit-interruption-info]\n" " [--get-vmcs-exit-interruption-error]\n" " [--get-vmcs-interruptibility]\n" ); } else { (void)fprintf(stderr, " [--get-vmcb-intercepts]\n" " [--get-vmcb-asid]\n" " [--get-vmcb-exit-details]\n" " [--get-vmcb-tlb-ctrl]\n" " [--get-vmcb-virq]\n" " [--get-avic-apic-bar]\n" " [--get-avic-backing-page]\n" " [--get-avic-table]\n" ); } exit(1); } +static int get_rtc_time, set_rtc_time; +static int get_rtc_nvram, set_rtc_nvram; +static int rtc_nvram_offset; +static uint8_t rtc_nvram_value; +static time_t rtc_secs; + static int get_stats, getcap, setcap, capval, get_gpa_pmap; static int inject_nmi, assert_lapic_lvt; static int force_reset, force_poweroff; static const char *capname; static int create, destroy, get_lowmem, get_highmem; static int get_intinfo; static int get_active_cpus, get_suspended_cpus; static uint64_t memsize; static int set_cr0, get_cr0, set_cr3, get_cr3, set_cr4, get_cr4; static int set_efer, get_efer; static int set_dr7, get_dr7; static int set_rsp, get_rsp, set_rip, get_rip, set_rflags, get_rflags; static int set_rax, get_rax; static int get_rbx, get_rcx, get_rdx, get_rsi, get_rdi, get_rbp; static int get_r8, get_r9, get_r10, get_r11, get_r12, get_r13, get_r14, get_r15; static int set_desc_ds, get_desc_ds; static int set_desc_es, get_desc_es; static int set_desc_fs, get_desc_fs; static int set_desc_gs, get_desc_gs; static int set_desc_cs, get_desc_cs; static int set_desc_ss, get_desc_ss; static int set_desc_gdtr, get_desc_gdtr; static int set_desc_idtr, get_desc_idtr; static int set_desc_tr, get_desc_tr; static int set_desc_ldtr, get_desc_ldtr; static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; static int set_x2apic_state, get_x2apic_state; enum x2apic_state x2apic_state; static int unassign_pptdev, bus, slot, func; static int run; /* * VMCB specific. */ static int get_vmcb_intercept, get_vmcb_exit_details, get_vmcb_tlb_ctrl; static int get_vmcb_virq, get_avic_table; /* * VMCS-specific fields */ static int get_pinbased_ctls, get_procbased_ctls, get_procbased_ctls2; static int get_eptp, get_io_bitmap, get_tsc_offset; static int get_vmcs_entry_interruption_info, set_vmcs_entry_interruption_info; static int get_vmcs_interruptibility; uint32_t vmcs_entry_interruption_info; static int get_vmcs_gpa, get_vmcs_gla; static int get_exception_bitmap, set_exception_bitmap, exception_bitmap; static int get_cr0_mask, get_cr0_shadow; static int get_cr4_mask, get_cr4_shadow; static int get_cr3_targets; static int get_apic_access_addr, get_virtual_apic_addr, get_tpr_threshold; static int get_msr_bitmap, get_msr_bitmap_address; static int get_vpid_asid; static int get_inst_err, get_exit_ctls, get_entry_ctls; static int get_host_cr0, get_host_cr3, get_host_cr4; static int get_host_rip, get_host_rsp; static int get_guest_pat, get_host_pat; static int get_guest_sysenter, get_vmcs_link; static int get_exit_reason, get_vmcs_exit_qualification; static int get_vmcs_exit_interruption_info, get_vmcs_exit_interruption_error; static uint64_t desc_base; static uint32_t desc_limit, desc_access; static int get_all; static void dump_vm_run_exitcode(struct vm_exit *vmexit, int vcpu) { printf("vm exit[%d]\n", vcpu); printf("\trip\t\t0x%016lx\n", vmexit->rip); printf("\tinst_length\t%d\n", vmexit->inst_length); switch (vmexit->exitcode) { case VM_EXITCODE_INOUT: printf("\treason\t\tINOUT\n"); printf("\tdirection\t%s\n", vmexit->u.inout.in ? "IN" : "OUT"); printf("\tbytes\t\t%d\n", vmexit->u.inout.bytes); printf("\tflags\t\t%s%s\n", vmexit->u.inout.string ? "STRING " : "", vmexit->u.inout.rep ? "REP " : ""); printf("\tport\t\t0x%04x\n", vmexit->u.inout.port); printf("\teax\t\t0x%08x\n", vmexit->u.inout.eax); break; case VM_EXITCODE_VMX: printf("\treason\t\tVMX\n"); printf("\tstatus\t\t%d\n", vmexit->u.vmx.status); printf("\texit_reason\t0x%08x (%u)\n", vmexit->u.vmx.exit_reason, vmexit->u.vmx.exit_reason); printf("\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); printf("\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); printf("\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); break; case VM_EXITCODE_SVM: printf("\treason\t\tSVM\n"); printf("\texit_reason\t\t%#lx\n", vmexit->u.svm.exitcode); printf("\texitinfo1\t\t%#lx\n", vmexit->u.svm.exitinfo1); printf("\texitinfo2\t\t%#lx\n", vmexit->u.svm.exitinfo2); break; default: printf("*** unknown vm run exitcode %d\n", vmexit->exitcode); break; } } /* AMD 6th generation and Intel compatible MSRs */ #define MSR_AMD6TH_START 0xC0000000 #define MSR_AMD6TH_END 0xC0001FFF /* AMD 7th and 8th generation compatible MSRs */ #define MSR_AMD7TH_START 0xC0010000 #define MSR_AMD7TH_END 0xC0011FFF static const char * msr_name(uint32_t msr) { static char buf[32]; switch(msr) { case MSR_TSC: return ("MSR_TSC"); case MSR_EFER: return ("MSR_EFER"); case MSR_STAR: return ("MSR_STAR"); case MSR_LSTAR: return ("MSR_LSTAR"); case MSR_CSTAR: return ("MSR_CSTAR"); case MSR_SF_MASK: return ("MSR_SF_MASK"); case MSR_FSBASE: return ("MSR_FSBASE"); case MSR_GSBASE: return ("MSR_GSBASE"); case MSR_KGSBASE: return ("MSR_KGSBASE"); case MSR_SYSENTER_CS_MSR: return ("MSR_SYSENTER_CS_MSR"); case MSR_SYSENTER_ESP_MSR: return ("MSR_SYSENTER_ESP_MSR"); case MSR_SYSENTER_EIP_MSR: return ("MSR_SYSENTER_EIP_MSR"); case MSR_PAT: return ("MSR_PAT"); } snprintf(buf, sizeof(buf), "MSR %#08x", msr); return (buf); } static inline void print_msr_pm(uint64_t msr, int vcpu, int readable, int writeable) { if (readable || writeable) { printf("%-20s[%d]\t\t%c%c\n", msr_name(msr), vcpu, readable ? 'R' : '-', writeable ? 'W' : '-'); } } /* * Reference APM vol2, section 15.11 MSR Intercepts. */ static void dump_amd_msr_pm(const char *bitmap, int vcpu) { int byte, bit, readable, writeable; uint32_t msr; for (msr = 0; msr < 0x2000; msr++) { byte = msr / 4; bit = (msr % 4) * 2; /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; print_msr_pm(msr, vcpu, readable, writeable); /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ byte += 2048; readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable, writeable); /* MSR 0xC0010000 to 0xC0011FF is only for AMD */ byte += 4096; readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[byte] & (2 << bit)) ? 0 : 1; print_msr_pm(msr + MSR_AMD7TH_START, vcpu, readable, writeable); } } /* * Reference Intel SDM Vol3 Section 24.6.9 MSR-Bitmap Address */ static void dump_intel_msr_pm(const char *bitmap, int vcpu) { int byte, bit, readable, writeable; uint32_t msr; for (msr = 0; msr < 0x2000; msr++) { byte = msr / 8; bit = msr & 0x7; /* Look at MSRs in the range 0x00000000 to 0x00001FFF */ readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; print_msr_pm(msr, vcpu, readable, writeable); /* Look at MSRs in the range 0xC0000000 to 0xC0001FFF */ byte += 1024; readable = (bitmap[byte] & (1 << bit)) ? 0 : 1; writeable = (bitmap[2048 + byte] & (1 << bit)) ? 0 : 1; print_msr_pm(msr + MSR_AMD6TH_START, vcpu, readable, writeable); } } static int dump_msr_bitmap(int vcpu, uint64_t addr, bool cpu_intel) { int error, fd, map_size; const char *bitmap; error = -1; bitmap = MAP_FAILED; fd = open("/dev/mem", O_RDONLY, 0); if (fd < 0) { perror("Couldn't open /dev/mem"); goto done; } if (cpu_intel) map_size = PAGE_SIZE; else map_size = 2 * PAGE_SIZE; bitmap = mmap(NULL, map_size, PROT_READ, MAP_SHARED, fd, addr); if (bitmap == MAP_FAILED) { perror("mmap failed"); goto done; } if (cpu_intel) dump_intel_msr_pm(bitmap, vcpu); else dump_amd_msr_pm(bitmap, vcpu); error = 0; done: if (bitmap != MAP_FAILED) munmap((void *)bitmap, map_size); if (fd >= 0) close(fd); return (error); } static int vm_get_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t *ret_val) { return (vm_get_register(ctx, vcpu, VMCS_IDENT(field), ret_val)); } static int vm_set_vmcs_field(struct vmctx *ctx, int vcpu, int field, uint64_t val) { return (vm_set_register(ctx, vcpu, VMCS_IDENT(field), val)); } static int vm_get_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, uint64_t *ret_val) { return (vm_get_register(ctx, vcpu, VMCB_ACCESS(off, bytes), ret_val)); } static int vm_set_vmcb_field(struct vmctx *ctx, int vcpu, int off, int bytes, uint64_t val) { return (vm_set_register(ctx, vcpu, VMCB_ACCESS(off, bytes), val)); } enum { VMNAME = 1000, /* avoid collision with return values from getopt */ VCPU, SET_MEM, SET_EFER, SET_CR0, SET_CR3, SET_CR4, SET_DR7, SET_RSP, SET_RIP, SET_RAX, SET_RFLAGS, DESC_BASE, DESC_LIMIT, DESC_ACCESS, SET_CS, SET_DS, SET_ES, SET_FS, SET_GS, SET_SS, SET_TR, SET_LDTR, SET_X2APIC_STATE, SET_EXCEPTION_BITMAP, SET_VMCS_ENTRY_INTERRUPTION_INFO, SET_CAP, CAPNAME, UNASSIGN_PPTDEV, GET_GPA_PMAP, ASSERT_LAPIC_LVT, + SET_RTC_TIME, + SET_RTC_NVRAM, + RTC_NVRAM_OFFSET, }; static void print_cpus(const char *banner, const cpuset_t *cpus) { int i, first; first = 1; printf("%s:\t", banner); if (!CPU_EMPTY(cpus)) { for (i = 0; i < CPU_SETSIZE; i++) { if (CPU_ISSET(i, cpus)) { printf("%s%d", first ? " " : ", ", i); first = 0; } } } else printf(" (none)"); printf("\n"); } static void print_intinfo(const char *banner, uint64_t info) { int type; printf("%s:\t", banner); if (info & VM_INTINFO_VALID) { type = info & VM_INTINFO_TYPE; switch (type) { case VM_INTINFO_HWINTR: printf("extint"); break; case VM_INTINFO_NMI: printf("nmi"); break; case VM_INTINFO_SWINTR: printf("swint"); break; default: printf("exception"); break; } printf(" vector %d", (int)VM_INTINFO_VECTOR(info)); if (info & VM_INTINFO_DEL_ERRCODE) printf(" errcode %#x", (u_int)(info >> 32)); } else { printf("n/a"); } printf("\n"); } static bool cpu_vendor_intel(void) { u_int regs[4]; char cpu_vendor[13]; do_cpuid(0, regs); ((u_int *)&cpu_vendor)[0] = regs[1]; ((u_int *)&cpu_vendor)[1] = regs[3]; ((u_int *)&cpu_vendor)[2] = regs[2]; cpu_vendor[12] = '\0'; if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { return (false); } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) { return (true); } else { fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor); exit(1); } } static int get_all_registers(struct vmctx *ctx, int vcpu) { uint64_t cr0, cr3, cr4, dr7, rsp, rip, rflags, efer; uint64_t rax, rbx, rcx, rdx, rsi, rdi, rbp; uint64_t r8, r9, r10, r11, r12, r13, r14, r15; int error; if (get_efer || get_all) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_EFER, &efer); if (error == 0) printf("efer[%d]\t\t0x%016lx\n", vcpu, efer); } if (!error && (get_cr0 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR0, &cr0); if (error == 0) printf("cr0[%d]\t\t0x%016lx\n", vcpu, cr0); } if (!error && (get_cr3 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR3, &cr3); if (error == 0) printf("cr3[%d]\t\t0x%016lx\n", vcpu, cr3); } if (!error && (get_cr4 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CR4, &cr4); if (error == 0) printf("cr4[%d]\t\t0x%016lx\n", vcpu, cr4); } if (!error && (get_dr7 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DR7, &dr7); if (error == 0) printf("dr7[%d]\t\t0x%016lx\n", vcpu, dr7); } if (!error && (get_rsp || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSP, &rsp); if (error == 0) printf("rsp[%d]\t\t0x%016lx\n", vcpu, rsp); } if (!error && (get_rip || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); if (error == 0) printf("rip[%d]\t\t0x%016lx\n", vcpu, rip); } if (!error && (get_rax || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RAX, &rax); if (error == 0) printf("rax[%d]\t\t0x%016lx\n", vcpu, rax); } if (!error && (get_rbx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBX, &rbx); if (error == 0) printf("rbx[%d]\t\t0x%016lx\n", vcpu, rbx); } if (!error && (get_rcx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RCX, &rcx); if (error == 0) printf("rcx[%d]\t\t0x%016lx\n", vcpu, rcx); } if (!error && (get_rdx || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDX, &rdx); if (error == 0) printf("rdx[%d]\t\t0x%016lx\n", vcpu, rdx); } if (!error && (get_rsi || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RSI, &rsi); if (error == 0) printf("rsi[%d]\t\t0x%016lx\n", vcpu, rsi); } if (!error && (get_rdi || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RDI, &rdi); if (error == 0) printf("rdi[%d]\t\t0x%016lx\n", vcpu, rdi); } if (!error && (get_rbp || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RBP, &rbp); if (error == 0) printf("rbp[%d]\t\t0x%016lx\n", vcpu, rbp); } if (!error && (get_r8 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R8, &r8); if (error == 0) printf("r8[%d]\t\t0x%016lx\n", vcpu, r8); } if (!error && (get_r9 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R9, &r9); if (error == 0) printf("r9[%d]\t\t0x%016lx\n", vcpu, r9); } if (!error && (get_r10 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R10, &r10); if (error == 0) printf("r10[%d]\t\t0x%016lx\n", vcpu, r10); } if (!error && (get_r11 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R11, &r11); if (error == 0) printf("r11[%d]\t\t0x%016lx\n", vcpu, r11); } if (!error && (get_r12 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R12, &r12); if (error == 0) printf("r12[%d]\t\t0x%016lx\n", vcpu, r12); } if (!error && (get_r13 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R13, &r13); if (error == 0) printf("r13[%d]\t\t0x%016lx\n", vcpu, r13); } if (!error && (get_r14 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R14, &r14); if (error == 0) printf("r14[%d]\t\t0x%016lx\n", vcpu, r14); } if (!error && (get_r15 || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_R15, &r15); if (error == 0) printf("r15[%d]\t\t0x%016lx\n", vcpu, r15); } if (!error && (get_rflags || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, &rflags); if (error == 0) printf("rflags[%d]\t0x%016lx\n", vcpu, rflags); } return (error); } static int get_all_segments(struct vmctx *ctx, int vcpu) { int error; uint64_t cs, ds, es, fs, gs, ss, tr, ldtr; if (get_desc_ds || get_all) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_DS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ds desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_es || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_ES, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("es desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_fs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_FS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("fs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_gs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("gs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_ss || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ss desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_cs || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_CS, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("cs desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_tr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("tr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_ldtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("ldtr desc[%d]\t0x%016lx/0x%08x/0x%08x\n", vcpu, desc_base, desc_limit, desc_access); } } if (!error && (get_desc_gdtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_GDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("gdtr[%d]\t\t0x%016lx/0x%08x\n", vcpu, desc_base, desc_limit); } } if (!error && (get_desc_idtr || get_all)) { error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_IDTR, &desc_base, &desc_limit, &desc_access); if (error == 0) { printf("idtr[%d]\t\t0x%016lx/0x%08x\n", vcpu, desc_base, desc_limit); } } if (!error && (get_cs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_CS, &cs); if (error == 0) printf("cs[%d]\t\t0x%04lx\n", vcpu, cs); } if (!error && (get_ds || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_DS, &ds); if (error == 0) printf("ds[%d]\t\t0x%04lx\n", vcpu, ds); } if (!error && (get_es || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_ES, &es); if (error == 0) printf("es[%d]\t\t0x%04lx\n", vcpu, es); } if (!error && (get_fs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_FS, &fs); if (error == 0) printf("fs[%d]\t\t0x%04lx\n", vcpu, fs); } if (!error && (get_gs || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_GS, &gs); if (error == 0) printf("gs[%d]\t\t0x%04lx\n", vcpu, gs); } if (!error && (get_ss || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_SS, &ss); if (error == 0) printf("ss[%d]\t\t0x%04lx\n", vcpu, ss); } if (!error && (get_tr || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_TR, &tr); if (error == 0) printf("tr[%d]\t\t0x%04lx\n", vcpu, tr); } if (!error && (get_ldtr || get_all)) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_LDTR, &ldtr); if (error == 0) printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr); } return (error); } static int get_misc_vmcs(struct vmctx *ctx, int vcpu) { uint64_t ctl, cr0, cr3, cr4, rsp, rip, pat, addr, u64; int error; if (get_cr0_mask || get_all) { uint64_t cr0mask; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_MASK, &cr0mask); if (error == 0) printf("cr0_mask[%d]\t\t0x%016lx\n", vcpu, cr0mask); } if (!error && (get_cr0_shadow || get_all)) { uint64_t cr0shadow; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR0_SHADOW, &cr0shadow); if (error == 0) printf("cr0_shadow[%d]\t\t0x%016lx\n", vcpu, cr0shadow); } if (!error && (get_cr4_mask || get_all)) { uint64_t cr4mask; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_MASK, &cr4mask); if (error == 0) printf("cr4_mask[%d]\t\t0x%016lx\n", vcpu, cr4mask); } if (!error && (get_cr4_shadow || get_all)) { uint64_t cr4shadow; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR4_SHADOW, &cr4shadow); if (error == 0) printf("cr4_shadow[%d]\t\t0x%016lx\n", vcpu, cr4shadow); } if (!error && (get_cr3_targets || get_all)) { uint64_t target_count, target_addr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET_COUNT, &target_count); if (error == 0) { printf("cr3_target_count[%d]\t0x%016lx\n", vcpu, target_count); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET0, &target_addr); if (error == 0) { printf("cr3_target0[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET1, &target_addr); if (error == 0) { printf("cr3_target1[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET2, &target_addr); if (error == 0) { printf("cr3_target2[%d]\t\t0x%016lx\n", vcpu, target_addr); } error = vm_get_vmcs_field(ctx, vcpu, VMCS_CR3_TARGET3, &target_addr); if (error == 0) { printf("cr3_target3[%d]\t\t0x%016lx\n", vcpu, target_addr); } } if (!error && (get_pinbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PIN_BASED_CTLS, &ctl); if (error == 0) printf("pinbased_ctls[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_procbased_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_PRI_PROC_BASED_CTLS, &ctl); if (error == 0) printf("procbased_ctls[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_procbased_ctls2 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_SEC_PROC_BASED_CTLS, &ctl); if (error == 0) printf("procbased_ctls2[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_vmcs_gla || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_LINEAR_ADDRESS, &u64); if (error == 0) printf("gla[%d]\t\t0x%016lx\n", vcpu, u64); } if (!error && (get_vmcs_gpa || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_PHYSICAL_ADDRESS, &u64); if (error == 0) printf("gpa[%d]\t\t0x%016lx\n", vcpu, u64); } if (!error && (get_vmcs_entry_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO,&u64); if (error == 0) { printf("entry_interruption_info[%d]\t0x%016lx\n", vcpu, u64); } } if (!error && (get_tpr_threshold || get_all)) { uint64_t threshold; error = vm_get_vmcs_field(ctx, vcpu, VMCS_TPR_THRESHOLD, &threshold); if (error == 0) printf("tpr_threshold[%d]\t0x%016lx\n", vcpu, threshold); } if (!error && (get_inst_err || get_all)) { uint64_t insterr; error = vm_get_vmcs_field(ctx, vcpu, VMCS_INSTRUCTION_ERROR, &insterr); if (error == 0) { printf("instruction_error[%d]\t0x%016lx\n", vcpu, insterr); } } if (!error && (get_exit_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_CTLS, &ctl); if (error == 0) printf("exit_ctls[%d]\t\t0x%016lx\n", vcpu, ctl); } if (!error && (get_entry_ctls || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_ENTRY_CTLS, &ctl); if (error == 0) printf("entry_ctls[%d]\t\t0x%016lx\n", vcpu, ctl); } if (!error && (get_host_pat || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_IA32_PAT, &pat); if (error == 0) printf("host_pat[%d]\t\t0x%016lx\n", vcpu, pat); } if (!error && (get_host_cr0 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR0, &cr0); if (error == 0) printf("host_cr0[%d]\t\t0x%016lx\n", vcpu, cr0); } if (!error && (get_host_cr3 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR3, &cr3); if (error == 0) printf("host_cr3[%d]\t\t0x%016lx\n", vcpu, cr3); } if (!error && (get_host_cr4 || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_CR4, &cr4); if (error == 0) printf("host_cr4[%d]\t\t0x%016lx\n", vcpu, cr4); } if (!error && (get_host_rip || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RIP, &rip); if (error == 0) printf("host_rip[%d]\t\t0x%016lx\n", vcpu, rip); } if (!error && (get_host_rsp || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_HOST_RSP, &rsp); if (error == 0) printf("host_rsp[%d]\t\t0x%016lx\n", vcpu, rsp); } if (!error && (get_vmcs_link || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_LINK_POINTER, &addr); if (error == 0) printf("vmcs_pointer[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_vmcs_exit_interruption_info || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_INFO, &u64); if (error == 0) { printf("vmcs_exit_interruption_info[%d]\t0x%016lx\n", vcpu, u64); } } if (!error && (get_vmcs_exit_interruption_error || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_INTR_ERRCODE, &u64); if (error == 0) { printf("vmcs_exit_interruption_error[%d]\t0x%016lx\n", vcpu, u64); } } if (!error && (get_vmcs_interruptibility || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_INTERRUPTIBILITY, &u64); if (error == 0) { printf("vmcs_guest_interruptibility[%d]\t0x%016lx\n", vcpu, u64); } } if (!error && (get_vmcs_exit_qualification || get_all)) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_QUALIFICATION, &u64); if (error == 0) printf("vmcs_exit_qualification[%d]\t0x%016lx\n", vcpu, u64); } return (error); } static int get_misc_vmcb(struct vmctx *ctx, int vcpu) { uint64_t ctl, addr; int error; if (get_vmcb_intercept || get_all) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_CR_INTERCEPT, 4, &ctl); if (error == 0) printf("cr_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_DR_INTERCEPT, 4, &ctl); if (error == 0) printf("dr_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXC_INTERCEPT, 4, &ctl); if (error == 0) printf("exc_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_INST1_INTERCEPT, 4, &ctl); if (error == 0) printf("inst1_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_INST2_INTERCEPT, 4, &ctl); if (error == 0) printf("inst2_intercept[%d]\t0x%08x\n", vcpu, (int)ctl); } if (!error && (get_vmcb_tlb_ctrl || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_TLB_CTRL, 4, &ctl); if (error == 0) printf("TLB ctrl[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_vmcb_exit_details || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINFO1, 8, &ctl); if (error == 0) printf("exitinfo1[%d]\t0x%016lx\n", vcpu, ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINFO2, 8, &ctl); if (error == 0) printf("exitinfo2[%d]\t0x%016lx\n", vcpu, ctl); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXITINTINFO, 8, &ctl); if (error == 0) printf("exitintinfo[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_vmcb_virq || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_VIRQ, 8, &ctl); if (error == 0) printf("v_irq/tpr[%d]\t0x%016lx\n", vcpu, ctl); } if (!error && (get_apic_access_addr || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_BAR, 8, &addr); if (error == 0) printf("AVIC apic_bar[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_virtual_apic_addr || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_PAGE, 8, &addr); if (error == 0) printf("AVIC backing page[%d]\t0x%016lx\n", vcpu, addr); } if (!error && (get_avic_table || get_all)) { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_LT, 8, &addr); if (error == 0) printf("AVIC logical table[%d]\t0x%016lx\n", vcpu, addr); error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_AVIC_PT, 8, &addr); if (error == 0) printf("AVIC physical table[%d]\t0x%016lx\n", vcpu, addr); } return (error); } static struct option * setup_options(bool cpu_intel) { const struct option common_opts[] = { { "vm", REQ_ARG, 0, VMNAME }, { "cpu", REQ_ARG, 0, VCPU }, { "set-mem", REQ_ARG, 0, SET_MEM }, { "set-efer", REQ_ARG, 0, SET_EFER }, { "set-cr0", REQ_ARG, 0, SET_CR0 }, { "set-cr3", REQ_ARG, 0, SET_CR3 }, { "set-cr4", REQ_ARG, 0, SET_CR4 }, { "set-dr7", REQ_ARG, 0, SET_DR7 }, { "set-rsp", REQ_ARG, 0, SET_RSP }, { "set-rip", REQ_ARG, 0, SET_RIP }, { "set-rax", REQ_ARG, 0, SET_RAX }, { "set-rflags", REQ_ARG, 0, SET_RFLAGS }, { "desc-base", REQ_ARG, 0, DESC_BASE }, { "desc-limit", REQ_ARG, 0, DESC_LIMIT }, { "desc-access",REQ_ARG, 0, DESC_ACCESS }, { "set-cs", REQ_ARG, 0, SET_CS }, { "set-ds", REQ_ARG, 0, SET_DS }, { "set-es", REQ_ARG, 0, SET_ES }, { "set-fs", REQ_ARG, 0, SET_FS }, { "set-gs", REQ_ARG, 0, SET_GS }, { "set-ss", REQ_ARG, 0, SET_SS }, { "set-tr", REQ_ARG, 0, SET_TR }, { "set-ldtr", REQ_ARG, 0, SET_LDTR }, { "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE }, { "set-exception-bitmap", REQ_ARG, 0, SET_EXCEPTION_BITMAP }, { "capname", REQ_ARG, 0, CAPNAME }, { "unassign-pptdev", REQ_ARG, 0, UNASSIGN_PPTDEV }, { "setcap", REQ_ARG, 0, SET_CAP }, { "get-gpa-pmap", REQ_ARG, 0, GET_GPA_PMAP }, { "assert-lapic-lvt", REQ_ARG, 0, ASSERT_LAPIC_LVT }, + { "get-rtc-time", NO_ARG, &get_rtc_time, 1 }, + { "set-rtc-time", REQ_ARG, 0, SET_RTC_TIME }, + { "rtc-nvram-offset", REQ_ARG, 0, RTC_NVRAM_OFFSET }, + { "get-rtc-nvram", NO_ARG, &get_rtc_nvram, 1 }, + { "set-rtc-nvram", REQ_ARG, 0, SET_RTC_NVRAM }, { "getcap", NO_ARG, &getcap, 1 }, { "get-stats", NO_ARG, &get_stats, 1 }, { "get-desc-ds",NO_ARG, &get_desc_ds, 1 }, { "set-desc-ds",NO_ARG, &set_desc_ds, 1 }, { "get-desc-es",NO_ARG, &get_desc_es, 1 }, { "set-desc-es",NO_ARG, &set_desc_es, 1 }, { "get-desc-ss",NO_ARG, &get_desc_ss, 1 }, { "set-desc-ss",NO_ARG, &set_desc_ss, 1 }, { "get-desc-cs",NO_ARG, &get_desc_cs, 1 }, { "set-desc-cs",NO_ARG, &set_desc_cs, 1 }, { "get-desc-fs",NO_ARG, &get_desc_fs, 1 }, { "set-desc-fs",NO_ARG, &set_desc_fs, 1 }, { "get-desc-gs",NO_ARG, &get_desc_gs, 1 }, { "set-desc-gs",NO_ARG, &set_desc_gs, 1 }, { "get-desc-tr",NO_ARG, &get_desc_tr, 1 }, { "set-desc-tr",NO_ARG, &set_desc_tr, 1 }, { "set-desc-ldtr", NO_ARG, &set_desc_ldtr, 1 }, { "get-desc-ldtr", NO_ARG, &get_desc_ldtr, 1 }, { "set-desc-gdtr", NO_ARG, &set_desc_gdtr, 1 }, { "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 }, { "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 }, { "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 }, { "get-lowmem", NO_ARG, &get_lowmem, 1 }, { "get-highmem",NO_ARG, &get_highmem, 1 }, { "get-efer", NO_ARG, &get_efer, 1 }, { "get-cr0", NO_ARG, &get_cr0, 1 }, { "get-cr3", NO_ARG, &get_cr3, 1 }, { "get-cr4", NO_ARG, &get_cr4, 1 }, { "get-dr7", NO_ARG, &get_dr7, 1 }, { "get-rsp", NO_ARG, &get_rsp, 1 }, { "get-rip", NO_ARG, &get_rip, 1 }, { "get-rax", NO_ARG, &get_rax, 1 }, { "get-rbx", NO_ARG, &get_rbx, 1 }, { "get-rcx", NO_ARG, &get_rcx, 1 }, { "get-rdx", NO_ARG, &get_rdx, 1 }, { "get-rsi", NO_ARG, &get_rsi, 1 }, { "get-rdi", NO_ARG, &get_rdi, 1 }, { "get-rbp", NO_ARG, &get_rbp, 1 }, { "get-r8", NO_ARG, &get_r8, 1 }, { "get-r9", NO_ARG, &get_r9, 1 }, { "get-r10", NO_ARG, &get_r10, 1 }, { "get-r11", NO_ARG, &get_r11, 1 }, { "get-r12", NO_ARG, &get_r12, 1 }, { "get-r13", NO_ARG, &get_r13, 1 }, { "get-r14", NO_ARG, &get_r14, 1 }, { "get-r15", NO_ARG, &get_r15, 1 }, { "get-rflags", NO_ARG, &get_rflags, 1 }, { "get-cs", NO_ARG, &get_cs, 1 }, { "get-ds", NO_ARG, &get_ds, 1 }, { "get-es", NO_ARG, &get_es, 1 }, { "get-fs", NO_ARG, &get_fs, 1 }, { "get-gs", NO_ARG, &get_gs, 1 }, { "get-ss", NO_ARG, &get_ss, 1 }, { "get-tr", NO_ARG, &get_tr, 1 }, { "get-ldtr", NO_ARG, &get_ldtr, 1 }, { "get-eptp", NO_ARG, &get_eptp, 1 }, { "get-exception-bitmap", NO_ARG, &get_exception_bitmap, 1 }, { "get-io-bitmap-address", NO_ARG, &get_io_bitmap, 1 }, { "get-tsc-offset", NO_ARG, &get_tsc_offset, 1 }, { "get-msr-bitmap", NO_ARG, &get_msr_bitmap, 1 }, { "get-msr-bitmap-address", NO_ARG, &get_msr_bitmap_address, 1 }, { "get-guest-pat", NO_ARG, &get_guest_pat, 1 }, { "get-guest-sysenter", NO_ARG, &get_guest_sysenter, 1 }, { "get-exit-reason", NO_ARG, &get_exit_reason, 1 }, { "get-x2apic-state", NO_ARG, &get_x2apic_state, 1 }, { "get-all", NO_ARG, &get_all, 1 }, { "run", NO_ARG, &run, 1 }, { "create", NO_ARG, &create, 1 }, { "destroy", NO_ARG, &destroy, 1 }, { "inject-nmi", NO_ARG, &inject_nmi, 1 }, { "force-reset", NO_ARG, &force_reset, 1 }, { "force-poweroff", NO_ARG, &force_poweroff, 1 }, { "get-active-cpus", NO_ARG, &get_active_cpus, 1 }, { "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 }, { "get-intinfo", NO_ARG, &get_intinfo, 1 }, }; const struct option intel_opts[] = { { "get-vmcs-pinbased-ctls", NO_ARG, &get_pinbased_ctls, 1 }, { "get-vmcs-procbased-ctls", NO_ARG, &get_procbased_ctls, 1 }, { "get-vmcs-procbased-ctls2", NO_ARG, &get_procbased_ctls2, 1 }, { "get-vmcs-guest-linear-address", NO_ARG, &get_vmcs_gla, 1 }, { "get-vmcs-guest-physical-address", NO_ARG, &get_vmcs_gpa, 1 }, { "get-vmcs-entry-interruption-info", NO_ARG, &get_vmcs_entry_interruption_info, 1}, { "get-vmcs-cr0-mask", NO_ARG, &get_cr0_mask, 1 }, { "get-vmcs-cr0-shadow", NO_ARG,&get_cr0_shadow, 1 }, { "get-vmcs-cr4-mask", NO_ARG, &get_cr4_mask, 1 }, { "get-vmcs-cr4-shadow", NO_ARG, &get_cr4_shadow, 1 }, { "get-vmcs-cr3-targets", NO_ARG, &get_cr3_targets, 1 }, { "get-vmcs-tpr-threshold", NO_ARG, &get_tpr_threshold, 1 }, { "get-vmcs-vpid", NO_ARG, &get_vpid_asid, 1 }, { "get-vmcs-exit-ctls", NO_ARG, &get_exit_ctls, 1 }, { "get-vmcs-entry-ctls", NO_ARG, &get_entry_ctls, 1 }, { "get-vmcs-instruction-error", NO_ARG, &get_inst_err, 1 }, { "get-vmcs-host-pat", NO_ARG, &get_host_pat, 1 }, { "get-vmcs-host-cr0", NO_ARG, &get_host_cr0, 1 }, { "set-vmcs-entry-interruption-info", REQ_ARG, 0, SET_VMCS_ENTRY_INTERRUPTION_INFO }, { "get-vmcs-exit-qualification", NO_ARG, &get_vmcs_exit_qualification, 1 }, { "get-vmcs-interruptibility", NO_ARG, &get_vmcs_interruptibility, 1 }, { "get-vmcs-exit-interruption-error", NO_ARG, &get_vmcs_exit_interruption_error, 1 }, { "get-vmcs-exit-interruption-info", NO_ARG, &get_vmcs_exit_interruption_info, 1 }, { "get-vmcs-link", NO_ARG, &get_vmcs_link, 1 }, { "get-vmcs-host-cr3", NO_ARG, &get_host_cr3, 1 }, { "get-vmcs-host-cr4", NO_ARG, &get_host_cr4, 1 }, { "get-vmcs-host-rip", NO_ARG, &get_host_rip, 1 }, { "get-vmcs-host-rsp", NO_ARG, &get_host_rsp, 1 }, { "get-apic-access-address", NO_ARG, &get_apic_access_addr, 1}, { "get-virtual-apic-address", NO_ARG, &get_virtual_apic_addr, 1} }; const struct option amd_opts[] = { { "get-vmcb-intercepts", NO_ARG, &get_vmcb_intercept, 1 }, { "get-vmcb-asid", NO_ARG, &get_vpid_asid, 1 }, { "get-vmcb-exit-details", NO_ARG, &get_vmcb_exit_details, 1 }, { "get-vmcb-tlb-ctrl", NO_ARG, &get_vmcb_tlb_ctrl, 1 }, { "get-vmcb-virq", NO_ARG, &get_vmcb_virq, 1 }, { "get-avic-apic-bar", NO_ARG, &get_apic_access_addr, 1 }, { "get-avic-backing-page", NO_ARG, &get_virtual_apic_addr, 1 }, { "get-avic-table", NO_ARG, &get_avic_table, 1 } }; const struct option null_opt = { NULL, 0, NULL, 0 }; struct option *all_opts; char *cp; int optlen; optlen = sizeof(common_opts); if (cpu_intel) optlen += sizeof(intel_opts); else optlen += sizeof(amd_opts); optlen += sizeof(null_opt); all_opts = malloc(optlen); cp = (char *)all_opts; memcpy(cp, common_opts, sizeof(common_opts)); cp += sizeof(common_opts); if (cpu_intel) { memcpy(cp, intel_opts, sizeof(intel_opts)); cp += sizeof(intel_opts); } else { memcpy(cp, amd_opts, sizeof(amd_opts)); cp += sizeof(amd_opts); } memcpy(cp, &null_opt, sizeof(null_opt)); cp += sizeof(null_opt); return (all_opts); } +static const char * +wday_str(int idx) +{ + static const char *weekdays[] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" + }; + + if (idx >= 0 && idx < 7) + return (weekdays[idx]); + else + return ("UNK"); +} + +static const char * +mon_str(int idx) +{ + static const char *months[] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" + }; + + if (idx >= 0 && idx < 12) + return (months[idx]); + else + return ("UNK"); +} + int main(int argc, char *argv[]) { char *vmname; int error, ch, vcpu, ptenum; vm_paddr_t gpa, gpa_pmap; size_t len; struct vm_exit vmexit; uint64_t rax, cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat; uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2]; struct vmctx *ctx; int wired; cpuset_t cpus; bool cpu_intel; uint64_t cs, ds, es, fs, gs, ss, tr, ldtr; + struct tm tm; struct option *opts; cpu_intel = cpu_vendor_intel(); opts = setup_options(cpu_intel); vcpu = 0; vmname = NULL; assert_lapic_lvt = -1; progname = basename(argv[0]); while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) { switch (ch) { case 0: break; case VMNAME: vmname = optarg; break; case VCPU: vcpu = atoi(optarg); break; case SET_MEM: memsize = atoi(optarg) * MB; memsize = roundup(memsize, 2 * MB); break; case SET_EFER: efer = strtoul(optarg, NULL, 0); set_efer = 1; break; case SET_CR0: cr0 = strtoul(optarg, NULL, 0); set_cr0 = 1; break; case SET_CR3: cr3 = strtoul(optarg, NULL, 0); set_cr3 = 1; break; case SET_CR4: cr4 = strtoul(optarg, NULL, 0); set_cr4 = 1; break; case SET_DR7: dr7 = strtoul(optarg, NULL, 0); set_dr7 = 1; break; case SET_RSP: rsp = strtoul(optarg, NULL, 0); set_rsp = 1; break; case SET_RIP: rip = strtoul(optarg, NULL, 0); set_rip = 1; break; case SET_RAX: rax = strtoul(optarg, NULL, 0); set_rax = 1; break; case SET_RFLAGS: rflags = strtoul(optarg, NULL, 0); set_rflags = 1; break; case DESC_BASE: desc_base = strtoul(optarg, NULL, 0); break; case DESC_LIMIT: desc_limit = strtoul(optarg, NULL, 0); break; case DESC_ACCESS: desc_access = strtoul(optarg, NULL, 0); break; case SET_CS: cs = strtoul(optarg, NULL, 0); set_cs = 1; break; case SET_DS: ds = strtoul(optarg, NULL, 0); set_ds = 1; break; case SET_ES: es = strtoul(optarg, NULL, 0); set_es = 1; break; case SET_FS: fs = strtoul(optarg, NULL, 0); set_fs = 1; break; case SET_GS: gs = strtoul(optarg, NULL, 0); set_gs = 1; break; case SET_SS: ss = strtoul(optarg, NULL, 0); set_ss = 1; break; case SET_TR: tr = strtoul(optarg, NULL, 0); set_tr = 1; break; case SET_LDTR: ldtr = strtoul(optarg, NULL, 0); set_ldtr = 1; break; case SET_X2APIC_STATE: x2apic_state = strtol(optarg, NULL, 0); set_x2apic_state = 1; break; case SET_EXCEPTION_BITMAP: exception_bitmap = strtoul(optarg, NULL, 0); set_exception_bitmap = 1; break; case SET_VMCS_ENTRY_INTERRUPTION_INFO: vmcs_entry_interruption_info = strtoul(optarg, NULL, 0); set_vmcs_entry_interruption_info = 1; break; case SET_CAP: capval = strtoul(optarg, NULL, 0); setcap = 1; break; + case SET_RTC_TIME: + rtc_secs = strtoul(optarg, NULL, 0); + set_rtc_time = 1; + break; + case SET_RTC_NVRAM: + rtc_nvram_value = (uint8_t)strtoul(optarg, NULL, 0); + set_rtc_nvram = 1; + break; + case RTC_NVRAM_OFFSET: + rtc_nvram_offset = strtoul(optarg, NULL, 0); + break; case GET_GPA_PMAP: gpa_pmap = strtoul(optarg, NULL, 0); get_gpa_pmap = 1; break; case CAPNAME: capname = optarg; break; case UNASSIGN_PPTDEV: unassign_pptdev = 1; if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3) usage(cpu_intel); break; case ASSERT_LAPIC_LVT: assert_lapic_lvt = atoi(optarg); break; default: usage(cpu_intel); } } argc -= optind; argv += optind; if (vmname == NULL) usage(cpu_intel); error = 0; if (!error && create) error = vm_create(vmname); if (!error) { ctx = vm_open(vmname); if (ctx == NULL) { printf("VM:%s is not created.\n", vmname); exit (1); } } if (!error && memsize) error = vm_setup_memory(ctx, memsize, VM_MMAP_NONE); if (!error && set_efer) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer); if (!error && set_cr0) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR0, cr0); if (!error && set_cr3) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR3, cr3); if (!error && set_cr4) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CR4, cr4); if (!error && set_dr7) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DR7, dr7); if (!error && set_rsp) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RSP, rsp); if (!error && set_rip) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, rip); if (!error && set_rax) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, rax); if (!error && set_rflags) { error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RFLAGS, rflags); } if (!error && set_desc_ds) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, desc_base, desc_limit, desc_access); } if (!error && set_desc_es) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_ES, desc_base, desc_limit, desc_access); } if (!error && set_desc_ss) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, desc_base, desc_limit, desc_access); } if (!error && set_desc_cs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, desc_base, desc_limit, desc_access); } if (!error && set_desc_fs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_FS, desc_base, desc_limit, desc_access); } if (!error && set_desc_gs) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GS, desc_base, desc_limit, desc_access); } if (!error && set_desc_tr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, desc_base, desc_limit, desc_access); } if (!error && set_desc_ldtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_LDTR, desc_base, desc_limit, desc_access); } if (!error && set_desc_gdtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, desc_base, desc_limit, 0); } if (!error && set_desc_idtr) { error = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, desc_base, desc_limit, 0); } if (!error && set_cs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_CS, cs); if (!error && set_ds) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_DS, ds); if (!error && set_es) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_ES, es); if (!error && set_fs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_FS, fs); if (!error && set_gs) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_GS, gs); if (!error && set_ss) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_SS, ss); if (!error && set_tr) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_TR, tr); if (!error && set_ldtr) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr); if (!error && set_x2apic_state) error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); if (!error && unassign_pptdev) error = vm_unassign_pptdev(ctx, bus, slot, func); if (!error && set_exception_bitmap) { if (cpu_intel) error = vm_set_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, exception_bitmap); else error = vm_set_vmcb_field(ctx, vcpu, VMCB_OFF_EXC_INTERCEPT, 4, exception_bitmap); } if (!error && cpu_intel && set_vmcs_entry_interruption_info) { error = vm_set_vmcs_field(ctx, vcpu, VMCS_ENTRY_INTR_INFO, vmcs_entry_interruption_info); } if (!error && inject_nmi) { error = vm_inject_nmi(ctx, vcpu); } if (!error && assert_lapic_lvt != -1) { error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt); } if (!error && (get_lowmem || get_all)) { gpa = 0; error = vm_get_memory_seg(ctx, gpa, &len, &wired); if (error == 0) printf("lowmem\t\t0x%016lx/%ld%s\n", gpa, len, wired ? " wired" : ""); } if (!error && (get_highmem || get_all)) { gpa = 4 * GB; error = vm_get_memory_seg(ctx, gpa, &len, &wired); if (error == 0) printf("highmem\t\t0x%016lx/%ld%s\n", gpa, len, wired ? " wired" : ""); } if (!error) error = get_all_registers(ctx, vcpu); if (!error) error = get_all_segments(ctx, vcpu); if (!error) { if (cpu_intel) error = get_misc_vmcs(ctx, vcpu); else error = get_misc_vmcb(ctx, vcpu); } if (!error && (get_x2apic_state || get_all)) { error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); if (error == 0) printf("x2apic_state[%d]\t%d\n", vcpu, x2apic_state); } if (!error && (get_eptp || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_EPTP, &eptp); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_NPT_BASE, 8, &eptp); if (error == 0) printf("%s[%d]\t\t0x%016lx\n", cpu_intel ? "eptp" : "rvi/npt", vcpu, eptp); } if (!error && (get_exception_bitmap || get_all)) { if(cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXCEPTION_BITMAP, &bm); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXC_INTERCEPT, 4, &bm); if (error == 0) printf("exception_bitmap[%d]\t%#lx\n", vcpu, bm); } if (!error && (get_io_bitmap || get_all)) { if (cpu_intel) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_A, &bm); if (error == 0) printf("io_bitmap_a[%d]\t%#lx\n", vcpu, bm); error = vm_get_vmcs_field(ctx, vcpu, VMCS_IO_BITMAP_B, &bm); if (error == 0) printf("io_bitmap_b[%d]\t%#lx\n", vcpu, bm); } else { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_IO_PERM, 8, &bm); if (error == 0) printf("io_bitmap[%d]\t%#lx\n", vcpu, bm); } } if (!error && (get_tsc_offset || get_all)) { uint64_t tscoff; if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_TSC_OFFSET, &tscoff); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_TSC_OFFSET, 8, &tscoff); if (error == 0) printf("tsc_offset[%d]\t0x%016lx\n", vcpu, tscoff); } if (!error && (get_msr_bitmap_address || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_MSR_PERM, 8, &addr); if (error == 0) printf("msr_bitmap[%d]\t\t%#lx\n", vcpu, addr); } if (!error && (get_msr_bitmap || get_all)) { if (cpu_intel) { error = vm_get_vmcs_field(ctx, vcpu, VMCS_MSR_BITMAP, &addr); } else { error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_MSR_PERM, 8, &addr); } if (error == 0) error = dump_msr_bitmap(vcpu, addr, cpu_intel); } if (!error && (get_vpid_asid || get_all)) { uint64_t vpid; if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_VPID, &vpid); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_ASID, 4, &vpid); if (error == 0) printf("%s[%d]\t\t0x%04lx\n", cpu_intel ? "vpid" : "asid", vcpu, vpid); } if (!error && (get_guest_pat || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_PAT, &pat); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_GUEST_PAT, 8, &pat); if (error == 0) printf("guest_pat[%d]\t\t0x%016lx\n", vcpu, pat); } if (!error && (get_guest_sysenter || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_CS, &cs); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_SYSENTER_CS, 8, &cs); if (error == 0) printf("guest_sysenter_cs[%d]\t%#lx\n", vcpu, cs); if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_ESP, &rsp); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_SYSENTER_ESP, 8, &rsp); if (error == 0) printf("guest_sysenter_sp[%d]\t%#lx\n", vcpu, rsp); if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_GUEST_IA32_SYSENTER_EIP, &rip); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_SYSENTER_EIP, 8, &rip); if (error == 0) printf("guest_sysenter_ip[%d]\t%#lx\n", vcpu, rip); } if (!error && (get_exit_reason || get_all)) { if (cpu_intel) error = vm_get_vmcs_field(ctx, vcpu, VMCS_EXIT_REASON, &u64); else error = vm_get_vmcb_field(ctx, vcpu, VMCB_OFF_EXIT_REASON, 8, &u64); if (error == 0) printf("exit_reason[%d]\t%#lx\n", vcpu, u64); } if (!error && setcap) { int captype; captype = vm_capability_name2type(capname); error = vm_set_capability(ctx, vcpu, captype, capval); if (error != 0 && errno == ENOENT) printf("Capability \"%s\" is not available\n", capname); } if (!error && get_gpa_pmap) { error = vm_get_gpa_pmap(ctx, gpa_pmap, pteval, &ptenum); if (error == 0) { printf("gpa %#lx:", gpa_pmap); pte = &pteval[0]; while (ptenum-- > 0) printf(" %#lx", *pte++); printf("\n"); + } + } + + if (!error && set_rtc_nvram) + error = vm_rtc_write(ctx, rtc_nvram_offset, rtc_nvram_value); + + if (!error && (get_rtc_nvram || get_all)) { + error = vm_rtc_read(ctx, rtc_nvram_offset, &rtc_nvram_value); + if (error == 0) { + printf("rtc nvram[%03d]: 0x%02x\n", rtc_nvram_offset, + rtc_nvram_value); + } + } + + if (!error && set_rtc_time) + error = vm_rtc_settime(ctx, rtc_secs); + + if (!error && (get_rtc_time || get_all)) { + error = vm_rtc_gettime(ctx, &rtc_secs); + if (error == 0) { + gmtime_r(&rtc_secs, &tm); + printf("rtc time %#lx: %s %s %02d %02d:%02d:%02d %d\n", + rtc_secs, wday_str(tm.tm_wday), mon_str(tm.tm_mon), + tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, + 1900 + tm.tm_year); } } if (!error && (getcap || get_all)) { int captype, val, getcaptype; if (getcap && capname) getcaptype = vm_capability_name2type(capname); else getcaptype = -1; for (captype = 0; captype < VM_CAP_MAX; captype++) { if (getcaptype >= 0 && captype != getcaptype) continue; error = vm_get_capability(ctx, vcpu, captype, &val); if (error == 0) { printf("Capability \"%s\" is %s on vcpu %d\n", vm_capability_type2name(captype), val ? "set" : "not set", vcpu); } else if (errno == ENOENT) { error = 0; printf("Capability \"%s\" is not available\n", vm_capability_type2name(captype)); } else { break; } } } if (!error && (get_active_cpus || get_all)) { error = vm_active_cpus(ctx, &cpus); if (!error) print_cpus("active cpus", &cpus); } if (!error && (get_suspended_cpus || get_all)) { error = vm_suspended_cpus(ctx, &cpus); if (!error) print_cpus("suspended cpus", &cpus); } if (!error && (get_intinfo || get_all)) { error = vm_get_intinfo(ctx, vcpu, &info[0], &info[1]); if (!error) { print_intinfo("pending", info[0]); print_intinfo("current", info[1]); } } if (!error && (get_stats || get_all)) { int i, num_stats; uint64_t *stats; struct timeval tv; const char *desc; stats = vm_get_stats(ctx, vcpu, &tv, &num_stats); if (stats != NULL) { printf("vcpu%d stats:\n", vcpu); for (i = 0; i < num_stats; i++) { desc = vm_get_stat_desc(ctx, i); printf("%-40s\t%ld\n", desc, stats[i]); } } } if (!error && run) { error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); assert(error == 0); error = vm_run(ctx, vcpu, rip, &vmexit); if (error == 0) dump_vm_run_exitcode(&vmexit, vcpu); else printf("vm_run error %d\n", error); } if (!error && force_reset) error = vm_suspend(ctx, VM_SUSPEND_RESET); if (!error && force_poweroff) error = vm_suspend(ctx, VM_SUSPEND_POWEROFF); if (error) printf("errno = %d\n", errno); if (!error && destroy) vm_destroy(ctx); free (opts); exit(error); }