Index: head/usr.sbin/bhyve/bhyverun.c =================================================================== --- head/usr.sbin/bhyve/bhyverun.c (revision 257017) +++ head/usr.sbin/bhyve/bhyverun.c (revision 257018) @@ -1,685 +1,685 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "acpi.h" #include "inout.h" #include "dbgport.h" #include "mem.h" #include "mevent.h" #include "mptbl.h" #include "pci_emul.h" #include "xmsr.h" #include "ioapic.h" #include "spinup_ap.h" #include "rtc.h" #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ #define VMEXIT_SWITCH 0 /* force vcpu switch in mux mode */ #define VMEXIT_CONTINUE 1 /* continue from next instruction */ #define VMEXIT_RESTART 2 /* restart current instruction */ #define VMEXIT_ABORT 3 /* abort the vm run loop */ #define VMEXIT_RESET 4 /* guest machine has reset */ #define MB (1024UL * 1024) #define GB (1024UL * MB) typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); char *vmname; int guest_ncpus; static int pincpu = -1; static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; static int virtio_msix = 1; static int foundcpus; static int strictio; static int acpi; static char *progname; static const int BSP = 0; static int cpumask; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); struct vm_exit vmexit[VM_MAXCPU]; struct bhyvestats { uint64_t vmexit_bogus; uint64_t vmexit_bogus_switch; uint64_t vmexit_hlt; uint64_t vmexit_pause; uint64_t vmexit_mtrap; uint64_t vmexit_inst_emul; uint64_t cpu_switch_rotate; uint64_t cpu_switch_direct; int io_reset; } stats; struct mt_vmm_info { pthread_t mt_thr; struct vmctx *mt_ctx; int mt_vcpu; } mt_vmm_info[VM_MAXCPU]; static void usage(int code) { fprintf(stderr, - "Usage: %s [-aehAHIPW][-g ][-s ][-S ]" - "[-c vcpus][-p pincpu][-m mem]" - " \n" + "Usage: %s [-aehAHIPW] [-g ] [-s ] [-S ]\n" + " %*s [-c vcpus] [-p pincpu] [-m mem] \n" " -a: local apic is in XAPIC mode (default is X2APIC)\n" " -A: create an ACPI table\n" " -g: gdb port\n" " -c: # cpus (default 1)\n" " 
-p: pin vcpu 'n' to host cpu 'pincpu + n'\n" " -H: vmexit from the guest on hlt\n" " -I: present an ioapic to the guest\n" " -P: vmexit from the guest on pause\n" - " -W: force virtio to use single-vector MSI\n" - " -e: exit on unhandled i/o access\n" + " -W: force virtio to use single-vector MSI\n" + " -e: exit on unhandled I/O access\n" " -h: help\n" " -s: PCI slot config\n" " -S: legacy PCI slot config\n" " -m: memory size in MB\n", - progname); + progname, (int)strlen(progname), ""); exit(code); } void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { return (vm_map_gpa(ctx, gaddr, len)); } int fbsdrun_disable_x2apic(void) { return (disable_x2apic); } int fbsdrun_vmexit_on_pause(void) { return (guest_vmexit_on_pause); } int fbsdrun_vmexit_on_hlt(void) { return (guest_vmexit_on_hlt); } int fbsdrun_virtio_msix(void) { return (virtio_msix); } static void * fbsdrun_start_thread(void *param) { char tname[MAXCOMLEN + 1]; struct mt_vmm_info *mtp; int vcpu; mtp = param; vcpu = mtp->mt_vcpu; snprintf(tname, sizeof(tname), "%s vcpu %d", vmname, vcpu); pthread_set_name_np(mtp->mt_thr, tname); vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); /* not reached */ exit(1); return (NULL); } void fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) { int error; if (cpumask & (1 << vcpu)) { fprintf(stderr, "addcpu: attempting to add existing cpu %d\n", vcpu); exit(1); } cpumask |= 1 << vcpu; foundcpus++; /* * Set up the vmexit struct to allow execution to start * at the given RIP */ vmexit[vcpu].rip = rip; vmexit[vcpu].inst_length = 0; mt_vmm_info[vcpu].mt_ctx = ctx; mt_vmm_info[vcpu].mt_vcpu = vcpu; error = pthread_create(&mt_vmm_info[vcpu].mt_thr, NULL, fbsdrun_start_thread, &mt_vmm_info[vcpu]); assert(error == 0); } static int vmexit_catch_reset(void) { stats.io_reset++; return (VMEXIT_RESET); } static int vmexit_catch_inout(void) { return (VMEXIT_ABORT); } static int vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, uint32_t eax) { 
#if BHYVE_DEBUG /* * put guest-driven debug here */ #endif return (VMEXIT_CONTINUE); } static int vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int error; int bytes, port, in, out; uint32_t eax; int vcpu; vcpu = *pvcpu; port = vme->u.inout.port; bytes = vme->u.inout.bytes; eax = vme->u.inout.eax; in = vme->u.inout.in; out = !in; /* We don't deal with these */ if (vme->u.inout.string || vme->u.inout.rep) return (VMEXIT_ABORT); /* Special case of guest reset */ if (out && port == 0x64 && (uint8_t)eax == 0xFE) return (vmexit_catch_reset()); /* Extra-special case of host notifications */ if (out && port == GUEST_NIO_PORT) return (vmexit_handle_notify(ctx, vme, pvcpu, eax)); error = emulate_inout(ctx, vcpu, in, port, bytes, &eax, strictio); if (error == 0 && in) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, eax); if (error == 0) return (VMEXIT_CONTINUE); else { fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out", bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port); return (vmexit_catch_inout()); } } static int vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { fprintf(stderr, "vm exit rdmsr 0x%x, cpu %d\n", vme->u.msr.code, *pvcpu); return (VMEXIT_ABORT); } static int vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int newcpu; int retval = VMEXIT_CONTINUE; newcpu = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code,vme->u.msr.wval); return (retval); } static int vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) { int newcpu; int retval = VMEXIT_CONTINUE; newcpu = spinup_ap(ctx, *pvcpu, vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); return (retval); } static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { fprintf(stderr, "vm exit[%d]\n", *pvcpu); fprintf(stderr, "\treason\t\tVMX\n"); fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); fprintf(stderr, "\terror\t\t%d\n", vmexit->u.vmx.error); fprintf(stderr, 
"\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); fprintf(stderr, "\tqualification\t0x%016lx\n", vmexit->u.vmx.exit_qualification); return (VMEXIT_ABORT); } static int vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_bogus++; return (VMEXIT_RESTART); } static int vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_hlt++; /* * Just continue execution with the next instruction. We use * the HLT VM exit as a way to be friendly with the host * scheduler. */ return (VMEXIT_CONTINUE); } static int vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_pause++; return (VMEXIT_CONTINUE); } static int vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { stats.vmexit_mtrap++; return (VMEXIT_RESTART); } static int vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { int err; stats.vmexit_inst_emul++; err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, &vmexit->u.inst_emul.vie); if (err) { if (err == EINVAL) { fprintf(stderr, "Failed to emulate instruction at 0x%lx\n", vmexit->rip); } else if (err == ESRCH) { fprintf(stderr, "Unhandled memory access to 0x%lx\n", vmexit->u.inst_emul.gpa); } return (VMEXIT_ABORT); } return (VMEXIT_CONTINUE); } static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_INOUT] = vmexit_inout, [VM_EXITCODE_VMX] = vmexit_vmx, [VM_EXITCODE_BOGUS] = vmexit_bogus, [VM_EXITCODE_RDMSR] = vmexit_rdmsr, [VM_EXITCODE_WRMSR] = vmexit_wrmsr, [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, }; static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) { cpuset_t mask; int error, rc, prevcpu; enum vm_exitcode exitcode; if (pincpu >= 0) { CPU_ZERO(&mask); CPU_SET(pincpu + vcpu, &mask); error = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask); assert(error == 0); } while (1) { error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]); if (error != 
0) { /* * It is possible that 'vmmctl' or some other process * has transitioned the vcpu to CANNOT_RUN state right * before we tried to transition it to RUNNING. * * This is expected to be temporary so just retry. */ if (errno == EBUSY) continue; else break; } prevcpu = vcpu; exitcode = vmexit[vcpu].exitcode; if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", exitcode); exit(1); } rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); switch (rc) { case VMEXIT_CONTINUE: rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length; break; case VMEXIT_RESTART: rip = vmexit[vcpu].rip; break; case VMEXIT_RESET: exit(0); default: exit(1); } } fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); } static int num_vcpus_allowed(struct vmctx *ctx) { int tmp, error; error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); /* * The guest is allowed to spinup more than one processor only if the * UNRESTRICTED_GUEST capability is available. 
*/ if (error == 0) return (VM_MAXCPU); else return (1); } void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) { int err, tmp; if (fbsdrun_vmexit_on_hlt()) { err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); if (err < 0) { fprintf(stderr, "VM exit on HLT not supported\n"); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_HLT] = vmexit_hlt; } if (fbsdrun_vmexit_on_pause()) { /* * pause exit support required for this mode */ err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); if (err < 0) { fprintf(stderr, "SMP mux requested, no pause support\n"); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); if (cpu == BSP) handler[VM_EXITCODE_PAUSE] = vmexit_pause; } if (fbsdrun_disable_x2apic()) err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); else err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); if (err) { fprintf(stderr, "Unable to set x2apic state (%d)\n", err); exit(1); } vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); } int main(int argc, char *argv[]) { int c, error, gdb_port, err, ioapic, bvmcons; int max_vcpus; struct vmctx *ctx; uint64_t rip; size_t memsize; bvmcons = 0; progname = basename(argv[0]); gdb_port = 0; guest_ncpus = 1; ioapic = 0; memsize = 256 * MB; while ((c = getopt(argc, argv, "abehAHIPWp:g:c:s:S:m:")) != -1) { switch (c) { case 'a': disable_x2apic = 1; break; case 'A': acpi = 1; break; case 'b': bvmcons = 1; break; case 'p': pincpu = atoi(optarg); break; case 'c': guest_ncpus = atoi(optarg); break; case 'g': gdb_port = atoi(optarg); break; case 's': if (pci_parse_slot(optarg, 0) != 0) exit(1); else break; case 'S': if (pci_parse_slot(optarg, 1) != 0) exit(1); else break; case 'm': error = vm_parse_memsize(optarg, &memsize); if (error) errx(EX_USAGE, "invalid memsize '%s'", optarg); break; case 'H': guest_vmexit_on_hlt = 1; break; case 'I': ioapic = 1; break; case 'P': guest_vmexit_on_pause = 1; break; case 'e': strictio = 1; break; case 'W': virtio_msix = 
0; break; case 'h': usage(0); default: usage(1); } } argc -= optind; argv += optind; if (argc != 1) usage(1); vmname = argv[0]; ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(1); } max_vcpus = num_vcpus_allowed(ctx); if (guest_ncpus > max_vcpus) { fprintf(stderr, "%d vCPUs requested but only %d available\n", guest_ncpus, max_vcpus); exit(1); } fbsdrun_set_capabilities(ctx, BSP); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { fprintf(stderr, "Unable to setup memory (%d)\n", err); exit(1); } init_mem(); init_inout(); rtc_init(ctx); /* * Exit if a device emulation finds an error in its initialization */ if (init_pci(ctx) != 0) exit(1); if (ioapic) ioapic_init(0); if (gdb_port != 0) init_dbgport(gdb_port); if (bvmcons) init_bvmcons(); error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); /* * build the guest tables, MP etc. */ mptable_build(ctx, guest_ncpus, ioapic); if (acpi) { error = acpi_build(ctx, guest_ncpus, ioapic); assert(error == 0); } /* * Add CPU 0 */ fbsdrun_addcpu(ctx, BSP, rip); /* * Head off to the main event dispatch loop */ mevent_dispatch(); exit(1); } Index: head/usr.sbin/bhyveload/bhyveload.c =================================================================== --- head/usr.sbin/bhyveload/bhyveload.c (revision 257017) +++ head/usr.sbin/bhyveload/bhyveload.c (revision 257018) @@ -1,665 +1,667 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /*- * Copyright (c) 2011 Google, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include "userboot.h" #define MB (1024 * 1024UL) #define GB (1024 * 1024 * 1024UL) #define BSP 0 static char *host_base = "/"; static struct termios term, oldterm; static int disk_fd = -1; static char *vmname, *progname; static struct vmctx *ctx; static uint64_t gdtbase, cr3, rsp; static void cb_exit(void *arg, int v); /* * Console i/o callbacks */ static void cb_putc(void *arg, int ch) { char c = ch; write(1, &c, 1); } static int cb_getc(void *arg) { char c; if (read(0, &c, 1) == 1) return (c); return (-1); } static int cb_poll(void *arg) { int n; if (ioctl(0, FIONREAD, &n) >= 0) return (n > 0); return (0); } /* * Host filesystem i/o callbacks */ struct cb_file { int cf_isdir; size_t cf_size; struct stat cf_stat; union { int fd; DIR *dir; } cf_u; }; static int cb_open(void *arg, const char *filename, void **hp) { struct stat st; struct cb_file *cf; char path[PATH_MAX]; if (!host_base) return (ENOENT); strlcpy(path, host_base, PATH_MAX); if (path[strlen(path) - 1] == '/') path[strlen(path) - 1] = 0; strlcat(path, filename, PATH_MAX); cf = malloc(sizeof(struct cb_file)); if (stat(path, &cf->cf_stat) < 0) { free(cf); return (errno); } cf->cf_size = st.st_size; if (S_ISDIR(cf->cf_stat.st_mode)) { cf->cf_isdir = 1; 
cf->cf_u.dir = opendir(path); if (!cf->cf_u.dir) goto out; *hp = cf; return (0); } if (S_ISREG(cf->cf_stat.st_mode)) { cf->cf_isdir = 0; cf->cf_u.fd = open(path, O_RDONLY); if (cf->cf_u.fd < 0) goto out; *hp = cf; return (0); } out: free(cf); return (EINVAL); } static int cb_close(void *arg, void *h) { struct cb_file *cf = h; if (cf->cf_isdir) closedir(cf->cf_u.dir); else close(cf->cf_u.fd); free(cf); return (0); } static int cb_isdir(void *arg, void *h) { struct cb_file *cf = h; return (cf->cf_isdir); } static int cb_read(void *arg, void *h, void *buf, size_t size, size_t *resid) { struct cb_file *cf = h; ssize_t sz; if (cf->cf_isdir) return (EINVAL); sz = read(cf->cf_u.fd, buf, size); if (sz < 0) return (EINVAL); *resid = size - sz; return (0); } static int cb_readdir(void *arg, void *h, uint32_t *fileno_return, uint8_t *type_return, size_t *namelen_return, char *name) { struct cb_file *cf = h; struct dirent *dp; if (!cf->cf_isdir) return (EINVAL); dp = readdir(cf->cf_u.dir); if (!dp) return (ENOENT); /* * Note: d_namlen is in the range 0..255 and therefore less * than PATH_MAX so we don't need to test before copying. 
*/ *fileno_return = dp->d_fileno; *type_return = dp->d_type; *namelen_return = dp->d_namlen; memcpy(name, dp->d_name, dp->d_namlen); name[dp->d_namlen] = 0; return (0); } static int cb_seek(void *arg, void *h, uint64_t offset, int whence) { struct cb_file *cf = h; if (cf->cf_isdir) return (EINVAL); if (lseek(cf->cf_u.fd, offset, whence) < 0) return (errno); return (0); } static int cb_stat(void *arg, void *h, int *mode, int *uid, int *gid, uint64_t *size) { struct cb_file *cf = h; *mode = cf->cf_stat.st_mode; *uid = cf->cf_stat.st_uid; *gid = cf->cf_stat.st_gid; *size = cf->cf_stat.st_size; return (0); } /* * Disk image i/o callbacks */ static int cb_diskread(void *arg, int unit, uint64_t from, void *to, size_t size, size_t *resid) { ssize_t n; if (unit != 0 || disk_fd == -1) return (EIO); n = pread(disk_fd, to, size, from); if (n < 0) return (errno); *resid = size - n; return (0); } static int cb_diskioctl(void *arg, int unit, u_long cmd, void *data) { struct stat sb; if (unit != 0 || disk_fd == -1) return (EBADF); switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = 512; break; case DIOCGMEDIASIZE: if (fstat(disk_fd, &sb) == 0) *(off_t *)data = sb.st_size; else return (ENOTTY); break; default: return (ENOTTY); } return (0); } /* * Guest virtual machine i/o callbacks */ static int cb_copyin(void *arg, const void *from, uint64_t to, size_t size) { char *ptr; to &= 0x7fffffff; ptr = vm_map_gpa(ctx, to, size); if (ptr == NULL) return (EFAULT); memcpy(ptr, from, size); return (0); } static int cb_copyout(void *arg, uint64_t from, void *to, size_t size) { char *ptr; from &= 0x7fffffff; ptr = vm_map_gpa(ctx, from, size); if (ptr == NULL) return (EFAULT); memcpy(to, ptr, size); return (0); } static void cb_setreg(void *arg, int r, uint64_t v) { int error; enum vm_reg_name vmreg; vmreg = VM_REG_LAST; switch (r) { case 4: vmreg = VM_REG_GUEST_RSP; rsp = v; break; default: break; } if (vmreg == VM_REG_LAST) { printf("test_setreg(%d): not implemented\n", r); cb_exit(NULL, 
USERBOOT_EXIT_QUIT); } error = vm_set_register(ctx, BSP, vmreg, v); if (error) { perror("vm_set_register"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } } static void cb_setmsr(void *arg, int r, uint64_t v) { int error; enum vm_reg_name vmreg; vmreg = VM_REG_LAST; switch (r) { case MSR_EFER: vmreg = VM_REG_GUEST_EFER; break; default: break; } if (vmreg == VM_REG_LAST) { printf("test_setmsr(%d): not implemented\n", r); cb_exit(NULL, USERBOOT_EXIT_QUIT); } error = vm_set_register(ctx, BSP, vmreg, v); if (error) { perror("vm_set_msr"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } } static void cb_setcr(void *arg, int r, uint64_t v) { int error; enum vm_reg_name vmreg; vmreg = VM_REG_LAST; switch (r) { case 0: vmreg = VM_REG_GUEST_CR0; break; case 3: vmreg = VM_REG_GUEST_CR3; cr3 = v; break; case 4: vmreg = VM_REG_GUEST_CR4; break; default: break; } if (vmreg == VM_REG_LAST) { printf("test_setcr(%d): not implemented\n", r); cb_exit(NULL, USERBOOT_EXIT_QUIT); } error = vm_set_register(ctx, BSP, vmreg, v); if (error) { perror("vm_set_cr"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } } static void cb_setgdt(void *arg, uint64_t base, size_t size) { int error; error = vm_set_desc(ctx, BSP, VM_REG_GUEST_GDTR, base, size - 1, 0); if (error != 0) { perror("vm_set_desc(gdt)"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } gdtbase = base; } static void cb_exec(void *arg, uint64_t rip) { int error; error = vm_setup_freebsd_registers(ctx, BSP, rip, cr3, gdtbase, rsp); if (error) { perror("vm_setup_freebsd_registers"); cb_exit(NULL, USERBOOT_EXIT_QUIT); } cb_exit(NULL, 0); } /* * Misc */ static void cb_delay(void *arg, int usec) { usleep(usec); } static void cb_exit(void *arg, int v) { tcsetattr(0, TCSAFLUSH, &oldterm); exit(v); } static void cb_getmem(void *arg, uint64_t *ret_lowmem, uint64_t *ret_highmem) { vm_get_memory_seg(ctx, 0, ret_lowmem, NULL); vm_get_memory_seg(ctx, 4 * GB, ret_highmem, NULL); } struct env { const char *str; /* name=value */ SLIST_ENTRY(env) next; }; static SLIST_HEAD(envhead, env) 
envhead; static void addenv(const char *str) { struct env *env; env = malloc(sizeof(struct env)); env->str = str; SLIST_INSERT_HEAD(&envhead, env, next); } static const char * cb_getenv(void *arg, int num) { int i; struct env *env; i = 0; SLIST_FOREACH(env, &envhead, next) { if (i == num) return (env->str); i++; } return (NULL); } static struct loader_callbacks cb = { .getc = cb_getc, .putc = cb_putc, .poll = cb_poll, .open = cb_open, .close = cb_close, .isdir = cb_isdir, .read = cb_read, .readdir = cb_readdir, .seek = cb_seek, .stat = cb_stat, .diskread = cb_diskread, .diskioctl = cb_diskioctl, .copyin = cb_copyin, .copyout = cb_copyout, .setreg = cb_setreg, .setmsr = cb_setmsr, .setcr = cb_setcr, .setgdt = cb_setgdt, .exec = cb_exec, .delay = cb_delay, .exit = cb_exit, .getmem = cb_getmem, .getenv = cb_getenv, }; static void usage(void) { fprintf(stderr, - "usage: %s [-m mem-size][-d ] [-h ] " - "[-e ] \n", progname); + "usage: %s [-m mem-size] [-d ] [-h ]\n" + " %*s [-e ] \n", progname, + (int)strlen(progname), ""); exit(1); } int main(int argc, char** argv) { void *h; void (*func)(struct loader_callbacks *, void *, int, int); uint64_t mem_size; int opt, error; char *disk_image; - progname = argv[0]; + progname = basename(argv[0]); mem_size = 256 * MB; disk_image = NULL; while ((opt = getopt(argc, argv, "d:e:h:m:")) != -1) { switch (opt) { case 'd': disk_image = optarg; break; case 'e': addenv(optarg); break; case 'h': host_base = optarg; break; case 'm': error = vm_parse_memsize(optarg, &mem_size); if (error != 0) errx(EX_USAGE, "Invalid memsize '%s'", optarg); break; case '?': usage(); } } argc -= optind; argv += optind; if (argc != 1) usage(); vmname = argv[0]; error = vm_create(vmname); if (error != 0 && errno != EEXIST) { perror("vm_create"); exit(1); } ctx = vm_open(vmname); if (ctx == NULL) { perror("vm_open"); exit(1); } error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL); if (error) { perror("vm_setup_memory"); exit(1); } tcgetattr(0, &term); oldterm = 
term; term.c_lflag &= ~(ICANON|ECHO); term.c_iflag &= ~ICRNL; tcsetattr(0, TCSAFLUSH, &term); h = dlopen("/boot/userboot.so", RTLD_LOCAL); if (!h) { printf("%s\n", dlerror()); return (1); } func = dlsym(h, "loader_main"); if (!func) { printf("%s\n", dlerror()); return (1); } if (disk_image) { disk_fd = open(disk_image, O_RDONLY); } addenv("smbios.bios.vendor=BHYVE"); addenv("boot_serial=1"); func(&cb, NULL, USERBOOT_VERSION_3, disk_fd >= 0); }