Changeset View
Standalone View
usr.sbin/bhyve/bhyverun.c
Show All 30 Lines | |||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/types.h> | #include <sys/types.h> | ||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | ||||
#include <sys/capsicum.h> | #include <sys/capsicum.h> | ||||
#endif | #endif | ||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/stat.h> | |||||
#include <sys/time.h> | #include <sys/time.h> | ||||
#include <amd64/vmm/intel/vmcs.h> | #include <amd64/vmm/intel/vmcs.h> | ||||
#include <machine/atomic.h> | #include <machine/atomic.h> | ||||
#include <machine/segments.h> | #include <machine/segments.h> | ||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | ||||
Show All 29 Lines | |||||
#include "ioapic.h" | #include "ioapic.h" | ||||
#include "mem.h" | #include "mem.h" | ||||
#include "mevent.h" | #include "mevent.h" | ||||
#include "mptbl.h" | #include "mptbl.h" | ||||
#include "pci_emul.h" | #include "pci_emul.h" | ||||
#include "pci_irq.h" | #include "pci_irq.h" | ||||
#include "pci_lpc.h" | #include "pci_lpc.h" | ||||
#include "smbiostbl.h" | #include "smbiostbl.h" | ||||
#include "snapshot.h" | |||||
#include "xmsr.h" | #include "xmsr.h" | ||||
#include "spinup_ap.h" | #include "spinup_ap.h" | ||||
#include "rtc.h" | #include "rtc.h" | ||||
#include <sys/socket.h> | |||||
#include <sys/un.h> | |||||
#include <sys/types.h> | |||||
#include <fcntl.h> | |||||
#include <libxo/xo.h> | |||||
#include <ucl.h> | |||||
#include <unistd.h> | |||||
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ | #define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ | ||||
#define MB (1024UL * 1024) | #define MB (1024UL * 1024) | ||||
#define GB (1024UL * MB) | #define GB (1024UL * MB) | ||||
static const char * const vmx_exit_reason_desc[] = { | static const char * const vmx_exit_reason_desc[] = { | ||||
[EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", | [EXIT_REASON_EXCEPTION] = "Exception or non-maskable interrupt (NMI)", | ||||
[EXIT_REASON_EXT_INTR] = "External interrupt", | [EXIT_REASON_EXT_INTR] = "External interrupt", | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | static const char * const vmx_exit_reason_desc[] = { | ||||
[EXIT_REASON_VMFUNC] = "VMFUNC", | [EXIT_REASON_VMFUNC] = "VMFUNC", | ||||
[EXIT_REASON_ENCLS] = "ENCLS", | [EXIT_REASON_ENCLS] = "ENCLS", | ||||
[EXIT_REASON_RDSEED] = "RDSEED", | [EXIT_REASON_RDSEED] = "RDSEED", | ||||
[EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", | [EXIT_REASON_PM_LOG_FULL] = "Page-modification log full", | ||||
[EXIT_REASON_XSAVES] = "XSAVES", | [EXIT_REASON_XSAVES] = "XSAVES", | ||||
[EXIT_REASON_XRSTORS] = "XRSTORS" | [EXIT_REASON_XRSTORS] = "XRSTORS" | ||||
}; | }; | ||||
#define MAX_SOCK_NAME 200 | |||||
pmooney_pfmooney.com: What is this for? | |||||
darius.mihaim_gmail.com [Unsubmitted] [Done] [Inline Actions]: Artifact from an older implementation. The code has since been refactored. | |||||
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); | typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); | ||||
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); | extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); | ||||
char *vmname; | const char *vmname; | ||||
pmooney_pfmooney.com [Unsubmitted] [Not Done] [Inline Actions]: It looks like `mevent.c` has an `extern` declaration referring to this variable that doesn't reflect the `const` change made here. Shouldn't the two be kept in sync? | |||||
darius.mihaim_gmail.com [Unsubmitted] [Done] [Inline Actions]: I have added the 'const' qualifier to the mevent.c variable. The vmname should not be modified at runtime, so it makes more sense to keep it as a constant. On a side note, since the name of the VM is also kept in libvmmapi's 'struct vmctx', the name does not need to be kept as a global; a getter function may be better suited. | |||||
int guest_ncpus; | int guest_ncpus; | ||||
uint16_t cores, maxcpus, sockets, threads; | uint16_t cores, maxcpus, sockets, threads; | ||||
char *guest_uuid_str; | char *guest_uuid_str; | ||||
static int guest_vmexit_on_hlt, guest_vmexit_on_pause; | static int guest_vmexit_on_hlt, guest_vmexit_on_pause; | ||||
static int virtio_msix = 1; | static int virtio_msix = 1; | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | usage(int code) | ||||
" -c: number of cpus and/or topology specification\n" | " -c: number of cpus and/or topology specification\n" | ||||
" -C: include guest memory in core file\n" | " -C: include guest memory in core file\n" | ||||
" -e: exit on unhandled I/O access\n" | " -e: exit on unhandled I/O access\n" | ||||
" -g: gdb port\n" | " -g: gdb port\n" | ||||
" -h: help\n" | " -h: help\n" | ||||
" -H: vmexit from the guest on hlt\n" | " -H: vmexit from the guest on hlt\n" | ||||
" -l: LPC device configuration\n" | " -l: LPC device configuration\n" | ||||
" -m: memory size in MB\n" | " -m: memory size in MB\n" | ||||
" -r: path to checkpoint file\n" | |||||
" -p: pin 'vcpu' to 'hostcpu'\n" | " -p: pin 'vcpu' to 'hostcpu'\n" | ||||
" -P: vmexit from the guest on pause\n" | " -P: vmexit from the guest on pause\n" | ||||
" -s: <slot,driver,configinfo> PCI slot config\n" | " -s: <slot,driver,configinfo> PCI slot config\n" | ||||
" -S: guest memory cannot be swapped\n" | " -S: guest memory cannot be swapped\n" | ||||
" -u: RTC keeps UTC time\n" | " -u: RTC keeps UTC time\n" | ||||
" -U: uuid\n" | " -U: uuid\n" | ||||
" -w: ignore unimplemented MSRs\n" | " -w: ignore unimplemented MSRs\n" | ||||
" -W: force virtio to use single-vector MSI\n" | " -W: force virtio to use single-vector MSI\n" | ||||
▲ Show 20 Lines • Show All 143 Lines • ▼ Show 20 Lines | |||||
/*
 * Return whatever vm_map_gpa() yields for the guest address 'gaddr' and
 * length 'len' in the VM described by 'ctx'.
 */
void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{
	void *hostaddr;

	hostaddr = vm_map_gpa(ctx, gaddr, len);
	return (hostaddr);
}
/*
 * Return whatever vm_rev_map_gpa() yields for the host pointer 'addr' in
 * the VM described by 'ctx'.
 */
uintptr_t
paddr_host2guest(struct vmctx *ctx, void *addr)
{
	uintptr_t gaddr;

	gaddr = vm_rev_map_gpa(ctx, addr);
	return (gaddr);
}
int | int | ||||
fbsdrun_vmexit_on_pause(void) | fbsdrun_vmexit_on_pause(void) | ||||
{ | { | ||||
return (guest_vmexit_on_pause); | return (guest_vmexit_on_pause); | ||||
} | } | ||||
int | int | ||||
▲ Show 20 Lines • Show All 574 Lines • ▼ Show 20 Lines | if (reinit) { | ||||
} | } | ||||
} | } | ||||
error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); | error = vm_set_topology(ctx, sockets, cores, threads, maxcpus); | ||||
if (error) | if (error) | ||||
errx(EX_OSERR, "vm_set_topology"); | errx(EX_OSERR, "vm_set_topology"); | ||||
return (ctx); | return (ctx); | ||||
} | } | ||||
void | |||||
spinup_vcpu(struct vmctx *ctx, int vcpu) | |||||
{ | |||||
int error; | |||||
uint64_t rip; | |||||
error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip); | |||||
assert(error == 0); | |||||
fbsdrun_set_capabilities(ctx, vcpu); | |||||
error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1); | |||||
assert(error == 0); | |||||
fbsdrun_addcpu(ctx, BSP, vcpu, rip); | |||||
} | |||||
int | int | ||||
main(int argc, char *argv[]) | main(int argc, char *argv[]) | ||||
{ | { | ||||
int c, error, dbg_port, gdb_port, err, bvmcons; | int c, error, dbg_port, gdb_port, err, bvmcons; | ||||
int max_vcpus, mptgen, memflags; | int max_vcpus, mptgen, memflags, vcpu; | ||||
int rtc_localtime; | int rtc_localtime; | ||||
bool gdb_stop; | bool gdb_stop; | ||||
struct vmctx *ctx; | struct vmctx *ctx; | ||||
uint64_t rip; | |||||
size_t memsize; | size_t memsize; | ||||
char *optstr; | char *optstr, *restore_file; | ||||
struct restore_state rstate; | |||||
restore_file = NULL; | |||||
bvmcons = 0; | bvmcons = 0; | ||||
progname = basename(argv[0]); | progname = basename(argv[0]); | ||||
dbg_port = 0; | dbg_port = 0; | ||||
gdb_port = 0; | gdb_port = 0; | ||||
gdb_stop = false; | gdb_stop = false; | ||||
guest_ncpus = 1; | guest_ncpus = 1; | ||||
sockets = cores = threads = 1; | sockets = cores = threads = 1; | ||||
maxcpus = 0; | maxcpus = 0; | ||||
memsize = 256 * MB; | memsize = 256 * MB; | ||||
mptgen = 1; | mptgen = 1; | ||||
rtc_localtime = 1; | rtc_localtime = 1; | ||||
memflags = 0; | memflags = 0; | ||||
optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:"; | optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:r:"; | ||||
while ((c = getopt(argc, argv, optstr)) != -1) { | while ((c = getopt(argc, argv, optstr)) != -1) { | ||||
switch (c) { | switch (c) { | ||||
case 'a': | case 'a': | ||||
x2apic_mode = 0; | x2apic_mode = 0; | ||||
break; | break; | ||||
case 'A': | case 'A': | ||||
acpi = 1; | acpi = 1; | ||||
break; | break; | ||||
Show All 29 Lines | case 'l': | ||||
if (strncmp(optarg, "help", strlen(optarg)) == 0) { | if (strncmp(optarg, "help", strlen(optarg)) == 0) { | ||||
lpc_print_supported_devices(); | lpc_print_supported_devices(); | ||||
exit(0); | exit(0); | ||||
} else if (lpc_device_parse(optarg) != 0) { | } else if (lpc_device_parse(optarg) != 0) { | ||||
errx(EX_USAGE, "invalid lpc device " | errx(EX_USAGE, "invalid lpc device " | ||||
"configuration '%s'", optarg); | "configuration '%s'", optarg); | ||||
} | } | ||||
break; | break; | ||||
case 'r': | |||||
restore_file = optarg; | |||||
break; | |||||
case 's': | case 's': | ||||
if (strncmp(optarg, "help", strlen(optarg)) == 0) { | if (strncmp(optarg, "help", strlen(optarg)) == 0) { | ||||
pci_print_supported_devices(); | pci_print_supported_devices(); | ||||
exit(0); | exit(0); | ||||
} else if (pci_parse_slot(optarg) != 0) | } else if (pci_parse_slot(optarg) != 0) | ||||
exit(4); | exit(4); | ||||
else | else | ||||
break; | break; | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | case 'h': | ||||
usage(0); | usage(0); | ||||
default: | default: | ||||
usage(1); | usage(1); | ||||
} | } | ||||
} | } | ||||
argc -= optind; | argc -= optind; | ||||
argv += optind; | argv += optind; | ||||
if (argc != 1) | if (argc > 1 || (argc == 0 && restore_file == NULL)) | ||||
usage(1); | usage(1); | ||||
if (restore_file != NULL) { | |||||
error = load_restore_file(restore_file, &rstate); | |||||
if (error) { | |||||
fprintf(stderr, "Failed to read checkpoint info from " | |||||
"file: '%s'.\n", restore_file); | |||||
exit(1); | |||||
} | |||||
} | |||||
if (argc == 1) { | |||||
vmname = argv[0]; | vmname = argv[0]; | ||||
} else { | |||||
vmname = lookup_vmname(&rstate); | |||||
if (vmname == NULL) { | |||||
fprintf(stderr, "Cannot find VM name in restore file. " | |||||
"Please specify one.\n"); | |||||
exit(1); | |||||
} | |||||
} | |||||
ctx = do_open(vmname); | ctx = do_open(vmname); | ||||
if (restore_file != NULL) { | |||||
guest_ncpus = lookup_guest_ncpus(&rstate); | |||||
memflags = lookup_memflags(&rstate); | |||||
memsize = lookup_memsize(&rstate); | |||||
} | |||||
if (guest_ncpus < 1) { | |||||
fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); | |||||
exit(1); | |||||
} | |||||
max_vcpus = num_vcpus_allowed(ctx); | max_vcpus = num_vcpus_allowed(ctx); | ||||
if (guest_ncpus > max_vcpus) { | if (guest_ncpus > max_vcpus) { | ||||
fprintf(stderr, "%d vCPUs requested but only %d available\n", | fprintf(stderr, "%d vCPUs requested but only %d available\n", | ||||
guest_ncpus, max_vcpus); | guest_ncpus, max_vcpus); | ||||
exit(4); | exit(4); | ||||
} | } | ||||
fbsdrun_set_capabilities(ctx, BSP); | |||||
vm_set_memflags(ctx, memflags); | vm_set_memflags(ctx, memflags); | ||||
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); | err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); | ||||
if (err) { | if (err) { | ||||
fprintf(stderr, "Unable to setup memory (%d)\n", errno); | fprintf(stderr, "Unable to setup memory (%d)\n", errno); | ||||
exit(4); | exit(4); | ||||
} | } | ||||
error = init_msr(); | error = init_msr(); | ||||
Show All 33 Lines | if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { | ||||
fprintf(stderr, "ROM boot failed: unrestricted guest " | fprintf(stderr, "ROM boot failed: unrestricted guest " | ||||
"capability not available\n"); | "capability not available\n"); | ||||
exit(4); | exit(4); | ||||
} | } | ||||
error = vcpu_reset(ctx, BSP); | error = vcpu_reset(ctx, BSP); | ||||
assert(error == 0); | assert(error == 0); | ||||
} | } | ||||
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); | if (restore_file != NULL) { | ||||
assert(error == 0); | fprintf(stdout, "Pausing pci devs...\r\n"); | ||||
if (vm_pause_user_devs(ctx) != 0) { | |||||
fprintf(stderr, "Failed to pause PCI device state.\n"); | |||||
exit(1); | |||||
} | |||||
fprintf(stdout, "Restoring vm mem...\r\n"); | |||||
if (restore_vm_mem(ctx, &rstate) != 0) { | |||||
fprintf(stderr, "Failed to restore VM memory.\n"); | |||||
exit(1); | |||||
} | |||||
fprintf(stdout, "Restoring pci devs...\r\n"); | |||||
if (vm_restore_user_devs(ctx, &rstate) != 0) { | |||||
fprintf(stderr, "Failed to restore PCI device state.\n"); | |||||
exit(1); | |||||
} | |||||
fprintf(stdout, "Restoring kernel structs...\r\n"); | |||||
if (vm_restore_kern_structs(ctx, &rstate) != 0) { | |||||
fprintf(stderr, "Failed to restore kernel structs.\n"); | |||||
exit(1); | |||||
} | |||||
fprintf(stdout, "Resuming pci devs...\r\n"); | |||||
if (vm_resume_user_devs(ctx) != 0) { | |||||
fprintf(stderr, "Failed to resume PCI device state.\n"); | |||||
exit(1); | |||||
} | |||||
} | |||||
/* | /* | ||||
* build the guest tables, MP etc. | * build the guest tables, MP etc. | ||||
*/ | */ | ||||
if (mptgen) { | if (mptgen) { | ||||
error = mptable_build(ctx, guest_ncpus); | error = mptable_build(ctx, guest_ncpus); | ||||
if (error) { | if (error) { | ||||
perror("error to build the guest tables"); | perror("error to build the guest tables"); | ||||
exit(4); | exit(4); | ||||
Show All 21 Lines | #ifndef WITHOUT_CAPSICUM | ||||
if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) | if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) | ||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | errx(EX_OSERR, "Unable to apply rights for sandbox"); | ||||
if (caph_enter() == -1) | if (caph_enter() == -1) | ||||
errx(EX_OSERR, "cap_enter() failed"); | errx(EX_OSERR, "cap_enter() failed"); | ||||
#endif | #endif | ||||
if (restore_file != NULL) | |||||
destroy_restore_state(&rstate); | |||||
/* | /* | ||||
* Add CPU 0 | * checkpointing thread for communication with bhyvectl | ||||
*/ | */ | ||||
fbsdrun_addcpu(ctx, BSP, BSP, rip); | if (init_checkpoint_thread(ctx) < 0) | ||||
printf("Failed to start checkpoint thread!\r\n"); | |||||
/* | |||||
* Change the proc title to include the VM name. | |||||
*/ | |||||
setproctitle("%s", vmname); | |||||
if (restore_file != NULL) { | |||||
vm_restore_time(ctx); | |||||
} | |||||
/* Add CPU 0 | |||||
* If we restore a VM, start all vCPUs now (including APs), otherwise, | |||||
* let the guest OS spin them up later via vmexits. | |||||
*/ | |||||
for (vcpu = 0; vcpu < guest_ncpus; vcpu++) | |||||
if (vcpu == BSP || restore_file) { | |||||
fprintf(stdout, "spinning up vcpu no %d...\r\n", vcpu); | |||||
spinup_vcpu(ctx, vcpu); | |||||
pmooney_pfmooney.com [Unsubmitted] [Not Done] [Inline Actions]: Given the logic in `spinup_vcpu()`, the `UNRESTRICTED_GUEST` capability is now a necessity for all bhyve users. If that is your intent, it probably needs to be properly documented and communicated to the user base. | |||||
darius.mihaim_gmail.com [Unsubmitted] [Done] [Inline Actions]: I have changed the initialization loop and moved the BSP vCPU initialization outside it. As a result, the BSP vCPU will not have the UNRESTRICTED_GUEST capability set, and the other vCPUs will be initialized only when restoring the VM. Since the changes added by @jhb keep track of the activated vCPUs, it may be worth keeping track of those when suspending/resuming, but I do not believe there will be any cases where this will actually be important, since they will be activated by the guest very early on. | |||||
} | |||||
/* | /* | ||||
* Head off to the main event dispatch loop | * Head off to the main event dispatch loop | ||||
*/ | */ | ||||
mevent_dispatch(); | mevent_dispatch(); | ||||
exit(4); | exit(4); | ||||
} | } |
What is this for?