diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -1716,7 +1716,8 @@ VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU, VM_SET_INTINFO, VM_GET_INTINFO, VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME, - VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY }; + VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY, + VM_SNAPSHOT_REQ, VM_RESTORE_TIME }; if (len == NULL) { cmds = malloc(sizeof(vm_ioctl_cmds)); diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -91,8 +91,7 @@ LIBADD+= casper LIBADD+= cap_pwd LIBADD+= cap_grp -# Temporary disable capsicum, until we integrate checkpoint code with it. -#CFLAGS+=-DWITH_CASPER +CFLAGS+=-DWITH_CASPER .endif .if ${MK_BHYVE_SNAPSHOT} != "no" @@ -121,9 +120,6 @@ .if ${MK_BHYVE_SNAPSHOT} != "no" CFLAGS+= -I${SRCTOP}/contrib/libucl/include -# Temporary disable capsicum, until we integrate checkpoint code with it. -CFLAGS+= -DWITHOUT_CAPSICUM - CFLAGS+= -DBHYVE_SNAPSHOT .endif diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -1551,16 +1551,6 @@ */ setproctitle("%s", vmname); -#ifndef WITHOUT_CAPSICUM - caph_cache_catpages(); - - if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) - errx(EX_OSERR, "Unable to apply rights for sandbox"); - - if (caph_enter() == -1) - errx(EX_OSERR, "cap_enter() failed"); -#endif - #ifdef BHYVE_SNAPSHOT if (restore_file != NULL) destroy_restore_state(&rstate); @@ -1572,12 +1562,22 @@ * checkpointing thread for communication with bhyvectl */ if (init_checkpoint_thread(ctx) < 0) - printf("Failed to start checkpoint thread!\r\n"); + errx(EX_OSERR, "Failed to start checkpoint thread!"); if (restore_file != NULL) vm_restore_time(ctx); #endif +#ifndef WITHOUT_CAPSICUM + caph_cache_catpages(); + + if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); + + if (caph_enter() == -1) + errx(EX_OSERR, "cap_enter() failed"); +#endif + /* Allocate per-VCPU resources. */ vmexit = calloc(guest_ncpus, sizeof(*vmexit)); mt_vmm_info = calloc(guest_ncpus, sizeof(*mt_vmm_info)); diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c --- a/usr.sbin/bhyve/snapshot.c +++ b/usr.sbin/bhyve/snapshot.c @@ -170,6 +170,8 @@ static pthread_cond_t vcpus_idle, vcpus_can_run; static bool checkpoint_active; +static int snapdir_fd = -1; + /* * TODO: Harden this function and all of its callers since 'base_str' is a user * provided string. @@ -1287,16 +1289,20 @@ pthread_mutex_unlock(&vcpu_lock); } -static void +static int vm_vcpu_pause(struct vmctx *ctx) { pthread_mutex_lock(&vcpu_lock); checkpoint_active = true; - vm_suspend_cpu(ctx, -1); + if (vm_suspend_cpu(ctx, -1) != 0) { + pthread_mutex_unlock(&vcpu_lock); + return (errno); + } while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0) pthread_cond_wait(&vcpus_idle, &vcpu_lock); pthread_mutex_unlock(&vcpu_lock); + return (0); } static void @@ -1313,7 +1319,7 @@ static int vm_checkpoint(struct vmctx *ctx, const char *checkpoint_file, bool stop_vm) { - int fd_checkpoint = 0, kdata_fd = 0; + int fd_checkpoint = 0, kdata_fd = 0, meta_fd; int ret = 0; int error = 0; size_t memsz; @@ -1328,15 +1334,15 @@ return (-1); } - kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); + kdata_fd = openat(snapdir_fd, kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0600); if (kdata_fd < 0) { perror("Failed to open kernel data snapshot file."); error = -1; goto done; } - fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); - + fd_checkpoint = openat(snapdir_fd, checkpoint_file, + O_RDWR | O_CREAT | O_TRUNC, 0600); if (fd_checkpoint < 0) { perror("Failed to create checkpoint file"); error = -1; @@ -1346,22 +1352,35 @@ meta_filename = strcat_extension(checkpoint_file, ".meta"); if (meta_filename == NULL) { fprintf(stderr, "Failed to construct vm metadata filename.\n"); + error = -1; goto done; } - meta_file = fopen(meta_filename, "w"); + meta_fd = openat(snapdir_fd, meta_filename, + O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (meta_fd < 0) + perror("Failed to open vm metadata snapshot file descriptor."); + meta_file = fdopen(meta_fd, "w"); if (meta_file == NULL) { perror("Failed to open vm metadata snapshot file."); + close(meta_fd); + error = -1; goto done; } xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY); if (xop == NULL) { perror("Failed to get libxo handle on metadata file."); + error = -1; goto done; } - vm_vcpu_pause(ctx); + ret = vm_vcpu_pause(ctx); + if (ret != 0) { + fprintf(stderr, "Could not pause vcpu\n"); + error = ret; + goto done; + } ret = vm_pause_user_devs(ctx); if (ret != 0) { @@ -1513,10 +1532,27 @@ struct sockaddr_un addr; int socket_fd; pthread_t checkpoint_pthread; + const char *snapdir = getenv("BHYVE_SNAPDIR") ? : "."; int err; +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; +#endif memset(&addr, 0, sizeof(addr)); + err = mkdir(snapdir, 0755); + if (err < 0 && errno != EEXIST) { + warn("Cannot create directory '%s'", snapdir); + goto fail; + } + + snapdir_fd = open(snapdir, O_RDONLY | O_DIRECTORY); + if (snapdir_fd < 0) { + warn("Cannot open snapshot directory '%s'", snapdir); + err = -1; + goto fail; + } + socket_fd = socket(PF_UNIX, SOCK_STREAM, 0); if (socket_fd < 0) { EPRINTLN("Socket creation failed: %s", strerror(errno)); @@ -1544,6 +1580,23 @@ goto fail; } +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_LOOKUP, CAP_FTRUNCATE, CAP_PWRITE, CAP_PREAD, + CAP_FCNTL, CAP_CREATE); + if (caph_rights_limit(snapdir_fd, &rights) == -1) { + warn("Unable to apply rights for sandbox for snapdir"); + err = -1; + goto fail; + } + + cap_rights_init(&rights, CAP_READ | CAP_WRITE | CAP_ACCEPT); + if (caph_rights_limit(socket_fd, &rights) == -1) { + warn("Unable to apply rights for sandbox for checkpoint socket"); + err = -1; + goto fail; + } +#endif + checkpoint_info = calloc(1, sizeof(*checkpoint_info)); checkpoint_info->ctx = ctx; checkpoint_info->socket_fd = socket_fd; @@ -1558,6 +1611,7 @@ free(checkpoint_info); if (socket_fd > 0) close(socket_fd); + close(snapdir_fd); unlink(addr.sun_path); return (err);