diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -1753,7 +1753,8 @@ VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU, VM_SET_INTINFO, VM_GET_INTINFO, VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME, - VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY + VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY, + VM_SNAPSHOT_REQ, VM_RESTORE_TIME }; int diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -93,8 +93,7 @@ LIBADD+= casper LIBADD+= cap_pwd LIBADD+= cap_grp -# Temporary disable capsicum, until we integrate checkpoint code with it. -#CFLAGS+=-DWITH_CASPER +CFLAGS+=-DWITH_CASPER .endif .if ${MK_BHYVE_SNAPSHOT} != "no" @@ -122,10 +121,6 @@ CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller .if ${MK_BHYVE_SNAPSHOT} != "no" CFLAGS+= -I${SRCTOP}/contrib/libucl/include - -# Temporary disable capsicum, until we integrate checkpoint code with it. 
-CFLAGS+= -DWITHOUT_CAPSICUM - CFLAGS+= -DBHYVE_SNAPSHOT .endif diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -1562,6 +1562,17 @@ */ setproctitle("%s", vmname); +#ifdef BHYVE_SNAPSHOT + /* initialize mutex/cond variables */ + init_snapshot(); + + /* + * checkpointing thread for communication with bhyvectl + */ + if (init_checkpoint_thread(ctx) < 0) + errx(EX_OSERR, "Failed to start checkpoint thread"); +#endif + #ifndef WITHOUT_CAPSICUM caph_cache_catpages(); @@ -1573,18 +1584,10 @@ #endif #ifdef BHYVE_SNAPSHOT - /* initialize mutex/cond variables */ - init_snapshot(); - - /* - * checkpointing thread for communication with bhyvectl - */ - if (init_checkpoint_thread(ctx) < 0) - printf("Failed to start checkpoint thread!\r\n"); - if (restore_file != NULL) { destroy_restore_state(&rstate); - vm_restore_time(ctx); + if (vm_restore_time(ctx) < 0) + err(EX_OSERR, "Unable to restore time"); for (int i = 0; i < guest_ncpus; i++) { if (i == BSP) diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -1009,7 +1009,7 @@ pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx); pthread_mutex_unlock(&bc->bc_mtx); - if (blockif_flush_bc(bc)) + if (!bc->bc_rdonly && blockif_flush_bc(bc)) fprintf(stderr, "%s: [WARN] failed to flush backing file.\r\n", __func__); } diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c --- a/usr.sbin/bhyve/snapshot.c +++ b/usr.sbin/bhyve/snapshot.c @@ -1314,9 +1314,10 @@ } static int -vm_checkpoint(struct vmctx *ctx, const char *checkpoint_file, bool stop_vm) +vm_checkpoint(struct vmctx *ctx, int fddir, const char *checkpoint_file, + bool stop_vm) { - int fd_checkpoint = 0, kdata_fd = 0; + int fd_checkpoint = 0, kdata_fd = 0, fd_meta; int ret = 0; int error = 0; size_t memsz; @@ -1331,14 +1332,16 @@ return (-1); } - kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | 
O_TRUNC, 0700); + kdata_fd = openat(fddir, kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); if (kdata_fd < 0) { - perror("Failed to open kernel data snapshot file."); + fprintf(stderr, + "Failed to open kernel data snapshot file. fd %d file %s\n", + fddir, checkpoint_file); error = -1; goto done; } - fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); + fd_checkpoint = openat(fddir, checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); if (fd_checkpoint < 0) { perror("Failed to create checkpoint file"); @@ -1352,9 +1355,11 @@ goto done; } - meta_file = fopen(meta_filename, "w"); + fd_meta = openat(fddir, meta_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); + meta_file = fdopen(fd_meta, "w"); if (meta_file == NULL) { perror("Failed to open vm metadata snapshot file."); + close(fd_meta); goto done; } @@ -1480,10 +1485,13 @@ int error; if (!nvlist_exists_string(nvl, "filename") || - !nvlist_exists_bool(nvl, "suspend")) + !nvlist_exists_bool(nvl, "suspend") || + !nvlist_exists_descriptor(nvl, "fddir")) error = EINVAL; else - error = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"), + error = vm_checkpoint(ctx, + nvlist_get_descriptor(nvl, "fddir"), + nvlist_get_string(nvl, "filename"), nvlist_get_bool(nvl, "suspend")); return (error); @@ -1517,7 +1525,9 @@ int socket_fd; pthread_t checkpoint_pthread; int err; - +#ifndef WITHOUT_CAPSICUM + cap_rights_t rights; +#endif memset(&addr, 0, sizeof(addr)); socket_fd = socket(PF_UNIX, SOCK_STREAM, 0); @@ -1547,6 +1557,14 @@ goto fail; } +#ifndef WITHOUT_CAPSICUM + cap_rights_init(&rights, CAP_ACCEPT, CAP_READ, CAP_RECV, CAP_WRITE, + CAP_SEND, CAP_GETSOCKOPT); + + if (caph_rights_limit(socket_fd, &rights) == -1) + errx(EX_OSERR, "Unable to apply rights for sandbox"); +#endif + checkpoint_info = calloc(1, sizeof(*checkpoint_info)); checkpoint_info->ctx = ctx; checkpoint_info->socket_fd = socket_fd; diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c --- 
a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -1716,6 +1716,51 @@
 	return (err);
 }
 
+/*
+ * Open the directory that contains "file" and return its descriptor.
+ *
+ * Returns a non-negative descriptor on success or a negated errno value
+ * (-ENOMEM if the path cannot be duplicated, otherwise the error from
+ * open(2)) on failure.
+ */
+static int
+open_directory(const char *file)
+{
+	char *path;
+	int error, fd;
+
+	/* dirname(3) may modify its argument, so operate on a private copy. */
+	if ((path = strdup(file)) == NULL)
+		return (-ENOMEM);
+
+	/*
+	 * Use the pointer dirname(3) returns: POSIX allows it to point at
+	 * storage other than "path" (e.g. a static ".").
+	 */
+	fd = open(dirname(path), O_DIRECTORY);
+	/* Latch errno before free(3) gets a chance to overwrite it. */
+	error = errno;
+	free(path);
+
+	return (fd >= 0 ? fd : -error);
+}
+
+/*
+ * Return the final component of "path": the text after the last '/', or
+ * "path" itself when it contains no '/'.  The result points into "path";
+ * a path ending in '/' yields an empty string.
+ */
+static const char *
+last_name(const char *path)
+{
+	const char *p;
+
+	p = strrchr(path, '/');
+	if (p == NULL)
+		return (path);
+
+	return (p + 1);
+}
+
 static int
 snapshot_request(const char *vmname, const char *file, bool suspend)
 {
@@ -1723,8 +1768,9 @@
 
 	nvl = nvlist_create(0);
 	nvlist_add_string(nvl, "cmd", "checkpoint");
-	nvlist_add_string(nvl, "filename", file);
+	nvlist_add_string(nvl, "filename", last_name(file));
 	nvlist_add_bool(nvl, "suspend", suspend);
+	nvlist_move_descriptor(nvl, "fddir", open_directory(file));
 
 	return (send_message(vmname, nvl));
 }