Page MenuHomeFreeBSD

D34547.id103823.diff
No OneTemporary

D34547.id103823.diff

Index: Makefile.inc1
===================================================================
--- Makefile.inc1
+++ Makefile.inc1
@@ -2778,7 +2778,8 @@
-DNO_SHARED \
-DNO_CPU_CFLAGS \
-DNO_PIC \
- MK_CASPER=no \
+ SSP_CFLAGS= \
+ MK_CASPER=yes \
MK_CLANG_EXTRAS=no \
MK_CLANG_FORMAT=no \
MK_CLANG_FULL=no \
Index: lib/libvmmapi/vmmapi.c
===================================================================
--- lib/libvmmapi/vmmapi.c
+++ lib/libvmmapi/vmmapi.c
@@ -1719,7 +1719,7 @@
VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
VM_SET_INTINFO, VM_GET_INTINFO,
VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
- VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
+ VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY, VM_SNAPSHOT_REQ };
if (len == NULL) {
cmds = malloc(sizeof(vm_ioctl_cmds));
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -91,8 +91,9 @@
LIBADD+= casper
LIBADD+= cap_pwd
LIBADD+= cap_grp
+LIBADD+= cap_sysctl
# Temporary disable capsicum, until we integrate checkpoint code with it.
-#CFLAGS+=-DWITH_CASPER
+CFLAGS+=-DWITH_CASPER
.endif
.if ${MK_BHYVE_SNAPSHOT} != "no"
@@ -121,9 +122,6 @@
.if ${MK_BHYVE_SNAPSHOT} != "no"
CFLAGS+= -I${SRCTOP}/contrib/libucl/include
-# Temporary disable capsicum, until we integrate checkpoint code with it.
-CFLAGS+= -DWITHOUT_CAPSICUM
-
CFLAGS+= -DBHYVE_SNAPSHOT
.endif
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c
+++ usr.sbin/bhyve/bhyverun.c
@@ -34,6 +34,8 @@
#include <sys/types.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
+#include <libcasper.h>
+#include <casper/cap_sysctl.h>
#endif
#include <sys/mman.h>
#ifdef BHYVE_SNAPSHOT
@@ -1227,6 +1229,44 @@
set_config_bool("x86.strictmsr", true);
}
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Derive the directory used for checkpoint files from a "-s" slot argument.
+ *
+ * A copy of 'str' is split on ','.  The second token is taken as the
+ * emulation name and the third as the backing path.  When the emulation is
+ * one of the disk emulations listed below and the backing path resolves with
+ * realpath(3), the directory containing the resolved path is returned.
+ *
+ * Returns a malloc'ed string that the caller must free(3), or NULL when the
+ * argument does not name one of those emulations, has no third token, the
+ * path cannot be resolved, or memory is exhausted.  'str' is not modified.
+ */
+char *
+get_ckp_path(char *str)
+{
+	char *strcopy, *emul, *conf, *resolved, *slash;
+	char *state = NULL;
+	char *path = NULL;
+	const char delim[2] = ",";
+
+	strcopy = strdup(str);
+	if (strcopy == NULL)
+		return (NULL);
+
+	/* strtok_r() keeps the parse state in 'state', not in libc. */
+	(void)strtok_r(strcopy, delim, &state);	/* first token is unused */
+	emul = strtok_r(NULL, delim, &state);
+	conf = strtok_r(NULL, delim, &state);
+
+	/*
+	 * A non-NULL third token implies a non-NULL second token.
+	 * strcmp() returns 0 on a match, so compare explicitly against 0.
+	 */
+	if (conf != NULL &&
+	    (strcmp(emul, "virtio-blk") == 0 ||
+	    strcmp(emul, "ahci-hd") == 0 ||
+	    strcmp(emul, "ahci") == 0)) {
+		resolved = realpath(conf, NULL);
+		if (resolved != NULL) {
+			slash = strrchr(resolved, '/');
+			if (slash != NULL) {
+				/* Keep "/" when the file lives in the root. */
+				if (slash == resolved)
+					slash++;
+				*slash = '\0';
+				path = strdup(resolved);
+			}
+			free(resolved);
+		}
+	}
+
+	free(strcopy);
+
+	return (path);
+}
+#endif
+
int
main(int argc, char *argv[])
{
@@ -1244,6 +1284,8 @@
restore_file = NULL;
#endif
+ cap_channel_t *capcas = NULL;
+ char *ckp_path = NULL;
init_config();
set_defaults();
@@ -1309,8 +1351,13 @@
exit(0);
} else if (pci_parse_slot(optarg) != 0)
exit(4);
- else
+			else {
+#ifndef WITHOUT_CAPSICUM
+				/*
+				 * Only the first slot argument that yields a
+				 * directory sets the checkpoint path.
+				 */
+				if (ckp_path == NULL)
+					ckp_path = get_ckp_path(optarg);
+#endif
+				break;
+			}
case 'S':
set_config_bool("memory.wired", true);
break;
@@ -1548,13 +1595,10 @@
setproctitle("%s", vmname);
#ifndef WITHOUT_CAPSICUM
- caph_cache_catpages();
-
- if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-
- if (caph_enter() == -1)
- errx(EX_OSERR, "cap_enter() failed");
+ /* Open capability to Casper. */
+ capcas = cap_init();
+ if (capcas == NULL)
+ errx(EX_OSERR, "cap_init() failed");
#endif
#ifdef BHYVE_SNAPSHOT
@@ -1567,13 +1611,24 @@
/*
* checkpointing thread for communication with bhyvectl
*/
- if (init_checkpoint_thread(ctx) < 0)
+ if (init_checkpoint_thread(ctx, ckp_path, capcas) < 0)
printf("Failed to start checkpoint thread!\r\n");
if (restore_file != NULL)
vm_restore_time(ctx);
#endif
+#ifndef WITHOUT_CAPSICUM
+ free(ckp_path);
+ caph_cache_catpages();
+
+ if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+
+ if (caph_enter() == -1)
+ errx(EX_OSERR, "cap_enter() failed");
+#endif
+
/*
* Add CPU 0
*/
Index: usr.sbin/bhyve/snapshot.h
===================================================================
--- usr.sbin/bhyve/snapshot.h
+++ usr.sbin/bhyve/snapshot.h
@@ -42,6 +42,11 @@
#include <libxo/xo.h>
#include <ucl.h>
+#ifndef WITHOUT_CAPSICUM
+#include <libcasper.h>
+#include <casper/cap_sysctl.h>
+#endif
+
#define BHYVE_RUN_DIR "/var/run/bhyve/"
#define MAX_SNAPSHOT_FILENAME PATH_MAX
@@ -63,6 +68,7 @@
struct checkpoint_thread_info {
struct vmctx *ctx;
int socket_fd;
+ cap_channel_t *channel;
};
typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *);
@@ -101,8 +107,8 @@
int get_checkpoint_msg(int conn_fd, struct vmctx *ctx);
void *checkpoint_thread(void *param);
-int init_checkpoint_thread(struct vmctx *ctx);
void init_snapshot(void);
+int init_checkpoint_thread(struct vmctx *ctx, char *ckp_path, cap_channel_t *chn);
int load_restore_file(const char *filename, struct restore_state *rstate);
Index: usr.sbin/bhyve/snapshot.c
===================================================================
--- usr.sbin/bhyve/snapshot.c
+++ usr.sbin/bhyve/snapshot.c
@@ -171,6 +171,7 @@
static pthread_cond_t vcpus_idle, vcpus_can_run;
static bool checkpoint_active;
+static int cdir_fd = AT_FDCWD;
/*
* TODO: Harden this function and all of its callers since 'base_str' is a user
* provided string.
@@ -224,6 +225,38 @@
ucl_parser_free(rstate->meta_parser);
}
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Restrict descriptor 's' to the CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL and
+ * CAP_READ rights.  Terminates the process through errx(3) with EX_OSERR
+ * when caph_rights_limit() returns -1.
+ */
+static void
+limit_vmmem_socket(int s)
+{
+	cap_rights_t allowed;
+	int rc;
+
+	cap_rights_init(&allowed, CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL, CAP_READ);
+	rc = caph_rights_limit(s, &allowed);
+	if (rc != -1)
+		return;
+
+	errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Restrict descriptor 's' to the CAP_FSTAT, CAP_MMAP_R and CAP_READ rights.
+ * Terminates the process through errx(3) with EX_OSERR when
+ * caph_rights_limit() returns -1.
+ */
+static void
+limit_kernel_socket(int s)
+{
+	cap_rights_t allowed;
+	int rc;
+
+	cap_rights_init(&allowed, CAP_FSTAT, CAP_MMAP_R, CAP_READ);
+	rc = caph_rights_limit(s, &allowed);
+	if (rc != -1)
+		return;
+
+	errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Restrict descriptor 's' to the CAP_FSTAT, CAP_MMAP_R and CAP_READ rights.
+ * Terminates the process through errx(3) with EX_OSERR when
+ * caph_rights_limit() returns -1.
+ */
+static void
+limit_metadata_socket(int s)
+{
+	cap_rights_t allowed;
+	int rc;
+
+	cap_rights_init(&allowed, CAP_FSTAT, CAP_MMAP_R, CAP_READ);
+	rc = caph_rights_limit(s, &allowed);
+	if (rc != -1)
+		return;
+
+	errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+#endif
+
static int
load_vmmem_file(const char *filename, struct restore_state *rstate)
{
@@ -236,6 +269,10 @@
return (-1);
}
+#ifndef WITHOUT_CAPSICUM
+ limit_vmmem_socket(rstate->vmmem_fd);
+#endif
+
err = fstat(rstate->vmmem_fd, &sb);
if (err < 0) {
perror("Failed to stat restore file");
@@ -269,6 +306,10 @@
return (-1);
}
+#ifndef WITHOUT_CAPSICUM
+ limit_kernel_socket(rstate->kdata_fd);
+#endif
+
err = fstat(rstate->kdata_fd, &sb);
if (err < 0) {
perror("Failed to stat kernel data file");
@@ -301,6 +342,7 @@
{
const ucl_object_t *obj;
struct ucl_parser *parser;
+ int md_fd = -1;
int err;
parser = ucl_parser_new(UCL_PARSER_DEFAULT);
@@ -309,7 +351,13 @@
goto err_load_metadata;
}
- err = ucl_parser_add_file(parser, filename);
+ md_fd = open(filename, O_RDONLY);
+
+#ifndef WITHOUT_CAPSICUM
+ limit_metadata_socket(md_fd);
+#endif
+
+ err = ucl_parser_add_fd(parser, md_fd);
if (err == 0) {
fprintf(stderr, "Failed to parse metadata file: '%s'\n",
filename);
@@ -330,6 +378,8 @@
return (0);
err_load_metadata:
+ if (md_fd > 0)
+ close(md_fd);
if (parser != NULL)
ucl_parser_free(parser);
return (err);
@@ -1304,10 +1354,16 @@
static void
vm_vcpu_pause(struct vmctx *ctx)
{
+ int err;
pthread_mutex_lock(&vcpu_lock);
checkpoint_active = true;
- vm_suspend_cpu(ctx, -1);
+ err = vm_suspend_cpu(ctx, -1);
+ if (err != 0) {
+ fprintf(stderr, "%s: Could not suspend vcpus\r\n", __func__);
+ pthread_mutex_unlock(&vcpu_lock);
+ return;
+ }
while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0)
pthread_cond_wait(&vcpus_idle, &vcpu_lock);
pthread_mutex_unlock(&vcpu_lock);
@@ -1324,10 +1380,42 @@
pthread_cond_broadcast(&vcpus_can_run);
}
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Destroy the VM named 'vm' by writing the "hw.vmm.destroy" sysctl through
+ * Casper's system.sysctl service.
+ *
+ *   vm    - NUL-terminated VM name (evaluated more than once)
+ *   ch    - Casper channel; closed by this macro on every path
+ *   err   - int lvalue; set to -1 when the service cannot be opened,
+ *           otherwise to the result of cap_sysctlbyname()
+ *   LABEL - label jumped to when 'err' ends up non-zero
+ */
+#define DESTROY(vm, ch, err, LABEL) \
+do { \
+	cap_channel_t *capsysctl = NULL; \
+	const char *name = "hw.vmm.destroy"; \
+	void *limit; \
+	\
+	/* Create capability to the system.sysctl service with Casper. */ \
+	capsysctl = cap_service_open((ch), "system.sysctl"); \
+	\
+	cap_close((ch)); \
+	\
+	/* Without the service there is nothing to limit or to call. */ \
+	if (capsysctl == NULL) { \
+		fprintf(stderr, "%s: Unable to open system.sysctl service", __func__); \
+		(err) = -1; \
+		goto LABEL; \
+	} \
+	\
+	/* Create limit for one MIB with write access only. */ \
+	limit = cap_sysctl_limit_init(capsysctl); \
+	(void)cap_sysctl_limit_name(limit, name, CAP_SYSCTL_WRITE); \
+	\
+	/* Limit system.sysctl. */ \
+	if (cap_sysctl_limit(limit) < 0) \
+		fprintf(stderr, "%s: Unable to set limits", __func__); \
+	\
+	(err) = cap_sysctlbyname(capsysctl, name, NULL, NULL, (vm), strlen((vm))); \
+	\
+	cap_close(capsysctl); \
+	if ((err) != 0) { \
+		fprintf(stderr, "%s: err is %d\r\n", __func__, errno); \
+		goto LABEL; \
+	} \
+} while (0)
+#endif
+
static int
-vm_checkpoint(struct vmctx *ctx, const char *checkpoint_file, bool stop_vm)
+vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, cap_channel_t *chn, bool stop_vm)
{
- int fd_checkpoint = 0, kdata_fd = 0;
+ int fd_checkpoint = 0, kdata_fd = 0, meta_fd = 0;
int ret = 0;
int error = 0;
size_t memsz;
@@ -1335,6 +1423,8 @@
char *meta_filename = NULL;
char *kdata_filename = NULL;
FILE *meta_file = NULL;
+ char vmname[MAX_VMNAME];
+
kdata_filename = strcat_extension(checkpoint_file, ".kern");
if (kdata_filename == NULL) {
@@ -1342,15 +1432,14 @@
return (-1);
}
- kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+ kdata_fd = openat(cdir_fd, kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
if (kdata_fd < 0) {
perror("Failed to open kernel data snapshot file.");
error = -1;
goto done;
}
- fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
-
+ fd_checkpoint = openat(cdir_fd, checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
if (fd_checkpoint < 0) {
perror("Failed to create checkpoint file");
error = -1;
@@ -1363,7 +1452,12 @@
goto done;
}
- meta_file = fopen(meta_filename, "w");
+ meta_fd = openat(cdir_fd, meta_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+ if (meta_fd < 0) {
+ perror("Failed to open vm metadata snapshot file descriptor.");
+ goto done;
+ }
+ meta_file = fdopen(meta_fd, "w");
if (meta_file == NULL) {
perror("Failed to open vm metadata snapshot file.");
goto done;
@@ -1382,6 +1476,10 @@
fprintf(stderr, "Could not pause devices\r\n");
error = ret;
goto done;
+#ifndef WITHOUT_CAPSICUM
+ if (cdir_fd > 0)
+ close(cdir_fd);
+#endif
}
memsz = vm_snapshot_mem(ctx, fd_checkpoint, 0, true);
@@ -1398,7 +1496,6 @@
goto done;
}
-
ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop);
if (ret != 0) {
fprintf(stderr, "Failed to snapshot vm kernel data.\n");
@@ -1415,8 +1512,19 @@
xo_finish_h(xop);
+
if (stop_vm) {
- vm_destroy(ctx);
+ if (chn != NULL) {
+ error = vm_get_name(ctx, vmname, MAX_VMNAME - 1);
+ if (error != 0) {
+ fprintf(stderr, "%s: Failed to get VM name", __func__);
+ goto done;
+ }
+ DESTROY(vmname, chn, error, done);
+ free(ctx);
+ } else
+ vm_destroy(ctx);
+
exit(0);
}
@@ -1437,11 +1545,15 @@
fclose(meta_file);
if (kdata_fd > 0)
close(kdata_fd);
+#ifndef WITHOUT_CAPSICUM
+ if (cdir_fd > 0)
+ close(cdir_fd);
+#endif
return (error);
}
-static int
-handle_message(struct vmctx *ctx, nvlist_t *nvl)
+int
+handle_message(struct vmctx *ctx, nvlist_t *nvl, cap_channel_t *chn)
{
int err;
const char *cmd;
@@ -1456,10 +1568,10 @@
err = -1;
else
err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"),
- nvlist_get_bool(nvl, "suspend"));
+ chn, nvlist_get_bool(nvl, "suspend"));
} else {
- EPRINTLN("Unrecognized checkpoint operation\n");
- err = -1;
+ EPRINTLN("Unrecognized checkpoint operation\n");
+ err = -1;
}
if (err != 0)
@@ -1482,9 +1594,14 @@
thread_info = (struct checkpoint_thread_info *)param;
for (;;) {
- nvl = nvlist_recv(thread_info->socket_fd, 0);
- if (nvl != NULL)
- handle_message(thread_info->ctx, nvl);
+ n = recvfrom(thread_info->socket_fd, &imsg, sizeof(imsg), 0, NULL, 0);
+
+ /*
+ * slight sanity check: see if there's enough data to at
+ * least determine the type of message.
+ */
+ if (n >= sizeof(imsg.code))
+ handle_message(&imsg, thread_info->ctx, thread_info->channel);
else
EPRINTLN("nvlist_recv() failed: %s", strerror(errno));
}
@@ -1508,18 +1625,41 @@
errc(1, err, "checkpoint cv init (vcpus_can_run)");
}
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Restrict descriptor 's' to the CAP_BIND and CAP_READ rights.  Terminates
+ * the process through errx(3) with EX_OSERR when caph_rights_limit()
+ * returns -1.
+ */
+static void
+limit_control_socket(int s)
+{
+	cap_rights_t allowed;
+	int rc;
+
+	cap_rights_init(&allowed, CAP_BIND, CAP_READ);
+	rc = caph_rights_limit(s, &allowed);
+	if (rc != -1)
+		return;
+
+	errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Restrict the file-scope directory descriptor 'cdir_fd' to the CAP_LOOKUP,
+ * CAP_FTRUNCATE, CAP_PWRITE, CAP_PREAD, CAP_FCNTL and CAP_CREATE rights.
+ * Terminates the process through errx(3) with EX_OSERR when
+ * caph_rights_limit() returns -1.
+ */
+static void
+limit_file_operations(void)
+{
+	cap_rights_t rights;
+
+	cap_rights_init(&rights, CAP_LOOKUP, CAP_FTRUNCATE, CAP_PWRITE,
+	    CAP_PREAD, CAP_FCNTL, CAP_CREATE);
+	if (caph_rights_limit(cdir_fd, &rights) == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+#endif
+
/*
* Create the listening socket for IPC with bhyvectl
*/
int
-init_checkpoint_thread(struct vmctx *ctx)
+init_checkpoint_thread(struct vmctx *ctx, char *ckp_path, cap_channel_t *chn)
{
struct checkpoint_thread_info *checkpoint_info = NULL;
struct sockaddr_un addr;
int socket_fd;
pthread_t checkpoint_pthread;
char vmname_buf[MAX_VMNAME];
- int err;
+ int ret, err = 0;
memset(&addr, 0, sizeof(addr));
@@ -1530,6 +1670,17 @@
goto fail;
}
+ if (ckp_path != NULL) {
+ cdir_fd = open(ckp_path, O_RDONLY | O_DIRECTORY);
+ if (cdir_fd < 0) {
+ perror("Failed to open working directory.");
+ err = -1;
+ goto fail;
+ }
+ limit_control_socket(socket_fd);
+ limit_file_operations();
+ }
+
addr.sun_family = AF_UNIX;
err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
@@ -1553,6 +1704,7 @@
checkpoint_info = calloc(1, sizeof(*checkpoint_info));
checkpoint_info->ctx = ctx;
checkpoint_info->socket_fd = socket_fd;
+ checkpoint_info->channel = chn;
err = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread,
checkpoint_info);

File Metadata

Mime Type
text/plain
Expires
Fri, Apr 17, 11:56 AM (9 h, 19 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31658258
Default Alt Text
D34547.id103823.diff (14 KB)

Event Timeline