Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F139407544
D34547.id103818.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
12 KB
Referenced Files
None
Subscribers
None
D34547.id103818.diff
View Options
Index: Makefile.inc1
===================================================================
--- Makefile.inc1
+++ Makefile.inc1
@@ -2778,7 +2778,8 @@
-DNO_SHARED \
-DNO_CPU_CFLAGS \
-DNO_PIC \
- MK_CASPER=no \
+ SSP_CFLAGS= \
+ MK_CASPER=yes \
MK_CLANG_EXTRAS=no \
MK_CLANG_FORMAT=no \
MK_CLANG_FULL=no \
Index: lib/libvmmapi/vmmapi.c
===================================================================
--- lib/libvmmapi/vmmapi.c
+++ lib/libvmmapi/vmmapi.c
@@ -1719,7 +1719,7 @@
VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SUSPEND_CPU, VM_RESUME_CPU,
VM_SET_INTINFO, VM_GET_INTINFO,
VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
- VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY };
+ VM_RESTART_INSTRUCTION, VM_SET_TOPOLOGY, VM_GET_TOPOLOGY, VM_SNAPSHOT_REQ };
if (len == NULL) {
cmds = malloc(sizeof(vm_ioctl_cmds));
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -91,8 +91,9 @@
LIBADD+= casper
LIBADD+= cap_pwd
LIBADD+= cap_grp
+LIBADD+= cap_sysctl
# Temporary disable capsicum, until we integrate checkpoint code with it.
-#CFLAGS+=-DWITH_CASPER
+CFLAGS+=-DWITH_CASPER
.endif
.if ${MK_BHYVE_SNAPSHOT} != "no"
@@ -122,7 +123,7 @@
CFLAGS+= -I${SRCTOP}/contrib/libucl/include
# Temporary disable capsicum, until we integrate checkpoint code with it.
-CFLAGS+= -DWITHOUT_CAPSICUM
+#CFLAGS+= -DWITHOUT_CAPSICUM
CFLAGS+= -DBHYVE_SNAPSHOT
.endif
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c
+++ usr.sbin/bhyve/bhyverun.c
@@ -34,6 +34,8 @@
#include <sys/types.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
+#include <libcasper.h>
+#include <casper/cap_sysctl.h>
#endif
#include <sys/mman.h>
#ifdef BHYVE_SNAPSHOT
@@ -1124,6 +1126,7 @@
if (reinit) {
error = vm_reinit(ctx);
if (error) {
+ fprintf(stderr, "%s: error code is %d\r\n", __func__, error);
perror("vm_reinit");
exit(4);
}
@@ -1244,6 +1247,8 @@
restore_file = NULL;
#endif
+ cap_channel_t *capcas;
+
init_config();
set_defaults();
@@ -1547,15 +1552,10 @@
*/
setproctitle("%s", vmname);
-#ifndef WITHOUT_CAPSICUM
- caph_cache_catpages();
-
- if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-
- if (caph_enter() == -1)
- errx(EX_OSERR, "cap_enter() failed");
-#endif
+ /* Open capability to Casper. */
+ capcas = cap_init();
+ if (capcas == NULL)
+ errx(EX_OSERR, "cap_init() failed");
#ifdef BHYVE_SNAPSHOT
if (restore_file != NULL)
@@ -1567,13 +1567,23 @@
/*
* checkpointing thread for communication with bhyvectl
*/
- if (init_checkpoint_thread(ctx) < 0)
+ if (init_checkpoint_thread(ctx, capcas) < 0)
printf("Failed to start checkpoint thread!\r\n");
if (restore_file != NULL)
vm_restore_time(ctx);
#endif
+#ifndef WITHOUT_CAPSICUM
+ caph_cache_catpages();
+
+ if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+
+ if (caph_enter() == -1)
+ errx(EX_OSERR, "cap_enter() failed");
+#endif
+
/*
* Add CPU 0
*/
Index: usr.sbin/bhyve/snapshot.h
===================================================================
--- usr.sbin/bhyve/snapshot.h
+++ usr.sbin/bhyve/snapshot.h
@@ -42,6 +42,11 @@
#include <libxo/xo.h>
#include <ucl.h>
+#ifndef WITHOUT_CAPSICUM
+#include <libcasper.h>
+#include <casper/cap_sysctl.h>
+#endif
+
#define BHYVE_RUN_DIR "/var/run/bhyve/"
#define MAX_SNAPSHOT_FILENAME PATH_MAX
@@ -63,6 +68,7 @@
struct checkpoint_thread_info {
struct vmctx *ctx;
int socket_fd;
+ cap_channel_t *channel; /* 'chn' given to init_checkpoint_thread(); passed on to handle_message() */
};
typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *);
@@ -101,8 +107,8 @@
int get_checkpoint_msg(int conn_fd, struct vmctx *ctx);
void *checkpoint_thread(void *param);
-int init_checkpoint_thread(struct vmctx *ctx);
void init_snapshot(void);
+int init_checkpoint_thread(struct vmctx *ctx, cap_channel_t *chn);
int load_restore_file(const char *filename, struct restore_state *rstate);
Index: usr.sbin/bhyve/snapshot.c
===================================================================
--- usr.sbin/bhyve/snapshot.c
+++ usr.sbin/bhyve/snapshot.c
@@ -171,6 +171,7 @@
static pthread_cond_t vcpus_idle, vcpus_can_run;
static bool checkpoint_active;
+static int cdir_fd = -1;
/*
* TODO: Harden this function and all of its callers since 'base_str' is a user
* provided string.
@@ -224,6 +225,29 @@
ucl_parser_free(rstate->meta_parser);
}
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Restrict descriptor 's' to the CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL and
+ * CAP_READ rights.  Terminates the process with EX_OSERR when the limit
+ * cannot be applied.
+ */
+static void
+limit_vmmem_socket(int s)
+{
+	cap_rights_t allowed;
+	int rc;
+
+	(void)cap_rights_init(&allowed, CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL,
+	    CAP_READ);
+	rc = caph_rights_limit(s, &allowed);
+	if (rc == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Restrict descriptor 's' to the CAP_FSTAT, CAP_MMAP_R and CAP_READ rights.
+ * Terminates the process with EX_OSERR when the limit cannot be applied.
+ */
+static void
+limit_kernel_socket(int s)
+{
+	cap_rights_t allowed;
+	int rc;
+
+	(void)cap_rights_init(&allowed, CAP_FSTAT, CAP_MMAP_R, CAP_READ);
+	rc = caph_rights_limit(s, &allowed);
+	if (rc == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+#endif
+
static int
load_vmmem_file(const char *filename, struct restore_state *rstate)
{
@@ -236,6 +260,10 @@
return (-1);
}
+#ifndef WITHOUT_CAPSICUM
+ limit_vmmem_socket(rstate->vmmem_fd);
+#endif
+
err = fstat(rstate->vmmem_fd, &sb);
if (err < 0) {
perror("Failed to stat restore file");
@@ -269,6 +297,10 @@
return (-1);
}
+#ifndef WITHOUT_CAPSICUM
+ limit_kernel_socket(rstate->kdata_fd);
+#endif
+
err = fstat(rstate->kdata_fd, &sb);
if (err < 0) {
perror("Failed to stat kernel data file");
@@ -825,7 +857,6 @@
fprintf(stderr, "%s: Could not %s lowmem\r\n",
__func__, op_wr ? "write" : "read");
totalmem = 0;
- goto done;
}
if (highmem == 0)
@@ -1304,10 +1335,16 @@
static void
vm_vcpu_pause(struct vmctx *ctx)
{
+ int err;
pthread_mutex_lock(&vcpu_lock);
checkpoint_active = true;
- vm_suspend_cpu(ctx, -1);
+ err = vm_suspend_cpu(ctx, -1);
+ if (err != 0) {
+ fprintf(stderr, "%s: Could not suspend vcpus\r\n", __func__);
+ pthread_mutex_unlock(&vcpu_lock);
+ return;
+ }
while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0)
pthread_cond_wait(&vcpus_idle, &vcpu_lock);
pthread_mutex_unlock(&vcpu_lock);
@@ -1324,10 +1361,42 @@
pthread_cond_broadcast(&vcpus_can_run);
}
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Destroy the VM named 'vm' by writing its name to the "hw.vmm.destroy"
+ * sysctl through Casper's system.sysctl service.  vm_checkpoint() uses this
+ * in place of vm_destroy() when it was handed a Casper channel.
+ *
+ *   vm    - NUL-terminated VM name (evaluated once).
+ *   ch    - open Casper channel.  It is closed only after the VM has been
+ *           destroyed; on failure it stays open because the checkpoint
+ *           thread keeps the pointer and passes it to later requests.
+ *   err   - int lvalue: 0 on success, non-zero on failure.
+ *   LABEL - label to jump to on failure.
+ */
+#define DESTROY(vm, ch, err, LABEL) \
+do { \
+	const char *destroy_mib = "hw.vmm.destroy"; \
+	const char *destroy_vm = (vm); \
+	cap_channel_t *capsysctl; \
+	void *limit; \
+	int destroy_errno; \
+ \
+	/* Create capability to the system.sysctl service with Casper. */ \
+	capsysctl = cap_service_open((ch), "system.sysctl"); \
+	if (capsysctl == NULL) { \
+		fprintf(stderr, "%s: Unable to open system.sysctl service\r\n", \
+		    __func__); \
+		(err) = -1; \
+		goto LABEL; \
+	} \
+ \
+	/* \
+	 * Best effort: restrict the service to write access on the one \
+	 * MIB.  A NULL limit must not reach cap_sysctl_limit(). \
+	 */ \
+	limit = cap_sysctl_limit_init(capsysctl); \
+	if (limit != NULL) \
+		(void)cap_sysctl_limit_name(limit, destroy_mib, \
+		    CAP_SYSCTL_WRITE); \
+	if (limit == NULL || cap_sysctl_limit(limit) < 0) \
+		fprintf(stderr, "%s: Unable to set limits\r\n", __func__); \
+ \
+	(err) = cap_sysctlbyname(capsysctl, destroy_mib, NULL, NULL, \
+	    destroy_vm, strlen(destroy_vm)); \
+	/* cap_close() may clobber errno, so capture it first. */ \
+	destroy_errno = errno; \
+	cap_close(capsysctl); \
+	if ((err) != 0) { \
+		fprintf(stderr, "%s: err is %d\r\n", __func__, destroy_errno); \
+		goto LABEL; \
+	} \
+	cap_close((ch)); \
+} while(0)
+#endif
+
static int
-vm_checkpoint(struct vmctx *ctx, const char *checkpoint_file, bool stop_vm)
+vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, cap_channel_t *chn, bool stop_vm)
{
- int fd_checkpoint = 0, kdata_fd = 0;
+ int fd_checkpoint = 0, kdata_fd = 0, meta_fd = 0;
int ret = 0;
int error = 0;
size_t memsz;
@@ -1335,6 +1404,7 @@
char *meta_filename = NULL;
char *kdata_filename = NULL;
FILE *meta_file = NULL;
+ char vmname[MAX_VMNAME];
kdata_filename = strcat_extension(checkpoint_file, ".kern");
if (kdata_filename == NULL) {
@@ -1342,15 +1412,14 @@
return (-1);
}
- kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+ kdata_fd = openat(cdir_fd, kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
if (kdata_fd < 0) {
perror("Failed to open kernel data snapshot file.");
error = -1;
goto done;
}
- fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
-
+ fd_checkpoint = openat(cdir_fd, checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
if (fd_checkpoint < 0) {
perror("Failed to create checkpoint file");
error = -1;
@@ -1363,7 +1432,12 @@
goto done;
}
- meta_file = fopen(meta_filename, "w");
+ meta_fd = openat(cdir_fd, meta_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+ if (meta_fd < 0) {
+ perror("Failed to open vm metadata snapshot file descriptor.");
+ goto done;
+ }
+ meta_file = fdopen(meta_fd, "w");
if (meta_file == NULL) {
perror("Failed to open vm metadata snapshot file.");
goto done;
@@ -1382,7 +1456,8 @@
fprintf(stderr, "Could not pause devices\r\n");
error = ret;
goto done;
- }
+ if (cdir_fd > 0)
+ close(cdir_fd);}
memsz = vm_snapshot_mem(ctx, fd_checkpoint, 0, true);
if (memsz == 0) {
@@ -1398,7 +1473,6 @@
goto done;
}
-
ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop);
if (ret != 0) {
fprintf(stderr, "Failed to snapshot vm kernel data.\n");
@@ -1415,8 +1489,19 @@
xo_finish_h(xop);
+
if (stop_vm) {
- vm_destroy(ctx);
+ if (chn != NULL) {
+ error = vm_get_name(ctx, vmname, MAX_VMNAME - 1);
+ if (error != 0) {
+ fprintf(stderr, "%s: Failed to get VM name", __func__);
+ goto done;
+ }
+ DESTROY(vmname, chn, error, done);
+ free(ctx);
+ } else
+ vm_destroy(ctx);
+
exit(0);
}
@@ -1437,11 +1522,13 @@
fclose(meta_file);
if (kdata_fd > 0)
close(kdata_fd);
+ if (cdir_fd > 0)
+ close(cdir_fd);
return (error);
}
-static int
-handle_message(struct vmctx *ctx, nvlist_t *nvl)
+int
+handle_message(struct vmctx *ctx, nvlist_t *nvl, cap_channel_t *chn)
{
int err;
const char *cmd;
@@ -1456,10 +1543,10 @@
err = -1;
else
err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"),
- nvlist_get_bool(nvl, "suspend"));
+ chn, nvlist_get_bool(nvl, "suspend"));
} else {
- EPRINTLN("Unrecognized checkpoint operation\n");
- err = -1;
+ EPRINTLN("Unrecognized checkpoint operation\n");
+ err = -1;
}
if (err != 0)
@@ -1482,9 +1569,14 @@
thread_info = (struct checkpoint_thread_info *)param;
for (;;) {
- nvl = nvlist_recv(thread_info->socket_fd, 0);
- if (nvl != NULL)
- handle_message(thread_info->ctx, nvl);
+ n = recvfrom(thread_info->socket_fd, &imsg, sizeof(imsg), 0, NULL, 0);
+
+ /*
+ * slight sanity check: see if there's enough data to at
+ * least determine the type of message.
+ */
+ if (n >= sizeof(imsg.code))
+ handle_message(&imsg, thread_info->ctx, thread_info->channel);
else
EPRINTLN("nvlist_recv() failed: %s", strerror(errno));
}
@@ -1508,18 +1600,42 @@
errc(1, err, "checkpoint cv init (vcpus_can_run)");
}
+#ifndef WITHOUT_CAPSICUM
+/*
+ * Restrict descriptor 's' to the CAP_BIND and CAP_READ rights.  Terminates
+ * the process with EX_OSERR when the limit cannot be applied.
+ */
+static void
+limit_control_socket(int s)
+{
+	cap_rights_t allowed;
+	int rc;
+
+	(void)cap_rights_init(&allowed, CAP_BIND, CAP_READ);
+	rc = caph_rights_limit(s, &allowed);
+	if (rc == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+/*
+ * Restrict the file-scope directory descriptor cdir_fd to the CAP_LOOKUP,
+ * CAP_FTRUNCATE, CAP_PWRITE, CAP_PREAD, CAP_FCNTL and CAP_CREATE rights.
+ * Takes no arguments.  Terminates the process with EX_OSERR when the limit
+ * cannot be applied.
+ */
+static void
+limit_file_operations(void)
+{
+	cap_rights_t rights;
+
+	cap_rights_init(&rights, CAP_LOOKUP, CAP_FTRUNCATE, CAP_PWRITE,
+	    CAP_PREAD, CAP_FCNTL, CAP_CREATE);
+	if (caph_rights_limit(cdir_fd, &rights) == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+}
+
+#endif
+
/*
* Create the listening socket for IPC with bhyvectl
*/
int
-init_checkpoint_thread(struct vmctx *ctx)
+init_checkpoint_thread(struct vmctx *ctx, cap_channel_t *chn)
{
struct checkpoint_thread_info *checkpoint_info = NULL;
struct sockaddr_un addr;
int socket_fd;
pthread_t checkpoint_pthread;
char vmname_buf[MAX_VMNAME];
- int err;
+ int ret, err = 0;
+ char *cdir_name;
memset(&addr, 0, sizeof(addr));
@@ -1530,6 +1646,18 @@
goto fail;
}
+ cdir_name = getcwd(NULL, 0);
+ cdir_fd = open(cdir_name, O_RDONLY | O_DIRECTORY);
+ if (cdir_fd < 0) {
+ perror("Failed to open working directory.");
+ err = -1;
+ goto fail;
+ }
+ free(cdir_name);
+#ifndef WITHOUT_CAPSICUM
+ limit_control_socket(socket_fd);
+ limit_file_operations();
+#endif
addr.sun_family = AF_UNIX;
err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
@@ -1543,6 +1671,7 @@
addr.sun_len = SUN_LEN(&addr);
unlink(addr.sun_path);
+
if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) {
EPRINTLN("Failed to bind socket \"%s\": %s\n",
addr.sun_path, strerror(errno));
@@ -1553,6 +1682,7 @@
checkpoint_info = calloc(1, sizeof(*checkpoint_info));
checkpoint_info->ctx = ctx;
checkpoint_info->socket_fd = socket_fd;
+ checkpoint_info->channel = chn;
err = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread,
checkpoint_info);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Dec 12, 6:20 PM (6 h, 43 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
26907730
Default Alt Text
D34547.id103818.diff (12 KB)
Attached To
Mode
D34547: bhyve - snapshot capsicum integration[Part 1]
Attached
Detach File
Event Timeline
Log In to Comment