Changeset View
Standalone View
usr.sbin/bhyve/snapshot.c
Show First 20 Lines • Show All 165 Lines • ▼ Show 20 Lines | const struct vm_snapshot_kern_info snapshot_kern_structs[] = { | |||||||||
{ "vrtc", STRUCT_VRTC }, | { "vrtc", STRUCT_VRTC }, | |||||||||
}; | }; | |||||||||
static cpuset_t vcpus_active, vcpus_suspended; | static cpuset_t vcpus_active, vcpus_suspended; | |||||||||
static pthread_mutex_t vcpu_lock; | static pthread_mutex_t vcpu_lock; | |||||||||
static pthread_cond_t vcpus_idle, vcpus_can_run; | static pthread_cond_t vcpus_idle, vcpus_can_run; | |||||||||
static bool checkpoint_active; | static bool checkpoint_active; | |||||||||
static int cdir_fd = -1; | static int cdir_fd = AT_FDCWD; | |||||||||
static cap_channel_t *casper_channel = NULL; | ||||||||||
gusev.vitaliy_gmail.com: why 'capsysctl' is required? | ||||||||||
/* | /* | |||||||||
* TODO: Harden this function and all of its callers since 'base_str' is a user | * TODO: Harden this function and all of its callers since 'base_str' is a user | |||||||||
* provided string. | * provided string. | |||||||||
*/ | */ | |||||||||
static char * | static char * | |||||||||
strcat_extension(const char *base_str, const char *ext) | strcat_extension(const char *base_str, const char *ext) | |||||||||
{ | { | |||||||||
char *res; | char *res; | |||||||||
Show All 37 Lines | if (rstate->vmmem_fd > 0) | |||||||||
close(rstate->vmmem_fd); | close(rstate->vmmem_fd); | |||||||||
if (rstate->meta_root_obj != NULL) | if (rstate->meta_root_obj != NULL) | |||||||||
ucl_object_unref(rstate->meta_root_obj); | ucl_object_unref(rstate->meta_root_obj); | |||||||||
if (rstate->meta_parser != NULL) | if (rstate->meta_parser != NULL) | |||||||||
ucl_parser_free(rstate->meta_parser); | ucl_parser_free(rstate->meta_parser); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | |||||||||
Not Done Inline ActionsI guess all under ifdef is not really required. gusev.vitaliy_gmail.com: I guess all under ifdef is not really required. | ||||||||||
Not Done Inline ActionsWhy? emaste: Why? | ||||||||||
Not Done Inline ActionsBecause it should work w/o that code. What problem does that code solve? gusev.vitaliy_gmail.com: Because it should work w/o that code. What problem does that code solve? | ||||||||||
Not Done Inline ActionsDo these sockets not remain open in the running bhyve process? The intent is to limit the privileges of any fds that remain open. emaste: Do these sockets not remain open in the running bhyve process? The intent is to limit the… | ||||||||||
Not Done Inline ActionsFor "checkpoint" all fds are closed or unmapped. If consider "restore" currently they are not closed after finished restore, but I believe it is a bug. All fd-s should be unmapped and closed because all data are copied to vmm or kernel and are not required after successful restore. And note, name "socket" also is wrong here - all fd-s are not socket. gusev.vitaliy_gmail.com: For "checkpoint" all fds are closed or unmapped. If consider "restore" currently they are not… | ||||||||||
Not Done Inline ActionsI see that restore path also has cleanup in main(), so all fd-s should be closed after "init" stage. And additionally I believe that intention of caph_rights_limit() is not limit, but allowing and by default all open file descriptor is strictly limited. main(): if (restore_file != NULL) destroy_restore_state(&rstate); gusev.vitaliy_gmail.com: I see that restore path also has cleanup in main(), so all fd-s should be closed after "init"… | ||||||||||
Not Done Inline ActionsBy default a fd starts with all rights (see cap_rights_limit man page). It is still reasonable to limit rights to only those necessary, but not as important as for fds that remain open in a process involving untrusted data. Name could be limit_vmmem_rights instead of _socket emaste: By default a fd starts with all rights (see cap_rights_limit man page). It is still reasonable… | ||||||||||
static void | static void | |||||||||
limit_vmmem_socket(int s) | limit_vmmem_socket(int s) | |||||||||
{ | { | |||||||||
cap_rights_t rights; | cap_rights_t rights; | |||||||||
cap_rights_init(&rights, CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL, CAP_READ); | cap_rights_init(&rights, CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL, CAP_READ); | |||||||||
if (caph_rights_limit(s, &rights) == -1) | if (caph_rights_limit(s, &rights) == -1) | |||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | errx(EX_OSERR, "Unable to apply rights for sandbox"); | |||||||||
Show All 18 Lines | load_vmmem_file(const char *filename, struct restore_state *rstate) | |||||||||
int err; | int err; | |||||||||
rstate->vmmem_fd = open(filename, O_RDONLY); | rstate->vmmem_fd = open(filename, O_RDONLY); | |||||||||
if (rstate->vmmem_fd < 0) { | if (rstate->vmmem_fd < 0) { | |||||||||
perror("Failed to open restore file"); | perror("Failed to open restore file"); | |||||||||
return (-1); | return (-1); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | |||||||||
Not Done Inline ActionsThis also is not required. gusev.vitaliy_gmail.com: This also is not required. | ||||||||||
limit_vmmem_socket(rstate->vmmem_fd); | limit_vmmem_socket(rstate->vmmem_fd); | |||||||||
#endif | #endif | |||||||||
err = fstat(rstate->vmmem_fd, &sb); | err = fstat(rstate->vmmem_fd, &sb); | |||||||||
if (err < 0) { | if (err < 0) { | |||||||||
perror("Failed to stat restore file"); | perror("Failed to stat restore file"); | |||||||||
goto err_load_vmmem; | goto err_load_vmmem; | |||||||||
} | } | |||||||||
Show All 21 Lines | load_kdata_file(const char *filename, struct restore_state *rstate) | |||||||||
rstate->kdata_fd = open(filename, O_RDONLY); | rstate->kdata_fd = open(filename, O_RDONLY); | |||||||||
if (rstate->kdata_fd < 0) { | if (rstate->kdata_fd < 0) { | |||||||||
perror("Failed to open kernel data file"); | perror("Failed to open kernel data file"); | |||||||||
return (-1); | return (-1); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | |||||||||
limit_kernel_socket(rstate->kdata_fd); | limit_kernel_socket(rstate->kdata_fd); | |||||||||
Not Done Inline ActionsThis is not required ? gusev.vitaliy_gmail.com: This is not required ? | ||||||||||
#endif | #endif | |||||||||
err = fstat(rstate->kdata_fd, &sb); | err = fstat(rstate->kdata_fd, &sb); | |||||||||
if (err < 0) { | if (err < 0) { | |||||||||
perror("Failed to stat kernel data file"); | perror("Failed to stat kernel data file"); | |||||||||
goto err_load_kdata; | goto err_load_kdata; | |||||||||
} | } | |||||||||
Show All 20 Lines | ||||||||||
static int | static int | |||||||||
load_metadata_file(const char *filename, struct restore_state *rstate) | load_metadata_file(const char *filename, struct restore_state *rstate) | |||||||||
{ | { | |||||||||
const ucl_object_t *obj; | const ucl_object_t *obj; | |||||||||
struct ucl_parser *parser; | struct ucl_parser *parser; | |||||||||
int err; | int err; | |||||||||
parser = ucl_parser_new(UCL_PARSER_DEFAULT); | parser = ucl_parser_new(UCL_PARSER_DEFAULT); | |||||||||
Not Done Inline ActionsWhy not openat()? And this function needs changes at all if consider capsicum support? gusev.vitaliy_gmail.com: Why not openat()? And this function needs changes at all if consider capsicum support? | ||||||||||
Not Done Inline ActionsThe changes would make sense if the cap_enter is called before the restore is actually done and openat should be used in that case because would not work, but considering the 'restore' files are opened and closed before cap_enter the function should remain unchanged. ionut.mihalache1506_gmail.com: The changes would make sense if the cap_enter is called before the restore is actually done and… | ||||||||||
if (parser == NULL) { | if (parser == NULL) { | |||||||||
fprintf(stderr, "Failed to initialize UCL parser.\n"); | fprintf(stderr, "Failed to initialize UCL parser.\n"); | |||||||||
goto err_load_metadata; | goto err_load_metadata; | |||||||||
} | } | |||||||||
err = ucl_parser_add_file(parser, filename); | err = ucl_parser_add_file(parser, filename); | |||||||||
Not Done Inline ActionsMissing error checking. markj: Missing error checking. | ||||||||||
if (err == 0) { | if (err == 0) { | |||||||||
fprintf(stderr, "Failed to parse metadata file: '%s'\n", | fprintf(stderr, "Failed to parse metadata file: '%s'\n", | |||||||||
filename); | filename); | |||||||||
err = -1; | err = -1; | |||||||||
goto err_load_metadata; | goto err_load_metadata; | |||||||||
} | } | |||||||||
obj = ucl_parser_get_object(parser); | obj = ucl_parser_get_object(parser); | |||||||||
▲ Show 20 Lines • Show All 975 Lines • ▼ Show 20 Lines | checkpoint_cpu_resume(int vcpu) | |||||||||
pthread_mutex_lock(&vcpu_lock); | pthread_mutex_lock(&vcpu_lock); | |||||||||
while (checkpoint_active) | while (checkpoint_active) | |||||||||
pthread_cond_wait(&vcpus_can_run, &vcpu_lock); | pthread_cond_wait(&vcpus_can_run, &vcpu_lock); | |||||||||
CPU_CLR(vcpu, &vcpus_suspended); | CPU_CLR(vcpu, &vcpus_suspended); | |||||||||
pthread_mutex_unlock(&vcpu_lock); | pthread_mutex_unlock(&vcpu_lock); | |||||||||
} | } | |||||||||
static void | static void | |||||||||
vm_vcpu_pause(struct vmctx *ctx) | vm_vcpu_pause(struct vmctx *ctx) | |||||||||
Not Done Inline ActionsThis function should return value and vm_checkpoint() should check that value. gusev.vitaliy_gmail.com: This function should return value and vm_checkpoint() should check that value. | ||||||||||
{ | { | |||||||||
int err; | int err; | |||||||||
pthread_mutex_lock(&vcpu_lock); | pthread_mutex_lock(&vcpu_lock); | |||||||||
checkpoint_active = true; | checkpoint_active = true; | |||||||||
err = vm_suspend_cpu(ctx, -1); | err = vm_suspend_cpu(ctx, -1); | |||||||||
if (err != 0) { | if (err != 0) { | |||||||||
fprintf(stderr, "%s: Could not suspend vcpus\r\n", __func__); | fprintf(stderr, "%s: Could not suspend vcpus\r\n", __func__); | |||||||||
Not Done Inline Actions
markj: | ||||||||||
Not Done Inline ActionsEPRINTLN() is preferred for new code, which handles raw mode; see bhyve/debug.h rew: EPRINTLN() is preferred for new code, which handles raw mode; see bhyve/debug.h | ||||||||||
pthread_mutex_unlock(&vcpu_lock); | pthread_mutex_unlock(&vcpu_lock); | |||||||||
return; | return; | |||||||||
} | } | |||||||||
while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0) | while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0) | |||||||||
pthread_cond_wait(&vcpus_idle, &vcpu_lock); | pthread_cond_wait(&vcpus_idle, &vcpu_lock); | |||||||||
pthread_mutex_unlock(&vcpu_lock); | pthread_mutex_unlock(&vcpu_lock); | |||||||||
} | } | |||||||||
static void | static void | |||||||||
vm_vcpu_resume(struct vmctx *ctx) | vm_vcpu_resume(struct vmctx *ctx) | |||||||||
{ | { | |||||||||
pthread_mutex_lock(&vcpu_lock); | pthread_mutex_lock(&vcpu_lock); | |||||||||
checkpoint_active = false; | checkpoint_active = false; | |||||||||
pthread_mutex_unlock(&vcpu_lock); | pthread_mutex_unlock(&vcpu_lock); | |||||||||
vm_resume_cpu(ctx, -1); | vm_resume_cpu(ctx, -1); | |||||||||
pthread_cond_broadcast(&vcpus_can_run); | pthread_cond_broadcast(&vcpus_can_run); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | |||||||||
#define DESTROY(vm, ch, err, LABEL) \ | #define DESTROY(vm, ch, err, LABEL) \ | |||||||||
do { \ | do { \ | |||||||||
Not Done Inline ActionsI suggest remove this function and vm-destroy usage. Leave vm state as is on suspend. gusev.vitaliy_gmail.com: I suggest remove this function and vm-destroy usage. Leave vm state as is on suspend. | ||||||||||
Not Done Inline ActionsCan you give more details about how exactly it would be better to approach the usage of vm_destroy? If I only remove function call then resources will not be freed and the guest will be stopped due to the exit call afterwards and a new guest can't be opened or restored until a vm_destroy call is requested through bhyvectl and if I don't use exit as well, wouldn't this remove the "suspend" functionality and allow only "checkpoint" functionality? ionut.mihalache1506_gmail.com: Can you give more details about how exactly it would be better to approach the usage of… | ||||||||||
cap_channel_t *capsysctl = NULL; \ | cap_channel_t *capsysctl = NULL; \ | |||||||||
char *name = "hw.vmm.destroy"; \ | char *name = "hw.vmm.destroy"; \ | |||||||||
void *limit; \ | void *limit; \ | |||||||||
\ | \ | |||||||||
/* Create capability to the system.sysctl service with Casper. */ \ | /* Create capability to the system.sysctl service with Casper. */ \ | |||||||||
capsysctl = cap_service_open(ch, "system.sysctl"); \ | capsysctl = cap_service_open(ch, "system.sysctl"); \ | |||||||||
Not Done Inline ActionsI see that casper that you added is really needed only for destroying vm environment via sysctl. But what if leave it as I and do not destroy vm env? This behaviour could be similar as after "halt" in VM. gusev.vitaliy_gmail.com: I see that casper that you added is really needed only for destroying vm environment via sysctl. | ||||||||||
Not Done Inline Actions*leave it as is and do not destroy vm env on suspend* gusev.vitaliy_gmail.com: *leave it as is and do not destroy vm env on suspend* | ||||||||||
Not Done Inline ActionsThis is the wrong time to be creating a casper service. All services should be started at initialization time, after which the "casper capability" should be closed. markj: This is the wrong time to be creating a casper service. All services should be started at… | ||||||||||
Not Done Inline ActionsSorry, if user does "halt" inside VM, VM context is not destroyed when bhyve process ends. gusev.vitaliy_gmail.com: Sorry, if user does "halt" inside VM, VM context is not destroyed when bhyve process ends. | ||||||||||
if (capsysctl == NULL) \ | if (capsysctl == NULL) \ | |||||||||
fprintf(stderr, "%s: Unable to open system.sysctl service", __func__); \ | fprintf(stderr, "%s: Unable to open system.sysctl service", __func__); \ | |||||||||
\ | \ | |||||||||
cap_close(ch); \ | cap_close(ch); \ | |||||||||
\ | \ | |||||||||
/* Create limit for one MIB with write access only. */ \ | /* Create limit for one MIB with write access only. */ \ | |||||||||
limit = cap_sysctl_limit_init(capsysctl); \ | limit = cap_sysctl_limit_init(capsysctl); \ | |||||||||
(void)cap_sysctl_limit_name(limit, name, CAP_SYSCTL_WRITE); \ | (void)cap_sysctl_limit_name(limit, name, CAP_SYSCTL_WRITE); \ | |||||||||
\ | \ | |||||||||
/* Limit system.sysctl. */ \ | /* Limit system.sysctl. */ \ | |||||||||
if (cap_sysctl_limit(limit) < 0) \ | if (cap_sysctl_limit(limit) < 0) \ | |||||||||
fprintf(stderr, "%s: Unable to set limits", __func__); \ | fprintf(stderr, "%s: Unable to set limits", __func__); \ | |||||||||
\ | \ | |||||||||
err = cap_sysctlbyname(capsysctl, name, NULL, NULL, (vm), strlen((vm))); \ | err = cap_sysctlbyname(capsysctl, name, NULL, NULL, (vm), strlen((vm))); \ | |||||||||
\ | \ | |||||||||
cap_close(capsysctl); \ | cap_close(capsysctl); \ | |||||||||
if (err != 0) { \ | if (err != 0) { \ | |||||||||
fprintf(stderr, "%s: err is %d\r\n", __func__, errno); \ | fprintf(stderr, "%s: err is %d\r\n", __func__, errno); \ | |||||||||
goto LABEL; \ | goto LABEL; \ | |||||||||
} \ | } \ | |||||||||
} while(0) | } while(0) | |||||||||
#endif | #endif | |||||||||
Not Done Inline ActionsThis doesn't need to be a macro, please make it a regular subroutine. markj: This doesn't need to be a macro, please make it a regular subroutine. | ||||||||||
static int | static int | |||||||||
vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, cap_channel_t *chn, bool stop_vm) | vm_checkpoint(struct vmctx *ctx, const char *checkpoint_file, bool stop_vm) | |||||||||
{ | { | |||||||||
int fd_checkpoint = 0, kdata_fd = 0, meta_fd = 0; | int fd_checkpoint = 0, kdata_fd = 0, meta_fd = 0; | |||||||||
Not Done Inline ActionsBTW, usage 0 is wrong here, cleanup path also should be corrected with (fd_checkpoint >= 0) condition. gusev.vitaliy_gmail.com: BTW, usage 0 is wrong here, cleanup path also should be corrected with (fd_checkpoint >= 0)… | ||||||||||
int ret = 0; | int ret = 0; | |||||||||
int error = 0; | int error = 0; | |||||||||
size_t memsz; | size_t memsz; | |||||||||
xo_handle_t *xop = NULL; | xo_handle_t *xop = NULL; | |||||||||
char *meta_filename = NULL; | char *meta_filename = NULL; | |||||||||
char *kdata_filename = NULL; | char *kdata_filename = NULL; | |||||||||
FILE *meta_file = NULL; | FILE *meta_file = NULL; | |||||||||
char vmname[MAX_VMNAME]; | char vmname[MAX_VMNAME]; | |||||||||
kdata_filename = strcat_extension(checkpoint_file, ".kern"); | kdata_filename = strcat_extension(checkpoint_file, ".kern"); | |||||||||
if (kdata_filename == NULL) { | if (kdata_filename == NULL) { | |||||||||
fprintf(stderr, "Failed to construct kernel data filename.\n"); | fprintf(stderr, "Failed to construct kernel data filename.\n"); | |||||||||
return (-1); | return (-1); | |||||||||
} | } | |||||||||
kdata_fd = openat(cdir_fd, kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); | kdata_fd = openat(cdir_fd, kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); | |||||||||
Not Done Inline ActionsBTW, why do permission masks (0700) have execute bit here? gusev.vitaliy_gmail.com: BTW, why do permission masks (0700) have execute bit here? | ||||||||||
if (kdata_fd < 0) { | if (kdata_fd < 0) { | |||||||||
perror("Failed to open kernel data snapshot file."); | perror("Failed to open kernel data snapshot file."); | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
fd_checkpoint = openat(cdir_fd, checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); | fd_checkpoint = openat(cdir_fd, checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); | |||||||||
if (fd_checkpoint < 0) { | if (fd_checkpoint < 0) { | |||||||||
Show All 11 Lines | vm_checkpoint(struct vmctx *ctx, const char *checkpoint_file, bool stop_vm) | |||||||||
meta_fd = openat(cdir_fd, meta_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); | meta_fd = openat(cdir_fd, meta_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); | |||||||||
if (meta_fd < 0) { | if (meta_fd < 0) { | |||||||||
perror("Failed to open vm metadata snapshot file descriptor."); | perror("Failed to open vm metadata snapshot file descriptor."); | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
meta_file = fdopen(meta_fd, "w"); | meta_file = fdopen(meta_fd, "w"); | |||||||||
if (meta_file == NULL) { | if (meta_file == NULL) { | |||||||||
perror("Failed to open vm metadata snapshot file."); | perror("Failed to open vm metadata snapshot file."); | |||||||||
goto done; | goto done; | |||||||||
Not Done Inline ActionsPotential meta_fd leak here. It should be closed on fdopen() failure. gusev.vitaliy_gmail.com: Potential meta_fd leak here. It should be closed on fdopen() failure. | ||||||||||
} | } | |||||||||
xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY); | xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY); | |||||||||
if (xop == NULL) { | if (xop == NULL) { | |||||||||
perror("Failed to get libxo handle on metadata file."); | perror("Failed to get libxo handle on metadata file."); | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
vm_vcpu_pause(ctx); | vm_vcpu_pause(ctx); | |||||||||
ret = vm_pause_user_devs(ctx); | ret = vm_pause_user_devs(ctx); | |||||||||
if (ret != 0) { | if (ret != 0) { | |||||||||
fprintf(stderr, "Could not pause devices\r\n"); | fprintf(stderr, "Could not pause devices\r\n"); | |||||||||
error = ret; | error = ret; | |||||||||
goto done; | goto done; | |||||||||
if (cdir_fd > 0) | } | |||||||||
Not Done Inline ActionsThe brace looks like it's in the wrong place. The subsequent revisions modify this section some more, and finally the close() is removed again in the last revision, so this is just noise. markj: The brace looks like it's in the wrong place. The subsequent revisions modify this section some… | ||||||||||
close(cdir_fd);} | ||||||||||
memsz = vm_snapshot_mem(ctx, fd_checkpoint, 0, true); | memsz = vm_snapshot_mem(ctx, fd_checkpoint, 0, true); | |||||||||
if (memsz == 0) { | if (memsz == 0) { | |||||||||
perror("Could not write guest memory to file"); | perror("Could not write guest memory to file"); | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
Show All 17 Lines | if (ret != 0) { | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
xo_finish_h(xop); | xo_finish_h(xop); | |||||||||
if (stop_vm) { | if (stop_vm) { | |||||||||
if (chn != NULL) { | if (casper_channel != NULL) { | |||||||||
error = vm_get_name(ctx, vmname, MAX_VMNAME - 1); | error = vm_get_name(ctx, vmname, MAX_VMNAME - 1); | |||||||||
if (error != 0) { | if (error != 0) { | |||||||||
fprintf(stderr, "%s: Failed to get VM name", __func__); | fprintf(stderr, "%s: Failed to get VM name", __func__); | |||||||||
Not Done Inline Actions
markj: | ||||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
DESTROY(vmname, chn, error, done); | DESTROY(vmname, casper_channel, error, done); | |||||||||
free(ctx); | free(ctx); | |||||||||
} else | } else | |||||||||
vm_destroy(ctx); | vm_destroy(ctx); | |||||||||
exit(0); | exit(0); | |||||||||
} | } | |||||||||
done: | done: | |||||||||
Show All 9 Lines | if (kdata_filename != NULL) | |||||||||
free(kdata_filename); | free(kdata_filename); | |||||||||
if (xop != NULL) | if (xop != NULL) | |||||||||
xo_destroy(xop); | xo_destroy(xop); | |||||||||
if (meta_file != NULL) | if (meta_file != NULL) | |||||||||
fclose(meta_file); | fclose(meta_file); | |||||||||
if (kdata_fd > 0) | if (kdata_fd > 0) | |||||||||
close(kdata_fd); | close(kdata_fd); | |||||||||
if (cdir_fd > 0) | if (cdir_fd > 0) | |||||||||
close(cdir_fd); | close(cdir_fd); | |||||||||
Not Done Inline ActionsWhy cdir_fd is closed here? It should persist cross "checkpoint" or "suspend" calls. And using it under "ifdef WITHOUT_CAPSICUM" is also wrong. gusev.vitaliy_gmail.com: Why cdir_fd is closed here? It should persist cross "checkpoint" or "suspend" calls. And using… | ||||||||||
return (error); | return (error); | |||||||||
} | } | |||||||||
int | int | |||||||||
handle_message(struct vmctx *ctx, nvlist_t *nvl, cap_channel_t *chn) | handle_message(struct vmctx *ctx, nvlist_t *nvl) | |||||||||
{ | { | |||||||||
int err; | int err; | |||||||||
const char *cmd; | const char *cmd; | |||||||||
if (!nvlist_exists_string(nvl, "cmd")) | if (!nvlist_exists_string(nvl, "cmd")) | |||||||||
return (-1); | return (-1); | |||||||||
cmd = nvlist_get_string(nvl, "cmd"); | cmd = nvlist_get_string(nvl, "cmd"); | |||||||||
if (strcmp(cmd, "checkpoint") == 0) { | if (strcmp(cmd, "checkpoint") == 0) { | |||||||||
if (!nvlist_exists_string(nvl, "filename") || | if (!nvlist_exists_string(nvl, "filename") || | |||||||||
!nvlist_exists_bool(nvl, "suspend")) | !nvlist_exists_bool(nvl, "suspend")) | |||||||||
err = -1; | err = -1; | |||||||||
else | else | |||||||||
err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"), | err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"), | |||||||||
chn, nvlist_get_bool(nvl, "suspend")); | nvlist_get_bool(nvl, "suspend")); | |||||||||
} else { | } else { | |||||||||
EPRINTLN("Unrecognized checkpoint operation\n"); | EPRINTLN("Unrecognized checkpoint operation\n"); | |||||||||
err = -1; | err = -1; | |||||||||
} | } | |||||||||
if (err != 0) | if (err != 0) | |||||||||
EPRINTLN("Unable to perform the requested operation\n"); | EPRINTLN("Unable to perform the requested operation\n"); | |||||||||
Show All 15 Lines | checkpoint_thread(void *param) | |||||||||
for (;;) { | for (;;) { | |||||||||
n = recvfrom(thread_info->socket_fd, &imsg, sizeof(imsg), 0, NULL, 0); | n = recvfrom(thread_info->socket_fd, &imsg, sizeof(imsg), 0, NULL, 0); | |||||||||
/* | /* | |||||||||
* slight sanity check: see if there's enough data to at | * slight sanity check: see if there's enough data to at | |||||||||
* least determine the type of message. | * least determine the type of message. | |||||||||
*/ | */ | |||||||||
if (n >= sizeof(imsg.code)) | if (nvl != NULL) | |||||||||
handle_message(&imsg, thread_info->ctx, thread_info->channel); | handle_message(thread_info->ctx, nvl); | |||||||||
else | else | |||||||||
EPRINTLN("nvlist_recv() failed: %s", strerror(errno)); | EPRINTLN("nvlist_recv() failed: %s", strerror(errno)); | |||||||||
} | } | |||||||||
return (NULL); | return (NULL); | |||||||||
} | } | |||||||||
void | void | |||||||||
Not Done Inline ActionsWhy all below under ifdef is required? I guess it is excessive. gusev.vitaliy_gmail.com: Why all below under ifdef is required? I guess it is excessive. | ||||||||||
init_snapshot(void) | init_snapshot(void) | |||||||||
{ | { | |||||||||
int err; | int err; | |||||||||
err = pthread_mutex_init(&vcpu_lock, NULL); | err = pthread_mutex_init(&vcpu_lock, NULL); | |||||||||
if (err != 0) | if (err != 0) | |||||||||
errc(1, err, "checkpoint mutex init"); | errc(1, err, "checkpoint mutex init"); | |||||||||
err = pthread_cond_init(&vcpus_idle, NULL); | err = pthread_cond_init(&vcpus_idle, NULL); | |||||||||
Show All 11 Lines | limit_control_socket(int s) | |||||||||
cap_rights_t rights; | cap_rights_t rights; | |||||||||
cap_rights_init(&rights, CAP_BIND, CAP_READ); | cap_rights_init(&rights, CAP_BIND, CAP_READ); | |||||||||
if (caph_rights_limit(s, &rights) == -1) | if (caph_rights_limit(s, &rights) == -1) | |||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | errx(EX_OSERR, "Unable to apply rights for sandbox"); | |||||||||
} | } | |||||||||
static void | static void | |||||||||
limit_file_operations() | limit_file_operations() | |||||||||
Not Done Inline Actions
markj: | ||||||||||
{ | { | |||||||||
cap_rights_t rights; | cap_rights_t rights; | |||||||||
cap_rights_init(&rights, CAP_LOOKUP, CAP_FTRUNCATE, CAP_PWRITE, CAP_PREAD, CAP_FCNTL, CAP_CREATE); | cap_rights_init(&rights, CAP_LOOKUP, CAP_FTRUNCATE, CAP_PWRITE, CAP_PREAD, CAP_FCNTL, CAP_CREATE); | |||||||||
if (caph_rights_limit(cdir_fd, &rights) == -1) | if (caph_rights_limit(cdir_fd, &rights) == -1) | |||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | errx(EX_OSERR, "Unable to apply rights for sandbox"); | |||||||||
} | } | |||||||||
#endif | #endif | |||||||||
void | ||||||||||
init_capsicum_info(char *ckp_path) | ||||||||||
{ | ||||||||||
Not Done Inline ActionsThe casper channel should be closed after initialization is complete. Otherwise Capsicum is not going to accomplish anything, since malicious code can always request new capabilities via new casper services. markj: The casper channel should be closed after initialization is complete. Otherwise Capsicum is not… | ||||||||||
/* Open capability to Casper. */ | ||||||||||
casper_channel = cap_init(); | ||||||||||
if (casper_channel == NULL) | ||||||||||
errx(EX_OSERR, "cap_init() failed"); | ||||||||||
if (ckp_path != NULL) { | ||||||||||
cdir_fd = open(ckp_path, O_RDONLY | O_DIRECTORY); | ||||||||||
if (cdir_fd < 0) | ||||||||||
errc(1, cdir_fd, "open snapshot files directory"); | ||||||||||
limit_file_operations(); | ||||||||||
} | ||||||||||
} | ||||||||||
/* | /* | |||||||||
* Create the listening socket for IPC with bhyvectl | * Create the listening socket for IPC with bhyvectl | |||||||||
*/ | */ | |||||||||
int | int | |||||||||
init_checkpoint_thread(struct vmctx *ctx, cap_channel_t *chn) | init_checkpoint_thread(struct vmctx *ctx) | |||||||||
{ | { | |||||||||
struct checkpoint_thread_info *checkpoint_info = NULL; | struct checkpoint_thread_info *checkpoint_info = NULL; | |||||||||
struct sockaddr_un addr; | struct sockaddr_un addr; | |||||||||
int socket_fd; | int socket_fd; | |||||||||
pthread_t checkpoint_pthread; | pthread_t checkpoint_pthread; | |||||||||
char vmname_buf[MAX_VMNAME]; | char vmname_buf[MAX_VMNAME]; | |||||||||
int ret, err = 0; | int ret, err = 0; | |||||||||
char *cdir_name; | char *cdir_name; | |||||||||
memset(&addr, 0, sizeof(addr)); | memset(&addr, 0, sizeof(addr)); | |||||||||
socket_fd = socket(PF_UNIX, SOCK_DGRAM, 0); | socket_fd = socket(PF_UNIX, SOCK_DGRAM, 0); | |||||||||
if (socket_fd < 0) { | if (socket_fd < 0) { | |||||||||
EPRINTLN("Socket creation failed: %s", strerror(errno)); | EPRINTLN("Socket creation failed: %s", strerror(errno)); | |||||||||
err = -1; | err = -1; | |||||||||
goto fail; | goto fail; | |||||||||
} | } | |||||||||
cdir_name = getcwd(NULL, 0); | ||||||||||
cdir_fd = open(cdir_name, O_RDONLY | O_DIRECTORY); | ||||||||||
if (cdir_fd < 0) { | ||||||||||
perror("Failed to open working directory."); | ||||||||||
err = -1; | ||||||||||
goto fail; | ||||||||||
} | ||||||||||
free(cdir_name); | ||||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
limit_control_socket(socket_fd); | limit_control_socket(socket_fd); | |||||||||
Not Done Inline ActionsOriginal code works with capsicum w/o any changing, so this function is not required. gusev.vitaliy_gmail.com: Original code works with capsicum w/o any changing, so this function is not required. | ||||||||||
limit_file_operations(); | ||||||||||
#endif | ||||||||||
addr.sun_family = AF_UNIX; | addr.sun_family = AF_UNIX; | |||||||||
err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); | err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); | |||||||||
if (err != 0) { | if (err != 0) { | |||||||||
perror("Failed to get VM name"); | perror("Failed to get VM name"); | |||||||||
goto fail; | goto fail; | |||||||||
} | } | |||||||||
snprintf(addr.sun_path, sizeof(addr.sun_path), "%s%s", | snprintf(addr.sun_path, sizeof(addr.sun_path), "%s%s", | |||||||||
BHYVE_RUN_DIR, vmname_buf); | BHYVE_RUN_DIR, vmname_buf); | |||||||||
addr.sun_len = SUN_LEN(&addr); | addr.sun_len = SUN_LEN(&addr); | |||||||||
unlink(addr.sun_path); | unlink(addr.sun_path); | |||||||||
if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) { | if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) { | |||||||||
EPRINTLN("Failed to bind socket \"%s\": %s\n", | EPRINTLN("Failed to bind socket \"%s\": %s\n", | |||||||||
addr.sun_path, strerror(errno)); | addr.sun_path, strerror(errno)); | |||||||||
err = -1; | err = -1; | |||||||||
goto fail; | goto fail; | |||||||||
} | } | |||||||||
checkpoint_info = calloc(1, sizeof(*checkpoint_info)); | checkpoint_info = calloc(1, sizeof(*checkpoint_info)); | |||||||||
checkpoint_info->ctx = ctx; | checkpoint_info->ctx = ctx; | |||||||||
checkpoint_info->socket_fd = socket_fd; | checkpoint_info->socket_fd = socket_fd; | |||||||||
checkpoint_info->channel = chn; | ||||||||||
err = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread, | err = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread, | |||||||||
checkpoint_info); | checkpoint_info); | |||||||||
if (err != 0) | if (err != 0) | |||||||||
goto fail; | goto fail; | |||||||||
return (0); | return (0); | |||||||||
fail: | fail: | |||||||||
▲ Show 20 Lines • Show All 140 Lines • Show Last 20 Lines |
why 'capsysctl' is required?