Changeset View
Standalone View
usr.sbin/bhyve/snapshot.c
Show First 20 Lines • Show All 165 Lines • ▼ Show 20 Lines | const struct vm_snapshot_kern_info snapshot_kern_structs[] = { | |||||||||
{ "vrtc", STRUCT_VRTC }, | { "vrtc", STRUCT_VRTC }, | |||||||||
}; | }; | |||||||||
static cpuset_t vcpus_active, vcpus_suspended; | static cpuset_t vcpus_active, vcpus_suspended; | |||||||||
static pthread_mutex_t vcpu_lock; | static pthread_mutex_t vcpu_lock; | |||||||||
static pthread_cond_t vcpus_idle, vcpus_can_run; | static pthread_cond_t vcpus_idle, vcpus_can_run; | |||||||||
static bool checkpoint_active; | static bool checkpoint_active; | |||||||||
static int cdir_fd = AT_FDCWD; | ||||||||||
/* | /* | |||||||||
gusev.vitaliy_gmail.com: why 'capsysctl' is required? | ||||||||||
* TODO: Harden this function and all of its callers since 'base_str' is a user | * TODO: Harden this function and all of its callers since 'base_str' is a user | |||||||||
* provided string. | * provided string. | |||||||||
*/ | */ | |||||||||
static char * | static char * | |||||||||
strcat_extension(const char *base_str, const char *ext) | strcat_extension(const char *base_str, const char *ext) | |||||||||
{ | { | |||||||||
char *res; | char *res; | |||||||||
size_t base_len, ext_len; | size_t base_len, ext_len; | |||||||||
Show All 36 Lines | if (rstate->vmmem_fd > 0) | |||||||||
close(rstate->vmmem_fd); | close(rstate->vmmem_fd); | |||||||||
if (rstate->meta_root_obj != NULL) | if (rstate->meta_root_obj != NULL) | |||||||||
ucl_object_unref(rstate->meta_root_obj); | ucl_object_unref(rstate->meta_root_obj); | |||||||||
if (rstate->meta_parser != NULL) | if (rstate->meta_parser != NULL) | |||||||||
ucl_parser_free(rstate->meta_parser); | ucl_parser_free(rstate->meta_parser); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
Not Done Inline ActionsI guess all under ifdef is not really required. gusev.vitaliy_gmail.com: I guess all under ifdef is not really required. | ||||||||||
Not Done Inline ActionsWhy? emaste: Why? | ||||||||||
Not Done Inline ActionsBecause it should work w/o that code. What problem does that code solve? gusev.vitaliy_gmail.com: Because it should work w/o that code. What problem does that code solve? | ||||||||||
Not Done Inline ActionsDo these sockets not remain open in the running bhyve process? The intent is to limit the privileges of any fds that remain open. emaste: Do these sockets not remain open in the running bhyve process? The intent is to limit the… | ||||||||||
Not Done Inline ActionsFor "checkpoint" all fds are closed or unmapped. If consider "restore" currently they are not closed after finished restore, but I believe it is a bug. All fd-s should be unmapped and closed because all data are copied to vmm or kernel and are not required after successful restore. And note, name "socket" also is wrong here - all fd-s are not socket. gusev.vitaliy_gmail.com: For "checkpoint" all fds are closed or unmapped. If consider "restore" currently they are not… | ||||||||||
Not Done Inline ActionsI see that restore path also has cleanup in main(), so all fd-s should be closed after "init" stage. And additionally I believe that intention of caph_rights_limit() is not limit, but allowing and by default all open file descriptor is strictly limited. main(): if (restore_file != NULL) destroy_restore_state(&rstate); gusev.vitaliy_gmail.com: I see that restore path also has cleanup in main(), so all fd-s should be closed after "init"… | ||||||||||
Not Done Inline ActionsBy default a fd starts with all rights (see cap_rights_limit man page). It is still reasonable to limit rights to only those necessary, but not as important as for fds that remain open in a process involving untrusted data. Name could be limit_vmmem_rights instead of _socket emaste: By default a fd starts with all rights (see cap_rights_limit man page). It is still reasonable… | ||||||||||
static void | ||||||||||
limit_vmmem_socket(int s) | ||||||||||
{ | ||||||||||
cap_rights_t rights; | ||||||||||
cap_rights_init(&rights, CAP_FSTAT, CAP_MMAP_R, CAP_IOCTL, CAP_READ); | ||||||||||
if (caph_rights_limit(s, &rights) == -1) | ||||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | ||||||||||
} | ||||||||||
static void | ||||||||||
limit_kernel_socket(int s) | ||||||||||
{ | ||||||||||
cap_rights_t rights; | ||||||||||
cap_rights_init(&rights, CAP_FSTAT, CAP_MMAP_R, CAP_READ); | ||||||||||
if (caph_rights_limit(s, &rights) == -1) | ||||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | ||||||||||
} | ||||||||||
static void | ||||||||||
limit_metadata_socket(int s) | ||||||||||
{ | ||||||||||
cap_rights_t rights; | ||||||||||
cap_rights_init(&rights, CAP_FSTAT, CAP_MMAP_R, CAP_READ); | ||||||||||
if (caph_rights_limit(s, &rights) == -1) | ||||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | ||||||||||
} | ||||||||||
#endif | ||||||||||
static int | static int | |||||||||
load_vmmem_file(const char *filename, struct restore_state *rstate) | load_vmmem_file(const char *filename, struct restore_state *rstate) | |||||||||
{ | { | |||||||||
struct stat sb; | struct stat sb; | |||||||||
int err; | int err; | |||||||||
rstate->vmmem_fd = open(filename, O_RDONLY); | rstate->vmmem_fd = open(filename, O_RDONLY); | |||||||||
if (rstate->vmmem_fd < 0) { | if (rstate->vmmem_fd < 0) { | |||||||||
perror("Failed to open restore file"); | perror("Failed to open restore file"); | |||||||||
return (-1); | return (-1); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
Not Done Inline ActionsThis also is not required. gusev.vitaliy_gmail.com: This also is not required. | ||||||||||
limit_vmmem_socket(rstate->vmmem_fd); | ||||||||||
#endif | ||||||||||
err = fstat(rstate->vmmem_fd, &sb); | err = fstat(rstate->vmmem_fd, &sb); | |||||||||
if (err < 0) { | if (err < 0) { | |||||||||
perror("Failed to stat restore file"); | perror("Failed to stat restore file"); | |||||||||
goto err_load_vmmem; | goto err_load_vmmem; | |||||||||
} | } | |||||||||
if (sb.st_size == 0) { | if (sb.st_size == 0) { | |||||||||
fprintf(stderr, "Restore file is empty.\n"); | fprintf(stderr, "Restore file is empty.\n"); | |||||||||
Show All 17 Lines | load_kdata_file(const char *filename, struct restore_state *rstate) | |||||||||
int err; | int err; | |||||||||
rstate->kdata_fd = open(filename, O_RDONLY); | rstate->kdata_fd = open(filename, O_RDONLY); | |||||||||
if (rstate->kdata_fd < 0) { | if (rstate->kdata_fd < 0) { | |||||||||
perror("Failed to open kernel data file"); | perror("Failed to open kernel data file"); | |||||||||
return (-1); | return (-1); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
limit_kernel_socket(rstate->kdata_fd); | ||||||||||
Not Done Inline ActionsThis is not required ? gusev.vitaliy_gmail.com: This is not required ? | ||||||||||
#endif | ||||||||||
err = fstat(rstate->kdata_fd, &sb); | err = fstat(rstate->kdata_fd, &sb); | |||||||||
if (err < 0) { | if (err < 0) { | |||||||||
perror("Failed to stat kernel data file"); | perror("Failed to stat kernel data file"); | |||||||||
goto err_load_kdata; | goto err_load_kdata; | |||||||||
} | } | |||||||||
if (sb.st_size == 0) { | if (sb.st_size == 0) { | |||||||||
fprintf(stderr, "Kernel data file is empty.\n"); | fprintf(stderr, "Kernel data file is empty.\n"); | |||||||||
Show All 16 Lines | err_load_kdata: | |||||||||
return (-1); | return (-1); | |||||||||
} | } | |||||||||
static int | static int | |||||||||
load_metadata_file(const char *filename, struct restore_state *rstate) | load_metadata_file(const char *filename, struct restore_state *rstate) | |||||||||
{ | { | |||||||||
const ucl_object_t *obj; | const ucl_object_t *obj; | |||||||||
struct ucl_parser *parser; | struct ucl_parser *parser; | |||||||||
int md_fd = -1; | ||||||||||
int err; | int err; | |||||||||
parser = ucl_parser_new(UCL_PARSER_DEFAULT); | parser = ucl_parser_new(UCL_PARSER_DEFAULT); | |||||||||
Not Done Inline ActionsWhy not openat()? And this function needs changes at all if consider capsicum support? gusev.vitaliy_gmail.com: Why not openat()? And this function needs changes at all if consider capsicum support? | ||||||||||
Not Done Inline ActionsThe changes would make sense if the cap_enter is called before the restore is actually done and openat should be used in that case because would not work, but considering the 'restore' files are opened and closed before cap_enter the function should remain unchanged. ionut.mihalache1506_gmail.com: The changes would make sense if the cap_enter is called before the restore is actually done and… | ||||||||||
if (parser == NULL) { | if (parser == NULL) { | |||||||||
fprintf(stderr, "Failed to initialize UCL parser.\n"); | fprintf(stderr, "Failed to initialize UCL parser.\n"); | |||||||||
goto err_load_metadata; | goto err_load_metadata; | |||||||||
} | } | |||||||||
err = ucl_parser_add_file(parser, filename); | md_fd = open(filename, O_RDONLY); | |||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
limit_metadata_socket(md_fd); | ||||||||||
#endif | ||||||||||
err = ucl_parser_add_fd(parser, md_fd); | ||||||||||
Not Done Inline ActionsMissing error checking. markj: Missing error checking. | ||||||||||
if (err == 0) { | if (err == 0) { | |||||||||
fprintf(stderr, "Failed to parse metadata file: '%s'\n", | fprintf(stderr, "Failed to parse metadata file: '%s'\n", | |||||||||
filename); | filename); | |||||||||
err = -1; | err = -1; | |||||||||
goto err_load_metadata; | goto err_load_metadata; | |||||||||
} | } | |||||||||
obj = ucl_parser_get_object(parser); | obj = ucl_parser_get_object(parser); | |||||||||
if (obj == NULL) { | if (obj == NULL) { | |||||||||
fprintf(stderr, "Failed to parse object.\n"); | fprintf(stderr, "Failed to parse object.\n"); | |||||||||
err = -1; | err = -1; | |||||||||
goto err_load_metadata; | goto err_load_metadata; | |||||||||
} | } | |||||||||
rstate->meta_parser = parser; | rstate->meta_parser = parser; | |||||||||
rstate->meta_root_obj = (ucl_object_t *)obj; | rstate->meta_root_obj = (ucl_object_t *)obj; | |||||||||
return (0); | return (0); | |||||||||
err_load_metadata: | err_load_metadata: | |||||||||
if (md_fd > 0) | ||||||||||
close(md_fd); | ||||||||||
if (parser != NULL) | if (parser != NULL) | |||||||||
ucl_parser_free(parser); | ucl_parser_free(parser); | |||||||||
return (err); | return (err); | |||||||||
} | } | |||||||||
int | int | |||||||||
load_restore_file(const char *filename, struct restore_state *rstate) | load_restore_file(const char *filename, struct restore_state *rstate) | |||||||||
{ | { | |||||||||
▲ Show 20 Lines • Show All 956 Lines • ▼ Show 20 Lines | checkpoint_cpu_resume(int vcpu) | |||||||||
pthread_mutex_lock(&vcpu_lock); | pthread_mutex_lock(&vcpu_lock); | |||||||||
while (checkpoint_active) | while (checkpoint_active) | |||||||||
pthread_cond_wait(&vcpus_can_run, &vcpu_lock); | pthread_cond_wait(&vcpus_can_run, &vcpu_lock); | |||||||||
CPU_CLR(vcpu, &vcpus_suspended); | CPU_CLR(vcpu, &vcpus_suspended); | |||||||||
pthread_mutex_unlock(&vcpu_lock); | pthread_mutex_unlock(&vcpu_lock); | |||||||||
} | } | |||||||||
static void | static void | |||||||||
vm_vcpu_pause(struct vmctx *ctx) | vm_vcpu_pause(struct vmctx *ctx) | |||||||||
Not Done Inline ActionsThis function should return value and vm_checkpoint() should check that value. gusev.vitaliy_gmail.com: This function should return value and vm_checkpoint() should check that value. | ||||||||||
{ | { | |||||||||
int err; | ||||||||||
pthread_mutex_lock(&vcpu_lock); | pthread_mutex_lock(&vcpu_lock); | |||||||||
checkpoint_active = true; | checkpoint_active = true; | |||||||||
vm_suspend_cpu(ctx, -1); | err = vm_suspend_cpu(ctx, -1); | |||||||||
if (err != 0) { | ||||||||||
fprintf(stderr, "%s: Could not suspend vcpus\r\n", __func__); | ||||||||||
Not Done Inline Actions
markj: | ||||||||||
Not Done Inline ActionsEPRINTLN() is preferred for new code, which handles raw mode; see bhyve/debug.h rew: EPRINTLN() is preferred for new code, which handles raw mode; see bhyve/debug.h | ||||||||||
pthread_mutex_unlock(&vcpu_lock); | ||||||||||
return; | ||||||||||
} | ||||||||||
while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0) | while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0) | |||||||||
pthread_cond_wait(&vcpus_idle, &vcpu_lock); | pthread_cond_wait(&vcpus_idle, &vcpu_lock); | |||||||||
pthread_mutex_unlock(&vcpu_lock); | pthread_mutex_unlock(&vcpu_lock); | |||||||||
} | } | |||||||||
static void | static void | |||||||||
vm_vcpu_resume(struct vmctx *ctx) | vm_vcpu_resume(struct vmctx *ctx) | |||||||||
{ | { | |||||||||
pthread_mutex_lock(&vcpu_lock); | pthread_mutex_lock(&vcpu_lock); | |||||||||
checkpoint_active = false; | checkpoint_active = false; | |||||||||
pthread_mutex_unlock(&vcpu_lock); | pthread_mutex_unlock(&vcpu_lock); | |||||||||
vm_resume_cpu(ctx, -1); | vm_resume_cpu(ctx, -1); | |||||||||
pthread_cond_broadcast(&vcpus_can_run); | pthread_cond_broadcast(&vcpus_can_run); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
#define DESTROY(vm, ch, err, LABEL) \ | ||||||||||
do { \ | ||||||||||
Not Done Inline ActionsI suggest remove this function and vm-destroy usage. Leave vm state as is on suspend. gusev.vitaliy_gmail.com: I suggest remove this function and vm-destroy usage. Leave vm state as is on suspend. | ||||||||||
Not Done Inline ActionsCan you give more details about how exactly it would be better to approach the usage of vm_destroy? If I only remove function call then resources will not be freed and the guest will be stopped due to the exit call afterwards and a new guest can't be opened or restored until a vm_destroy call is requested through bhyvectl and if I don't use exit as well, wouldn't this remove the "suspend" functionality and allow only "checkpoint" functionality? ionut.mihalache1506_gmail.com: Can you give more details about how exactly it would be better to approach the usage of… | ||||||||||
cap_channel_t *capsysctl = NULL; \ | ||||||||||
char *name = "hw.vmm.destroy"; \ | ||||||||||
void *limit; \ | ||||||||||
\ | ||||||||||
/* Create capability to the system.sysctl service with Casper. */ \ | ||||||||||
capsysctl = cap_service_open(ch, "system.sysctl"); \ | ||||||||||
Not Done Inline ActionsI see that casper that you added is really needed only for destroying vm environment via sysctl. But what if leave it as I and do not destroy vm env? This behaviour could be similar as after "halt" in VM. gusev.vitaliy_gmail.com: I see that casper that you added is really needed only for destroying vm environment via sysctl. | ||||||||||
Not Done Inline Actions*leave it as is and do not destroy vm env on suspend* gusev.vitaliy_gmail.com: *leave it as is and do not destroy vm env on suspend* | ||||||||||
Not Done Inline ActionsThis is the wrong time to be creating a casper service. All services should be started at initialization time, after which the "casper capability" should be closed. markj: This is the wrong time to be creating a casper service. All services should be started at… | ||||||||||
Not Done Inline ActionsSorry, if user does "halt" inside VM, VM context is not destroyed when bhyve process ends. gusev.vitaliy_gmail.com: Sorry, if user does "halt" inside VM, VM context is not destroyed when bhyve process ends. | ||||||||||
if (capsysctl == NULL) \ | ||||||||||
fprintf(stderr, "%s: Unable to open system.sysctl service", __func__); \ | ||||||||||
\ | ||||||||||
cap_close(ch); \ | ||||||||||
\ | ||||||||||
/* Create limit for one MIB with write access only. */ \ | ||||||||||
limit = cap_sysctl_limit_init(capsysctl); \ | ||||||||||
(void)cap_sysctl_limit_name(limit, name, CAP_SYSCTL_WRITE); \ | ||||||||||
\ | ||||||||||
/* Limit system.sysctl. */ \ | ||||||||||
if (cap_sysctl_limit(limit) < 0) \ | ||||||||||
fprintf(stderr, "%s: Unable to set limits", __func__); \ | ||||||||||
\ | ||||||||||
err = cap_sysctlbyname(capsysctl, name, NULL, NULL, (vm), strlen((vm))); \ | ||||||||||
\ | ||||||||||
cap_close(capsysctl); \ | ||||||||||
if (err != 0) { \ | ||||||||||
fprintf(stderr, "%s: err is %d\r\n", __func__, errno); \ | ||||||||||
goto LABEL; \ | ||||||||||
} \ | ||||||||||
} while(0) | ||||||||||
#endif | ||||||||||
Not Done Inline ActionsThis doesn't need to be a macro, please make it a regular subroutine. markj: This doesn't need to be a macro, please make it a regular subroutine. | ||||||||||
static int | static int | |||||||||
vm_checkpoint(struct vmctx *ctx, const char *checkpoint_file, bool stop_vm) | vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, cap_channel_t *chn, bool stop_vm) | |||||||||
{ | { | |||||||||
int fd_checkpoint = 0, kdata_fd = 0; | int fd_checkpoint = 0, kdata_fd = 0, meta_fd = 0; | |||||||||
Not Done Inline ActionsBTW, usage 0 is wrong here, cleanup path also should be corrected with (fd_checkpoint >= 0) condition. gusev.vitaliy_gmail.com: BTW, usage 0 is wrong here, cleanup path also should be corrected with (fd_checkpoint >= 0)… | ||||||||||
int ret = 0; | int ret = 0; | |||||||||
int error = 0; | int error = 0; | |||||||||
size_t memsz; | size_t memsz; | |||||||||
xo_handle_t *xop = NULL; | xo_handle_t *xop = NULL; | |||||||||
char *meta_filename = NULL; | char *meta_filename = NULL; | |||||||||
char *kdata_filename = NULL; | char *kdata_filename = NULL; | |||||||||
FILE *meta_file = NULL; | FILE *meta_file = NULL; | |||||||||
char vmname[MAX_VMNAME]; | ||||||||||
kdata_filename = strcat_extension(checkpoint_file, ".kern"); | kdata_filename = strcat_extension(checkpoint_file, ".kern"); | |||||||||
if (kdata_filename == NULL) { | if (kdata_filename == NULL) { | |||||||||
fprintf(stderr, "Failed to construct kernel data filename.\n"); | fprintf(stderr, "Failed to construct kernel data filename.\n"); | |||||||||
return (-1); | return (-1); | |||||||||
} | } | |||||||||
kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); | kdata_fd = openat(cdir_fd, kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); | |||||||||
Not Done Inline ActionsBTW, why do permission masks (0700) have execute bit here? gusev.vitaliy_gmail.com: BTW, why do permission masks (0700) have execute bit here? | ||||||||||
if (kdata_fd < 0) { | if (kdata_fd < 0) { | |||||||||
perror("Failed to open kernel data snapshot file."); | perror("Failed to open kernel data snapshot file."); | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); | fd_checkpoint = openat(cdir_fd, checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); | |||||||||
if (fd_checkpoint < 0) { | if (fd_checkpoint < 0) { | |||||||||
perror("Failed to create checkpoint file"); | perror("Failed to create checkpoint file"); | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
meta_filename = strcat_extension(checkpoint_file, ".meta"); | meta_filename = strcat_extension(checkpoint_file, ".meta"); | |||||||||
if (meta_filename == NULL) { | if (meta_filename == NULL) { | |||||||||
fprintf(stderr, "Failed to construct vm metadata filename.\n"); | fprintf(stderr, "Failed to construct vm metadata filename.\n"); | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
meta_file = fopen(meta_filename, "w"); | meta_fd = openat(cdir_fd, meta_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700); | |||||||||
if (meta_fd < 0) { | ||||||||||
perror("Failed to open vm metadata snapshot file descriptor."); | ||||||||||
goto done; | ||||||||||
} | ||||||||||
meta_file = fdopen(meta_fd, "w"); | ||||||||||
if (meta_file == NULL) { | if (meta_file == NULL) { | |||||||||
perror("Failed to open vm metadata snapshot file."); | perror("Failed to open vm metadata snapshot file."); | |||||||||
goto done; | goto done; | |||||||||
Not Done Inline ActionsPotential meta_fd leak here. It should be closed on fdopen() failure. gusev.vitaliy_gmail.com: Potential meta_fd leak here. It should be closed on fdopen() failure. | ||||||||||
} | } | |||||||||
xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY); | xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY); | |||||||||
if (xop == NULL) { | if (xop == NULL) { | |||||||||
perror("Failed to get libxo handle on metadata file."); | perror("Failed to get libxo handle on metadata file."); | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
vm_vcpu_pause(ctx); | vm_vcpu_pause(ctx); | |||||||||
ret = vm_pause_user_devs(ctx); | ret = vm_pause_user_devs(ctx); | |||||||||
if (ret != 0) { | if (ret != 0) { | |||||||||
fprintf(stderr, "Could not pause devices\r\n"); | fprintf(stderr, "Could not pause devices\r\n"); | |||||||||
error = ret; | error = ret; | |||||||||
goto done; | goto done; | |||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
if (cdir_fd > 0) | ||||||||||
close(cdir_fd); | ||||||||||
#endif | ||||||||||
} | } | |||||||||
Not Done Inline ActionsThe brace looks like it's in the wrong place. The subsequent revisions modify this section some more, and finally the close() is removed again in the last revision, so this is just noise. markj: The brace looks like it's in the wrong place. The subsequent revisions modify this section some… | ||||||||||
memsz = vm_snapshot_mem(ctx, fd_checkpoint, 0, true); | memsz = vm_snapshot_mem(ctx, fd_checkpoint, 0, true); | |||||||||
if (memsz == 0) { | if (memsz == 0) { | |||||||||
perror("Could not write guest memory to file"); | perror("Could not write guest memory to file"); | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
ret = vm_snapshot_basic_metadata(ctx, xop, memsz); | ret = vm_snapshot_basic_metadata(ctx, xop, memsz); | |||||||||
if (ret != 0) { | if (ret != 0) { | |||||||||
fprintf(stderr, "Failed to snapshot vm basic metadata.\n"); | fprintf(stderr, "Failed to snapshot vm basic metadata.\n"); | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop); | ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop); | |||||||||
if (ret != 0) { | if (ret != 0) { | |||||||||
fprintf(stderr, "Failed to snapshot vm kernel data.\n"); | fprintf(stderr, "Failed to snapshot vm kernel data.\n"); | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
ret = vm_snapshot_user_devs(ctx, kdata_fd, xop); | ret = vm_snapshot_user_devs(ctx, kdata_fd, xop); | |||||||||
if (ret != 0) { | if (ret != 0) { | |||||||||
fprintf(stderr, "Failed to snapshot device state.\n"); | fprintf(stderr, "Failed to snapshot device state.\n"); | |||||||||
error = -1; | error = -1; | |||||||||
goto done; | goto done; | |||||||||
} | } | |||||||||
xo_finish_h(xop); | xo_finish_h(xop); | |||||||||
if (stop_vm) { | if (stop_vm) { | |||||||||
if (chn != NULL) { | ||||||||||
error = vm_get_name(ctx, vmname, MAX_VMNAME - 1); | ||||||||||
if (error != 0) { | ||||||||||
fprintf(stderr, "%s: Failed to get VM name", __func__); | ||||||||||
Not Done Inline Actions
markj: | ||||||||||
goto done; | ||||||||||
} | ||||||||||
DESTROY(vmname, chn, error, done); | ||||||||||
free(ctx); | ||||||||||
} else | ||||||||||
vm_destroy(ctx); | vm_destroy(ctx); | |||||||||
exit(0); | exit(0); | |||||||||
} | } | |||||||||
done: | done: | |||||||||
ret = vm_resume_user_devs(ctx); | ret = vm_resume_user_devs(ctx); | |||||||||
if (ret != 0) | if (ret != 0) | |||||||||
fprintf(stderr, "Could not resume devices\r\n"); | fprintf(stderr, "Could not resume devices\r\n"); | |||||||||
vm_vcpu_resume(ctx); | vm_vcpu_resume(ctx); | |||||||||
if (fd_checkpoint > 0) | if (fd_checkpoint > 0) | |||||||||
close(fd_checkpoint); | close(fd_checkpoint); | |||||||||
if (meta_filename != NULL) | if (meta_filename != NULL) | |||||||||
free(meta_filename); | free(meta_filename); | |||||||||
if (kdata_filename != NULL) | if (kdata_filename != NULL) | |||||||||
free(kdata_filename); | free(kdata_filename); | |||||||||
if (xop != NULL) | if (xop != NULL) | |||||||||
xo_destroy(xop); | xo_destroy(xop); | |||||||||
if (meta_file != NULL) | if (meta_file != NULL) | |||||||||
fclose(meta_file); | fclose(meta_file); | |||||||||
if (kdata_fd > 0) | if (kdata_fd > 0) | |||||||||
close(kdata_fd); | close(kdata_fd); | |||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
if (cdir_fd > 0) | ||||||||||
Not Done Inline ActionsWhy cdir_fd is closed here? It should persist cross "checkpoint" or "suspend" calls. And using it under "ifdef WITHOUT_CAPSICUM" is also wrong. gusev.vitaliy_gmail.com: Why cdir_fd is closed here? It should persist cross "checkpoint" or "suspend" calls. And using… | ||||||||||
close(cdir_fd); | ||||||||||
#endif | ||||||||||
return (error); | return (error); | |||||||||
} | } | |||||||||
static int | int | |||||||||
handle_message(struct vmctx *ctx, nvlist_t *nvl) | handle_message(struct vmctx *ctx, nvlist_t *nvl, cap_channel_t *chn) | |||||||||
{ | { | |||||||||
int err; | int err; | |||||||||
const char *cmd; | const char *cmd; | |||||||||
if (!nvlist_exists_string(nvl, "cmd")) | if (!nvlist_exists_string(nvl, "cmd")) | |||||||||
return (-1); | return (-1); | |||||||||
cmd = nvlist_get_string(nvl, "cmd"); | cmd = nvlist_get_string(nvl, "cmd"); | |||||||||
if (strcmp(cmd, "checkpoint") == 0) { | if (strcmp(cmd, "checkpoint") == 0) { | |||||||||
if (!nvlist_exists_string(nvl, "filename") || | if (!nvlist_exists_string(nvl, "filename") || | |||||||||
!nvlist_exists_bool(nvl, "suspend")) | !nvlist_exists_bool(nvl, "suspend")) | |||||||||
err = -1; | err = -1; | |||||||||
else | else | |||||||||
err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"), | err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"), | |||||||||
nvlist_get_bool(nvl, "suspend")); | chn, nvlist_get_bool(nvl, "suspend")); | |||||||||
} else { | } else { | |||||||||
EPRINTLN("Unrecognized checkpoint operation\n"); | EPRINTLN("Unrecognized checkpoint operation\n"); | |||||||||
err = -1; | err = -1; | |||||||||
} | } | |||||||||
if (err != 0) | if (err != 0) | |||||||||
EPRINTLN("Unable to perform the requested operation\n"); | EPRINTLN("Unable to perform the requested operation\n"); | |||||||||
nvlist_destroy(nvl); | nvlist_destroy(nvl); | |||||||||
return (err); | return (err); | |||||||||
} | } | |||||||||
/* | /* | |||||||||
* Listen for commands from bhyvectl | * Listen for commands from bhyvectl | |||||||||
*/ | */ | |||||||||
void * | void * | |||||||||
checkpoint_thread(void *param) | checkpoint_thread(void *param) | |||||||||
{ | { | |||||||||
struct checkpoint_thread_info *thread_info; | struct checkpoint_thread_info *thread_info; | |||||||||
nvlist_t *nvl; | nvlist_t *nvl; | |||||||||
pthread_set_name_np(pthread_self(), "checkpoint thread"); | pthread_set_name_np(pthread_self(), "checkpoint thread"); | |||||||||
thread_info = (struct checkpoint_thread_info *)param; | thread_info = (struct checkpoint_thread_info *)param; | |||||||||
for (;;) { | for (;;) { | |||||||||
nvl = nvlist_recv(thread_info->socket_fd, 0); | n = recvfrom(thread_info->socket_fd, &imsg, sizeof(imsg), 0, NULL, 0); | |||||||||
if (nvl != NULL) | ||||||||||
handle_message(thread_info->ctx, nvl); | /* | |||||||||
* slight sanity check: see if there's enough data to at | ||||||||||
* least determine the type of message. | ||||||||||
*/ | ||||||||||
if (n >= sizeof(imsg.code)) | ||||||||||
handle_message(&imsg, thread_info->ctx, thread_info->channel); | ||||||||||
else | else | |||||||||
EPRINTLN("nvlist_recv() failed: %s", strerror(errno)); | EPRINTLN("nvlist_recv() failed: %s", strerror(errno)); | |||||||||
} | } | |||||||||
return (NULL); | return (NULL); | |||||||||
} | } | |||||||||
void | void | |||||||||
Not Done Inline ActionsWhy all below under ifdef is required? I guess it is excessive. gusev.vitaliy_gmail.com: Why all below under ifdef is required? I guess it is excessive. | ||||||||||
init_snapshot(void) | init_snapshot(void) | |||||||||
{ | { | |||||||||
int err; | int err; | |||||||||
err = pthread_mutex_init(&vcpu_lock, NULL); | err = pthread_mutex_init(&vcpu_lock, NULL); | |||||||||
if (err != 0) | if (err != 0) | |||||||||
errc(1, err, "checkpoint mutex init"); | errc(1, err, "checkpoint mutex init"); | |||||||||
err = pthread_cond_init(&vcpus_idle, NULL); | err = pthread_cond_init(&vcpus_idle, NULL); | |||||||||
if (err != 0) | if (err != 0) | |||||||||
errc(1, err, "checkpoint cv init (vcpus_idle)"); | errc(1, err, "checkpoint cv init (vcpus_idle)"); | |||||||||
err = pthread_cond_init(&vcpus_can_run, NULL); | err = pthread_cond_init(&vcpus_can_run, NULL); | |||||||||
if (err != 0) | if (err != 0) | |||||||||
errc(1, err, "checkpoint cv init (vcpus_can_run)"); | errc(1, err, "checkpoint cv init (vcpus_can_run)"); | |||||||||
} | } | |||||||||
#ifndef WITHOUT_CAPSICUM | ||||||||||
static void | ||||||||||
limit_control_socket(int s) | ||||||||||
{ | ||||||||||
cap_rights_t rights; | ||||||||||
cap_rights_init(&rights, CAP_BIND, CAP_READ); | ||||||||||
if (caph_rights_limit(s, &rights) == -1) | ||||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | ||||||||||
} | ||||||||||
static void | ||||||||||
limit_file_operations() | ||||||||||
Not Done Inline Actions
markj: | ||||||||||
{ | ||||||||||
cap_rights_t rights; | ||||||||||
cap_rights_init(&rights, CAP_LOOKUP, CAP_FTRUNCATE, CAP_PWRITE, CAP_PREAD, CAP_FCNTL, CAP_CREATE); | ||||||||||
if (caph_rights_limit(cdir_fd, &rights) == -1) | ||||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | ||||||||||
} | ||||||||||
#endif | ||||||||||
/* | /* | |||||||||
* Create the listening socket for IPC with bhyvectl | * Create the listening socket for IPC with bhyvectl | |||||||||
*/ | */ | |||||||||
Not Done Inline ActionsThe casper channel should be closed after initialization is complete. Otherwise Capsicum is not going to accomplish anything, since malicious code can always request new capabilities via new casper services. markj: The casper channel should be closed after initialization is complete. Otherwise Capsicum is not… | ||||||||||
int | int | |||||||||
init_checkpoint_thread(struct vmctx *ctx) | init_checkpoint_thread(struct vmctx *ctx, char *ckp_path, cap_channel_t *chn) | |||||||||
{ | { | |||||||||
struct checkpoint_thread_info *checkpoint_info = NULL; | struct checkpoint_thread_info *checkpoint_info = NULL; | |||||||||
struct sockaddr_un addr; | struct sockaddr_un addr; | |||||||||
int socket_fd; | int socket_fd; | |||||||||
pthread_t checkpoint_pthread; | pthread_t checkpoint_pthread; | |||||||||
char vmname_buf[MAX_VMNAME]; | char vmname_buf[MAX_VMNAME]; | |||||||||
int err; | int ret, err = 0; | |||||||||
memset(&addr, 0, sizeof(addr)); | memset(&addr, 0, sizeof(addr)); | |||||||||
socket_fd = socket(PF_UNIX, SOCK_DGRAM, 0); | socket_fd = socket(PF_UNIX, SOCK_DGRAM, 0); | |||||||||
if (socket_fd < 0) { | if (socket_fd < 0) { | |||||||||
EPRINTLN("Socket creation failed: %s", strerror(errno)); | EPRINTLN("Socket creation failed: %s", strerror(errno)); | |||||||||
err = -1; | err = -1; | |||||||||
goto fail; | goto fail; | |||||||||
} | } | |||||||||
if (ckp_path != NULL) { | ||||||||||
Not Done Inline ActionsOriginal code works with capsicum w/o any changing, so this function is not required. gusev.vitaliy_gmail.com: Original code works with capsicum w/o any changing, so this function is not required. | ||||||||||
cdir_fd = open(ckp_path, O_RDONLY | O_DIRECTORY); | ||||||||||
if (cdir_fd < 0) { | ||||||||||
perror("Failed to open working directory."); | ||||||||||
err = -1; | ||||||||||
goto fail; | ||||||||||
} | ||||||||||
limit_control_socket(socket_fd); | ||||||||||
limit_file_operations(); | ||||||||||
} | ||||||||||
addr.sun_family = AF_UNIX; | addr.sun_family = AF_UNIX; | |||||||||
err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); | err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); | |||||||||
if (err != 0) { | if (err != 0) { | |||||||||
perror("Failed to get VM name"); | perror("Failed to get VM name"); | |||||||||
goto fail; | goto fail; | |||||||||
} | } | |||||||||
snprintf(addr.sun_path, sizeof(addr.sun_path), "%s%s", | snprintf(addr.sun_path, sizeof(addr.sun_path), "%s%s", | |||||||||
BHYVE_RUN_DIR, vmname_buf); | BHYVE_RUN_DIR, vmname_buf); | |||||||||
addr.sun_len = SUN_LEN(&addr); | addr.sun_len = SUN_LEN(&addr); | |||||||||
unlink(addr.sun_path); | unlink(addr.sun_path); | |||||||||
if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) { | if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) { | |||||||||
EPRINTLN("Failed to bind socket \"%s\": %s\n", | EPRINTLN("Failed to bind socket \"%s\": %s\n", | |||||||||
addr.sun_path, strerror(errno)); | addr.sun_path, strerror(errno)); | |||||||||
err = -1; | err = -1; | |||||||||
goto fail; | goto fail; | |||||||||
} | } | |||||||||
checkpoint_info = calloc(1, sizeof(*checkpoint_info)); | checkpoint_info = calloc(1, sizeof(*checkpoint_info)); | |||||||||
checkpoint_info->ctx = ctx; | checkpoint_info->ctx = ctx; | |||||||||
checkpoint_info->socket_fd = socket_fd; | checkpoint_info->socket_fd = socket_fd; | |||||||||
checkpoint_info->channel = chn; | ||||||||||
err = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread, | err = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread, | |||||||||
checkpoint_info); | checkpoint_info); | |||||||||
if (err != 0) | if (err != 0) | |||||||||
goto fail; | goto fail; | |||||||||
return (0); | return (0); | |||||||||
fail: | fail: | |||||||||
▲ Show 20 Lines • Show All 140 Lines • Show Last 20 Lines |
why 'capsysctl' is required?