Index: usr.sbin/bhyve/Makefile =================================================================== --- usr.sbin/bhyve/Makefile +++ usr.sbin/bhyve/Makefile @@ -75,7 +75,7 @@ iov.c .if ${MK_BHYVE_SNAPSHOT} != "no" -SRCS+= snapshot.c +SRCS+= ipc.c snapshot.c .endif CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64 Index: usr.sbin/bhyve/bhyverun.c =================================================================== --- usr.sbin/bhyve/bhyverun.c +++ usr.sbin/bhyve/bhyverun.c @@ -101,6 +101,7 @@ #include "pci_lpc.h" #include "smbiostbl.h" #ifdef BHYVE_SNAPSHOT +#include "ipc.h" #include "snapshot.h" #endif #include "xmsr.h" @@ -1402,11 +1403,12 @@ if (restore_file != NULL) destroy_restore_state(&rstate); - /* - * checkpointing thread for communication with bhyvectl - */ - if (init_checkpoint_thread(ctx) < 0) - printf("Failed to start checkpoint thread!\r\n"); + /* initialize mutex/cond variables */ + init_snapshot(); + + /* open up IPC to bhyve */ + if (init_ipc(ctx) != 0) + fprintf(stderr, "Unable to open IPC to %s\n", vmname); if (restore_file != NULL) vm_restore_time(ctx); Index: usr.sbin/bhyve/ipc.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/ipc.h @@ -0,0 +1,34 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2021 Robert Wing + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _IPC_H_ +#define _IPC_H_ + +int init_ipc(struct vmctx *); + +#endif /* _IPC_H_ */ Index: usr.sbin/bhyve/ipc.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/ipc.c @@ -0,0 +1,194 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2016 Flavius Anton + * Copyright (c) 2016 Mihai Tiganus + * Copyright (c) 2016-2019 Mihai Carabas + * Copyright (c) 2017-2019 Darius Mihai + * Copyright (c) 2017-2019 Elena Mihailescu + * Copyright (c) 2018-2019 Sergiu Weisz + * All rights reserved. + * The bhyve-snapshot feature was developed under sponsorships + * from Matthew Grooms. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ipc.h" +#include "snapshot.h" + +#define MAX_MSG_SIZE 1024 +#define BHYVE_RUN_DIR "/var/run/bhyve" + +struct checkpoint_thread_info { + struct vmctx *ctx; + int socket_fd; +}; + +static int +get_checkpoint_msg(int conn_fd, struct vmctx *ctx) +{ + unsigned char buf[MAX_MSG_SIZE]; + struct checkpoint_op *checkpoint_op; + int err, len, recv_len, total_recv; + + len = sizeof(struct checkpoint_op); /* expected length */ + total_recv = 0; + while ((recv_len = recv(conn_fd, buf + total_recv, len - total_recv, 0)) > 0) { + total_recv += recv_len; + } + if (recv_len < 0) { + perror("Error while receiving data from bhyvectl"); + err = -1; + goto done; + } + + checkpoint_op = (struct checkpoint_op *)buf; + switch (checkpoint_op->op) { + case START_CHECKPOINT: + err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, false); + break; + case START_SUSPEND: + err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, true); + break; + default: + fprintf(stderr, "Unrecognized message operation.\n"); + err = -1; + } + +done: + close(conn_fd); + return (err); +} + +/* + * Listen for commands from bhyvectl + */ +static void * +checkpoint_thread(void *param) +{ + struct checkpoint_thread_info *thread_info; + int conn_fd, ret; + + pthread_set_name_np(pthread_self(), "checkpoint thread"); + thread_info = (struct checkpoint_thread_info *)param; + + while ((conn_fd = accept(thread_info->socket_fd, NULL, NULL)) > -1) { + ret = get_checkpoint_msg(conn_fd, thread_info->ctx); + if (ret != 0) { + fprintf(stderr, "Failed to read message on checkpoint " + "socket. Retrying.\n"); + } + } + if (conn_fd < -1) { + perror("Failed to accept connection"); + } + + return (NULL); +} + + +/* + * Create the listening socket for IPC with bhyvectl + */ +int +init_ipc(struct vmctx *ctx) +{ + struct checkpoint_thread_info *checkpoint_info = NULL; + struct sockaddr_un addr; + int err, socket_fd; + pthread_t checkpoint_pthread; + char vmname_buf[MAX_SNAPSHOT_VMNAME]; + + memset(&addr, 0, sizeof(addr)); + + socket_fd = socket(PF_UNIX, SOCK_STREAM, 0); + if (socket_fd < 0) { + perror("Socket creation failed (IPC with bhyvectl"); + err = -1; + goto fail; + } + + err = mkdir(BHYVE_RUN_DIR, 0755); + if (err != 0 && errno != EEXIST) { + perror("Failed to create checkpoint runtime directory"); + goto fail; + } + + addr.sun_family = AF_UNIX; + + err = vm_get_name(ctx, vmname_buf, MAX_SNAPSHOT_VMNAME - 1); + if (err != 0) { + perror("Failed to get VM name"); + goto fail; + } + + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", + BHYVE_RUN_DIR, vmname_buf); + addr.sun_len = SUN_LEN(&addr); + unlink(addr.sun_path); + + if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) { + perror("Failed to bind socket (IPC with bhyvectl)"); + err = -1; + goto fail; + } + + if (listen(socket_fd, 10) < 0) { + perror("Failed to listen on socket (IPC with bhyvectl)"); + err = -1; + goto fail; + } + + checkpoint_info = calloc(1, sizeof(*checkpoint_info)); + checkpoint_info->ctx = ctx; + checkpoint_info->socket_fd = socket_fd; + + err = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread, + checkpoint_info); + if (err != 0) + goto fail; + + return (0); +fail: + free(checkpoint_info); + if (socket_fd > 0) + close(socket_fd); + unlink(addr.sun_path); + + return (err); +} Index: usr.sbin/bhyve/snapshot.h =================================================================== --- usr.sbin/bhyve/snapshot.h +++ usr.sbin/bhyve/snapshot.h @@ -57,11 +57,6 @@ ucl_object_t *meta_root_obj; }; -struct checkpoint_thread_info { - struct vmctx *ctx; - int socket_fd; -}; - typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *); typedef int (*vm_pause_dev_cb) (struct vmctx *, const char *); typedef int (*vm_resume_dev_cb) (struct vmctx *, const char *); @@ -96,9 +91,8 @@ int vm_pause_user_devs(struct vmctx *ctx); int vm_resume_user_devs(struct vmctx *ctx); -int get_checkpoint_msg(int conn_fd, struct vmctx *ctx); -void *checkpoint_thread(void *param); -int init_checkpoint_thread(struct vmctx *ctx); +int vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, bool stop_vm); +int init_snapshot(void); int load_restore_file(const char *filename, struct restore_state *rstate); Index: usr.sbin/bhyve/snapshot.c =================================================================== --- usr.sbin/bhyve/snapshot.c +++ usr.sbin/bhyve/snapshot.c @@ -115,12 +115,9 @@ #define SNAPSHOT_CHUNK (4 * MB) #define PROG_BUF_SZ (8192) -#define BHYVE_RUN_DIR "/var/run/bhyve" -#define CHECKPOINT_RUN_DIR BHYVE_RUN_DIR "/checkpoint" +/* XXX: could use MAX_SNAPSHOT_VMNAME from vmmapi.h */ #define MAX_VMNAME 100 -#define MAX_MSG_SIZE 1024 - #define SNAPSHOT_BUFFER_SIZE (20 * MB) #define JSON_STRUCT_ARR_KEY "structs" @@ -1327,7 +1324,7 @@ pthread_cond_broadcast(&vcpus_can_run); } -static int +int vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, bool stop_vm) { int fd_checkpoint = 0, kdata_fd = 0; @@ -1443,103 +1440,10 @@ return (error); } -int -get_checkpoint_msg(int conn_fd, struct vmctx *ctx) -{ - unsigned char buf[MAX_MSG_SIZE]; - struct checkpoint_op *checkpoint_op; - int len, recv_len, total_recv = 0; - int err = 0; - - len = sizeof(struct checkpoint_op); /* expected length */ - while ((recv_len = recv(conn_fd, buf + total_recv, len - total_recv, 0)) > 0) { - total_recv += recv_len; - } - if (recv_len < 0) { - perror("Error while receiving data from bhyvectl"); - err = -1; - goto done; - } - - checkpoint_op = (struct checkpoint_op *)buf; - switch (checkpoint_op->op) { - case START_CHECKPOINT: - err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, false); - break; - case START_SUSPEND: - err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, true); - break; - default: - fprintf(stderr, "Unrecognized checkpoint operation.\n"); - err = -1; - } - -done: - close(conn_fd); - return (err); -} - -/* - * Listen for commands from bhyvectl - */ -void * -checkpoint_thread(void *param) -{ - struct checkpoint_thread_info *thread_info; - int conn_fd, ret; - - pthread_set_name_np(pthread_self(), "checkpoint thread"); - thread_info = (struct checkpoint_thread_info *)param; - - while ((conn_fd = accept(thread_info->socket_fd, NULL, NULL)) > -1) { - ret = get_checkpoint_msg(conn_fd, thread_info->ctx); - if (ret != 0) { - fprintf(stderr, "Failed to read message on checkpoint " - "socket. Retrying.\n"); - } - } - if (conn_fd < -1) { - perror("Failed to accept connection"); - } - - return (NULL); -} - -/* - * Create directory tree to store runtime specific information: - * i.e. UNIX sockets for IPC with bhyvectl. - */ -static int -make_checkpoint_dir(void) +int init_snapshot(void) { int err; - err = mkdir(BHYVE_RUN_DIR, 0755); - if (err < 0 && errno != EEXIST) - return (err); - - err = mkdir(CHECKPOINT_RUN_DIR, 0755); - if (err < 0 && errno != EEXIST) - return (err); - - return 0; -} - -/* - * Create the listening socket for IPC with bhyvectl - */ -int -init_checkpoint_thread(struct vmctx *ctx) -{ - struct checkpoint_thread_info *checkpoint_info = NULL; - struct sockaddr_un addr; - int socket_fd; - pthread_t checkpoint_pthread; - char vmname_buf[MAX_VMNAME]; - int ret, err = 0; - - memset(&addr, 0, sizeof(addr)); - err = pthread_mutex_init(&vcpu_lock, NULL); if (err != 0) errc(1, err, "checkpoint mutex init"); @@ -1549,63 +1453,7 @@ if (err != 0) errc(1, err, "checkpoint cv init"); - socket_fd = socket(PF_UNIX, SOCK_STREAM, 0); - if (socket_fd < 0) { - perror("Socket creation failed (IPC with bhyvectl"); - err = -1; - goto fail; - } - - err = make_checkpoint_dir(); - if (err < 0) { - perror("Failed to create checkpoint runtime directory"); - goto fail; - } - - addr.sun_family = AF_UNIX; - - err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); - if (err != 0) { - perror("Failed to get VM name"); - goto fail; - } - - snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", - CHECKPOINT_RUN_DIR, vmname_buf); - addr.sun_len = SUN_LEN(&addr); - unlink(addr.sun_path); - - if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) { - perror("Failed to bind socket (IPC with bhyvectl)"); - err = -1; - goto fail; - } - - if (listen(socket_fd, 10) < 0) { - perror("Failed to listen on socket (IPC with bhyvectl)"); - err = -1; - goto fail; - } - - checkpoint_info = calloc(1, sizeof(*checkpoint_info)); - checkpoint_info->ctx = ctx; - checkpoint_info->socket_fd = socket_fd; - - ret = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread, - checkpoint_info); - if (ret < 0) { - err = ret; - goto fail; - } - return (0); -fail: - free(checkpoint_info); - if (socket_fd > 0) - close(socket_fd); - unlink(addr.sun_path); - - return (err); } void Index: usr.sbin/bhyvectl/bhyvectl.c =================================================================== --- usr.sbin/bhyvectl/bhyvectl.c +++ usr.sbin/bhyvectl/bhyvectl.c @@ -70,7 +70,7 @@ #define NO_ARG no_argument #define OPT_ARG optional_argument -#define CHECKPOINT_RUN_DIR "/var/run/bhyve/checkpoint" +#define BHYVE_RUN_DIR "/var/run/bhyve" #define MAX_VMNAME 100 static const char *progname; @@ -1704,7 +1704,7 @@ goto done; } - snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", CHECKPOINT_RUN_DIR, vmname_buf); + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", BHYVE_RUN_DIR, vmname_buf); if (connect(socket_fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un)) != 0) {