Index: etc/mtree/BSD.var.dist =================================================================== --- etc/mtree/BSD.var.dist +++ etc/mtree/BSD.var.dist @@ -76,6 +76,8 @@ run dhclient .. + bhyve + .. ppp gname=network mode=0770 .. wpa_supplicant Index: lib/libvmmapi/vmmapi.h =================================================================== --- lib/libvmmapi/vmmapi.h +++ lib/libvmmapi/vmmapi.h @@ -258,16 +258,31 @@ #define MAX_SNAPSHOT_VMNAME 100 -enum checkpoint_opcodes { - START_CHECKPOINT = 0, - START_SUSPEND = 1, -}; - +/* Filename that will be used for save/restore */ struct checkpoint_op { - unsigned int op; char snapshot_filename[MAX_SNAPSHOT_VMNAME]; }; +/* Messages that a bhyve process understands. */ +enum ipc_opcode { + START_CHECKPOINT, + START_SUSPEND, +}; + +/* + * The type of message and associated data to + * send to a bhyve process. + */ +struct ipc_message { + enum ipc_opcode code; + union { + /* + * message specific structures + */ + struct checkpoint_op op; + } data; +}; + int vm_snapshot_req(struct vm_snapshot_meta *meta); int vm_restore_time(struct vmctx *ctx); Index: share/man/man7/hier.7 =================================================================== --- share/man/man7/hier.7 +++ share/man/man7/hier.7 @@ -835,6 +835,8 @@ system since it was booted .Pp .Bl -tag -width Fl -compact +.It Pa bhyve/ +bhyve vm unix domain socket. 
.It Pa ppp/ writable by the .Dq network Index: usr.sbin/bhyve/Makefile =================================================================== --- usr.sbin/bhyve/Makefile +++ usr.sbin/bhyve/Makefile @@ -75,7 +75,7 @@ iov.c .if ${MK_BHYVE_SNAPSHOT} != "no" -SRCS+= snapshot.c +SRCS+= ipc.c snapshot.c .endif CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64 Index: usr.sbin/bhyve/bhyverun.c =================================================================== --- usr.sbin/bhyve/bhyverun.c +++ usr.sbin/bhyve/bhyverun.c @@ -101,6 +101,7 @@ #include "pci_lpc.h" #include "smbiostbl.h" #ifdef BHYVE_SNAPSHOT +#include "ipc.h" #include "snapshot.h" #endif #include "xmsr.h" @@ -1402,11 +1403,12 @@ if (restore_file != NULL) destroy_restore_state(&rstate); - /* - * checkpointing thread for communication with bhyvectl - */ - if (init_checkpoint_thread(ctx) < 0) - printf("Failed to start checkpoint thread!\r\n"); + /* initialize mutex/cond variables */ + init_snapshot(); + + /* open up IPC to bhyve */ + if (init_ipc(ctx) != 0) + fprintf(stderr, "Unable to open IPC to %s\n", vmname); if (restore_file != NULL) vm_restore_time(ctx); Index: usr.sbin/bhyve/ipc.h =================================================================== --- /dev/null +++ usr.sbin/bhyve/ipc.h @@ -0,0 +1,34 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2021 Robert Wing + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef _IPC_H_
+#define _IPC_H_
+
+int init_ipc(struct vmctx *);
+
+#endif /* _IPC_H_ */
Index: usr.sbin/bhyve/ipc.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/ipc.c
@@ -0,0 +1,239 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2016 Flavius Anton
+ * Copyright (c) 2016 Mihai Tiganus
+ * Copyright (c) 2016-2019 Mihai Carabas
+ * Copyright (c) 2017-2019 Darius Mihai
+ * Copyright (c) 2017-2019 Elena Mihailescu
+ * Copyright (c) 2018-2019 Sergiu Weisz
+ * All rights reserved.
+ * The bhyve-snapshot feature was developed under sponsorships
+ * from Matthew Grooms.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "ipc.h"
+#include "snapshot.h"
+
+#define BHYVE_RUN_DIR "/var/run/bhyve"
+
+/* Arguments handed to the IPC listener thread. */
+struct ipc_thread_args {
+	struct vmctx *ctx;
+	int socket_fd;
+};
+
+/*
+ * Read one message from an accepted connection and dispatch it.
+ *
+ * The message is zero-filled before receiving, so any part the peer did
+ * not send reads as zero.  The connection is closed before returning.
+ *
+ * Returns -1 if the message type could not be read or is not recognized,
+ * otherwise the value returned by vm_checkpoint().
+ */
+static int
+get_message(int conn_fd, struct vmctx *ctx)
+{
+	struct ipc_message imsg;
+	char *buf;
+	ssize_t recv_len;
+	size_t len, total_recv;
+	int err;
+
+	memset(&imsg, 0, sizeof(imsg));
+
+	/*
+	 * Receive through a byte pointer: arithmetic on &imsg itself would
+	 * advance in units of sizeof(imsg) and write past the message.
+	 */
+	buf = (char *)&imsg;
+
+	/* The largest message expected */
+	len = sizeof(imsg);
+	total_recv = 0;
+	recv_len = 0;
+	while (total_recv < len && (recv_len = recv(conn_fd,
+	    buf + total_recv, len - total_recv, 0)) > 0)
+		total_recv += recv_len;
+
+	if (recv_len < 0) {
+		perror("Error while receiving data from bhyvectl");
+		close(conn_fd);
+		return (-1);
+	}
+
+	/* Didn't receive enough bytes to determine message type */
+	if (total_recv < sizeof(imsg.code)) {
+		fprintf(stderr, "Short message received from bhyvectl\n");
+		close(conn_fd);
+		return (-1);
+	}
+
+	/* The sender is not trusted to have terminated the file name. */
+	imsg.data.op.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = '\0';
+
+	switch (imsg.code) {
+	case START_CHECKPOINT:
+		err = vm_checkpoint(ctx, imsg.data.op.snapshot_filename, false);
+		break;
+	case START_SUSPEND:
+		err = vm_checkpoint(ctx, imsg.data.op.snapshot_filename, true);
+		break;
+	default:
+		fprintf(stderr, "Unrecognized message operation.\n");
+		err = -1;
+	}
+
+	close(conn_fd);
+	return (err);
+}
+
+/*
+ * Listen for commands from bhyvectl.
+ *
+ * arg is the struct ipc_thread_args set up by init_ipc().  One message is
+ * handled per accepted connection; the loop ends when accept(2) fails.
+ */
+static void *
+ipc_thread(void *arg)
+{
+	struct ipc_thread_args *thread_args;
+	int conn_fd, ret;
+
+	pthread_set_name_np(pthread_self(), "ipc thread");
+	thread_args = (struct ipc_thread_args *)arg;
+
+	while ((conn_fd = accept(thread_args->socket_fd, NULL, NULL)) > -1) {
+		ret = get_message(conn_fd, thread_args->ctx);
+		if (ret != 0) {
+			/*
+			 * XXX: This error message is misleading, it's possible we could
+			 * have read data off the socket, yet it was the handler
+			 * function that exited with an error code.
+			 */
+			fprintf(stderr, "Failed to read message on ipc "
+			    "socket. Retrying.\n");
+		}
+	}
+	/* accept(2) fails with -1, so any negative value is an error. */
+	if (conn_fd < 0)
+		perror("Failed to accept connection");
+
+	return (NULL);
+}
+
+/*
+ * Create the listening socket for IPC with bhyvectl and start the thread
+ * that services it.
+ *
+ * The socket is bound at BHYVE_RUN_DIR/vmname.  Returns 0 on success and
+ * non-zero on failure.
+ */
+int
+init_ipc(struct vmctx *ctx)
+{
+	struct ipc_thread_args *thread_args;
+	struct sockaddr_un addr;
+	int err, path_len, socket_fd;
+	pthread_t ipc_pthread;
+	char vmname_buf[MAX_SNAPSHOT_VMNAME];
+
+	err = vm_get_name(ctx, vmname_buf, MAX_SNAPSHOT_VMNAME - 1);
+	if (err != 0) {
+		perror("Failed to get VM name");
+		return (err);
+	}
+
+	/* XXX: SOCK_DGRAM or SOCK_SEQPACKET, also in bhyvectl.c */
+	socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		perror("Socket creation failed (IPC with bhyvectl)");
+		return (-1);
+	}
+
+	/*
+	 * Note, BHYVE_RUN_DIR + vmname_buf should be < SUNPATHLEN (104)
+	 * which allows for a vmname of 88 characters.  Refuse a longer name
+	 * rather than bind to (and unlink) a truncated path.
+	 */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	path_len = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
+	    BHYVE_RUN_DIR, vmname_buf);
+	if (path_len < 0 || (size_t)path_len >= sizeof(addr.sun_path)) {
+		fprintf(stderr, "VM name too long for IPC socket path\n");
+		close(socket_fd);
+		return (-1);
+	}
+	addr.sun_len = SUN_LEN(&addr);
+	unlink(addr.sun_path);
+
+	if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) {
+		perror("Failed to bind socket (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	if (listen(socket_fd, 10) < 0) {
+		perror("Failed to listen on socket (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	thread_args = calloc(1, sizeof(*thread_args));
+	if (thread_args == NULL) {
+		perror("calloc");
+		err = -1;
+		goto fail;
+	}
+	thread_args->ctx = ctx;
+	thread_args->socket_fd = socket_fd;
+
+	err = pthread_create(&ipc_pthread, NULL, ipc_thread, thread_args);
+	if (err == 0)
+		return (0);
+
+	free(thread_args);
+fail:
+	close(socket_fd);
+	unlink(addr.sun_path);
+
+	return (err);
+}
Index: usr.sbin/bhyve/snapshot.h
===================================================================
--- usr.sbin/bhyve/snapshot.h
+++ usr.sbin/bhyve/snapshot.h
@@ -57,11 +57,6 @@
 	ucl_object_t *meta_root_obj;
 };
 
-struct checkpoint_thread_info {
-	struct vmctx *ctx;
-	int socket_fd;
-};
-
 typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *);
 typedef int (*vm_pause_dev_cb) (struct vmctx *, const char *);
 typedef int (*vm_resume_dev_cb) (struct vmctx *, const char *);
@@ -96,9 +91,8 @@
 int vm_pause_user_devs(struct vmctx *ctx);
 int vm_resume_user_devs(struct vmctx *ctx);
 
-int get_checkpoint_msg(int conn_fd, struct vmctx *ctx);
-void *checkpoint_thread(void *param);
-int init_checkpoint_thread(struct vmctx *ctx);
+int vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, bool stop_vm);
+int init_snapshot(void);
 
 int load_restore_file(const char *filename, struct restore_state *rstate);
 
Index: usr.sbin/bhyve/snapshot.c
===================================================================
--- usr.sbin/bhyve/snapshot.c
+++ usr.sbin/bhyve/snapshot.c
@@ -115,12 +115,9 @@
 #define SNAPSHOT_CHUNK (4 *
MB) #define PROG_BUF_SZ (8192) -#define BHYVE_RUN_DIR "/var/run/bhyve" -#define CHECKPOINT_RUN_DIR BHYVE_RUN_DIR "/checkpoint" +/* XXX: could use MAX_SNAPSHOT_VMNAME from vmmapi.h */ #define MAX_VMNAME 100 -#define MAX_MSG_SIZE 1024 - #define SNAPSHOT_BUFFER_SIZE (20 * MB) #define JSON_STRUCT_ARR_KEY "structs" @@ -1327,7 +1324,7 @@ pthread_cond_broadcast(&vcpus_can_run); } -static int +int vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, bool stop_vm) { int fd_checkpoint = 0, kdata_fd = 0; @@ -1444,102 +1441,10 @@ } int -get_checkpoint_msg(int conn_fd, struct vmctx *ctx) -{ - unsigned char buf[MAX_MSG_SIZE]; - struct checkpoint_op *checkpoint_op; - int len, recv_len, total_recv = 0; - int err = 0; - - len = sizeof(struct checkpoint_op); /* expected length */ - while ((recv_len = recv(conn_fd, buf + total_recv, len - total_recv, 0)) > 0) { - total_recv += recv_len; - } - if (recv_len < 0) { - perror("Error while receiving data from bhyvectl"); - err = -1; - goto done; - } - - checkpoint_op = (struct checkpoint_op *)buf; - switch (checkpoint_op->op) { - case START_CHECKPOINT: - err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, false); - break; - case START_SUSPEND: - err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, true); - break; - default: - fprintf(stderr, "Unrecognized checkpoint operation.\n"); - err = -1; - } - -done: - close(conn_fd); - return (err); -} - -/* - * Listen for commands from bhyvectl - */ -void * -checkpoint_thread(void *param) -{ - struct checkpoint_thread_info *thread_info; - int conn_fd, ret; - - pthread_set_name_np(pthread_self(), "checkpoint thread"); - thread_info = (struct checkpoint_thread_info *)param; - - while ((conn_fd = accept(thread_info->socket_fd, NULL, NULL)) > -1) { - ret = get_checkpoint_msg(conn_fd, thread_info->ctx); - if (ret != 0) { - fprintf(stderr, "Failed to read message on checkpoint " - "socket. 
Retrying.\n"); - } - } - if (conn_fd < -1) { - perror("Failed to accept connection"); - } - - return (NULL); -} - -/* - * Create directory tree to store runtime specific information: - * i.e. UNIX sockets for IPC with bhyvectl. - */ -static int -make_checkpoint_dir(void) +init_snapshot(void) { int err; - err = mkdir(BHYVE_RUN_DIR, 0755); - if (err < 0 && errno != EEXIST) - return (err); - - err = mkdir(CHECKPOINT_RUN_DIR, 0755); - if (err < 0 && errno != EEXIST) - return (err); - - return 0; -} - -/* - * Create the listening socket for IPC with bhyvectl - */ -int -init_checkpoint_thread(struct vmctx *ctx) -{ - struct checkpoint_thread_info *checkpoint_info = NULL; - struct sockaddr_un addr; - int socket_fd; - pthread_t checkpoint_pthread; - char vmname_buf[MAX_VMNAME]; - int ret, err = 0; - - memset(&addr, 0, sizeof(addr)); - err = pthread_mutex_init(&vcpu_lock, NULL); if (err != 0) errc(1, err, "checkpoint mutex init"); @@ -1549,63 +1454,7 @@ if (err != 0) errc(1, err, "checkpoint cv init"); - socket_fd = socket(PF_UNIX, SOCK_STREAM, 0); - if (socket_fd < 0) { - perror("Socket creation failed (IPC with bhyvectl"); - err = -1; - goto fail; - } - - err = make_checkpoint_dir(); - if (err < 0) { - perror("Failed to create checkpoint runtime directory"); - goto fail; - } - - addr.sun_family = AF_UNIX; - - err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1); - if (err != 0) { - perror("Failed to get VM name"); - goto fail; - } - - snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", - CHECKPOINT_RUN_DIR, vmname_buf); - addr.sun_len = SUN_LEN(&addr); - unlink(addr.sun_path); - - if (bind(socket_fd, (struct sockaddr *)&addr, addr.sun_len) != 0) { - perror("Failed to bind socket (IPC with bhyvectl)"); - err = -1; - goto fail; - } - - if (listen(socket_fd, 10) < 0) { - perror("Failed to listen on socket (IPC with bhyvectl)"); - err = -1; - goto fail; - } - - checkpoint_info = calloc(1, sizeof(*checkpoint_info)); - checkpoint_info->ctx = ctx; - checkpoint_info->socket_fd = 
socket_fd; - - ret = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread, - checkpoint_info); - if (ret < 0) { - err = ret; - goto fail; - } - return (0); -fail: - free(checkpoint_info); - if (socket_fd > 0) - close(socket_fd); - unlink(addr.sun_path); - - return (err); } void Index: usr.sbin/bhyvectl/bhyvectl.c =================================================================== --- usr.sbin/bhyvectl/bhyvectl.c +++ usr.sbin/bhyvectl/bhyvectl.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -70,7 +71,7 @@ #define NO_ARG no_argument #define OPT_ARG optional_argument -#define CHECKPOINT_RUN_DIR "/var/run/bhyve/checkpoint" +#define BHYVE_RUN_DIR "/var/run/bhyve" #define MAX_VMNAME 100 static const char *progname; @@ -1681,11 +1682,10 @@ #ifdef BHYVE_SNAPSHOT static int -send_checkpoint_op_req(struct vmctx *ctx, struct checkpoint_op *op) +send_message(struct vmctx *ctx, void *data, size_t len) { struct sockaddr_un addr; - int socket_fd, len, len_sent, total_sent; - int err = 0; + int socket_fd, len_sent, total_sent, err; char vmname_buf[MAX_VMNAME]; socket_fd = socket(PF_UNIX, SOCK_STREAM, 0); @@ -1704,7 +1704,7 @@ goto done; } - snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", CHECKPOINT_RUN_DIR, vmname_buf); + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", BHYVE_RUN_DIR, vmname_buf); if (connect(socket_fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un)) != 0) { @@ -1713,14 +1713,12 @@ goto done; } - len = sizeof(*op); total_sent = 0; - while ((len_sent = send(socket_fd, (char *)op + total_sent, len - total_sent, 0)) > 0) { + while ((len_sent = send(socket_fd, (char *)data + total_sent, len - total_sent, 0)) > 0) total_sent += len_sent; - } if (len_sent < 0) { - perror("Failed to send checkpoint operation request"); + perror("Failed to send message to bhyve vm"); err = -1; } @@ -1731,27 +1729,18 @@ } static int -send_start_checkpoint(struct vmctx *ctx, const char *checkpoint_file) +snapshot_request(struct vmctx 
*ctx, const char *file, enum ipc_opcode code) { - struct checkpoint_op op; - - op.op = START_CHECKPOINT; - strncpy(op.snapshot_filename, checkpoint_file, MAX_SNAPSHOT_VMNAME); - op.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = 0; + struct ipc_message imsg; + size_t length; - return (send_checkpoint_op_req(ctx, &op)); -} + imsg.code = code; -static int -send_start_suspend(struct vmctx *ctx, const char *suspend_file) -{ - struct checkpoint_op op; + strlcpy(imsg.data.op.snapshot_filename, file, MAX_SNAPSHOT_VMNAME); - op.op = START_SUSPEND; - strncpy(op.snapshot_filename, suspend_file, MAX_SNAPSHOT_VMNAME); - op.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = 0; + length = offsetof(struct ipc_message, data) + sizeof(imsg.data.op); - return (send_checkpoint_op_req(ctx, &op)); + return (send_message(ctx, (void *)&imsg, length)); } #endif @@ -2413,10 +2402,10 @@ #ifdef BHYVE_SNAPSHOT if (!error && vm_checkpoint_opt) - error = send_start_checkpoint(ctx, checkpoint_file); + error = snapshot_request(ctx, checkpoint_file, START_CHECKPOINT); if (!error && vm_suspend_opt) - error = send_start_suspend(ctx, suspend_file); + error = snapshot_request(ctx, suspend_file, START_SUSPEND); #endif free (opts);