diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -34,6 +34,7 @@ kernemu_dev.c \ mem.c \ mevent.c \ + migration.c \ mptbl.c \ net_backends.c \ net_utils.c \ diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -71,6 +71,11 @@ .Op Fl o Ar var Ns Cm = Ns Ar value .Op Fl p Ar vcpu Ns Cm \&: Ns Ar hostcpu .Op Fl r Ar file +.Oo Fl R +.Sm off +.Ar host Op Cm \&, Ar port +.Sm on +.Oc .Sm off .Oo Fl s\~ .Ar slot Cm \&, Ar emulation Op Cm \&, Ar conf @@ -255,6 +260,13 @@ .Fl l options. The count of vCPUs and memory configuration are read from the snapshot. +.It Fl R Ar host Ns Op Cm \&, Ns Ar port +Receive migration from a source guest. +Wait for a connection from +.Ar host +on the specified +.Ar port +and resume execution. The default migration port is 24983. .It Fl S Wire guest memory. .It Fl s Cm help diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -96,6 +96,9 @@ #include "kernemu_dev.h" #include "mem.h" #include "mevent.h" +#ifdef BHYVE_SNAPSHOT +#include "migration.h" +#endif #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" @@ -244,6 +247,7 @@ " -p: pin 'vcpu' to 'hostcpu'\n" #ifdef BHYVE_SNAPSHOT " -r: path to checkpoint file\n" + " -R: the source vm host and port for migration\n" #endif " -S: guest memory cannot be swapped\n" " -s: PCI slot config\n" @@ -1239,10 +1243,12 @@ char *optstr; #ifdef BHYVE_SNAPSHOT char *restore_file; + char *receive_migration; struct restore_state rstate; int vcpu; restore_file = NULL; + receive_migration = NULL; #endif init_config(); @@ -1250,7 +1256,7 @@ progname = basename(argv[0]); #ifdef BHYVE_SNAPSHOT - optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:r:"; + optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:r:R:"; #else optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:"; #endif @@ -1302,6 +1308,9 @@ case
'r': restore_file = optarg; break; + case 'R': + receive_migration = optarg; + break; #endif case 's': if (strncmp(optarg, "help", strlen(optarg)) == 0) { @@ -1515,7 +1524,25 @@ exit(1); } } -#endif + + if (receive_migration != NULL) { + if (vm_pause_user_devs(ctx) != 0) { + fprintf(stderr, "Failed to pause PCI device state.\n"); + exit(1); + } + + fprintf(stdout, "Starting the migration process...\r\n"); + if (receive_vm_migration(ctx, receive_migration) != 0) { + fprintf(stderr, "Failed to migrate the vm.\r\n"); + exit(1); + } + + if (vm_resume_user_devs(ctx) != 0) { + fprintf(stderr, "Failed to resume PCI device state.\n"); + exit(1); + } + } +#endif /* BHYVE_SNAPSHOT */ error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); @@ -1570,7 +1597,7 @@ if (init_checkpoint_thread(ctx) < 0) printf("Failed to start checkpoint thread!\r\n"); - if (restore_file != NULL) + if ((restore_file != NULL) || (receive_migration != NULL)) vm_restore_time(ctx); #endif @@ -1584,7 +1611,7 @@ * If we restore a VM, start all vCPUs now (including APs), otherwise, * let the guest OS to spin them up later via vmexits. */ - if (restore_file != NULL) { + if ((restore_file != NULL) || (receive_migration != NULL)) { for (vcpu = 0; vcpu < guest_ncpus; vcpu++) { if (vcpu == BSP) continue; diff --git a/usr.sbin/bhyve/migration.h b/usr.sbin/bhyve/migration.h new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/migration.h @@ -0,0 +1,42 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017-2020 Elena Mihailescu + * Copyright (c) 2017-2020 Darius Mihai + * Copyright (c) 2017-2020 Mihai Carabas + * All rights reserved. + * The migration feature was developed under sponsorships + * from Matthew Grooms. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _BHYVE_MIGRATION_ +#define _BHYVE_MIGRATION_ + +#include +#include +#include +#include "snapshot.h" + +int receive_vm_migration(struct vmctx *ctx, char *migration_data); +#endif diff --git a/usr.sbin/bhyve/migration.c b/usr.sbin/bhyve/migration.c new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/migration.c @@ -0,0 +1,121 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017-2020 Elena Mihailescu + * Copyright (c) 2017-2020 Darius Mihai + * Copyright (c) 2017-2020 Mihai Carabas + * All rights reserved. + * The migration feature was developed under sponsorships + * from Matthew Grooms. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#ifndef WITHOUT_CAPSICUM +#include +#include +#endif + +#include +#include +#include + +#include +#ifndef WITHOUT_CAPSICUM +#include +#endif +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "migration.h" +#include "pci_emul.h" +#include "snapshot.h" + +#ifdef BHYVE_DEBUG +#define DPRINTF(FMT, ...) \ +({ \ + fprintf(stderr, "%s: " FMT "\r\n", __func__, ##__VA_ARGS__); \ + }) +#else +#define DPRINTF(FMT, ...) +#endif + +#define EPRINTF(FMT, ...) 
\ +({ \ + fprintf(stderr, "%s: " FMT "\r\n", __func__, ##__VA_ARGS__); \ + }) +
+/*
+ * Parse the destination-side migration argument ("host[,port]") into a
+ * struct migrate_req.
+ *
+ * ctx            - VM context; not used yet because the receive path is
+ *                  still a stub.
+ * migration_data - NUL-terminated "host[,port]" string.  It is duplicated
+ *                  internally, so the caller's buffer is never modified.
+ *
+ * When no port is given DEFAULT_MIGRATION_PORT is used.  An explicit port
+ * must be a plain decimal number in the range 1-65535, and the host part
+ * must be non-empty and shorter than MAXHOSTNAMELEN.
+ *
+ * Returns -1 in every case for now: either the argument is malformed, or
+ * it parsed correctly but the migration itself is not implemented yet.
+ */
+int
+receive_vm_migration(struct vmctx *ctx, char *migration_data)
+{
+	struct migrate_req req;
+	unsigned long port;
+	size_t len;
+	char *hostname, *pos, *end;
+
+	memset(&req, 0, sizeof(req));
+
+	hostname = strdup(migration_data);
+	if (hostname == NULL) {
+		EPRINTF("Could not allocate memory for the hostname");
+		return (-1);
+	}
+
+	if ((pos = strchr(hostname, ',')) != NULL) {
+		/* Split "host,port" in place; pos now points at the port. */
+		*pos++ = '\0';
+
+		/*
+		 * Parse into a local instead of through &req.port: the
+		 * struct is packed, so that address may be misaligned.
+		 * Requiring a leading digit rejects the empty string,
+		 * leading whitespace and signs, all of which strtoul()
+		 * would otherwise accept or silently turn into 0.
+		 */
+		end = pos;
+		port = 0;
+		if (*pos >= '0' && *pos <= '9')
+			port = strtoul(pos, &end, 10);
+
+		/* 65535 is the largest 16-bit port number. */
+		if (port == 0 || *end != '\0' || port > 65535) {
+			EPRINTF("Could not parse the port");
+			goto done;
+		}
+		req.port = (unsigned int)port;
+	} else {
+		/* If only one variable could be read, it should be the host */
+		req.port = DEFAULT_MIGRATION_PORT;
+	}
+
+	len = strlen(hostname);
+	if (len == 0) {
+		EPRINTF("Hostname must not be empty");
+		goto done;
+	}
+	if (len > MAXHOSTNAMELEN - 1) {
+		EPRINTF("Hostname length %zu bigger than maximum allowed %d",
+		    len, MAXHOSTNAMELEN - 1);
+		goto done;
+	}
+
+	strlcpy(req.host, hostname, MAXHOSTNAMELEN);
+
+	EPRINTF("Migration not implemented yet");
+
+done:
+	/* Single exit so the duplicated string is released on every path. */
+	free(hostname);
+	return (-1);
+}
diff --git a/usr.sbin/bhyve/snapshot.h b/usr.sbin/bhyve/snapshot.h --- a/usr.sbin/bhyve/snapshot.h +++ b/usr.sbin/bhyve/snapshot.h @@ -38,12 +38,15 @@ #ifndef _BHYVE_SNAPSHOT_ #define _BHYVE_SNAPSHOT_ +#include + #include #include #include #define BHYVE_RUN_DIR "/var/run/bhyve/" #define MAX_SNAPSHOT_FILENAME PATH_MAX +#define DEFAULT_MIGRATION_PORT 24983 struct vmctx; @@ -60,6 +63,11 @@ ucl_object_t *meta_root_obj; }; +struct __attribute__((packed)) migrate_req { + char host[MAXHOSTNAMELEN]; + unsigned int port; +}; + struct checkpoint_thread_info { struct vmctx *ctx; int socket_fd; diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c --- a/usr.sbin/bhyve/snapshot.c +++ b/usr.sbin/bhyve/snapshot.c @@ -85,6 +85,7 @@ #include "ioapic.h" #include "mem.h" #include "mevent.h" +#include "migration.h" #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" @@ -1444,24 +1445,57 @@ handle_message(struct vmctx *ctx, nvlist_t *nvl) { int err; + size_t len; const char *cmd; + struct migrate_req req; - if
(!nvlist_exists_string(nvl, "cmd")) - return (-1); - + if (!nvlist_exists_string(nvl, "cmd")) { + err = -1; + goto done; + } + cmd = nvlist_get_string(nvl, "cmd"); if (strcmp(cmd, "checkpoint") == 0) { if (!nvlist_exists_string(nvl, "filename") || - !nvlist_exists_bool(nvl, "suspend")) + !nvlist_exists_bool(nvl, "suspend")) { err = -1; - else - err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"), + goto done; + } + err = vm_checkpoint(ctx, nvlist_get_string(nvl, "filename"), nvlist_get_bool(nvl, "suspend")); + } else if (strcmp(cmd, "migrate") == 0) { + if (!nvlist_exists_string(nvl, "hostname") || + !nvlist_exists_number(nvl, "port")) { + err = -1; + goto done; + } + memset(&req, 0, sizeof(struct migrate_req)); + req.port = nvlist_get_number(nvl, "port"); + + len = strlen(nvlist_get_string(nvl, "hostname")); + if (len > MAXHOSTNAMELEN - 1) { + EPRINTLN("Hostname length %lu bigger than maximum allowed %d", + len, MAXHOSTNAMELEN - 1); + err = -1; + goto done; + } + + strlcpy(req.host, nvlist_get_string(nvl, "hostname"), MAXHOSTNAMELEN); + + fprintf(stderr, "%s: IP address used for migration: %s;\r\n" + "Port used for migration: %d\r\n", + __func__, + req.host, + req.port); + + EPRINTLN("Migration operation not implemented yet\n"); + err = -1; } else { EPRINTLN("Unrecognized checkpoint operation\n"); err = -1; } +done: if (err != 0) EPRINTLN("Unable to perform the requested operation\n"); diff --git a/usr.sbin/bhyvectl/bhyvectl.8 b/usr.sbin/bhyvectl/bhyvectl.8 --- a/usr.sbin/bhyvectl/bhyvectl.8 +++ b/usr.sbin/bhyvectl/bhyvectl.8 @@ -41,6 +41,7 @@ .Op Fl -force-poweroff .Op Fl -checkpoint= Ns Ar .Op Fl -suspend= Ns Ar +.Op Fl -migrate= Ns Ar host Ns Op Cm \&, Ns Ar port .Sh DESCRIPTION The .Nm @@ -85,6 +86,13 @@ .Fl -checkpoint . The virtual machine will terminate after the snapshot has been saved. +.It Fl -migrate= Ns Ar host Ns Op Cm \&, Ns Ar port +Warm migrate the virtual machine to a +.Ar host +on the specified +.Ar port . 
+The default migration port is 24983. +The virtual machine will be destroyed after the migration finishes. .El .Sh EXIT STATUS .Ex -std diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -90,6 +90,7 @@ #ifdef BHYVE_SNAPSHOT " [--checkpoint=]\n" " [--suspend=]\n" + " [--migrate=host[,port]]\n" #endif " [--get-all]\n" " [--get-stats]\n" @@ -303,6 +304,7 @@ #ifdef BHYVE_SNAPSHOT static int vm_checkpoint_opt; static int vm_suspend_opt; +static int vm_migrate; #endif /* @@ -594,6 +596,7 @@ #ifdef BHYVE_SNAPSHOT SET_CHECKPOINT_FILE, SET_SUSPEND_FILE, + MIGRATE_VM, #endif }; @@ -1466,6 +1469,7 @@ #ifdef BHYVE_SNAPSHOT { "checkpoint", REQ_ARG, 0, SET_CHECKPOINT_FILE}, { "suspend", REQ_ARG, 0, SET_SUSPEND_FILE}, + { "migrate", REQ_ARG, 0, MIGRATE_VM}, #endif }; @@ -1736,7 +1740,45 @@ return (send_message(ctx, nvl)); } -#endif +
+/*
+ * Ask the running bhyve process to start migrating its guest.
+ *
+ * ctx        - VM context, passed through unchanged to send_message().
+ * migrate_vm - NUL-terminated "host[,port]" destination string.  It is
+ *              duplicated internally, so the caller's buffer is never
+ *              modified.
+ *
+ * Builds an nvlist carrying cmd="migrate", the hostname and the port
+ * (DEFAULT_MIGRATION_PORT when none is given) and hands it to
+ * send_message().  An explicit port must be a plain decimal number in the
+ * range 1-65535.
+ *
+ * Returns the result of send_message(), or -1 when the argument cannot be
+ * parsed or memory for the working copy cannot be allocated.
+ */
+static int
+send_start_migrate(struct vmctx *ctx, const char *migrate_vm)
+{
+	nvlist_t *nvl;
+	char *hostname, *pos, *end;
+	unsigned long port;
+
+	hostname = strdup(migrate_vm);
+	if (hostname == NULL) {
+		fprintf(stderr,
+		    "Could not allocate memory for the hostname\r\n");
+		return (-1);
+	}
+
+	/* If only one variable could be read, it should be the host */
+	port = DEFAULT_MIGRATION_PORT;
+
+	if ((pos = strchr(hostname, ',')) != NULL) {
+		/* Split "host,port" in place; pos now points at the port. */
+		*pos++ = '\0';
+
+		/*
+		 * Requiring a leading digit rejects the empty string,
+		 * leading whitespace and signs, all of which strtoul()
+		 * would otherwise accept or silently turn into 0.
+		 */
+		end = pos;
+		port = 0;
+		if (*pos >= '0' && *pos <= '9')
+			port = strtoul(pos, &end, 10);
+
+		/* 65535 is the largest 16-bit port number. */
+		if (port == 0 || *end != '\0' || port > 65535) {
+			fprintf(stderr, "Could not parse the port\r\n");
+			free(hostname);
+			return (-1);
+		}
+	}
+
+	nvl = nvlist_create(0);
+	nvlist_add_string(nvl, "cmd", "migrate");
+	nvlist_add_string(nvl, "hostname", hostname);
+	nvlist_add_number(nvl, "port", port);
+
+	free(hostname);
+
+	return (send_message(ctx, nvl));
+}
+ +#endif /* BHYVE_SNAPSHOT */ + int main(int argc, char *argv[]) @@ -1755,7 +1797,7 @@ struct tm tm; struct option *opts; #ifdef BHYVE_SNAPSHOT - char *checkpoint_file, *suspend_file; + char *checkpoint_file, *suspend_file, *migrate_host; #endif cpu_intel = cpu_vendor_intel(); @@ -1924,6 +1966,10 @@ vm_suspend_opt = 1;
suspend_file = optarg; break; + case MIGRATE_VM: + vm_migrate = 1; + migrate_host = optarg; + break; #endif default: usage(cpu_intel); @@ -2402,6 +2448,9 @@ if (!error && vm_suspend_opt) error = snapshot_request(ctx, suspend_file, true); + + if (!error && vm_migrate) + error = send_start_migrate(ctx, migrate_host); #endif free (opts);