diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -84,6 +84,7 @@ .if ${MK_BHYVE_SNAPSHOT} != "no" SRCS+= snapshot.c +SRCS+= migration.c .endif CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64 diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -71,6 +71,11 @@ .Op Fl o Ar var Ns Cm = Ns Ar value .Op Fl p Ar vcpu Ns Cm \&: Ns Ar hostcpu .Op Fl r Ar file +.Oo Fl R +.Sm off +.Ar host Op Cm \&: Ar port +.Sm on +.Oc .Sm off .Oo Fl s\~ .Ar slot Cm \&, Ar emulation Op Cm \&, Ar conf @@ -257,6 +262,13 @@ .Fl l options. The count of vCPUs and memory configuration are read from the snapshot. +.It Fl R Ar host Ns Op Cm \&: Ns Ar port +Receive migration from a source guest. +Wait for a connection from +.Ar host +on the specified +.Ar port +and resume execution. The default migration port is 24983. .It Fl S Wire guest memory. .It Fl s Cm help diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -98,6 +98,9 @@ #include "kernemu_dev.h" #include "mem.h" #include "mevent.h" +#ifdef BHYVE_SNAPSHOT +#include "migration.h" +#endif #include "mptbl.h" #include "pci_emul.h" #include "pci_irq.h" @@ -244,6 +247,7 @@ " -p: pin 'vcpu' to 'hostcpu'\n" #ifdef BHYVE_SNAPSHOT " -r: path to checkpoint file\n" + " -R: the source vm host and port for migration\n" #endif " -S: guest memory cannot be swapped\n" " -s: PCI slot config\n" @@ -1103,7 +1107,11 @@ exit(4); } } else { +#ifndef BHYVE_SNAPSHOT if (!romboot) { +#else + if (!romboot && !get_config_bool_default("is_migrating", false)) { +#endif /* * If the virtual machine was just created then a * bootrom must be configured to boot it. 
@@ -1249,9 +1257,11 @@ const char *optstr, *value, *vmname; #ifdef BHYVE_SNAPSHOT char *restore_file; + char *migration_host; struct restore_state rstate; restore_file = NULL; + migration_host = NULL; #endif init_config(); @@ -1259,7 +1269,7 @@ progname = basename(argv[0]); #ifdef BHYVE_SNAPSHOT - optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:r:"; + optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:r:R:"; #else optstr = "aehuwxACDHIPSWYk:o:p:G:c:s:m:l:K:U:"; #endif @@ -1311,6 +1321,10 @@ case 'r': restore_file = optarg; break; + case 'R': + migration_host = optarg; + set_config_bool("is_migrating", true); + break; #endif case 's': if (strncmp(optarg, "help", strlen(optarg)) == 0) { @@ -1527,38 +1541,48 @@ spinup_vcpu(&vcpu_info[vcpuid], vcpuid == BSP); #ifdef BHYVE_SNAPSHOT - if (restore_file != NULL) { - fprintf(stdout, "Pausing pci devs...\r\n"); + if (restore_file != NULL || migration_host != NULL) { + fprintf(stdout, "Pausing pci devs...\n"); if (vm_pause_user_devs() != 0) { fprintf(stderr, "Failed to pause PCI device state.\n"); exit(1); } - fprintf(stdout, "Restoring vm mem...\r\n"); - if (restore_vm_mem(ctx, &rstate) != 0) { - fprintf(stderr, "Failed to restore VM memory.\n"); - exit(1); - } + if (restore_file != NULL) { + fprintf(stdout, "Restoring vm mem...\n"); + if (restore_vm_mem(ctx, &rstate) != 0) { + fprintf(stderr, "Failed to restore VM memory.\n"); + exit(1); + } - fprintf(stdout, "Restoring pci devs...\r\n"); - if (vm_restore_user_devs(&rstate) != 0) { - fprintf(stderr, "Failed to restore PCI device state.\n"); - exit(1); + fprintf(stdout, "Restoring pci devs...\n"); + if (vm_restore_user_devs(&rstate) != 0) { + fprintf(stderr, "Failed to restore PCI device state.\n"); + exit(1); + } + + fprintf(stdout, "Restoring kernel structs...\n"); + if (vm_restore_kern_structs(ctx, &rstate) != 0) { + fprintf(stderr, "Failed to restore kernel structs.\n"); + exit(1); + } } - fprintf(stdout, "Restoring kernel structs...\r\n"); - if (vm_restore_kern_structs(ctx, 
&rstate) != 0) { - fprintf(stderr, "Failed to restore kernel structs.\n"); - exit(1); + if (migration_host != NULL) { + fprintf(stdout, "Starting the migration process...\n"); + if (receive_vm_migration(ctx, migration_host) != 0) { + fprintf(stderr, "Failed to migrate the vm.\n"); + exit(1); + } } - fprintf(stdout, "Resuming pci devs...\r\n"); + fprintf(stdout, "Resuming pci devs...\n"); if (vm_resume_user_devs() != 0) { fprintf(stderr, "Failed to resume PCI device state.\n"); exit(1); } } -#endif +#endif /* BHYVE_SNAPSHOT */ error = vm_get_register(bsp, VM_REG_GUEST_RIP, &rip); assert(error == 0); @@ -1626,8 +1650,9 @@ #endif #ifdef BHYVE_SNAPSHOT - if (restore_file != NULL) { + if (restore_file != NULL) destroy_restore_state(&rstate); + if (restore_file != NULL || migration_host != NULL) { if (vm_restore_time(ctx) < 0) err(EX_OSERR, "Unable to restore time"); diff --git a/usr.sbin/bhyve/migration.h b/usr.sbin/bhyve/migration.h new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/migration.h @@ -0,0 +1,27 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2017-2020 Elena Mihailescu + * Copyright (c) 2017-2020 Darius Mihai + * Copyright (c) 2017-2020 Mihai Carabas + * + * The migration feature was developed under sponsorships + * from Matthew Grooms. 
+ *
+ */
+
+#pragma once
+
+#include
+#include
+
+#define DEFAULT_MIGRATION_PORT	24983
+
+struct vmctx;
+
+struct __attribute__((packed)) migrate_req {
+	char host[MAXHOSTNAMELEN];
+	unsigned int port;
+};
+
+int receive_vm_migration(struct vmctx *ctx, char *migration_data);
diff --git a/usr.sbin/bhyve/migration.c b/usr.sbin/bhyve/migration.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/migration.c
@@ -0,0 +1,119 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017-2020 Elena Mihailescu
+ * Copyright (c) 2017-2020 Darius Mihai
+ * Copyright (c) 2017-2020 Mihai Carabas
+ *
+ * The migration feature was developed under sponsorships
+ * from Matthew Grooms.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#ifndef WITHOUT_CAPSICUM
+#include
+#include
+#include
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "migration.h"
+#include "pci_emul.h"
+#include "snapshot.h"
+
+
+#ifdef BHYVE_DEBUG
+#define DPRINTF(FMT, ...)						\
+({									\
+	fprintf(stderr, "%s: " FMT "\n", __func__, ##__VA_ARGS__);	\
+	})
+#else
+#define DPRINTF(FMT, ...)
+#endif
+
+#define EPRINTF(FMT, ...)						\
+({									\
+	fprintf(stderr, "%s: " FMT "\n", __func__, ##__VA_ARGS__);	\
+	})
+
+/*
+ * Parse a "host[:port]" migration endpoint and prepare to receive a guest.
+ *
+ * ctx and migration_data (the endpoint string given to the -R option)
+ * must both be non-NULL.
+ *
+ * Returns an errno value: ENOMEM if the endpoint cannot be duplicated,
+ * EINVAL if the port or the host name is malformed, and EOPNOTSUPP
+ * otherwise, since receiving the guest is not implemented yet.
+ */
+int
+receive_vm_migration(struct vmctx *ctx, char *migration_data)
+{
+	struct migrate_req req;
+	size_t len;
+	char *end, *hostname, *pos;
+	unsigned long port = DEFAULT_MIGRATION_PORT;
+	int rc;
+
+	assert(ctx != NULL);
+	assert(migration_data != NULL);
+
+	memset(&req, 0, sizeof(req));
+
+	/* Work on a private copy: the ':' separator is overwritten below. */
+	hostname = strdup(migration_data);
+	if (hostname == NULL) {
+		EPRINTF("Could not allocate memory for the hostname");
+		return (ENOMEM);
+	}
+
+	if ((pos = strchr(hostname, ':')) != NULL) {
+		*pos = '\0';
+		pos = pos + 1;
+
+		/*
+		 * Require a plain decimal port in 1-65535.  The leading-digit
+		 * test rejects the sign and blanks strtoul() would skip.
+		 */
+		port = strtoul(pos, &end, 10);
+		if (*pos < '0' || *pos > '9' || *end != '\0' ||
+		    port == 0 || port > 65535) {
+			EPRINTF("Could not parse the port");
+			rc = EINVAL;
+			goto done;
+		}
+	}
+	req.port = (unsigned int)port;
+
+	len = strlen(hostname);
+	if (len > MAXHOSTNAMELEN - 1) {
+		EPRINTF("Hostname length %zu bigger than maximum allowed %d",
+		    len, MAXHOSTNAMELEN - 1);
+		rc = EINVAL;
+		goto done;
+	}
+
+	strlcpy(req.host, hostname, sizeof(req.host));
+
+	/* TODO: pass req to vm_recv_migrate_req() once it is implemented. */
+	rc = EOPNOTSUPP;
+	EPRINTF("Migration not implemented yet");
+
+done:
+	free(hostname);
+	return (rc);
+}
diff --git a/usr.sbin/bhyve/snapshot.c b/usr.sbin/bhyve/snapshot.c
--- a/usr.sbin/bhyve/snapshot.c
+++ b/usr.sbin/bhyve/snapshot.c
@@ -86,6 +86,7 @@
 #include "ioapic.h"
 #include "mem.h"
 #include "mevent.h"
+#include "migration.h"
 #include "mptbl.h"
 #include "pci_emul.h"
 #include "pci_irq.h"
@@ -1491,6 +1492,60 @@
 }
 IPC_COMMAND(ipc_cmd_set, checkpoint, vm_do_checkpoint);
 
+/*
+ * IPC handler for the "migrate" command: validate the destination carried
+ * in nvl (a "hostname" string and a "port" number).
+ *
+ * Returns EINVAL if a field is missing or out of range, and EOPNOTSUPP
+ * otherwise, since sending the guest is not implemented yet.
+ */
+static int
+vm_do_migrate(struct vmctx __unused *ctx, const nvlist_t *nvl)
+{
+	struct migrate_req req;
+	const char *hostname;
+	uint64_t port;
+	size_t len;
+
+	if (!nvlist_exists_string(nvl, "hostname") ||
+	    !nvlist_exists_number(nvl, "port"))
+		return (EINVAL);
+
+	/* Check the 64-bit number before narrowing it into req.port. */
+	port = nvlist_get_number(nvl, "port");
+	if (port == 0 || port > 65535) {
+		EPRINTLN("Invalid migration port %llu",
+		    (unsigned long long)port);
+		return (EINVAL);
+	}
+
+	hostname = nvlist_get_string(nvl, "hostname");
+	len = strlen(hostname);
+	if (len > MAXHOSTNAMELEN - 1) {
+		EPRINTLN("Hostname length %zu bigger than maximum allowed %d",
+		    len, MAXHOSTNAMELEN - 1);
+		return (EINVAL);
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.port = (unsigned int)port;
+	strlcpy(req.host, hostname, sizeof(req.host));
+
+	printf("%s: IP address used for migration: %s;\n"
+	    "Port used for migration: %u\n",
+	    __func__,
+	    req.host,
+	    req.port);
+
+	/*
+	 * TODO: call vm_send_migrate_req() with ctx, req and the "live"
+	 * flag from nvl once it is implemented.
+	 */
+	EPRINTLN("Migration operation not implemented yet");
+	return (EOPNOTSUPP);
+}
+IPC_COMMAND(ipc_cmd_set, migrate, vm_do_migrate);
+
 void
 init_snapshot(void)
 {
diff --git a/usr.sbin/bhyvectl/bhyvectl.8 b/usr.sbin/bhyvectl/bhyvectl.8
--- a/usr.sbin/bhyvectl/bhyvectl.8
+++ b/usr.sbin/bhyvectl/bhyvectl.8
@@ -41,6 +41,11 @@
 .Op Fl -force-poweroff
 .Op Fl -checkpoint= Ns Ar
 .Op Fl -suspend= Ns Ar
+.Oo
+.Fl -migrate= Ns Ar host Ns Op Cm \&: Ns Ar port
+|
+.Fl -migrate-live= Ns Ar host Ns Op Cm \&: Ns Ar port
+.Oc
 .Sh DESCRIPTION
 The
 .Nm
@@ -85,6 +90,20 @@
 .Fl -checkpoint .
 The virtual machine will terminate after the snapshot has been
 saved.
+.It Fl -migrate= Ns Ar host Ns Op Cm \&: Ns Ar port
+Warm migrate the virtual machine to a
+.Ar host
+on the specified
+.Ar port .
+The default migration port is 24983.
+The virtual machine will be destroyed after the migration finishes.
+.It Fl -migrate-live= Ns Ar host Ns Op Cm \&: Ns Ar port
+Live migrate the virtual machine to a
+.Ar host
+on the specified
+.Ar port .
+The default migration port is 24983.
+The virtual machine will be destroyed after the migration finishes.
 .El
 .Sh EXIT STATUS
 .Ex -std
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -65,6 +65,7 @@
 
 #ifdef BHYVE_SNAPSHOT
 #include "snapshot.h"
+#include "migration.h"
 #endif
 
 #define MB	(1UL << 20)
@@ -87,6 +88,7 @@
 	" [--destroy]\n"
 #ifdef BHYVE_SNAPSHOT
 	" [--checkpoint= | --suspend=]\n"
+	" [--migrate=<host>[:<port>] | --migrate-live=<host>[:<port>]]\n"
 #endif
 	" [--get-all]\n"
 	" [--get-stats]\n"
@@ -299,6 +301,7 @@
 static int get_cpu_topology;
 #ifdef BHYVE_SNAPSHOT
 static int vm_suspend_opt;
+static int vm_migrate_live;
 #endif
 
 /*
@@ -589,6 +592,8 @@
 #ifdef BHYVE_SNAPSHOT
 	SET_CHECKPOINT_FILE,
 	SET_SUSPEND_FILE,
+	MIGRATE_VM,
+	MIGRATE_VM_LIVE,
 #endif
 };
 
@@ -1456,6 +1461,8 @@
 #ifdef BHYVE_SNAPSHOT
 		{ "checkpoint", REQ_ARG, 0, SET_CHECKPOINT_FILE},
 		{ "suspend", REQ_ARG, 0, SET_SUSPEND_FILE},
+		{ "migrate", REQ_ARG, 0, MIGRATE_VM},
+		{ "migrate-live", REQ_ARG, 0, MIGRATE_VM_LIVE},
 #endif
 	};
 
@@ -1743,7 +1750,60 @@
 
 	return (send_message(vmname, nvl));
 }
-#endif
+
+/*
+ * Build the "migrate" command for the VM vmname and hand it to
+ * send_message().
+ *
+ * migrate_vm is the "host[:port]" destination; the port defaults to
+ * DEFAULT_MIGRATION_PORT.  live selects live instead of warm migration.
+ *
+ * Returns ENOMEM or EINVAL if the destination cannot be parsed, otherwise
+ * the result of send_message().
+ */
+static int
+migration_request(const char *vmname, const char *migrate_vm, bool live)
+{
+	nvlist_t *nvl;
+	char *end, *hostname, *pos;
+	unsigned long port = DEFAULT_MIGRATION_PORT;
+
+	/* Work on a private copy: the ':' separator is overwritten below. */
+	hostname = strdup(migrate_vm);
+	if (hostname == NULL) {
+		fprintf(stderr, "Could not allocate memory for the hostname\n");
+		return (ENOMEM);
+	}
+
+	if ((pos = strchr(hostname, ':')) != NULL) {
+		*pos = '\0';
+		pos = pos + 1;
+
+		/*
+		 * Require a plain decimal port in 1-65535.  The leading-digit
+		 * test rejects the sign and blanks strtoul() would skip.
+		 */
+		port = strtoul(pos, &end, 10);
+		if (*pos < '0' || *pos > '9' || *end != '\0' ||
+		    port == 0 || port > 65535) {
+			fprintf(stderr, "Could not parse the port\n");
+			free(hostname);
+			return (EINVAL);
+		}
+	}
+
+	nvl = nvlist_create(0);
+	nvlist_add_string(nvl, "cmd", "migrate");
+	nvlist_add_string(nvl, "hostname", hostname);
+	nvlist_add_number(nvl, "port", port);
+	nvlist_add_bool(nvl, "live", live);
+
+	free(hostname);
+
+	return (send_message(vmname, nvl));
+}
+
+#endif /* BHYVE_SNAPSHOT */
 
 int
 main(int argc, char *argv[])
@@ -1763,7 +1823,7 @@
 	struct tm tm;
 	struct option *opts;
 #ifdef BHYVE_SNAPSHOT
-	char *checkpoint_file = NULL;
+	char *checkpoint_file = NULL, *migrate_host = NULL;
 #endif
 
 	cpu_intel = cpu_vendor_intel();
@@ -1932,6 +1992,14 @@
 			checkpoint_file = optarg;
 			vm_suspend_opt = (ch == SET_SUSPEND_FILE);
 			break;
+		case MIGRATE_VM:
+		case MIGRATE_VM_LIVE:
+			if (migrate_host != NULL)
+				usage(cpu_intel);
+
+			migrate_host = optarg;
+			vm_migrate_live = (ch == MIGRATE_VM_LIVE);
+			break;
 #endif
 		default:
 			usage(cpu_intel);
@@ -2408,6 +2476,9 @@
 #ifdef BHYVE_SNAPSHOT
 	if (!error && checkpoint_file)
 		error = snapshot_request(vmname, checkpoint_file, vm_suspend_opt);
+
+	if (!error && migrate_host)
+		error = migration_request(vmname, migrate_host, vm_migrate_live);
 #endif
 
 	free (opts);