Changeset View
Changeset View
Standalone View
Standalone View
usr.sbin/bhyve/migration.c
| Show All 24 Lines | |||||
| #endif | #endif | ||||
| #include <err.h> | #include <err.h> | ||||
| #include <netdb.h> | #include <netdb.h> | ||||
| #include <netinet/in.h> | #include <netinet/in.h> | ||||
| #include <stdio.h> | #include <stdio.h> | ||||
| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include <string.h> | #include <string.h> | ||||
| #include <vmmapi.h> | #include <vmmapi.h> | ||||
| #ifdef BHYVE_DEBUG | |||||
| #include <time.h> | |||||
| #endif | |||||
| #include "migration.h" | #include "migration.h" | ||||
| #include "pci_emul.h" | #include "pci_emul.h" | ||||
| #include "snapshot.h" | #include "snapshot.h" | ||||
| #define MB (1024UL * 1024) | #define MB (1024UL * 1024) | ||||
| #define GB (1024UL * MB) | #define GB (1024UL * MB) | ||||
| ▲ Show 20 Lines • Show All 696 Lines • ▼ Show 20 Lines | |||||
| end: | end: | ||||
| if (buffer != NULL) | if (buffer != NULL) | ||||
| free(buffer); | free(buffer); | ||||
| return (error); | return (error); | ||||
| } | } | ||||
| #define MIGRATION_ROUNDS 4 | |||||
| static int | |||||
| migrate_segment_pages(int socket, char *baseaddr, char *page_list, | |||||
| size_t nr_pages, enum migration_transfer_req migration_req) | |||||
| { | |||||
| size_t start_dirty_page; | |||||
| int rc; | |||||
| for (size_t i = 0; i < nr_pages; i++) { | |||||
| if (page_list[i] == 0) | |||||
| continue; | |||||
| start_dirty_page = i; | |||||
| for (; i < nr_pages; i++) { | |||||
| if (page_list[i] == 0) | |||||
| break; | |||||
| } | |||||
| /* Transfer all continous dirty pages into the vm's memory */ | |||||
| rc = migration_transfer_data(socket, baseaddr + start_dirty_page * PAGE_SIZE, | |||||
| (i - start_dirty_page) * PAGE_SIZE, migration_req); | |||||
| if (rc != 0) | |||||
| return (rc); | |||||
| } | |||||
| return (0); | |||||
| } | |||||
| static int | |||||
| migrate_pages(struct vmctx *ctx, int socket, char *page_list, | |||||
| size_t page_list_size, enum migration_transfer_req migration_req) | |||||
| { | |||||
| size_t lowmem_limit_page, lowmem, highmem; | |||||
| int rc; | |||||
| char *baseaddr; | |||||
| if ((migration_req != MIGRATION_SEND_REQ) && (migration_req != MIGRATION_RECV_REQ)) { | |||||
| EPRINTF("wrong migration transfer req"); | |||||
| return (EINVAL); | |||||
| } | |||||
| /* | |||||
| * Transfer the state of the pages (dirty/not dirty) from the source | |||||
| * host to the destination host. The pages that are dirty will be | |||||
| * transferred in the next steps. | |||||
| */ | |||||
| rc = migration_transfer_data(socket, page_list, page_list_size, migration_req); | |||||
| if (rc != 0) { | |||||
| EPRINTF("Could not transfer page list"); | |||||
| return (rc); | |||||
| } | |||||
| vm_get_guestmem_from_ctx(ctx, &baseaddr, &lowmem, &highmem); | |||||
| /* Lowmem segment */ | |||||
| rc = migrate_segment_pages(socket, baseaddr, | |||||
| page_list, lowmem / PAGE_SIZE, migration_req); | |||||
| if (rc != 0) { | |||||
| EPRINTF("Could not migrate the lowmem segment pages"); | |||||
| return (rc); | |||||
| } | |||||
| /* Highmem segment */ | |||||
| if (highmem == 0) | |||||
| return (0); | |||||
| lowmem_limit_page = vm_get_lowmem_limit(ctx) / PAGE_SIZE; | |||||
| rc = migrate_segment_pages(socket, baseaddr + 4 * GB, | |||||
| page_list + lowmem_limit_page, highmem / PAGE_SIZE, migration_req); | |||||
| if (rc != 0) { | |||||
| EPRINTF("Could not migrate the highmem segment pages"); | |||||
| return (rc); | |||||
| } | |||||
| return (0); | |||||
| } | |||||
| static int | |||||
| live_migrate_send(struct vmctx *ctx, int socket) | |||||
| { | |||||
| int error, i, rc; | |||||
| uint8_t rounds; | |||||
| size_t migration_completed; | |||||
| size_t pages; | |||||
| char *page_list_indexes; | |||||
| #ifdef BHYVE_DEBUG | |||||
| struct timespec start, now; | |||||
| uint64_t time_diff_ms; | |||||
| #endif | |||||
| error = 0; | |||||
| page_list_indexes = NULL; | |||||
| rounds = MIGRATION_ROUNDS; | |||||
| /* Send the number of memory rounds to destination */ | |||||
| error = migration_transfer_data(socket, &rounds, sizeof(rounds), MIGRATION_SEND_REQ); | |||||
| if (error != 0) { | |||||
| EPRINTF("Could not transfer the number of rounds"); | |||||
| goto done; | |||||
| } | |||||
| pages = (vm_get_lowmem_size(ctx) + vm_get_highmem_size(ctx)) / PAGE_SIZE; | |||||
| /* alloc page_list_indexes */ | |||||
| page_list_indexes = mmap(NULL, pages * sizeof(char), PROT_READ | PROT_WRITE, MAP_ANON, -1, 0); | |||||
| if (page_list_indexes == MAP_FAILED) { | |||||
| perror("Page list indexes could not be allocated"); | |||||
| error = errno; | |||||
| goto done; | |||||
| } | |||||
| if (mlock(page_list_indexes, pages * sizeof(char)) == -1) { | |||||
| perror("Page list indexes could not be locked"); | |||||
| error = errno; | |||||
| goto done; | |||||
| } | |||||
| for (i = 0; i <= MIGRATION_ROUNDS; i++) { | |||||
| if (i == MIGRATION_ROUNDS) { | |||||
| /* Last round */ | |||||
| vm_vcpu_pause(ctx); | |||||
| rc = vm_pause_user_devs(); | |||||
| if (rc != 0) { | |||||
| EPRINTF("Could not pause devices"); | |||||
| error = rc; | |||||
| goto unlock_vm_and_exit; | |||||
| } | |||||
| } | |||||
| DPRINTF("Live migration round %d - Start", i); | |||||
| #ifdef BHYVE_DEBUG | |||||
| clock_gettime(CLOCK_MONOTONIC, &start); | |||||
| #endif | |||||
| memset(page_list_indexes, 0, pages); | |||||
| /* Search the dirty pages and populate page_list_index */ | |||||
| rc = vm_get_dirty_page_list(ctx, page_list_indexes, pages, i == 0); | |||||
| if (rc != 0) { | |||||
| EPRINTF("Couldn't search for the dirty pages"); | |||||
| error = errno; | |||||
| if (i == MIGRATION_ROUNDS) | |||||
| goto unlock_vm_and_exit; | |||||
| else | |||||
| goto done; | |||||
| } | |||||
| DPRINTF("Live migration round %d: Finished searching the dirty pages", i); | |||||
| error = migrate_pages(ctx, socket, page_list_indexes, | |||||
| pages, MIGRATION_SEND_REQ); | |||||
| if (error != 0) { | |||||
| EPRINTF("Couldn't send dirty pages to dest"); | |||||
| if (i == MIGRATION_ROUNDS) | |||||
| goto unlock_vm_and_exit; | |||||
| else | |||||
| goto done; | |||||
| } | |||||
| #ifdef BHYVE_DEBUG | |||||
| clock_gettime(CLOCK_MONOTONIC, &now); | |||||
| time_diff_ms = (now.tv_sec - start.tv_sec) * 1000 + | |||||
| (now.tv_nsec - start.tv_nsec) / 1000000; | |||||
| DPRINTF("Live migration round %d - Done - %lu ms", i, time_diff_ms); | |||||
| #endif | |||||
| } | |||||
| /* Send kern data */ | |||||
| error = migrate_kern_data(ctx, socket, MIGRATION_SEND_REQ); | |||||
| if (error != 0) { | |||||
| EPRINTF("Could not send kern data to destination"); | |||||
| goto unlock_vm_and_exit; | |||||
| } | |||||
| /* Send PCI data */ | |||||
| error = migrate_devs(socket, MIGRATION_SEND_REQ); | |||||
| if (error != 0) { | |||||
| EPRINTF("Could not send pci devs to destination"); | |||||
| goto unlock_vm_and_exit; | |||||
| } | |||||
| /* Wait for migration completed */ | |||||
| error = migration_transfer_data(socket, &migration_completed, | |||||
| sizeof(migration_completed), MIGRATION_RECV_REQ); | |||||
| if ((error != 0) || (migration_completed != MIGRATION_SPECS_OK)) { | |||||
| EPRINTF("Could not recv migration completed remote or received error"); | |||||
| if (error == 0) | |||||
| error = EINVAL; | |||||
| goto unlock_vm_and_exit; | |||||
| } | |||||
| #ifdef BHYVE_DEBUG | |||||
| clock_gettime(CLOCK_MONOTONIC, &now); | |||||
| time_diff_ms = (now.tv_sec - start.tv_sec) * 1000 + (now.tv_nsec - start.tv_nsec) / 1000000; | |||||
| DPRINTF("Live migration downtime - %lu ms", time_diff_ms); | |||||
| #endif | |||||
| vm_destroy(ctx); | |||||
| exit(0); | |||||
| unlock_vm_and_exit: | |||||
| rc = vm_resume_user_devs(); | |||||
| if (rc != 0) | |||||
| EPRINTF("Could not resume devices"); | |||||
| vm_vcpu_resume(ctx); | |||||
| done: | |||||
| if (page_list_indexes != MAP_FAILED) { | |||||
| munmap(page_list_indexes, pages * sizeof(char)); | |||||
| } | |||||
| return (error); | |||||
| } | |||||
| static int | |||||
| live_migrate_recv(struct vmctx *ctx, int socket) | |||||
| { | |||||
| int error, index; | |||||
| uint8_t rounds; | |||||
| size_t lowmem_size, highmem_size, pages; | |||||
| char *baseaddr, *page_list_indexes; | |||||
| page_list_indexes = NULL; | |||||
| error = migration_transfer_data(socket, &rounds, sizeof(rounds), MIGRATION_RECV_REQ); | |||||
| if (error != 0) { | |||||
| EPRINTF("Could not recv the number of rounds from remote"); | |||||
| goto done; | |||||
| } | |||||
| /* Compute memory_size and pages*/ | |||||
| vm_get_guestmem_from_ctx(ctx, &baseaddr, &lowmem_size, &highmem_size); | |||||
| pages = (lowmem_size + highmem_size) / PAGE_SIZE; | |||||
| madvise(baseaddr, lowmem_size, MADV_WILLNEED); | |||||
| if (highmem_size != 0) | |||||
| madvise(baseaddr + 4 * GB, highmem_size, MADV_WILLNEED); | |||||
| /* alloc page_list_indexes */ | |||||
| page_list_indexes = malloc(pages * sizeof(char)); | |||||
| if (page_list_indexes == NULL) { | |||||
| perror("Page list indexes could not be allocated"); | |||||
| error = ENOMEM; | |||||
| goto done; | |||||
| } | |||||
| /* The following iteration contains the preliminary round in which the | |||||
| * entire memory is migrated to the destination. Then, for | |||||
| * MIGRATION_ROUNDS - 1 rounds, only the dirtied pages will be migrated. | |||||
| * In the final round, the rest of the pages are migrated. | |||||
| * Since the vcpus are not started, we don't need to lock them, so we | |||||
| * can do the memory migration pretty straight-forward. | |||||
| */ | |||||
| DPRINTF("Live migration start"); | |||||
| for (index = 0; index <= rounds; index ++) { | |||||
| DPRINTF("Live migration round %d: Start", index); | |||||
| error = migrate_pages(ctx, socket, page_list_indexes, pages, MIGRATION_RECV_REQ); | |||||
| if (error != 0) { | |||||
| EPRINTF("Couldn't recv dirty pages from source"); | |||||
| goto done; | |||||
| } | |||||
| } | |||||
| DPRINTF("Live migration done"); | |||||
| madvise(baseaddr, lowmem_size, MADV_NORMAL); | |||||
| if (highmem_size!= 0) | |||||
| madvise(baseaddr + 4 * GB, highmem_size, MADV_NORMAL); | |||||
| error = 0; | |||||
| done: | |||||
| if (page_list_indexes != NULL) { | |||||
| free(page_list_indexes); | |||||
| } | |||||
| return (error); | |||||
| } | |||||
| static inline int | static inline int | ||||
| migrate_connections(struct migrate_req req, int *socket_fd, | migrate_connections(struct migrate_req req, int *socket_fd, | ||||
| enum migration_transfer_req type) | enum migration_transfer_req type) | ||||
| { | { | ||||
| int error; | int error; | ||||
| int s, con_socket; | int s, con_socket; | ||||
| struct sockaddr_in sa, client_sa; | struct sockaddr_in sa, client_sa; | ||||
| struct in_addr req_addr; | struct in_addr req_addr; | ||||
| ▲ Show 20 Lines • Show All 116 Lines • ▼ Show 20 Lines | #endif | ||||
| rc = migration_transfer_data(s, &is_live, sizeof(is_live), MIGRATION_SEND_REQ); | rc = migration_transfer_data(s, &is_live, sizeof(is_live), MIGRATION_SEND_REQ); | ||||
| if (rc != 0) { | if (rc != 0) { | ||||
| EPRINTF("Could not send migration type"); | EPRINTF("Could not send migration type"); | ||||
| error = rc; | error = rc; | ||||
| goto done; | goto done; | ||||
| } | } | ||||
| if (is_live) { | if (is_live) { | ||||
| EPRINTF("Live migration not implemented"); | rc = live_migrate_send(ctx, s); | ||||
| rc = EOPNOTSUPP; | |||||
| if (rc != 0) | if (rc != 0) | ||||
| EPRINTF("Could not live migrate the guest's memory"); | EPRINTF("Could not live migrate the guest's memory"); | ||||
| error = rc; | error = rc; | ||||
| goto done; | goto done; | ||||
| } /* else continue the warm migration procedure */ | } /* else continue the warm migration procedure */ | ||||
| vm_vcpu_pause(ctx); | vm_vcpu_pause(ctx); | ||||
| ▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines | if (rc != 0) { | ||||
| EPRINTF("Could not recv migration type"); | EPRINTF("Could not recv migration type"); | ||||
| goto done; | goto done; | ||||
| } | } | ||||
| /* For recv, the only difference between warm and live migration is the | /* For recv, the only difference between warm and live migration is the | ||||
| * way in which the memory is migrated. | * way in which the memory is migrated. | ||||
| */ | */ | ||||
| if (is_live) { | if (is_live) { | ||||
| EPRINTF("Live migration not implemented"); | rc = live_migrate_recv(ctx, s); | ||||
| rc = EOPNOTSUPP; | |||||
| if (rc != 0) { | if (rc != 0) { | ||||
| EPRINTF("Could not live migrate the guest's memory"); | EPRINTF("Could not live migrate the guest's memory"); | ||||
| goto done; | goto done; | ||||
| } | } | ||||
| } else { | } else { | ||||
| /* if not live migration, then migrate memory normally. */ | /* if not live migration, then migrate memory normally. */ | ||||
| rc = migrate_recv_memory(ctx, s); | rc = migrate_recv_memory(ctx, s); | ||||
| if (rc != 0) { | if (rc != 0) { | ||||
| ▲ Show 20 Lines • Show All 95 Lines • Show Last 20 Lines | |||||