Index: sys/amd64/conf/GENERIC =================================================================== --- sys/amd64/conf/GENERIC +++ sys/amd64/conf/GENERIC @@ -398,3 +398,6 @@ options HID_DEBUG # enable debug msgs device hid # Generic HID support options IICHID_SAMPLING # Workaround missing GPIO INTR support + +# bhyve +options BHYVE_SNAPSHOT Index: sys/amd64/include/vmm_snapshot.h =================================================================== --- sys/amd64/include/vmm_snapshot.h +++ sys/amd64/include/vmm_snapshot.h @@ -40,6 +40,7 @@ #include #include + #ifndef _KERNEL #include #endif @@ -84,6 +85,365 @@ */ }; +#ifndef JSON_SNAPSHOT_V2 +#define JSON_SNAPSHOT_V2 + +#define JSON_V1 1 +#define JSON_V2 2 + +#include +#include + +#define IDENT_LEVEL 10 + +#define JSON_V1 1 +#define JSON_V2 2 + +#include +#include + +#define IDENT_LEVEL 10 /* NOTE(review): this JSON_V1/JSON_V2/include/IDENT_LEVEL group repeats the one directly above verbatim */ + +/* ####################### kernel structs copies ######################### */ + +#define VM_MAXCPU 16 /* maximum virtual cpus */ + +/* vhpet */ +#define VHPET_NUM_TIMERS 8 /* number of timer[] slots in struct vhpet_userspace */ + +struct timer_userspace { + uint64_t cap_config; /* Configuration */ + uint64_t msireg; /* FSB interrupt routing */ + uint32_t compval; /* Comparator */ + uint32_t comprate; + sbintime_t callout_sbt; /* time when counter==compval */ +}; + +struct vhpet_userspace { + sbintime_t freq_sbt; + + uint64_t config; /* Configuration */ + uint64_t isr; /* Interrupt Status */ + uint32_t countbase; /* HPET counter base value */ + sbintime_t countbase_sbt; /* uptime corresponding to base value */ + + struct timer_userspace timer[VHPET_NUM_TIMERS]; +}; + +/* vioapic */ + +#define REDIR_ENTRIES 32 /* number of rtbl[] slots in struct vioapic_userspace */ + +struct rtbl_userspace { + uint64_t reg; + int acnt; /* sum of pin asserts (+1) and deasserts (-1) */ +}; + +struct vioapic_userspace { + uint32_t id; + uint32_t ioregsel; + struct rtbl_userspace rtbl[REDIR_ENTRIES]; +}; + +/* vm (vcpus) */ +/* + * Initialization: + * (a) allocated when vcpu is created + * (i) initialized when vcpu is created and when it is reinitialized + * (o) 
initialized the first time the vcpu is created + * (x) initialized before use + */ +struct vcpu_userspace { + enum x2apic_state x2apic_state; /* (i) APIC mode */ + uint64_t exitintinfo; /* (i) events pending at VM exit */ + int exc_vector; /* (x) exception collateral */ + int exc_errcode_valid; + uint32_t exc_errcode; + uint64_t guest_xcr0; /* (i) guest %xcr0 register */ + struct vm_exit exitinfo; /* (x) exit reason and collateral */ + uint64_t nextrip; /* (x) next instruction to execute */ + uint64_t tsc_offset; /* (o) TSC offsetting */ +}; + +/* + * Initialization: + * (o) initialized the first time the VM is created + * (i) initialized when VM is created and when it is reinitialized + * (x) initialized before use + */ +struct vm_userspace { + struct vcpu_userspace vcpu[VM_MAXCPU]; /* (i) guest vcpus */ +}; + +/* vlapic */ +#define APIC_LVT_CMCI 6 +#define APIC_LVT_MAX APIC_LVT_CMCI + +enum boot_state_userspace { + BS_INIT_USERSPACE, + BS_SIPI_USERSPACE, + BS_RUNNING_USERSPACE +}; + +/* + * 16 priority levels with at most one vector injected per level. + */ +#define ISRVEC_STK_SIZE (16 + 1) + +#define VLAPIC_MAXLVT_INDEX APIC_LVT_CMCI /* same value as APIC_LVT_MAX above */ + +struct vlapic_userspace { + struct vm_userspace *vm; + int vcpuid; + struct LAPIC *apic_page; /* vlapic_snapshot() passes this pointer with a length of PAGE_SIZE */ + + uint32_t esr_pending; + + struct bintime timer_fire_bt; /* callout expiry time */ + struct bintime timer_freq_bt; /* timer frequency */ + struct bintime timer_period_bt; /* timer period */ + + /* + * The 'isrvec_stk' is a stack of vectors injected by the local apic. + * A vector is popped from the stack when the processor does an EOI. + * The vector on the top of the stack is used to compute the + * Processor Priority in conjunction with the TPR. + */ + uint8_t isrvec_stk[ISRVEC_STK_SIZE]; + int isrvec_stk_top; + + uint64_t msr_apicbase; + enum boot_state_userspace boot_state; + + /* + * Copies of some registers in the virtual APIC page. We do this for + * a couple of different reasons: + * - to be able to detect what changed (e.g. 
svr_last) + * - to maintain a coherent snapshot of the register (e.g. lvt_last) + */ + uint32_t svr_last; + uint32_t lvt_last[VLAPIC_MAXLVT_INDEX + 1]; +}; + +/* vatpic */ +struct atpic_userspace { + bool ready; + int icw_num; + int rd_cmd_reg; + + bool aeoi; + bool poll; + bool rotate; + bool sfn; /* special fully-nested mode */ + + int irq_base; + uint8_t request; /* Interrupt Request Register (IRR) */ + uint8_t service; /* Interrupt Service (ISR) */ + uint8_t mask; /* Interrupt Mask Register (IMR) */ + uint8_t smm; /* special mask mode */ + + int acnt[8]; /* sum of pin asserts and deasserts */ + int lowprio; /* lowest priority irq */ + + bool intr_raised; +}; + +struct vatpic_userspace { + struct atpic_userspace atpic[2]; + uint8_t elc[2]; +}; + +/* vatpit */ +struct vatpit_userspace; /* forward declaration: the callout argument below points back at it */ + +struct vatpit_callout_arg_userspace { + struct vatpit_userspace *vatpit; + int channel_num; +}; + +struct channel_userspace { + int mode; + uint16_t initial; /* initial counter value */ + struct bintime now_bt; /* uptime when counter was loaded */ + uint8_t cr[2]; + uint8_t ol[2]; + bool slatched; /* status latched */ + uint8_t status; + int crbyte; + int olbyte; + int frbyte; + struct bintime callout_bt; /* target time */ + struct vatpit_callout_arg_userspace callout_arg; +}; + +struct vatpit_userspace { + struct bintime freq_bt; + struct channel_userspace channel[3]; +}; + +/* vpmtmr */ +struct vpmtmr_userspace { + sbintime_t freq_sbt; + sbintime_t baseuptime; + uint32_t baseval; +}; + +/* vrtc */ +/* Register layout of the RTC */ +struct rtcdev_userspace { + uint8_t sec; + uint8_t alarm_sec; + uint8_t min; + uint8_t alarm_min; + uint8_t hour; + uint8_t alarm_hour; + uint8_t day_of_week; + uint8_t day_of_month; + uint8_t month; + uint8_t year; + uint8_t reg_a; + uint8_t reg_b; + uint8_t reg_c; + uint8_t reg_d; + uint8_t nvram[36]; + uint8_t century; + uint8_t nvram2[128 - 51]; +} __packed; + +struct vrtc_userspace { + u_int addr; /* RTC register to read or write */ + 
sbintime_t base_uptime; + time_t base_rtctime; + struct rtcdev_userspace rtcdev; +}; + +/* vmx */ +#define VMCS_GUEST_IA32_SYSENTER_CS 0x0000482A +#define VMCS_GUEST_IA32_SYSENTER_ESP 0x00006824 +#define VMCS_GUEST_IA32_SYSENTER_EIP 0x00006826 +#define VMCS_GUEST_INTERRUPTIBILITY 0x00004824 +#define VMCS_GUEST_ACTIVITY 0x00004826 +#define VMCS_ENTRY_CTLS 0x00004012 +#define VMCS_EXIT_CTLS 0x0000400C + +struct vmxctx_userspace { + register_t guest_rdi; /* Guest state */ + register_t guest_rsi; + register_t guest_rdx; + register_t guest_rcx; + register_t guest_r8; + register_t guest_r9; + register_t guest_rax; + register_t guest_rbx; + register_t guest_rbp; + register_t guest_r10; + register_t guest_r11; + register_t guest_r12; + register_t guest_r13; + register_t guest_r14; + register_t guest_r15; + register_t guest_cr2; + register_t guest_dr0; + register_t guest_dr1; + register_t guest_dr2; + register_t guest_dr3; + register_t guest_dr6; /* last field passed to the snapshot macros by vmx_snapshot(); the host_* fields below are not */ + + register_t host_r15; /* Host state */ + register_t host_r14; + register_t host_r13; + register_t host_r12; + register_t host_rbp; + register_t host_rsp; + register_t host_rbx; + register_t host_dr0; + register_t host_dr1; + register_t host_dr2; + register_t host_dr3; + register_t host_dr6; + register_t host_dr7; + uint64_t host_debugctl; + int host_tf; + + int inst_fail_status; +}; + +struct vmxcap_userspace { + int set; + uint32_t proc_ctls; + uint32_t proc_ctls2; + uint32_t exc_bitmap; +}; + +struct vmxstate_userspace { + uint64_t nextrip; /* next instruction to be executed by guest */ + int lastcpu; /* host cpu that this 'vcpu' last ran on */ + uint16_t vpid; +}; + +struct apic_page_userspace { + uint32_t reg[PAGE_SIZE / 4]; /* PAGE_SIZE bytes viewed as 32-bit words */ +}; + +/* Index into the 'guest_msrs[]' array */ +enum { + IDX_MSR_LSTAR_USERSPACE, + IDX_MSR_CSTAR_USERSPACE, + IDX_MSR_STAR_USERSPACE, + IDX_MSR_SF_MASK_USERSPACE, + IDX_MSR_KGSBASE_USERSPACE, + IDX_MSR_PAT_USERSPACE, + IDX_MSR_TSC_AUX_USERSPACE, + GUEST_MSR_NUM_USERSPACE /* must be the last 
enumeration */ +}; + +struct vmcs_userspace { + uint32_t identifier; + uint32_t abort_code; + char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2]; /* remainder of a PAGE_SIZE-byte region after the two 32-bit fields */ +}; + +struct vmx_userspace { + struct vmcs_userspace vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */ + struct apic_page_userspace apic_page[VM_MAXCPU]; /* one apic page per vcpu */ + char msr_bitmap[PAGE_SIZE]; + uint64_t guest_msrs[VM_MAXCPU][GUEST_MSR_NUM_USERSPACE]; + struct vmxctx_userspace ctx[VM_MAXCPU]; + struct vmxcap_userspace cap[VM_MAXCPU]; + struct vmxstate_userspace state[VM_MAXCPU]; + uint64_t eptp; + struct vm_userspace *vm; + long eptgen[MAXCPU]; /* cached pmap->pm_eptgen; NOTE(review): sized by MAXCPU while the other per-cpu arrays here use VM_MAXCPU */ +}; + +/* ####################### kernel structs copies ######################### */ + +struct vm_snapshot_device_info { + unsigned char ident; + unsigned char create_instance; + char *field_name; + char *type; + int index; + char *intern_arr_name; + void *field_data; + size_t data_size; + struct vm_snapshot_device_info *next_field; +}; + +struct list_device_info { + unsigned char ident; + unsigned char create_instance; + char *type; + const char *intern_arr_names[IDENT_LEVEL]; + int index; + int auto_index; + + struct vm_snapshot_device_info *first; + struct vm_snapshot_device_info *last; +}; + +#endif /* JSON_SNAPSHOT_V2 */ + enum vm_snapshot_op { VM_SNAPSHOT_SAVE, VM_SNAPSHOT_RESTORE, @@ -97,29 +457,126 @@ struct vm_snapshot_buffer buffer; +#ifdef JSON_SNAPSHOT_V2 + struct list_device_info dev_info_list; + unsigned char snapshot_kernel; +#endif + enum vm_snapshot_op op; + unsigned char version; /* compared against 2 / JSON_V2 by the snapshot macros and vm_snapshot_buf() */ }; +int vm_snapshot_save_fieldname(const char *fullname, volatile void *data, + char *type, size_t data_size, struct vm_snapshot_meta *meta); + +void vm_snapshot_add_intern_list(const char *arr_name, + struct vm_snapshot_meta *meta); +void vm_snapshot_remove_intern_list(struct vm_snapshot_meta *meta); + +void vm_snapshot_set_intern_arr_index(struct vm_snapshot_meta *meta, int index); +void vm_snapshot_clear_intern_arr_index(struct vm_snapshot_meta *meta); + 
+void vm_snapshot_activate_auto_index(struct vm_snapshot_meta *meta, + unsigned char create_instance); +void vm_snapshot_deactivate_auto_index(struct vm_snapshot_meta *meta); + +int vm_snapshot_save_fieldname_cmp(const char *fullname, volatile void *data, + char *type, size_t data_size, struct vm_snapshot_meta *meta); + + void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op); int vm_snapshot_buf(volatile void *data, size_t data_size, struct vm_snapshot_meta *meta); size_t vm_get_snapshot_size(struct vm_snapshot_meta *meta); + int vm_snapshot_guest2host_addr(void **addrp, size_t len, bool restore_null, struct vm_snapshot_meta *meta); int vm_snapshot_buf_cmp(volatile void *data, size_t data_size, struct vm_snapshot_meta *meta); -#define SNAPSHOT_BUF_OR_LEAVE(DATA, LEN, META, RES, LABEL) \ -do { \ - (RES) = vm_snapshot_buf((DATA), (LEN), (META)); \ - if ((RES) != 0) { \ - vm_snapshot_buf_err(#DATA, (META)->op); \ - goto LABEL; \ - } \ +void check_and_set_non_array_type(char *type, struct vm_snapshot_meta *meta); + +#ifdef JSON_SNAPSHOT_V2 + +#define SNAPSHOT_ADD_INTERN_ARR(ARR_NAME, META) \ +do { \ + vm_snapshot_add_intern_list(#ARR_NAME, (META)); \ +} while (0) /* ARR_NAME is stringified, so it is passed as a bare token */ + +#define SNAPSHOT_REMOVE_INTERN_ARR(ARR_NAME, META) \ +do { \ + vm_snapshot_remove_intern_list((META)); \ +} while (0) /* ARR_NAME is accepted but not used in the expansion */ + + +#define SNAPSHOT_SET_INTERN_ARR_INDEX(META, IDX) \ +do { \ + vm_snapshot_set_intern_arr_index((META), (IDX)); \ +} while (0) + +#define SNAPSHOT_CLEAR_INTERN_ARR_INDEX(META) \ +do { \ + vm_snapshot_clear_intern_arr_index((META)); \ +} while (0) + +/* + * The second argument selects how the automatic index is used: + * either to create a new instance per index, or only as part of + * the element's key name. + * 1 - create a new instance + * 0 - do not create a new instance + */ +#define SNAPSHOT_ACTIVATE_AUTO_INDEXING(META, create_instance) \ +do { \ + vm_snapshot_activate_auto_index((META), (create_instance)); \ } while (0) -#define SNAPSHOT_VAR_OR_LEAVE(DATA, META, RES, LABEL) \ - 
SNAPSHOT_BUF_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL) +#define SNAPSHOT_DEACTIVATE_AUTO_INDEXING(META) \ +do { \ + vm_snapshot_deactivate_auto_index((META)); \ +} while (0) + +#define GET_TYPE(X) _Generic((X), \ + /* fixed sized types */ \ + int8_t: "int8", \ + uint8_t: "uint8", \ + int16_t: "int16", \ + uint16_t: "uint16", \ + int32_t: "int32", \ + uint32_t: "uint32", \ + int64_t: "int64", \ + uint64_t: "uint64", \ + default: "b64" /* every type not listed above, including pointers and arrays */ \ +) + +#endif /* JSON_SNAPSHOT_V2 */ + +#define SNAPSHOT_BUF_OR_LEAVE(DATA, LEN, META, RES, LABEL) \ +do { \ + char *type; \ + type = GET_TYPE(DATA); /* DATA is usually a pointer here, which selects "b64" */ \ + if ((META)->version == JSON_V2) { /* v2: go through the field-name aware entry point */ \ + (RES) = vm_snapshot_save_fieldname(#DATA, (DATA), type, (LEN), (META)); \ + if ((RES) != 0) { \ + vm_snapshot_buf_err(#DATA, (META)->op); \ + goto LABEL; \ + } \ + } else { /* any other version: plain buffer copy */ \ + (RES) = vm_snapshot_buf((DATA), (LEN), (META)); \ + if ((RES) != 0) { \ + vm_snapshot_buf_err(#DATA, (META)->op); \ + goto LABEL; \ + } \ + } \ +} while (0) + +#define SNAPSHOT_VAR_OR_LEAVE(DATA, META, RES, LABEL) \ +do { \ + char *type; \ + type = GET_TYPE(DATA); /* type of the variable itself, before its address is taken below */ \ + check_and_set_non_array_type(type, (META)); \ + SNAPSHOT_BUF_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL); \ +} while (0) /* * Address variables are pointers to guest memory. @@ -128,25 +585,35 @@ * pointer NULL at restore time. 
*/ #define SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ADDR, LEN, RNULL, META, RES, LABEL) \ -do { \ - (RES) = vm_snapshot_guest2host_addr((void **)&(ADDR), (LEN), (RNULL), \ - (META)); \ - if ((RES) != 0) { \ - if ((RES) == EFAULT) \ - fprintf(stderr, "%s: invalid address: %s\r\n", \ - __func__, #ADDR); \ - goto LABEL; \ - } \ +do { \ + (RES) = vm_snapshot_guest2host_addr((void **)&(ADDR), (LEN), \ + (RNULL), (META)); \ + if ((RES) != 0) { \ + if ((RES) == EFAULT) \ + fprintf(stderr, "%s: invalid address: %s\r\n", \ + __func__, #ADDR); \ + goto LABEL; \ + } \ } while (0) /* compare the value in the meta buffer with the data */ -#define SNAPSHOT_BUF_CMP_OR_LEAVE(DATA, LEN, META, RES, LABEL) \ -do { \ - (RES) = vm_snapshot_buf_cmp((DATA), (LEN), (META)); \ - if ((RES) != 0) { \ - vm_snapshot_buf_err(#DATA, (META)->op); \ - goto LABEL; \ - } \ +#define SNAPSHOT_BUF_CMP_OR_LEAVE(DATA, LEN, META, RES, LABEL) \ +do { \ + char *type; \ + type = GET_TYPE(DATA); \ + if ((META)->version == JSON_V2) { /* v2: go through the field-name aware compare */ \ + (RES) = vm_snapshot_save_fieldname_cmp(#DATA, (DATA), type, (LEN), (META)); \ + if ((RES) != 0) { \ + vm_snapshot_buf_err(#DATA, (META)->op); \ + goto LABEL; \ + } \ + } else { /* any other version: plain buffer compare */ \ + (RES) = vm_snapshot_buf_cmp((DATA), (LEN), (META)); \ + if ((RES) != 0) { \ + vm_snapshot_buf_err(#DATA, (META)->op); \ + goto LABEL; \ + } \ + } \ } while (0) #define SNAPSHOT_VAR_CMP_OR_LEAVE(DATA, META, RES, LABEL) \ Index: sys/amd64/vmm/vmm_snapshot.c =================================================================== --- sys/amd64/vmm/vmm_snapshot.c +++ sys/amd64/vmm/vmm_snapshot.c @@ -41,6 +41,35 @@ #include +int +vm_snapshot_save_fieldname(const char *fullname, volatile void *data, + char *type, size_t data_size, struct vm_snapshot_meta *meta) +{ + return (vm_snapshot_buf(data, data_size, meta)); /* fullname and type are not used by this implementation */ +} + +int +vm_snapshot_save_fieldname_cmp(const char *fullname, volatile void *data, + char *type, size_t data_size, struct vm_snapshot_meta *meta) +{ + return (vm_snapshot_buf_cmp(data, data_size, meta)); /* fullname and type are not used by this implementation */ +} + +void 
+vm_snapshot_add_intern_list(const char *arr_name, struct vm_snapshot_meta *meta) +{ +} + +void +vm_snapshot_remove_intern_list(struct vm_snapshot_meta *meta) +{ +} + +void +check_and_set_non_array_type(char *type, struct vm_snapshot_meta *meta) +{ +} + void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op) { @@ -62,22 +91,39 @@ { struct vm_snapshot_buffer *buffer; int op; + int ds; /* size prefix read back from a v2 stream */ void *nv_data; nv_data = __DEVOLATILE(void *, data); buffer = &meta->buffer; op = meta->op; - if (buffer->buf_rem < data_size) { + if (buffer->buf_rem < data_size + (meta->version == JSON_V2 ? sizeof(int32_t) : 0)) { /* only v2 streams carry the 32-bit size prefix */ printf("%s: buffer too small\r\n", __func__); return (E2BIG); } - if (op == VM_SNAPSHOT_SAVE) + if (op == VM_SNAPSHOT_SAVE) { + if (meta->version == JSON_V2) { + copyout(&data_size, buffer->buf, sizeof(int32_t)); + buffer->buf += sizeof(int32_t); + buffer->buf_rem -= sizeof(int32_t); + } copyout(nv_data, buffer->buf, data_size); - else if (op == VM_SNAPSHOT_RESTORE) + } else if (op == VM_SNAPSHOT_RESTORE) { + if (meta->version == JSON_V2) { + ds = -1; + copyin(buffer->buf, &ds, sizeof(int32_t)); + if (ds != data_size) { + printf("%s(line %d): Size mismatch, expected %ld but got %d\r\n", + __func__, __LINE__, data_size, ds); + return (-1); + } + buffer->buf += sizeof(int32_t); + buffer->buf_rem -= sizeof(int32_t); + } copyin(buffer->buf, nv_data, data_size); - else + } else return (EINVAL); buffer->buf += data_size; @@ -112,12 +158,13 @@ struct vm_snapshot_buffer *buffer; int op; int ret; + int ds; /* size prefix read back from a v2 stream */ void *_data = *(void **)(void *)&data; buffer = &meta->buffer; op = meta->op; - if (buffer->buf_rem < data_size) { + if (buffer->buf_rem < data_size + (meta->version == JSON_V2 ? sizeof(int32_t) : 0)) { /* only v2 streams carry the 32-bit size prefix */ printf("%s: buffer too small\r\n", __func__); ret = E2BIG; goto done; @@ -125,8 +172,24 @@ if (op == VM_SNAPSHOT_SAVE) { ret = 0; + if (meta->version == JSON_V2) { /* write the size prefix only for v2, as vm_snapshot_buf() does */ + copyout(&data_size, buffer->buf, sizeof(int32_t)); + buffer->buf += sizeof(int32_t); + buffer->buf_rem -= sizeof(int32_t); + } copyout(_data, buffer->buf, data_size); } else if (op == 
VM_SNAPSHOT_RESTORE) { + if (meta->version == JSON_V2) { + ds = -1; + copyin(buffer->buf, &ds, sizeof(int32_t)); /* copyin(user source, kernel destination, length) */ + if (ds != data_size) { + printf("%s(line %d): Size mismatch, expected %ld but got %d\r\n", + __func__, __LINE__, data_size, ds); + return (-1); + } + buffer->buf += sizeof(int32_t); + buffer->buf_rem -= sizeof(int32_t); + } ret = memcmp(_data, buffer->buf, data_size); } else { ret = EINVAL; Index: usr.sbin/bhyve/Makefile =================================================================== --- usr.sbin/bhyve/Makefile +++ usr.sbin/bhyve/Makefile @@ -78,6 +78,7 @@ .if ${MK_BHYVE_SNAPSHOT} != "no" SRCS+= snapshot.c +SRCS+= kern_snapshot.c .endif CFLAGS.kernemu_dev.c+= -I${SRCTOP}/sys/amd64 Index: usr.sbin/bhyve/atkbdc.c =================================================================== --- usr.sbin/bhyve/atkbdc.c +++ usr.sbin/bhyve/atkbdc.c @@ -33,9 +33,9 @@ #include #include +#include #include -#include #include #include @@ -585,12 +585,15 @@ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq_active, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq, meta, ret, done); + SNAPSHOT_ADD_INTERN_ARR(ps2kbd, meta); ret = ps2kbd_snapshot(atkbdc_sc->ps2kbd_sc, meta); if (ret != 0) goto done; + SNAPSHOT_REMOVE_INTERN_ARR(ps2kbd, meta); + SNAPSHOT_ADD_INTERN_ARR(ps2mouse, meta); ret = ps2mouse_snapshot(atkbdc_sc->ps2mouse_sc, meta); - + SNAPSHOT_REMOVE_INTERN_ARR(ps2mouse, meta); done: return (ret); } Index: usr.sbin/bhyve/kern_snapshot.c =================================================================== --- /dev/null +++ usr.sbin/bhyve/kern_snapshot.c @@ -0,0 +1,547 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2016 Flavius Anton + * Copyright (c) 2016 Mihai Tiganus + * Copyright (c) 2016-2019 Mihai Carabas + * Copyright (c) 2017-2019 Darius Mihai + * Copyright (c) 2017-2019 Elena Mihailescu + * Copyright (c) 2018-2019 Sergiu Weisz + * Copyright (c) 2020-2021 Ionut Mihalache + * The bhyve-snapshot feature was developed under 
sponsorships + * from Matthew Grooms. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef JSON_SNAPSHOT_V2 +/* ################## kernel snapshot functions copies ##################### */ + +/* vhpet */ +int +vhpet_snapshot(struct vm_snapshot_meta *meta) +{ + struct vhpet_userspace *vhpet; /* NOTE(review): never assigned in this function; the macros below take the address of its members — confirm nothing dereferences it */ + struct timer_userspace *timer; + int i, ret = 0; + + SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done); + + SNAPSHOT_VAR_OR_LEAVE(vhpet->countbase, meta, ret, done); + + SNAPSHOT_ADD_INTERN_ARR(timers, meta); + for (i = 0; i < nitems(vhpet->timer); i++) { + timer = &vhpet->timer[i]; + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + + SNAPSHOT_VAR_OR_LEAVE(timer->cap_config, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(timer->msireg, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(timer->compval, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(timer->comprate, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(timer->callout_sbt, meta, ret, done); + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(timers, meta); + +done: + return (ret); +} + + +/* vioapic */ +int +vioapic_snapshot(struct vm_snapshot_meta *meta) +{ + struct rtbl_userspace *rtbl; + struct vioapic_userspace *vioapic; /* NOTE(review): never assigned in this function — same pattern as vhpet above */ + int ret; + int i; + + SNAPSHOT_VAR_OR_LEAVE(vioapic->ioregsel, meta, ret, done); + + SNAPSHOT_ADD_INTERN_ARR(rtbls, meta); + for (i = 0; i < nitems(vioapic->rtbl); i++) { + rtbl = &vioapic->rtbl[i]; + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + + SNAPSHOT_VAR_OR_LEAVE(rtbl->reg, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(rtbl->acnt, meta, ret, done); + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + 
SNAPSHOT_REMOVE_INTERN_ARR(rtbls, meta); + +done: + return (ret); +} + +/* vm (vcpus) */ +static int +vm_snapshot_vcpus(struct vm_userspace *vm, struct vm_snapshot_meta *meta) +{ + int ret; + int i; + struct vcpu_userspace *vcpu; + + SNAPSHOT_ADD_INTERN_ARR(vcpus, meta); + for (i = 0; i < VM_MAXCPU; i++) { + vcpu = &vm->vcpu[i]; + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + + SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done); + + SNAPSHOT_VAR_OR_LEAVE(vcpu->tsc_offset, meta, ret, done); + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(vcpus, meta); + +done: + return (ret); +} + +int +vm_snapshot_vm(struct vm_snapshot_meta *meta) +{ + int ret; + struct vm_userspace *vm; /* NOTE(review): passed to vm_snapshot_vcpus() below without ever being assigned */ + + ret = 0; + + ret = vm_snapshot_vcpus(vm, meta); + if (ret != 0) { + printf("%s: failed to copy vm data to user buffer", __func__); + goto done; + } + +done: + return (ret); +} + +/* vlapic */ +int +vlapic_snapshot(struct vm_snapshot_meta *meta) +{ + int i, ret; + struct vlapic_userspace *vlapic; /* NOTE(review): never assigned, and the loop below does not select a per-vcpu instance from i */ + uint32_t ccr; + + ret = 0; + + SNAPSHOT_ADD_INTERN_ARR(vlapic, meta); + for (i = 0; i < VM_MAXCPU; i++) { + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + + /* snapshot the page first; timer period depends on icr_timer */ + SNAPSHOT_BUF_OR_LEAVE(vlapic->apic_page, PAGE_SIZE, meta, ret, done); /* NOTE(review): this evaluates vlapic->apic_page, i.e. reads through the unassigned vlapic pointer */ + + SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done); + + SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec, + meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac, + meta, ret, done); + + SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk, + 
sizeof(vlapic->isrvec_stk), + meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done); + + SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last, + sizeof(vlapic->lvt_last), + meta, ret, done); + + SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done); /* NOTE(review): ccr is a local that this function never assigns before passing it here */ + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(vlapic, meta); + +done: + return (ret); +} + +/* vatpic */ +int +vatpic_snapshot(struct vm_snapshot_meta *meta) +{ + int ret; + int i; + struct atpic_userspace *atpic; + struct vatpic_userspace *vatpic; /* NOTE(review): never assigned in this function; only member addresses are formed from it */ + + SNAPSHOT_ADD_INTERN_ARR(atpic, meta); + for (i = 0; i < nitems(vatpic->atpic); i++) { + atpic = &vatpic->atpic[i]; + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + + SNAPSHOT_VAR_OR_LEAVE(atpic->ready, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->icw_num, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->rd_cmd_reg, meta, ret, done); + + SNAPSHOT_VAR_OR_LEAVE(atpic->aeoi, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->poll, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->rotate, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->sfn, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->irq_base, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->request, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->service, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->mask, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->smm, meta, ret, done); + + SNAPSHOT_BUF_OR_LEAVE(atpic->acnt, sizeof(atpic->acnt), + meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->lowprio, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(atpic->intr_raised, meta, ret, done); + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(atpic, meta); + + SNAPSHOT_BUF_OR_LEAVE(vatpic->elc, sizeof(vatpic->elc), + meta, ret, done); + +done: + return (ret); +} + +/* vatpit */ +int +vatpit_snapshot(struct vm_snapshot_meta *meta) +{ + int ret; + int i; + struct channel_userspace *channel; + struct vatpit_userspace *vatpit; 
+ + SNAPSHOT_VAR_OR_LEAVE(vatpit->freq_bt.sec, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vatpit->freq_bt.frac, meta, ret, done); + + SNAPSHOT_ADD_INTERN_ARR(channels, meta); + for (i = 0; i < nitems(vatpit->channel); i++) { + channel = &vatpit->channel[i]; + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + + SNAPSHOT_VAR_OR_LEAVE(channel->mode, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(channel->initial, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(channel->now_bt.sec, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(channel->now_bt.frac, meta, ret, done); + SNAPSHOT_BUF_OR_LEAVE(channel->cr, sizeof(channel->cr), + meta, ret, done); + SNAPSHOT_BUF_OR_LEAVE(channel->ol, sizeof(channel->ol), + meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(channel->slatched, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(channel->status, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(channel->crbyte, meta, ret, done); /* NOTE(review): channel->olbyte is not passed to any macro in this loop — confirm that matches the kernel stream */ + SNAPSHOT_VAR_OR_LEAVE(channel->frbyte, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(channel->callout_bt.sec, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(channel->callout_bt.frac, meta, ret, + done); + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(channels, meta); + +done: + return (ret); +} + +/* vpmtmr */ +int +vpmtmr_snapshot(struct vm_snapshot_meta *meta) +{ + int ret; + struct vpmtmr_userspace *vpmtmr; /* NOTE(review): never assigned in this function */ + + SNAPSHOT_VAR_OR_LEAVE(vpmtmr->baseval, meta, ret, done); /* baseval is the only vpmtmr field handled here */ + +done: + return (ret); +} + +/* vrtc */ +int +vrtc_snapshot(struct vm_snapshot_meta *meta) +{ + int ret; + struct vrtc_userspace *vrtc; /* NOTE(review): never assigned in this function */ + + SNAPSHOT_VAR_OR_LEAVE(vrtc->addr, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->base_rtctime, meta, ret, done); + + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.sec, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_sec, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.min, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_min, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.hour, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_hour, meta, 
ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_week, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_month, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.month, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.year, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_a, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_b, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_c, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_d, meta, ret, done); + SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram, sizeof(vrtc->rtcdev.nvram), + meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.century, meta, ret, done); + SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram2, sizeof(vrtc->rtcdev.nvram2), + meta, ret, done); + +done: + return (ret); +} + +/* vmx */ +int +vmx_snapshot(struct vm_snapshot_meta *meta) +{ + struct vmx_userspace *vmx; /* NOTE(review): never assigned, yet vmx->guest_msrs[i] and vmx->ctx[i] are formed from it below */ + struct vmxctx_userspace *vmxctx; + int i; + uint64_t *guest_msrs; + int ret; + + SNAPSHOT_ADD_INTERN_ARR(vmx, meta); + for (i = 0; i < VM_MAXCPU; i++) { + guest_msrs = vmx->guest_msrs[i]; + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + SNAPSHOT_BUF_OR_LEAVE(guest_msrs, + sizeof(vmx->guest_msrs[i]), meta, ret, done); + + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); /* the index is cleared before the nested guest_registers list is opened */ + SNAPSHOT_ADD_INTERN_ARR(guest_registers, meta); + vmxctx = &vmx->ctx[i]; + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdi, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rsi, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdx, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rcx, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r8, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r9, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rax, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbx, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbp, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r10, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r11, meta, ret, done); + 
SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r12, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r13, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r14, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r15, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_cr2, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr0, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr1, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr2, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr3, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr6, meta, ret, done); + SNAPSHOT_REMOVE_INTERN_ARR(guest_registers, meta); + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(vmx, meta); + +done: + return (ret); +} + +/* vmcx */ +static int +vmcs_snapshot_desc(struct vm_snapshot_meta *meta) +{ + int ret; + struct seg_desc desc; /* local descriptor; it is not initialized before its fields are passed to the macros below */ + + SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done); + +done: + return (ret); +} + +int +vmx_vmcx_snapshot(struct vm_snapshot_meta *meta) +{ + struct vmcs_userspace *vmcs; + struct vmx_userspace *vmx; + int err, i; + uint64_t vm_reg_guest_cr0, vm_reg_guest_cr3, vm_reg_guest_cr4; + uint64_t vm_reg_guest_dr7, vm_reg_guest_rsp, vm_reg_guest_rip; + uint64_t vm_reg_guest_rflags; + + uint64_t vm_reg_guest_es, vm_reg_guest_cs, vm_reg_guest_ss, vm_reg_guest_ds; + uint64_t vm_reg_guest_fs, vm_reg_guest_gs, vm_reg_guest_tr; + uint64_t vm_reg_guest_ldtr, vm_reg_guest_efer; + + uint64_t vm_reg_guest_pdpte0, vm_reg_guest_pdpte1; + uint64_t vm_reg_guest_pdpte2, vm_reg_guest_pdpte3; + + uint64_t vmcs_guest_ia32_sysenter_cs, vmcs_guest_ia32_sysenter_esp; + uint64_t vmcs_guest_ia32_sysenter_eip, vmcs_guest_interruptibility; + uint64_t vmcs_guest_activity, vmcs_entry_ctls, vmcs_exit_ctls; + + SNAPSHOT_ADD_INTERN_ARR(vcpu, meta); + for (i = 0; i < VM_MAXCPU; i++) { + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, 
i); + err = 0; + + vmcs = &vmx->vmcs[i]; /* NOTE(review): vmx is never assigned, and vmcs is not read again in this function */ + + vm_reg_guest_cr0 = VM_REG_GUEST_CR0; /* each local is loaded with the register identifier constant before being passed to the macro */ + vm_reg_guest_cr3 = VM_REG_GUEST_CR3; + vm_reg_guest_cr4 = VM_REG_GUEST_CR4; + vm_reg_guest_dr7 = VM_REG_GUEST_DR7; + vm_reg_guest_rsp = VM_REG_GUEST_RSP; + vm_reg_guest_rip = VM_REG_GUEST_RIP; + vm_reg_guest_rflags = VM_REG_GUEST_RFLAGS; + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_cr0, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_cr3, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_cr4, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_dr7, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_rsp, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_rip, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_rflags, meta, err, done); + + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + + /* Guest segments */ + SNAPSHOT_ADD_INTERN_ARR(guest_segments, meta); + + vm_reg_guest_es = VM_REG_GUEST_ES; + vm_reg_guest_cs = VM_REG_GUEST_CS; + vm_reg_guest_ss = VM_REG_GUEST_SS; + vm_reg_guest_ds = VM_REG_GUEST_DS; + vm_reg_guest_fs = VM_REG_GUEST_FS; + vm_reg_guest_gs = VM_REG_GUEST_GS; + vm_reg_guest_tr = VM_REG_GUEST_TR; + vm_reg_guest_ldtr = VM_REG_GUEST_LDTR; + vm_reg_guest_efer = VM_REG_GUEST_EFER; + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_es, meta, err, done); + SNAPSHOT_ADD_INTERN_ARR(es_desc, meta); + err += vmcs_snapshot_desc(meta); /* helper return values are summed into err rather than checked one by one */ + SNAPSHOT_REMOVE_INTERN_ARR(es_desc, meta); + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_cs, meta, err, done); + SNAPSHOT_ADD_INTERN_ARR(cs_desc, meta); + err += vmcs_snapshot_desc(meta); + SNAPSHOT_REMOVE_INTERN_ARR(cs_desc, meta); + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_ss, meta, err, done); + SNAPSHOT_ADD_INTERN_ARR(ss_desc, meta); + err += vmcs_snapshot_desc(meta); + SNAPSHOT_REMOVE_INTERN_ARR(ss_desc, meta); + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_ds, meta, err, done); + SNAPSHOT_ADD_INTERN_ARR(ds_desc, meta); + err += vmcs_snapshot_desc(meta); + SNAPSHOT_REMOVE_INTERN_ARR(ds_desc, meta); + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_fs, meta, err, done); 
+ SNAPSHOT_ADD_INTERN_ARR(fs_desc, meta); + err += vmcs_snapshot_desc(meta); + SNAPSHOT_REMOVE_INTERN_ARR(fs_desc, meta); + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_gs, meta, err, done); + SNAPSHOT_ADD_INTERN_ARR(gs_desc, meta); + err += vmcs_snapshot_desc(meta); + SNAPSHOT_REMOVE_INTERN_ARR(gs_desc, meta); + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_tr, meta, err, done); + SNAPSHOT_ADD_INTERN_ARR(tr_desc, meta); + err += vmcs_snapshot_desc(meta); + SNAPSHOT_REMOVE_INTERN_ARR(tr_desc, meta); + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_ldtr, meta, err, done); + SNAPSHOT_ADD_INTERN_ARR(ldtr_desc, meta); + err += vmcs_snapshot_desc(meta); + SNAPSHOT_REMOVE_INTERN_ARR(ldtr_desc, meta); + + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_efer, meta, err, done); + + SNAPSHOT_ADD_INTERN_ARR(efer_desc, meta); + err += vmcs_snapshot_desc(meta); + SNAPSHOT_REMOVE_INTERN_ARR(efer_desc, meta); + + err += vmcs_snapshot_desc(meta); /* NOTE(review): this extra descriptor has no register value and no named list of its own — confirm it matches the kernel stream layout */ + + SNAPSHOT_REMOVE_INTERN_ARR(guest_segments, meta); + + /* Guest page tables */ + vm_reg_guest_pdpte0 = VM_REG_GUEST_PDPTE0; + vm_reg_guest_pdpte1 = VM_REG_GUEST_PDPTE1; + vm_reg_guest_pdpte2 = VM_REG_GUEST_PDPTE2; + vm_reg_guest_pdpte3 = VM_REG_GUEST_PDPTE3; + + SNAPSHOT_ADD_INTERN_ARR(guest_page_tables, meta); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_pdpte0, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_pdpte1, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_pdpte2, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vm_reg_guest_pdpte3, meta, err, done); + SNAPSHOT_REMOVE_INTERN_ARR(guest_page_tables, meta); + + /* Other guest state */ + vmcs_guest_ia32_sysenter_cs = VMCS_GUEST_IA32_SYSENTER_CS; + vmcs_guest_ia32_sysenter_esp = VMCS_GUEST_IA32_SYSENTER_ESP; + vmcs_guest_ia32_sysenter_eip = VMCS_GUEST_IA32_SYSENTER_EIP; + vmcs_guest_interruptibility = VMCS_GUEST_INTERRUPTIBILITY; + vmcs_guest_activity = VMCS_GUEST_ACTIVITY; + vmcs_entry_ctls = VMCS_ENTRY_CTLS; + vmcs_exit_ctls = VMCS_EXIT_CTLS; + + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); /* the index was cleared above for the nested lists, so it is set again here */ + 
SNAPSHOT_VAR_OR_LEAVE(vmcs_guest_ia32_sysenter_cs, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmcs_guest_ia32_sysenter_esp, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmcs_guest_ia32_sysenter_eip, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmcs_guest_interruptibility, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmcs_guest_activity, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmcs_entry_ctls, meta, err, done); + SNAPSHOT_VAR_OR_LEAVE(vmcs_exit_ctls, meta, err, done); + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(vcpu, meta); + +done: + return (err); +} + +/* ################## kernel snapshot functions copies ##################### */ + +#endif /* JSON_SNAPSHOT_V2 */ + Index: usr.sbin/bhyve/pci_ahci.c =================================================================== --- usr.sbin/bhyve/pci_ahci.c +++ usr.sbin/bhyve/pci_ahci.c @@ -1006,7 +1006,7 @@ ata_ident->capabilities1 = ATA_SUPPORT_LBA | ATA_SUPPORT_DMA; ata_ident->capabilities2 = (1 << 14 | 1); - ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88; + ata_ident->atavalid = ATA_FLAG_54_58 | ATA_FLAG_64_70; ata_ident->obsolete62 = 0x3f; ata_ident->mwdmamodes = 7; if (p->xfermode & ATA_WDMA0) @@ -1055,7 +1055,8 @@ ata_ident->capabilities1 = ATA_SUPPORT_DMA | ATA_SUPPORT_LBA | ATA_SUPPORT_IORDY; ata_ident->capabilities2 = (1 << 14); - ata_ident->atavalid = ATA_FLAG_64_70 | ATA_FLAG_88; + ata_ident->atavalid = ATA_FLAG_54_58 | + ATA_FLAG_64_70; if (p->mult_sectors) ata_ident->multi = (ATA_MULTI_VALID | p->mult_sectors); if (sectors <= 0x0fffffff) { @@ -2590,9 +2591,12 @@ SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done); + SNAPSHOT_ADD_INTERN_ARR(ports, meta); for (i = 0; i < MAX_PORTS; i++) { port = &sc->port[i]; + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + if (meta->op == VM_SNAPSHOT_SAVE) bctx = port->bctx; @@ -2618,11 +2622,17 @@ goto done; } + SNAPSHOT_ADD_INTERN_ARR(port_cmd_lst_and_rfis, meta); + SNAPSHOT_ACTIVATE_AUTO_INDEXING(meta, 0); + 
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst, AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done); SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta, ret, done); + SNAPSHOT_DEACTIVATE_AUTO_INDEXING(meta); + SNAPSHOT_REMOVE_INTERN_ARR(port_cmd_lst_and_rfis, meta); + SNAPSHOT_VAR_OR_LEAVE(port->ata_ident, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done); @@ -2652,9 +2662,8 @@ SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done); - - assert(TAILQ_EMPTY(&port->iobhd)); } + SNAPSHOT_REMOVE_INTERN_ARR(ports, meta); done: return (ret); Index: usr.sbin/bhyve/pci_e82545.c =================================================================== --- usr.sbin/bhyve/pci_e82545.c +++ usr.sbin/bhyve/pci_e82545.c @@ -46,6 +46,9 @@ #ifndef WITHOUT_CAPSICUM #include #endif + +#include +#include #include #include Index: usr.sbin/bhyve/pci_emul.c =================================================================== --- usr.sbin/bhyve/pci_emul.c +++ usr.sbin/bhyve/pci_emul.c @@ -2274,6 +2274,8 @@ pci_snapshot_pci_dev(struct vm_snapshot_meta *meta) { struct pci_devinst *pi; + struct pcibar *pb; + struct msix_table_entry *mte; int i; int ret; @@ -2296,21 +2298,28 @@ SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata), meta, ret, done); + SNAPSHOT_ADD_INTERN_ARR(pi_bars, meta); for (i = 0; i < nitems(pi->pi_bar); i++) { - SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done); + pb = &(pi->pi_bar[i]); + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + SNAPSHOT_VAR_OR_LEAVE(pb->type, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(pb->size, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(pb->addr, meta, ret, done); } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + 
SNAPSHOT_REMOVE_INTERN_ARR(pi_bars, meta); /* Restore MSI-X table. */ + SNAPSHOT_ADD_INTERN_ARR(pi_msix_table, meta); for (i = 0; i < pi->pi_msix.table_count; i++) { - SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr, - meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data, - meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control, - meta, ret, done); - } + mte = &(pi->pi_msix.table[i]); + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + SNAPSHOT_VAR_OR_LEAVE(mte->addr, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(mte->msg_data, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(mte->vector_control, meta, ret, done); + } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(pi_msix_table, meta); done: return (ret); Index: usr.sbin/bhyve/pci_fbuf.c =================================================================== --- usr.sbin/bhyve/pci_fbuf.c +++ usr.sbin/bhyve/pci_fbuf.c @@ -35,8 +35,8 @@ #include #include -#include #include +#include #include #include Index: usr.sbin/bhyve/pci_lpc.c =================================================================== --- usr.sbin/bhyve/pci_lpc.c +++ usr.sbin/bhyve/pci_lpc.c @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -41,7 +42,6 @@ #include #include -#include #include "acpi.h" #include "debug.h" @@ -150,7 +150,6 @@ printf("bootrom\n"); for (i = 0; i < LPC_UART_NUM; i++) printf("%s\n", lpc_uart_names[i]); - printf("%s\n", pctestdev_getname()); } const char * @@ -502,13 +501,17 @@ int unit, ret; struct uart_softc *sc; + SNAPSHOT_ADD_INTERN_ARR(uart, meta); for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = lpc_uart_softc[unit].uart_softc; + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, unit); ret = uart_snapshot(sc, meta); if (ret != 0) goto done; } + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + SNAPSHOT_REMOVE_INTERN_ARR(uart, meta); done: return (ret); Index: usr.sbin/bhyve/pci_xhci.c =================================================================== --- usr.sbin/bhyve/pci_xhci.c +++ 
usr.sbin/bhyve/pci_xhci.c @@ -3033,6 +3033,7 @@ SNAPSHOT_VAR_OR_LEAVE(sc->regsend, meta, ret, done); /* opregs */ + SNAPSHOT_ADD_INTERN_ARR(opregs, meta); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbcmd, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbsts, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->opregs.pgsz, meta, ret, done); @@ -3073,8 +3074,10 @@ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_seg, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_events_cnt, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.event_pcs, meta, ret, done); + SNAPSHOT_REMOVE_INTERN_ARR(opregs, meta); /* sanity checking */ + SNAPSHOT_ADD_INTERN_ARR(sanity_checks, meta); for (i = 1; i <= XHCI_MAX_DEVS; i++) { dev = XHCI_DEVINST_PTR(sc, i); if (dev == NULL) @@ -3111,8 +3114,10 @@ } } } + SNAPSHOT_REMOVE_INTERN_ARR(sanity_checks, meta); /* portregs */ + SNAPSHOT_ADD_INTERN_ARR(portregs, meta); for (i = 1; i <= XHCI_MAX_DEVS; i++) { port = XHCI_PORTREG_PTR(sc, i); dev = XHCI_DEVINST_PTR(sc, i); @@ -3125,8 +3130,10 @@ SNAPSHOT_VAR_OR_LEAVE(port->portli, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(port->porthlpmc, meta, ret, done); } + SNAPSHOT_REMOVE_INTERN_ARR(portregs, meta); /* slots */ + SNAPSHOT_ADD_INTERN_ARR(slots, meta); if (meta->op == VM_SNAPSHOT_SAVE) pci_xhci_map_devs_slots(sc, maps); @@ -3171,6 +3178,7 @@ SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_address, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_port, meta, ret, done); } + SNAPSHOT_REMOVE_INTERN_ARR(slots, meta); SNAPSHOT_VAR_OR_LEAVE(sc->usb2_port_start, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(sc->usb3_port_start, meta, ret, done); Index: usr.sbin/bhyve/ps2kbd.c =================================================================== --- usr.sbin/bhyve/ps2kbd.c +++ usr.sbin/bhyve/ps2kbd.c @@ -33,6 +33,8 @@ #include #include +#include +#include #include #include Index: usr.sbin/bhyve/ps2mouse.c =================================================================== --- usr.sbin/bhyve/ps2mouse.c +++ usr.sbin/bhyve/ps2mouse.c @@ -32,6 
+32,8 @@ #include +#include +#include #include #include Index: usr.sbin/bhyve/snapshot.h =================================================================== --- usr.sbin/bhyve/snapshot.h +++ usr.sbin/bhyve/snapshot.h @@ -45,6 +45,12 @@ #define BHYVE_RUN_DIR "/var/run/bhyve/" #define MAX_SNAPSHOT_FILENAME PATH_MAX +#ifndef JSON_SNAPSHPT_V2 + +#define JSON_SNAPSHOT_V2 + +#endif + struct vmctx; struct restore_state { @@ -76,11 +82,23 @@ vm_resume_dev_cb resume_cb; /* callback for device resume */ }; +#ifdef JSON_SNAPSHOT_V2 + struct vm_snapshot_kern_info { const char *struct_name; /* kernel structure name*/ enum snapshot_req req; /* request type */ + vm_snapshot_dev_cb snapshot_cb; /* callback for device snapshot */ }; +#else + +struct vm_snapshot_kern_info { + const char *struct_name; /* kernel structure name*/ + enum snapshot_req req; /* request type */ +}; + +#endif + void destroy_restore_state(struct restore_state *rstate); const char *lookup_vmname(struct restore_state *rstate); Index: usr.sbin/bhyve/snapshot.c =================================================================== --- usr.sbin/bhyve/snapshot.c +++ usr.sbin/bhyve/snapshot.c @@ -68,7 +68,6 @@ #include #include #include - #include #ifndef WITHOUT_CAPSICUM #include @@ -99,6 +98,14 @@ #include #include +#ifdef JSON_SNAPSHOT_V2 + +#include + +#include "../lib/libc/stdlib/hsearch.h" + +#endif + struct spinner_info { const size_t *crtval; const size_t maxval; @@ -110,6 +117,18 @@ static struct winsize winsize; static sig_t old_winch_handler; +#ifdef JSON_SNAPSHOT_V2 + +struct type_info { + char type_name[24]; + char fmt_str[24]; + unsigned char size; +}; + +static struct hsearch_data *types_htable; + +#endif + #define KB (1024UL) #define MB (1024UL * KB) #define GB (1024UL * MB) @@ -131,6 +150,18 @@ #define JSON_MEMSIZE_KEY "memsize" #define JSON_MEMFLAGS_KEY "memflags" +#define JSON_VERSION_KEY "version" +#define JSON_PARAMS_KEY "device_params" +#define JSON_PARAM_KEY "param_name" +#define 
JSON_PARAM_DATA_KEY "param_data" +#define JSON_PARAM_DATA_SIZE_KEY "data_size" + +#define JSON_VERSION_KEY "version" +#define JSON_PARAMS_KEY "device_params" +#define JSON_PARAM_KEY "param_name" +#define JSON_PARAM_DATA_KEY "param_data" +#define JSON_PARAM_DATA_SIZE_KEY "data_size" + #define min(a,b) \ ({ \ __typeof__ (a) _a = (a); \ @@ -142,38 +173,202 @@ { "atkbdc", atkbdc_snapshot, NULL, NULL }, { "virtio-net", pci_snapshot, pci_pause, pci_resume }, { "virtio-blk", pci_snapshot, pci_pause, pci_resume }, - { "virtio-rnd", pci_snapshot, NULL, NULL }, { "lpc", pci_snapshot, NULL, NULL }, { "fbuf", pci_snapshot, NULL, NULL }, { "xhci", pci_snapshot, NULL, NULL }, { "e1000", pci_snapshot, NULL, NULL }, { "ahci", pci_snapshot, pci_pause, pci_resume }, { "ahci-hd", pci_snapshot, pci_pause, pci_resume }, - { "ahci-cd", pci_snapshot, pci_pause, pci_resume }, + { "ahci-cd", pci_snapshot, NULL, NULL }, +}; + +#ifdef JSON_SNAPSHOT_V2 + +int vhpet_snapshot(struct vm_snapshot_meta *meta); +int vm_snapshot_vm(struct vm_snapshot_meta *meta); +int vmx_snapshot(struct vm_snapshot_meta *meta); +int vioapic_snapshot(struct vm_snapshot_meta *meta); +int vlapic_snapshot(struct vm_snapshot_meta *meta); +int vmx_vmcx_snapshot(struct vm_snapshot_meta *meta); +int vatpit_snapshot(struct vm_snapshot_meta *meta); +int vatpic_snapshot(struct vm_snapshot_meta *meta); +int vpmtmr_snapshot(struct vm_snapshot_meta *meta); +int vrtc_snapshot(struct vm_snapshot_meta *meta); + +const struct vm_snapshot_kern_info snapshot_kern_structs[] = { + { "vhpet", STRUCT_VHPET, vhpet_snapshot }, + { "vm", STRUCT_VM, vm_snapshot_vm }, + { "vmx", STRUCT_VMX, vmx_snapshot }, + { "vioapic", STRUCT_VIOAPIC, vioapic_snapshot }, + { "vlapic", STRUCT_VLAPIC, vlapic_snapshot }, + { "vmcx", STRUCT_VMCX, vmx_vmcx_snapshot }, + { "vatpit", STRUCT_VATPIT, vatpit_snapshot }, + { "vatpic", STRUCT_VATPIC, vatpic_snapshot }, + { "vpmtmr", STRUCT_VPMTMR, vpmtmr_snapshot }, + { "vrtc", STRUCT_VRTC, vrtc_snapshot }, }; +#else + 
const struct vm_snapshot_kern_info snapshot_kern_structs[] = { - { "vhpet", STRUCT_VHPET }, - { "vm", STRUCT_VM }, - { "vmx", STRUCT_VMX }, - { "vioapic", STRUCT_VIOAPIC }, - { "vlapic", STRUCT_VLAPIC }, - { "vmcx", STRUCT_VMCX }, - { "vatpit", STRUCT_VATPIT }, - { "vatpic", STRUCT_VATPIC }, - { "vpmtmr", STRUCT_VPMTMR }, - { "vrtc", STRUCT_VRTC }, + { "vhpet", STRUCT_VHPET, vhpet_snapshot }, + { "vm", STRUCT_VM, vm_snapshot_vm }, + { "vmx", STRUCT_VMX, vmx_snapshot }, + { "vioapic", STRUCT_VIOAPIC, vioapic_snapshot }, + { "vlapic", STRUCT_VLAPIC, vlapic_snapshot }, + { "vmcx", STRUCT_VMCX, vmx_vmcx_snapshot }, + { "vatpit", STRUCT_VATPIT, vatpit_snapshot }, + { "vatpic", STRUCT_VATPIC, vatpic_snapshot }, + { "vpmtmr", STRUCT_VPMTMR, vpmtmr_snapshot }, + { "vrtc", STRUCT_VRTC, vrtc_snapshot }, }; +#endif + static cpuset_t vcpus_active, vcpus_suspended; static pthread_mutex_t vcpu_lock; static pthread_cond_t vcpus_idle, vcpus_can_run; static bool checkpoint_active; -/* - * TODO: Harden this function and all of its callers since 'base_str' is a user - * provided string. 
- */ +#ifdef JSON_SNAPSHOT_V2 + +static void +write_param_array(struct vm_snapshot_meta *meta, xo_handle_t *xop); + +static int +vm_snapshot_dev_intern_arr(xo_handle_t *xop, int ident, int index, + struct vm_snapshot_device_info **curr_el); + +static int +emit_data(xo_handle_t *xop, struct vm_snapshot_device_info *elem); + +static int +create_types_hashtable(); + +static int +add_device_info(struct vm_snapshot_device_info *field_info, char *field_name, + const char *arr_name, int index, volatile void *data, + char *type, size_t data_size) +{ + if (arr_name != NULL) { + field_info->intern_arr_name = strdup(arr_name); + if (field_info->intern_arr_name == NULL) { + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", __func__, __LINE__); + return (-1); + } + } else + field_info->intern_arr_name = NULL; + + field_info->field_name = strdup(field_name); + if (field_info->field_name == NULL) { + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", __func__, __LINE__); + return (-1); + } + + field_info->index = index; + + if (data_size != 0 && data != NULL) { + field_info->field_data = calloc(data_size + 1, sizeof(char)); + if (field_info->field_data == NULL) { + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", __func__, __LINE__); + return (-1); + } + memcpy(field_info->field_data, (uint8_t *)data, data_size); + field_info->data_size = data_size; + } + + if (type != NULL) { + field_info->type = strdup(type); + if (field_info->type == NULL) { + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", __func__, __LINE__); + return (-1); + } + } + + return (0); +} + +static int +alloc_device_info_elem(struct list_device_info *list, char *field_name, + volatile void *data, char *type, size_t data_size) +{ + const char *arr_name = NULL; + char *t; + struct vm_snapshot_device_info *aux; + int index; + int ret; + + ret = 0; + + aux = calloc(1, sizeof(struct vm_snapshot_device_info)); + if (aux == NULL) { + fprintf(stderr, "%s: Could not alloc 
memory at line %d\r\n", __func__, __LINE__); + return (-1); + } + aux->ident = list->ident; + aux->create_instance = list->create_instance; + if (aux->ident > 0) + arr_name = list->intern_arr_names[aux->ident - 1]; + if (list->auto_index != -1) + index = list->auto_index; + else + index = list->index; + + t = type; + if (list->type != NULL) + t = list->type; + + ret = add_device_info(aux, field_name, arr_name, index, data, t, data_size); + if (ret != 0) + return (ret); + list->type = NULL; + if (list->first == NULL) { + list->first = aux; + list->last = list->first; + } else if (list->first == list->last) { + list->first->next_field = aux; + list->last = aux; + } else { + list->last->next_field = aux; + list->last = list->last->next_field; + } + + return (ret); +} + +void +remove_first_elem(struct list_device_info *list) +{ + struct vm_snapshot_device_info *aux; + + aux = list->first; + list->first = aux->next_field; + free(aux); +} + +void +free_device_info_list(struct list_device_info *list) +{ + struct vm_snapshot_device_info *curr_el, *aux; + + curr_el = list->first; + while (curr_el != NULL) { + free(curr_el->intern_arr_name); + free(curr_el->field_name); + free(curr_el->field_data); + + aux = curr_el->next_field; + free(curr_el); + curr_el = aux; + } + list->ident = 0; + memset(list->intern_arr_names, 0, IDENT_LEVEL * sizeof(char *)); + list->type = NULL; + list->first = NULL; + list->last = NULL; +} + +#endif static char * strcat_extension(const char *base_str, const char *ext) { @@ -256,6 +451,8 @@ return (-1); } +#ifndef JSON_SNAPSHOT_V2 + static int load_kdata_file(const char *filename, struct restore_state *rstate) { @@ -295,6 +492,8 @@ return (-1); } +#endif + static int load_metadata_file(const char *filename, struct restore_state *rstate) { @@ -352,6 +551,7 @@ goto err_restore; } +#ifndef JSON_SNAPSHOT_V2 kdata_filename = strcat_extension(filename, ".kern"); if (kdata_filename == NULL) { fprintf(stderr, "Failed to construct kernel data filename.\n"); 
@@ -363,7 +563,8 @@ fprintf(stderr, "Failed to load guest kernel data file.\n"); goto err_restore; } - +#endif + meta_filename = strcat_extension(filename, ".meta"); if (meta_filename == NULL) { fprintf(stderr, "Failed to construct kernel metadata filename.\n"); @@ -415,6 +616,282 @@ } \ } while(0) +#define JSON_GET_STRING_VALUE_OR_RETURN(key, obj, result_ptr, ret) \ +do { \ + const ucl_object_t *obj__; \ + obj__ = ucl_object_lookup(obj, (key)); \ + if (obj__ == NULL) { \ + fprintf(stderr, "Missing key: '%s'", (key)); \ + return (ret); \ + } \ + if (!ucl_object_tostring_safe(obj__, result_ptr)) { \ + fprintf(stderr, "Cannot convert '%s' value to string.", (key)); \ + return (ret); \ + } \ +} while(0) + + +#ifdef JSON_SNAPSHOT_V2 + +int +extract_type(char **type, const ucl_object_t *obj) +{ + char *key_copy = NULL; + char *aux = NULL; + const char delim[2] = "$"; + + key_copy = strdup(obj->key); + assert(key_copy != NULL); + + /* Param name */ + strtok(key_copy, delim); + + aux = strtok(NULL, delim); + assert(aux != NULL); + + *type = strdup(aux); + assert(*type != NULL); + + free(key_copy); + + return (0); +} + +int +restore_data(const ucl_object_t *obj, struct list_device_info *list) +{ + int ret; + const char *enc_data; + const char *str_data; + char *dec_data; + int enc_bytes; + int dec_bytes; + int64_t data_size; + int64_t int_data; + char *type; + char *endptr; + + ret = 0; + + extract_type(&type, obj); + if (!strcmp(type, "int8") || + !strcmp(type, "uint8") || + !strcmp(type, "int16") || + !strcmp(type, "uint16") || + !strcmp(type, "int32") || + !strcmp(type, "uint32")) { + + int_data = 0; + if (!ucl_object_toint_safe(obj, &int_data)) { + fprintf(stderr, "%s: Cannot convert '%s' value to int_t at line %d.\r\n", + __func__, obj->key, __LINE__); + ret = -1; + goto done; + } + + alloc_device_info_elem(list, (char *)obj->key, &int_data, NULL, sizeof(int_data)); + } else if (!strcmp(type, "int64") || + !strcmp(type, "uint64")) { + str_data = NULL; + if 
(!ucl_object_tostring_safe(obj, &str_data)) { + fprintf(stderr, "%s: Cannot convert '%s' value to string.\r\n", + __func__, obj->key); + ret = -1; + goto done; + } + assert(str_data != NULL); + + errno = 0; + int_data = (int64_t)strtoul(str_data, &endptr, 10); + if ((errno != 0) || (endptr == str_data)) { + fprintf(stderr, "%s: Cannot convert '%s' value to int.\r\n", + __func__, str_data); + ret = ((errno == 0) ? -1 : errno); + goto done; + } + + alloc_device_info_elem(list, (char *)obj->key, &int_data, NULL, sizeof(int_data)); + } else if (!strcmp(type, "int64") || + !strcmp(type, "uint64")) { + sscanf(obj->value.sv, "%lx", &int_data); + + alloc_device_info_elem(list, (char *)obj->key, &int_data, NULL, sizeof(int_data)); + } else { + enc_data = NULL; + if (!ucl_object_tostring_safe(obj, &enc_data)) { + fprintf(stderr, "Cannot convert '%s' value to string.\r\n", obj->key); + ret = -1; + goto done; + } + assert(enc_data != NULL); + + data_size = strlen(enc_data); + enc_bytes = (data_size >> 2) * 3; + dec_data = NULL; + dec_data = malloc((enc_bytes + 2) * sizeof(char)); + assert(dec_data != NULL); + + dec_bytes = EVP_DecodeBlock(dec_data, enc_data, data_size); + assert(dec_bytes > 0); + + alloc_device_info_elem(list, (char *)obj->key, dec_data, NULL, (size_t)data_size); + } + +done: + free(type); + return (ret); +} + +int +intern_arr_restore(const char *intern_arr_name, struct list_device_info *list, + const ucl_object_t *obj) +{ + const ucl_object_t *param = NULL, *intern_obj = NULL; + ucl_object_iter_t it = NULL, iit = NULL; + int is_list; + int ret = 0; + + /* Check if the received instance contains an array */ + while ((param = ucl_object_iterate(obj, &it, true)) != NULL) { + while ((intern_obj = ucl_object_iterate(param, &iit, true)) != NULL) { + is_list = (ucl_object_type(intern_obj) == UCL_ARRAY); + + if (!is_list) + ret = restore_data(intern_obj, list); + else + ret = intern_arr_restore(intern_obj->key, list, intern_obj); + + if (ret != 0) + goto done; + } + 
} + +done: + return (ret); +} + +static int +lookup_struct(enum snapshot_req struct_id, struct restore_state *rstate, + struct list_device_info *list) +{ + const ucl_object_t *structs = NULL, *obj = NULL; + const ucl_object_t *dev_params = NULL; + ucl_object_iter_t it = NULL; + int64_t snapshot_req; + + structs = ucl_object_lookup(rstate->meta_root_obj, JSON_STRUCT_ARR_KEY); + if (structs == NULL) { + fprintf(stderr, "Failed to find '%s' object.\r\n", + JSON_STRUCT_ARR_KEY); + return (-1); + } + + if (ucl_object_type((ucl_object_t *)structs) != UCL_ARRAY) { + fprintf(stderr, "Object '%s' is not an array.\r\n", + JSON_STRUCT_ARR_KEY); + return (-1); + } + + while ((obj = ucl_object_iterate(structs, &it, true)) != NULL) { + snapshot_req = -1; + JSON_GET_INT_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj, + &snapshot_req, -1); + assert(snapshot_req >= 0); + if ((enum snapshot_req) snapshot_req == struct_id) { + dev_params = ucl_object_lookup(obj, JSON_PARAMS_KEY); + if (dev_params == NULL) { + fprintf(stderr, "Failed to find '%s' object.\r\n", + JSON_PARAMS_KEY); + return(-EINVAL); + } + + if (ucl_object_type((ucl_object_t *)dev_params) != UCL_ARRAY) { + fprintf(stderr, "Object '%s' is not an array.\r\n", + JSON_PARAMS_KEY); + return (-EINVAL); + } + + /* Iterate through device parameters */ + intern_arr_restore(JSON_PARAMS_KEY, list, dev_params); + + return (0); + } + } + + return (-1); +} + +int +lookup_check_dev(const char *dev_name, struct restore_state *rstate, + const ucl_object_t *obj, + struct list_device_info *list) +{ + const ucl_object_t *dev_params = NULL; + const char *snapshot_req; + + snapshot_req = NULL; + JSON_GET_STRING_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj, + &snapshot_req, -EINVAL); + if (snapshot_req == NULL) { + fprintf(stderr, "%s: Could not extract device name\r\n", __func__); + return (-1); + } + + if (!strcmp(snapshot_req, dev_name)) { + dev_params = ucl_object_lookup(obj, JSON_PARAMS_KEY); + if (dev_params == NULL) { + fprintf(stderr, "Failed to find 
'%s' object.\n", + JSON_PARAMS_KEY); + return(-EINVAL); + } + + if (ucl_object_type((ucl_object_t *)dev_params) != UCL_ARRAY) { + fprintf(stderr, "Object '%s' is not an array.\n", + JSON_PARAMS_KEY); + return (-EINVAL); + } + + /* Iterate through device parameters */ + intern_arr_restore(JSON_PARAMS_KEY, list, dev_params); + + return (0); + } + + return (-1); +} + +int +lookup_dev(const char *dev_name, struct restore_state *rstate, + struct list_device_info *list) +{ + const ucl_object_t *devs = NULL, *obj = NULL; + ucl_object_iter_t it = NULL; + int ret; + + devs = ucl_object_lookup(rstate->meta_root_obj, JSON_DEV_ARR_KEY); + if (devs == NULL) { + fprintf(stderr, "Failed to find '%s' object.\n", + JSON_DEV_ARR_KEY); + return (-EINVAL); + } + + if (ucl_object_type((ucl_object_t *)devs) != UCL_ARRAY) { + fprintf(stderr, "Object '%s' is not an array.\n", + JSON_DEV_ARR_KEY); + return (-EINVAL); + } + + while ((obj = ucl_object_iterate(devs, &it, true)) != NULL) { + ret = lookup_check_dev(dev_name, rstate, obj, list); + if (ret == 0) + return (ret); + } + + return (-1); +} + +#else + static void * lookup_struct(enum snapshot_req struct_id, struct restore_state *rstate, size_t *struct_size) @@ -517,6 +994,8 @@ return (NULL); } +#endif + static const ucl_object_t * lookup_basic_metadata_object(struct restore_state *rstate) { @@ -590,9 +1069,7 @@ lookup_guest_ncpus(struct restore_state *rstate) { int64_t ncpus; - const ucl_object_t *obj; - - obj = lookup_basic_metadata_object(rstate); + const ucl_object_t *obj; obj = lookup_basic_metadata_object(rstate); if (obj == NULL) return (0); @@ -860,24 +1337,39 @@ return (0); } +#ifdef JSON_SNAPSHOT_V2 + static int vm_restore_kern_struct(struct vmctx *ctx, struct restore_state *rstate, const struct vm_snapshot_kern_info *info) { - void *struct_ptr; - size_t struct_size; int ret; + struct list_device_info list; struct vm_snapshot_meta *meta; + void *buffer; + size_t buf_size; + + buf_size = SNAPSHOT_BUFFER_SIZE; + + buffer = 
calloc(1, buf_size); + if (buffer == NULL) { + perror("Failed to allocate memory for snapshot buffer"); + ret = ENOSPC; + goto done; + } + + memset(&list, 0, sizeof(list)); + list.first = NULL; + list.last = NULL; - struct_ptr = lookup_struct(info->req, rstate, &struct_size); - if (struct_ptr == NULL) { + ret = lookup_struct(info->req, rstate, &list); + if (ret != 0) { fprintf(stderr, "%s: Failed to lookup struct %s\r\n", __func__, info->struct_name); - ret = -1; goto done; } - if (struct_size == 0) { + if (list.first == NULL) { fprintf(stderr, "%s: Kernel struct size was 0 for: %s\r\n", __func__, info->struct_name); ret = -1; @@ -889,15 +1381,28 @@ .dev_name = info->struct_name, .dev_req = info->req, - .buffer.buf_start = struct_ptr, - .buffer.buf_size = struct_size, - - .buffer.buf = struct_ptr, - .buffer.buf_rem = struct_size, + .buffer.buf_start = buffer, + .buffer.buf_size = buf_size, + .buffer.buf = buffer, + .buffer.buf_rem = buf_size, .op = VM_SNAPSHOT_RESTORE, + .version = JSON_V2, + .dev_info_list.ident = 0, + .dev_info_list.first = list.first, + .dev_info_list.last = list.last, + .snapshot_kernel = 1, }; + ret = (*info->snapshot_cb)(meta); + if (ret != 0) { + fprintf(stderr, "Failed to restore dev: %s\r\n", + info->struct_name); + return (-1); + } + + meta->buffer.buf = meta->buffer.buf_start; + ret = vm_snapshot_req(meta); if (ret != 0) { fprintf(stderr, "%s: Failed to restore struct: %s\r\n", @@ -909,10 +1414,76 @@ return (ret); } -int -vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate) -{ - int ret; +#else + +static int +vm_restore_kern_struct(struct vmctx *ctx, struct restore_state *rstate, + const struct vm_snapshot_kern_info *info) +{ + int ret; + struct list_device_info list; + struct vm_snapshot_meta *meta; + void *buffer; + size_t buf_size; + + buf_size = SNAPSHOT_BUFFER_SIZE; + + buffer = calloc(1, buf_size); + if (buffer == NULL) { + perror("Failed to allocate memory for snapshot buffer"); + ret = ENOSPC; + goto done; + } 
+ + memset(&list, 0, sizeof(list)); + list.first = NULL; + list.last = NULL; + + ret = lookup_struct(info->req, rstate, &list); + if (ret != 0) { + fprintf(stderr, "%s: Failed to lookup struct %s\r\n", + __func__, info->struct_name); + goto done; + } + + if (list.first == 0) { + fprintf(stderr, "%s: Kernel struct size was 0 for: %s\r\n", + __func__, info->struct_name); + ret = -1; + goto done; + } + + meta = &(struct vm_snapshot_meta) { + .ctx = ctx, + .dev_name = info->struct_name, + .dev_req = info->req, + + .buffer.buf_start = buffer, + .buffer.buf_size = buf_size, + .buffer.buf = buffer, + .buffer.buf_rem = buf_size, + + .op = VM_SNAPSHOT_RESTORE, + .version = JSON_V1, + }; + + ret = vm_snapshot_req(meta); + if (ret != 0) { + fprintf(stderr, "%s: Failed to restore struct: %s\r\n", + __func__, info->struct_name); + goto done; + } + +done: + return (ret); +} + +#endif + +int +vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate) +{ + int ret; int i; for (i = 0; i < nitems(snapshot_kern_structs); i++) { @@ -925,6 +1496,50 @@ return (0); } +#ifdef JSON_SNAPSHOT_V2 + +int +vm_restore_user_dev(struct vmctx *ctx, struct restore_state *rstate, + const struct vm_snapshot_dev_info *info) +{ + int ret; + struct list_device_info list; + struct vm_snapshot_meta *meta; + + memset(&list, 0, sizeof(list)); + list.first = NULL; + list.last = NULL; + + ret = lookup_dev(info->dev_name, rstate, &list); + if (ret != 0) { + fprintf(stderr, "Failed to lookup dev: %s\r\n", info->dev_name); + fprintf(stderr, "Continuing the restore/migration process\r\n"); + return (0); + } + + meta = &(struct vm_snapshot_meta) { + .ctx = ctx, + .dev_name = info->dev_name, + + .op = VM_SNAPSHOT_RESTORE, + + .version = JSON_V2, + .dev_info_list.ident = 0, + .dev_info_list.first = list.first, + .dev_info_list.last = list.last, + }; + + ret = (*info->snapshot_cb)(meta); + if (ret != 0) { + fprintf(stderr, "Failed to restore dev: %s\r\n", + info->dev_name); + return (-1); + } + + return 
(0); +} + +#else int vm_restore_user_dev(struct vmctx *ctx, struct restore_state *rstate, const struct vm_snapshot_dev_info *info) @@ -946,7 +1561,7 @@ "Assuming %s is not used\r\n", __func__, info->dev_name); return (0); - } + } meta = &(struct vm_snapshot_meta) { .ctx = ctx, @@ -959,6 +1574,7 @@ .buffer.buf_rem = dev_size, .op = VM_SNAPSHOT_RESTORE, + .version = JSON_V1, }; ret = (*info->snapshot_cb)(meta); @@ -971,6 +1587,7 @@ return (0); } +#endif int vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate) @@ -982,6 +1599,7 @@ ret = vm_restore_user_dev(ctx, rstate, &snapshot_devs[i]); if (ret != 0) return (ret); + fprintf(stderr, "%s restored successfully\r\n", snapshot_devs[i].dev_name); } return 0; @@ -1028,7 +1646,8 @@ } static int -vm_snapshot_kern_struct(int data_fd, xo_handle_t *xop, const char *array_key, +vm_snapshot_kern_struct(const struct vm_snapshot_kern_info *info, + int data_fd, xo_handle_t *xop, const char *array_key, struct vm_snapshot_meta *meta, off_t *offset) { int ret; @@ -1043,25 +1662,69 @@ goto done; } - data_size = vm_get_snapshot_size(meta); + if (meta->version == JSON_V1) { + data_size = vm_get_snapshot_size(meta); - write_cnt = write(data_fd, meta->buffer.buf_start, data_size); - if (write_cnt != data_size) { - perror("Failed to write all snapshotted data."); - ret = -1; - goto done; + write_cnt = write(data_fd, meta->buffer.buf_start, data_size); + if (write_cnt != data_size) { + perror("Failed to write all snapshotted data."); + ret = -1; + goto done; + } + } + meta->buffer.buf = meta->buffer.buf_start; + fprintf(stderr, "%s: %s has size %ld\r\n", __func__, meta->dev_name, data_size); + + if (!strcmp(meta->dev_name, "vhpet")) + vhpet_snapshot(meta); + else if (!strcmp(meta->dev_name, "vm")) + vm_snapshot_vm(meta); + else if (!strcmp(meta->dev_name, "vlapic")) + vlapic_snapshot(meta); + else if (!strcmp(meta->dev_name, "vioapic")) + vioapic_snapshot(meta); + else if (!strcmp(meta->dev_name, "vatpit")) + 
vatpit_snapshot(meta); + else if (!strcmp(meta->dev_name, "vatpic")) + vatpic_snapshot(meta); + else if (!strcmp(meta->dev_name, "vpmtmr")) + vpmtmr_snapshot(meta); + else if (!strcmp(meta->dev_name, "vrtc")) + vrtc_snapshot(meta); + else if (!strcmp(meta->dev_name, "vmx")) + vmx_snapshot(meta); + + if (meta->version == JSON_V1) { + data_size = vm_get_snapshot_size(meta); + + write_cnt = write(data_fd, meta->buffer.buf_start, data_size); + if (write_cnt != data_size) { + perror("Failed to write all snapshotted data."); + ret = -1; + goto done; + } } + meta->buffer.buf = meta->buffer.buf_start; /* Write metadata. */ xo_open_instance_h(xop, array_key); xo_emit_h(xop, "{:debug_name/%s}\n", meta->dev_name); - xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%d}\n", - meta->dev_req); - xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size); - xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset); - xo_close_instance_h(xop, JSON_STRUCT_ARR_KEY); + xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%d}\n", meta->dev_req); + if (meta->version == JSON_V1) { + xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size); + xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset); + *offset += data_size; + } else if (meta->version == JSON_V2) { + ret = (*info->snapshot_cb)(meta); + if (ret != 0) { + fprintf(stderr, "Failed to restore dev: %s\r\n", + info->struct_name); + return (-1); + } - *offset += data_size; + write_param_array(meta, xop); + } + xo_close_instance_h(xop, array_key); done: return (ret); @@ -1093,8 +1756,35 @@ .buffer.buf_size = buf_size, .op = VM_SNAPSHOT_SAVE, +#ifdef JSON_SNAPSHOT_V2 + .version = JSON_V2, + .dev_info_list.ident = 0, + .dev_info_list.index = -1, + .dev_info_list.type = NULL, + .dev_info_list.create_instance = 1, + .dev_info_list.auto_index = -1, + .dev_info_list.first = NULL, + .dev_info_list.last = NULL, + .snapshot_kernel = 1, +#else + .version = JSON_V1, +#endif }; + /* Prepare types hashtable */ + ret = create_types_hashtable(); + if (ret != 
0) { + error = -1; + goto err_vm_snapshot_kern_data; + } + + /* Prepare types hashtable */ + ret = create_types_hashtable(); + if (ret != 0) { + error = -1; + goto err_vm_snapshot_kern_data; + } + xo_open_list_h(xop, JSON_STRUCT_ARR_KEY); for (i = 0; i < nitems(snapshot_kern_structs); i++) { meta->dev_name = snapshot_kern_structs[i].struct_name; @@ -1104,8 +1794,13 @@ meta->buffer.buf = meta->buffer.buf_start; meta->buffer.buf_rem = meta->buffer.buf_size; - ret = vm_snapshot_kern_struct(data_fd, xop, JSON_DEV_ARR_KEY, - meta, &offset); + if (meta->version == JSON_V2) { + free_device_info_list(&meta->dev_info_list); + meta->snapshot_kernel = 1; + } + + ret = vm_snapshot_kern_struct(&snapshot_kern_structs[i], data_fd, + xop, JSON_STRUCT_ARR_KEY, meta, &offset); if (ret != 0) { error = -1; goto err_vm_snapshot_kern_data; @@ -1128,11 +1823,358 @@ xo_emit_h(xop, "{:" JSON_VMNAME_KEY "/%s}\n", vm_get_name(ctx)); xo_emit_h(xop, "{:" JSON_MEMSIZE_KEY "/%lu}\n", memsz); xo_emit_h(xop, "{:" JSON_MEMFLAGS_KEY "/%d}\n", vm_get_memflags(ctx)); +#ifndef JSON_SNAPSHOT_V2 + xo_emit_h(xop, "{:" JSON_VERSION_KEY "/%d}\n", JSON_V1); +#else + xo_emit_h(xop, "{:" JSON_VERSION_KEY "/%d}\n", JSON_V2); +#endif xo_close_container_h(xop, JSON_BASIC_METADATA_KEY); return (0); } +#ifdef JSON_SNAPSHOT_V2 + +static int +create_indexed_arr_name(char *intern_arr, int number, char **indexed_name) +{ + int ret; + + ret = asprintf(indexed_name, "%s@%d", intern_arr, number); + + if (ret < 0) + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", __func__, __LINE__); + + return (ret); +} + +static int +create_type_info(struct type_info **ti, const char *name, + const char *fmt_str, unsigned char size) +{ + int ret; + + ret = 0; + + *ti = calloc(1, sizeof(struct type_info)); + if (*ti == NULL) { + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", __func__, __LINE__); + ret = ENOMEM; + } + + strcpy((*ti)->type_name, name); + strcpy((*ti)->fmt_str, fmt_str); + (*ti)->size = size; + + 
return (ret); +} + +static int +create_types_hashtable() +{ + int ret, i, j; + struct type_info *ti; + ENTRY item; + ENTRY *res = NULL; + const char *types[] = { "int8", "uint8", "int16", "uint16", + "int32", "uint32", "int64", "uint64" }; + + const char *fmt_strs[] = { "/%%hhd}\\n", "/%%hhu}\\n", "/%%hd}\\n", + "/%%hu}\\n", "/%%d}\\n", "/%%u}\\n", "/%%s}\\n", "/%%s}\\n" }; + + const unsigned char type_sizes[] = { sizeof(int8_t), sizeof(uint8_t), + sizeof(int16_t), sizeof(uint16_t), + sizeof(int32_t), sizeof(uint32_t), + sizeof(int64_t), sizeof(uint64_t) }; + ret = 0; + + types_htable = calloc(1, sizeof(*types_htable)); + if (types_htable == NULL) { + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", __func__, __LINE__); + ret = ENOMEM; + goto done; + } + + if (!hcreate_r(32, types_htable)) { + ret = errno; + goto done; + } + + for (i = 0; i < 8; ++i) { + ret = create_type_info(&ti, types[i], fmt_strs[i], type_sizes[i]); + + if (ret != 0) { + j = i; + goto done; + } + + item.key = (char *)ti->type_name; + item.data = ti; + if (!hsearch_r(item, ENTER, &res, types_htable)) { + j = i; + fprintf(stderr, "%s: Could not add data into hashtable(line %d)\r\n", + __func__, __LINE__); + ret = errno; + goto done; + } + } + + return (ret); + +done: + free(types_htable); + types_htable = NULL; + + for (i = 0; i < j; ++i) { + item.key = (char *)types[i]; + if (!hsearch_r(item, FIND, &res, types_htable)) { + fprintf(stderr, + "%s: Could not find key %s in hashtable(line %d)\r\n", + __func__, item.key, __LINE__); + continue; + } + free(res->data); + } + hdestroy_r(types_htable); + + return (ret); +} + +static void +destroy_types_hashtable() +{ + int i; + ENTRY item; + ENTRY *res = NULL; + const char *types[] = { "int8", "uint8", "int16", "uint16", + "int32", "uint32", "int64", "uint64" }; + + for (i = 0; i < 8; ++i) { + item.key = (char *)types[i]; + if (!hsearch_r(item, FIND, &res, types_htable)) { + fprintf(stderr, + "%s: Could not find key %s in hashtable(line 
%d)\r\n", + __func__, item.key, __LINE__); + continue; + } + + free(res->data); + } + + hdestroy_r(types_htable); +} + +static int +get_type_format_string(char **res, char *key_part, char *type) +{ + int ret; + struct type_info *ti; + ENTRY item; + ENTRY *ires = NULL; + + item.key = type; + if (hsearch_r(item, FIND, &ires, types_htable)) { + ti = (struct type_info *)(ires->data); + ret = asprintf(res, "%s%s", key_part, ti->fmt_str); + } else + ret = asprintf(res, "%s%s", key_part, "/%%s}\\n"); + + if (ret < 0) + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", __func__, __LINE__); + + return (ret); +} + +static int +create_key_string(struct vm_snapshot_device_info *elem, char **res_str) +{ + int ret; + char *fmt = NULL; + + ret = 0; + if (!elem->create_instance && (elem->index != -1)) { + ret = get_type_format_string(&fmt, "{:%s%d$%s", elem->type); + ret = asprintf(res_str, fmt, elem->field_name, elem->index, elem->type); + } else { + ret = get_type_format_string(&fmt, "{:%s$%s", elem->type); + ret = asprintf(res_str, fmt, elem->field_name, elem->type); + } + + free(fmt); + return (ret); +} + +static int +emit_data(xo_handle_t *xop, struct vm_snapshot_device_info *elem) +{ + int ret; + char *enc_data = NULL; + char *fmt; + char *lv_str; + int enc_bytes = 0; + uint64_t int_data; + + unsigned long ds; + + ENTRY item; + ENTRY *res = NULL; + + ret = 0; + create_key_string(elem, &fmt); + + item.key = elem->type; + if (hsearch_r(item, FIND, &res, types_htable)) { + memcpy(&int_data, elem->field_data, + ((struct type_info *)res->data)->size); + lv_str = NULL; + if (!strcmp(elem->type, "int64")) + ret = asprintf(&lv_str, "%ld", int_data); + else if (!strcmp(elem->type, "uint64")) + ret = asprintf(&lv_str, "%lu", int_data); + + if (ret < 0) + goto done; + + if (lv_str != NULL) + xo_emit_h(xop, fmt, lv_str); + else + xo_emit_h(xop, fmt, int_data); + } else { + ds = elem->data_size; + enc_data = malloc(4 * (ds + 2) / 3); + assert(enc_data != NULL); + + enc_bytes = 
EVP_EncodeBlock(enc_data, (const char *)elem->field_data, ds); + assert(enc_bytes != 0); + + xo_emit_h(xop, fmt, enc_data); + + free(enc_data); + } + +done: + free(fmt); + return (ret); +} + + +static int +vm_snapshot_dev_intern_arr_index(xo_handle_t *xop, int ident, int index, + struct vm_snapshot_device_info **curr_el) +{ + char *intern_arr = NULL; + char *indexed_name = NULL; + int ret = 0; + + intern_arr = (*curr_el)->intern_arr_name; + + create_indexed_arr_name(intern_arr, index, &indexed_name); + xo_open_list_h(xop, indexed_name); + + xo_open_instance_h(xop, indexed_name); + while (*curr_el != NULL) { + /* Check if there is an internal array */ + if ((*curr_el)->ident > ident) { + ret = vm_snapshot_dev_intern_arr(xop, (*curr_el)->ident, (*curr_el)->index, curr_el); + continue; + } + + /* Check if index changed and if there is no array at the same + * indentation level as the current one for this index */ + if ((index != (*curr_el)->index) && (ret == 0)) + break; + + /* Reset the return value for the first branch inside the loop */ + ret = 0; + + /* Write data */ + emit_data(xop, *curr_el); + + *curr_el = (*curr_el)->next_field; + } + + xo_close_instance_h(xop, indexed_name); + xo_close_list_h(xop, indexed_name); + free(indexed_name); + indexed_name = NULL; + + return (ret); +} + +static int +vm_snapshot_dev_intern_arr(xo_handle_t *xop, int ident, int index, + struct vm_snapshot_device_info **curr_el) +{ + char *intern_arr = NULL; + int ret = 0; + + intern_arr = (*curr_el)->intern_arr_name; + xo_open_list_h(xop, intern_arr); + + xo_open_instance_h(xop, intern_arr); + while (*curr_el != NULL) { + /* Check if the current array has no more elements */ + if ((*curr_el)->ident < ident) + break; + + /* Check if there is an array on the same indentation level */ + if (strcmp((*curr_el)->intern_arr_name, intern_arr) && + (*curr_el)->ident == ident && + ret == 0) { + ret = 1; + break; + } + + /* Check if there is an internal array */ + if ((*curr_el)->ident > ident) { 
+ ret = vm_snapshot_dev_intern_arr(xop, (*curr_el)->ident, (*curr_el)->index, curr_el); + continue; + } + + /* Check if for the current array indexing is present */ + if (((*curr_el)->index != -1) && ((*curr_el)->create_instance == 1)) { + vm_snapshot_dev_intern_arr_index(xop, (*curr_el)->ident, (*curr_el)->index, curr_el); + continue; + } + + ret = 0; + /* Write data inside the array */ + emit_data(xop, *curr_el); + + *curr_el = (*curr_el)->next_field; + } + xo_close_instance_h(xop, intern_arr); + xo_close_list_h(xop, intern_arr); + + return (ret); +} + +static void +write_param_array(struct vm_snapshot_meta *meta, xo_handle_t *xop) +{ + struct vm_snapshot_device_info *curr_el; + + curr_el = meta->dev_info_list.first; + meta->dev_info_list.ident = 0; + + xo_open_list_h(xop, JSON_PARAMS_KEY); + xo_open_instance_h(xop, JSON_PARAMS_KEY); + while (curr_el != NULL) { + if (curr_el->ident > meta->dev_info_list.ident) { + vm_snapshot_dev_intern_arr(xop, curr_el->ident, curr_el->index, &curr_el); + continue; + } + + emit_data(xop, curr_el); + + curr_el = curr_el->next_field; + } + xo_close_instance_h(xop, JSON_PARAMS_KEY); + xo_close_list_h(xop, JSON_PARAMS_KEY); +} + +#endif + static int vm_snapshot_dev_write_data(int data_fd, xo_handle_t *xop, const char *array_key, struct vm_snapshot_meta *meta, off_t *offset) @@ -1140,23 +2182,26 @@ int ret; size_t data_size; - data_size = vm_get_snapshot_size(meta); - - ret = write(data_fd, meta->buffer.buf_start, data_size); - if (ret != data_size) { - perror("Failed to write all snapshotted data."); - return (-1); + if (meta->version == JSON_V1) { + data_size = vm_get_snapshot_size(meta); + ret = write(data_fd, meta->buffer.buf_start, data_size); + if (ret != data_size) { + perror("Failed to write all snapshotted data."); + return (-1); + } + *offset += data_size; } - + /* Write metadata. 
*/ xo_open_instance_h(xop, array_key); xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%s}\n", meta->dev_name); - xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size); - xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset); - xo_close_instance_h(xop, array_key); - - *offset += data_size; + if (meta->version == JSON_V1) { + xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size); + xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset); + } else if (meta->version == JSON_V2) + write_param_array(meta, xop); + xo_close_instance_h(xop, array_key); return (0); } @@ -1174,6 +2219,10 @@ return (ret); } + if (meta->version == JSON_V2) + if (meta->dev_info_list.first == NULL) + return (0); + ret = vm_snapshot_dev_write_data(data_fd, xop, JSON_DEV_ARR_KEY, meta, offset); if (ret != 0) @@ -1186,11 +2235,16 @@ vm_snapshot_user_devs(struct vmctx *ctx, int data_fd, xo_handle_t *xop) { int ret, i; + off_t offset; +#ifndef JSON_SNAPSHOT_V2 void *buffer; size_t buf_size; +#endif + struct vm_snapshot_meta *meta; +#ifndef JSON_SNAPSHOT_V2 buf_size = SNAPSHOT_BUFFER_SIZE; offset = lseek(data_fd, 0, SEEK_CUR); @@ -1205,25 +2259,49 @@ ret = ENOSPC; goto snapshot_err; } - +#endif + offset = 0; meta = &(struct vm_snapshot_meta) { .ctx = ctx, + .op = VM_SNAPSHOT_SAVE, + +#ifndef JSON_SNAPSHOT_V2 .buffer.buf_start = buffer, .buffer.buf_size = buf_size, - - .op = VM_SNAPSHOT_SAVE, + .version = JSON_V1, +#else + .version = JSON_V2, + .dev_info_list.ident = 0, + .dev_info_list.index = -1, + .dev_info_list.create_instance = 1, + .dev_info_list.auto_index = -1, + .dev_info_list.first = NULL, + .dev_info_list.last = NULL, +#endif }; + /* Prepare the hashtable for types */ + ret = create_types_hashtable(); + if (ret != 0) + goto snapshot_err; + xo_open_list_h(xop, JSON_DEV_ARR_KEY); /* Restore other devices that support this feature */ for (i = 0; i < nitems(snapshot_devs); i++) { + fprintf(stderr, "Creating snapshot for %s device\r\n", snapshot_devs[i].dev_name); meta->dev_name = 
snapshot_devs[i].dev_name; - memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); - meta->buffer.buf = meta->buffer.buf_start; - meta->buffer.buf_rem = meta->buffer.buf_size; + if (meta->version == JSON_V1) { + memset(meta->buffer.buf_start, 0, meta->buffer.buf_size); + meta->buffer.buf = meta->buffer.buf_start; + meta->buffer.buf_rem = meta->buffer.buf_size; + } else if (meta->version == JSON_V2) + free_device_info_list(&meta->dev_info_list); + + if (meta->version == JSON_V2) + free_device_info_list(&meta->dev_info_list); ret = vm_snapshot_user_dev(&snapshot_devs[i], data_fd, xop, meta, &offset); @@ -1233,9 +2311,15 @@ xo_close_list_h(xop, JSON_DEV_ARR_KEY); + /* Clear types hashtable */ + destroy_types_hashtable(); + snapshot_err: +#ifndef JSON_SNAPSHOT_V2 if (buffer != NULL) free(buffer); +#endif + return (ret); } @@ -1319,9 +2403,12 @@ size_t memsz; xo_handle_t *xop = NULL; char *meta_filename = NULL; +#ifndef JSON_SNAPSHOT_V2 char *kdata_filename = NULL; +#endif FILE *meta_file = NULL; +#ifndef JSON_SNAPSHOT_V2 kdata_filename = strcat_extension(checkpoint_file, ".kern"); if (kdata_filename == NULL) { fprintf(stderr, "Failed to construct kernel data filename.\n"); @@ -1334,7 +2421,7 @@ error = -1; goto done; } - +#endif fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700); if (fd_checkpoint < 0) { @@ -1384,7 +2471,6 @@ goto done; } - ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop); if (ret != 0) { fprintf(stderr, "Failed to snapshot vm kernel data.\n"); @@ -1415,8 +2501,10 @@ close(fd_checkpoint); if (meta_filename != NULL) free(meta_filename); +#ifndef JSON_SNAPSHOT_V2 if (kdata_filename != NULL) free(kdata_filename); +#endif if (xop != NULL) xo_destroy(xop); if (meta_file != NULL) @@ -1563,6 +2651,222 @@ return (err); } +#define BUFFER_SUB_REM(buffer, size) \ +do { \ + buffer->buf += (size); \ + buffer->buf_rem -= (size); \ +} while(0) + +#define CHK_SIZE_AND_ADD_ELEM(list, buffer, data_size, field_name, type, RET, LABEL) \ +do { \ 
+ int32_t ds; \ + void *kdata = NULL; \ + \ + memcpy((uint8_t *) &ds, (buffer)->buf, sizeof(int32_t)); \ + if (ds != data_size) { \ + fprintf(stderr, \ + "%s: Size mismatch for parameter %s, expected %d but got %ld\r\n", \ + __func__, field_name, ds, data_size); \ + (RET) = -1; \ + goto LABEL; \ + } \ + BUFFER_SUB_REM(buffer, sizeof(int32_t)); \ + kdata = calloc(1, data_size); \ + if (kdata == NULL) { \ + fprintf(stderr, "%s: Could not alloc memory at line %d\r\n", \ + __func__, __LINE__); \ + (RET) = ENOMEM; \ + goto LABEL; \ + } \ + memcpy((uint8_t *) kdata, (buffer)->buf, data_size); \ + \ + alloc_device_info_elem(list, field_name, kdata, type, data_size); \ + \ + BUFFER_SUB_REM(buffer, data_size); \ + free(kdata); \ +} while(0) + +#define ADD_SIZE_AND_DATA_TO_BUFFER(buffer, data_size, field_data) \ +do { \ + memcpy(buffer->buf, (uint8_t *)&data_size, sizeof(int32_t)); \ + BUFFER_SUB_REM(buffer, sizeof(int32_t)); \ + memcpy(buffer->buf, (field_data), data_size); \ + BUFFER_SUB_REM(buffer, data_size); \ +} while(0) + +int +vm_snapshot_save_fieldname(const char *fullname, volatile void *data, + char *type, size_t data_size, struct vm_snapshot_meta *meta) +{ + int ret; + size_t len; + char *ffield_name; + char *aux; + char *field_name; + int op; + struct vm_snapshot_buffer *buffer; + struct list_device_info *list; + struct vm_snapshot_device_info *aux_elem; + const char delim[5] = "&(>)"; + + buffer = &meta->buffer; + if (meta->snapshot_kernel) + if (buffer->buf_rem < data_size + sizeof(int)) { + fprintf(stderr, "%s: buffer too small\r\n", __func__); + return (E2BIG); + } + + ret = 0; + op = meta->op; + + len = strlen(fullname); + ffield_name = calloc(len + 1, sizeof(char)); + assert(ffield_name != NULL); + + memcpy(ffield_name, fullname, len); + aux = strtok(ffield_name, delim); + field_name = strtok(NULL, delim); + + if (field_name == NULL) + field_name = aux; + + list = &meta->dev_info_list; + if (op == VM_SNAPSHOT_SAVE) { + if (meta->snapshot_kernel) + 
CHK_SIZE_AND_ADD_ELEM(list, buffer, data_size, field_name, type, ret, done); + else + alloc_device_info_elem(list, field_name, data, type, data_size); + + if (list->auto_index >= 0) + list->auto_index++; + } else if (op == VM_SNAPSHOT_RESTORE) { + aux_elem = list->first; + if (aux_elem != NULL) { + if (meta->snapshot_kernel) { + ADD_SIZE_AND_DATA_TO_BUFFER(buffer, data_size, + (uint8_t *) aux_elem->field_data); + } else + memcpy((uint8_t *)data, (uint8_t *)aux_elem->field_data, data_size); + } + remove_first_elem(list); + } else { + ret = EINVAL; + goto done; + } + +done: + free(ffield_name); + return (ret); +} + +int +vm_snapshot_save_fieldname_cmp(const char *fullname, volatile void *data, + char *type, size_t data_size, struct vm_snapshot_meta *meta) +{ + size_t len; + char *ffield_name; + char *aux; + char *field_name; + int op; + int ret; + struct vm_snapshot_buffer *buffer; + struct list_device_info *list; + struct vm_snapshot_device_info *aux_elem; + const char delim[5] = "&(>)"; + + buffer = &meta->buffer; + if (meta->snapshot_kernel) + if (buffer->buf_rem < data_size + sizeof(int32_t)) { + fprintf(stderr, "%s: buffer too small\r\n", __func__); + return (E2BIG); + } + + op = meta->op; + + len = strlen(fullname); + ffield_name = calloc(len + 1, sizeof(char)); + assert(ffield_name != NULL); + + memcpy(ffield_name, fullname, len); + aux = strtok(ffield_name, delim); + field_name = strtok(NULL, delim); + + if (field_name == NULL) + field_name = aux; + + list = &meta->dev_info_list; + if (op == VM_SNAPSHOT_SAVE) { + ret = 0; + if (meta->snapshot_kernel) + CHK_SIZE_AND_ADD_ELEM(list, buffer, data_size, field_name, type, ret, done); + else + alloc_device_info_elem(list, field_name, data, type, data_size); + + if (list->auto_index >= 0) + list->auto_index++; + } else if (op == VM_SNAPSHOT_RESTORE) { + aux_elem = list->first; + if (aux_elem != NULL) { + if (meta->snapshot_kernel) { + ADD_SIZE_AND_DATA_TO_BUFFER(buffer, data_size, + (uint8_t *) aux_elem->field_data); 
+ } else + ret = memcmp((uint8_t *)data, (uint8_t *)aux_elem->field_data, data_size); + } + remove_first_elem(list); + } else { + ret = EINVAL; + goto done; + } + +done: + free(ffield_name); + return (ret); +} + +void +vm_snapshot_add_intern_list(const char *arr_name, struct vm_snapshot_meta *meta) +{ + meta->dev_info_list.intern_arr_names[meta->dev_info_list.ident++] = arr_name; +} + +void +vm_snapshot_remove_intern_list(struct vm_snapshot_meta *meta) +{ + meta->dev_info_list.intern_arr_names[--meta->dev_info_list.ident] = NULL; +} + +void +vm_snapshot_set_intern_arr_index(struct vm_snapshot_meta *meta, int index) +{ + meta->dev_info_list.index = index; +} + +void +vm_snapshot_clear_intern_arr_index(struct vm_snapshot_meta *meta) +{ + meta->dev_info_list.index = -1; +} + +void vm_snapshot_activate_auto_index(struct vm_snapshot_meta *meta, + unsigned char create_instance) +{ + meta->dev_info_list.create_instance = create_instance; + meta->dev_info_list.auto_index = 0; +} + +void vm_snapshot_deactivate_auto_index(struct vm_snapshot_meta *meta) +{ + meta->dev_info_list.create_instance = 1; + meta->dev_info_list.auto_index = -1; +} + +void check_and_set_non_array_type(char *type, struct vm_snapshot_meta *meta) +{ + if ((type != NULL) && strcmp(type, "b64")) + meta->dev_info_list.type = type; +} + void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op) { @@ -1601,8 +2905,7 @@ else return (EINVAL); - buffer->buf += data_size; - buffer->buf_rem -= data_size; + BUFFER_SUB_REM(buffer, data_size); return (0); } @@ -1689,8 +2992,7 @@ goto done; } - buffer->buf += data_size; - buffer->buf_rem -= data_size; + BUFFER_SUB_REM(buffer, data_size); done: return (ret); Index: usr.sbin/bhyve/uart_emul.c =================================================================== --- usr.sbin/bhyve/uart_emul.c +++ usr.sbin/bhyve/uart_emul.c @@ -39,6 +39,8 @@ #include #endif +#include +#include #include #include Index: usr.sbin/bhyve/usb_mouse.c 
=================================================================== --- usr.sbin/bhyve/usb_mouse.c +++ usr.sbin/bhyve/usb_mouse.c @@ -31,6 +31,8 @@ #include +#include +#include #include #include Index: usr.sbin/bhyve/virtio.c =================================================================== --- usr.sbin/bhyve/virtio.c +++ usr.sbin/bhyve/virtio.c @@ -890,6 +890,9 @@ for (i = 0; i < vc->vc_nvq; i++) { vq = &vs->vs_queues[i]; + /* Set index */ + SNAPSHOT_SET_INTERN_ARR_INDEX(meta, i); + SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_qsize, meta, ret, done); SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_num, meta, ret, done); @@ -904,6 +907,9 @@ if (!vq_ring_ready(vq)) continue; + SNAPSHOT_ADD_INTERN_ARR(h2g_addrs, meta); + SNAPSHOT_ACTIVATE_AUTO_INDEXING(meta, 0); + addr_size = vq->vq_qsize * sizeof(struct vring_desc); SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_desc, addr_size, false, meta, ret, done); @@ -916,10 +922,16 @@ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_used, addr_size, false, meta, ret, done); - SNAPSHOT_BUF_OR_LEAVE(vq->vq_desc, - vring_size_aligned(vq->vq_qsize), meta, ret, done); + SNAPSHOT_DEACTIVATE_AUTO_INDEXING(meta); + SNAPSHOT_REMOVE_INTERN_ARR(h2g_addrs, meta); + + SNAPSHOT_BUF_OR_LEAVE(vq->vq_desc, vring_size_aligned(vq->vq_qsize), + meta, ret, done); } + /* Reset index */ + SNAPSHOT_CLEAR_INTERN_ARR_INDEX(meta); + done: return (ret); } @@ -937,19 +949,25 @@ vc = vs->vs_vc; /* Save virtio softc */ + SNAPSHOT_ADD_INTERN_ARR(softc, meta); ret = vi_pci_snapshot_softc(vs, meta); if (ret != 0) goto done; + SNAPSHOT_REMOVE_INTERN_ARR(softc, meta); /* Save virtio consts */ + SNAPSHOT_ADD_INTERN_ARR(consts, meta); ret = vi_pci_snapshot_consts(vc, meta); if (ret != 0) goto done; + SNAPSHOT_REMOVE_INTERN_ARR(consts, meta); /* Save virtio queue info */ + SNAPSHOT_ADD_INTERN_ARR(queues, meta); ret = vi_pci_snapshot_queues(vs, meta); if (ret != 0) goto done; + SNAPSHOT_REMOVE_INTERN_ARR(queues, meta); /* Save device softc, if needed */ if (vc->vc_snapshot != NULL) {