Page MenuHomeFreeBSD

D19495.id58263.diff
No OneTemporary

D19495.id58263.diff

Index: lib/libvmmapi/vmmapi.h
===================================================================
--- lib/libvmmapi/vmmapi.h
+++ lib/libvmmapi/vmmapi.h
@@ -33,6 +33,7 @@
#include <sys/param.h>
#include <sys/cpuset.h>
+#include <machine/vmm_dev.h>
/*
* API version for out-of-tree consumers like grub-bhyve for making compile
@@ -42,6 +43,7 @@
struct iovec;
struct vmctx;
+struct vm_snapshot_meta;
enum x2apic_state;
/*
@@ -88,6 +90,10 @@
*/
int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+
+int vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
+ size_t *lowmem_size, size_t *highmem_size);
+
/*
* Create a device memory segment identified by 'segid'.
*
@@ -110,6 +116,8 @@
int vm_parse_memsize(const char *optarg, size_t *memsize);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
+/* inverse operation to vm_map_gpa - extract guest address from host pointer */
+vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
uint64_t gla, int prot, uint64_t *gpa, int *fault);
@@ -120,6 +128,7 @@
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
void vm_set_memflags(struct vmctx *ctx, int flags);
int vm_get_memflags(struct vmctx *ctx);
+int vm_get_name(struct vmctx *ctx, char *buffer, size_t max_len);
size_t vm_get_lowmem_size(struct vmctx *ctx);
size_t vm_get_highmem_size(struct vmctx *ctx);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
@@ -226,6 +235,8 @@
uint16_t threads, uint16_t maxcpus);
int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
+int vm_vcpu_lock_all(struct vmctx *ctx);
+int vm_vcpu_unlock_all(struct vmctx *ctx);
/*
* FreeBSD specific APIs
@@ -237,4 +248,27 @@
uint32_t eip, uint32_t gdtbase,
uint32_t esp);
void vm_setup_freebsd_gdt(uint64_t *gdtr);
+
+/*
+ * Bhyve save-restore
+ */
+
+#define MAX_SNAPSHOT_VMNAME 100
+
+/*
+ * Operation codes for a checkpoint request.
+ * NOTE(review): presumably the values stored in checkpoint_op.op -- confirm.
+ */
+enum checkpoint_opcodes {
+ START_CHECKPOINT = 0,
+ START_SUSPEND = 1,
+};
+
+/*
+ * Checkpoint request: an operation code followed by a snapshot file name
+ * buffer of MAX_SNAPSHOT_VMNAME bytes.  Declared packed, so no padding is
+ * inserted between the members.
+ */
+struct __attribute__((packed)) checkpoint_op {
+ unsigned int op;
+ char snapshot_filename[MAX_SNAPSHOT_VMNAME];
+};
+
+int vm_snapshot_req(struct vm_snapshot_meta *meta);
+
+int vm_restore_time(struct vmctx *ctx);
+
+int vm_restore_mem(struct vmctx *ctx, int vmmem_fd, size_t size);
+
#endif /* _VMMAPI_H_ */
Index: lib/libvmmapi/vmmapi.c
===================================================================
--- lib/libvmmapi/vmmapi.c
+++ lib/libvmmapi/vmmapi.c
@@ -51,8 +51,10 @@
#include <libutil.h>
+#include <vm/vm.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_snapshot.h>
#include "vmmapi.h"
@@ -233,6 +235,16 @@
return (error);
}
+/*
+ * Report the guest memory layout cached in 'ctx': the base address of the
+ * guest mapping and the sizes of the low and high memory regions.
+ * All three output pointers are written unconditionally; always returns 0.
+ */
+int
+vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
+    size_t *lowmem_size, size_t *highmem_size)
+{
+
+	*guest_baseaddr = ctx->baseaddr;
+	*lowmem_size = ctx->lowmem;
+	*highmem_size = ctx->highmem;
+	return (0);
+}
+
int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
@@ -444,6 +456,35 @@
return (NULL);
}
+/*
+ * Inverse of vm_map_gpa(): translate the host pointer 'addr' back into a
+ * guest physical address.
+ *
+ * Low memory occupies offsets [0, lowmem) from ctx->baseaddr and high
+ * memory occupies [4GB, 4GB + highmem).  Returns (vm_paddr_t)-1 when 'addr'
+ * does not fall inside either range.
+ */
+vm_paddr_t
+vm_rev_map_gpa(struct vmctx *ctx, void *addr)
+{
+	off_t offaddr;
+
+	/* Use 'char *' arithmetic; arithmetic on 'void *' is a GNU extension. */
+	offaddr = (char *)addr - (char *)ctx->baseaddr;
+
+	/*
+	 * Half-open range, like the highmem check below: the byte at offset
+	 * 'lowmem' is one past the end of low memory and must be rejected.
+	 */
+	if (ctx->lowmem > 0 && offaddr >= 0 && (size_t)offaddr < ctx->lowmem)
+		return (offaddr);
+
+	if (ctx->highmem > 0)
+		if (offaddr >= 4*GB && offaddr < 4*GB + ctx->highmem)
+			return (offaddr);
+
+	return ((vm_paddr_t)-1);
+}
+
+/*
+ * Copy the VM name into 'buf', which holds 'max_len' bytes.
+ *
+ * Returns 0 on success, or EINVAL (leaving 'buf' untouched) when the name
+ * plus its terminating NUL does not fit.
+ *
+ * TODO: maximum size for vmname
+ */
+int
+vm_get_name(struct vmctx *ctx, char *buf, size_t max_len)
+{
+
+	/*
+	 * '>=' rather than '>': strlcpy() reserves one byte for the NUL, so a
+	 * name of exactly max_len characters would be silently truncated.
+	 */
+	if (strlen(ctx->name) >= max_len)
+		return (EINVAL);
+
+	strlcpy(buf, ctx->name, max_len);
+	return (0);
+}
+
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{
@@ -1504,6 +1545,110 @@
return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
}
+/*
+ * Issue the VM_VCPU_LOCK_ALL ioctl on the VM's device descriptor and
+ * return the ioctl result unchanged.
+ */
+int
+vm_vcpu_lock_all(struct vmctx *ctx)
+{
+	int error;
+
+	error = ioctl(ctx->fd, VM_VCPU_LOCK_ALL);
+	return (error);
+}
+
+/*
+ * Issue the VM_VCPU_UNLOCK_ALL ioctl on the VM's device descriptor and
+ * return the ioctl result unchanged.
+ */
+int
+vm_vcpu_unlock_all(struct vmctx *ctx)
+{
+	int error;
+
+	error = ioctl(ctx->fd, VM_VCPU_UNLOCK_ALL);
+	return (error);
+}
+
+/*
+ * Forward the snapshot request described by 'meta' to the kernel through
+ * the VM_SNAPSHOT_REQ ioctl.
+ *
+ * The metadata header is copied into a zeroed request structure for the
+ * call and copied back into 'meta' only when the ioctl succeeds.  On
+ * failure a message naming meta->dev_name is printed to stderr and 'meta'
+ * is left as it was.  Returns the ioctl result.
+ */
+int
+vm_snapshot_req(struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_req req;
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	/* stage the metadata header for the ioctl */
+	memcpy(&req.meta, meta, sizeof(req.meta));
+
+	rc = ioctl(meta->ctx->fd, VM_SNAPSHOT_REQ, &req);
+	if (rc != 0) {
+		fprintf(stderr, "%s: snapshot failed for %s\r\n",
+		    __func__, meta->dev_name);
+		return (rc);
+	}
+
+	/* hand the updated header back to the caller */
+	memcpy(meta, &req.meta, sizeof(req.meta));
+	return (rc);
+}
+
+/*
+ * Read exactly 'len' bytes from 'fd', starting at 'file_offset', into
+ * 'dest'.
+ *
+ * Short reads are continued until the request is satisfied and reads
+ * interrupted by a signal are retried.  Reaching end-of-file before 'len'
+ * bytes were read is reported as a truncated file rather than printing a
+ * stale errno.  Returns 0 on success and -1 on any failure.
+ */
+static int
+vm_mem_read_from_file(int fd, void *dest, size_t file_offset, size_t len)
+{
+	char *cursor = dest;	/* 'char *' so pointer arithmetic is standard C */
+	size_t remaining = len;
+	ssize_t cnt_read;
+
+	if (lseek(fd, file_offset, SEEK_SET) < 0) {
+		fprintf(stderr,
+		    "%s: Could not change file offset errno = %d\r\n",
+		    __func__, errno);
+		return (-1);
+	}
+
+	while (remaining > 0) {
+		cnt_read = read(fd, cursor, remaining);
+		if (cnt_read < 0) {
+			if (errno == EINTR)
+				continue;
+			fprintf(stderr, "%s: read error: %d\r\n",
+			    __func__, errno);
+			return (-1);
+		}
+		if (cnt_read == 0) {
+			/* EOF: read() does not set errno in this case. */
+			fprintf(stderr,
+			    "%s: unexpected end of file, %zu bytes missing\r\n",
+			    __func__, remaining);
+			return (-1);
+		}
+		cursor += cnt_read;
+		remaining -= cnt_read;
+	}
+
+	return (0);
+}
+
+/*
+ * Fill guest memory from the snapshot file 'vmmem_fd'.
+ *
+ * 'size' must equal lowmem + highmem.  Low memory is read from file offset
+ * 0 to the start of the guest mapping; high memory, when present, is read
+ * from file offset 'lowmem' to the 4GB mark of the mapping.  Returns 0 on
+ * success and -1 on a size mismatch or read failure.
+ */
+int
+vm_restore_mem(struct vmctx *ctx, int vmmem_fd, size_t size)
+{
+
+	if (ctx->lowmem + ctx->highmem != size) {
+		/* size_t arguments require %zu, not %ld. */
+		fprintf(stderr, "%s: mem size mismatch: %zu vs %zu\n",
+		    __func__, ctx->lowmem + ctx->highmem, size);
+		return (-1);
+	}
+
+	if (vm_mem_read_from_file(vmmem_fd, ctx->baseaddr,
+	    0, ctx->lowmem) != 0) {
+		fprintf(stderr,
+		    "%s: Could not read lowmem from file\r\n", __func__);
+		return (-1);
+	}
+
+	if (ctx->highmem > 0) {
+		if (vm_mem_read_from_file(vmmem_fd, ctx->baseaddr + 4*GB,
+		    ctx->lowmem, ctx->highmem) != 0) {
+			fprintf(stderr,
+			    "%s: Could not read highmem from file\r\n",
+			    __func__);
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Issue the VM_RESTORE_TIME ioctl on the VM's device descriptor.  The
+ * ioctl takes an int argument; a zeroed placeholder is passed.  Returns
+ * the ioctl result.
+ */
+int
+vm_restore_time(struct vmctx *ctx)
+{
+	int placeholder = 0;
+
+	return (ioctl(ctx->fd, VM_RESTORE_TIME, &placeholder));
+}
+
int
vm_set_topology(struct vmctx *ctx,
uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h
+++ sys/amd64/include/vmm.h
@@ -34,6 +34,8 @@
#include <sys/sdt.h>
#include <x86/segments.h>
+struct vm_snapshot_meta;
+
#ifdef _KERNEL
SDT_PROVIDER_DECLARE(vmm);
#endif
@@ -130,6 +132,15 @@
struct vm_object;
struct vm_guest_paging;
struct pmap;
+struct vmcx_state;
+enum snapshot_req;
+
+/*
+ * Descriptor for one guest memory segment; vm_get_memsegs() returns a
+ * pointer to these.
+ */
+struct mem_seg {
+ size_t len; /* segment length -- presumably in bytes; confirm */
+ bool sysmem; /* NOTE(review): looks like system vs. device memory -- confirm */
+ struct vm_object *object; /* VM object associated with the segment */
+};
+#define VM_MAX_MEMSEGS 3
struct vm_eventinfo {
void *rptr; /* rendezvous cookie */
@@ -158,6 +169,10 @@
typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+typedef int (*vmi_snapshot_t)(void *vmi, struct vm_snapshot_meta *meta);
+typedef int (*vmi_snapshot_vmcx_t)(void *vmi, struct vm_snapshot_meta *meta,
+ int vcpu);
+typedef int (*vmi_restore_tsc_t)(void *vmi, int vcpuid, uint64_t now);
struct vmm_ops {
vmm_init_func_t init; /* module wide initialization */
@@ -177,6 +192,11 @@
vmi_vmspace_free vmspace_free;
vmi_vlapic_init vlapic_init;
vmi_vlapic_cleanup vlapic_cleanup;
+
+ /* checkpoint operations */
+ vmi_snapshot_t vmsnapshot;
+ vmi_snapshot_vmcx_t vmcx_snapshot;
+ vmi_restore_tsc_t vm_restore_tsc;
};
extern struct vmm_ops vmm_ops_intel;
@@ -213,6 +233,7 @@
int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
struct vm_object **objptr);
vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+struct mem_seg * vm_get_memsegs(struct vm *vm);
void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
int prot, void **cookie);
void vm_gpa_release(void *cookie);
@@ -249,6 +270,9 @@
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
+int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
+int vm_restore_time(struct vm *vm);
+
#ifdef _SYS__CPUSET_H_
/*
@@ -386,6 +410,15 @@
int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+/*
+ * Function used to keep track of the guest's TSC offset. The
+ * offset is used by the virtualization extensions to provide a consistent
+ * value for the Time Stamp Counter to the guest.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_set_tsc_offset(struct vm *vm, int vcpu_id, uint64_t offset);
+
enum vm_reg_name vm_segment_name(int seg_encoding);
struct vm_copyinfo {
Index: sys/amd64/include/vmm_dev.h
===================================================================
--- sys/amd64/include/vmm_dev.h
+++ sys/amd64/include/vmm_dev.h
@@ -31,6 +31,13 @@
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
+#include <machine/vmm_snapshot.h>
+#include <sys/_cpuset.h>
+#include <sys/param.h>
+#include <vm/vm.h>
+
+struct vm_snapshot_meta;
+
#ifdef _KERNEL
void vmmdev_init(void);
int vmmdev_cleanup(void);
@@ -233,6 +240,10 @@
uint16_t maxcpus;
};
+/* Argument of the VM_SNAPSHOT_REQ ioctl: wraps the snapshot metadata header. */
+struct vm_snapshot_req {
+ struct vm_snapshot_meta meta;
+};
+
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@@ -241,6 +252,8 @@
IOCNUM_GET_CAPABILITY = 3,
IOCNUM_SUSPEND = 4,
IOCNUM_REINIT = 5,
+ IOCNUM_VCPU_LOCK_ALL = 6,
+ IOCNUM_VCPU_UNLOCK_ALL = 7,
/* memory apis */
IOCNUM_MAP_MEMORY = 10, /* deprecated */
@@ -312,6 +325,11 @@
IOCNUM_RTC_WRITE = 101,
IOCNUM_RTC_SETTIME = 102,
IOCNUM_RTC_GETTIME = 103,
+
+ /* checkpoint */
+ IOCNUM_SNAPSHOT_REQ = 113,
+
+ IOCNUM_RESTORE_TIME = 115
};
#define VM_RUN \
@@ -320,6 +338,10 @@
_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
#define VM_REINIT \
_IO('v', IOCNUM_REINIT)
+#define VM_VCPU_LOCK_ALL\
+ _IO('v', IOCNUM_VCPU_LOCK_ALL)
+#define VM_VCPU_UNLOCK_ALL\
+ _IO('v', IOCNUM_VCPU_UNLOCK_ALL)
#define VM_ALLOC_MEMSEG \
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
#define VM_GET_MEMSEG \
@@ -422,4 +444,8 @@
_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
#define VM_RESTART_INSTRUCTION \
_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
+#define VM_SNAPSHOT_REQ \
+ _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_req)
+#define VM_RESTORE_TIME \
+ _IOWR('v', IOCNUM_RESTORE_TIME, int)
#endif
Index: sys/amd64/include/vmm_snapshot.h
===================================================================
--- /dev/null
+++ sys/amd64/include/vmm_snapshot.h
@@ -0,0 +1,116 @@
+#ifndef _VMM_SNAPSHOT_
+#define _VMM_SNAPSHOT_
+
+#include <sys/errno.h>
+#include <sys/types.h>
+
+struct vmctx;
+
+/*
+ * Selects which kernel-side structure a snapshot request refers to; stored
+ * in vm_snapshot_meta.dev_req.
+ */
+enum snapshot_req {
+ STRUCT_VMX,
+ STRUCT_VIOAPIC,
+ STRUCT_VM,
+ STRUCT_VLAPIC,
+ VM_MEM,
+ STRUCT_VHPET,
+ STRUCT_VMCX,
+ STRUCT_VATPIC,
+ STRUCT_VATPIT,
+ STRUCT_VPMTMR,
+ STRUCT_VRTC,
+};
+
+/*
+ * Snapshot data buffer: a fixed region (buf_start, buf_size) plus a cursor
+ * (buf, buf_rem) that tracks how much of the region is still unused.
+ */
+struct vm_snapshot_buffer {
+ /*
+ * R/O for device-specific functions;
+ * written by generic snapshot functions.
+ */
+ uint8_t *const buf_start;
+ const size_t buf_size;
+
+ /*
+ * R/W for device-specific functions used to keep track of buffer
+ * current position and remaining size.
+ */
+ uint8_t *buf;
+ size_t buf_rem;
+
+ /*
+ * Length of the snapshot is either determined as (buf_size - buf_rem)
+ * or (buf - buf_start) -- the second variation returns a signed value
+ * so it may not be appropriate.
+ *
+ * Use vm_get_snapshot_size(meta).
+ */
+};
+
+/* Direction of a snapshot operation; stored in vm_snapshot_meta.op. */
+enum vm_snapshot_op {
+ VM_SNAPSHOT_SAVE,
+ VM_SNAPSHOT_RESTORE,
+};
+
+/*
+ * Metadata header describing one snapshot request: what is being
+ * snapshotted, the data buffer to use, and whether the request saves or
+ * restores.
+ */
+struct vm_snapshot_meta {
+ struct vmctx *ctx;
+ void *dev_data;
+ const char *dev_name; /* identify userspace devices */
+ enum snapshot_req dev_req; /* identify kernel structs */
+
+ struct vm_snapshot_buffer buffer;
+
+ enum vm_snapshot_op op; /* VM_SNAPSHOT_SAVE or VM_SNAPSHOT_RESTORE */
+};
+
+
+void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op);
+int vm_snapshot_buf(volatile void *data, size_t data_size,
+ struct vm_snapshot_meta *meta);
+size_t vm_get_snapshot_size(struct vm_snapshot_meta *meta);
+int vm_snapshot_guest2host_addr(void **addrp, size_t len, int restore_null,
+ struct vm_snapshot_meta *meta);
+int vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+ struct vm_snapshot_meta *meta);
+
+/*
+ * Pass LEN bytes at DATA through vm_snapshot_buf() and store the result in
+ * RES.  On a non-zero result, report it with vm_snapshot_buf_err() -- using
+ * the stringified DATA expression as the name -- and jump to LABEL.
+ */
+#define SNAPSHOT_BUF_OR_LEAVE(DATA, LEN, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_buf((DATA), (LEN), (META)); \
+ if ((RES) != 0) { \
+ vm_snapshot_buf_err(#DATA, (META)->op); \
+ goto LABEL; \
+ } \
+} while (0)
+
+/* SNAPSHOT_BUF_OR_LEAVE() for one lvalue: passes &(DATA) and sizeof(DATA). */
+#define SNAPSHOT_VAR_OR_LEAVE(DATA, META, RES, LABEL) \
+ SNAPSHOT_BUF_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL)
+
+/*
+ * Address variables are pointers to guest memory.
+ *
+ * When RNULL != 0, do not enforce invalid address checks; instead, make the
+ * pointer NULL at restore time.
+ *
+ * The result of vm_snapshot_guest2host_addr() is stored in RES; any non-zero
+ * result jumps to LABEL, and EFAULT additionally prints the stringified ADDR
+ * expression to stderr.
+ * NOTE(review): relies on fprintf(stderr, ...), so this macro looks usable
+ * from userspace code only -- confirm.
+ */
+#define SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ADDR, LEN, RNULL, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_guest2host_addr((void **)&(ADDR), (LEN), (RNULL), \
+ (META)); \
+ if ((RES) != 0) { \
+ if ((RES) == EFAULT) \
+ fprintf(stderr, "%s: invalid address: %s\r\n", \
+ __func__, #ADDR); \
+ goto LABEL; \
+ } \
+} while (0)
+
+/*
+ * Compare the value in the meta buffer with the data: the result of
+ * vm_snapshot_buf_cmp() is stored in RES, and a non-zero result is reported
+ * with vm_snapshot_buf_err() before jumping to LABEL.
+ */
+#define SNAPSHOT_BUF_CMP_OR_LEAVE(DATA, LEN, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_buf_cmp((DATA), (LEN), (META)); \
+ if ((RES) != 0) { \
+ vm_snapshot_buf_err(#DATA, (META)->op); \
+ goto LABEL; \
+ } \
+} while (0)
+
+/* SNAPSHOT_BUF_CMP_OR_LEAVE() for one lvalue: passes &(DATA) and sizeof(DATA). */
+#define SNAPSHOT_VAR_CMP_OR_LEAVE(DATA, META, RES, LABEL) \
+ SNAPSHOT_BUF_CMP_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL)
+
+#endif
Index: sys/amd64/vmm/amd/svm.h
===================================================================
--- sys/amd64/vmm/amd/svm.h
+++ sys/amd64/vmm/amd/svm.h
@@ -32,6 +32,7 @@
#define _SVM_H_
struct pcpu;
+struct svm_softc;
/*
* Guest register state that is saved outside the VMCB.
@@ -66,5 +67,6 @@
};
void svm_launch(uint64_t pa, struct svm_regctx *gctx, struct pcpu *pcpu);
+int svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset);
#endif /* _SVM_H_ */
Index: sys/amd64/vmm/amd/svm.c
===================================================================
--- sys/amd64/vmm/amd/svm.c
+++ sys/amd64/vmm/amd/svm.c
@@ -50,6 +50,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -278,6 +279,23 @@
svm_enable(NULL);
}
+/*
+ * Store 'offset' in the VMCB control area's tsc_offset field for 'vcpu',
+ * mark the VMCB_CACHE_I state dirty, trace the change, and record the
+ * offset through vm_set_tsc_offset().  Returns vm_set_tsc_offset()'s
+ * result.
+ */
+int
+svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset)
+{
+	struct vmcb_ctrl *ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+
+	ctrl->tsc_offset = offset;
+	svm_set_dirty(sc, vcpu, VMCB_CACHE_I);
+	VCPU_CTR1(sc->vm, vcpu, "tsc offset changed to %#lx", offset);
+
+	return (vm_set_tsc_offset(sc->vm, vcpu, offset));
+}
+
/* Pentium compatible MSRs */
#define MSR_PENTIUM_START 0
#define MSR_PENTIUM_END 0x1FFF
@@ -2198,6 +2216,34 @@
return (EINVAL);
}
+/*
+ * Save or restore the single guest register 'ident' of 'vcpu'.
+ *
+ * On VM_SNAPSHOT_SAVE the register is read with svm_getreg() and then
+ * passed to the snapshot buffer; on VM_SNAPSHOT_RESTORE the value is taken
+ * from the snapshot buffer and written with svm_setreg().  Any other
+ * operation yields EINVAL.  Returns 0 on success or the first error.
+ */
+static int
+svm_snapshot_reg(void *arg, int vcpu, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int error;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		error = svm_getreg(arg, vcpu, ident, &val);
+		if (error != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, error, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, error, out);
+		error = svm_setreg(arg, vcpu, ident, val);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+out:
+	return (error);
+}
+
static int
svm_setcap(void *arg, int vcpu, int type, int val)
{
@@ -2280,6 +2326,304 @@
free(vlapic, M_SVM_VLAPIC);
}
+/*
+ * Snapshot handler for the SVM softc: passes sc->nptp and then, for every
+ * vcpu slot up to VM_MAXCPU, selected VMCB control fields, the VMCB save
+ * state area, the software register context and the remaining svm_vcpu
+ * bookkeeping fields through the snapshot buffer described by 'meta'.
+ *
+ * The same sequence runs for both operations; vm_snapshot_buf() presumably
+ * copies in the direction selected by meta->op (confirm).  The order of the
+ * calls defines the layout of the snapshot stream, so it must not change.
+ *
+ * Returns 0 on success or the first non-zero result from the buffer helper.
+ */
+static int
+svm_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta)
+{
+ /* struct svm_softc is AMD's representation for SVM softc */
+ struct svm_softc *sc;
+ struct svm_vcpu *vcpu;
+ struct vmcb *vmcb;
+ uint64_t val;
+ int i;
+ int ret;
+
+ sc = arg;
+
+ KASSERT(sc != NULL, ("%s: arg was NULL", __func__));
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->nptp, meta, ret, done);
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vcpu = &sc->vcpu[i];
+ vmcb = &vcpu->vmcb;
+
+ /* VMCB fields for virtual cpu i */
+ /*
+ * NOTE(review): v_tpr goes through the buffer twice (directly and
+ * via 'val'); confirm whether the first call is intentional.
+ */
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.v_tpr, meta, ret, done);
+ /* Fields staged through 'val' are written back after the call. */
+ val = vmcb->ctrl.v_tpr;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.v_tpr = val;
+
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.asid, meta, ret, done);
+ val = vmcb->ctrl.np_enable;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.np_enable = val;
+
+ val = vmcb->ctrl.intr_shadow;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.intr_shadow = val;
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.tlb_ctrl, meta, ret, done);
+
+ /* VMCB save state area, including its padding fields */
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad1,
+ sizeof(vmcb->state.pad1),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cpl, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad2,
+ sizeof(vmcb->state.pad2),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.efer, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad3,
+ sizeof(vmcb->state.pad3),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr4, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr3, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr7, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr6, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rflags, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rip, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad4,
+ sizeof(vmcb->state.pad4),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rsp, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad5,
+ sizeof(vmcb->state.pad5),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rax, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.star, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.lstar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cstar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sfmask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.kernelgsbase,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_cs, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_esp,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_eip,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr2, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad6,
+ sizeof(vmcb->state.pad6),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.g_pat, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dbgctl, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_from, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_to, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_from, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_to, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad7,
+ sizeof(vmcb->state.pad7),
+ meta, ret, done);
+
+ /* Snapshot swctx for virtual cpu i */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbp, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rcx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rsi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r8, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r9, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r10, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r11, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r12, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r13, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r14, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r15, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr3, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr3, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr6, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr7, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_debugctl, meta, ret,
+ done);
+
+ /* Snapshot the other svm_vcpu struct fields */
+
+ /* NEXTRIP field */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
+
+ /* lastcpu and dirty fields */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->lastcpu, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->dirty, meta, ret, done);
+
+ /* EPTGEN field - EPT is Extended Page Table */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->eptgen, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.gen, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.num, meta, ret, done);
+
+ /* After a restore, set all caches dirty */
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ svm_set_dirty(sc, i, VMCB_CACHE_ASID);
+ svm_set_dirty(sc, i, VMCB_CACHE_IOPM);
+ svm_set_dirty(sc, i, VMCB_CACHE_I);
+ svm_set_dirty(sc, i, VMCB_CACHE_TPR);
+ svm_set_dirty(sc, i, VMCB_CACHE_CR2);
+ svm_set_dirty(sc, i, VMCB_CACHE_CR);
+ svm_set_dirty(sc, i, VMCB_CACHE_DT);
+ svm_set_dirty(sc, i, VMCB_CACHE_SEG);
+ svm_set_dirty(sc, i, VMCB_CACHE_NP);
+ }
+ }
+
+ /*
+ * NOTE(review): the return value of flush_by_asid() is discarded;
+ * confirm that it performs a flush rather than only reporting a
+ * capability.
+ */
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ flush_by_asid();
+
+done:
+ return (ret);
+}
+
+/* Selects the helper used for one entry of svm_vmcx_items[]. */
+enum svm_vmcx_kind {
+	SVM_VMCX_REG,		/* svm_snapshot_reg() */
+	SVM_VMCX_DESC,		/* vmcb_snapshot_desc() */
+	SVM_VMCX_ANY,		/* vmcb_snapshot_any() */
+};
+
+struct svm_vmcx_item {
+	enum svm_vmcx_kind kind;
+	int ident;
+};
+
+/*
+ * Per-vcpu state handled by svm_snapshot_vmcx(), in snapshot stream order.
+ * The order defines the layout of the snapshot data: do not reorder.
+ * VMCB_ACCESS() idents are cast explicitly because they are passed on as
+ * 'int' identifiers.
+ */
+static const struct svm_vmcx_item svm_vmcx_items[] = {
+	{ SVM_VMCX_REG, VM_REG_GUEST_CR0 },
+	{ SVM_VMCX_REG, VM_REG_GUEST_CR2 },
+	{ SVM_VMCX_REG, VM_REG_GUEST_CR3 },
+	{ SVM_VMCX_REG, VM_REG_GUEST_CR4 },
+
+	{ SVM_VMCX_REG, VM_REG_GUEST_DR7 },
+
+	{ SVM_VMCX_REG, VM_REG_GUEST_RAX },
+
+	{ SVM_VMCX_REG, VM_REG_GUEST_RSP },
+	{ SVM_VMCX_REG, VM_REG_GUEST_RIP },
+	{ SVM_VMCX_REG, VM_REG_GUEST_RFLAGS },
+
+	/* Guest segments: selector register followed by its descriptor */
+	{ SVM_VMCX_REG, VM_REG_GUEST_ES },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_ES },
+	{ SVM_VMCX_REG, VM_REG_GUEST_CS },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_CS },
+	{ SVM_VMCX_REG, VM_REG_GUEST_SS },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_SS },
+	{ SVM_VMCX_REG, VM_REG_GUEST_DS },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_DS },
+	{ SVM_VMCX_REG, VM_REG_GUEST_FS },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_FS },
+	{ SVM_VMCX_REG, VM_REG_GUEST_GS },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_GS },
+	{ SVM_VMCX_REG, VM_REG_GUEST_TR },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_TR },
+	{ SVM_VMCX_REG, VM_REG_GUEST_LDTR },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_LDTR },
+
+	/* EFER */
+	{ SVM_VMCX_REG, VM_REG_GUEST_EFER },
+
+	/* IDTR and GDTR */
+	{ SVM_VMCX_DESC, VM_REG_GUEST_IDTR },
+	{ SVM_VMCX_DESC, VM_REG_GUEST_GDTR },
+
+	/* Specific AMD registers */
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_SYSENTER_CS, 8) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_SYSENTER_ESP, 8) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_SYSENTER_EIP, 8) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_NPT_BASE, 8) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_CR_INTERCEPT, 4) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_DR_INTERCEPT, 4) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_EXC_INTERCEPT, 4) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_INST1_INTERCEPT, 4) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_INST2_INTERCEPT, 4) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_TLB_CTRL, 4) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_EXITINFO1, 8) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_EXITINFO2, 8) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_EXITINTINFO, 8) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_VIRQ, 8) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_GUEST_PAT, 8) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_AVIC_BAR, 8) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_AVIC_PAGE, 8) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_AVIC_LT, 8) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_AVIC_PT, 8) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_IO_PERM, 8) },
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_MSR_PERM, 8) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_ASID, 4) },
+
+	{ SVM_VMCX_ANY, (int)VMCB_ACCESS(VMCB_OFF_EXIT_REASON, 8) },
+
+	{ SVM_VMCX_REG, VM_REG_GUEST_INTR_SHADOW },
+};
+
+/*
+ * Save or restore the per-vcpu VMCB state of 'vcpu' through the snapshot
+ * buffer described by 'meta'.
+ *
+ * Refuses with EINVAL when the vcpu is running on a host cpu other than
+ * the current one.  Items are processed in svm_vmcx_items[] order and the
+ * first failing item's error code is returned as-is.  Error codes are no
+ * longer summed, because a sum of errno values is not a meaningful errno.
+ * Returns 0 when every item succeeded.
+ */
+static int
+svm_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu)
+{
+	const struct svm_vmcx_item *item;
+	struct svm_softc *sc;
+	size_t i;
+	int err, running, hostcpu;
+
+	KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+	sc = (struct svm_softc *)arg;
+
+	running = vcpu_is_running(sc->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu) {
+		printf("%s: %s%d is running\n", __func__, vm_name(sc->vm),
+		    vcpu);
+		return (EINVAL);
+	}
+
+	for (i = 0; i < sizeof(svm_vmcx_items) / sizeof(svm_vmcx_items[0]);
+	    i++) {
+		item = &svm_vmcx_items[i];
+
+		switch (item->kind) {
+		case SVM_VMCX_REG:
+			err = svm_snapshot_reg(sc, vcpu, item->ident, meta);
+			break;
+		case SVM_VMCX_DESC:
+			err = vmcb_snapshot_desc(sc, vcpu, item->ident, meta);
+			break;
+		default:	/* SVM_VMCX_ANY */
+			err = vmcb_snapshot_any(sc, vcpu, item->ident, meta);
+			break;
+		}
+
+		if (err != 0)
+			return (err);
+	}
+
+	return (0);
+}
+
+/*
+ * vmm_ops hook: applies 'offset' to 'vcpu' by delegating to
+ * svm_set_tsc_offset() and returns its result.
+ */
+static int
+svm_restore_tsc(void *arg, int vcpu, uint64_t offset)
+{
+
+	return (svm_set_tsc_offset(arg, vcpu, offset));
+}
+
struct vmm_ops vmm_ops_amd = {
svm_init,
svm_cleanup,
@@ -2296,5 +2640,8 @@
svm_npt_alloc,
svm_npt_free,
svm_vlapic_init,
- svm_vlapic_cleanup
+ svm_vlapic_cleanup,
+ svm_snapshot_vmi,
+ svm_snapshot_vmcx,
+ svm_restore_tsc,
};
Index: sys/amd64/vmm/amd/svm_msr.c
===================================================================
--- sys/amd64/vmm/amd/svm_msr.c
+++ sys/amd64/vmm/amd/svm_msr.c
@@ -162,6 +162,8 @@
* Ignore writes to microcode update register.
*/
break;
+ case MSR_TSC:
+ error = svm_set_tsc_offset(sc, vcpu, val - rdtsc());
case MSR_EXTFEATURES:
break;
default:
Index: sys/amd64/vmm/amd/vmcb.h
===================================================================
--- sys/amd64/vmm/amd/vmcb.h
+++ sys/amd64/vmm/amd/vmcb.h
@@ -209,6 +209,9 @@
#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF)
#ifdef _KERNEL
+
+struct vm_snapshot_meta;
+
/* VMCB save state area segment format */
struct vmcb_segment {
uint16_t selector;
@@ -331,6 +334,12 @@
int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg);
+int vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val);
+int vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val);
+int vmcb_snapshot_desc(void *arg, int vcpu, int reg,
+ struct vm_snapshot_meta *meta);
+int vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident,
+ struct vm_snapshot_meta *meta);
#endif /* _KERNEL */
#endif /* _VMCB_H_ */
Index: sys/amd64/vmm/amd/vmcb.c
===================================================================
--- sys/amd64/vmm/amd/vmcb.c
+++ sys/amd64/vmm/amd/vmcb.c
@@ -35,6 +35,7 @@
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
@@ -452,3 +453,104 @@
return (0);
}
+
+/*
+ * Read register 'ident' of 'vcpu' into '*val' through vm_get_register().
+ * Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU) or 'ident' is not
+ * below VM_REG_LAST; otherwise returns vm_get_register()'s result.
+ */
+int
+vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val)
+{
+
+	if (vcpu < 0 || vcpu >= VM_MAXCPU)
+		return (EINVAL);
+	if (ident >= VM_REG_LAST)
+		return (EINVAL);
+
+	return (vm_get_register(sc->vm, vcpu, ident, val));
+}
+
+/*
+ * Write 'val' to register 'ident' of 'vcpu' through vm_set_register().
+ * Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU) or 'ident' is not
+ * below VM_REG_LAST; otherwise returns vm_set_register()'s result.
+ */
+int
+vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val)
+{
+
+	if (vcpu < 0 || vcpu >= VM_MAXCPU)
+		return (EINVAL);
+	if (ident >= VM_REG_LAST)
+		return (EINVAL);
+
+	return (vm_set_register(sc->vm, vcpu, ident, val));
+}
+
+/*
+ * Save or restore the segment descriptor 'reg' of 'vcpu'.
+ *
+ * On VM_SNAPSHOT_SAVE the descriptor is fetched with vmcb_getdesc() and
+ * its base, limit and access fields are passed to the snapshot buffer in
+ * that order; on VM_SNAPSHOT_RESTORE the three fields are taken from the
+ * buffer and applied with vmcb_setdesc().  Any other operation yields
+ * EINVAL.  Returns 0 on success or the first error.
+ */
+int
+vmcb_snapshot_desc(void *arg, int vcpu, int reg, struct vm_snapshot_meta *meta)
+{
+	struct seg_desc desc;
+	int error;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		error = vmcb_getdesc(arg, vcpu, reg, &desc);
+		if (error != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, error, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, error, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, error, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, error, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, error, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, error, out);
+		error = vmcb_setdesc(arg, vcpu, reg, &desc);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+out:
+	return (error);
+}
+
+/*
+ * Save or restore the value identified by 'ident' for 'vcpu'.
+ *
+ * On VM_SNAPSHOT_SAVE the value is read with vmcb_getany() and passed to
+ * the snapshot buffer; on VM_SNAPSHOT_RESTORE it is taken from the buffer
+ * and written with vmcb_setany().  Any other operation yields EINVAL.
+ * Returns 0 on success or the first error.
+ */
+int
+vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int error;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		error = vmcb_getany(sc, vcpu, ident, &val);
+		if (error != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, error, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, error, out);
+		error = vmcb_setany(sc, vcpu, ident, val);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+out:
+	return (error);
+}
Index: sys/amd64/vmm/intel/vmcs.h
===================================================================
--- sys/amd64/vmm/intel/vmcs.h
+++ sys/amd64/vmm/intel/vmcs.h
@@ -32,6 +32,9 @@
#define _VMCS_H_
#ifdef _KERNEL
+
+struct vm_snapshot_meta;
+
struct vmcs {
uint32_t identifier;
uint32_t abort_code;
@@ -55,6 +58,14 @@
struct seg_desc *desc);
int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
+int vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val);
+int vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val);
+int vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
+ struct vm_snapshot_meta *meta);
+int vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
+ struct vm_snapshot_meta *meta);
+int vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
+ struct vm_snapshot_meta *meta);
/*
* Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h
Index: sys/amd64/vmm/intel/vmcs.c
===================================================================
--- sys/amd64/vmm/intel/vmcs.c
+++ sys/amd64/vmm/intel/vmcs.c
@@ -43,6 +43,7 @@
#include <machine/segments.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_host.h"
#include "vmx_cpufunc.h"
#include "vmcs.h"
@@ -428,6 +429,126 @@
return (error);
}
+/*
+ * Read the VMCS field 'ident' into '*val' with vmread().
+ *
+ * When 'running' is zero, VMPTRLD(vmcs) is issued before the read and
+ * VMCLEAR(vmcs) after it; otherwise the read is issued directly.
+ * Returns the vmread() status.
+ */
+int
+vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
+{
+	int error;
+
+	if (running)
+		return (vmread(ident, val));
+
+	VMPTRLD(vmcs);
+	error = vmread(ident, val);
+	VMCLEAR(vmcs);
+
+	return (error);
+}
+
+/*
+ * Write 'val' to the VMCS field 'ident' with vmwrite().
+ *
+ * When 'running' is zero, VMPTRLD(vmcs) is issued before the write and
+ * VMCLEAR(vmcs) after it; otherwise the write is issued directly.
+ * Returns the vmwrite() status.
+ */
+int
+vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
+{
+	int error;
+
+	if (running)
+		return (vmwrite(ident, val));
+
+	VMPTRLD(vmcs);
+	error = vmwrite(ident, val);
+	VMCLEAR(vmcs);
+
+	return (error);
+}
+
+/*
+ * Save or restore one guest register, named by 'ident', through the
+ * snapshot buffer described by 'meta'.
+ *
+ * VM_SNAPSHOT_SAVE:    fetch the register with vmcs_getreg(), then pass the
+ *                      value to SNAPSHOT_VAR_OR_LEAVE.
+ * VM_SNAPSHOT_RESTORE: obtain the value via SNAPSHOT_VAR_OR_LEAVE, then
+ *                      store it with vmcs_setreg().
+ *
+ * Returns EINVAL for any other meta->op; otherwise the status of the
+ * get/set call or whatever SNAPSHOT_VAR_OR_LEAVE left in 'ret'.
+ */
+int
+vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getreg(vmcs, running, ident, &val);
+		if (ret != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		ret = vmcs_setreg(vmcs, running, ident, val);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore one segment descriptor (base, limit, access - in that
+ * order) for segment 'seg' through the snapshot buffer described by 'meta'.
+ *
+ * VM_SNAPSHOT_SAVE:    fetch the descriptor with vmcs_getdesc(), then pass
+ *                      its three fields to SNAPSHOT_VAR_OR_LEAVE.
+ * VM_SNAPSHOT_RESTORE: obtain the three fields via SNAPSHOT_VAR_OR_LEAVE,
+ *                      then install the descriptor with vmcs_setdesc().
+ *
+ * Returns EINVAL for any other meta->op; otherwise the status of the
+ * get/set call or whatever SNAPSHOT_VAR_OR_LEAVE left in 'ret'.
+ */
+int
+vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
+    struct vm_snapshot_meta *meta)
+{
+	struct seg_desc desc;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getdesc(vmcs, running, seg, &desc);
+		if (ret != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
+		ret = vmcs_setdesc(vmcs, running, seg, &desc);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore an arbitrary VMCS field, selected by its encoding
+ * 'ident', through the snapshot buffer described by 'meta'.
+ *
+ * VM_SNAPSHOT_SAVE:    read the field with vmcs_getany(), then pass the
+ *                      value to SNAPSHOT_VAR_OR_LEAVE.
+ * VM_SNAPSHOT_RESTORE: obtain the value via SNAPSHOT_VAR_OR_LEAVE, then
+ *                      write it with vmcs_setany().
+ *
+ * Returns EINVAL for any other meta->op; otherwise the status of the
+ * get/set call or whatever SNAPSHOT_VAR_OR_LEAVE left in 'ret'.
+ */
+int
+vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getany(vmcs, running, ident, &val);
+		if (ret != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		ret = vmcs_setany(vmcs, running, ident, val);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
+
#ifdef DDB
extern int vmxon_enabled[];
Index: sys/amd64/vmm/intel/vmx.c
===================================================================
--- sys/amd64/vmm/intel/vmx.c
+++ sys/amd64/vmm/intel/vmx.c
@@ -56,6 +56,8 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
+
#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
@@ -288,6 +290,7 @@
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
static void vmx_inject_pir(struct vlapic *vlapic);
+static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now);
#ifdef KTR
static const char *
@@ -1279,7 +1282,12 @@
}
error = vmwrite(VMCS_TSC_OFFSET, offset);
+ if (error != 0)
+ goto done;
+
+ error = vm_set_tsc_offset(vmx->vm, vcpu, offset);
+done:
return (error);
}
@@ -2823,6 +2831,10 @@
write_rflags(read_rflags() | vmxctx->host_tf);
}
+/*
+ * XXX
+ * Added old_vmcs and old_regs to vmx_run to test guest vcpu saving
+ */
static int
vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
struct vm_eventinfo *evinfo)
@@ -2863,8 +2875,12 @@
*/
vmcs_write(VMCS_HOST_CR3, rcr3());
+ /*
+ * XXX If we restore a VM we use the rip saved in the vmcs
+ */
vmcs_write(VMCS_GUEST_RIP, rip);
vmx_set_pcpu_defaults(vmx, vcpu, pmap);
+
do {
KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch "
"%#lx/%#lx", __func__, vmcs_guest_rip(), rip));
@@ -3785,6 +3801,138 @@
free(vlapic, M_VLAPIC);
}
+/*
+ * Snapshot the per-vCPU state kept in 'struct vmx': for each of the
+ * VM_MAXCPU slots, the guest_msrs[] entry followed by the whole vmxctx.
+ *
+ * The vmxctx->pmap pointer is captured before the buffer operation and put
+ * back afterwards so that it always refers to the current pmap, and
+ * eptgen[i] is set one behind that pmap's pm_eptgen.
+ *
+ * Returns 0 on success or the error left in 'ret' by the failing
+ * SNAPSHOT_BUF_OR_LEAVE step.
+ */
+static int
+vmx_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta)
+{
+	struct vmx *vmx;
+	struct vmxctx *vmxctx;
+	struct pmap *new_pmap;
+	int i;
+	int ret;
+
+	vmx = arg;
+
+	KASSERT(vmx != NULL, ("%s: arg was NULL", __func__));
+
+	ret = 0;
+
+	for (i = 0; i < VM_MAXCPU; i++) {
+		SNAPSHOT_BUF_OR_LEAVE(vmx->guest_msrs[i],
+		    sizeof(vmx->guest_msrs[i]), meta, ret,
+		    done);
+
+		vmxctx = &vmx->ctx[i];
+
+		new_pmap = vmxctx->pmap;
+		SNAPSHOT_BUF_OR_LEAVE(vmxctx, sizeof(*vmxctx), meta, ret, done);
+		vmxctx->pmap = new_pmap;
+		vmx->eptgen[i] = new_pmap->pm_eptgen - 1;
+	}
+
+done:
+	/* Propagate the failure instead of unconditionally reporting success. */
+	return (ret);
+}
+
+
+/*
+ * Save or restore the VMCS guest state of one vCPU through 'meta'.
+ *
+ * The sequence of vmcs_snapshot_reg/desc/any calls below fixes the order in
+ * which fields appear in the snapshot buffer, so it must stay identical for
+ * save and restore.
+ *
+ * Returns EINVAL when the vCPU is running on a host CPU other than the
+ * current one.  Otherwise returns the SUM of the individual call results:
+ * zero means every step succeeded, while a nonzero value only signals
+ * failure.  NOTE(review): when more than one step fails the sum is not a
+ * meaningful errno, and later steps still run after an earlier failure.
+ */
+static int
+vmx_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu)
+{
+ struct vmcs *vmcs;
+ struct vmx *vmx;
+ int err, run, hostcpu;
+
+ vmx = (struct vmx *)arg;
+ err = 0;
+
+ KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+ vmcs = &vmx->vmcs[vcpu];
+
+ /* 'run' is forwarded to the vmcs_* helpers as their 'running' flag. */
+ run = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+ if (run && hostcpu != curcpu) {
+ printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu);
+ return (EINVAL);
+ }
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR0, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR3, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CR4, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_DR7, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RSP, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RIP, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_RFLAGS, meta);
+
+ /* Guest segments */
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_ES, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_ES, meta);
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_CS, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_CS, meta);
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_SS, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_SS, meta);
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_DS, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_DS, meta);
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_FS, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_FS, meta);
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_GS, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_GS, meta);
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_TR, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_TR, meta);
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_LDTR, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_LDTR, meta);
+
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_EFER, meta);
+
+ /* IDTR and GDTR are snapshotted as descriptors only. */
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_IDTR, meta);
+ err += vmcs_snapshot_desc(vmcs, run, VM_REG_GUEST_GDTR, meta);
+
+ /* Guest page tables */
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE0, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE1, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE2, meta);
+ err += vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_PDPTE3, meta);
+
+ /* Other guest state */
+ err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_CS, meta);
+ err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_ESP, meta);
+ err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_SYSENTER_EIP, meta);
+ err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_INTERRUPTIBILITY, meta);
+ err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_ACTIVITY, meta);
+ err += vmcs_snapshot_any(vmcs, run, VMCS_GUEST_IA32_EFER, meta);
+ err += vmcs_snapshot_any(vmcs, run, VMCS_ENTRY_CTLS, meta);
+ err += vmcs_snapshot_any(vmcs, run, VMCS_EXIT_CTLS, meta);
+
+ return (err);
+}
+
+/*
+ * Apply 'offset' as the TSC offset of 'vcpu' via vmx_set_tsc_offset().
+ *
+ * Fails with EINVAL when the vCPU is running on a host CPU other than the
+ * current one.  When the vCPU is not running, the call is bracketed by
+ * VMPTRLD(vmcs) / VMCLEAR(vmcs).  Otherwise returns the status of
+ * vmx_set_tsc_offset().
+ */
+static int
+vmx_restore_tsc(void *arg, int vcpu, uint64_t offset)
+{
+	struct vmx *vmx = (struct vmx *)arg;
+	struct vmcs *vmcs;
+	int error, hostcpu, running;
+
+	KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+	vmcs = &vmx->vmcs[vcpu];
+
+	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu) {
+		printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu);
+		return (EINVAL);
+	}
+
+	if (running)
+		return (vmx_set_tsc_offset(vmx, vcpu, offset));
+
+	VMPTRLD(vmcs);
+	error = vmx_set_tsc_offset(vmx, vcpu, offset);
+	VMCLEAR(vmcs);
+
+	return (error);
+}
+
+
struct vmm_ops vmm_ops_intel = {
vmx_init,
vmx_cleanup,
@@ -3802,4 +3950,7 @@
ept_vmspace_free,
vmx_vlapic_init,
vmx_vlapic_cleanup,
+ vmx_snapshot_vmi,
+ vmx_snapshot_vmcx,
+ vmx_restore_tsc,
};
Index: sys/amd64/vmm/io/vatpic.h
===================================================================
--- sys/amd64/vmm/io/vatpic.h
+++ sys/amd64/vmm/io/vatpic.h
@@ -36,6 +36,8 @@
#define IO_ELCR1 0x4d0
#define IO_ELCR2 0x4d1
+struct vm_snapshot_meta;
+
struct vatpic *vatpic_init(struct vm *vm);
void vatpic_cleanup(struct vatpic *vatpic);
@@ -54,4 +56,6 @@
void vatpic_pending_intr(struct vm *vm, int *vecptr);
void vatpic_intr_accepted(struct vm *vm, int vector);
+int vatpic_snapshot(struct vatpic *vatpic, struct vm_snapshot_meta *meta);
+
#endif /* _VATPIC_H_ */
Index: sys/amd64/vmm/io/vatpic.c
===================================================================
--- sys/amd64/vmm/io/vatpic.c
+++ sys/amd64/vmm/io/vatpic.c
@@ -42,6 +42,7 @@
#include <dev/ic/i8259.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vmm_lapic.h"
@@ -808,3 +809,41 @@
{
free(vatpic, M_VATPIC);
}
+
+/*
+ * Save or restore the state of the virtual 8259 PIC pair through 'meta'.
+ *
+ * For every element of vatpic->atpic[] the fields are passed to the
+ * SNAPSHOT_*_OR_LEAVE macros in the order listed below, followed by the
+ * vatpic->elc array.  The same order is used for both directions, so it
+ * defines the layout of this device's snapshot data.
+ *
+ * Returns the value left in 'ret' by the last macro executed (the macros
+ * are given the 'done' label to leave through).
+ */
+int
+vatpic_snapshot(struct vatpic *vatpic, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct atpic *atpic;
+
+ for (i = 0; i < nitems(vatpic->atpic); i++) {
+ atpic = &vatpic->atpic[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(atpic->ready, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->icw_num, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->rd_cmd_reg, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(atpic->aeoi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->poll, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->rotate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->sfn, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->irq_base, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->request, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->service, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->mask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->smm, meta, ret, done);
+
+ /* acnt is an array, hence the explicit-size BUF variant. */
+ SNAPSHOT_BUF_OR_LEAVE(atpic->acnt, sizeof(atpic->acnt),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->lowprio, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->intr_raised, meta, ret, done);
+
+ }
+
+ /* State shared by both PICs comes last. */
+ SNAPSHOT_BUF_OR_LEAVE(vatpic->elc, sizeof(vatpic->elc),
+ meta, ret, done);
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vatpit.h
===================================================================
--- sys/amd64/vmm/io/vatpit.h
+++ sys/amd64/vmm/io/vatpit.h
@@ -36,6 +36,8 @@
#define NMISC_PORT 0x61
+struct vm_snapshot_meta;
+
struct vatpit *vatpit_init(struct vm *vm);
void vatpit_cleanup(struct vatpit *vatpit);
@@ -43,5 +45,6 @@
uint32_t *eax);
int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port,
int bytes, uint32_t *eax);
+int vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta);
#endif /* _VATPIT_H_ */
Index: sys/amd64/vmm/io/vatpit.c
===================================================================
--- sys/amd64/vmm/io/vatpit.c
+++ sys/amd64/vmm/io/vatpit.c
@@ -38,6 +38,7 @@
#include <sys/systm.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vatpic.h"
@@ -455,3 +456,36 @@
free(vatpit, M_VATPIT);
}
+
+/*
+ * Save or restore the state of the virtual 8254 PIT through 'meta'.
+ *
+ * freq_sbt is handled first, then every element of vatpit->channel[] in the
+ * field order listed below; that order defines the layout of this device's
+ * snapshot data.
+ *
+ * Returns the value left in 'ret' by the last SNAPSHOT_*_OR_LEAVE macro
+ * executed.
+ */
+int
+vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct channel *channel;
+
+ SNAPSHOT_VAR_OR_LEAVE(vatpit->freq_sbt, meta, ret, done);
+
+ /* properly restore timers; they will NOT work currently */
+ /*
+  * NOTE(review): this message is not conditional on meta->op, so it is
+  * printed for save requests as well as restore requests.
+  */
+ printf("%s: snapshot restore does not reset timers!\r\n", __func__);
+
+ for (i = 0; i < nitems(vatpit->channel); i++) {
+ channel = &vatpit->channel[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(channel->mode, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->initial, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->now_sbt, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(channel->cr, sizeof(channel->cr),
+ meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(channel->ol, sizeof(channel->ol),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->slatched, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->crbyte, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->frbyte, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->callout_sbt, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vhpet.h
===================================================================
--- sys/amd64/vmm/io/vhpet.h
+++ sys/amd64/vmm/io/vhpet.h
@@ -35,6 +35,8 @@
#define VHPET_BASE 0xfed00000
#define VHPET_SIZE 1024
+struct vm_snapshot_meta;
+
struct vhpet *vhpet_init(struct vm *vm);
void vhpet_cleanup(struct vhpet *vhpet);
int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val,
@@ -42,5 +44,7 @@
int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val,
int size, void *arg);
int vhpet_getcap(struct vm_hpet_cap *cap);
+int vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta);
+int vhpet_restore_time(struct vhpet *vhpet);
#endif /* _VHPET_H_ */
Index: sys/amd64/vmm/io/vhpet.c
===================================================================
--- sys/amd64/vmm/io/vhpet.c
+++ sys/amd64/vmm/io/vhpet.c
@@ -43,6 +43,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vatpic.h"
@@ -761,3 +762,47 @@
cap->capabilities = vhpet_capabilities();
return (0);
}
+
+/*
+ * Save or restore the state of the virtual HPET through 'meta'.
+ *
+ * Order of the snapshot data: freq_sbt, config, isr, the counter value,
+ * then for each element of vhpet->timer[]: cap_config, msireg, compval,
+ * comprate and callout_sbt.
+ *
+ * Returns the value left in 'ret' by the last SNAPSHOT_VAR_OR_LEAVE macro
+ * executed.
+ */
+int
+vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
+{
+ int i, ret;
+ uint32_t countbase;
+
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);
+
+ /* at restore time the countbase should have the value it had when the
+ * snapshot was created; since the value is not directly kept in
+ * vhpet->countbase, but rather computed relative to the current system
+ * uptime using countbase_sbt, save the value returned by vhpet_counter
+ */
+ /* On save, the local 'countbase' is filled from the live counter ... */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ countbase = vhpet_counter(vhpet, NULL);
+ SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
+ /* ... on restore, it is copied into the device after the macro ran. */
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ vhpet->countbase = countbase;
+
+ for (i = 0; i < nitems(vhpet->timer); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
+ meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Restart HPET counting when the counter is enabled according to
+ * vhpet_counter_enabled(); do nothing otherwise.  Always returns 0.
+ */
+int
+vhpet_restore_time(struct vhpet *vhpet)
+{
+	if (!vhpet_counter_enabled(vhpet))
+		return (0);
+
+	vhpet_start_counting(vhpet);
+	return (0);
+}
Index: sys/amd64/vmm/io/vioapic.h
===================================================================
--- sys/amd64/vmm/io/vioapic.h
+++ sys/amd64/vmm/io/vioapic.h
@@ -32,6 +32,8 @@
#ifndef _VIOAPIC_H_
#define _VIOAPIC_H_
+struct vm_snapshot_meta;
+
#define VIOAPIC_BASE 0xFEC00000
#define VIOAPIC_SIZE 4096
@@ -49,4 +51,6 @@
int vioapic_pincount(struct vm *vm);
void vioapic_process_eoi(struct vm *vm, int vcpuid, int vector);
+int vioapic_snapshot(struct vioapic *vioapic,
+ struct vm_snapshot_meta *meta);
#endif
Index: sys/amd64/vmm/io/vioapic.c
===================================================================
--- sys/amd64/vmm/io/vioapic.c
+++ sys/amd64/vmm/io/vioapic.c
@@ -42,6 +42,7 @@
#include <x86/apicreg.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vmm_lapic.h"
@@ -499,3 +500,20 @@
return (REDIR_ENTRIES);
}
+
+/*
+ * Save or restore the state of the virtual I/O APIC through 'meta'.
+ *
+ * Order of the snapshot data: ioregsel, then 'reg' and 'acnt' of each
+ * element of vioapic->rtbl[].
+ *
+ * Returns the value left in 'ret' by the last SNAPSHOT_VAR_OR_LEAVE macro
+ * executed.
+ */
+int
+vioapic_snapshot(struct vioapic *vioapic, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->ioregsel, meta, ret, done);
+
+ for (i = 0; i < nitems(vioapic->rtbl); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->rtbl[i].reg, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->rtbl[i].acnt, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vlapic.h
===================================================================
--- sys/amd64/vmm/io/vlapic.h
+++ sys/amd64/vmm/io/vlapic.h
@@ -32,6 +32,7 @@
#define _VLAPIC_H_
struct vm;
+struct vm_snapshot_meta;
enum x2apic_state;
int vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
@@ -108,4 +109,9 @@
void vlapic_dcr_write_handler(struct vlapic *vlapic);
void vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset);
void vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val);
+
+struct LAPIC *vlapic_get_LAPIC(struct vlapic *vlapic);
+int vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta);
+int vlapic_lapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta);
+
#endif /* _VLAPIC_H_ */
Index: sys/amd64/vmm/io/vlapic.c
===================================================================
--- sys/amd64/vmm/io/vlapic.c
+++ sys/amd64/vmm/io/vlapic.c
@@ -46,6 +46,7 @@
#include <machine/smp.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_ktr.h"
@@ -1654,3 +1655,110 @@
VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
vlapic_set_tmr(vlapic, vector, true);
}
+
+/* Accessor: return the apic_page pointer stored in 'vlapic'. */
+struct LAPIC *
+vlapic_get_LAPIC(struct vlapic *vlapic)
+{
+	return (vlapic->apic_page);
+}
+
+/*
+ * Re-arm the vlapic timer callout after a restore, given the current-count
+ * value 'ccr' that was recorded in the snapshot.
+ *
+ * With a nonzero 'ccr' the callout is scheduled 'ccr' * timer_freq_bt from
+ * now; with a zero 'ccr' it is only re-armed (one full timer_period_bt from
+ * now) when vlapic_periodic_timer() reports a periodic timer.  In both cases
+ * timer_fire_bt is set to the matching absolute uptime.  Everything runs
+ * between VLAPIC_TIMER_LOCK and VLAPIC_TIMER_UNLOCK.
+ */
+static void
+vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr)
+{
+ /* The implementation is similar to the one in the
+ * `vlapic_icrtmr_write_handler` function
+ */
+ sbintime_t sbt;
+ struct bintime bt;
+
+ VLAPIC_TIMER_LOCK(vlapic);
+
+ /* bt = remaining time: one tick (timer_freq_bt) multiplied by ccr. */
+ bt = vlapic->timer_freq_bt;
+ bintime_mul(&bt, ccr);
+
+ if (ccr != 0) {
+ /* Fire time = current uptime + remaining time. */
+ binuptime(&vlapic->timer_fire_bt);
+ bintime_add(&vlapic->timer_fire_bt, &bt);
+
+ sbt = bttosbt(bt);
+ callout_reset_sbt(&vlapic->callout, sbt, 0,
+ vlapic_callout_handler, vlapic, 0);
+ } else {
+ /* even if the CCR was 0, periodic timers should be reset */
+ if (vlapic_periodic_timer(vlapic)) {
+ binuptime(&vlapic->timer_fire_bt);
+ bintime_add(&vlapic->timer_fire_bt,
+ &vlapic->timer_period_bt);
+ sbt = bttosbt(vlapic->timer_period_bt);
+
+ callout_stop(&vlapic->callout);
+ callout_reset_sbt(&vlapic->callout, sbt, 0,
+ vlapic_callout_handler, vlapic, 0);
+ }
+ }
+
+ VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+/*
+ * Save or restore the virtual local APIC of every vCPU slot (VM_MAXCPU of
+ * them) through 'meta'.
+ *
+ * Per vCPU, in order: the PAGE_SIZE APIC page, esr_pending,
+ * timer_freq_bt.sec/.frac, isrvec_stk, isrvec_stk_top, boot_state, lvt_last
+ * and finally the timer's current-count value.  On restore, timer_period_bt
+ * is recomputed and the timer callout is re-armed from the saved count.
+ *
+ * Returns 0 on success or the value left in 'ret' by the failing
+ * SNAPSHOT_*_OR_LEAVE macro.
+ */
+int
+vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+ int i, ret;
+ struct vlapic *vlapic;
+ struct LAPIC *lapic;
+ uint32_t ccr;
+
+ KASSERT(vm != NULL, ("%s: arg was NULL", __func__));
+
+ ret = 0;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vlapic = vm_lapic(vm, i);
+
+ /* snapshot the page first; timer period depends on icr_timer */
+ lapic = vlapic->apic_page;
+ SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac,
+ meta, ret, done);
+
+ /*
+ * Timer period is equal to 'icr_timer' ticks at a frequency of
+ * 'timer_freq_bt'.
+ */
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ vlapic->timer_period_bt = vlapic->timer_freq_bt;
+ bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+ }
+
+ SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk,
+ sizeof(vlapic->isrvec_stk),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done);
+
+ SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
+ sizeof(vlapic->lvt_last),
+ meta, ret, done);
+
+ /* On save, 'ccr' is sampled from the live timer before the macro. */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ ccr = vlapic_get_ccr(vlapic);
+
+ SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done);
+
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ /* Reset the value of the 'timer_fire_bt' and the vlapic
+ * callout based on the value of the current count
+ * register saved when the VM snapshot was created
+ */
+ vlapic_reset_callout(vlapic, ccr);
+ }
+ }
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vpmtmr.h
===================================================================
--- sys/amd64/vmm/io/vpmtmr.h
+++ sys/amd64/vmm/io/vpmtmr.h
@@ -34,6 +34,7 @@
#define IO_PMTMR 0x408
struct vpmtmr;
+struct vm_snapshot_meta;
struct vpmtmr *vpmtmr_init(struct vm *vm);
void vpmtmr_cleanup(struct vpmtmr *pmtmr);
@@ -41,4 +42,6 @@
int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
+int vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta);
+
#endif
Index: sys/amd64/vmm/io/vpmtmr.c
===================================================================
--- sys/amd64/vmm/io/vpmtmr.c
+++ sys/amd64/vmm/io/vpmtmr.c
@@ -36,6 +36,7 @@
#include <sys/systm.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vpmtmr.h"
@@ -103,3 +104,14 @@
return (0);
}
+
+/*
+ * Save or restore the virtual ACPI PM timer through 'meta'; the only field
+ * involved is vpmtmr->baseval.
+ *
+ * Returns the value left in 'ret' by SNAPSHOT_VAR_OR_LEAVE.
+ */
+int
+vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(vpmtmr->baseval, meta, ret, done);
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vrtc.h
===================================================================
--- sys/amd64/vmm/io/vrtc.h
+++ sys/amd64/vmm/io/vrtc.h
@@ -34,6 +34,7 @@
#include <isa/isareg.h>
struct vrtc;
+struct vm_snapshot_meta;
struct vrtc *vrtc_init(struct vm *vm);
void vrtc_cleanup(struct vrtc *vrtc);
@@ -49,4 +50,6 @@
int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
+int vrtc_snapshot(struct vrtc *vrtc, struct vm_snapshot_meta *meta);
+
#endif
Index: sys/amd64/vmm/io/vrtc.c
===================================================================
--- sys/amd64/vmm/io/vrtc.c
+++ sys/amd64/vmm/io/vrtc.c
@@ -40,6 +40,7 @@
#include <sys/sysctl.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <isa/rtc.h>
@@ -1019,3 +1020,43 @@
callout_drain(&vrtc->callout);
free(vrtc, M_VRTC);
}
+
+/*
+ * Save or restore the state of the virtual RTC through 'meta'.
+ *
+ * Order of the snapshot data: addr, base_rtctime, then the rtcdev fields in
+ * the order listed below.  On restore, base_uptime is reset to the current
+ * sbinuptime() rather than taken from the snapshot.  After all fields have
+ * been processed the RTC callout is re-armed with vrtc_callout_reset().
+ *
+ * The whole operation runs under VRTC_LOCK; the lock is dropped on every
+ * exit path, including the error exits taken by the SNAPSHOT_*_OR_LEAVE
+ * macros through the 'done' label.
+ *
+ * Returns the value left in 'ret' by the last macro executed.
+ */
+int
+vrtc_snapshot(struct vrtc *vrtc, struct vm_snapshot_meta *meta)
+{
+	int ret;
+
+	VRTC_LOCK(vrtc);
+
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->addr, meta, ret, done);
+	if (meta->op == VM_SNAPSHOT_RESTORE)
+		vrtc->base_uptime = sbinuptime();
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->base_rtctime, meta, ret, done);
+
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.sec, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_sec, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.min, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_min, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.hour, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_hour, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_week, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_month, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.month, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.year, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_a, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_b, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_c, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_d, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram, sizeof(vrtc->rtcdev.nvram),
+	    meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.century, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram2, sizeof(vrtc->rtcdev.nvram2),
+	    meta, ret, done);
+
+	/* Only reached when every field was processed successfully. */
+	vrtc_callout_reset(vrtc, vrtc_freq(vrtc));
+
+done:
+	/* Unlock on the error path too; previously 'done' skipped it. */
+	VRTC_UNLOCK(vrtc);
+	return (ret);
+}
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c
+++ sys/amd64/vmm/vmm.c
@@ -44,7 +44,7 @@
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
-#include <sys/systm.h>
+#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -53,6 +53,11 @@
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_kern.h>
+#include <vm/vnode_pager.h>
+#include <vm/swap_pager.h>
+#include <vm/uma.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
@@ -64,6 +69,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -111,6 +117,7 @@
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
uint64_t nextrip; /* (x) next instruction to execute */
+ uint64_t tsc_offset; /* (o) TSC offsetting */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@@ -119,13 +126,6 @@
#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
-struct mem_seg {
- size_t len;
- bool sysmem;
- struct vm_object *object;
-};
-#define VM_MAX_MEMSEGS 3
-
struct mem_map {
vm_paddr_t gpa;
size_t len;
@@ -181,6 +181,9 @@
#define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0)
#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
+/*
+ * XXX: Updated VMRUN to test vcpu restoring
+ */
#define VMRUN(vmi, vcpu, rip, pmap, evinfo) \
(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO)
#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
@@ -204,6 +207,12 @@
(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define VLAPIC_CLEANUP(vmi, vlapic) \
(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
+#define VM_SNAPSHOT_VMI(vmi, meta) \
+ (ops != NULL ? (*ops->vmsnapshot)(vmi, meta) : ENXIO)
+#define VM_SNAPSHOT_VMCX(vmi, meta, vcpuid) \
+ (ops != NULL ? (*ops->vmcx_snapshot)(vmi, meta, vcpuid) : ENXIO)
+#define VM_RESTORE_TSC(vmi, vcpuid, offset) \
+ (ops != NULL ? (*ops->vm_restore_tsc)(vmi, vcpuid, offset) : ENXIO)
#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
#define fpu_stop_emulating() clts()
@@ -211,6 +220,7 @@
SDT_PROVIDER_DEFINE(vmm);
static MALLOC_DEFINE(M_VM, "vm", "vm");
+static MALLOC_DEFINE(M_RESTORE, "restore", "restore");
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
@@ -289,6 +299,7 @@
vcpu->hostcpu = NOCPU;
vcpu->guestfpu = fpu_save_area_alloc();
vcpu->stats = vmm_stat_alloc();
+ vcpu->tsc_offset = 0;
}
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
@@ -795,6 +806,11 @@
}
}
+/* Accessor: return the start of the VM's mem_segs table. */
+struct mem_seg *
+vm_get_memsegs(struct vm *vm)
+{
+	return (&vm->mem_segs[0]);
+}
+
static void
vm_free_memmap(struct vm *vm, int ident)
{
@@ -1701,6 +1717,8 @@
vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
+ if (curthread->td_critnest != 1)
+ return (EINVAL);
critical_exit();
if (error == 0) {
@@ -2709,3 +2727,217 @@
VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
+
+/*
+ * Save or restore the generic (vendor-independent) per-vCPU fields of
+ * 'struct vcpu' for all VM_MAXCPU slots through 'meta'.
+ *
+ * The field order below defines the layout of this part of the snapshot.
+ *
+ * Returns the value left in 'ret' by the last SNAPSHOT_VAR_OR_LEAVE macro
+ * executed.
+ */
+static int
+vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct vcpu *vcpu;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vcpu = &vm->vcpu[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
+ /* XXX we're cheating here, since the value of tsc_offset as
+ * saved here is actually the value of the guest's TSC value.
+ *
+ * It will be turned back into an actual offset when the
+ * TSC restore function is called
+ */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->tsc_offset, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Save or restore the generic per-vCPU state (see vm_snapshot_vcpus()).
+ *
+ * For a save, every vCPU's tsc_offset is temporarily biased by the current
+ * host TSC so that the snapshot records the TSC value as seen by the guest;
+ * the bias is removed again afterwards.  The un-biasing is performed even
+ * when vm_snapshot_vcpus() fails, so a failed save never leaves the live
+ * tsc_offset values modified.
+ *
+ * Returns the status of vm_snapshot_vcpus().
+ */
+static int
+vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	int ret;
+	int i;
+	uint64_t now;
+
+	ret = 0;
+	now = rdtsc();
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		/* XXX make tsc_offset take the value TSC proper as seen by the
+		 * guest
+		 */
+		for (i = 0; i < VM_MAXCPU; i++)
+			vm->vcpu[i].tsc_offset += now;
+	}
+
+	ret = vm_snapshot_vcpus(vm, meta);
+	if (ret != 0)
+		printf("%s: failed to copy vm data to user buffer", __func__);
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		/* XXX turn tsc_offset back into an offset; actual value is only
+		 * required for restore; using it otherwise would be wrong
+		 */
+		for (i = 0; i < VM_MAXCPU; i++)
+			vm->vcpu[i].tsc_offset -= now;
+	}
+
+	return (ret);
+}
+
+/* Forward a snapshot request to the local APIC code. */
+static int
+vm_snapshot_vlapic(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	return (vlapic_snapshot(vm, meta));
+}
+
+/* Forward a snapshot request to the VM's I/O APIC. */
+static int
+vm_snapshot_vioapic(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	return (vioapic_snapshot(vm_ioapic(vm), meta));
+}
+
+/* Forward a snapshot request to the VM's HPET. */
+static int
+vm_snapshot_vhpet(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	return (vhpet_snapshot(vm_hpet(vm), meta));
+}
+
+/* Forward a snapshot request to the VM's 8259 PIC. */
+static int
+vm_snapshot_vatpic(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	return (vatpic_snapshot(vm_atpic(vm), meta));
+}
+
+/* Forward a snapshot request to the VM's 8254 PIT. */
+static int
+vm_snapshot_vatpit(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	return (vatpit_snapshot(vm_atpit(vm), meta));
+}
+
+/* Forward a snapshot request to the VM's PM timer. */
+static int
+vm_snapshot_vpmtmr(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	return (vpmtmr_snapshot(vm_pmtmr(vm), meta));
+}
+
+/* Forward a snapshot request to the VM's RTC. */
+static int
+vm_snapshot_vrtc(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	return (vrtc_snapshot(vm_rtc(vm), meta));
+}
+
+/*
+ * Run the backend VM_SNAPSHOT_VMCX operation for each of the VM_MAXCPU vCPU
+ * slots, stopping at the first failure.
+ *
+ * Returns 0 when every vCPU succeeded, otherwise the error reported for the
+ * first failing vCPU (which is also logged).
+ */
+static int
+vm_snapshot_vmcx(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	int error, i;
+
+	for (i = 0; i < VM_MAXCPU; i++) {
+		error = VM_SNAPSHOT_VMCX(vm->cookie, meta, i);
+		if (error != 0) {
+			printf("%s: failed to snapshot vmcs/vmcb data for "
+			    "vCPU: %d; error: %d\n", __func__, i, error);
+			return (error);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Entry point for snapshot requests: dispatch on meta->dev_req to the
+ * handler for the requested kernel-side structure and return its status.
+ * An unrecognized request type is logged and rejected with EINVAL.
+ */
+int
+vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	switch (meta->dev_req) {
+	case STRUCT_VMX:
+		return (VM_SNAPSHOT_VMI(vm->cookie, meta));
+	case STRUCT_VMCX:
+		return (vm_snapshot_vmcx(vm, meta));
+	case STRUCT_VM:
+		return (vm_snapshot_vm(vm, meta));
+	case STRUCT_VIOAPIC:
+		return (vm_snapshot_vioapic(vm, meta));
+	case STRUCT_VLAPIC:
+		return (vm_snapshot_vlapic(vm, meta));
+	case STRUCT_VHPET:
+		return (vm_snapshot_vhpet(vm, meta));
+	case STRUCT_VATPIC:
+		return (vm_snapshot_vatpic(vm, meta));
+	case STRUCT_VATPIT:
+		return (vm_snapshot_vatpit(vm, meta));
+	case STRUCT_VPMTMR:
+		return (vm_snapshot_vpmtmr(vm, meta));
+	case STRUCT_VRTC:
+		return (vm_snapshot_vrtc(vm, meta));
+	default:
+		break;
+	}
+
+	printf("%s: failed to find the requested type %#x\n",
+	    __func__, meta->dev_req);
+	return (EINVAL);
+}
+
+/*
+ * Record 'offset' in the tsc_offset field of vCPU 'vcpuid'.
+ * Returns EINVAL when 'vcpuid' is outside [0, VM_MAXCPU), 0 otherwise.
+ */
+int
+vm_set_tsc_offset(struct vm *vm, int vcpuid, uint64_t offset)
+{
+	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+		return (EINVAL);
+
+	vm->vcpu[vcpuid].tsc_offset = offset;
+
+	return (0);
+}
+
+/*
+ * Re-establish time-related state after a restore.
+ *
+ * The host TSC is sampled first, then the HPET is handled via
+ * vhpet_restore_time(), and finally each vCPU gets VM_RESTORE_TSC called
+ * with its stored tsc_offset minus the sampled TSC.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+int
+vm_restore_time(struct vm *vm)
+{
+	uint64_t now;
+	int error, i;
+
+	now = rdtsc();
+
+	error = vhpet_restore_time(vm_hpet(vm));
+	if (error)
+		return (error);
+
+	for (i = 0; i < nitems(vm->vcpu); i++) {
+		error = VM_RESTORE_TSC(vm->cookie, i,
+		    vm->vcpu[i].tsc_offset - now);
+		if (error)
+			return (error);
+	}
+
+	return (0);
+}
Index: sys/amd64/vmm/vmm_dev.c
===================================================================
--- sys/amd64/vmm/vmm_dev.c
+++ sys/amd64/vmm/vmm_dev.c
@@ -53,8 +53,9 @@
#include <machine/vmparam.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -369,6 +370,7 @@
struct vm_cpu_topology *topology;
uint64_t *regvals;
int *regnums;
+ struct vm_snapshot_req *snapshot_req;
error = vmm_priv_check(curthread->td_ucred);
if (error)
@@ -460,6 +462,13 @@
case VM_REINIT:
error = vm_reinit(sc->vm);
break;
+ case VM_VCPU_LOCK_ALL:
+ error = vcpu_lock_all(sc);
+ break;
+ case VM_VCPU_UNLOCK_ALL:
+ vcpu_unlock_all(sc);
+ error = 0;
+ break;
case VM_STAT_DESC: {
statdesc = (struct vm_stat_desc *)data;
error = vmm_stat_desc_copy(statdesc->index,
@@ -771,6 +780,13 @@
vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
&topology->threads, &topology->maxcpus);
error = 0;
+ break;
+ case VM_SNAPSHOT_REQ:
+ snapshot_req = (struct vm_snapshot_req *)data;
+ error = vm_snapshot_req(sc->vm, &snapshot_req->meta);
+ break;
+ case VM_RESTORE_TIME:
+ error = vm_restore_time(sc->vm);
break;
default:
error = ENOTTY;
@@ -1000,6 +1016,7 @@
error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+ /* TODO: shouldn't be done inside lock? */
if (error != 0) {
vmmdev_destroy(sc);
return (error);
Index: sys/amd64/vmm/vmm_snapshot.c
===================================================================
--- /dev/null
+++ sys/amd64/vmm/vmm_snapshot.c
@@ -0,0 +1,146 @@
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/vmm_snapshot.h>
+
+/*
+ * Log that snapshotting the buffer called 'bufname' failed.  'op' selects the
+ * wording: "save", "restore", or "unknown" for any other value.
+ */
+void
+vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op)
+{
+	const char *opname;
+
+	if (op == VM_SNAPSHOT_SAVE)
+		opname = "save";
+	else if (op == VM_SNAPSHOT_RESTORE)
+		opname = "restore";
+	else
+		opname = "unknown";
+
+	printf("%s: snapshot-%s failed for %s\r\n", __func__, opname, bufname);
+}
+
+/*
+ * Transfer 'data_size' bytes between the kernel object 'data' and the
+ * snapshot buffer in 'meta': copyout() on VM_SNAPSHOT_SAVE, copyin() on
+ * VM_SNAPSHOT_RESTORE.  The buffer cursor advances only after a successful
+ * copy.
+ *
+ * Returns 0 on success, E2BIG if fewer than 'data_size' bytes remain, EINVAL
+ * for any other operation, or the error reported by copyin()/copyout().
+ */
+int
+vm_snapshot_buf(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *buffer;
+	void *nv_data;
+	int error;
+
+	buffer = &meta->buffer;
+	nv_data = __DEVOLATILE(void *, data);
+
+	if (buffer->buf_rem < data_size) {
+		printf("%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		error = copyout(nv_data, buffer->buf, data_size);
+	else if (meta->op == VM_SNAPSHOT_RESTORE)
+		error = copyin(buffer->buf, nv_data, data_size);
+	else
+		error = EINVAL;
+	if (error != 0)
+		return (error);
+
+	buffer->buf += data_size;
+	buffer->buf_rem -= data_size;
+
+	return (0);
+}
+
+/*
+ * Number of buffer bytes consumed so far (buf_size - buf_rem), or 0 after
+ * logging a message when buf_rem is larger than buf_size.
+ */
+size_t
+vm_get_snapshot_size(struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *buffer;
+
+	buffer = &meta->buffer;
+
+	if (buffer->buf_size < buffer->buf_rem) {
+		printf("%s: Invalid buffer: size = %zu, rem = %zu\r\n",
+		    __func__, buffer->buf_size, buffer->buf_rem);
+		return (0);
+	}
+
+	return (buffer->buf_size - buffer->buf_rem);
+}
+
+/* Kernel-side no-op: always returns 0 and leaves '*addrp' untouched. */
+int
+vm_snapshot_guest2host_addr(void **addrp, size_t len, int restore_null,
+    struct vm_snapshot_meta *meta)
+{
+	/* The kernel devices/structures should not map guest memory */
+	return (0);
+}
+
+/*
+ * Like vm_snapshot_buf() on VM_SNAPSHOT_SAVE.  On VM_SNAPSHOT_RESTORE the
+ * saved bytes are compared with 'data' instead of overwriting it.  The buffer
+ * is reached through copyin()/copyout() everywhere else in this file, so the
+ * saved bytes are fetched with copyin(), one small chunk at a time, rather
+ * than being dereferenced directly.
+ *
+ * Returns 0 when saved or equal, the non-zero memcmp() result of the first
+ * differing chunk, E2BIG/EINVAL as vm_snapshot_buf(), or a copy error.  The
+ * cursor advances after a completed save or compare, but not after an error.
+ */
+int
+vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *buffer;
+	const uint8_t *kdata;
+	uint8_t chunk[64];
+	size_t len, off;
+	int ret;
+
+	buffer = &meta->buffer;
+	kdata = __DEVOLATILE(const uint8_t *, data);
+
+	if (buffer->buf_rem < data_size) {
+		printf("%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		ret = copyout(kdata, buffer->buf, data_size);
+		if (ret != 0)
+			return (ret);
+	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
+		ret = 0;
+		for (off = 0; ret == 0 && off < data_size; off += len) {
+			len = data_size - off;
+			if (len > sizeof(chunk))
+				len = sizeof(chunk);
+			ret = copyin(buffer->buf + off, chunk, len);
+			if (ret != 0)
+				return (ret);
+			ret = memcmp(kdata + off, chunk, len);
+		}
+	} else {
+		return (EINVAL);
+	}
+
+	buffer->buf += data_size;
+	buffer->buf_rem -= data_size;
+
+	return (ret);
+}
Index: sys/modules/vmm/Makefile
===================================================================
--- sys/modules/vmm/Makefile
+++ sys/modules/vmm/Makefile
@@ -18,6 +18,7 @@
vmm_dev.c \
vmm_host.c \
vmm_instruction_emul.c \
+ vmm_snapshot.c \
vmm_ioport.c \
vmm_lapic.c \
vmm_mem.c \
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -57,6 +57,7 @@
rfb.c \
rtc.c \
smbiostbl.c \
+ snapshot.c \
sockstream.c \
task_switch.c \
uart_emul.c \
@@ -71,7 +72,7 @@
.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
SRCS+= vmm_instruction_emul.c
-LIBADD= vmmapi md pthread z util sbuf cam
+LIBADD= vmmapi md pthread z util sbuf cam ucl xo
.if ${MK_INET_SUPPORT} != "no"
CFLAGS+=-DINET
@@ -92,6 +93,10 @@
.ifdef GDB_LOG
CFLAGS+=-DGDB_LOG
.endif
+CFLAGS+= -I${SRCTOP}/contrib/libucl/include
+
+# Temporarily disable capsicum, until we integrate checkpoint code with it.
+CFLAGS+= -DWITHOUT_CAPSICUM
WARNS?= 2
Index: usr.sbin/bhyve/Makefile.depend
===================================================================
--- usr.sbin/bhyve/Makefile.depend
+++ usr.sbin/bhyve/Makefile.depend
@@ -12,8 +12,10 @@
lib/libcompiler_rt \
lib/libmd \
lib/libthr \
+ lib/libucl \
lib/libutil \
lib/libvmmapi \
+ lib/libxo \
lib/libz \
secure/lib/libcrypto \
Index: usr.sbin/bhyve/atkbdc.h
===================================================================
--- usr.sbin/bhyve/atkbdc.h
+++ usr.sbin/bhyve/atkbdc.h
@@ -30,9 +30,12 @@
#define _ATKBDC_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct vmctx;
void atkbdc_init(struct vmctx *ctx);
void atkbdc_event(struct atkbdc_softc *sc, int iskbd);
+int atkbdc_snapshot(struct vm_snapshot_meta *meta);
+
#endif /* _ATKBDC_H_ */
Index: usr.sbin/bhyve/atkbdc.c
===================================================================
--- usr.sbin/bhyve/atkbdc.c
+++ usr.sbin/bhyve/atkbdc.c
@@ -33,6 +33,7 @@
#include <sys/types.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
@@ -137,6 +138,8 @@
struct aux_dev aux;
};
+static struct atkbdc_softc *atkbdc_sc = NULL;
+
static void
atkbdc_assert_kbd_intr(struct atkbdc_softc *sc)
{
@@ -548,6 +551,43 @@
sc->ps2kbd_sc = ps2kbd_init(sc);
sc->ps2mouse_sc = ps2mouse_init(sc);
+
+ assert(atkbdc_sc == NULL);
+ atkbdc_sc = sc;
+}
+
+/* Snapshot the controller state held in the file-scope atkbdc_sc via 'meta'. */
+int
+atkbdc_snapshot(struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->outport, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(atkbdc_sc->ram,
+ sizeof(atkbdc_sc->ram), meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->curcmd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->ctrlbyte, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd, meta, ret, done);
+ /* NOTE(review): kbd was passed whole just above and is repeated field by field. */
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.irq_active, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.irq, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(atkbdc_sc->kbd.buffer,
+ sizeof(atkbdc_sc->kbd.buffer), meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.brd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.bwr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.bcnt, meta, ret, done);
+ /* Only the two interrupt fields of the aux device are covered here. */
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq_active, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq, meta, ret, done);
+ /* Finally the attached PS/2 keyboard and mouse; a keyboard error skips the mouse. */
+ ret = ps2kbd_snapshot(atkbdc_sc->ps2kbd_sc, meta);
+ if (ret != 0)
+ goto done;
+ ret = ps2mouse_snapshot(atkbdc_sc->ps2mouse_sc, meta);
+
+done:
+ return (ret);
}
static void
Index: usr.sbin/bhyve/bhyverun.h
===================================================================
--- usr.sbin/bhyve/bhyverun.h
+++ usr.sbin/bhyve/bhyverun.h
@@ -28,6 +28,8 @@
* $FreeBSD$
*/
+ #include <ucl.h>
+
#ifndef _FBSDRUN_H_
#define _FBSDRUN_H_
@@ -37,9 +39,10 @@
struct vmctx;
extern int guest_ncpus;
extern char *guest_uuid_str;
-extern char *vmname;
+extern const char *vmname;
void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
+uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr);
void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu);
void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip);
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c
+++ usr.sbin/bhyve/bhyverun.c
@@ -36,6 +36,7 @@
#include <sys/capsicum.h>
#endif
#include <sys/mman.h>
+#include <sys/stat.h>
#include <sys/time.h>
#include <amd64/vmm/intel/vmcs.h>
@@ -81,10 +82,20 @@
#include "pci_irq.h"
#include "pci_lpc.h"
#include "smbiostbl.h"
+#include "snapshot.h"
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <libxo/xo.h>
+#include <ucl.h>
+#include <unistd.h>
+
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
#define MB (1024UL * 1024)
@@ -157,10 +168,12 @@
[EXIT_REASON_XRSTORS] = "XRSTORS"
};
+#define MAX_SOCK_NAME 200
+
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
-char *vmname;
+const char *vmname;
int guest_ncpus;
uint16_t cores, maxcpus, sockets, threads;
@@ -223,6 +236,7 @@
" -H: vmexit from the guest on hlt\n"
" -l: LPC device configuration\n"
" -m: memory size in MB\n"
+ " -r: path to checkpoint file\n"
" -p: pin 'vcpu' to 'hostcpu'\n"
" -P: vmexit from the guest on pause\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
@@ -382,6 +396,12 @@
return (vm_map_gpa(ctx, gaddr, len));
}
+uintptr_t
+paddr_host2guest(struct vmctx *ctx, void *addr)
+{
+ return (vm_rev_map_gpa(ctx, addr)); /* host pointer -> guest address, per vmmapi.h */
+}
+
int
fbsdrun_vmexit_on_pause(void)
{
@@ -972,17 +992,35 @@
return (ctx);
}
+/* Read the vcpu's RIP, set its capabilities, then add it via fbsdrun_addcpu(). */
+void
+spinup_vcpu(struct vmctx *ctx, int vcpu)
+{
+ int error;
+ uint64_t rip;
+
+ error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
+ assert(error == 0);
+
+ fbsdrun_set_capabilities(ctx, vcpu);
+ error = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
+ assert(error == 0);
+ fbsdrun_addcpu(ctx, BSP, vcpu, rip);
+}
+
int
main(int argc, char *argv[])
{
int c, error, dbg_port, gdb_port, err, bvmcons;
- int max_vcpus, mptgen, memflags;
+ int max_vcpus, mptgen, memflags, vcpu;
int rtc_localtime;
bool gdb_stop;
struct vmctx *ctx;
- uint64_t rip;
size_t memsize;
- char *optstr;
+ char *optstr, *restore_file;
+ struct restore_state rstate;
+
+ restore_file = NULL;
bvmcons = 0;
progname = basename(argv[0]);
@@ -997,7 +1035,7 @@
rtc_localtime = 1;
memflags = 0;
- optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:";
+ optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:r:";
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
case 'a':
@@ -1043,6 +1081,9 @@
"configuration '%s'", optarg);
}
break;
+ case 'r':
+ restore_file = optarg;
+ break;
case 's':
if (strncmp(optarg, "help", strlen(optarg)) == 0) {
pci_print_supported_devices();
@@ -1104,12 +1145,41 @@
argc -= optind;
argv += optind;
- if (argc != 1)
+ if (argc > 1 || (argc == 0 && restore_file == NULL))
usage(1);
+ if (restore_file != NULL) {
+ error = load_restore_file(restore_file, &rstate);
+ if (error) {
+ fprintf(stderr, "Failed to read checkpoint info from "
+ "file: '%s'.\n", restore_file);
+ exit(1);
+ }
+ }
+
+ if (argc == 1) {
vmname = argv[0];
+ } else {
+ vmname = lookup_vmname(&rstate);
+ if (vmname == NULL) {
+ fprintf(stderr, "Cannot find VM name in restore file. "
+ "Please specify one.\n");
+ exit(1);
+ }
+ }
ctx = do_open(vmname);
+ if (restore_file != NULL) {
+ guest_ncpus = lookup_guest_ncpus(&rstate);
+ memflags = lookup_memflags(&rstate);
+ memsize = lookup_memsize(&rstate);
+ }
+
+ if (guest_ncpus < 1) {
+ fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
+ exit(1);
+ }
+
max_vcpus = num_vcpus_allowed(ctx);
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",
@@ -1117,8 +1187,6 @@
exit(4);
}
- fbsdrun_set_capabilities(ctx, BSP);
-
vm_set_memflags(ctx, memflags);
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
if (err) {
@@ -1168,8 +1236,37 @@
assert(error == 0);
}
- error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
- assert(error == 0);
+ if (restore_file != NULL) {
+ fprintf(stdout, "Pausing pci devs...\r\n");
+ if (vm_pause_user_devs(ctx) != 0) {
+ fprintf(stderr, "Failed to pause PCI device state.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring vm mem...\r\n");
+ if (restore_vm_mem(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore VM memory.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring pci devs...\r\n");
+ if (vm_restore_user_devs(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore PCI device state.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring kernel structs...\r\n");
+ if (vm_restore_kern_structs(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore kernel structs.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Resuming pci devs...\r\n");
+ if (vm_resume_user_devs(ctx) != 0) {
+ fprintf(stderr, "Failed to resume PCI device state.\n");
+ exit(1);
+ }
+ }
/*
* build the guest tables, MP etc.
@@ -1208,10 +1305,34 @@
errx(EX_OSERR, "cap_enter() failed");
#endif
+ if (restore_file != NULL)
+ destroy_restore_state(&rstate);
+
/*
- * Add CPU 0
+ * checkpointing thread for communication with bhyvectl
+ */
+ if (init_checkpoint_thread(ctx) < 0)
+ printf("Failed to start checkpoint thread!\r\n");
+
+ /*
+ * Change the proc title to include the VM name.
*/
- fbsdrun_addcpu(ctx, BSP, BSP, rip);
+ setproctitle("%s", vmname);
+
+ if (restore_file != NULL) {
+ vm_restore_time(ctx);
+ }
+
+ /* Add CPU 0.
+ * If we restore a VM, start all vCPUs now (including the APs); otherwise,
+ * let the guest OS spin them up later via vmexits.
+ */
+
+ for (vcpu = 0; vcpu < guest_ncpus; vcpu++)
+ if (vcpu == BSP || restore_file) {
+ fprintf(stdout, "spinning up vcpu no %d...\r\n", vcpu);
+ spinup_vcpu(ctx, vcpu);
+ }
/*
* Head off to the main event dispatch loop
Index: usr.sbin/bhyve/block_if.h
===================================================================
--- usr.sbin/bhyve/block_if.h
+++ usr.sbin/bhyve/block_if.h
@@ -41,6 +41,9 @@
#include <sys/uio.h>
#include <sys/unistd.h>
+struct vm_snapshot_meta;
+
+
#define BLOCKIF_IOV_MAX 33 /* not practical to be IOV_MAX */
struct blockif_req {
@@ -68,5 +71,11 @@
int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_close(struct blockif_ctxt *bc);
+void blockif_pause(struct blockif_ctxt *bc);
+void blockif_resume(struct blockif_ctxt *bc);
+int blockif_snapshot_req(struct blockif_req *br,
+ struct vm_snapshot_meta *meta);
+int blockif_snapshot(struct blockif_ctxt *bc,
+ struct vm_snapshot_meta *meta);
#endif /* _BLOCK_IF_H_ */
Index: usr.sbin/bhyve/block_if.c
===================================================================
--- usr.sbin/bhyve/block_if.c
+++ usr.sbin/bhyve/block_if.c
@@ -57,6 +57,7 @@
#include <unistd.h>
#include <machine/atomic.h>
+#include <machine/vmm_snapshot.h>
#include "bhyverun.h"
#include "mevent.h"
@@ -67,6 +68,9 @@
#define BLOCKIF_NUMTHR 8
#define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR)
+#define NO_THREAD_IDX (-2)
+#define REQ_IDX_SEPARATOR (-1)
+
enum blockop {
BOP_READ,
BOP_WRITE,
@@ -103,9 +107,13 @@
int bc_psectsz;
int bc_psectoff;
int bc_closing;
+ int bc_paused;
+ int bc_work_count;
pthread_t bc_btid[BLOCKIF_NUMTHR];
pthread_mutex_t bc_mtx;
pthread_cond_t bc_cond;
+ pthread_cond_t bc_paused_cond;
+ pthread_cond_t bc_work_done_cond;
/* Request elements and free/pending/busy queues */
TAILQ_HEAD(, blockif_elem) bc_freeq;
@@ -208,6 +216,18 @@
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
}
+/* Flush bc_fd (DIOCGFLUSH ioctl if bc_ischr, else fsync); 0 or errno. */
+static int
+blockif_flush_bc(struct blockif_ctxt *bc)
+{
+	int rc;
+
+	rc = bc->bc_ischr ? ioctl(bc->bc_fd, DIOCGFLUSH) : fsync(bc->bc_fd);
+
+	/* Any non-zero return is reported as the current errno. */
+	return (rc != 0 ? errno : 0);
+}
+
static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
@@ -298,11 +318,7 @@
}
break;
case BOP_FLUSH:
- if (bc->bc_ischr) {
- if (ioctl(bc->bc_fd, DIOCGFLUSH))
- err = errno;
- } else if (fsync(bc->bc_fd))
- err = errno;
+ err = blockif_flush_bc(bc);
break;
case BOP_DELETE:
if (!bc->bc_candelete)
@@ -346,15 +362,34 @@
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
- while (blockif_dequeue(bc, t, &be)) {
+ bc->bc_work_count++;
+
+ /* We cannot process work if the interface is paused */
+ while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
blockif_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
- /* Check ctxt status here to see if exit requested */
+
+ bc->bc_work_count--;
+
+ /* If none of the workers is busy, notify the main thread */
+ if (bc->bc_work_count == 0)
+ pthread_cond_broadcast(&bc->bc_work_done_cond);
+
+ /*
+ * Check ctxt status here to see if exit requested
+ *
+ * No sense to wait while paused if closing anyway
+ */
if (bc->bc_closing)
break;
+
+ /* Make all worker threads wait here if the device is paused */
+ while (bc->bc_paused)
+ pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx);
+
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
}
pthread_mutex_unlock(&bc->bc_mtx);
@@ -558,6 +593,10 @@
bc->bc_psectoff = psectoff;
pthread_mutex_init(&bc->bc_mtx, NULL);
pthread_cond_init(&bc->bc_cond, NULL);
+ bc->bc_paused = 0;
+ bc->bc_work_count = 0;
+ pthread_cond_init(&bc->bc_paused_cond, NULL);
+ pthread_cond_init(&bc->bc_work_done_cond, NULL);
TAILQ_INIT(&bc->bc_freeq);
TAILQ_INIT(&bc->bc_pendq);
TAILQ_INIT(&bc->bc_busyq);
@@ -589,6 +628,7 @@
err = 0;
pthread_mutex_lock(&bc->bc_mtx);
+ /* should make thread wait if interface paused ? */
if (!TAILQ_EMPTY(&bc->bc_freeq)) {
/*
* Enqueue and inform the block i/o thread
@@ -650,6 +690,8 @@
assert(bc->bc_magic == BLOCKIF_SIG);
pthread_mutex_lock(&bc->bc_mtx);
+ /* XXX: not waiting while paused */
+
/*
* Check pending requests.
*/
@@ -848,3 +890,96 @@
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_candelete);
}
+
+/* Pause: set bc_paused, wait until bc_work_count is zero, then flush the backing store. */
+void
+blockif_pause(struct blockif_ctxt *bc)
+{
+ assert(bc != NULL);
+ assert(bc->bc_magic == BLOCKIF_SIG);
+ pthread_mutex_lock(&bc->bc_mtx);
+ bc->bc_paused = 1;
+ /* The interface is paused. Wait for workers to finish their work */
+ while (bc->bc_work_count)
+ pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx);
+ pthread_mutex_unlock(&bc->bc_mtx);
+
+ if (blockif_flush_bc(bc))
+ fprintf(stderr, "%s: [WARN] failed to flush backing file.\r\n",
+ __func__);
+}
+
+/* Clear bc_paused under bc_mtx and broadcast on both worker condition variables. */
+void
+blockif_resume(struct blockif_ctxt *bc)
+{
+ assert(bc != NULL);
+ assert(bc->bc_magic == BLOCKIF_SIG);
+ pthread_mutex_lock(&bc->bc_mtx);
+ bc->bc_paused = 0;
+ /* resume the threads waiting on bc_paused_cond */
+ pthread_cond_broadcast(&bc->bc_paused_cond);
+ /* kick the threads waiting on bc_cond after restore */
+ pthread_cond_broadcast(&bc->bc_cond);
+ pthread_mutex_unlock(&bc->bc_mtx);
+}
+
+/*
+ * Snapshot the iovec count, offset, residual and iovecs of 'br' via 'meta';
+ * br_callback/br_param are left for the owning device to fill in.  Returns
+ * EINVAL if br_iovcnt cannot fit br_iov[], else what the macros leave in ret.
+ */
+int
+blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta)
+{
+	struct iovec *iov;
+	int i, ret;
+
+	SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done);
+
+	/* Count may come from a snapshot; assumes br_iov[BLOCKIF_IOV_MAX]. */
+	if (br->br_iovcnt < 0 || br->br_iovcnt > BLOCKIF_IOV_MAX)
+		return (EINVAL);
+	for (i = 0; i < br->br_iovcnt; i++) {
+		iov = &br->br_iov[i];
+		SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done);
+		/* we assume the iov is a guest-mapped address */
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len,
+		    false, meta, ret, done);
+	}
+
+done:
+	return (ret);
+}
+
+/* Snapshot bc's scalar fields; ENXIO unless paused (checked under bc_mtx). */
+int
+blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta)
+{
+	int ret;
+
+	pthread_mutex_lock(&bc->bc_mtx);
+	if (bc->bc_paused == 0) {
+		fprintf(stderr, "%s: Snapshot failed: "
+		    "interface not paused.\r\n", __func__);
+		ret = ENXIO;
+		goto done;
+	}
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done);
+
+done:
+	pthread_mutex_unlock(&bc->bc_mtx);
+	return (ret);
+}
+
Index: usr.sbin/bhyve/pci_ahci.c
===================================================================
--- usr.sbin/bhyve/pci_ahci.c
+++ usr.sbin/bhyve/pci_ahci.c
@@ -41,6 +41,8 @@
#include <sys/ata.h>
#include <sys/endian.h>
+#include <machine/vmm_snapshot.h>
+
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@@ -131,6 +133,7 @@
uint32_t done;
int slot;
int more;
+ int readop;
};
struct ahci_port {
@@ -724,6 +727,7 @@
aior->slot = slot;
aior->len = len;
aior->done = done;
+ aior->readop = readop;
breq = &aior->io_req;
breq->br_offset = lba + done;
ahci_build_iov(p, aior, prdt, hdr->prdtl);
@@ -1420,6 +1424,7 @@
aior->slot = slot;
aior->len = len;
aior->done = done;
+ aior->readop = 1;
breq = &aior->io_req;
breq->br_offset = lba + done;
ahci_build_iov(p, aior, prdt, hdr->prdtl);
@@ -1815,6 +1820,7 @@
#endif
cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba,
0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry));
+ printf(" >>> cfis: %p, gaddr: %#08lx, len: %zx, port: %d\r\n", cfis, hdr->ctba, 0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry), p->port);
#ifdef AHCI_DEBUG
prdt = (struct ahci_prdt_entry *)(cfis + 0x80);
@@ -2446,6 +2452,291 @@
return (pci_ahci_init(ctx, pi, opts, 1));
}
+/*
+ * Save path: write the index (into port->ioreq[]) of every request on the
+ * free list, a -1 terminator, then for every request on the busy list its
+ * index followed by its blockif request, and a final -1 terminator.
+ */
+static int
+pci_ahci_snapshot_save_queues(struct ahci_port *port,
+    struct vm_snapshot_meta *meta)
+{
+	struct ahci_ioreq *ioreq;
+	int idx, ret;
+
+	STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) {
+		idx = (int)(ioreq - port->ioreq);
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+	}
+	idx = -1;
+	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+	TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) {
+		idx = (int)(ioreq - port->ioreq);
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+		/* only busy requests carry a valid blockif request */
+		ret = blockif_snapshot_req(&ioreq->io_req, meta);
+		if (ret != 0) {
+			fprintf(stderr, "%s: failed to snapshot req\r\n",
+			    __func__);
+			goto done;
+		}
+	}
+	idx = -1;
+	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+done:
+	return (ret);
+}
+
+/*
+ * Restore path: empty and rebuild the free list from saved indices (up to the
+ * first -1), then restore and re-submit each busy request (up to the second
+ * -1).  An index outside [0, port->ioqsz) fails with EINVAL.
+ */
+static int
+pci_ahci_snapshot_restore_queues(struct ahci_port *port,
+    struct vm_snapshot_meta *meta)
+{
+	struct ahci_ioreq *ioreq;
+	int idx, ret;
+
+	while (!STAILQ_EMPTY(&port->iofhd))
+		STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
+
+	/* restore the free queue */
+	for (;;) {
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+		if (idx == -1)
+			break;
+		if (idx < 0 || idx >= port->ioqsz) {
+			ret = EINVAL;
+			goto done;
+		}
+		STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
+	}
+
+	/* restore the busy queue; only busy requests carry a blockif request */
+	for (;;) {
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+		if (idx == -1)
+			break;
+		if (idx < 0 || idx >= port->ioqsz) {
+			ret = EINVAL;
+			goto done;
+		}
+		ioreq = &port->ioreq[idx];
+		TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
+		ret = blockif_snapshot_req(&ioreq->io_req, meta);
+		if (ret != 0) {
+			fprintf(stderr, "%s: failed to restore request\r\n",
+			    __func__);
+			goto done;
+		}
+		/* re-enqueue the requests in the block interface */
+		if (ioreq->readop)
+			ret = blockif_read(port->bctx, &ioreq->io_req);
+		else
+			ret = blockif_write(port->bctx, &ioreq->io_req);
+		if (ret != 0) {
+			fprintf(stderr, "%s: failed to re-enqueue request\r\n",
+			    __func__);
+			goto done;
+		}
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Snapshot handler of the ahci emulations: controller-level fields first,
+ * then for each port with a non-NULL bctx its fields, the bookkeeping of every
+ * ioreq, the free/busy request queues and the block interface.  Returns what
+ * 'ret' holds at the first failure or at the end; mismatches give EINVAL.
+ */
+static int
+pci_ahci_snapshot(struct vm_snapshot_meta *meta)
+{
+	int i, j, ret;
+	void *bctx;
+	struct pci_devinst *pi;
+	struct pci_ahci_softc *sc;
+	struct ahci_port *port;
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_ioreq *ioreq;
+
+	pi = meta->dev_data;
+	sc = pi->pi_arg;
+
+	/* TODO: add mtx lock/unlock */
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
+
+	for (i = 0; i < MAX_PORTS; i++) {
+		port = &sc->port[i];
+
+		if (meta->op == VM_SNAPSHOT_SAVE)
+			bctx = port->bctx;
+
+		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
+
+		/* mostly for restore; save is ensured by the lines above */
+		if (((bctx == NULL) && (port->bctx != NULL)) ||
+		    ((bctx != NULL) && (port->bctx == NULL))) {
+			fprintf(stderr, "%s: ports not matching\r\n", __func__);
+
+			ret = EINVAL;
+			goto done;
+		}
+
+		if (port->bctx == NULL)
+			continue;
+
+		if (port->port != i) {
+			fprintf(stderr, "%s: ports not matching: "
+			    "actual: %d expected: %d\r\n",
+			    __func__, port->port, i);
+
+			ret = EINVAL;
+			goto done;
+		}
+
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst,
+		    AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta,
+		    ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(port->ident, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->is, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
+
+		for (j = 0; j < port->ioqsz; j++) {
+			ioreq = &port->ioreq[j];
+
+			/* blockif_req snapshot done only for busy requests */
+			/* NOTE(review): hdr is indexed by j, not ioreq->slot -- confirm */
+			hdr = (struct ahci_cmd_hdr *)(port->cmd_lst + j * AHCI_CL_SIZE);
+			SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
+			    0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
+			    false, meta, ret, done);
+
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
+		}
+
+		/* Perform save / restore specific operations */
+		if (meta->op == VM_SNAPSHOT_SAVE) {
+			ret = pci_ahci_snapshot_save_queues(port, meta);
+			if (ret != 0)
+				goto done;
+		} else if (meta->op == VM_SNAPSHOT_RESTORE) {
+			ret = pci_ahci_snapshot_restore_queues(port, meta);
+			if (ret != 0)
+				goto done;
+		} else {
+			/* error */
+			ret = EINVAL;
+			goto done;
+		}
+
+		ret = blockif_snapshot(port->bctx, meta);
+		if (ret != 0) {
+			fprintf(stderr, "%s: failed to restore blockif\r\n",
+			    __func__);
+
+			goto done;
+		}
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * pe_pause hook of the ahci emulations: pause the block interface behind
+ * every port that has one (ports with a NULL bctx are skipped).
+ * 'ctx' is unused.  Always returns 0.
+ */
+static int
+pci_ahci_pause(struct vmctx *ctx, struct pci_devinst *pi)
+{
+	struct pci_ahci_softc *sc;
+	int i;
+
+	sc = pi->pi_arg;
+	for (i = 0; i < MAX_PORTS; i++) {
+		if (sc->port[i].bctx != NULL)
+			blockif_pause(sc->port[i].bctx);
+	}
+
+	return (0);
+}
+
+/*
+ * pe_resume hook of the ahci emulations: resume the block interface behind
+ * every port that has one (ports with a NULL bctx are skipped).
+ * 'ctx' is unused.  Always returns 0.
+ */
+static int
+pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
+{
+	struct pci_ahci_softc *sc;
+	int i;
+
+	sc = pi->pi_arg;
+	for (i = 0; i < MAX_PORTS; i++) {
+		if (sc->port[i].bctx != NULL)
+			blockif_resume(sc->port[i].bctx);
+	}
+
+	return (0);
+}
+
/*
* Use separate emulation names to distinguish drive and atapi devices
*/
@@ -2453,7 +2744,10 @@
.pe_emu = "ahci",
.pe_init = pci_ahci_hd_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
};
PCI_EMUL_SET(pci_de_ahci);
@@ -2461,7 +2755,10 @@
.pe_emu = "ahci-hd",
.pe_init = pci_ahci_hd_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
};
PCI_EMUL_SET(pci_de_ahci_hd);
@@ -2469,6 +2766,9 @@
.pe_emu = "ahci-cd",
.pe_init = pci_ahci_atapi_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
};
PCI_EMUL_SET(pci_de_ahci_cd);
Index: usr.sbin/bhyve/pci_e82545.c
===================================================================
--- usr.sbin/bhyve/pci_e82545.c
+++ usr.sbin/bhyve/pci_e82545.c
@@ -46,6 +46,8 @@
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
+#include <machine/vmm_snapshot.h>
+
#include <err.h>
#include <errno.h>
#include <fcntl.h>
@@ -2381,11 +2383,158 @@
return (0);
}
+/*
+ * Save/restore the e82545 device model state.
+ *
+ * The softc is reached through the pci_devinst stored in meta->dev_data.
+ * Fields are pushed through the SNAPSHOT_* macros one by one, so the order
+ * of the statements below defines the layout of the snapshot and must be
+ * the same for save and restore. The macros are handed 'ret' and the
+ * 'done' label, which is where the function leaves with the result.
+ */
+static int
+e82545_snapshot(struct vm_snapshot_meta *meta)
+{
+ int i;
+ int ret;
+ struct e82545_softc *sc;
+ struct pci_devinst *pi;
+ uint64_t bitmap_value;
+
+ pi = meta->dev_data;
+ sc = pi->pi_arg;
+
+ /* esc_mevp and esc_mevpitr should be reinitialized at init */
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_mac, meta, ret, done);
+
+ /* General */
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_CTRL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAH, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCT, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_VET, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCTTV, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_LEDCTL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_PBA, meta, ret, done);
+
+ /* Interrupt control */
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_irq_asserted, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICR, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_ITR, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICS, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMS, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMC, meta, ret, done);
+
+ /* Transmit */
+ /* The fields in the unions are in superposition to access certain
+ * bytes in the larger uint variables
+ * e.g., ip_config = [ipcss|ipcso|ipcse0|ipcse1]
+ */
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.lower_setup.ip_config, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.upper_setup.tcp_config, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.cmd_and_length, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.tcp_seg_setup.data, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_active, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXCW, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TCTL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIPG, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_AIT, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_tdba, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAH, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDLEN, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDH, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDHr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDT, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIDV, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXDCTL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_TADV, meta, ret, done);
+
+ /* Has dependency on esc_TDLEN; reorder of fields from struct */
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_txdesc, sc->esc_TDLEN,
+ true, meta, ret, done);
+
+ /* L2 frame acceptance */
+ for (i = 0; i < nitems(sc->esc_uni); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_valid, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_addrsel, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_eth, meta, ret, done);
+ }
+
+ SNAPSHOT_BUF_OR_LEAVE(sc->esc_fmcast, sizeof(sc->esc_fmcast),
+ meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->esc_fvlan, sizeof(sc->esc_fvlan),
+ meta, ret, done);
+
+ /* Receive */
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_active, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_loopback, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RCTL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTH, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_rdba, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAH, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDLEN, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDH, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDT, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDTR, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXDCTL, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RADV, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RSRPD, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXCSUM, meta, ret, done);
+
+ /*
+ * Has dependency on esc_RDLEN; reorder of fields from struct.
+ * The RX ring must be sized by the RX length register, not esc_TDLEN.
+ */
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_rxdesc, sc->esc_RDLEN,
+ true, meta, ret, done);
+
+ /* IO Port register access */
+ SNAPSHOT_VAR_OR_LEAVE(sc->io_addr, meta, ret, done);
+ /* Shadow copy of MDIC */
+ SNAPSHOT_VAR_OR_LEAVE(sc->mdi_control, meta, ret, done);
+ /* Shadow copy of EECD */
+ SNAPSHOT_VAR_OR_LEAVE(sc->eeprom_control, meta, ret, done);
+ /* Latest NVM in/out */
+ SNAPSHOT_VAR_OR_LEAVE(sc->nvm_data, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->nvm_opaddr, meta, ret, done);
+ /* stats */
+ SNAPSHOT_VAR_OR_LEAVE(sc->missed_pkt_count, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->pkt_rx_by_size, sizeof(sc->pkt_rx_by_size),
+ meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->pkt_tx_by_size, sizeof(sc->pkt_tx_by_size),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_rx_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_rx_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_rx_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_tx_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_tx_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_tx_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->oversize_rx_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->tso_tx_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_rx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_tx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->missed_octets, meta, ret, done);
+
+ /*
+ * The NVM state fields go through a temporary instead of being handed
+ * to the macro directly (presumably because they are bit-fields).
+ * NOTE(review): this stanza used to be a verbatim duplicate of the
+ * nvm_bits one below; nvm_mode is assumed to be the intended field -
+ * confirm against struct e82545_softc.
+ */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ bitmap_value = sc->nvm_mode;
+ SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done);
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ sc->nvm_mode = bitmap_value;
+
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ bitmap_value = sc->nvm_bits;
+ SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done);
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ sc->nvm_bits = bitmap_value;
+
+ /* EEPROM data */
+ SNAPSHOT_BUF_OR_LEAVE(sc->eeprom_data, sizeof(sc->eeprom_data),
+ meta, ret, done);
+
+done:
+ return (ret);
+}
+
struct pci_devemu pci_de_e82545 = {
.pe_emu = "e1000",
.pe_init = e82545_init,
.pe_barwrite = e82545_write,
- .pe_barread = e82545_read
+ .pe_barread = e82545_read,
+ .pe_snapshot = e82545_snapshot,
};
PCI_EMUL_SET(pci_de_e82545);
Index: usr.sbin/bhyve/pci_emul.h
===================================================================
--- usr.sbin/bhyve/pci_emul.h
+++ usr.sbin/bhyve/pci_emul.h
@@ -45,6 +45,7 @@
struct vmctx;
struct pci_devinst;
struct memory_region;
+struct vm_snapshot_meta;
struct pci_devemu {
char *pe_emu; /* Name of device emulation */
@@ -71,6 +72,11 @@
uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
+
+ /* Save/restore device state */
+ int (*pe_snapshot)(struct vm_snapshot_meta *meta);
+ int (*pe_pause)(struct vmctx *ctx, struct pci_devinst *pi);
+ int (*pe_resume)(struct vmctx *ctx, struct pci_devinst *pi);
};
#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x);
@@ -245,6 +251,9 @@
void pci_write_dsdt(void);
uint64_t pci_ecfg_base(void);
int pci_bus_configured(int bus);
+int pci_snapshot(struct vm_snapshot_meta *meta);
+int pci_pause(struct vmctx *ctx, const char *dev_name);
+int pci_resume(struct vmctx *ctx, const char *dev_name);
static __inline void
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
Index: usr.sbin/bhyve/pci_emul.c
===================================================================
--- usr.sbin/bhyve/pci_emul.c
+++ usr.sbin/bhyve/pci_emul.c
@@ -45,6 +45,7 @@
#include <stdbool.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
#include "acpi.h"
@@ -1936,6 +1937,187 @@
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
+/*
+ * Saves/restores PCI device emulated state. Returns 0 on success.
+ *
+ * Operates on the pci_devinst stored in meta->dev_data and covers, in this
+ * order: the MSI settings, the MSI-X settings, the config space image
+ * (pi_cfgdata), the type/size/addr of every BAR and finally the MSI-X
+ * table entries. The statement order defines the snapshot layout.
+ */
+static int
+pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
+{
+ struct pci_devinst *pi;
+ int i;
+ int ret;
+
+ pi = meta->dev_data;
+
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_page_offset, meta, ret, done);
+
+ SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
+ meta, ret, done);
+
+ for (i = 0; i < nitems(pi->pi_bar); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
+ }
+
+ /*
+ * Save/restore the MSI-X table entries. The loop bound is the
+ * table_count handled above, so on restore it is the snapshot's value.
+ * NOTE(review): assumes pi_msix.table already holds at least that many
+ * entries when restoring - confirm against the device init path.
+ */
+ for (i = 0; i < pi->pi_msix.table_count; i++) {
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
+ meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Find the first configured PCI function whose fi_name equals 'dev_name',
+ * scanning buses, slots and functions in ascending order.
+ *
+ * On a match, *pde receives the emulation returned by pci_emul_finddev()
+ * for that name, *pdi receives the function's device instance, and 0 is
+ * returned. EINVAL is returned when nothing matches.
+ */
+static int
+pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde,
+ struct pci_devinst **pdi)
+{
+ struct businfo *binfo;
+ struct funcinfo *finfo;
+ int b, s, f;
+
+ assert(dev_name != NULL);
+ assert(pde != NULL);
+ assert(pdi != NULL);
+
+ for (b = 0; b < MAXBUSES; b++) {
+ binfo = pci_businfo[b];
+ if (binfo == NULL)
+ continue;
+
+ for (s = 0; s < MAXSLOTS; s++) {
+ for (f = 0; f < MAXFUNCS; f++) {
+ finfo = &binfo->slotinfo[s].si_funcs[f];
+
+ /* Skip unnamed functions and non-matching names. */
+ if (finfo->fi_name == NULL ||
+ strcmp(dev_name, finfo->fi_name) != 0)
+ continue;
+
+ *pde = pci_emul_finddev(finfo->fi_name);
+ assert(*pde != NULL);
+
+ *pdi = finfo->fi_devi;
+ return (0);
+ }
+ }
+ }
+
+ return (EINVAL);
+}
+
+/*
+ * Save/restore the PCI device named meta->dev_name.
+ *
+ * The generic PCI state is handled first (pci_snapshot_pci_dev()), then
+ * the emulation's own pe_snapshot callback, whose result is returned.
+ * Returns 0 if no device with that name is configured, and -1 if the
+ * emulation has no pe_snapshot callback or the generic part fails.
+ */
+int
+pci_snapshot(struct vm_snapshot_meta *meta)
+{
+ struct pci_devemu *pde;
+ struct pci_devinst *pdi;
+ int ret;
+
+ assert(meta->dev_name != NULL);
+
+ ret = pci_find_slotted_dev(meta->dev_name, &pde, &pdi);
+ if (ret != 0) {
+ /*
+ * A missing device is not treated as an error: the whole
+ * snapshot buffer is zeroed and success is reported.
+ */
+ fprintf(stderr, "%s: no such name: %s\r\n",
+ __func__, meta->dev_name);
+ memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+ return (0);
+ }
+
+ /* The callbacks find their device instance through dev_data. */
+ meta->dev_data = pdi;
+
+ if (pde->pe_snapshot == NULL) {
+ fprintf(stderr, "%s: not implemented yet for: %s\r\n",
+ __func__, meta->dev_name);
+ return (-1);
+ }
+
+ ret = pci_snapshot_pci_dev(meta);
+ if (ret != 0) {
+ fprintf(stderr, "%s: failed to snapshot pci dev\r\n",
+ __func__);
+ return (-1);
+ }
+
+ ret = (*pde->pe_snapshot)(meta);
+
+ return (ret);
+}
+
+/*
+ * Invoke the pe_pause callback of the PCI device named 'dev_name'.
+ *
+ * Returns the callback's result. Returns 0 (after printing a message)
+ * when no such device is configured or when its emulation does not
+ * provide pe_pause.
+ */
+int
+pci_pause(struct vmctx *ctx, const char *dev_name)
+{
+ struct pci_devemu *pde;
+ struct pci_devinst *pdi;
+ int ret;
+
+ assert(dev_name != NULL);
+
+ ret = pci_find_slotted_dev(dev_name, &pde, &pdi);
+ if (ret != 0) {
+ /* it is possible to call this function without checking that
+ * the device is inserted first
+ */
+ fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
+ return (0);
+ }
+
+ if (pde->pe_pause == NULL) {
+ /* The pause/resume functionality is optional */
+ fprintf(stderr, "%s: not implemented for: %s\n",
+ __func__, dev_name);
+ return (0);
+ }
+
+ return (*pde->pe_pause)(ctx, pdi);
+}
+
+/*
+ * Invoke the pe_resume callback of the PCI device named 'dev_name'.
+ *
+ * Returns the callback's result. Returns 0 (after printing a message)
+ * when no such device is configured or when its emulation does not
+ * provide pe_resume.
+ */
+int
+pci_resume(struct vmctx *ctx, const char *dev_name)
+{
+ struct pci_devemu *pde;
+ struct pci_devinst *pdi;
+ int ret;
+
+ assert(dev_name != NULL);
+
+ ret = pci_find_slotted_dev(dev_name, &pde, &pdi);
+ if (ret != 0) {
+ /* it is possible to call this function without checking that
+ * the device is inserted first
+ */
+ fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
+ return (0);
+ }
+
+ if (pde->pe_resume == NULL) {
+ /* The pause/resume functionality is optional */
+ fprintf(stderr, "%s: not implemented for: %s\n",
+ __func__, dev_name);
+ return (0);
+ }
+
+ return (*pde->pe_resume)(ctx, pdi);
+}
+
#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
@@ -2105,11 +2287,18 @@
return (value);
}
+/*
+ * Snapshot callback of the "dummy" test device: saves and restores
+ * nothing and always reports success.
+ */
+int
+pci_emul_snapshot(struct vm_snapshot_meta *meta)
+{
+ return (0);
+}
+
struct pci_devemu pci_dummy = {
.pe_emu = "dummy",
.pe_init = pci_emul_dinit,
.pe_barwrite = pci_emul_diow,
- .pe_barread = pci_emul_dior
+ .pe_barread = pci_emul_dior,
+ .pe_snapshot = pci_emul_snapshot,
};
PCI_EMUL_SET(pci_dummy);
Index: usr.sbin/bhyve/pci_fbuf.c
===================================================================
--- usr.sbin/bhyve/pci_fbuf.c
+++ usr.sbin/bhyve/pci_fbuf.c
@@ -35,6 +35,7 @@
#include <sys/mman.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
#include <stdio.h>
@@ -440,10 +441,22 @@
return (error);
}
+/*
+ * Save/restore the framebuffer contents: FB_SIZE bytes starting at
+ * fbuf_sc->fb_base. Note that the file-scope fbuf_sc is used here rather
+ * than meta->dev_data. Returns the status left in 'ret' by the macro.
+ */
+static int
+pci_fbuf_snapshot(struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_BUF_OR_LEAVE(fbuf_sc->fb_base, FB_SIZE, meta, ret, err);
+
+err:
+ return (ret);
+}
+
struct pci_devemu pci_fbuf = {
.pe_emu = "fbuf",
.pe_init = pci_fbuf_init,
.pe_barwrite = pci_fbuf_write,
- .pe_barread = pci_fbuf_read
+ .pe_barread = pci_fbuf_read,
+ .pe_snapshot = pci_fbuf_snapshot,
};
PCI_EMUL_SET(pci_fbuf);
Index: usr.sbin/bhyve/pci_lpc.c
===================================================================
--- usr.sbin/bhyve/pci_lpc.c
+++ usr.sbin/bhyve/pci_lpc.c
@@ -34,6 +34,7 @@
#include <sys/types.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <stdio.h>
#include <stdlib.h>
@@ -451,12 +452,31 @@
pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5));
}
+/*
+ * Save/restore every LPC UART by delegating to uart_snapshot().
+ * Stops at the first unit that reports a non-zero result and returns it;
+ * returns 0 when all units succeed.
+ */
+static int
+pci_lpc_snapshot(struct vm_snapshot_meta *meta)
+{
+ int unit, ret;
+ struct uart_softc *sc;
+
+ /* Keep the result defined even if the loop body never runs. */
+ ret = 0;
+
+ for (unit = 0; unit < LPC_UART_NUM; unit++) {
+ sc = lpc_uart_softc[unit].uart_softc;
+
+ ret = uart_snapshot(sc, meta);
+ if (ret != 0)
+ break;
+ }
+
+ return (ret);
+}
+
struct pci_devemu pci_de_lpc = {
.pe_emu = "lpc",
.pe_init = pci_lpc_init,
.pe_write_dsdt = pci_lpc_write_dsdt,
.pe_cfgwrite = pci_lpc_cfgwrite,
.pe_barwrite = pci_lpc_write,
- .pe_barread = pci_lpc_read
+ .pe_barread = pci_lpc_read,
+ .pe_snapshot = pci_lpc_snapshot,
};
PCI_EMUL_SET(pci_de_lpc);
Index: usr.sbin/bhyve/pci_virtio_block.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_block.c
+++ usr.sbin/bhyve/pci_virtio_block.c
@@ -38,6 +38,8 @@
#include <sys/ioctl.h>
#include <sys/disk.h>
+#include <machine/vmm_snapshot.h>
+
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@@ -144,6 +146,9 @@
};
static void pci_vtblk_reset(void *);
+static void pci_vtblk_pause(void *);
+static void pci_vtblk_resume(void *);
+static int pci_vtblk_snapshot(void *, struct vm_snapshot_meta *);
static void pci_vtblk_notify(void *, struct vqueue_info *);
static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
@@ -158,6 +163,9 @@
pci_vtblk_cfgwrite, /* write PCI config */
NULL, /* apply negotiated features */
VTBLK_S_HOSTCAPS, /* our capabilities */
+ pci_vtblk_pause, /* pause blockif threads */
+ pci_vtblk_resume, /* resume blockif threads */
+ pci_vtblk_snapshot, /* save / restore device state */
};
static void
@@ -169,6 +177,38 @@
vi_reset_dev(&sc->vbsc_vs);
}
+/*
+ * Pause callback for virtio-blk: forwards to blockif_pause() on the
+ * softc's block interface context (sc->bc).
+ */
+static void
+pci_vtblk_pause(void *vsc)
+{
+ struct pci_vtblk_softc *sc = vsc;
+
+ DPRINTF(("vtblk: device pause requested !\n"));
+ blockif_pause(sc->bc);
+}
+
+/*
+ * Resume callback for virtio-blk: forwards to blockif_resume() on the
+ * softc's block interface context (sc->bc).
+ */
+static void
+pci_vtblk_resume(void *vsc)
+{
+ struct pci_vtblk_softc *sc = vsc;
+
+ DPRINTF(("vtblk: device resume requested !\n"));
+ blockif_resume(sc->bc);
+}
+
+/*
+ * Save/restore the virtio-blk specific state: the device configuration
+ * (vbsc_cfg) followed by the identification buffer (vbsc_ident).
+ * Returns the status left in 'ret' by the SNAPSHOT_* macros.
+ */
+static int
+pci_vtblk_snapshot(void *vsc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct pci_vtblk_softc *sc = vsc;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->vbsc_cfg, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->vbsc_ident, sizeof(sc->vbsc_ident),
+ meta, ret, done);
+
+done:
+ return (ret);
+}
+
static void
pci_vtblk_done(struct blockif_req *br, int err)
{
@@ -408,6 +448,7 @@
.pe_emu = "virtio-blk",
.pe_init = pci_vtblk_init,
.pe_barwrite = vi_pci_write,
- .pe_barread = vi_pci_read
+ .pe_barread = vi_pci_read,
+ .pe_snapshot = vi_pci_snapshot,
};
PCI_EMUL_SET(pci_de_vblk);
Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -49,6 +49,8 @@
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
+#include <machine/vmm_snapshot.h>
+
#include <err.h>
#include <errno.h>
#include <fcntl.h>
@@ -172,6 +174,9 @@
};
static void pci_vtnet_reset(void *);
+static void pci_vtnet_pause(void *);
+static void pci_vtnet_resume(void *);
+static int pci_vtnet_snapshot(void *, struct vm_snapshot_meta *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
@@ -187,6 +192,9 @@
pci_vtnet_cfgwrite, /* write PCI config */
pci_vtnet_neg_features, /* apply negotiated features */
VTNET_S_HOSTCAPS, /* our capabilities */
+ pci_vtnet_pause, /* pause rx/tx threads */
+ pci_vtnet_resume, /* resume rx/tx threads */
+ pci_vtnet_snapshot, /* save / restore device state */
};
/*
@@ -247,6 +255,65 @@
sc->resetting = 0;
}
+/*
+ * Pause callback for virtio-net: raises sc->resetting while holding both
+ * the tx and rx mutexes, then waits on the tx and rx sides through
+ * pci_vtnet_txwait() / pci_vtnet_rxwait().
+ */
+static void
+pci_vtnet_pause(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device pause requested !\n"));
+
+ /* tx_mtx is taken before rx_mtx and released after it. */
+ pthread_mutex_lock(&sc->tx_mtx);
+ pthread_mutex_lock(&sc->rx_mtx);
+ sc->resetting = 1;
+ pthread_mutex_unlock(&sc->rx_mtx);
+ pthread_mutex_unlock(&sc->tx_mtx);
+
+ /*
+ * Wait for the transmit and receive threads to finish their
+ * processing.
+ */
+ pci_vtnet_txwait(sc);
+ pci_vtnet_rxwait(sc);
+}
+
+/*
+ * Resume callback for virtio-net: clears sc->resetting while holding both
+ * the tx and rx mutexes, undoing what pci_vtnet_pause() set.
+ */
+static void
+pci_vtnet_resume(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device resume requested !\n"));
+
+ /* Same lock order as in pci_vtnet_pause(): tx_mtx, then rx_mtx. */
+ pthread_mutex_lock(&sc->tx_mtx);
+ pthread_mutex_lock(&sc->rx_mtx);
+ sc->resetting = 0;
+ pthread_mutex_unlock(&sc->rx_mtx);
+ pthread_mutex_unlock(&sc->tx_mtx);
+}
+
+/*
+ * Save/restore the virtio-net specific state, in this order:
+ * vsc_features, vsc_config, rx_vhdrlen, rx_merge and vsc_rx_ready.
+ * Returns the status left in 'ret' by the SNAPSHOT_* macros.
+ */
+static int
+pci_vtnet_snapshot(void *vsc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device snapshot requested !\n"));
+
+ /*
+ * Queues and consts should have been saved by the more generic
+ * vi_pci_snapshot function. We need to save only our features and
+ * config.
+ */
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_features, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_config, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rx_vhdrlen, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rx_merge, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_rx_ready, meta, ret, done);
+
+done:
+ return (ret);
+}
+
/*
* Called to send a buffer chain out to the tap device
*/
@@ -990,6 +1057,7 @@
.pe_emu = "virtio-net",
.pe_init = pci_vtnet_init,
.pe_barwrite = vi_pci_write,
- .pe_barread = vi_pci_read
+ .pe_barread = vi_pci_read,
+ .pe_snapshot = vi_pci_snapshot,
};
PCI_EMUL_SET(pci_de_vnet);
Index: usr.sbin/bhyve/pci_xhci.c
===================================================================
--- usr.sbin/bhyve/pci_xhci.c
+++ usr.sbin/bhyve/pci_xhci.c
@@ -48,6 +48,8 @@
#include <pthread.h>
#include <unistd.h>
+#include <machine/vmm_snapshot.h>
+
#include <dev/usb/usbdi.h>
#include <dev/usb/usb.h>
#include <dev/usb/usb_freebsd.h>
@@ -150,6 +152,8 @@
#define FIELD_COPY(a,b,m,s) (((a) & ~((m) << (s))) | \
(((b) & ((m) << (s)))))
+#define SNAP_DEV_NAME_LEN 128
+
struct pci_xhci_trb_ring {
uint64_t ringaddr; /* current dequeue guest address */
uint32_t ccs; /* consumer cycle state */
@@ -285,9 +289,10 @@
#define XHCI_HALTED(sc) ((sc)->opregs.usbsts & XHCI_STS_HCH)
+#define XHCI_GADDR_SIZE(a) (XHCI_PADDR_SZ - \
+ (((uint64_t) (a)) & (XHCI_PADDR_SZ - 1)))
#define XHCI_GADDR(sc,a) paddr_guest2host((sc)->xsc_pi->pi_vmctx, \
- (a), \
- XHCI_PADDR_SZ - ((a) & (XHCI_PADDR_SZ-1)))
+ (a), XHCI_GADDR_SIZE(a))
static int xhci_in_use;
@@ -2827,12 +2832,261 @@
return (error);
}
+/*
+ * Build the slot -> device-index map used when snapshotting.
+ *
+ * For every slot i in 1..XHCI_MAX_SLOTS, maps[i] receives the index j
+ * (1..XHCI_MAX_DEVS) of the device instance assigned to that slot, or 0
+ * when the slot has no device. If several indices match, the highest one
+ * is kept. 'maps' must have room for XHCI_MAX_SLOTS + 1 entries because
+ * slots are numbered from 1.
+ */
+static void
+pci_xhci_map_devs_slots(struct pci_xhci_softc *sc, int maps[])
+{
+ int i, j;
+ struct pci_xhci_dev_emu *dev, *slot;
+
+ /* Index XHCI_MAX_SLOTS is used below, so clear one extra entry. */
+ memset(maps, 0, sizeof(maps[0]) * (XHCI_MAX_SLOTS + 1));
+
+ for (i = 1; i <= XHCI_MAX_SLOTS; i++) {
+ slot = XHCI_SLOTDEV_PTR(sc, i);
+
+ /* An unassigned slot must not match an empty device entry. */
+ if (slot == NULL)
+ continue;
+
+ for (j = 1; j <= XHCI_MAX_DEVS; j++) {
+ dev = XHCI_DEVINST_PTR(sc, j);
+
+ if (slot == dev)
+ maps[i] = j;
+ }
+ }
+}
+/*
+ * Save/restore the transfer state of endpoint 'idx' of 'dev'.
+ *
+ * The endpoint's xfer pointer goes into the snapshot only as a
+ * present/absent marker: when it is NULL nothing else is saved or
+ * restored for this endpoint. On restore a non-NULL marker makes the
+ * endpoint get re-initialised with pci_xhci_init_ep(), and the xfer set
+ * up by that call is the one filled in from the snapshot.
+ *
+ * Returns 0 on success, ENOMEM if the request buffer cannot be allocated
+ * on restore, or the error left in 'ret' by a SNAPSHOT_* macro.
+ * 'sc' is not used.
+ */
+static int
+pci_xhci_snapshot_ep(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev,
+ int idx, struct vm_snapshot_meta *meta)
+{
+ int k;
+ int ret;
+ struct usb_data_xfer *xfer;
+ struct usb_data_xfer_block *xfer_block;
+
+ /* On save, record whether this endpoint has transfer state at all. */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ xfer = dev->eps[idx].ep_xfer;
+
+ SNAPSHOT_VAR_OR_LEAVE(xfer, meta, ret, done);
+ if (xfer == NULL) {
+ ret = 0;
+ goto done;
+ }
+
+ /* The restored pointer value is stale; switch to the live xfer. */
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ pci_xhci_init_ep(dev, idx);
+ xfer = dev->eps[idx].ep_xfer;
+ }
+
+ /* save / restore proper */
+ for (k = 0; k < USB_MAX_XFER_BLOCKS; k++) {
+ xfer_block = &xfer->data[k];
+
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(xfer_block->buf,
+ XHCI_GADDR_SIZE(xfer_block->buf), true, meta, ret,
+ done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer_block->blen, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer_block->bdone, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer_block->processed, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer_block->hci_data, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer_block->ccs, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer_block->streamid, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer_block->trbnext, meta, ret, done);
+ }
+
+ SNAPSHOT_VAR_OR_LEAVE(xfer->ureq, meta, ret, done);
+ if (xfer->ureq) {
+ /* xfer->ureq is not allocated at restore time */
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ xfer->ureq = malloc(sizeof(struct usb_device_request));
+ /* Do not hand a NULL buffer to the macro below. */
+ if (xfer->ureq == NULL) {
+ ret = ENOMEM;
+ goto done;
+ }
+ }
+
+ SNAPSHOT_BUF_OR_LEAVE(xfer->ureq,
+ sizeof(struct usb_device_request),
+ meta, ret, done);
+ }
+
+ SNAPSHOT_VAR_OR_LEAVE(xfer->ndata, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer->head, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(xfer->tail, meta, ret, done);
+
+done:
+ return (ret);
+}
+
+/*
+ * Save/restore the xHCI controller state.
+ *
+ * The softc is reached through the pci_devinst in meta->dev_data. The
+ * statement order below defines the snapshot layout: capability and
+ * operational registers, runtime registers, a per-device sanity record
+ * (index and emulation name), port registers, the slot -> device map
+ * with per-slot device state, and finally the controller-wide counters.
+ *
+ * Returns 0 on success. Returns EINVAL when a restored device index,
+ * device name or slot map entry does not fit the running configuration,
+ * or when meta->op is neither save nor restore; otherwise returns the
+ * error reported by a SNAPSHOT_* macro or by a callee.
+ */
+static int
+pci_xhci_snapshot(struct vm_snapshot_meta *meta)
+{
+ int i, j;
+ int ret;
+ int restore_idx;
+ struct pci_devinst *pi;
+ struct pci_xhci_softc *sc;
+ struct pci_xhci_portregs *port;
+ struct pci_xhci_dev_emu *dev;
+ char dname[SNAP_DEV_NAME_LEN];
+ int maps[XHCI_MAX_SLOTS + 1];
+
+ pi = meta->dev_data;
+ sc = pi->pi_arg;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->caplength, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams3, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hccparams1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->dboff, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsoff, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hccparams2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->regsend, meta, ret, done);
+
+ /* opregs */
+ SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbcmd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbsts, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->opregs.pgsz, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dnctrl, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->opregs.crcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dcbaap, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->opregs.config, meta, ret, done);
+
+ /* opregs.cr_p */
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.cr_p,
+ XHCI_GADDR_SIZE(sc->opregs.cr_p), false, meta, ret, done);
+
+ /* opregs.dcbaa_p */
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.dcbaa_p,
+ XHCI_GADDR_SIZE(sc->opregs.dcbaa_p), false, meta, ret, done);
+
+ /* rtsregs */
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.mfindex, meta, ret, done);
+
+ /* rtsregs.intrreg */
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.iman, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.imod, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstsz, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.rsvd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstba, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erdp, meta, ret, done);
+
+ /* rtsregs.erstba_p */
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erstba_p,
+ XHCI_GADDR_SIZE(sc->rtsregs.erstba_p), false, meta, ret, done);
+
+ /* rtsregs.erst_p */
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erst_p,
+ XHCI_GADDR_SIZE(sc->rtsregs.erst_p), false, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_deq_seg, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_idx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_seg, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_events_cnt, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.event_pcs, meta, ret, done);
+
+ /*
+ * sanity checking: every configured device records its index and
+ * emulation name so a restore into a different setup is rejected.
+ */
+ for (i = 1; i <= XHCI_MAX_DEVS; i++) {
+ dev = XHCI_DEVINST_PTR(sc, i);
+ if (dev == NULL)
+ continue;
+
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ restore_idx = i;
+ SNAPSHOT_VAR_OR_LEAVE(restore_idx, meta, ret, done);
+
+ /* check if the restored device (when restoring) is sane */
+ if (restore_idx != i) {
+ fprintf(stderr, "%s: idx not matching: actual: %d, "
+ "expected: %d\r\n", __func__, restore_idx, i);
+ ret = EINVAL;
+ goto done;
+ }
+
+ if (meta->op == VM_SNAPSHOT_SAVE) {
+ memset(dname, 0, sizeof(dname));
+ strncpy(dname, dev->dev_ue->ue_emu, sizeof(dname) - 1);
+ }
+
+ SNAPSHOT_BUF_OR_LEAVE(dname, sizeof(dname), meta, ret, done);
+
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ dname[sizeof(dname) - 1] = '\0';
+ if (strcmp(dev->dev_ue->ue_emu, dname)) {
+ fprintf(stderr, "%s: device names mismatch: "
+ "actual: %s, expected: %s\r\n",
+ __func__, dname, dev->dev_ue->ue_emu);
+
+ ret = EINVAL;
+ goto done;
+ }
+ }
+ }
+
+ /* portregs */
+ for (i = 1; i <= XHCI_MAX_DEVS; i++) {
+ port = XHCI_PORTREG_PTR(sc, i);
+ dev = XHCI_DEVINST_PTR(sc, i);
+
+ if (dev == NULL)
+ continue;
+
+ SNAPSHOT_VAR_OR_LEAVE(port->portsc, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->portpmsc, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->portli, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->porthlpmc, meta, ret, done);
+ }
+
+ /* slots */
+ /* Start from a fully defined map so no stack garbage is ever saved. */
+ memset(maps, 0, sizeof(maps));
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ pci_xhci_map_devs_slots(sc, maps);
+
+ for (i = 1; i <= XHCI_MAX_SLOTS; i++) {
+ SNAPSHOT_VAR_OR_LEAVE(maps[i], meta, ret, done);
+
+ if (meta->op == VM_SNAPSHOT_SAVE) {
+ dev = XHCI_SLOTDEV_PTR(sc, i);
+ } else if (meta->op == VM_SNAPSHOT_RESTORE) {
+ /* The index comes from the snapshot: range-check it. */
+ if (maps[i] < 0 || maps[i] > XHCI_MAX_DEVS) {
+ fprintf(stderr, "%s: invalid device index %d "
+ "for slot %d\r\n", __func__, maps[i], i);
+ ret = EINVAL;
+ goto done;
+ }
+
+ if (maps[i] != 0)
+ dev = XHCI_DEVINST_PTR(sc, maps[i]);
+ else
+ dev = NULL;
+
+ XHCI_SLOTDEV_PTR(sc, i) = dev;
+ } else {
+ /* error */
+ ret = EINVAL;
+ goto done;
+ }
+
+ if (dev == NULL)
+ continue;
+
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(dev->dev_ctx,
+ XHCI_GADDR_SIZE(dev->dev_ctx), false, meta, ret, done);
+
+ for (j = 1; j < XHCI_MAX_ENDPOINTS; j++) {
+ ret = pci_xhci_snapshot_ep(sc, dev, j, meta);
+ if (ret != 0)
+ goto done;
+ }
+
+ SNAPSHOT_VAR_OR_LEAVE(dev->dev_slotstate, meta, ret, done);
+
+ /* devices[i]->dev_sc */
+ /* NOTE(review): assumes ue_snapshot returns 0 on success. */
+ ret = dev->dev_ue->ue_snapshot(dev->dev_sc, meta);
+ if (ret != 0)
+ goto done;
+
+ /* devices[i]->hci */
+ SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_address, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_port, meta, ret, done);
+ }
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->ndevices, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->usb2_port_start, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->usb3_port_start, meta, ret, done);
+
+done:
+ return (ret);
+}
struct pci_devemu pci_de_xhci = {
.pe_emu = "xhci",
.pe_init = pci_xhci_init,
.pe_barwrite = pci_xhci_write,
- .pe_barread = pci_xhci_read
+ .pe_barread = pci_xhci_read,
+ .pe_snapshot = pci_xhci_snapshot,
};
PCI_EMUL_SET(pci_de_xhci);
Index: usr.sbin/bhyve/ps2kbd.h
===================================================================
--- usr.sbin/bhyve/ps2kbd.h
+++ usr.sbin/bhyve/ps2kbd.h
@@ -32,10 +32,13 @@
#define _PS2KBD_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct ps2kbd_softc *ps2kbd_init(struct atkbdc_softc *sc);
int ps2kbd_read(struct ps2kbd_softc *sc, uint8_t *val);
void ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val);
+int ps2kbd_snapshot(struct ps2kbd_softc *sc, struct vm_snapshot_meta *meta);
+
#endif /* _PS2KBD_H_ */
Index: usr.sbin/bhyve/ps2kbd.c
===================================================================
--- usr.sbin/bhyve/ps2kbd.c
+++ usr.sbin/bhyve/ps2kbd.c
@@ -32,10 +32,13 @@
#include <sys/types.h>
+#include <machine/vmm_snapshot.h>
+
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
@@ -381,3 +384,14 @@
return (sc);
}
+/*
+ * Save/restore the PS/2 keyboard state handled here: the 'enabled' flag
+ * and the current command ('curcmd'), in that order.
+ * Returns the status left in 'ret' by the SNAPSHOT_* macros.
+ */
+int
+ps2kbd_snapshot(struct ps2kbd_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->curcmd, meta, ret, done);
+
+done:
+ return (ret);
+}
Index: usr.sbin/bhyve/ps2mouse.h
===================================================================
--- usr.sbin/bhyve/ps2mouse.h
+++ usr.sbin/bhyve/ps2mouse.h
@@ -32,6 +32,7 @@
#define _PS2MOUSE_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct ps2mouse_softc *ps2mouse_init(struct atkbdc_softc *sc);
@@ -40,4 +41,6 @@
void ps2mouse_toggle(struct ps2mouse_softc *sc, int enable);
int ps2mouse_fifocnt(struct ps2mouse_softc *sc);
+int ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta);
+
#endif /* _PS2MOUSE_H_ */
Index: usr.sbin/bhyve/ps2mouse.c
===================================================================
--- usr.sbin/bhyve/ps2mouse.c
+++ usr.sbin/bhyve/ps2mouse.c
@@ -32,10 +32,13 @@
#include <sys/types.h>
+#include <machine/vmm_snapshot.h>
+
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
@@ -415,4 +418,21 @@
return (sc);
}
-
+/*
+ * Save/restore the PS/2 mouse state handled here, in this order: status,
+ * resolution, sampling_rate, ctrlenable, curcmd, cur_x, cur_y, delta_x
+ * and delta_y.
+ * Returns the status left in 'ret' by the SNAPSHOT_* macros.
+ */
+int
+ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->resolution, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->sampling_rate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ctrlenable, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->curcmd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->cur_x, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->cur_y, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->delta_x, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->delta_y, meta, ret, done);
+
+done:
+ return (ret);
+}
Index: usr.sbin/bhyve/snapshot.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/snapshot.h
@@ -0,0 +1,68 @@
+#ifndef _BHYVE_SNAPSHOT_
+#define _BHYVE_SNAPSHOT_
+
+#include <machine/vmm_snapshot.h>
+#include <libxo/xo.h>
+#include <ucl.h>
+
+struct vmctx;
+
+struct __attribute__((packed)) restore_state {
+ int kdata_fd;
+ int vmmem_fd;
+
+ void *kdata_map;
+ size_t kdata_len;
+
+ size_t vmmem_len;
+
+ struct ucl_parser *meta_parser;
+ ucl_object_t *meta_root_obj;
+};
+
+/*
+ * Bundle of a VM context, a socket descriptor and a UNIX-domain socket
+ * address; presumably the argument block of the checkpoint thread.
+ *
+ * NOTE(review): 'checkpoint_info' below is an object definition inside a
+ * header, so every file that includes snapshot.h gets its own tentative
+ * definition. Consider declaring it 'extern' here and defining it once
+ * in snapshot.c.
+ */
+struct checkpoint_thread_info {
+ struct vmctx *ctx;
+ int socket_fd;
+ struct sockaddr_un *addr;
+} checkpoint_info;
+
+typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *);
+typedef int (*vm_pause_dev_cb) (struct vmctx *, const char *);
+typedef int (*vm_resume_dev_cb) (struct vmctx *, const char *);
+
+struct vm_snapshot_dev_info {
+ const char *dev_name; /* device name */
+ vm_snapshot_dev_cb snapshot_cb; /* callback for device snapshot */
+ vm_pause_dev_cb pause_cb; /* callback for device pause */
+ vm_resume_dev_cb resume_cb; /* callback for device resume */
+};
+
+struct vm_snapshot_kern_info {
+ const char *struct_name; /* kernel structure name*/
+ enum snapshot_req req; /* request type */
+};
+
+
+void destroy_restore_state(struct restore_state *rstate);
+
+const char * lookup_vmname(struct restore_state *rstate);
+int lookup_memflags(struct restore_state *rstate);
+size_t lookup_memsize(struct restore_state *rstate);
+int lookup_guest_ncpus(struct restore_state *rstate);
+
+
+int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate);
+int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate);
+
+int vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate);
+int vm_pause_user_devs(struct vmctx *ctx);
+int vm_resume_user_devs(struct vmctx *ctx);
+
+int get_checkpoint_msg(int conn_fd, struct vmctx *ctx);
+void *checkpoint_thread(void *param);
+int init_checkpoint_thread(struct vmctx *ctx);
+
+
+int load_restore_file(const char *filename, struct restore_state *rstate);
+
+#endif
Index: usr.sbin/bhyve/snapshot.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/snapshot.c
@@ -0,0 +1,1425 @@
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/un.h>
+
+#include <machine/atomic.h>
+#include <machine/segments.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <sysexits.h>
+#include <stdbool.h>
+
+#include <machine/vmm.h>
+#ifndef WITHOUT_CAPSICUM
+#include <machine/vmm_dev.h>
+#endif
+#include <machine/vmm_snapshot.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "acpi.h"
+#include "atkbdc.h"
+#include "inout.h"
+#include "dbgport.h"
+#include "fwctl.h"
+#include "ioapic.h"
+#include "mem.h"
+#include "mevent.h"
+#include "mptbl.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+#include "smbiostbl.h"
+#include "snapshot.h"
+#include "xmsr.h"
+#include "spinup_ap.h"
+#include "rtc.h"
+
+#include <libxo/xo.h>
+#include <ucl.h>
+
+extern int guest_ncpus;
+
+#define MB (1024UL * 1024)
+#define GB (1024UL * MB)
+
+#define BHYVE_RUN_DIR "/var/run/bhyve"
+#define CHECKPOINT_RUN_DIR BHYVE_RUN_DIR "/checkpoint"
+#define MAX_VMNAME 100
+
+#define MAX_MSG_SIZE 1024
+
+#define SNAPSHOT_BUFFER_SIZE (20 * MB)
+
+#define JSON_STRUCT_ARR_KEY "structs"
+#define JSON_DEV_ARR_KEY "devices"
+#define JSON_BASIC_METADATA_KEY "basic metadata"
+#define JSON_SNAPSHOT_REQ_KEY "snapshot_req"
+#define JSON_SIZE_KEY "size"
+#define JSON_FILE_OFFSET_KEY "file_offset"
+
+#define JSON_NCPUS_KEY "ncpus"
+#define JSON_VMNAME_KEY "vmname"
+#define JSON_MEMSIZE_KEY "memsize"
+#define JSON_MEMFLAGS_KEY "memflags"
+
+/*
+ * User-space devices that are saved and restored.  Each entry is
+ * { device name, snapshot callback, pause callback, resume callback }.
+ * Entries with a NULL pause/resume callback are skipped by
+ * vm_pause_user_devs() / vm_resume_user_devs().
+ */
+const struct vm_snapshot_dev_info snapshot_devs[] = {
+ { "atkbdc", atkbdc_snapshot, NULL, NULL },
+ { "virtio-net", pci_snapshot, NULL, NULL },
+ { "virtio-blk", pci_snapshot, NULL, NULL },
+ { "lpc", pci_snapshot, NULL, NULL },
+ { "fbuf", pci_snapshot, NULL, NULL },
+ { "xhci", pci_snapshot, NULL, NULL },
+ { "e1000", pci_snapshot, NULL, NULL },
+ { "ahci", pci_snapshot, pci_pause, pci_resume },
+ { "ahci-hd", pci_snapshot, pci_pause, pci_resume },
+ { "ahci-cd", pci_snapshot, NULL, NULL },
+};
+
+/*
+ * Kernel structures that are saved and restored, as
+ * { name, snapshot request id } pairs.  They are processed in array order by
+ * vm_snapshot_kern_structs() and vm_restore_kern_structs().
+ */
+const struct vm_snapshot_kern_info snapshot_kern_structs[] = {
+ { "vhpet", STRUCT_VHPET },
+ { "vm", STRUCT_VM },
+ { "vmx", STRUCT_VMX },
+ { "vioapic", STRUCT_VIOAPIC },
+ { "vlapic", STRUCT_VLAPIC },
+ { "vmcx", STRUCT_VMCX },
+ { "vatpit", STRUCT_VATPIT },
+ { "vatpic", STRUCT_VATPIC },
+ { "vpmtmr", STRUCT_VPMTMR },
+ { "vrtc", STRUCT_VRTC },
+};
+
+/*
+ * Build a newly allocated string made of 'base_str' followed by 'ext'.
+ *
+ * Returns NULL (after printing a diagnostic) when the combined length is
+ * larger than MAX_VMNAME or when the allocation fails.  The caller owns the
+ * returned string and must free() it.
+ *
+ * TODO: 'base_str' is supplied by the user; this function and all of its
+ * callers still need hardening.
+ */
+static char *
+strcat_extension(const char *base_str, const char *ext)
+{
+	size_t prefix_len = strnlen(base_str, MAX_VMNAME);
+	size_t suffix_len = strnlen(ext, MAX_VMNAME);
+	size_t total_len = prefix_len + suffix_len;
+	char *joined;
+
+	if (total_len > MAX_VMNAME) {
+		fprintf(stderr, "Filename exceeds maximum length.\n");
+		return (NULL);
+	}
+
+	joined = malloc(total_len + 1);
+	if (joined == NULL) {
+		perror("Failed to allocate memory.");
+		return (NULL);
+	}
+
+	/* Lay the two pieces out back to back and terminate the result. */
+	memcpy(joined, base_str, prefix_len);
+	memcpy(&joined[prefix_len], ext, suffix_len);
+	joined[total_len] = 0;
+
+	return (joined);
+}
+
+/*
+ * Release everything recorded in 'rstate': the kernel data mapping, both
+ * file descriptors and the parsed metadata.  The structure itself is not
+ * freed.  A NULL 'rstate' is reported on stderr and otherwise ignored.
+ */
+void
+destroy_restore_state(struct restore_state *rstate)
+{
+	if (rstate == NULL) {
+		fprintf(stderr, "Attempting to destroy NULL restore struct.\n");
+		return;
+	}
+
+	/* Drop the mapping first, then the descriptors. */
+	if (rstate->kdata_map != MAP_FAILED) {
+		munmap(rstate->kdata_map, rstate->kdata_len);
+	}
+	if (rstate->kdata_fd > 0) {
+		close(rstate->kdata_fd);
+	}
+	if (rstate->vmmem_fd > 0) {
+		close(rstate->vmmem_fd);
+	}
+
+	/* Metadata: the root object goes before the parser. */
+	if (rstate->meta_root_obj != NULL) {
+		ucl_object_unref(rstate->meta_root_obj);
+	}
+	if (rstate->meta_parser != NULL) {
+		ucl_parser_free(rstate->meta_parser);
+	}
+}
+
+/*
+ * Open the guest memory file 'filename' read-only and record its descriptor
+ * and size in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure (open/fstat error or empty file).
+ * On failure the descriptor is closed and rstate->vmmem_fd is reset to -1 so
+ * that a later destroy_restore_state() does not close it a second time.
+ */
+static int
+load_vmmem_file(const char *filename, struct restore_state *rstate)
+{
+	struct stat sb;
+	int err;
+
+	rstate->vmmem_fd = open(filename, O_RDONLY);
+	if (rstate->vmmem_fd < 0) {
+		perror("Failed to open restore file");
+		return (-1);
+	}
+
+	err = fstat(rstate->vmmem_fd, &sb);
+	if (err < 0) {
+		perror("Failed to stat restore file");
+		goto err_load_vmmem;
+	}
+
+	if (sb.st_size == 0) {
+		fprintf(stderr, "Restore file is empty.\n");
+		goto err_load_vmmem;
+	}
+
+	rstate->vmmem_len = sb.st_size;
+
+	return (0);
+
+err_load_vmmem:
+	/* The descriptor is known to be valid here; forget it once closed. */
+	close(rstate->vmmem_fd);
+	rstate->vmmem_fd = -1;
+	return (-1);
+}
+
+/*
+ * Open the kernel data file 'filename' read-only, map it into memory and
+ * record descriptor, length and mapping in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure (open/fstat/mmap error or empty
+ * file).  On failure the descriptor is closed and rstate->kdata_fd is reset
+ * to -1 so that a later destroy_restore_state() does not close it again.
+ */
+static int
+load_kdata_file(const char *filename, struct restore_state *rstate)
+{
+	struct stat sb;
+	int err;
+
+	rstate->kdata_fd = open(filename, O_RDONLY);
+	if (rstate->kdata_fd < 0) {
+		perror("Failed to open kernel data file");
+		return (-1);
+	}
+
+	err = fstat(rstate->kdata_fd, &sb);
+	if (err < 0) {
+		perror("Failed to stat kernel data file");
+		goto err_load_kdata;
+	}
+
+	if (sb.st_size == 0) {
+		fprintf(stderr, "Kernel data file is empty.\n");
+		goto err_load_kdata;
+	}
+
+	rstate->kdata_len = sb.st_size;
+	rstate->kdata_map = mmap(NULL, rstate->kdata_len, PROT_READ,
+	    MAP_SHARED, rstate->kdata_fd, 0);
+	if (rstate->kdata_map == MAP_FAILED) {
+		perror("Failed to map restore file");
+		goto err_load_kdata;
+	}
+
+	return (0);
+
+err_load_kdata:
+	/* The descriptor is known to be valid here; forget it once closed. */
+	close(rstate->kdata_fd);
+	rstate->kdata_fd = -1;
+	return (-1);
+}
+
+/*
+ * Parse the metadata file 'filename' with libucl and store the parser and
+ * the root object in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure; on failure the parser (if one was
+ * created) is freed and 'rstate' is left untouched.
+ */
+static int
+load_metadata_file(const char *filename, struct restore_state *rstate)
+{
+	const ucl_object_t *obj;
+	struct ucl_parser *parser;
+
+	parser = ucl_parser_new(UCL_PARSER_DEFAULT);
+	if (parser == NULL) {
+		fprintf(stderr, "Failed to initialize UCL parser.\n");
+		return (-1);
+	}
+
+	/* ucl_parser_add_file() reports failure with a false (0) result. */
+	if (ucl_parser_add_file(parser, filename) == 0) {
+		fprintf(stderr, "Failed to parse metadata file: '%s'\n",
+		    filename);
+		goto err_load_metadata;
+	}
+
+	obj = ucl_parser_get_object(parser);
+	if (obj == NULL) {
+		fprintf(stderr, "Failed to parse object.\n");
+		goto err_load_metadata;
+	}
+
+	rstate->meta_parser = parser;
+	rstate->meta_root_obj = (ucl_object_t *)obj;
+
+	return (0);
+
+err_load_metadata:
+	ucl_parser_free(parser);
+	return (-1);
+}
+
+/*
+ * Load everything needed to restore a VM from the checkpoint named
+ * 'filename': the guest memory file itself plus the companion
+ * "<filename>.kern" and "<filename>.meta" files.
+ *
+ * 'rstate' is fully (re)initialized by this function.  Returns 0 on success;
+ * on failure returns -1 after releasing whatever had been loaded so far via
+ * destroy_restore_state().  The temporary filename strings are freed on
+ * every path, including success.
+ */
+int
+load_restore_file(const char *filename, struct restore_state *rstate)
+{
+	int err = -1;
+	char *kdata_filename = NULL, *meta_filename = NULL;
+
+	assert(filename != NULL);
+	assert(rstate != NULL);
+
+	memset(rstate, 0, sizeof(*rstate));
+	rstate->kdata_map = MAP_FAILED;
+
+	if (load_vmmem_file(filename, rstate) != 0) {
+		fprintf(stderr, "Failed to load guest RAM file.\n");
+		goto done;
+	}
+
+	kdata_filename = strcat_extension(filename, ".kern");
+	if (kdata_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel data filename.\n");
+		goto done;
+	}
+
+	if (load_kdata_file(kdata_filename, rstate) != 0) {
+		fprintf(stderr, "Failed to load guest kernel data file.\n");
+		goto done;
+	}
+
+	meta_filename = strcat_extension(filename, ".meta");
+	if (meta_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel metadata filename.\n");
+		goto done;
+	}
+
+	if (load_metadata_file(meta_filename, rstate) != 0) {
+		fprintf(stderr, "Failed to load guest metadata file.\n");
+		goto done;
+	}
+
+	err = 0;
+
+done:
+	if (err != 0)
+		destroy_restore_state(rstate);
+	/* The names are only needed while opening the files. */
+	free(kdata_filename);
+	free(meta_filename);
+	return (err);
+}
+
+/*
+ * Look up 'key' in the UCL object 'obj' and store its integer value through
+ * 'result_ptr'.  If the key is missing or the value cannot be converted, a
+ * message is printed to stderr and the ENCLOSING FUNCTION returns 'ret' --
+ * the macro contains a hidden return statement.
+ */
+#define JSON_GET_INT_OR_RETURN(key, obj, result_ptr, ret) \
+do { \
+ const ucl_object_t *obj__; \
+ obj__ = ucl_object_lookup(obj, key); \
+ if (obj__ == NULL) { \
+ fprintf(stderr, "Missing key: '%s'", key); \
+ return (ret); \
+ } \
+ if (!ucl_object_toint_safe(obj__, result_ptr)) { \
+ fprintf(stderr, "Cannot convert '%s' value to int.", key); \
+ return (ret); \
+ } \
+} while(0)
+
+/*
+ * Look up 'key' in the UCL object 'obj' and store its string value through
+ * 'result_ptr'.  If the key is missing or the value cannot be converted, a
+ * message is printed to stderr and the ENCLOSING FUNCTION returns 'ret' --
+ * the macro contains a hidden return statement.
+ */
+#define JSON_GET_STRING_OR_RETURN(key, obj, result_ptr, ret) \
+do { \
+ const ucl_object_t *obj__; \
+ obj__ = ucl_object_lookup(obj, key); \
+ if (obj__ == NULL) { \
+ fprintf(stderr, "Missing key: '%s'", key); \
+ return (ret); \
+ } \
+ if (!ucl_object_tostring_safe(obj__, result_ptr)) { \
+ fprintf(stderr, "Cannot convert '%s' value to string.", key); \
+ return (ret); \
+ } \
+} while(0)
+
+/*
+ * Find the saved data of kernel structure 'struct_id' in the restore state.
+ *
+ * The "structs" array of the metadata is searched for an entry whose
+ * snapshot request id equals 'struct_id'.  On a match the entry's size is
+ * stored in '*struct_size' and a pointer into the mapped kernel data file
+ * (at the entry's file offset) is returned.
+ *
+ * Returns NULL if the array is missing or malformed, if no entry matches, or
+ * if an entry carries values that do not fit inside the kernel data file.
+ * The metadata comes from a file, so it is validated with run-time checks
+ * instead of assertions.
+ */
+static void *
+lookup_struct(enum snapshot_req struct_id, struct restore_state *rstate,
+    size_t *struct_size)
+{
+	const ucl_object_t *structs = NULL, *obj = NULL;
+	ucl_object_iter_t it = NULL;
+	int64_t snapshot_req, size, file_offset;
+
+	structs = ucl_object_lookup(rstate->meta_root_obj, JSON_STRUCT_ARR_KEY);
+	if (structs == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_STRUCT_ARR_KEY);
+		return (NULL);
+	}
+
+	if (ucl_object_type((ucl_object_t *)structs) != UCL_ARRAY) {
+		fprintf(stderr, "Object '%s' is not an array.\n",
+		    JSON_STRUCT_ARR_KEY);
+		return (NULL);
+	}
+
+	while ((obj = ucl_object_iterate(structs, &it, true)) != NULL) {
+		snapshot_req = -1;
+		/* NB: the JSON_GET_* macros return NULL from here on error. */
+		JSON_GET_INT_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj,
+		    &snapshot_req, NULL);
+		if (snapshot_req < 0) {
+			fprintf(stderr, "Invalid '%s' value in metadata.\n",
+			    JSON_SNAPSHOT_REQ_KEY);
+			return (NULL);
+		}
+		if ((enum snapshot_req)snapshot_req != struct_id)
+			continue;
+
+		JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj, &size, NULL);
+		JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj,
+		    &file_offset, NULL);
+
+		/*
+		 * Reject negative values and ranges that leave the mapping;
+		 * written so that the comparison itself cannot overflow.
+		 */
+		if (size < 0 || file_offset < 0 ||
+		    (size_t)file_offset > rstate->kdata_len ||
+		    (size_t)size > rstate->kdata_len - (size_t)file_offset) {
+			fprintf(stderr, "Metadata entry does not fit in the "
+			    "kernel data file.\n");
+			return (NULL);
+		}
+
+		*struct_size = (size_t)size;
+		return ((char *)rstate->kdata_map + file_offset);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Check whether metadata entry 'obj' describes device 'dev_name'.
+ *
+ * If the entry's snapshot request string equals 'dev_name', its size is
+ * stored in '*data_size' and a pointer into the mapped kernel data file (at
+ * the entry's file offset) is returned.  Returns NULL when the entry is for
+ * another device or when a required key is missing or has the wrong type
+ * (the JSON_GET_* macros return NULL from this function in that case).
+ *
+ * NOTE(review): size and offset come from the metadata file but are only
+ * validated with assert(), which compiles away under NDEBUG -- consider
+ * explicit error handling.
+ */
+static void *
+lookup_check_dev(const char *dev_name, struct restore_state *rstate,
+ const ucl_object_t *obj, size_t *data_size)
+{
+ const char *snapshot_req;
+ int64_t size, file_offset;
+
+ snapshot_req = NULL;
+ JSON_GET_STRING_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj,
+ &snapshot_req, NULL);
+ assert(snapshot_req != NULL);
+ if (!strcmp(snapshot_req, dev_name)) {
+ JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj,
+ &size, NULL);
+ assert(size >= 0);
+
+ JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj,
+ &file_offset, NULL);
+ assert(file_offset >= 0);
+ /* The saved data must lie entirely inside the mapped file. */
+ assert(file_offset + size <= rstate->kdata_len);
+
+ *data_size = (size_t)size;
+ return (rstate->kdata_map + file_offset);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Find the saved data of device 'dev_name' in the restore state.
+ *
+ * Walks the "devices" array of the metadata and returns the first non-NULL
+ * result of lookup_check_dev(); '*data_size' is filled in by that helper.
+ * Returns NULL if the array is missing, is not an array, or no entry
+ * matches.
+ */
+static void*
+lookup_dev(const char *dev_name, struct restore_state *rstate,
+    size_t *data_size)
+{
+	const ucl_object_t *dev_array, *entry;
+	ucl_object_iter_t iter = NULL;
+	void *data;
+
+	dev_array = ucl_object_lookup(rstate->meta_root_obj, JSON_DEV_ARR_KEY);
+	if (dev_array == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_DEV_ARR_KEY);
+		return (NULL);
+	}
+
+	if (ucl_object_type((ucl_object_t *)dev_array) != UCL_ARRAY) {
+		fprintf(stderr, "Object '%s' is not an array.\n",
+		    JSON_DEV_ARR_KEY);
+		return (NULL);
+	}
+
+	for (;;) {
+		entry = ucl_object_iterate(dev_array, &iter, true);
+		if (entry == NULL)
+			break;
+
+		data = lookup_check_dev(dev_name, rstate, entry, data_size);
+		if (data != NULL)
+			return (data);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the "basic metadata" object of the parsed metadata, or NULL (after
+ * printing a diagnostic) if it is absent or is not a JSON object.
+ */
+static const ucl_object_t *
+lookup_basic_metadata_object(struct restore_state *rstate)
+{
+	const ucl_object_t *meta;
+
+	meta = ucl_object_lookup(rstate->meta_root_obj,
+	    JSON_BASIC_METADATA_KEY);
+
+	if (meta == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_BASIC_METADATA_KEY);
+		return (NULL);
+	}
+
+	if (ucl_object_type((ucl_object_t *)meta) == UCL_OBJECT)
+		return (meta);
+
+	fprintf(stderr, "Object '%s' is not a JSON object.\n",
+	    JSON_BASIC_METADATA_KEY);
+	return (NULL);
+}
+
+/*
+ * Return the VM name stored in the basic metadata, or NULL if the metadata
+ * object or the name key cannot be read.
+ */
+const char *
+lookup_vmname(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta;
+	const char *name;
+
+	basic_meta = lookup_basic_metadata_object(rstate);
+	if (basic_meta != NULL) {
+		/* Returns NULL from this function on lookup failure. */
+		JSON_GET_STRING_OR_RETURN(JSON_VMNAME_KEY, basic_meta, &name,
+		    NULL);
+		return (name);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the memory flags stored in the basic metadata, or 0 if the metadata
+ * object or the flags key cannot be read.
+ */
+int
+lookup_memflags(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta;
+	int64_t flags;
+
+	basic_meta = lookup_basic_metadata_object(rstate);
+	if (basic_meta != NULL) {
+		/* Returns 0 from this function on lookup failure. */
+		JSON_GET_INT_OR_RETURN(JSON_MEMFLAGS_KEY, basic_meta, &flags,
+		    0);
+		return ((int)flags);
+	}
+
+	return (0);
+}
+
+/*
+ * Return the guest memory size stored in the basic metadata.  Yields 0 if
+ * the metadata object or the size key cannot be read, or if the stored value
+ * is negative.
+ */
+size_t
+lookup_memsize(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta;
+	int64_t bytes;
+
+	basic_meta = lookup_basic_metadata_object(rstate);
+	if (basic_meta == NULL)
+		return (0);
+
+	/* Returns 0 from this function on lookup failure. */
+	JSON_GET_INT_OR_RETURN(JSON_MEMSIZE_KEY, basic_meta, &bytes, 0);
+
+	return (bytes < 0 ? (size_t)0 : (size_t)bytes);
+}
+
+
+/*
+ * Return the number of guest CPUs stored in the basic metadata, or 0 if the
+ * metadata object or the CPU count key cannot be read.
+ */
+int
+lookup_guest_ncpus(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta;
+	int64_t cpu_count;
+
+	basic_meta = lookup_basic_metadata_object(rstate);
+	if (basic_meta != NULL) {
+		/* Returns 0 from this function on lookup failure. */
+		JSON_GET_INT_OR_RETURN(JSON_NCPUS_KEY, basic_meta, &cpu_count,
+		    0);
+		return ((int)cpu_count);
+	}
+
+	return (0);
+}
+
+/*
+ * Restore guest memory from the memory file recorded in 'rstate' by handing
+ * its descriptor and length to vm_restore_mem(); that function's result is
+ * returned unchanged.
+ */
+int
+restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate)
+{
+	int error;
+
+	error = vm_restore_mem(ctx, rstate->vmmem_fd, rstate->vmmem_len);
+	return (error);
+}
+
+/*
+ * Restore the single kernel structure described by 'info'.
+ *
+ * The saved bytes are located with lookup_struct() and passed to
+ * vm_snapshot_req() with the operation set to VM_SNAPSHOT_RESTORE.  Returns
+ * -1 if the data cannot be found or is empty, otherwise the result of
+ * vm_snapshot_req().
+ */
+static int
+vm_restore_kern_struct(struct vmctx *ctx, struct restore_state *rstate,
+    const struct vm_snapshot_kern_info *info)
+{
+	struct vm_snapshot_meta snap_meta;
+	void *data;
+	size_t data_len;
+	int error;
+
+	data = lookup_struct(info->req, rstate, &data_len);
+	if (data == NULL) {
+		fprintf(stderr, "%s: Failed to lookup struct %s\r\n",
+		    __func__, info->struct_name);
+		return (-1);
+	}
+
+	if (data_len == 0) {
+		fprintf(stderr, "%s: Kernel struct size was 0 for: %s\r\n",
+		    __func__, info->struct_name);
+		return (-1);
+	}
+
+	/* The whole saved region is still unread at this point. */
+	snap_meta = (struct vm_snapshot_meta) {
+		.ctx = ctx,
+		.dev_name = info->struct_name,
+		.dev_req = info->req,
+
+		.buffer.buf_start = data,
+		.buffer.buf_size = data_len,
+
+		.buffer.buf = data,
+		.buffer.buf_rem = data_len,
+
+		.op = VM_SNAPSHOT_RESTORE,
+	};
+
+	error = vm_snapshot_req(&snap_meta);
+	if (error != 0)
+		fprintf(stderr, "%s: Failed to restore struct: %s\r\n",
+		    __func__, info->struct_name);
+
+	return (error);
+}
+
+/*
+ * Restore every kernel structure listed in snapshot_kern_structs[], in array
+ * order.  Stops at, and returns, the first non-zero result; returns 0 when
+ * all entries were restored.
+ */
+int
+vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate)
+{
+	const struct vm_snapshot_kern_info *info;
+	size_t idx;
+	int error;
+
+	for (idx = 0; idx < nitems(snapshot_kern_structs); idx++) {
+		info = &snapshot_kern_structs[idx];
+
+		error = vm_restore_kern_struct(ctx, rstate, info);
+		if (error != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Restore the user-space device described by 'info'.
+ *
+ * A device with no saved data (lookup failure or zero size) is reported and
+ * skipped; that counts as success.  Otherwise the device's snapshot callback
+ * is invoked with the operation set to VM_SNAPSHOT_RESTORE.  Returns 0 on
+ * success or skip, -1 if the callback fails.
+ */
+int
+vm_restore_user_dev(struct vmctx *ctx, struct restore_state *rstate,
+    const struct vm_snapshot_dev_info *info)
+{
+	struct vm_snapshot_meta snap_meta;
+	void *data;
+	size_t data_len;
+
+	data = lookup_dev(info->dev_name, rstate, &data_len);
+	if (data == NULL) {
+		fprintf(stderr, "Failed to lookup dev: %s\r\n", info->dev_name);
+		fprintf(stderr, "Continuing the restore/migration process\r\n");
+		return (0);
+	}
+
+	if (data_len == 0) {
+		fprintf(stderr, "%s: Device size is 0. "
+		    "Assuming %s is not used\r\n",
+		    __func__, info->dev_name);
+		return (0);
+	}
+
+	/* The whole saved region is still unread at this point. */
+	snap_meta = (struct vm_snapshot_meta) {
+		.ctx = ctx,
+		.dev_name = info->dev_name,
+
+		.buffer.buf_start = data,
+		.buffer.buf_size = data_len,
+
+		.buffer.buf = data,
+		.buffer.buf_rem = data_len,
+
+		.op = VM_SNAPSHOT_RESTORE,
+	};
+
+	if ((*info->snapshot_cb)(&snap_meta) != 0) {
+		fprintf(stderr, "Failed to restore dev: %s\r\n",
+		    info->dev_name);
+		return (-1);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Restore every device listed in snapshot_devs[], in array order.  Stops at,
+ * and returns, the first non-zero result; returns 0 otherwise.
+ */
+int
+vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate)
+{
+	size_t idx;
+	int error;
+
+	for (idx = 0; idx < nitems(snapshot_devs); idx++) {
+		error = vm_restore_user_dev(ctx, rstate, &snapshot_devs[idx]);
+		if (error != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Invoke the pause callback of every snapshot_devs[] entry that has one, in
+ * array order.  Stops at, and returns, the first non-zero result; returns 0
+ * otherwise.
+ */
+int
+vm_pause_user_devs(struct vmctx *ctx)
+{
+	const struct vm_snapshot_dev_info *dev;
+	size_t idx;
+	int error;
+
+	for (idx = 0; idx < nitems(snapshot_devs); idx++) {
+		dev = &snapshot_devs[idx];
+
+		/* Devices without a pause hook need no preparation. */
+		if (dev->pause_cb != NULL) {
+			error = dev->pause_cb(ctx, dev->dev_name);
+			if (error != 0)
+				return (error);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Invoke the resume callback of every snapshot_devs[] entry that has one, in
+ * array order.  Stops at, and returns, the first non-zero result; returns 0
+ * otherwise.
+ */
+int
+vm_resume_user_devs(struct vmctx *ctx)
+{
+	const struct vm_snapshot_dev_info *dev;
+	size_t idx;
+	int error;
+
+	for (idx = 0; idx < nitems(snapshot_devs); idx++) {
+		dev = &snapshot_devs[idx];
+
+		/* Devices without a resume hook were never paused. */
+		if (dev->resume_cb != NULL) {
+			error = dev->resume_cb(ctx, dev->dev_name);
+			if (error != 0)
+				return (error);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Snapshot one kernel structure and record it.
+ *
+ * 'meta' must already describe the structure (name, request id, buffer).
+ * The structure is fetched with vm_snapshot_req(), the resulting bytes are
+ * appended to 'data_fd', and a metadata instance (name, request id, size,
+ * file offset) is emitted to 'xop' inside the list named 'array_key'.
+ * '*offset' is advanced by the number of bytes written.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+vm_snapshot_kern_struct(int data_fd, xo_handle_t *xop, const char *array_key,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	int ret;
+	size_t data_size;
+	ssize_t write_cnt;
+
+	ret = vm_snapshot_req(meta);
+	if (ret != 0) {
+		fprintf(stderr, "%s: Failed to snapshot struct %s\r\n",
+		    __func__, meta->dev_name);
+		return (-1);
+	}
+
+	data_size = vm_get_snapshot_size(meta);
+
+	write_cnt = write(data_fd, meta->buffer.buf_start, data_size);
+	if (write_cnt < 0 || (size_t)write_cnt != data_size) {
+		perror("Failed to write all snapshotted data.");
+		return (-1);
+	}
+
+	/* Write metadata; open and close must name the same list. */
+	xo_open_instance_h(xop, array_key);
+	xo_emit_h(xop, "{:debug_name/%s}\n", meta->dev_name);
+	xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%d}\n",
+	    meta->dev_req);
+	xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n",
+	    (unsigned long)data_size);
+	xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n",
+	    (unsigned long)*offset);
+	xo_close_instance_h(xop, array_key);
+
+	*offset += data_size;
+
+	return (0);
+}
+
+/*
+ * Snapshot every kernel structure listed in snapshot_kern_structs[].
+ *
+ * The raw data is appended to 'data_fd' starting at offset 0 and one
+ * metadata instance per structure is emitted to 'xop' inside the "structs"
+ * list.  A scratch buffer of SNAPSHOT_BUFFER_SIZE bytes is allocated for the
+ * duration of the call and reused (zeroed) for each structure.
+ *
+ * Returns 0 on success, ENOMEM if the scratch buffer cannot be allocated and
+ * -1 if snapshotting any structure fails.
+ */
+static int
+vm_snapshot_kern_structs(struct vmctx *ctx, int data_fd, xo_handle_t *xop)
+{
+	int ret, error;
+	size_t i, buf_size;
+	off_t offset;
+	char *buffer;
+	struct vm_snapshot_meta *meta;
+
+	error = 0;
+	offset = 0;
+	buf_size = SNAPSHOT_BUFFER_SIZE;
+
+	buffer = malloc(buf_size);
+	if (buffer == NULL) {
+		perror("Failed to allocate memory for snapshot buffer");
+		return (ENOMEM);
+	}
+
+	meta = &(struct vm_snapshot_meta) {
+		.ctx = ctx,
+
+		.buffer.buf_start = buffer,
+		.buffer.buf_size = buf_size,
+
+		.op = VM_SNAPSHOT_SAVE,
+	};
+
+	xo_open_list_h(xop, JSON_STRUCT_ARR_KEY);
+	for (i = 0; i < nitems(snapshot_kern_structs); i++) {
+		meta->dev_name = snapshot_kern_structs[i].struct_name;
+		meta->dev_req = snapshot_kern_structs[i].req;
+
+		/* Start every structure with a clean, empty buffer. */
+		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+		meta->buffer.buf = meta->buffer.buf_start;
+		meta->buffer.buf_rem = meta->buffer.buf_size;
+
+		/* Instances belong to the "structs" list opened above. */
+		ret = vm_snapshot_kern_struct(data_fd, xop,
+		    JSON_STRUCT_ARR_KEY, meta, &offset);
+		if (ret != 0) {
+			error = -1;
+			goto err_vm_snapshot_kern_data;
+		}
+	}
+	xo_close_list_h(xop, JSON_STRUCT_ARR_KEY);
+
+err_vm_snapshot_kern_data:
+	free(buffer);
+	return (error);
+}
+
+/*
+ * Emit the "basic metadata" container to 'xop': number of guest CPUs, VM
+ * name, total guest memory size (low + high) and memory flags.
+ *
+ * Returns 0 on success or the non-zero result of vm_get_name() if the VM
+ * name cannot be obtained (nothing is emitted in that case).
+ */
+static int
+vm_snapshot_basic_metadata(struct vmctx *ctx, xo_handle_t *xop)
+{
+	int error;
+	size_t memsize;
+	int memflags;
+	char vmname_buf[MAX_VMNAME];
+
+	/* Zero-fill so the name is terminated whatever vm_get_name() writes. */
+	memset(vmname_buf, 0, MAX_VMNAME);
+	error = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (error != 0) {
+		perror("Failed to get VM name");
+		return (error);
+	}
+
+	memsize = vm_get_lowmem_size(ctx) + vm_get_highmem_size(ctx);
+	memflags = vm_get_memflags(ctx);
+
+	xo_open_container_h(xop, JSON_BASIC_METADATA_KEY);
+	/* guest_ncpus is an int, so it is formatted with %d. */
+	xo_emit_h(xop, "{:" JSON_NCPUS_KEY "/%d}\n", guest_ncpus);
+	xo_emit_h(xop, "{:" JSON_VMNAME_KEY "/%s}\n", vmname_buf);
+	xo_emit_h(xop, "{:" JSON_MEMSIZE_KEY "/%lu}\n", memsize);
+	xo_emit_h(xop, "{:" JSON_MEMFLAGS_KEY "/%d}\n", memflags);
+	xo_close_container_h(xop, JSON_BASIC_METADATA_KEY);
+
+	return (0);
+}
+
+/*
+ * Append the data currently held in 'meta's buffer to 'data_fd' and emit a
+ * matching metadata instance (device name, size, file offset) to 'xop'
+ * inside the list named 'array_key'.  '*offset' is advanced by the number of
+ * bytes written.
+ *
+ * Returns 0 on success, -1 if the data could not be written completely.
+ */
+static int
+vm_snapshot_dev_write_data(int data_fd, xo_handle_t *xop, const char *array_key,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	size_t nbytes = vm_get_snapshot_size(meta);
+	int written;
+
+	written = write(data_fd, meta->buffer.buf_start, nbytes);
+	if (written != nbytes) {
+		perror("Failed to write all snapshotted data.");
+		return (-1);
+	}
+
+	/* Describe the chunk that was just written. */
+	xo_open_instance_h(xop, array_key);
+	xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%s}\n", meta->dev_name);
+	xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", nbytes);
+	xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset);
+	xo_close_instance_h(xop, array_key);
+
+	*offset += nbytes;
+
+	return (0);
+}
+
+/*
+ * Snapshot one user-space device: run its snapshot callback to fill 'meta's
+ * buffer, then hand the result to vm_snapshot_dev_write_data() for the
+ * "devices" list.  Returns 0 on success or the first non-zero result.
+ */
+static int
+vm_snapshot_user_dev(const struct vm_snapshot_dev_info *info,
+    int data_fd, xo_handle_t *xop,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	int error;
+
+	error = (*info->snapshot_cb)(meta);
+	if (error != 0) {
+		fprintf(stderr, "Failed to snapshot %s; ret=%d\r\n",
+		    meta->dev_name, error);
+		return (error);
+	}
+
+	error = vm_snapshot_dev_write_data(data_fd, xop, JSON_DEV_ARR_KEY,
+	    meta, offset);
+
+	return (error);
+}
+
+/*
+ * Snapshot every device listed in snapshot_devs[].
+ *
+ * The raw data is appended to 'data_fd' starting at its current file offset
+ * and one metadata instance per device is emitted to 'xop' inside the
+ * "devices" list.  A scratch buffer of SNAPSHOT_BUFFER_SIZE bytes is
+ * allocated for the duration of the call and reused (zeroed) per device.
+ *
+ * Returns 0 on success, -1 if the file offset cannot be determined, ENOMEM
+ * if the scratch buffer cannot be allocated, or the first non-zero result of
+ * vm_snapshot_user_dev().
+ */
+static int
+vm_snapshot_user_devs(struct vmctx *ctx, int data_fd, xo_handle_t *xop)
+{
+	int ret;
+	size_t i;
+	off_t offset;
+	void *buffer;
+	size_t buf_size;
+	struct vm_snapshot_meta *meta;
+
+	ret = 0;
+	buf_size = SNAPSHOT_BUFFER_SIZE;
+
+	offset = lseek(data_fd, 0, SEEK_CUR);
+	if (offset < 0) {
+		perror("Failed to get data file current offset.");
+		return (-1);
+	}
+
+	buffer = malloc(buf_size);
+	if (buffer == NULL) {
+		perror("Failed to allocate memory for snapshot buffer");
+		return (ENOMEM);
+	}
+
+	meta = &(struct vm_snapshot_meta) {
+		.ctx = ctx,
+
+		.buffer.buf_start = buffer,
+		.buffer.buf_size = buf_size,
+
+		.op = VM_SNAPSHOT_SAVE,
+	};
+
+	xo_open_list_h(xop, JSON_DEV_ARR_KEY);
+
+	/* Save the state of every device that supports snapshotting. */
+	for (i = 0; i < nitems(snapshot_devs); i++) {
+		meta->dev_name = snapshot_devs[i].dev_name;
+
+		/* Start every device with a clean, empty buffer. */
+		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+		meta->buffer.buf = meta->buffer.buf_start;
+		meta->buffer.buf_rem = meta->buffer.buf_size;
+
+		ret = vm_snapshot_user_dev(&snapshot_devs[i], data_fd, xop,
+		    meta, &offset);
+		if (ret != 0)
+			goto snapshot_err;
+	}
+
+	xo_close_list_h(xop, JSON_DEV_ARR_KEY);
+
+snapshot_err:
+	free(buffer);
+	return (ret);
+}
+
+/*
+ * Write 'len' bytes starting at 'src' to file 'fd' at offset 'dst_offset'.
+ *
+ * Short writes are continued until everything is written; writes
+ * interrupted by a signal (EINTR) are retried.  Returns 0 on success and -1
+ * on failure (seek error, write error, or a write that makes no progress).
+ */
+static int
+vm_mem_write_to_file(int fd, const void *src, size_t dst_offset, size_t len)
+{
+	const char *cursor;
+	size_t remaining;
+	ssize_t cnt_write;
+
+	/* Byte pointer: arithmetic on 'void *' is not standard C. */
+	cursor = src;
+	remaining = len;
+
+	if (lseek(fd, dst_offset, SEEK_SET) < 0 ) {
+		perror("Failed to changed file offset");
+		return (-1);
+	}
+
+	while (remaining > 0) {
+		cnt_write = write(fd, cursor, remaining);
+		if (cnt_write < 0) {
+			if (errno == EINTR)
+				continue;
+			perror("Failed to write in file");
+			return (-1);
+		}
+		if (cnt_write == 0) {
+			/* No progress: bail out instead of spinning forever. */
+			fprintf(stderr, "Failed to write in file: "
+			    "no progress\n");
+			return (-1);
+		}
+		cursor += cnt_write;
+		remaining -= cnt_write;
+	}
+
+	return (0);
+}
+
+/*
+ * Write a checkpoint of the VM.
+ *
+ * Three files are produced: 'checkpoint_file' (guest memory, lowmem followed
+ * by highmem), "<checkpoint_file>.kern" (kernel structures and device state)
+ * and "<checkpoint_file>.meta" (JSON metadata describing the other two).
+ * Devices are paused and all vcpus locked while state is captured.
+ *
+ * If 'stop_vm' is true the VM is suspended and destroyed after a successful
+ * checkpoint and the process exits; the function does not return then.
+ *
+ * Returns 0 on success and non-zero on failure.  Every failure path sets a
+ * non-zero result, and devices are only resumed if pausing was attempted.
+ */
+static int
+vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, bool stop_vm)
+{
+	int fd_checkpoint = 0, kdata_fd = 0;
+	int ret = 0;
+	int error = 0;
+	bool pause_attempted = false;
+	size_t guest_lowmem, guest_highmem, guest_memsize;
+	char *guest_baseaddr;
+	char *guest_lowmem_addr, *guest_highmem_addr = NULL;
+	xo_handle_t *xop = NULL;
+	char *meta_filename = NULL;
+	char *kdata_filename = NULL;
+	FILE *meta_file = NULL;
+
+	kdata_filename = strcat_extension(checkpoint_file, ".kern");
+	if (kdata_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel data filename.\n");
+		return (-1);
+	}
+
+	kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+	if (kdata_fd < 0) {
+		perror("Failed to open kernel data snapshot file.");
+		error = -1;
+		goto done;
+	}
+
+	fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
+
+	if (fd_checkpoint < 0) {
+		perror("Failed to create checkpoint file");
+		error = -1;
+		goto done;
+	}
+
+	ret = vm_get_guestmem_from_ctx(ctx, &guest_baseaddr, &guest_lowmem,
+	    &guest_highmem);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to get guest mem information (base, low, high)\n");
+		error = -1;
+		goto done;
+	}
+	guest_memsize = guest_lowmem + guest_highmem;
+
+	/* make space for VMs address space */
+	ret = ftruncate(fd_checkpoint, guest_memsize);
+	if (ret < 0) {
+		perror("Failed to truncate checkpoint file");
+		error = -1;
+		goto done;
+	}
+
+	meta_filename = strcat_extension(checkpoint_file, ".meta");
+	if (meta_filename == NULL) {
+		fprintf(stderr, "Failed to construct vm metadata filename.\n");
+		error = -1;
+		goto done;
+	}
+
+	meta_file = fopen(meta_filename, "w");
+	if (meta_file == NULL) {
+		perror("Failed to open vm metadata snapshot file.");
+		error = -1;
+		goto done;
+	}
+
+	xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY);
+	if (xop == NULL) {
+		perror("Failed to get libxo handle on metadata file.");
+		error = -1;
+		goto done;
+	}
+
+	ret = vm_snapshot_basic_metadata(ctx, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot vm basic metadata.\n");
+		error = -1;
+		goto done;
+	}
+
+	/* Highmem, when present, is addressed 4GB above the base. */
+	guest_lowmem_addr = guest_baseaddr;
+	if (guest_highmem > 0)
+		guest_highmem_addr = guest_baseaddr + 4*GB;
+
+	/*
+	 * From here on some devices may be paused, even if pausing fails
+	 * part way through, so the cleanup path has to resume them.
+	 */
+	pause_attempted = true;
+	ret = vm_pause_user_devs(ctx);
+	if (ret != 0) {
+		fprintf(stderr, "Could not pause devices\r\n");
+		error = ret;
+		goto done;
+	}
+
+	vm_vcpu_lock_all(ctx);
+
+	ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot vm kernel data.\n");
+		error = -1;
+		goto done_unlock;
+	}
+
+	ret = vm_snapshot_user_devs(ctx, kdata_fd, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot device state.\n");
+		error = -1;
+		goto done_unlock;
+	}
+
+	if (vm_mem_write_to_file(fd_checkpoint, guest_lowmem_addr,
+	    0, guest_lowmem) != 0) {
+		perror("Could not write lowmem");
+		error = -1;
+		goto done_unlock;
+	}
+
+	if (guest_highmem > 0) {
+		if (vm_mem_write_to_file(fd_checkpoint, guest_highmem_addr,
+		    guest_lowmem, guest_highmem) != 0) {
+			perror("Could not write highmem");
+			error = -1;
+			goto done_unlock;
+		}
+	}
+
+	xo_finish_h(xop);
+
+	if (stop_vm) {
+		ret = vm_suspend(ctx, VM_SUSPEND_POWEROFF);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to suspend vm\n");
+		}
+		vm_vcpu_unlock_all(ctx);
+
+		ret = vm_resume_user_devs(ctx);
+		if (ret != 0)
+			fprintf(stderr, "Could not resume devices\r\n");
+
+		/* Wait for CPUs to suspend. TODO: write this properly. */
+		sleep(5);
+		vm_destroy(ctx);
+		exit(0);
+	}
+
+done_unlock:
+	vm_vcpu_unlock_all(ctx);
+done:
+	if (pause_attempted) {
+		ret = vm_resume_user_devs(ctx);
+		if (ret != 0)
+			fprintf(stderr, "Could not resume devices\r\n");
+	}
+	if (fd_checkpoint > 0)
+		close(fd_checkpoint);
+	free(meta_filename);
+	free(kdata_filename);
+	if (xop != NULL)
+		xo_destroy(xop);
+	if (meta_file != NULL)
+		fclose(meta_file);
+	if (kdata_fd > 0)
+		close(kdata_fd);
+	return (error);
+}
+
+/*
+ * Read one checkpoint request from connection 'conn_fd' and execute it.
+ *
+ * Exactly sizeof(struct checkpoint_op) bytes are expected.  A message that
+ * is cut short (peer closed early) or would not fit the local buffer is
+ * rejected instead of being interpreted.  START_CHECKPOINT and START_SUSPEND
+ * both call vm_checkpoint(), the latter asking for the VM to be stopped
+ * afterwards.
+ *
+ * The connection is always closed before returning.  Returns 0 on success,
+ * -1 on a receive or protocol error, otherwise vm_checkpoint()'s result.
+ */
+int get_checkpoint_msg(int conn_fd, struct vmctx *ctx)
+{
+	unsigned char buf[MAX_MSG_SIZE];
+	struct checkpoint_op *checkpoint_op;
+	size_t len, total_recv = 0;
+	ssize_t recv_len;
+	int err = 0;
+
+	len = sizeof(struct checkpoint_op); /* expected length */
+	if (len > sizeof(buf)) {
+		fprintf(stderr, "Checkpoint message does not fit in the "
+		    "receive buffer.\n");
+		err = -1;
+		goto done;
+	}
+
+	while (total_recv < len) {
+		recv_len = recv(conn_fd, buf + total_recv, len - total_recv, 0);
+		if (recv_len < 0) {
+			perror("Error while receiving data from bhyvectl");
+			err = -1;
+			goto done;
+		}
+		if (recv_len == 0)
+			break;	/* peer closed the connection */
+		total_recv += recv_len;
+	}
+
+	if (total_recv != len) {
+		fprintf(stderr, "Truncated checkpoint message received.\n");
+		err = -1;
+		goto done;
+	}
+
+	checkpoint_op = (struct checkpoint_op *)buf;
+	switch (checkpoint_op->op) {
+		case START_CHECKPOINT:
+			err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, false);
+			break;
+		case START_SUSPEND:
+			err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, true);
+			break;
+		default:
+			fprintf(stderr, "Unrecognized checkpoint operation.\n");
+			err = -1;
+	}
+
+done:
+	close(conn_fd);
+	return (err);
+}
+
+/*
+ * Listen for commands from bhyvectl
+ */
+/*
+ * Thread body: accept connections on the checkpoint socket and process one
+ * request per connection with get_checkpoint_msg().  'param' points to a
+ * struct checkpoint_thread_info.  The loop ends, after reporting the error,
+ * when accept() fails.  Always returns NULL.
+ */
+void * checkpoint_thread(void *param)
+{
+	struct checkpoint_thread_info *thread_info;
+	int conn_fd, ret;
+
+	thread_info = (struct checkpoint_thread_info *)param;
+
+	/*
+	 * The peer address is never used, so none is requested; this also
+	 * avoids writing through thread_info->addr.
+	 */
+	while ((conn_fd = accept(thread_info->socket_fd, NULL, NULL)) > -1) {
+		ret = get_checkpoint_msg(conn_fd, thread_info->ctx);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to read message on checkpoint "
+			    "socket. Retrying.\n");
+		}
+	}
+
+	/* The loop only ends when accept() has failed. */
+	perror("Failed to accept connection");
+
+	return (NULL);
+}
+
+/*
+ * Make sure the runtime directories holding the UNIX sockets used for IPC
+ * with bhyvectl exist.  A directory that is already present is not an error.
+ * Returns 0 on success, or mkdir()'s negative result on failure.
+ */
+static int
+make_checkpoint_dir()
+{
+	static const char *const run_dirs[] = {
+		BHYVE_RUN_DIR,
+		CHECKPOINT_RUN_DIR,
+	};
+	size_t idx;
+	int rc;
+
+	/* Parent first, then the checkpoint subdirectory. */
+	for (idx = 0; idx < sizeof(run_dirs) / sizeof(run_dirs[0]); idx++) {
+		rc = mkdir(run_dirs[idx], 0755);
+		if (rc < 0 && errno != EEXIST)
+			return (rc);
+	}
+
+	return 0;
+}
+
+/*
+ * Create the listening UNIX socket used for IPC with bhyvectl and start the
+ * thread that serves it.
+ *
+ * The socket is bound to CHECKPOINT_RUN_DIR/<vm name>; a stale socket file
+ * of that name is removed first.  Returns 0 on success.  On failure the
+ * socket is closed, the socket path (if one was built) is unlinked and a
+ * non-zero value is returned.
+ */
+int
+init_checkpoint_thread(struct vmctx *ctx)
+{
+	/*
+	 * Static: a pointer to this address is stored in checkpoint_info
+	 * and stays reachable after this function has returned.
+	 */
+	static struct sockaddr_un addr;
+	int socket_fd;
+	pthread_t checkpoint_pthread;
+	char vmname_buf[MAX_VMNAME];
+	int ret, err = 0;
+
+	/* Initialize up front so the failure path never sees garbage. */
+	memset(&addr, 0, sizeof(struct sockaddr_un));
+	addr.sun_family = AF_UNIX;
+	memset(vmname_buf, 0, sizeof(vmname_buf));
+
+	socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		perror("Socket creation failed (IPC with bhyvectl");
+		err = -1;
+		goto fail;
+	}
+
+	err = make_checkpoint_dir();
+	if (err < 0) {
+		perror("Failed to create checkpoint runtime directory");
+		goto fail;
+	}
+
+	err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (err != 0) {
+		perror("Failed to get VM name");
+		goto fail;
+	}
+
+	/* Bound by the real size of sun_path, not PATH_MAX. */
+	ret = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
+	    CHECKPOINT_RUN_DIR, vmname_buf);
+	if (ret < 0 || (size_t)ret >= sizeof(addr.sun_path)) {
+		fprintf(stderr, "Checkpoint socket path is too long.\n");
+		addr.sun_path[0] = '\0';
+		err = -1;
+		goto fail;
+	}
+	unlink(addr.sun_path);
+
+	if (bind(socket_fd, (struct sockaddr *)&addr,
+	    sizeof(struct sockaddr_un)) != 0) {
+		perror("Failed to bind socket (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	if (listen(socket_fd, 10) < 0) {
+		perror("Failed to listen on socket (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	memset(&checkpoint_info, 0, sizeof(struct checkpoint_thread_info));
+	checkpoint_info.ctx = ctx;
+	checkpoint_info.socket_fd = socket_fd;
+	checkpoint_info.addr = &addr;
+
+	/* pthread_create() returns an error number, never a negative value. */
+	ret = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread,
+	    &checkpoint_info);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to create checkpoint thread: %s\n",
+		    strerror(ret));
+		err = -1;
+		goto fail;
+	}
+	/* The thread handle is only valid once creation has succeeded. */
+	pthread_set_name_np(checkpoint_pthread, "checkpoint thread");
+
+	return (0);
+fail:
+	if (socket_fd >= 0)
+		close(socket_fd);
+	if (addr.sun_path[0] != '\0')
+		unlink(addr.sun_path);
+
+	return (err);
+}
+
+/*
+ * Print a diagnostic saying that the snapshot operation 'op' ("save",
+ * "restore", or "unknown" for any other value) failed for 'bufname'.
+ */
+void
+vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op)
+{
+	const char *op_name;
+
+	switch (op) {
+	case VM_SNAPSHOT_SAVE:
+		op_name = "save";
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		op_name = "restore";
+		break;
+	default:
+		op_name = "unknown";
+		break;
+	}
+
+	fprintf(stderr, "%s: snapshot-%s failed for %s\r\n",
+	    __func__, op_name, bufname);
+}
+
+/*
+ * Transfer 'data_size' bytes between 'data' and the snapshot buffer in
+ * 'meta', in the direction given by meta->op: on save the bytes are copied
+ * into the buffer, on restore they are copied out of it.  The buffer cursor
+ * advances and the remaining space shrinks by 'data_size'.
+ *
+ * Returns 0 on success, E2BIG if fewer than 'data_size' bytes remain in the
+ * buffer and EINVAL for an unknown operation.
+ */
+int
+vm_snapshot_buf(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *snap_buf = &meta->buffer;
+	int direction = meta->op;
+
+	if (snap_buf->buf_rem < data_size) {
+		fprintf(stderr, "%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	switch (direction) {
+	case VM_SNAPSHOT_SAVE:
+		memcpy(snap_buf->buf, (uint8_t *) data, data_size);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		memcpy((uint8_t *) data, snap_buf->buf, data_size);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/* Consume the region that was just transferred. */
+	snap_buf->buf += data_size;
+	snap_buf->buf_rem -= data_size;
+
+	return (0);
+}
+
+/*
+ * Return how many bytes of 'meta's buffer are in use, i.e. total size minus
+ * remaining space.  An inconsistent buffer (more remaining than total) is
+ * reported on stderr and yields 0.
+ */
+size_t
+vm_get_snapshot_size(struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *snap_buf = &meta->buffer;
+
+	if (snap_buf->buf_size >= snap_buf->buf_rem)
+		return (snap_buf->buf_size - snap_buf->buf_rem);
+
+	fprintf(stderr, "%s: Invalid buffer: size = %zu, rem = %zu\r\n",
+	    __func__, snap_buf->buf_size, snap_buf->buf_rem);
+	return (0);
+}
+
+/*
+ * Save or restore a host pointer that refers to guest memory.
+ *
+ * On save, '*addrp' is converted to a guest physical address with
+ * paddr_host2guest() and that address is stored in the snapshot.  On
+ * restore, the guest address is read back and converted to a host pointer
+ * of 'len' bytes with paddr_guest2host(), which is written to '*addrp'.
+ *
+ * An address of (vm_paddr_t)-1 marks "no valid mapping".  It is accepted
+ * only when 'restore_null' is true (and, on save, '*addrp' is NULL);
+ * otherwise EFAULT is returned.  An unknown operation yields EINVAL.
+ *
+ * NOTE(review): on the success paths 'ret' is presumably assigned by
+ * SNAPSHOT_VAR_OR_LEAVE, whose definition is not visible here -- confirm.
+ */
+int
+vm_snapshot_guest2host_addr(void **addrp, size_t len, int restore_null,
+    struct vm_snapshot_meta *meta)
+{
+	int ret;
+	vm_paddr_t gaddr;
+
+	/*
+	 * Give 'gaddr' a defined value: it is printed below even on paths
+	 * that never assign it (e.g. an invalid operation).
+	 */
+	gaddr = (vm_paddr_t) -1;
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		gaddr = paddr_host2guest(meta->ctx, *addrp);
+		if (gaddr == (vm_paddr_t) -1) {
+			if ((restore_null == false) ||
+			    ((restore_null == true) && (*addrp != NULL))) {
+				ret = EFAULT;
+				goto done;
+			}
+		}
+
+		SNAPSHOT_VAR_OR_LEAVE(gaddr, meta, ret, done);
+	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
+		SNAPSHOT_VAR_OR_LEAVE(gaddr, meta, ret, done);
+		if (gaddr == (vm_paddr_t) -1) {
+			if (restore_null == false) {
+				ret = EFAULT;
+				goto done;
+			}
+		}
+
+		*addrp = paddr_guest2host(meta->ctx, gaddr, len);
+	} else {
+		ret = EINVAL;
+	}
+
+done:
+	printf("[%s]: addr: %p, len: %zx, gaddr: %#08lx, ret: %d\r\n",
+	    __func__, *addrp, len, gaddr, ret);
+	return (ret);
+}
+
+/*
+ * Like vm_snapshot_buf() on save: 'data_size' bytes of 'data' are copied
+ * into the snapshot buffer and 0 is returned.  On restore nothing is copied;
+ * 'data' is compared against the buffer contents and the memcmp() result is
+ * returned (0 when equal).  In both cases the buffer cursor then advances by
+ * 'data_size'.
+ *
+ * Returns E2BIG, without touching the buffer, if fewer than 'data_size'
+ * bytes remain, and EINVAL for an unknown operation.
+ */
+int
+vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *snap_buf = &meta->buffer;
+	int direction = meta->op;
+	int result;
+
+	if (snap_buf->buf_rem < data_size) {
+		fprintf(stderr, "%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	switch (direction) {
+	case VM_SNAPSHOT_SAVE:
+		result = 0;
+		memcpy(snap_buf->buf, (uint8_t *) data, data_size);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		result = memcmp((uint8_t *) data, snap_buf->buf, data_size);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/* Consume the region that was just processed. */
+	snap_buf->buf += data_size;
+	snap_buf->buf_rem -= data_size;
+
+	return (result);
+}
Index: usr.sbin/bhyve/uart_emul.h
===================================================================
--- usr.sbin/bhyve/uart_emul.h
+++ usr.sbin/bhyve/uart_emul.h
@@ -31,10 +31,10 @@
#ifndef _UART_EMUL_H_
#define _UART_EMUL_H_
-
#define UART_IO_BAR_SIZE 8
struct uart_softc;
+struct vm_snapshot_meta;
typedef void (*uart_intr_func_t)(void *arg);
struct uart_softc *uart_init(uart_intr_func_t intr_assert,
@@ -44,4 +44,5 @@
uint8_t uart_read(struct uart_softc *sc, int offset);
void uart_write(struct uart_softc *sc, int offset, uint8_t value);
int uart_set_backend(struct uart_softc *sc, const char *opt);
+int uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta);
#endif
Index: usr.sbin/bhyve/uart_emul.c
===================================================================
--- usr.sbin/bhyve/uart_emul.c
+++ usr.sbin/bhyve/uart_emul.c
@@ -39,6 +39,8 @@
#include <capsicum_helpers.h>
#endif
+#include <machine/vmm_snapshot.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
@@ -699,3 +701,33 @@
return (retval);
}
+
+/*
+ * Save or restore the emulated UART state through the snapshot 'meta'.
+ *
+ * The registers, divisor latch bytes and receive FIFO are transferred in the
+ * fixed order below; that order is the on-disk layout, so it must not be
+ * changed without breaking existing snapshots.
+ *
+ * Returns the value left in 'ret' by the SNAPSHOT_*_OR_LEAVE macros
+ * (presumably 0 on success and an errno value on failure -- the macros are
+ * defined outside this file; confirm in vmm_snapshot.h).
+ */
+int
+uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ /* Register file. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->data, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ier, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->lcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->mcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->lsr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->msr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->fcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->scr, meta, ret, done);
+
+ /* Divisor latch, low and high bytes. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->dll, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->dlh, meta, ret, done);
+
+ /* Receive FIFO bookkeeping followed by its contents. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.rindex, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.windex, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.num, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.size, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->rxfifo.buf, sizeof(sc->rxfifo.buf),
+ meta, ret, done);
+
+ /*
+  * Not part of the snapshot: the flag is forced to 1 after every
+  * successful pass, on save as well as on restore.
+  */
+ sc->thre_int_pending = 1;
+
+done:
+ return (ret);
+}
Index: usr.sbin/bhyve/usb_emul.h
===================================================================
--- usr.sbin/bhyve/usb_emul.h
+++ usr.sbin/bhyve/usb_emul.h
@@ -41,10 +41,10 @@
#define USB_XFER_IN 1
-
struct usb_hci;
struct usb_device_request;
struct usb_data_xfer;
+struct vm_snapshot_meta;
/* Device emulation handlers */
struct usb_devemu {
@@ -62,6 +62,7 @@
int (*ue_reset)(void *sc);
int (*ue_remove)(void *sc);
int (*ue_stop)(void *sc);
+ int (*ue_snapshot)(void *scarg, struct vm_snapshot_meta *meta);
};
#define USB_EMUL_SET(x) DATA_SET(usb_emu_set, x);
@@ -148,7 +149,6 @@
pthread_mutex_unlock(&((x)->mtx)); \
} while (0)
-
struct usb_devemu *usb_emu_finddev(char *name);
struct usb_data_xfer_block *usb_data_xfer_append(struct usb_data_xfer *xfer,
Index: usr.sbin/bhyve/usb_mouse.c
===================================================================
--- usr.sbin/bhyve/usb_mouse.c
+++ usr.sbin/bhyve/usb_mouse.c
@@ -31,6 +31,8 @@
#include <sys/time.h>
+#include <machine/vmm_snapshot.h>
+
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -787,6 +789,27 @@
return (0);
}
+/*
+ * ue_snapshot handler for the USB tablet: save or restore the device softc
+ * through the snapshot 'meta'.
+ *
+ * 'scarg' is the opaque softc pointer handed to the usb_devemu callbacks; it
+ * is used here as a struct umouse_softc.  The fields are transferred in the
+ * fixed order below, which defines the snapshot layout.
+ *
+ * Returns the value left in 'ret' by SNAPSHOT_VAR_OR_LEAVE (presumably 0 on
+ * success -- the macro is defined outside this file; confirm).
+ */
+static int
+umouse_snapshot(void *scarg, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct umouse_softc *sc;
+
+ sc = scarg;
+
+ /* Current report and HID settings. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->um_report, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->newdata, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.idle, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.protocol, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.feature, meta, ret, done);
+
+ /* Polling state; the timestamp is stored as its two members. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->polling, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_sec, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_usec, meta, ret, done);
+
+done:
+ return (ret);
+}
struct usb_devemu ue_mouse = {
.ue_emu = "tablet",
@@ -797,6 +820,7 @@
.ue_data = umouse_data_handler,
.ue_reset = umouse_reset,
.ue_remove = umouse_remove,
- .ue_stop = umouse_stop
+ .ue_stop = umouse_stop,
+ .ue_snapshot = umouse_snapshot,
};
USB_EMUL_SET(ue_mouse);
Index: usr.sbin/bhyve/virtio.h
===================================================================
--- usr.sbin/bhyve/virtio.h
+++ usr.sbin/bhyve/virtio.h
@@ -285,6 +285,7 @@
struct vmctx;
struct pci_devinst;
struct vqueue_info;
+struct vm_snapshot_meta;
/*
* A virtual device, with some number (possibly 0) of virtual
@@ -359,6 +360,10 @@
void (*vc_apply_features)(void *, uint64_t);
/* called to apply negotiated features */
uint64_t vc_hv_caps; /* hypervisor-provided capabilities */
+ void (*vc_pause)(void *); /* called to pause device activity */
+ void (*vc_resume)(void *); /* called to resume device activity */
+ int (*vc_snapshot)(void *, struct vm_snapshot_meta *);
+ /* called to save / restore device state */
};
/*
@@ -465,4 +470,7 @@
int baridx, uint64_t offset, int size);
void vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size, uint64_t value);
+int vi_pci_snapshot(struct vm_snapshot_meta *meta);
+int vi_pci_pause(struct pci_devinst *pi);
+int vi_pci_resume(struct pci_devinst *pi);
#endif /* _VIRTIO_H_ */
Index: usr.sbin/bhyve/virtio.c
===================================================================
--- usr.sbin/bhyve/virtio.c
+++ usr.sbin/bhyve/virtio.c
@@ -32,6 +32,8 @@
#include <sys/param.h>
#include <sys/uio.h>
+#include <machine/vmm_snapshot.h>
+
#include <stdio.h>
#include <stdint.h>
#include <pthread.h>
@@ -777,3 +779,147 @@
if (vs->vs_mtx)
pthread_mutex_unlock(vs->vs_mtx);
}
+
+/*
+ * Pause a virtio PCI device by invoking the device-specific vc_pause
+ * callback on its softc.
+ *
+ * The callback is mandatory: a device whose virtio_consts leaves vc_pause
+ * NULL trips the assertion.  Always returns 0.
+ */
+int
+vi_pci_pause(struct pci_devinst *pi)
+{
+	struct virtio_softc *vs;
+	struct virtio_consts *vc;
+
+	vs = pi->pi_arg;
+	vc = vs->vs_vc;
+
+	assert(vc->vc_pause != NULL);
+	(*vc->vc_pause)(DEV_SOFTC(vs));
+
+	return (0);
+}
+
+/*
+ * Resume a virtio PCI device by invoking the device-specific vc_resume
+ * callback on its softc.
+ *
+ * The callback is mandatory: a device whose virtio_consts leaves vc_resume
+ * NULL trips the assertion.  Always returns 0.
+ */
+int
+vi_pci_resume(struct pci_devinst *pi)
+{
+	struct virtio_softc *vs;
+	struct virtio_consts *vc;
+
+	vs = pi->pi_arg;
+	vc = vs->vs_vc;
+
+	assert(vc->vc_resume != NULL);
+	(*vc->vc_resume)(DEV_SOFTC(vs));
+
+	return (0);
+}
+
+/*
+ * Save or restore the generic virtio softc fields through 'meta'.
+ *
+ * The fields are transferred in the fixed order below, which defines the
+ * snapshot layout.  Returns the value left in 'ret' by SNAPSHOT_VAR_OR_LEAVE
+ * (presumably 0 on success -- the macro is defined outside this file;
+ * confirm).
+ */
+static int
+vi_pci_snapshot_softc(struct virtio_softc *vs, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_flags, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_negotiated_caps, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_curq, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_isr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_msix_cfg_idx, meta, ret, done);
+
+done:
+ return (ret);
+}
+
+/*
+ * Save the virtio_consts values of a device, or check them on restore.
+ *
+ * These use the _CMP_ macro variant rather than the plain one.
+ * NOTE(review): presumably that variant records the value on save and
+ * compares it against the snapshot on restore instead of overwriting it,
+ * so a snapshot taken with a different queue count, config size or
+ * capability set is rejected -- confirm against the macro definition.
+ *
+ * Returns the value left in 'ret' by the macro.
+ */
+static int
+vi_pci_snapshot_consts(struct virtio_consts *vc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_nvq, meta, ret, done);
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_cfgsize, meta, ret, done);
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_hv_caps, meta, ret, done);
+
+done:
+ return (ret);
+}
+
+/*
+ * Save or restore the state of every virtqueue of a virtio device.
+ *
+ * For each of the vc_nvq queues the sizes are passed through the _CMP_ macro
+ * variant, the bookkeeping fields are transferred, the three ring pointers
+ * (descriptor table, avail ring, used ring) are translated between host
+ * pointers and guest addresses, and finally the ring memory itself is
+ * transferred.  The order of the statements defines the snapshot layout.
+ *
+ * Returns 0 when the device has no queues or every transfer succeeded,
+ * otherwise the error left in 'ret' by the failing SNAPSHOT_* macro.
+ */
+static int
+vi_pci_snapshot_queues(struct virtio_softc *vs, struct vm_snapshot_meta *meta)
+{
+	int i;
+	int ret;
+	struct virtio_consts *vc;
+	struct vqueue_info *vq;
+	uint64_t addr_size;
+
+	vc = vs->vs_vc;
+
+	/*
+	 * A device may have zero virtqueues, in which case the loop body never
+	 * runs and no macro assigns 'ret'; report success explicitly.
+	 */
+	ret = 0;
+
+	/* Save virtio queue info */
+	for (i = 0; i < vc->vc_nvq; i++) {
+		vq = &vs->vs_queues[i];
+
+		SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_qsize, meta, ret, done);
+		SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_num, meta, ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_flags, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_last_avail, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_save_used, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_msix_idx, meta, ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_pfn, meta, ret, done);
+
+		/* Descriptor table: one virtio_desc per queue entry. */
+		addr_size = vq->vq_qsize * sizeof(struct virtio_desc);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_desc, addr_size,
+		    false, meta, ret, done);
+
+		/* Avail ring: qsize 16-bit entries plus three 16-bit words. */
+		addr_size = (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_avail, addr_size,
+		    false, meta, ret, done);
+
+		/* Used ring: 2 * qsize 16-bit units plus three 16-bit words. */
+		addr_size = (2 + 2 * vq->vq_qsize + 1) * sizeof(uint16_t);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_used, addr_size,
+		    false, meta, ret, done);
+
+		/* Ring contents, starting at the descriptor table. */
+		SNAPSHOT_BUF_OR_LEAVE(vq->vq_desc, vring_size(vq->vq_qsize),
+		    meta, ret, done);
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Snapshot entry point for a virtio PCI device.
+ *
+ * meta->dev_data carries the pci_devinst of the device.  The generic softc,
+ * the device constants and the virtqueues are handled in that order,
+ * followed by the device-specific vc_snapshot callback when the device
+ * provides one.  Processing stops at the first failing stage and its error
+ * is returned; 0 means every stage succeeded.
+ */
+int
+vi_pci_snapshot(struct vm_snapshot_meta *meta)
+{
+	struct pci_devinst *pdi = meta->dev_data;
+	struct virtio_softc *softc = pdi->pi_arg;
+	struct virtio_consts *consts = softc->vs_vc;
+	int error;
+
+	/* Generic virtio softc. */
+	error = vi_pci_snapshot_softc(softc, meta);
+	if (error != 0)
+		return (error);
+
+	/* Device constants. */
+	error = vi_pci_snapshot_consts(consts, meta);
+	if (error != 0)
+		return (error);
+
+	/* Per-queue state. */
+	error = vi_pci_snapshot_queues(softc, meta);
+	if (error != 0)
+		return (error);
+
+	/* Device-specific softc, only when the device asks for it. */
+	if (consts->vc_snapshot != NULL)
+		error = (*consts->vc_snapshot)(DEV_SOFTC(softc), meta);
+
+	return (error);
+}
Index: usr.sbin/bhyvectl/bhyvectl.c
===================================================================
--- usr.sbin/bhyvectl/bhyvectl.c
+++ usr.sbin/bhyvectl/bhyvectl.c
@@ -57,6 +57,9 @@
#include <machine/vmm_dev.h>
#include <vmmapi.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
#include "amd/vmcb.h"
#include "intel/vmcs.h"
@@ -67,6 +70,9 @@
#define NO_ARG no_argument
#define OPT_ARG optional_argument
+#define CHECKPOINT_RUN_DIR "/var/run/bhyve/checkpoint"
+#define MAX_VMNAME 100
+
static const char *progname;
static void
@@ -78,6 +84,8 @@
" [--cpu=<vcpu_number>]\n"
" [--create]\n"
" [--destroy]\n"
+ " [--checkpoint=<filename>]\n"
+ " [--suspend=<filename>]\n"
" [--get-all]\n"
" [--get-stats]\n"
" [--set-desc-ds]\n"
@@ -287,6 +295,10 @@
static int unassign_pptdev, bus, slot, func;
static int run;
static int get_cpu_topology;
+static int vm_checkpoint_opt;
+static int vm_suspend_opt;
+static int vcpu_lock_all_opt;
+static int vcpu_unlock_all_opt;
/*
* VMCB specific.
@@ -591,6 +603,8 @@
SET_RTC_TIME,
SET_RTC_NVRAM,
RTC_NVRAM_OFFSET,
+ SET_CHECKPOINT_FILE,
+ SET_SUSPEND_FILE,
};
static void
@@ -1459,6 +1473,10 @@
{ "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
{ "get-intinfo", NO_ARG, &get_intinfo, 1 },
{ "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 },
+ { "checkpoint", REQ_ARG, 0, SET_CHECKPOINT_FILE},
+ { "suspend", REQ_ARG, 0, SET_SUSPEND_FILE},
+ { "vcpu_lock_all", NO_ARG,&vcpu_lock_all_opt, 1 },
+ { "vcpu_unlock_all", NO_ARG,&vcpu_unlock_all_opt, 1 },
};
const struct option intel_opts[] = {
@@ -1676,6 +1694,80 @@
}
}
+/*
+ * Send a checkpoint request to the bhyve process that runs the VM.
+ *
+ * Connects to the UNIX stream socket CHECKPOINT_RUN_DIR/<vm name> and writes
+ * the whole 'op' structure to it.
+ *
+ * Returns 0 on success.  Returns -1 when the socket cannot be created or
+ * connected, the socket path does not fit in sun_path, or the request cannot
+ * be sent completely; returns the vm_get_name() error when the VM name
+ * cannot be obtained.
+ */
+static int
+send_checkpoint_op_req(struct vmctx *ctx, struct checkpoint_op *op)
+{
+	struct sockaddr_un addr;
+	const char *cursor;
+	size_t remaining;
+	ssize_t len_sent;
+	int socket_fd, path_len;
+	int err = 0;
+	/*
+	 * Zero-filled so the name is terminated even if vm_get_name() fills
+	 * all MAX_VMNAME - 1 bytes without adding a NUL itself.
+	 */
+	char vmname_buf[MAX_VMNAME] = { 0 };
+
+	socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		perror("Error creating bhyvectl socket");
+		err = -1;
+		goto done;
+	}
+
+	memset(&addr, 0, sizeof(struct sockaddr_un));
+	addr.sun_family = AF_UNIX;
+
+	err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (err != 0) {
+		perror("Failed to get VM name");
+		goto done;
+	}
+
+	/*
+	 * sun_path is much smaller than PATH_MAX; bound the write by the real
+	 * buffer size and refuse a truncated path rather than connecting to
+	 * the wrong socket.
+	 */
+	path_len = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
+	    CHECKPOINT_RUN_DIR, vmname_buf);
+	if (path_len < 0 || (size_t)path_len >= sizeof(addr.sun_path)) {
+		fprintf(stderr, "VM socket path is too long\n");
+		err = -1;
+		goto done;
+	}
+
+	if (connect(socket_fd, (struct sockaddr *)&addr,
+	    sizeof(struct sockaddr_un)) != 0) {
+		perror("Connect to VM socket failed");
+		err = -1;
+		goto done;
+	}
+
+	/* send() may accept fewer bytes than requested; loop until done. */
+	cursor = (const char *)op;
+	remaining = sizeof(*op);
+	while (remaining > 0) {
+		len_sent = send(socket_fd, cursor, remaining, 0);
+		if (len_sent < 0) {
+			perror("Failed to send checkpoint operation request");
+			err = -1;
+			break;
+		}
+		if (len_sent == 0) {
+			/* No progress: give up instead of spinning. */
+			fprintf(stderr,
+			    "Checkpoint operation request sent only partially\n");
+			err = -1;
+			break;
+		}
+		cursor += len_sent;
+		remaining -= (size_t)len_sent;
+	}
+
+done:
+	/* 0 is a valid descriptor; only a negative value means "not open". */
+	if (socket_fd >= 0)
+		close(socket_fd);
+	return (err);
+}
+
+/*
+ * Ask the running VM to write a checkpoint to 'checkpoint_file'.
+ *
+ * Builds a START_CHECKPOINT request and hands it to
+ * send_checkpoint_op_req(), whose result is returned.  File names that do
+ * not fit in the request are truncated to MAX_SNAPSHOT_VMNAME - 1 bytes.
+ */
+static int
+send_start_checkpoint(struct vmctx *ctx, const char *checkpoint_file)
+{
+	struct checkpoint_op op;
+
+	/*
+	 * The structure is sent verbatim over a socket: clear it first so no
+	 * uninitialized stack bytes (padding, unused members) leave the
+	 * process.  This also guarantees the file name is NUL-terminated.
+	 */
+	memset(&op, 0, sizeof(op));
+	op.op = START_CHECKPOINT;
+	strncpy(op.snapshot_filename, checkpoint_file, MAX_SNAPSHOT_VMNAME - 1);
+
+	return (send_checkpoint_op_req(ctx, &op));
+}
+
+/*
+ * Ask the running VM to suspend itself into 'suspend_file'.
+ *
+ * Builds a START_SUSPEND request and hands it to send_checkpoint_op_req(),
+ * whose result is returned.  File names that do not fit in the request are
+ * truncated to MAX_SNAPSHOT_VMNAME - 1 bytes.
+ */
+static int
+send_start_suspend(struct vmctx *ctx, const char *suspend_file)
+{
+	struct checkpoint_op op;
+
+	/*
+	 * The structure is sent verbatim over a socket: clear it first so no
+	 * uninitialized stack bytes (padding, unused members) leave the
+	 * process.  This also guarantees the file name is NUL-terminated.
+	 */
+	memset(&op, 0, sizeof(op));
+	op.op = START_SUSPEND;
+	strncpy(op.snapshot_filename, suspend_file, MAX_SNAPSHOT_VMNAME - 1);
+
+	return (send_checkpoint_op_req(ctx, &op));
+}
+
int
main(int argc, char *argv[])
{
@@ -1692,6 +1784,7 @@
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
struct tm tm;
struct option *opts;
+ char *checkpoint_file, *suspend_file;
cpu_intel = cpu_vendor_intel();
opts = setup_options(cpu_intel);
@@ -1858,6 +1951,14 @@
case ASSERT_LAPIC_LVT:
assert_lapic_lvt = atoi(optarg);
break;
+ case SET_CHECKPOINT_FILE:
+ vm_checkpoint_opt = 1;
+ checkpoint_file = optarg;
+ break;
+ case SET_SUSPEND_FILE:
+ vm_suspend_opt = 1;
+ suspend_file = optarg;
+ break;
default:
usage(cpu_intel);
}
@@ -2343,6 +2444,18 @@
if (!error && destroy)
vm_destroy(ctx);
+ if (!error && vm_checkpoint_opt)
+ error = send_start_checkpoint(ctx, checkpoint_file);
+
+ if (!error && vm_suspend_opt)
+ error = send_start_suspend(ctx, suspend_file);
+
+ if (!error && vcpu_lock_all_opt)
+ error = vm_vcpu_lock_all(ctx);
+
+ if (!error && vcpu_unlock_all_opt)
+ error = vm_vcpu_unlock_all(ctx);
+
free (opts);
exit(error);
}

File Metadata

Mime Type
text/plain
Expires
Tue, Jan 14, 5:45 PM (1 h, 41 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15798484
Default Alt Text
D19495.id58263.diff (172 KB)

Event Timeline