Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F105838251
D19495.id58265.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
172 KB
Referenced Files
None
Subscribers
None
D19495.id58265.diff
View Options
Index: lib/libvmmapi/vmmapi.h
===================================================================
--- lib/libvmmapi/vmmapi.h
+++ lib/libvmmapi/vmmapi.h
@@ -33,6 +33,7 @@
#include <sys/param.h>
#include <sys/cpuset.h>
+#include <machine/vmm_dev.h>
/*
* API version for out-of-tree consumers like grub-bhyve for making compile
@@ -42,6 +43,7 @@
struct iovec;
struct vmctx;
+struct vm_snapshot_meta;
enum x2apic_state;
/*
@@ -88,6 +90,10 @@
*/
int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+
+int vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
+ size_t *lowmem_size, size_t *highmem_size);
+
/*
* Create a device memory segment identified by 'segid'.
*
@@ -110,6 +116,8 @@
int vm_parse_memsize(const char *optarg, size_t *memsize);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
+/* inverse operation to vm_map_gpa - extract guest address from host pointer */
+vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
uint64_t gla, int prot, uint64_t *gpa, int *fault);
@@ -120,6 +128,7 @@
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
void vm_set_memflags(struct vmctx *ctx, int flags);
int vm_get_memflags(struct vmctx *ctx);
+int vm_get_name(struct vmctx *ctx, char *buffer, size_t max_len);
size_t vm_get_lowmem_size(struct vmctx *ctx);
size_t vm_get_highmem_size(struct vmctx *ctx);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
@@ -226,6 +235,8 @@
uint16_t threads, uint16_t maxcpus);
int vm_get_topology(struct vmctx *ctx, uint16_t *sockets, uint16_t *cores,
uint16_t *threads, uint16_t *maxcpus);
+int vm_vcpu_lock_all(struct vmctx *ctx);
+int vm_vcpu_unlock_all(struct vmctx *ctx);
/*
* FreeBSD specific APIs
@@ -237,4 +248,27 @@
uint32_t eip, uint32_t gdtbase,
uint32_t esp);
void vm_setup_freebsd_gdt(uint64_t *gdtr);
+
+/*
+ * Bhyve save-restore
+ */
+
+#define MAX_SNAPSHOT_VMNAME 100
+
+/*
+ * Operation codes for a checkpoint request.
+ * NOTE(review): presumably the values carried in checkpoint_op.op -- confirm
+ * against the code that consumes struct checkpoint_op.
+ */
+enum checkpoint_opcodes {
+ START_CHECKPOINT = 0,
+ START_SUSPEND = 1,
+};
+
+/*
+ * Packed checkpoint request record: an operation code followed by a
+ * fixed-size filename buffer of MAX_SNAPSHOT_VMNAME bytes.
+ */
+struct __attribute__((packed)) checkpoint_op {
+ unsigned int op;
+ char snapshot_filename[MAX_SNAPSHOT_VMNAME];
+};
+
+int vm_snapshot_req(struct vm_snapshot_meta *meta);
+
+int vm_restore_time(struct vmctx *ctx);
+
+int vm_restore_mem(struct vmctx *ctx, int vmmem_fd, size_t size);
+
#endif /* _VMMAPI_H_ */
Index: lib/libvmmapi/vmmapi.c
===================================================================
--- lib/libvmmapi/vmmapi.c
+++ lib/libvmmapi/vmmapi.c
@@ -51,8 +51,10 @@
#include <libutil.h>
+#include <vm/vm.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_snapshot.h>
#include "vmmapi.h"
@@ -233,6 +235,16 @@
return (error);
}
+/*
+ * Report the guest memory layout recorded in 'ctx': ctx->baseaddr plus the
+ * lowmem and highmem sizes are stored through the three out-parameters.
+ * Always returns 0.
+ */
+int
+vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
+    size_t *lowmem_size, size_t *highmem_size)
+{
+
+	*guest_baseaddr = ctx->baseaddr;
+	*lowmem_size = ctx->lowmem;
+	*highmem_size = ctx->highmem;
+	return (0);
+}
+
int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
@@ -444,6 +456,35 @@
return (NULL);
}
+/*
+ * Inverse of vm_map_gpa(): translate a host pointer back into the guest
+ * physical address it corresponds to, using its offset from ctx->baseaddr.
+ *
+ * Returns the guest physical address, or (vm_paddr_t)-1 when the offset is
+ * neither inside lowmem, [0, lowmem), nor inside highmem,
+ * [4GB, 4GB + highmem).
+ */
+vm_paddr_t
+vm_rev_map_gpa(struct vmctx *ctx, void *addr)
+{
+	off_t offaddr;
+
+	/* Arithmetic on 'void *' is a GNU extension; go through 'char *'. */
+	offaddr = (char *)addr - (char *)ctx->baseaddr;
+
+	/*
+	 * The upper bound is exclusive, matching the highmem check below:
+	 * the byte at offset 'lowmem' is one past the end of the region.
+	 */
+	if (ctx->lowmem > 0 && offaddr >= 0 && offaddr < ctx->lowmem)
+		return (offaddr);
+
+	if (ctx->highmem > 0 && offaddr >= 4*GB &&
+	    offaddr < 4*GB + ctx->highmem)
+		return (offaddr);
+
+	return ((vm_paddr_t) -1);
+}
+
+/*
+ * Copy the VM name stored in 'ctx' into 'buf', which has room for 'max_len'
+ * bytes including the terminating NUL.
+ *
+ * Returns 0 on success, or EINVAL when the name plus its terminator does not
+ * fit; 'buf' is not written in that case.
+ *
+ * TODO: maximum size for vmname
+ */
+int
+vm_get_name(struct vmctx *ctx, char *buf, size_t max_len)
+{
+
+	/*
+	 * '<=' rather than '<': strlcpy() needs one byte beyond strlen() for
+	 * the NUL, so max_len == strlen() would silently truncate the name.
+	 */
+	if (max_len <= strlen(ctx->name))
+		return (EINVAL);
+
+	strlcpy(buf, ctx->name, max_len);
+	return (0);
+}
+
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{
@@ -1504,6 +1545,110 @@
return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
}
+/*
+ * Issue the VM_VCPU_LOCK_ALL ioctl on the VM's device descriptor and hand
+ * the ioctl's return value back unchanged.
+ */
+int
+vm_vcpu_lock_all(struct vmctx *ctx)
+{
+	int error;
+
+	error = ioctl(ctx->fd, VM_VCPU_LOCK_ALL);
+	return (error);
+}
+
+/*
+ * Issue the VM_VCPU_UNLOCK_ALL ioctl on the VM's device descriptor and hand
+ * the ioctl's return value back unchanged.
+ */
+int
+vm_vcpu_unlock_all(struct vmctx *ctx)
+{
+	int error;
+
+	error = ioctl(ctx->fd, VM_VCPU_UNLOCK_ALL);
+	return (error);
+}
+
+/*
+ * Submit a snapshot request for 'meta' through the VM_SNAPSHOT_REQ ioctl on
+ * meta->ctx->fd.
+ *
+ * The metadata is staged in a zeroed struct vm_snapshot_req; after a
+ * successful ioctl the staged copy is written back over '*meta'.  On failure
+ * a message naming meta->dev_name is printed to stderr and '*meta' is left
+ * as it was.  Returns the ioctl's return value.
+ */
+int
+vm_snapshot_req(struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_req req;
+	int rc;
+
+	/* Stage a zeroed request and load the caller's metadata into it. */
+	bzero(&req, sizeof(struct vm_snapshot_req));
+	memcpy(&req.meta, meta, sizeof(req.meta));
+
+	rc = ioctl(meta->ctx->fd, VM_SNAPSHOT_REQ, &req);
+	if (rc == 0) {
+		/* Success: publish the metadata from the staged request. */
+		memcpy(meta, &req.meta, sizeof(req.meta));
+		return (rc);
+	}
+
+	fprintf(stderr, "%s: snapshot failed for %s\r\n",
+	    __func__, meta->dev_name);
+	return (rc);
+}
+
+/*
+ * Read exactly 'len' bytes from 'fd', starting at absolute file offset
+ * 'file_offset', into 'dest'.
+ *
+ * Short reads are continued and reads interrupted by a signal (EINTR) are
+ * retried.  Returns 0 once all 'len' bytes have been read, and -1 on a seek
+ * failure, a read error, or end-of-file before 'len' bytes were available.
+ * A diagnostic is printed to stderr for every failure.
+ */
+static int
+vm_mem_read_from_file(int fd, void *dest, size_t file_offset, size_t len)
+{
+	char *dst;
+	ssize_t cnt_read;
+	size_t read_total;
+
+	if (lseek(fd, file_offset, SEEK_SET) < 0) {
+		fprintf(stderr,
+		    "%s: Could not change file offset errno = %d\r\n",
+		    __func__, errno);
+		return (-1);
+	}
+
+	/* Byte-wise pointer: arithmetic on 'void *' is a GNU extension. */
+	dst = dest;
+	read_total = 0;
+	while (read_total < len) {
+		cnt_read = read(fd, dst + read_total, len - read_total);
+		if (cnt_read < 0) {
+			if (errno == EINTR)
+				continue;
+			fprintf(stderr,"%s: read error: %d\r\n",
+			    __func__, errno);
+			return (-1);
+		}
+		if (cnt_read == 0) {
+			/* read(2) does not set errno at EOF; report it as such. */
+			fprintf(stderr,
+			    "%s: unexpected end of file after %zu of %zu bytes\r\n",
+			    __func__, read_total, len);
+			return (-1);
+		}
+		read_total += cnt_read;
+	}
+
+	return (0);
+}
+
+/*
+ * Fill guest memory from the file open on 'vmmem_fd'.
+ *
+ * 'size' must equal lowmem + highmem of 'ctx'.  Lowmem is read from file
+ * offset 0 into ctx->baseaddr; when highmem is non-zero it is read from file
+ * offset 'lowmem' into ctx->baseaddr + 4GB.
+ *
+ * Returns 0 on success and -1 on a size mismatch or a read failure.
+ */
+int
+vm_restore_mem(struct vmctx *ctx, int vmmem_fd, size_t size)
+{
+
+	if (ctx->lowmem + ctx->highmem != size) {
+		/* size_t values take %zu; %ld is a format/type mismatch. */
+		fprintf(stderr, "%s: mem size mismatch: %zu vs %zu\n",
+		    __func__, ctx->lowmem + ctx->highmem, size);
+		return (-1);
+	}
+
+	if (vm_mem_read_from_file(vmmem_fd, ctx->baseaddr,
+	    0, ctx->lowmem) != 0) {
+		fprintf(stderr,
+		    "%s: Could not read lowmem from file\r\n", __func__);
+		return (-1);
+	}
+
+	if (ctx->highmem > 0) {
+		if (vm_mem_read_from_file(vmmem_fd, ctx->baseaddr + 4*GB,
+		    ctx->lowmem, ctx->highmem) != 0) {
+
+			fprintf(stderr,
+			    "%s: Could not read highmem from file\r\n",
+			    __func__);
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Issue the VM_RESTORE_TIME ioctl on the VM's device descriptor.  The ioctl
+ * is passed a pointer to an int that is set to zero beforehand.  Returns the
+ * ioctl's return value.
+ */
+int
+vm_restore_time(struct vmctx *ctx)
+{
+	int placeholder = 0;
+
+	return (ioctl(ctx->fd, VM_RESTORE_TIME, &placeholder));
+}
+
int
vm_set_topology(struct vmctx *ctx,
uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h
+++ sys/amd64/include/vmm.h
@@ -34,6 +34,8 @@
#include <sys/sdt.h>
#include <x86/segments.h>
+struct vm_snapshot_meta;
+
#ifdef _KERNEL
SDT_PROVIDER_DECLARE(vmm);
#endif
@@ -130,6 +132,15 @@
struct vm_object;
struct vm_guest_paging;
struct pmap;
+struct vmcx_state;
+enum snapshot_req;
+
+/*
+ * Descriptor of one VM memory segment.  The fields correspond to the values
+ * vm_get_memseg() reports for a segment: its length, whether it is system
+ * memory, and its backing vm_object.
+ */
+struct mem_seg {
+ size_t len;
+ bool sysmem;
+ struct vm_object *object;
+};
+#define VM_MAX_MEMSEGS 3
struct vm_eventinfo {
void *rptr; /* rendezvous cookie */
@@ -158,6 +169,10 @@
typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+typedef int (*vmi_snapshot_t)(void *vmi, struct vm_snapshot_meta *meta);
+typedef int (*vmi_snapshot_vmcx_t)(void *vmi, struct vm_snapshot_meta *meta,
+ int vcpu);
+typedef int (*vmi_restore_tsc_t)(void *vmi, int vcpuid, uint64_t now);
struct vmm_ops {
vmm_init_func_t init; /* module wide initialization */
@@ -177,6 +192,11 @@
vmi_vmspace_free vmspace_free;
vmi_vlapic_init vlapic_init;
vmi_vlapic_cleanup vlapic_cleanup;
+
+ /* checkpoint operations */
+ vmi_snapshot_t vmsnapshot;
+ vmi_snapshot_vmcx_t vmcx_snapshot;
+ vmi_restore_tsc_t vm_restore_tsc;
};
extern struct vmm_ops vmm_ops_intel;
@@ -213,6 +233,7 @@
int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
struct vm_object **objptr);
vm_paddr_t vmm_sysmem_maxaddr(struct vm *vm);
+struct mem_seg * vm_get_memsegs(struct vm *vm);
void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
int prot, void **cookie);
void vm_gpa_release(void *cookie);
@@ -249,6 +270,9 @@
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
+int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
+int vm_restore_time(struct vm *vm);
+
#ifdef _SYS__CPUSET_H_
/*
@@ -386,6 +410,15 @@
int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+/*
+ * Function used to keep track of the guest's TSC offset. The
+ * offset is used by the virtualization extensions to provide a consistent
+ * value for the Time Stamp Counter to the guest.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_set_tsc_offset(struct vm *vm, int vcpu_id, uint64_t offset);
+
enum vm_reg_name vm_segment_name(int seg_encoding);
struct vm_copyinfo {
Index: sys/amd64/include/vmm_dev.h
===================================================================
--- sys/amd64/include/vmm_dev.h
+++ sys/amd64/include/vmm_dev.h
@@ -31,6 +31,13 @@
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
+#include <machine/vmm_snapshot.h>
+#include <sys/_cpuset.h>
+#include <sys/param.h>
+#include <vm/vm.h>
+
+struct vm_snapshot_meta;
+
#ifdef _KERNEL
void vmmdev_init(void);
int vmmdev_cleanup(void);
@@ -233,6 +240,10 @@
uint16_t maxcpus;
};
+/* Argument of the VM_SNAPSHOT_REQ ioctl: wraps the snapshot metadata. */
+struct vm_snapshot_req {
+ struct vm_snapshot_meta meta;
+};
+
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@@ -241,6 +252,8 @@
IOCNUM_GET_CAPABILITY = 3,
IOCNUM_SUSPEND = 4,
IOCNUM_REINIT = 5,
+ IOCNUM_VCPU_LOCK_ALL = 6,
+ IOCNUM_VCPU_UNLOCK_ALL = 7,
/* memory apis */
IOCNUM_MAP_MEMORY = 10, /* deprecated */
@@ -312,6 +325,11 @@
IOCNUM_RTC_WRITE = 101,
IOCNUM_RTC_SETTIME = 102,
IOCNUM_RTC_GETTIME = 103,
+
+ /* checkpoint */
+ IOCNUM_SNAPSHOT_REQ = 113,
+
+ IOCNUM_RESTORE_TIME = 115
};
#define VM_RUN \
@@ -320,6 +338,10 @@
_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
#define VM_REINIT \
_IO('v', IOCNUM_REINIT)
+#define VM_VCPU_LOCK_ALL\
+ _IO('v', IOCNUM_VCPU_LOCK_ALL)
+#define VM_VCPU_UNLOCK_ALL\
+ _IO('v', IOCNUM_VCPU_UNLOCK_ALL)
#define VM_ALLOC_MEMSEG \
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
#define VM_GET_MEMSEG \
@@ -422,4 +444,8 @@
_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
#define VM_RESTART_INSTRUCTION \
_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
+#define VM_SNAPSHOT_REQ \
+ _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_req)
+#define VM_RESTORE_TIME \
+ _IOWR('v', IOCNUM_RESTORE_TIME, int)
#endif
Index: sys/amd64/include/vmm_snapshot.h
===================================================================
--- /dev/null
+++ sys/amd64/include/vmm_snapshot.h
@@ -0,0 +1,116 @@
+#ifndef _VMM_SNAPSHOT_
+#define _VMM_SNAPSHOT_
+
+#include <sys/errno.h>
+#include <sys/types.h>
+
+struct vmctx;
+
+/*
+ * Identifies which kernel-side structure a snapshot request targets; stored
+ * in vm_snapshot_meta.dev_req.
+ * NOTE(review): no explicit values are assigned, so the numeric encoding
+ * depends on declaration order -- confirm before reordering or inserting.
+ */
+enum snapshot_req {
+ STRUCT_VMX,
+ STRUCT_VIOAPIC,
+ STRUCT_VM,
+ STRUCT_VLAPIC,
+ VM_MEM,
+ STRUCT_VHPET,
+ STRUCT_VMCX,
+ STRUCT_VATPIC,
+ STRUCT_VATPIT,
+ STRUCT_VPMTMR,
+ STRUCT_VRTC,
+};
+
+/*
+ * Buffer descriptor used while saving or restoring a snapshot: a fixed
+ * start/size pair describing the whole buffer, plus a moving
+ * position/remaining pair that tracks progress through it.
+ */
+struct vm_snapshot_buffer {
+ /*
+ * R/O for device-specific functions;
+ * written by generic snapshot functions.
+ */
+ uint8_t *const buf_start;
+ const size_t buf_size;
+
+ /*
+ * R/W for device-specific functions used to keep track of buffer
+ * current position and remaining size.
+ */
+ uint8_t *buf;
+ size_t buf_rem;
+
+ /*
+ * Length of the snapshot is either determined as (buf_size - buf_rem)
+ * or (buf - buf_start) -- the second variation returns a signed value
+ * so it may not be appropriate.
+ *
+ * Use vm_get_snapshot_size(meta).
+ */
+};
+
+/*
+ * Direction of a snapshot operation.  The snapshot helpers branch on this:
+ * for SAVE they read live state and then pass it to the buffer routine; for
+ * RESTORE they call the buffer routine first and then write the value back
+ * into live state.
+ */
+enum vm_snapshot_op {
+ VM_SNAPSHOT_SAVE,
+ VM_SNAPSHOT_RESTORE,
+};
+
+/*
+ * Descriptor handed to every snapshot routine: it names the target, carries
+ * the data buffer and records the direction of the operation.
+ *
+ * NOTE(review): 'dev_data' is not used by any code visible here; presumably
+ * device-private state for userspace devices -- confirm.
+ */
+struct vm_snapshot_meta {
+ struct vmctx *ctx;
+ void *dev_data;
+ const char *dev_name; /* identify userspace devices */
+ enum snapshot_req dev_req; /* identify kernel structs */
+
+ struct vm_snapshot_buffer buffer;
+
+ enum vm_snapshot_op op;
+};
+
+
+void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op);
+int vm_snapshot_buf(volatile void *data, size_t data_size,
+ struct vm_snapshot_meta *meta);
+size_t vm_get_snapshot_size(struct vm_snapshot_meta *meta);
+int vm_snapshot_guest2host_addr(void **addrp, size_t len, int restore_null,
+ struct vm_snapshot_meta *meta);
+int vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+ struct vm_snapshot_meta *meta);
+
+/*
+ * SNAPSHOT_BUF_OR_LEAVE: call vm_snapshot_buf() on LEN bytes at DATA and
+ * store the result in RES.  If the result is non-zero, report it through
+ * vm_snapshot_buf_err() -- passing the stringified DATA expression and the
+ * current operation -- and jump to LABEL.
+ *
+ * Because DATA is stringified, renaming the expression at a call site changes
+ * the name passed to vm_snapshot_buf_err().
+ *
+ * SNAPSHOT_VAR_OR_LEAVE: convenience form for a whole object; passes
+ * &(DATA) and sizeof(DATA) to SNAPSHOT_BUF_OR_LEAVE.
+ */
+#define SNAPSHOT_BUF_OR_LEAVE(DATA, LEN, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_buf((DATA), (LEN), (META)); \
+ if ((RES) != 0) { \
+ vm_snapshot_buf_err(#DATA, (META)->op); \
+ goto LABEL; \
+ } \
+} while (0)
+
+#define SNAPSHOT_VAR_OR_LEAVE(DATA, META, RES, LABEL) \
+ SNAPSHOT_BUF_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL)
+
+/*
+ * Address variables are pointers to guest memory.
+ *
+ * When RNULL != 0, do not enforce invalid address checks; instead, make the
+ * pointer NULL at restore time.
+ *
+ * The macro calls vm_snapshot_guest2host_addr() on the address of ADDR and
+ * stores the result in RES.  Any non-zero result jumps to LABEL; a message
+ * naming ADDR is printed only when the result is EFAULT.
+ *
+ * NOTE(review): the expansion calls fprintf(stderr, ...), so every use site
+ * needs <stdio.h> -- confirm this macro is never expanded in kernel code.
+ */
+#define SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ADDR, LEN, RNULL, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_guest2host_addr((void **)&(ADDR), (LEN), (RNULL), \
+ (META)); \
+ if ((RES) != 0) { \
+ if ((RES) == EFAULT) \
+ fprintf(stderr, "%s: invalid address: %s\r\n", \
+ __func__, #ADDR); \
+ goto LABEL; \
+ } \
+} while (0)
+
+/*
+ * compare the value in the meta buffer with the data
+ *
+ * SNAPSHOT_BUF_CMP_OR_LEAVE: call vm_snapshot_buf_cmp() on LEN bytes at DATA
+ * and store the result in RES.  If the result is non-zero, report it through
+ * vm_snapshot_buf_err() with the stringified DATA expression and jump to
+ * LABEL.
+ *
+ * SNAPSHOT_VAR_CMP_OR_LEAVE: convenience form for a whole object; passes
+ * &(DATA) and sizeof(DATA) to SNAPSHOT_BUF_CMP_OR_LEAVE.
+ */
+#define SNAPSHOT_BUF_CMP_OR_LEAVE(DATA, LEN, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_buf_cmp((DATA), (LEN), (META)); \
+ if ((RES) != 0) { \
+ vm_snapshot_buf_err(#DATA, (META)->op); \
+ goto LABEL; \
+ } \
+} while (0)
+
+#define SNAPSHOT_VAR_CMP_OR_LEAVE(DATA, META, RES, LABEL) \
+ SNAPSHOT_BUF_CMP_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL)
+
+#endif
Index: sys/amd64/vmm/amd/svm.h
===================================================================
--- sys/amd64/vmm/amd/svm.h
+++ sys/amd64/vmm/amd/svm.h
@@ -32,6 +32,7 @@
#define _SVM_H_
struct pcpu;
+struct svm_softc;
/*
* Guest register state that is saved outside the VMCB.
@@ -66,5 +67,6 @@
};
void svm_launch(uint64_t pa, struct svm_regctx *gctx, struct pcpu *pcpu);
+int svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset);
#endif /* _SVM_H_ */
Index: sys/amd64/vmm/amd/svm.c
===================================================================
--- sys/amd64/vmm/amd/svm.c
+++ sys/amd64/vmm/amd/svm.c
@@ -50,6 +50,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -278,6 +279,23 @@
svm_enable(NULL);
}
+/*
+ * Set the TSC offset of 'vcpu': store it in the tsc_offset field of the
+ * structure returned by svm_get_vmcb_ctrl(), mark VMCB_CACHE_I dirty, emit
+ * a trace record and pass the offset on to vm_set_tsc_offset().
+ *
+ * Returns the result of vm_set_tsc_offset().
+ */
+int
+svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset)
+{
+	struct vmcb_ctrl *vmcb_ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+
+	vmcb_ctrl->tsc_offset = offset;
+	svm_set_dirty(sc, vcpu, VMCB_CACHE_I);
+	VCPU_CTR1(sc->vm, vcpu, "tsc offset changed to %#lx", offset);
+
+	return (vm_set_tsc_offset(sc->vm, vcpu, offset));
+}
+
/* Pentium compatible MSRs */
#define MSR_PENTIUM_START 0
#define MSR_PENTIUM_END 0x1FFF
@@ -2198,6 +2216,34 @@
return (EINVAL);
}
+/*
+ * Save or restore one guest register, selected by 'ident', through the
+ * snapshot buffer in 'meta'.
+ *
+ * SAVE reads the register with svm_getreg() and then buffers the value;
+ * RESTORE pulls the value through the buffer routine and then writes it with
+ * svm_setreg().  Returns 0 on success, EINVAL for an unknown operation, or
+ * the first non-zero result from the register accessor or buffer routine.
+ */
+static int
+svm_snapshot_reg(void *arg, int vcpu, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = svm_getreg(arg, vcpu, ident, &val);
+		if (ret == 0)
+			SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, out);
+		ret = svm_setreg(arg, vcpu, ident, val);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+out:
+	return (ret);
+}
+
static int
svm_setcap(void *arg, int vcpu, int type, int val)
{
@@ -2280,6 +2326,304 @@
free(vlapic, M_SVM_VLAPIC);
}
+/*
+ * Save or restore the SVM per-VM state ('arg' is the struct svm_softc).
+ *
+ * The stream consists of sc->nptp followed, for each of the VM_MAXCPU vcpus,
+ * by a fixed sequence of VMCB control and state fields, the software
+ * register context (swctx) and the remaining svm_vcpu bookkeeping fields.
+ * The order of the SNAPSHOT_* calls defines the stream layout, so it must not
+ * be changed without also changing the reader/writer of existing snapshots.
+ *
+ * On VM_SNAPSHOT_RESTORE the VMCB cache bits listed below are marked dirty
+ * for every vcpu and flush_by_asid() is called once at the end.
+ *
+ * Returns 0 on success or the non-zero result of the first failing buffer
+ * operation.
+ */
+static int
+svm_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta)
+{
+ /* struct svm_softc is AMD's representation for SVM softc */
+ struct svm_softc *sc;
+ struct svm_vcpu *vcpu;
+ struct vmcb *vmcb;
+ uint64_t val;
+ int i;
+ int ret;
+
+ sc = arg;
+
+ KASSERT(sc != NULL, ("%s: arg was NULL", __func__));
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->nptp, meta, ret, done);
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vcpu = &sc->vcpu[i];
+ vmcb = &vcpu->vmcb;
+
+ /* VMCB fields for virtual cpu i */
+ /*
+ * NOTE(review): v_tpr goes through the buffer twice, once directly
+ * and once via 'val'.  This looks redundant, but removing either
+ * call would change the stream layout.
+ */
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.v_tpr, meta, ret, done);
+ val = vmcb->ctrl.v_tpr;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.v_tpr = val;
+
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.asid, meta, ret, done);
+ /*
+ * np_enable and intr_shadow are staged in 'val' and written back
+ * afterwards rather than being passed to the macro directly.
+ */
+ val = vmcb->ctrl.np_enable;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.np_enable = val;
+
+ val = vmcb->ctrl.intr_shadow;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.intr_shadow = val;
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.tlb_ctrl, meta, ret, done);
+
+ /* VMCB state area, including its padding members */
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad1,
+ sizeof(vmcb->state.pad1),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cpl, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad2,
+ sizeof(vmcb->state.pad2),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.efer, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad3,
+ sizeof(vmcb->state.pad3),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr4, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr3, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr7, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr6, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rflags, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rip, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad4,
+ sizeof(vmcb->state.pad4),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rsp, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad5,
+ sizeof(vmcb->state.pad5),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rax, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.star, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.lstar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cstar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sfmask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.kernelgsbase,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_cs, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_esp,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_eip,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr2, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad6,
+ sizeof(vmcb->state.pad6),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.g_pat, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dbgctl, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_from, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_to, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_from, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_to, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad7,
+ sizeof(vmcb->state.pad7),
+ meta, ret, done);
+
+ /* Snapshot swctx for virtual cpu i */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbp, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rcx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rsi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r8, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r9, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r10, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r11, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r12, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r13, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r14, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r15, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr3, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr3, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr6, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr7, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_debugctl, meta, ret,
+ done);
+
+ /* Snapshot the other svm_vcpu struct fields (save and restore) */
+
+ /* NEXTRIP field */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
+
+ /* lastcpu and dirty fields */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->lastcpu, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->dirty, meta, ret, done);
+
+ /* EPTGEN field - EPT is Extended Page Table */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->eptgen, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.gen, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.num, meta, ret, done);
+
+ /* Set all caches dirty */
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ svm_set_dirty(sc, i, VMCB_CACHE_ASID);
+ svm_set_dirty(sc, i, VMCB_CACHE_IOPM);
+ svm_set_dirty(sc, i, VMCB_CACHE_I);
+ svm_set_dirty(sc, i, VMCB_CACHE_TPR);
+ svm_set_dirty(sc, i, VMCB_CACHE_CR2);
+ svm_set_dirty(sc, i, VMCB_CACHE_CR);
+ svm_set_dirty(sc, i, VMCB_CACHE_DT);
+ svm_set_dirty(sc, i, VMCB_CACHE_SEG);
+ svm_set_dirty(sc, i, VMCB_CACHE_NP);
+ }
+ }
+
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ flush_by_asid();
+
+done:
+ return (ret);
+}
+
+/*
+ * Save or restore the per-vcpu VMCB state of 'vcpu' ('arg' is the struct
+ * svm_softc) through the snapshot buffer in 'meta'.
+ *
+ * Guest registers, segment selectors and descriptors, and a list of raw VMCB
+ * fields addressed with VMCB_ACCESS(offset, width) are processed in a fixed
+ * order; that order defines the stream layout.
+ *
+ * Returns EINVAL when the vcpu is running on a host cpu other than the
+ * current one.  Otherwise returns the sum of all helper return values, so the
+ * result is only meaningful as zero / non-zero (assuming the helpers return
+ * non-negative error codes -- TODO confirm).  A failing step does not stop the
+ * remaining steps from running.
+ */
+static int
+svm_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu)
+{
+ struct vmcb *vmcb;
+ struct svm_softc *sc;
+ int err, running, hostcpu;
+
+ sc = (struct svm_softc *)arg;
+ err = 0;
+
+ KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+ /*
+ * NOTE(review): 'vmcb' is assigned here but never read in this
+ * function; it may trigger a set-but-unused warning.
+ */
+ vmcb = svm_get_vmcb(sc, vcpu);
+
+ running = vcpu_is_running(sc->vm, vcpu, &hostcpu);
+ if (running && hostcpu !=curcpu) {
+ /* NOTE(review): the message has no trailing newline. */
+ printf("%s: %s%d is running", __func__, vm_name(sc->vm), vcpu);
+ return (EINVAL);
+ }
+
+ /* Control, debug and general registers */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR0, meta);
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR2, meta);
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR3, meta);
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR4, meta);
+
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DR7, meta);
+
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RAX, meta);
+
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RSP, meta);
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RIP, meta);
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RFLAGS, meta);
+
+ /* Guest segments */
+ /* ES */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_ES, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_ES, meta);
+
+ /* CS */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CS, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_CS, meta);
+
+ /* SS */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_SS, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_SS, meta);
+
+ /* DS */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DS, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_DS, meta);
+
+ /* FS */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_FS, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_FS, meta);
+
+ /* GS */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_GS, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GS, meta);
+
+ /* TR */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_TR, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_TR, meta);
+
+ /* LDTR */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_LDTR, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_LDTR, meta);
+
+ /* EFER */
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_EFER, meta);
+
+ /* IDTR and GDTR */
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_IDTR, meta);
+ err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GDTR, meta);
+
+ /* Specific AMD registers */
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_SYSENTER_CS, 8), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_SYSENTER_ESP, 8), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_SYSENTER_EIP, 8), meta);
+
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_NPT_BASE, 8), meta);
+
+ /* Intercept fields (4 bytes wide each) */
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_CR_INTERCEPT, 4), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_DR_INTERCEPT, 4), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_EXC_INTERCEPT, 4), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_INST1_INTERCEPT, 4), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_INST2_INTERCEPT, 4), meta);
+
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_TLB_CTRL, 4), meta);
+
+ /* Exit information fields */
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_EXITINFO1, 8), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_EXITINFO2, 8), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_EXITINTINFO, 8), meta);
+
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_VIRQ, 8), meta);
+
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_GUEST_PAT, 8), meta);
+
+ /* AVIC fields */
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_AVIC_BAR, 8), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_AVIC_PAGE, 8), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_AVIC_LT, 8), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_AVIC_PT, 8), meta);
+
+ /* I/O and MSR permission fields */
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_IO_PERM, 8), meta);
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_MSR_PERM, 8), meta);
+
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_ASID, 4), meta);
+
+ err += vmcb_snapshot_any(sc, vcpu,
+ VMCB_ACCESS(VMCB_OFF_EXIT_REASON, 8), meta);
+
+ err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_INTR_SHADOW, meta);
+
+ return (err);
+}
+
+/*
+ * vmm_ops hook for restoring the TSC: forwards to svm_set_tsc_offset() and
+ * returns its result.
+ */
+static int
+svm_restore_tsc(void *arg, int vcpu, uint64_t offset)
+{
+
+	return (svm_set_tsc_offset(arg, vcpu, offset));
+}
+
struct vmm_ops vmm_ops_amd = {
svm_init,
svm_cleanup,
@@ -2296,5 +2640,8 @@
svm_npt_alloc,
svm_npt_free,
svm_vlapic_init,
- svm_vlapic_cleanup
+ svm_vlapic_cleanup,
+ svm_snapshot_vmi,
+ svm_snapshot_vmcx,
+ svm_restore_tsc,
};
Index: sys/amd64/vmm/amd/svm_msr.c
===================================================================
--- sys/amd64/vmm/amd/svm_msr.c
+++ sys/amd64/vmm/amd/svm_msr.c
@@ -162,6 +162,8 @@
* Ignore writes to microcode update register.
*/
break;
+ case MSR_TSC:
+ error = svm_set_tsc_offset(sc, vcpu, val - rdtsc());
case MSR_EXTFEATURES:
break;
default:
Index: sys/amd64/vmm/amd/vmcb.h
===================================================================
--- sys/amd64/vmm/amd/vmcb.h
+++ sys/amd64/vmm/amd/vmcb.h
@@ -209,6 +209,9 @@
#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF)
#ifdef _KERNEL
+
+struct vm_snapshot_meta;
+
/* VMCB save state area segment format */
struct vmcb_segment {
uint16_t selector;
@@ -331,6 +334,12 @@
int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg);
+int vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val);
+int vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val);
+int vmcb_snapshot_desc(void *arg, int vcpu, int reg,
+ struct vm_snapshot_meta *meta);
+int vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident,
+ struct vm_snapshot_meta *meta);
#endif /* _KERNEL */
#endif /* _VMCB_H_ */
Index: sys/amd64/vmm/amd/vmcb.c
===================================================================
--- sys/amd64/vmm/amd/vmcb.c
+++ sys/amd64/vmm/amd/vmcb.c
@@ -35,6 +35,7 @@
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
@@ -452,3 +453,104 @@
return (0);
}
+
+/*
+ * Read the register or VMCB field selected by 'ident' for 'vcpu' into '*val'
+ * via vm_get_register().
+ *
+ * Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU) or 'ident' is at or
+ * above VM_REG_LAST; otherwise the result of vm_get_register().
+ */
+int
+vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val)
+{
+
+	if (vcpu < 0 || vcpu >= VM_MAXCPU || ident >= VM_REG_LAST)
+		return (EINVAL);
+
+	return (vm_get_register(sc->vm, vcpu, ident, val));
+}
+
+/*
+ * Write 'val' to the register or VMCB field selected by 'ident' for 'vcpu'
+ * via vm_set_register().
+ *
+ * Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU) or 'ident' is at or
+ * above VM_REG_LAST; otherwise the result of vm_set_register().
+ */
+int
+vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val)
+{
+
+	if (vcpu < 0 || vcpu >= VM_MAXCPU || ident >= VM_REG_LAST)
+		return (EINVAL);
+
+	return (vm_set_register(sc->vm, vcpu, ident, val));
+}
+
+/*
+ * Save or restore the segment descriptor 'reg' of 'vcpu' through the
+ * snapshot buffer in 'meta'.  The base, limit and access fields are
+ * processed in that order.
+ *
+ * SAVE fetches the descriptor with vmcb_getdesc() before buffering it;
+ * RESTORE fills the descriptor from the buffer and then installs it with
+ * vmcb_setdesc().  Returns 0 on success, EINVAL for an unknown operation, or
+ * the first non-zero result from a helper.
+ */
+int
+vmcb_snapshot_desc(void *arg, int vcpu, int reg, struct vm_snapshot_meta *meta)
+{
+	struct seg_desc desc;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcb_getdesc(arg, vcpu, reg, &desc);
+		if (ret != 0)
+			break;
+
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, out);
+
+		ret = vmcb_setdesc(arg, vcpu, reg, &desc);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+out:
+	return (ret);
+}
+
+/*
+ * Save or restore the value selected by 'ident' for 'vcpu' through the
+ * snapshot buffer in 'meta'.
+ *
+ * SAVE reads the value with vmcb_getany() and then buffers it; RESTORE pulls
+ * the value through the buffer routine and then writes it with
+ * vmcb_setany().  Returns 0 on success, EINVAL for an unknown operation, or
+ * the first non-zero result from a helper.
+ */
+int
+vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcb_getany(sc, vcpu, ident, &val);
+		if (ret == 0)
+			SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, out);
+		ret = vmcb_setany(sc, vcpu, ident, val);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+out:
+	return (ret);
+}
Index: sys/amd64/vmm/intel/vmcs.h
===================================================================
--- sys/amd64/vmm/intel/vmcs.h
+++ sys/amd64/vmm/intel/vmcs.h
@@ -32,6 +32,9 @@
#define _VMCS_H_
#ifdef _KERNEL
+
+struct vm_snapshot_meta;
+
struct vmcs {
uint32_t identifier;
uint32_t abort_code;
@@ -55,6 +58,14 @@
struct seg_desc *desc);
int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
+int vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val);
+int vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val);
+int vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
+ struct vm_snapshot_meta *meta);
+int vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
+ struct vm_snapshot_meta *meta);
+int vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
+ struct vm_snapshot_meta *meta);
/*
* Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h
Index: sys/amd64/vmm/intel/vmcs.c
===================================================================
--- sys/amd64/vmm/intel/vmcs.c
+++ sys/amd64/vmm/intel/vmcs.c
@@ -43,6 +43,7 @@
#include <machine/segments.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_host.h"
#include "vmx_cpufunc.h"
#include "vmcs.h"
@@ -428,6 +429,126 @@
return (error);
}
+/*
+ * Read the VMCS field with encoding 'ident' into '*val'.
+ *
+ * When 'running' is zero the VMCS is loaded with VMPTRLD() around the read
+ * and released again with VMCLEAR(); otherwise the field is read directly.
+ * Returns the result of vmread().
+ */
+int
+vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
+{
+	int error;
+
+	if (running)
+		return (vmread(ident, val));
+
+	VMPTRLD(vmcs);
+	error = vmread(ident, val);
+	VMCLEAR(vmcs);
+
+	return (error);
+}
+
+/*
+ * Write 'val' to the VMCS field with encoding 'ident'.
+ *
+ * When 'running' is zero the VMCS is loaded with VMPTRLD() around the write
+ * and released again with VMCLEAR(); otherwise the field is written directly.
+ * Returns the result of vmwrite().
+ */
+int
+vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
+{
+	int error;
+
+	if (running)
+		return (vmwrite(ident, val));
+
+	VMPTRLD(vmcs);
+	error = vmwrite(ident, val);
+	VMCLEAR(vmcs);
+
+	return (error);
+}
+
+/*
+ * Save or restore the guest register 'ident' through the snapshot buffer
+ * described by 'meta'.
+ *
+ * On save the register is fetched with vmcs_getreg() and then passed to
+ * SNAPSHOT_VAR_OR_LEAVE; on restore the value comes from
+ * SNAPSHOT_VAR_OR_LEAVE and is installed with vmcs_setreg().  'running' is
+ * forwarded unchanged to the VMCS accessors.
+ *
+ * Returns 0 on success, the accessor/snapshot error otherwise, or EINVAL for
+ * an unrecognised meta->op.
+ */
+int
+vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getreg(vmcs, running, ident, &val);
+		if (ret == 0) {
+			SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		}
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		ret = vmcs_setreg(vmcs, running, ident, val);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore the descriptor (base, limit, access - in that order) of the
+ * segment 'seg' through the snapshot buffer described by 'meta'.
+ *
+ * On save the descriptor is fetched with vmcs_getdesc() first; on restore the
+ * three members are filled in from the buffer and then installed with
+ * vmcs_setdesc().  'running' is forwarded unchanged to the VMCS accessors.
+ *
+ * Returns 0 on success, the accessor/snapshot error otherwise, or EINVAL for
+ * an unrecognised meta->op.
+ */
+int
+vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
+    struct vm_snapshot_meta *meta)
+{
+	struct seg_desc desc;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getdesc(vmcs, running, seg, &desc);
+		if (ret != 0)
+			break;
+
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
+
+		ret = vmcs_setdesc(vmcs, running, seg, &desc);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore an arbitrary VMCS field, addressed by its encoding 'ident',
+ * through the snapshot buffer described by 'meta'.
+ *
+ * Uses vmcs_getany()/vmcs_setany() for the VMCS access; 'running' is passed
+ * through to them unchanged.
+ *
+ * Returns 0 on success, the accessor/snapshot error otherwise, or EINVAL for
+ * an unrecognised meta->op.
+ */
+int
+vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getany(vmcs, running, ident, &val);
+		if (ret == 0) {
+			SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		}
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		ret = vmcs_setany(vmcs, running, ident, val);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
#ifdef DDB
extern int vmxon_enabled[];
Index: sys/amd64/vmm/intel/vmx.c
===================================================================
--- sys/amd64/vmm/intel/vmx.c
+++ sys/amd64/vmm/intel/vmx.c
@@ -56,6 +56,8 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
+
#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
@@ -288,6 +290,7 @@
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
static void vmx_inject_pir(struct vlapic *vlapic);
+static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now);
#ifdef KTR
static const char *
@@ -1279,7 +1282,12 @@
}
error = vmwrite(VMCS_TSC_OFFSET, offset);
+ if (error != 0)
+ goto done;
+
+ error = vm_set_tsc_offset(vmx->vm, vcpu, offset);
+done:
return (error);
}
@@ -2823,6 +2831,10 @@
write_rflags(read_rflags() | vmxctx->host_tf);
}
+/*
+ * XXX
+ * Added old_vmcs and old_regs to vmx_run to test guest vcpu saving
+ */
static int
vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
struct vm_eventinfo *evinfo)
@@ -2863,8 +2875,12 @@
*/
vmcs_write(VMCS_HOST_CR3, rcr3());
+ /*
+ * XXX If we restore a VM we use the rip saved in the vmcs
+ */
vmcs_write(VMCS_GUEST_RIP, rip);
vmx_set_pcpu_defaults(vmx, vcpu, pmap);
+
do {
KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch "
"%#lx/%#lx", __func__, vmcs_guest_rip(), rip));
@@ -3785,6 +3801,138 @@
free(vlapic, M_VLAPIC);
}
+/*
+ * Save or restore the per-vCPU software state kept in 'struct vmx': for each
+ * of the VM_MAXCPU slots, the guest MSR array followed by the vmxctx.
+ *
+ * The 'pmap' member of vmxctx is a host kernel pointer, so the value that was
+ * current before the transfer is put back afterwards.  eptgen[] is then set
+ * one generation behind the pmap's pm_eptgen (presumably so that the EPT
+ * mappings are invalidated the next time the vCPU runs - confirm).
+ *
+ * Returns 0 on success, or the error left in 'ret' by SNAPSHOT_BUF_OR_LEAVE.
+ */
+static int
+vmx_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta)
+{
+	struct vmx *vmx;
+	struct vmxctx *vmxctx;
+	struct pmap *new_pmap;
+	int i;
+	int ret;
+
+	vmx = arg;
+	ret = 0;
+
+	KASSERT(vmx != NULL, ("%s: arg was NULL", __func__));
+
+	for (i = 0; i < VM_MAXCPU; i++) {
+		SNAPSHOT_BUF_OR_LEAVE(vmx->guest_msrs[i],
+		    sizeof(vmx->guest_msrs[i]), meta, ret,
+		    done);
+
+		vmxctx = &vmx->ctx[i];
+
+		/* Keep the host pmap pointer across the raw vmxctx copy. */
+		new_pmap = vmxctx->pmap;
+		SNAPSHOT_BUF_OR_LEAVE(vmxctx, sizeof(*vmxctx), meta, ret, done);
+		vmxctx->pmap = new_pmap;
+		vmx->eptgen[i] = new_pmap->pm_eptgen - 1;
+	}
+
+done:
+	/* Report snapshot failures instead of unconditionally returning 0. */
+	return (ret);
+}
+
+/*
+ * Save or restore the guest state held in the VMCS of 'vcpu'.
+ *
+ * The fields are transferred in a fixed order, which defines the layout of
+ * this part of the snapshot:
+ *   1. control/debug/stack/instruction registers,
+ *   2. selector + descriptor of every segment register, TR and LDTR,
+ *   3. EFER,
+ *   4. IDTR and GDTR descriptors,
+ *   5. PDPTE0-3,
+ *   6. raw VMCS fields (SYSENTER MSRs, interruptibility, activity state,
+ *      IA32_EFER, entry and exit controls).
+ *
+ * The first failing step ends the operation and its error code is returned
+ * unchanged; error codes are not summed, since the sum of two errno values is
+ * not a meaningful errno.  Returns EINVAL if the vCPU is currently running on
+ * another host CPU, 0 on success.
+ */
+static int
+vmx_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu)
+{
+	static const int regs[] = {
+		VM_REG_GUEST_CR0,
+		VM_REG_GUEST_CR3,
+		VM_REG_GUEST_CR4,
+		VM_REG_GUEST_DR7,
+		VM_REG_GUEST_RSP,
+		VM_REG_GUEST_RIP,
+		VM_REG_GUEST_RFLAGS,
+	};
+	/* Registers stored as selector followed by descriptor. */
+	static const int segs[] = {
+		VM_REG_GUEST_ES,
+		VM_REG_GUEST_CS,
+		VM_REG_GUEST_SS,
+		VM_REG_GUEST_DS,
+		VM_REG_GUEST_FS,
+		VM_REG_GUEST_GS,
+		VM_REG_GUEST_TR,
+		VM_REG_GUEST_LDTR,
+	};
+	/* Descriptor-only registers. */
+	static const int tables[] = {
+		VM_REG_GUEST_IDTR,
+		VM_REG_GUEST_GDTR,
+	};
+	/* Guest page tables */
+	static const int pdptes[] = {
+		VM_REG_GUEST_PDPTE0,
+		VM_REG_GUEST_PDPTE1,
+		VM_REG_GUEST_PDPTE2,
+		VM_REG_GUEST_PDPTE3,
+	};
+	/* Other guest state, addressed by VMCS field encoding. */
+	static const int fields[] = {
+		VMCS_GUEST_IA32_SYSENTER_CS,
+		VMCS_GUEST_IA32_SYSENTER_ESP,
+		VMCS_GUEST_IA32_SYSENTER_EIP,
+		VMCS_GUEST_INTERRUPTIBILITY,
+		VMCS_GUEST_ACTIVITY,
+		VMCS_GUEST_IA32_EFER,
+		VMCS_ENTRY_CTLS,
+		VMCS_EXIT_CTLS,
+	};
+	struct vmcs *vmcs;
+	struct vmx *vmx;
+	size_t i;
+	int err, run, hostcpu;
+
+	KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+	vmx = (struct vmx *)arg;
+	vmcs = &vmx->vmcs[vcpu];
+
+	run = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (run && hostcpu != curcpu) {
+		printf("%s: %s%d is running\n", __func__, vm_name(vmx->vm),
+		    vcpu);
+		return (EINVAL);
+	}
+
+	for (i = 0; i < nitems(regs); i++) {
+		err = vmcs_snapshot_reg(vmcs, run, regs[i], meta);
+		if (err != 0)
+			return (err);
+	}
+
+	/* Guest segments */
+	for (i = 0; i < nitems(segs); i++) {
+		err = vmcs_snapshot_reg(vmcs, run, segs[i], meta);
+		if (err != 0)
+			return (err);
+		err = vmcs_snapshot_desc(vmcs, run, segs[i], meta);
+		if (err != 0)
+			return (err);
+	}
+
+	err = vmcs_snapshot_reg(vmcs, run, VM_REG_GUEST_EFER, meta);
+	if (err != 0)
+		return (err);
+
+	for (i = 0; i < nitems(tables); i++) {
+		err = vmcs_snapshot_desc(vmcs, run, tables[i], meta);
+		if (err != 0)
+			return (err);
+	}
+
+	for (i = 0; i < nitems(pdptes); i++) {
+		err = vmcs_snapshot_reg(vmcs, run, pdptes[i], meta);
+		if (err != 0)
+			return (err);
+	}
+
+	for (i = 0; i < nitems(fields); i++) {
+		err = vmcs_snapshot_any(vmcs, run, fields[i], meta);
+		if (err != 0)
+			return (err);
+	}
+
+	return (0);
+}
+
+/*
+ * Install 'offset' as the TSC offset of 'vcpu' by calling
+ * vmx_set_tsc_offset().
+ *
+ * If the vCPU is not running, its VMCS is loaded with VMPTRLD() for the
+ * duration of the update and released with VMCLEAR() afterwards.  A vCPU that
+ * is running on a different host CPU cannot be updated from here; EINVAL is
+ * returned in that case.  Otherwise returns the result of
+ * vmx_set_tsc_offset().
+ */
+static int
+vmx_restore_tsc(void *arg, int vcpu, uint64_t offset)
+{
+	struct vmcs *vmcs;
+	struct vmx *vmx;
+	int error, running, hostcpu;
+
+	KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+	vmx = (struct vmx *)arg;
+	vmcs = &vmx->vmcs[vcpu];
+
+	running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu) {
+		/* Terminate the line so the message does not run into the next one. */
+		printf("%s: %s%d is running\n", __func__, vm_name(vmx->vm),
+		    vcpu);
+		return (EINVAL);
+	}
+
+	if (!running)
+		VMPTRLD(vmcs);
+
+	error = vmx_set_tsc_offset(vmx, vcpu, offset);
+
+	if (!running)
+		VMCLEAR(vmcs);
+	return (error);
+}
+
struct vmm_ops vmm_ops_intel = {
vmx_init,
vmx_cleanup,
@@ -3802,4 +3950,7 @@
ept_vmspace_free,
vmx_vlapic_init,
vmx_vlapic_cleanup,
+ vmx_snapshot_vmi,
+ vmx_snapshot_vmcx,
+ vmx_restore_tsc,
};
Index: sys/amd64/vmm/io/vatpic.h
===================================================================
--- sys/amd64/vmm/io/vatpic.h
+++ sys/amd64/vmm/io/vatpic.h
@@ -36,6 +36,8 @@
#define IO_ELCR1 0x4d0
#define IO_ELCR2 0x4d1
+struct vm_snapshot_meta;
+
struct vatpic *vatpic_init(struct vm *vm);
void vatpic_cleanup(struct vatpic *vatpic);
@@ -54,4 +56,6 @@
void vatpic_pending_intr(struct vm *vm, int *vecptr);
void vatpic_intr_accepted(struct vm *vm, int vector);
+int vatpic_snapshot(struct vatpic *vatpic, struct vm_snapshot_meta *meta);
+
#endif /* _VATPIC_H_ */
Index: sys/amd64/vmm/io/vatpic.c
===================================================================
--- sys/amd64/vmm/io/vatpic.c
+++ sys/amd64/vmm/io/vatpic.c
@@ -42,6 +42,7 @@
#include <dev/ic/i8259.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vmm_lapic.h"
@@ -808,3 +809,41 @@
{
free(vatpic, M_VATPIC);
}
+
+/*
+ * Pass the state of the virtual 8259 PIC pair through the snapshot buffer
+ * described by 'meta'.
+ *
+ * The function does not branch on meta->op: the same field sequence is used
+ * whichever operation is requested, so the statement order below defines the
+ * layout of this device's snapshot data and must not be reordered.
+ *
+ * SNAPSHOT_VAR_OR_LEAVE / SNAPSHOT_BUF_OR_LEAVE are defined outside this
+ * file section; they are given 'ret' and the 'done' label, presumably to
+ * record an error and jump there on failure.
+ *
+ * Returns the value left in 'ret' by the last snapshot macro executed.
+ */
+int
+vatpic_snapshot(struct vatpic *vatpic, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct atpic *atpic;
+
+ /* per-controller state, one pass for each entry of vatpic->atpic[] */
+ for (i = 0; i < nitems(vatpic->atpic); i++) {
+ atpic = &vatpic->atpic[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(atpic->ready, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->icw_num, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->rd_cmd_reg, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(atpic->aeoi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->poll, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->rotate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->sfn, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->irq_base, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->request, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->service, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->mask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->smm, meta, ret, done);
+
+ SNAPSHOT_BUF_OR_LEAVE(atpic->acnt, sizeof(atpic->acnt),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->lowprio, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->intr_raised, meta, ret, done);
+
+ }
+
+ /* state shared by both controllers: the whole elc[] array */
+ SNAPSHOT_BUF_OR_LEAVE(vatpic->elc, sizeof(vatpic->elc),
+ meta, ret, done);
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vatpit.h
===================================================================
--- sys/amd64/vmm/io/vatpit.h
+++ sys/amd64/vmm/io/vatpit.h
@@ -36,6 +36,8 @@
#define NMISC_PORT 0x61
+struct vm_snapshot_meta;
+
struct vatpit *vatpit_init(struct vm *vm);
void vatpit_cleanup(struct vatpit *vatpit);
@@ -43,5 +45,6 @@
uint32_t *eax);
int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port,
int bytes, uint32_t *eax);
+int vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta);
#endif /* _VATPIT_H_ */
Index: sys/amd64/vmm/io/vatpit.c
===================================================================
--- sys/amd64/vmm/io/vatpit.c
+++ sys/amd64/vmm/io/vatpit.c
@@ -38,6 +38,7 @@
#include <sys/systm.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vatpic.h"
@@ -455,3 +456,36 @@
free(vatpit, M_VATPIT);
}
+
+/*
+ * Pass the state of the virtual 8254 PIT through the snapshot buffer
+ * described by 'meta': the device-wide freq_sbt followed by the fields of
+ * every entry of vatpit->channel[].
+ *
+ * The function does not branch on meta->op, so the statement order below
+ * defines the layout of this device's snapshot data and must not be
+ * reordered.
+ *
+ * Returns the value left in 'ret' by the last snapshot macro executed.
+ */
+int
+vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct channel *channel;
+
+ SNAPSHOT_VAR_OR_LEAVE(vatpit->freq_sbt, meta, ret, done);
+
+ /*
+ * TODO: properly restore timers; they will NOT work currently.
+ * NOTE(review): the warning below is printed for every call, whatever
+ * meta->op is, not only on restore.
+ */
+ printf("%s: snapshot restore does not reset timers!\r\n", __func__);
+
+ for (i = 0; i < nitems(vatpit->channel); i++) {
+ channel = &vatpit->channel[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(channel->mode, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->initial, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->now_sbt, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(channel->cr, sizeof(channel->cr),
+ meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(channel->ol, sizeof(channel->ol),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->slatched, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->crbyte, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->frbyte, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->callout_sbt, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vhpet.h
===================================================================
--- sys/amd64/vmm/io/vhpet.h
+++ sys/amd64/vmm/io/vhpet.h
@@ -35,6 +35,8 @@
#define VHPET_BASE 0xfed00000
#define VHPET_SIZE 1024
+struct vm_snapshot_meta;
+
struct vhpet *vhpet_init(struct vm *vm);
void vhpet_cleanup(struct vhpet *vhpet);
int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val,
@@ -42,5 +44,7 @@
int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val,
int size, void *arg);
int vhpet_getcap(struct vm_hpet_cap *cap);
+int vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta);
+int vhpet_restore_time(struct vhpet *vhpet);
#endif /* _VHPET_H_ */
Index: sys/amd64/vmm/io/vhpet.c
===================================================================
--- sys/amd64/vmm/io/vhpet.c
+++ sys/amd64/vmm/io/vhpet.c
@@ -43,6 +43,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vatpic.h"
@@ -761,3 +762,47 @@
cap->capabilities = vhpet_capabilities();
return (0);
}
+
+/*
+ * Pass the state of the virtual HPET through the snapshot buffer described by
+ * 'meta': freq_sbt, config, isr, the counter value, and then the registers of
+ * every entry of vhpet->timer[].
+ *
+ * The statement order below defines the layout of this device's snapshot data
+ * and must not be reordered.
+ *
+ * Returns the value left in 'ret' by the last snapshot macro executed.
+ */
+int
+vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
+{
+ int i, ret;
+ uint32_t countbase;
+
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);
+
+ /*
+ * At restore time the countbase should have the value it had when the
+ * snapshot was created.  Since that value is not kept directly in
+ * vhpet->countbase, but is computed relative to the current system
+ * uptime using countbase_sbt, save the value returned by vhpet_counter().
+ */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ countbase = vhpet_counter(vhpet, NULL);
+ /* on save 'countbase' is the source, on restore it is the destination */
+ SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ vhpet->countbase = countbase;
+
+ for (i = 0; i < nitems(vhpet->timer); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
+ meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Resume the HPET main counter after a restore: when the counter is enabled,
+ * vhpet_start_counting() is called; otherwise nothing is done.
+ * Always returns 0.
+ */
+int
+vhpet_restore_time(struct vhpet *vhpet)
+{
+	if (!vhpet_counter_enabled(vhpet))
+		return (0);
+
+	vhpet_start_counting(vhpet);
+	return (0);
+}
Index: sys/amd64/vmm/io/vioapic.h
===================================================================
--- sys/amd64/vmm/io/vioapic.h
+++ sys/amd64/vmm/io/vioapic.h
@@ -32,6 +32,8 @@
#ifndef _VIOAPIC_H_
#define _VIOAPIC_H_
+struct vm_snapshot_meta;
+
#define VIOAPIC_BASE 0xFEC00000
#define VIOAPIC_SIZE 4096
@@ -49,4 +51,6 @@
int vioapic_pincount(struct vm *vm);
void vioapic_process_eoi(struct vm *vm, int vcpuid, int vector);
+int vioapic_snapshot(struct vioapic *vioapic,
+ struct vm_snapshot_meta *meta);
#endif
Index: sys/amd64/vmm/io/vioapic.c
===================================================================
--- sys/amd64/vmm/io/vioapic.c
+++ sys/amd64/vmm/io/vioapic.c
@@ -42,6 +42,7 @@
#include <x86/apicreg.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vmm_lapic.h"
@@ -499,3 +500,20 @@
return (REDIR_ENTRIES);
}
+
+/*
+ * Pass the state of the virtual I/O APIC through the snapshot buffer
+ * described by 'meta': ioregsel, then 'reg' and 'acnt' of every entry of
+ * vioapic->rtbl[].
+ *
+ * The function does not branch on meta->op, so the statement order below
+ * defines the layout of this device's snapshot data and must not be
+ * reordered.
+ *
+ * Returns the value left in 'ret' by the last snapshot macro executed.
+ */
+int
+vioapic_snapshot(struct vioapic *vioapic, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->ioregsel, meta, ret, done);
+
+ for (i = 0; i < nitems(vioapic->rtbl); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->rtbl[i].reg, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->rtbl[i].acnt, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vlapic.h
===================================================================
--- sys/amd64/vmm/io/vlapic.h
+++ sys/amd64/vmm/io/vlapic.h
@@ -32,6 +32,7 @@
#define _VLAPIC_H_
struct vm;
+struct vm_snapshot_meta;
enum x2apic_state;
int vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
@@ -108,4 +109,9 @@
void vlapic_dcr_write_handler(struct vlapic *vlapic);
void vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset);
void vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val);
+
+struct LAPIC *vlapic_get_LAPIC(struct vlapic *vlapic);
+int vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta);
+int vlapic_lapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta);
+
#endif /* _VLAPIC_H_ */
Index: sys/amd64/vmm/io/vlapic.c
===================================================================
--- sys/amd64/vmm/io/vlapic.c
+++ sys/amd64/vmm/io/vlapic.c
@@ -46,6 +46,7 @@
#include <machine/smp.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_ktr.h"
@@ -1654,3 +1655,110 @@
VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
vlapic_set_tmr(vlapic, vector, true);
}
+
+/*
+ * Accessor: return the 'apic_page' pointer stored in 'vlapic'.
+ */
+struct LAPIC *
+vlapic_get_LAPIC(struct vlapic *vlapic)
+{
+
+	return (vlapic->apic_page);
+}
+
+/*
+ * Re-arm the local APIC timer callout after a restore, based on the current
+ * count register value 'ccr' that was recorded in the snapshot.
+ *
+ * The implementation is similar to the one in vlapic_icrtmr_write_handler().
+ *
+ *  - ccr != 0: the timer fires 'ccr' ticks (at timer_freq_bt) from now.
+ *  - ccr == 0: nothing is armed unless the timer is periodic, in which case
+ *    a full timer_period_bt is scheduled.
+ *
+ * The whole update is done with the vlapic timer lock held.
+ */
+static void
+vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr)
+{
+	struct bintime delta;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	if (ccr != 0) {
+		/* Time left until expiry: ccr ticks of timer_freq_bt each. */
+		delta = vlapic->timer_freq_bt;
+		bintime_mul(&delta, ccr);
+
+		binuptime(&vlapic->timer_fire_bt);
+		bintime_add(&vlapic->timer_fire_bt, &delta);
+
+		callout_reset_sbt(&vlapic->callout, bttosbt(delta), 0,
+		    vlapic_callout_handler, vlapic, 0);
+	} else if (vlapic_periodic_timer(vlapic)) {
+		/* even if the CCR was 0, periodic timers should be reset */
+		binuptime(&vlapic->timer_fire_bt);
+		bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
+
+		callout_stop(&vlapic->callout);
+		callout_reset_sbt(&vlapic->callout,
+		    bttosbt(vlapic->timer_period_bt), 0,
+		    vlapic_callout_handler, vlapic, 0);
+	}
+
+	VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+/*
+ * Pass the local APIC state of every vCPU slot (0 .. VM_MAXCPU - 1) through
+ * the snapshot buffer described by 'meta'.
+ *
+ * Per vCPU, in this order: the APIC register page (PAGE_SIZE bytes),
+ * esr_pending, timer_freq_bt (sec, frac), isrvec_stk, isrvec_stk_top,
+ * boot_state, lvt_last and the timer's current count value.  The statement
+ * order defines the layout of the snapshot data and must not be reordered.
+ *
+ * Two pieces of state are derived rather than copied on restore:
+ *  - timer_period_bt is recomputed from timer_freq_bt and icr_timer;
+ *  - the timer callout is re-armed from the recorded current count.
+ *
+ * Returns 0 on success, or the value left in 'ret' by a failing snapshot
+ * macro.
+ */
+int
+vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+ int i, ret;
+ struct vlapic *vlapic;
+ struct LAPIC *lapic;
+ uint32_t ccr;
+
+ KASSERT(vm != NULL, ("%s: arg was NULL", __func__));
+
+ ret = 0;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vlapic = vm_lapic(vm, i);
+
+ /* snapshot the page first; timer period depends on icr_timer */
+ lapic = vlapic->apic_page;
+ SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac,
+ meta, ret, done);
+
+ /*
+ * Timer period is equal to 'icr_timer' ticks at a frequency of
+ * 'timer_freq_bt'.
+ */
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ vlapic->timer_period_bt = vlapic->timer_freq_bt;
+ bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+ }
+
+ SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk,
+ sizeof(vlapic->isrvec_stk),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done);
+
+ SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
+ sizeof(vlapic->lvt_last),
+ meta, ret, done);
+
+ /* on save 'ccr' is read from the device; on restore it is filled
+ * in by the snapshot macro below
+ */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ ccr = vlapic_get_ccr(vlapic);
+
+ SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done);
+
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ /* Reset the value of the 'timer_fire_bt' and the vlapic
+ * callout based on the value of the current count
+ * register saved when the VM snapshot was created
+ */
+ vlapic_reset_callout(vlapic, ccr);
+ }
+ }
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vpmtmr.h
===================================================================
--- sys/amd64/vmm/io/vpmtmr.h
+++ sys/amd64/vmm/io/vpmtmr.h
@@ -34,6 +34,7 @@
#define IO_PMTMR 0x408
struct vpmtmr;
+struct vm_snapshot_meta;
struct vpmtmr *vpmtmr_init(struct vm *vm);
void vpmtmr_cleanup(struct vpmtmr *pmtmr);
@@ -41,4 +42,6 @@
int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
+int vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta);
+
#endif
Index: sys/amd64/vmm/io/vpmtmr.c
===================================================================
--- sys/amd64/vmm/io/vpmtmr.c
+++ sys/amd64/vmm/io/vpmtmr.c
@@ -36,6 +36,7 @@
#include <sys/systm.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vpmtmr.h"
@@ -103,3 +104,14 @@
return (0);
}
+
+/*
+ * Pass the state of the virtual ACPI PM timer through the snapshot buffer
+ * described by 'meta'.  Only 'baseval' is transferred.
+ *
+ * Returns the value left in 'ret' by SNAPSHOT_VAR_OR_LEAVE.
+ */
+int
+vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(vpmtmr->baseval, meta, ret, done);
+
+done:
+ return (ret);
+}
Index: sys/amd64/vmm/io/vrtc.h
===================================================================
--- sys/amd64/vmm/io/vrtc.h
+++ sys/amd64/vmm/io/vrtc.h
@@ -34,6 +34,7 @@
#include <isa/isareg.h>
struct vrtc;
+struct vm_snapshot_meta;
struct vrtc *vrtc_init(struct vm *vm);
void vrtc_cleanup(struct vrtc *vrtc);
@@ -49,4 +50,6 @@
int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
+int vrtc_snapshot(struct vrtc *vrtc, struct vm_snapshot_meta *meta);
+
#endif
Index: sys/amd64/vmm/io/vrtc.c
===================================================================
--- sys/amd64/vmm/io/vrtc.c
+++ sys/amd64/vmm/io/vrtc.c
@@ -40,6 +40,7 @@
#include <sys/sysctl.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <isa/rtc.h>
@@ -1019,3 +1020,43 @@
callout_drain(&vrtc->callout);
free(vrtc, M_VRTC);
}
+
+/*
+ * Pass the state of the virtual RTC through the snapshot buffer described by
+ * 'meta': the index register, base_rtctime and every member of 'rtcdev'.
+ * The statement order defines the layout of the snapshot data and must not be
+ * reordered.
+ *
+ * On restore, base_uptime is re-based to the current uptime rather than taken
+ * from the snapshot.  After a successful transfer the callout is re-armed with
+ * vrtc_callout_reset().
+ *
+ * The vrtc lock is held for the whole transfer and is released on every exit
+ * path, including the error path taken by the snapshot macros.
+ * NOTE(review): the snapshot macros presumably end up in copyin()/copyout()
+ * while this lock is held - confirm that is permitted for the lock type used
+ * by VRTC_LOCK().
+ *
+ * Returns the value left in 'ret' by the last snapshot macro executed.
+ */
+int
+vrtc_snapshot(struct vrtc *vrtc, struct vm_snapshot_meta *meta)
+{
+	int ret;
+
+	VRTC_LOCK(vrtc);
+
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->addr, meta, ret, done);
+	if (meta->op == VM_SNAPSHOT_RESTORE)
+		vrtc->base_uptime = sbinuptime();
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->base_rtctime, meta, ret, done);
+
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.sec, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_sec, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.min, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_min, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.hour, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_hour, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_week, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_month, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.month, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.year, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_a, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_b, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_c, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_d, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram, sizeof(vrtc->rtcdev.nvram),
+	    meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.century, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram2, sizeof(vrtc->rtcdev.nvram2),
+	    meta, ret, done);
+
+	vrtc_callout_reset(vrtc, vrtc_freq(vrtc));
+
+done:
+	/* Reached on success and on failure: never return with the lock held. */
+	VRTC_UNLOCK(vrtc);
+
+	return (ret);
+}
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c
+++ sys/amd64/vmm/vmm.c
@@ -44,7 +44,7 @@
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
-#include <sys/systm.h>
+#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -53,6 +53,11 @@
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_kern.h>
+#include <vm/vnode_pager.h>
+#include <vm/swap_pager.h>
+#include <vm/uma.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
@@ -64,6 +69,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -111,6 +117,7 @@
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
uint64_t nextrip; /* (x) next instruction to execute */
+ uint64_t tsc_offset; /* (o) TSC offsetting */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@@ -119,13 +126,6 @@
#define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
-struct mem_seg {
- size_t len;
- bool sysmem;
- struct vm_object *object;
-};
-#define VM_MAX_MEMSEGS 3
-
struct mem_map {
vm_paddr_t gpa;
size_t len;
@@ -181,6 +181,9 @@
#define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0)
#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
+/*
+ * XXX: Updated VMRUN to test vcpu restoring
+ */
#define VMRUN(vmi, vcpu, rip, pmap, evinfo) \
(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO)
#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
@@ -204,6 +207,12 @@
(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define VLAPIC_CLEANUP(vmi, vlapic) \
(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
+#define VM_SNAPSHOT_VMI(vmi, meta) \
+ (ops != NULL ? (*ops->vmsnapshot)(vmi, meta) : ENXIO)
+#define VM_SNAPSHOT_VMCX(vmi, meta, vcpuid) \
+ (ops != NULL ? (*ops->vmcx_snapshot)(vmi, meta, vcpuid) : ENXIO)
+#define VM_RESTORE_TSC(vmi, vcpuid, offset) \
+ (ops != NULL ? (*ops->vm_restore_tsc)(vmi, vcpuid, offset) : ENXIO)
#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
#define fpu_stop_emulating() clts()
@@ -211,6 +220,7 @@
SDT_PROVIDER_DEFINE(vmm);
static MALLOC_DEFINE(M_VM, "vm", "vm");
+static MALLOC_DEFINE(M_RESTORE, "restore", "restore");
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
@@ -289,6 +299,7 @@
vcpu->hostcpu = NOCPU;
vcpu->guestfpu = fpu_save_area_alloc();
vcpu->stats = vmm_stat_alloc();
+ vcpu->tsc_offset = 0;
}
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
@@ -795,6 +806,11 @@
}
}
+/*
+ * Accessor: return the VM's 'mem_segs' member.
+ */
+struct mem_seg *
+vm_get_memsegs(struct vm *vm)
+{
+
+	return (vm->mem_segs);
+}
+
static void
vm_free_memmap(struct vm *vm, int ident)
{
@@ -1701,6 +1717,8 @@
vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
+ if (curthread->td_critnest != 1)
+ return (EINVAL);
critical_exit();
if (error == 0) {
@@ -2709,3 +2727,217 @@
VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
+
+/*
+ * Pass the generic (vendor-independent) per-vCPU state of every vCPU slot
+ * (0 .. VM_MAXCPU - 1) through the snapshot buffer described by 'meta'.
+ *
+ * The function does not branch on meta->op, so the statement order below
+ * defines the layout of the snapshot data and must not be reordered.
+ *
+ * Returns the value left in 'ret' by the last snapshot macro executed.
+ */
+static int
+vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct vcpu *vcpu;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vcpu = &vm->vcpu[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
+ /* XXX we're cheating here, since the value of tsc_offset as
+ * saved here is actually the guest's TSC value, not an offset.
+ *
+ * It will be turned back into an actual offset when the
+ * TSC restore function is called
+ */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->tsc_offset, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Save or restore the generic per-vCPU state of the VM (see
+ * vm_snapshot_vcpus()).
+ *
+ * For a save, every vCPU's tsc_offset is temporarily turned into the TSC
+ * value as seen by the guest (offset + host TSC) so that the absolute value
+ * ends up in the snapshot; it is turned back into an offset afterwards.  The
+ * conversion is undone even when vm_snapshot_vcpus() fails, so a failed save
+ * never leaves the VM with a corrupted tsc_offset.
+ *
+ * Returns the result of vm_snapshot_vcpus().
+ */
+static int
+vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	uint64_t now;
+	int i, ret;
+
+	now = rdtsc();
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		/*
+		 * XXX make tsc_offset take the value TSC proper as seen by the
+		 * guest
+		 */
+		for (i = 0; i < VM_MAXCPU; i++)
+			vm->vcpu[i].tsc_offset += now;
+	}
+
+	ret = vm_snapshot_vcpus(vm, meta);
+	if (ret != 0)
+		printf("%s: failed to copy vm data to user buffer\n", __func__);
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		/*
+		 * XXX turn tsc_offset back into an offset; actual value is only
+		 * required for restore; using it otherwise would be wrong
+		 */
+		for (i = 0; i < VM_MAXCPU; i++)
+			vm->vcpu[i].tsc_offset -= now;
+	}
+
+	return (ret);
+}
+
+/* Snapshot request handler for the local APICs: forwards to vlapic_snapshot(). */
+static int
+vm_snapshot_vlapic(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+
+	return (vlapic_snapshot(vm, meta));
+}
+
+/* Snapshot request handler for the I/O APIC: forwards to vioapic_snapshot(). */
+static int
+vm_snapshot_vioapic(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+
+	return (vioapic_snapshot(vm_ioapic(vm), meta));
+}
+
+/* Snapshot request handler for the HPET: forwards to vhpet_snapshot(). */
+static int
+vm_snapshot_vhpet(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+
+	return (vhpet_snapshot(vm_hpet(vm), meta));
+}
+
+/* Snapshot request handler for the 8259 PICs: forwards to vatpic_snapshot(). */
+static int
+vm_snapshot_vatpic(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+
+	return (vatpic_snapshot(vm_atpic(vm), meta));
+}
+
+/* Snapshot request handler for the 8254 PIT: forwards to vatpit_snapshot(). */
+static int
+vm_snapshot_vatpit(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+
+	return (vatpit_snapshot(vm_atpit(vm), meta));
+}
+
+/* Snapshot request handler for the PM timer: forwards to vpmtmr_snapshot(). */
+static int
+vm_snapshot_vpmtmr(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+
+	return (vpmtmr_snapshot(vm_pmtmr(vm), meta));
+}
+
+/* Snapshot request handler for the RTC: forwards to vrtc_snapshot(). */
+static int
+vm_snapshot_vrtc(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+
+	return (vrtc_snapshot(vm_rtc(vm), meta));
+}
+
+/*
+ * Run the vendor-specific VMCS/VMCB snapshot hook (VM_SNAPSHOT_VMCX) for
+ * every vCPU slot, in ascending order.
+ *
+ * Stops at the first vCPU whose hook fails, logs the failure and returns that
+ * error; returns 0 when all vCPUs were processed.
+ */
+static int
+vm_snapshot_vmcx(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	int error, vcpuid;
+
+	error = 0;
+
+	for (vcpuid = 0; vcpuid < VM_MAXCPU; vcpuid++) {
+		error = VM_SNAPSHOT_VMCX(vm->cookie, meta, vcpuid);
+		if (error == 0)
+			continue;
+
+		printf("%s: failed to snapshot vmcs/vmcb data for "
+		    "vCPU: %d; error: %d\n", __func__, vcpuid, error);
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Entry point for a snapshot request on one kernel-side component.
+ *
+ * meta->dev_req selects the component; the request is handed to the matching
+ * handler, which saves to or restores from the buffer in 'meta' according to
+ * meta->op.
+ *
+ * Returns the handler's result, or EINVAL (after logging) when dev_req does
+ * not name a known component.
+ */
+int
+vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+
+	switch (meta->dev_req) {
+	case STRUCT_VMX:
+		return (VM_SNAPSHOT_VMI(vm->cookie, meta));
+	case STRUCT_VMCX:
+		return (vm_snapshot_vmcx(vm, meta));
+	case STRUCT_VM:
+		return (vm_snapshot_vm(vm, meta));
+	case STRUCT_VIOAPIC:
+		return (vm_snapshot_vioapic(vm, meta));
+	case STRUCT_VLAPIC:
+		return (vm_snapshot_vlapic(vm, meta));
+	case STRUCT_VHPET:
+		return (vm_snapshot_vhpet(vm, meta));
+	case STRUCT_VATPIC:
+		return (vm_snapshot_vatpic(vm, meta));
+	case STRUCT_VATPIT:
+		return (vm_snapshot_vatpit(vm, meta));
+	case STRUCT_VPMTMR:
+		return (vm_snapshot_vpmtmr(vm, meta));
+	case STRUCT_VRTC:
+		return (vm_snapshot_vrtc(vm, meta));
+	default:
+		break;
+	}
+
+	printf("%s: failed to find the requested type %#x\n",
+	    __func__, meta->dev_req);
+	return (EINVAL);
+}
+
+/*
+ * Record 'offset' as the TSC offset of vCPU 'vcpuid' in the generic vcpu
+ * structure.
+ *
+ * Returns 0 on success, or EINVAL when 'vcpuid' is outside
+ * [0, VM_MAXCPU).
+ */
+int
+vm_set_tsc_offset(struct vm *vm, int vcpuid, uint64_t offset)
+{
+
+	if (vcpuid >= 0 && vcpuid < VM_MAXCPU) {
+		vm->vcpu[vcpuid].tsc_offset = offset;
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Re-synchronise guest time sources after a restore.
+ *
+ * The host TSC is sampled first, then the HPET is restarted through
+ * vhpet_restore_time().  Afterwards every vCPU's stored tsc_offset has the
+ * sampled host TSC subtracted from it and the result is handed to the
+ * VM_RESTORE_TSC hook.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+int
+vm_restore_time(struct vm *vm)
+{
+	uint64_t now;
+	int error, vcpuid;
+
+	now = rdtsc();
+
+	error = vhpet_restore_time(vm_hpet(vm));
+	if (error != 0)
+		return (error);
+
+	for (vcpuid = 0; vcpuid < nitems(vm->vcpu); vcpuid++) {
+		error = VM_RESTORE_TSC(vm->cookie, vcpuid,
+		    vm->vcpu[vcpuid].tsc_offset - now);
+		if (error != 0)
+			break;
+	}
+
+	return (error);
+}
Index: sys/amd64/vmm/vmm_dev.c
===================================================================
--- sys/amd64/vmm/vmm_dev.c
+++ sys/amd64/vmm/vmm_dev.c
@@ -53,8 +53,9 @@
#include <machine/vmparam.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -369,6 +370,7 @@
struct vm_cpu_topology *topology;
uint64_t *regvals;
int *regnums;
+ struct vm_snapshot_req *snapshot_req;
error = vmm_priv_check(curthread->td_ucred);
if (error)
@@ -460,6 +462,13 @@
case VM_REINIT:
error = vm_reinit(sc->vm);
break;
+ case VM_VCPU_LOCK_ALL:
+ error = vcpu_lock_all(sc);
+ break;
+ case VM_VCPU_UNLOCK_ALL:
+ vcpu_unlock_all(sc);
+ error = 0;
+ break;
case VM_STAT_DESC: {
statdesc = (struct vm_stat_desc *)data;
error = vmm_stat_desc_copy(statdesc->index,
@@ -771,6 +780,15 @@
vm_get_topology(sc->vm, &topology->sockets, &topology->cores,
&topology->threads, &topology->maxcpus);
error = 0;
+ break;
+ case VM_SNAPSHOT_REQ:
+ /* Hand the request's metadata to the kernel snapshot dispatcher. */
+ snapshot_req = (struct vm_snapshot_req *)data;
+ error = vm_snapshot_req(sc->vm, &snapshot_req->meta);
+ break;
+ case VM_RESTORE_TIME:
+ /* Restore the HPET and per-vCPU TSC state of the VM. */
+ error = vm_restore_time(sc->vm);
break;
default:
error = ENOTTY;
@@ -1000,6 +1015,7 @@
error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &vmmdevsw, NULL,
UID_ROOT, GID_WHEEL, 0600, "vmm/%s", buf);
+ /* TODO: shouldn't be done inside lock? */
if (error != 0) {
vmmdev_destroy(sc);
return (error);
Index: sys/amd64/vmm/vmm_snapshot.c
===================================================================
--- /dev/null
+++ sys/amd64/vmm/vmm_snapshot.c
@@ -0,0 +1,110 @@
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/vmm_snapshot.h>
+
+/*
+ * Log that a snapshot operation on the buffer called 'bufname' failed.  'op'
+ * selects the word printed for the operation: "save", "restore" or, for any
+ * other value, "unknown".
+ */
+void
+vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op)
+{
+	const char *opname;
+
+	switch (op) {
+	case VM_SNAPSHOT_SAVE:
+		opname = "save";
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		opname = "restore";
+		break;
+	default:
+		opname = "unknown";
+		break;
+	}
+
+	printf("%s: snapshot-%s failed for %s\r\n", __func__, opname, bufname);
+}
+
+/*
+ * Move 'data_size' bytes between the kernel object at 'data' and the snapshot
+ * buffer described by 'meta', in the direction selected by meta->op, then
+ * advance the buffer cursor.
+ *
+ * The snapshot buffer is accessed with copyout()/copyin(), i.e. it is treated
+ * as a userspace address.
+ *
+ * Returns 0 on success, E2BIG if fewer than 'data_size' bytes remain in the
+ * buffer, EINVAL for an unrecognised operation, or the error reported by
+ * copyout()/copyin().  The cursor is left untouched on failure.
+ */
+int
+vm_snapshot_buf(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *buffer;
+	void *kdata;
+	int error;
+
+	/* The copy routines do not take volatile-qualified pointers. */
+	kdata = (void *)(uintptr_t)data;
+	buffer = &meta->buffer;
+
+	if (buffer->buf_rem < data_size) {
+		printf("%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		error = copyout(kdata, buffer->buf, data_size);
+	else if (meta->op == VM_SNAPSHOT_RESTORE)
+		error = copyin(buffer->buf, kdata, data_size);
+	else
+		error = EINVAL;
+
+	/* Do not advance past bytes that were never transferred. */
+	if (error != 0)
+		return (error);
+
+	buffer->buf += data_size;
+	buffer->buf_rem -= data_size;
+
+	return (0);
+}
+
+/*
+ * Report how many bytes of the snapshot buffer in 'meta' have been consumed
+ * so far (buf_size - buf_rem).  An inconsistent buffer, where more bytes
+ * remain than the buffer holds, is logged and reported as 0.
+ */
+size_t
+vm_get_snapshot_size(struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *sbuf = &meta->buffer;
+
+	if (sbuf->buf_rem <= sbuf->buf_size)
+		return (sbuf->buf_size - sbuf->buf_rem);
+
+	printf("%s: Invalid buffer: size = %zu, rem = %zu\r\n",
+	    __func__, sbuf->buf_size, sbuf->buf_rem);
+	return (0);
+}
+
+/*
+ * Kernel build of the guest-to-host address hook.  The kernel
+ * devices/structures should not map guest memory, so every argument is
+ * ignored and 0 is returned unconditionally.
+ */
+int
+vm_snapshot_guest2host_addr(void **addrp, size_t len, int restore_null,
+    struct vm_snapshot_meta *meta)
+{
+
+	return (0);
+}
+
+/*
+ * On save, write 'data_size' bytes from 'data' into the snapshot buffer.  On
+ * restore, compare 'data' against the next 'data_size' bytes of the snapshot
+ * buffer without modifying 'data'.
+ *
+ * The snapshot buffer is treated as a userspace address (the save path
+ * writes it with copyout()), so the restore path fetches it with copyin() in
+ * small chunks and compares those instead of handing the user pointer to
+ * memcmp().
+ *
+ * Returns 0 on success / equality, the memcmp() result of the first
+ * differing chunk on mismatch, E2BIG if the buffer is too short, EINVAL for
+ * an unknown operation, or the copyout()/copyin() error.  The cursor is
+ * advanced after a completed save or compare (equal or not) and left alone
+ * on every other error.
+ */
+int
+vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *buffer;
+	const uint8_t *kdata;
+	uint8_t chunk[128];
+	size_t len, off;
+	int ret;
+
+	/* The copy/compare routines do not take volatile-qualified pointers. */
+	kdata = (const uint8_t *)(uintptr_t)data;
+	buffer = &meta->buffer;
+
+	if (buffer->buf_rem < data_size) {
+		printf("%s: buffer too small\r\n", __func__);
+		ret = E2BIG;
+		goto done;
+	}
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		ret = copyout(kdata, buffer->buf, data_size);
+		if (ret != 0)
+			goto done;
+	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
+		ret = 0;
+		for (off = 0; off < data_size; off += len) {
+			len = data_size - off;
+			if (len > sizeof(chunk))
+				len = sizeof(chunk);
+
+			ret = copyin(buffer->buf + off, chunk, len);
+			if (ret != 0)
+				goto done;
+
+			/* Chunks are visited in order: first difference wins. */
+			ret = memcmp(kdata + off, chunk, len);
+			if (ret != 0)
+				break;
+		}
+	} else {
+		ret = EINVAL;
+		goto done;
+	}
+
+	buffer->buf += data_size;
+	buffer->buf_rem -= data_size;
+
+done:
+	return (ret);
+}
Index: sys/modules/vmm/Makefile
===================================================================
--- sys/modules/vmm/Makefile
+++ sys/modules/vmm/Makefile
@@ -18,6 +18,7 @@
vmm_dev.c \
vmm_host.c \
vmm_instruction_emul.c \
+ vmm_snapshot.c \
vmm_ioport.c \
vmm_lapic.c \
vmm_mem.c \
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -57,6 +57,7 @@
rfb.c \
rtc.c \
smbiostbl.c \
+ snapshot.c \
sockstream.c \
task_switch.c \
uart_emul.c \
@@ -71,7 +72,7 @@
.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
SRCS+= vmm_instruction_emul.c
-LIBADD= vmmapi md pthread z util sbuf cam
+LIBADD= vmmapi md pthread z util sbuf cam ucl xo
.if ${MK_INET_SUPPORT} != "no"
CFLAGS+=-DINET
@@ -92,6 +93,10 @@
.ifdef GDB_LOG
CFLAGS+=-DGDB_LOG
.endif
+CFLAGS+= -I${SRCTOP}/contrib/libucl/include
+
+# Temporarily disable capsicum until the checkpoint code is integrated with it.
+CFLAGS+= -DWITHOUT_CAPSICUM
WARNS?= 2
Index: usr.sbin/bhyve/Makefile.depend
===================================================================
--- usr.sbin/bhyve/Makefile.depend
+++ usr.sbin/bhyve/Makefile.depend
@@ -12,8 +12,10 @@
lib/libcompiler_rt \
lib/libmd \
lib/libthr \
+ lib/libucl \
lib/libutil \
lib/libvmmapi \
+ lib/libxo \
lib/libz \
secure/lib/libcrypto \
Index: usr.sbin/bhyve/atkbdc.h
===================================================================
--- usr.sbin/bhyve/atkbdc.h
+++ usr.sbin/bhyve/atkbdc.h
@@ -30,9 +30,12 @@
#define _ATKBDC_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct vmctx;
void atkbdc_init(struct vmctx *ctx);
void atkbdc_event(struct atkbdc_softc *sc, int iskbd);
+int atkbdc_snapshot(struct vm_snapshot_meta *meta);
+
#endif /* _ATKBDC_H_ */
Index: usr.sbin/bhyve/atkbdc.c
===================================================================
--- usr.sbin/bhyve/atkbdc.c
+++ usr.sbin/bhyve/atkbdc.c
@@ -33,6 +33,7 @@
#include <sys/types.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
@@ -137,6 +138,8 @@
struct aux_dev aux;
};
+static struct atkbdc_softc *atkbdc_sc = NULL;
+
static void
atkbdc_assert_kbd_intr(struct atkbdc_softc *sc)
{
@@ -548,6 +551,43 @@
sc->ps2kbd_sc = ps2kbd_init(sc);
sc->ps2mouse_sc = ps2mouse_init(sc);
+
+ assert(atkbdc_sc == NULL);
+ atkbdc_sc = sc;
+}
+
+/*
+ * Save or restore the keyboard controller state reachable through the
+ * file-scope 'atkbdc_sc' pointer, followed by the PS/2 keyboard and PS/2
+ * mouse state.  The order of the fields below defines the snapshot layout.
+ *
+ * Returns 0 on success or the first non-zero error encountered.
+ */
+int
+atkbdc_snapshot(struct vm_snapshot_meta *meta)
+{
+	int ret;
+
+	/* Controller registers and RAM. */
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->status, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->outport, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(atkbdc_sc->ram, sizeof(atkbdc_sc->ram), meta,
+	    ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->curcmd, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->ctrlbyte, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd, meta, ret, done);
+
+	/* Keyboard device fields. */
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.irq_active, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.irq, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(atkbdc_sc->kbd.buffer,
+	    sizeof(atkbdc_sc->kbd.buffer), meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.brd, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.bwr, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.bcnt, meta, ret, done);
+
+	/* Auxiliary device fields. */
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq_active, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq, meta, ret, done);
+
+	/* The mouse is only handled once the keyboard succeeded. */
+	ret = ps2kbd_snapshot(atkbdc_sc->ps2kbd_sc, meta);
+	if (ret == 0)
+		ret = ps2mouse_snapshot(atkbdc_sc->ps2mouse_sc, meta);
+
+done:
+	return (ret);
}
static void
Index: usr.sbin/bhyve/bhyverun.h
===================================================================
--- usr.sbin/bhyve/bhyverun.h
+++ usr.sbin/bhyve/bhyverun.h
@@ -28,6 +28,8 @@
* $FreeBSD$
*/
+ #include <ucl.h>
+
#ifndef _FBSDRUN_H_
#define _FBSDRUN_H_
@@ -37,9 +39,10 @@
struct vmctx;
extern int guest_ncpus;
extern char *guest_uuid_str;
-extern char *vmname;
+extern const char *vmname;
void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
+uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr);
void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu);
void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip);
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c
+++ usr.sbin/bhyve/bhyverun.c
@@ -36,6 +36,7 @@
#include <sys/capsicum.h>
#endif
#include <sys/mman.h>
+#include <sys/stat.h>
#include <sys/time.h>
#include <amd64/vmm/intel/vmcs.h>
@@ -81,10 +82,20 @@
#include "pci_irq.h"
#include "pci_lpc.h"
#include "smbiostbl.h"
+#include "snapshot.h"
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <libxo/xo.h>
+#include <ucl.h>
+#include <unistd.h>
+
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
#define MB (1024UL * 1024)
@@ -157,10 +168,12 @@
[EXIT_REASON_XRSTORS] = "XRSTORS"
};
+#define MAX_SOCK_NAME 200
+
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
-char *vmname;
+const char *vmname;
int guest_ncpus;
uint16_t cores, maxcpus, sockets, threads;
@@ -223,6 +236,7 @@
" -H: vmexit from the guest on hlt\n"
" -l: LPC device configuration\n"
" -m: memory size in MB\n"
+ " -r: path to checkpoint file\n"
" -p: pin 'vcpu' to 'hostcpu'\n"
" -P: vmexit from the guest on pause\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
@@ -382,6 +396,12 @@
return (vm_map_gpa(ctx, gaddr, len));
}
+/*
+ * Translate the host pointer 'addr' into a guest physical address by
+ * delegating to vm_rev_map_gpa(); its result is returned as a uintptr_t.
+ */
+uintptr_t
+paddr_host2guest(struct vmctx *ctx, void *addr)
+{
+	uintptr_t gpa;
+
+	gpa = vm_rev_map_gpa(ctx, addr);
+	return (gpa);
+}
+
int
fbsdrun_vmexit_on_pause(void)
{
@@ -972,17 +992,35 @@
return (ctx);
}
+/*
+ * Start vCPU 'vcpu': read its current guest RIP, apply the per-vCPU
+ * capabilities, enable VM_CAP_UNRESTRICTED_GUEST and pass the vCPU to
+ * fbsdrun_addcpu() with BSP as the originating CPU.  A failure to read the
+ * RIP or to set the capability trips an assert().
+ */
+void
+spinup_vcpu(struct vmctx *ctx, int vcpu)
+{
+	uint64_t entry_rip;
+	int rc;
+
+	rc = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &entry_rip);
+	assert(rc == 0);
+
+	fbsdrun_set_capabilities(ctx, vcpu);
+
+	rc = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
+	assert(rc == 0);
+
+	fbsdrun_addcpu(ctx, BSP, vcpu, entry_rip);
+}
+
int
main(int argc, char *argv[])
{
int c, error, dbg_port, gdb_port, err, bvmcons;
- int max_vcpus, mptgen, memflags;
+ int max_vcpus, mptgen, memflags, vcpu;
int rtc_localtime;
bool gdb_stop;
struct vmctx *ctx;
- uint64_t rip;
size_t memsize;
- char *optstr;
+ char *optstr, *restore_file;
+ struct restore_state rstate;
+
+ restore_file = NULL;
bvmcons = 0;
progname = basename(argv[0]);
@@ -997,7 +1035,7 @@
rtc_localtime = 1;
memflags = 0;
- optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:";
+ optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:r:";
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
case 'a':
@@ -1043,6 +1081,9 @@
"configuration '%s'", optarg);
}
break;
+ case 'r':
+ restore_file = optarg;
+ break;
case 's':
if (strncmp(optarg, "help", strlen(optarg)) == 0) {
pci_print_supported_devices();
@@ -1104,12 +1145,41 @@
argc -= optind;
argv += optind;
- if (argc != 1)
+ if (argc > 1 || (argc == 0 && restore_file == NULL))
usage(1);
+ if (restore_file != NULL) {
+ error = load_restore_file(restore_file, &rstate);
+ if (error) {
+ fprintf(stderr, "Failed to read checkpoint info from "
+ "file: '%s'.\n", restore_file);
+ exit(1);
+ }
+ }
+
+ if (argc == 1) {
vmname = argv[0];
+ } else {
+ vmname = lookup_vmname(&rstate);
+ if (vmname == NULL) {
+ fprintf(stderr, "Cannot find VM name in restore file. "
+ "Please specify one.\n");
+ exit(1);
+ }
+ }
ctx = do_open(vmname);
+ if (restore_file != NULL) {
+ guest_ncpus = lookup_guest_ncpus(&rstate);
+ memflags = lookup_memflags(&rstate);
+ memsize = lookup_memsize(&rstate);
+ }
+
+ if (guest_ncpus < 1) {
+ fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
+ exit(1);
+ }
+
max_vcpus = num_vcpus_allowed(ctx);
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",
@@ -1117,8 +1187,6 @@
exit(4);
}
- fbsdrun_set_capabilities(ctx, BSP);
-
vm_set_memflags(ctx, memflags);
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
if (err) {
@@ -1168,8 +1236,37 @@
assert(error == 0);
}
- error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
- assert(error == 0);
+ if (restore_file != NULL) {
+ fprintf(stdout, "Pausing pci devs...\r\n");
+ if (vm_pause_user_devs(ctx) != 0) {
+ fprintf(stderr, "Failed to pause PCI device state.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring vm mem...\r\n");
+ if (restore_vm_mem(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore VM memory.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring pci devs...\r\n");
+ if (vm_restore_user_devs(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore PCI device state.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring kernel structs...\r\n");
+ if (vm_restore_kern_structs(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore kernel structs.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Resuming pci devs...\r\n");
+ if (vm_resume_user_devs(ctx) != 0) {
+ fprintf(stderr, "Failed to resume PCI device state.\n");
+ exit(1);
+ }
+ }
/*
* build the guest tables, MP etc.
@@ -1208,10 +1305,34 @@
errx(EX_OSERR, "cap_enter() failed");
#endif
+ if (restore_file != NULL)
+ destroy_restore_state(&rstate);
+
/*
- * Add CPU 0
+ * checkpointing thread for communication with bhyvectl
+ */
+ if (init_checkpoint_thread(ctx) < 0)
+ printf("Failed to start checkpoint thread!\r\n");
+
+ /*
+ * Change the proc title to include the VM name.
*/
- fbsdrun_addcpu(ctx, BSP, BSP, rip);
+ setproctitle("%s", vmname);
+
+ if (restore_file != NULL) {
+ vm_restore_time(ctx);
+ }
+
+ /*
+ * Add CPU 0.  When restoring a VM, start all vCPUs now (including the
+ * APs); otherwise let the guest OS spin them up later via vmexits.
+ */
+
+ for (vcpu = 0; vcpu < guest_ncpus; vcpu++)
+ if (vcpu == BSP || restore_file) {
+ fprintf(stdout, "spinning up vcpu no %d...\r\n", vcpu);
+ spinup_vcpu(ctx, vcpu);
+ }
/*
* Head off to the main event dispatch loop
Index: usr.sbin/bhyve/block_if.h
===================================================================
--- usr.sbin/bhyve/block_if.h
+++ usr.sbin/bhyve/block_if.h
@@ -41,6 +41,9 @@
#include <sys/uio.h>
#include <sys/unistd.h>
+struct vm_snapshot_meta;
+
+
#define BLOCKIF_IOV_MAX 33 /* not practical to be IOV_MAX */
struct blockif_req {
@@ -68,5 +71,11 @@
int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_close(struct blockif_ctxt *bc);
+void blockif_pause(struct blockif_ctxt *bc);
+void blockif_resume(struct blockif_ctxt *bc);
+int blockif_snapshot_req(struct blockif_req *br,
+ struct vm_snapshot_meta *meta);
+int blockif_snapshot(struct blockif_ctxt *bc,
+ struct vm_snapshot_meta *meta);
#endif /* _BLOCK_IF_H_ */
Index: usr.sbin/bhyve/block_if.c
===================================================================
--- usr.sbin/bhyve/block_if.c
+++ usr.sbin/bhyve/block_if.c
@@ -57,6 +57,7 @@
#include <unistd.h>
#include <machine/atomic.h>
+#include <machine/vmm_snapshot.h>
#include "bhyverun.h"
#include "mevent.h"
@@ -67,6 +68,9 @@
#define BLOCKIF_NUMTHR 8
#define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR)
+#define NO_THREAD_IDX (-2)
+#define REQ_IDX_SEPARATOR (-1)
+
enum blockop {
BOP_READ,
BOP_WRITE,
@@ -103,9 +107,13 @@
int bc_psectsz;
int bc_psectoff;
int bc_closing;
+ int bc_paused;
+ int bc_work_count;
pthread_t bc_btid[BLOCKIF_NUMTHR];
pthread_mutex_t bc_mtx;
pthread_cond_t bc_cond;
+ pthread_cond_t bc_paused_cond;
+ pthread_cond_t bc_work_done_cond;
/* Request elements and free/pending/busy queues */
TAILQ_HEAD(, blockif_elem) bc_freeq;
@@ -208,6 +216,18 @@
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
}
+/*
+ * Flush the backing store of 'bc': a DIOCGFLUSH ioctl when bc_ischr is set,
+ * fsync() otherwise.  Returns 0 on success or the errno of the failed call.
+ */
+static int
+blockif_flush_bc(struct blockif_ctxt *bc)
+{
+	int rc;
+
+	if (bc->bc_ischr)
+		rc = ioctl(bc->bc_fd, DIOCGFLUSH);
+	else
+		rc = fsync(bc->bc_fd);
+
+	return (rc != 0 ? errno : 0);
+}
+
static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
@@ -298,11 +318,7 @@
}
break;
case BOP_FLUSH:
- if (bc->bc_ischr) {
- if (ioctl(bc->bc_fd, DIOCGFLUSH))
- err = errno;
- } else if (fsync(bc->bc_fd))
- err = errno;
+ err = blockif_flush_bc(bc);
break;
case BOP_DELETE:
if (!bc->bc_candelete)
@@ -346,15 +362,34 @@
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
- while (blockif_dequeue(bc, t, &be)) {
+ bc->bc_work_count++;
+
+ /* We cannot process work if the interface is paused */
+ while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
blockif_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
- /* Check ctxt status here to see if exit requested */
+
+ bc->bc_work_count--;
+
+ /* If none of the workers is busy, notify the main thread */
+ if (bc->bc_work_count == 0)
+ pthread_cond_broadcast(&bc->bc_work_done_cond);
+
+ /*
+ * Check ctxt status here to see if exit requested
+ *
+ * No sense to wait while paused if closing anyway
+ */
if (bc->bc_closing)
break;
+
+ /* Make all worker threads wait here if the device is paused */
+ while (bc->bc_paused)
+ pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx);
+
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
}
pthread_mutex_unlock(&bc->bc_mtx);
@@ -558,6 +593,10 @@
bc->bc_psectoff = psectoff;
pthread_mutex_init(&bc->bc_mtx, NULL);
pthread_cond_init(&bc->bc_cond, NULL);
+ bc->bc_paused = 0;
+ bc->bc_work_count = 0;
+ pthread_cond_init(&bc->bc_paused_cond, NULL);
+ pthread_cond_init(&bc->bc_work_done_cond, NULL);
TAILQ_INIT(&bc->bc_freeq);
TAILQ_INIT(&bc->bc_pendq);
TAILQ_INIT(&bc->bc_busyq);
@@ -589,6 +628,7 @@
err = 0;
pthread_mutex_lock(&bc->bc_mtx);
+ /* should make thread wait if interface paused ? */
if (!TAILQ_EMPTY(&bc->bc_freeq)) {
/*
* Enqueue and inform the block i/o thread
@@ -650,6 +690,8 @@
assert(bc->bc_magic == BLOCKIF_SIG);
pthread_mutex_lock(&bc->bc_mtx);
+ /* XXX: not waiting while paused */
+
/*
* Check pending requests.
*/
@@ -848,3 +890,96 @@
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_candelete);
}
+
+/*
+ * Pause the block interface: mark it paused under bc_mtx, sleep on
+ * bc_work_done_cond until bc_work_count drops to zero, then flush the
+ * backing store.  A failed flush is only reported as a warning on stderr.
+ */
+void
+blockif_pause(struct blockif_ctxt *bc)
+{
+	int flush_err;
+
+	assert(bc != NULL);
+	assert(bc->bc_magic == BLOCKIF_SIG);
+
+	pthread_mutex_lock(&bc->bc_mtx);
+	bc->bc_paused = 1;
+	while (bc->bc_work_count != 0) {
+		/* Workers broadcast this condition when the count hits zero. */
+		pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx);
+	}
+	pthread_mutex_unlock(&bc->bc_mtx);
+
+	flush_err = blockif_flush_bc(bc);
+	if (flush_err != 0) {
+		fprintf(stderr, "%s: [WARN] failed to flush backing file.\r\n",
+		    __func__);
+	}
+}
+
+/*
+ * Resume a paused block interface: clear bc_paused under bc_mtx, then
+ * broadcast bc_paused_cond followed by bc_cond before dropping the mutex.
+ */
+void
+blockif_resume(struct blockif_ctxt *bc)
+{
+
+	assert(bc != NULL);
+	assert(bc->bc_magic == BLOCKIF_SIG);
+
+	pthread_mutex_lock(&bc->bc_mtx);
+
+	bc->bc_paused = 0;
+
+	/* Release the workers parked on the pause condition ... */
+	pthread_cond_broadcast(&bc->bc_paused_cond);
+	/* ... and kick the ones waiting for work. */
+	pthread_cond_broadcast(&bc->bc_cond);
+
+	pthread_mutex_unlock(&bc->bc_mtx);
+}
+
+/*
+ * Save or restore one block request: the iovec count, offset and residual,
+ * then each iovec's length and base address (translated between host and
+ * guest addresses by the snapshot helper).
+ *
+ * The completion callback and its parameter are not touched here; the
+ * emulated device that owns the request is expected to fill them in.
+ *
+ * Returns 0 on success, EINVAL if the iovec count is out of range, or the
+ * first error reported by a snapshot helper.
+ */
+int
+blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta)
+{
+	struct iovec *iov;
+	int i, ret;
+
+	SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done);
+
+	/*
+	 * On restore br_iovcnt comes straight from the snapshot stream; refuse
+	 * counts that would index outside br_iov[].
+	 * NOTE(review): assumes br_iov[] holds BLOCKIF_IOV_MAX entries, as
+	 * declared in block_if.h -- confirm.
+	 */
+	if (br->br_iovcnt < 0 || br->br_iovcnt > BLOCKIF_IOV_MAX) {
+		fprintf(stderr, "%s: invalid iovec count %d\r\n", __func__,
+		    br->br_iovcnt);
+		ret = EINVAL;
+		goto done;
+	}
+
+	/* Snapshot the iovecs */
+	for (i = 0; i < br->br_iovcnt; i++) {
+		iov = &br->br_iov[i];
+
+		SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done);
+		/* we assume the iov is a guest-mapped address */
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len,
+		    false, meta, ret, done);
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore the scalar state of a block interface context.  The
+ * interface must already be paused (see blockif_pause()).
+ *
+ * bc_paused is modified under bc_mtx by blockif_pause()/blockif_resume(), so
+ * it is checked here with the mutex held as well.
+ *
+ * Returns 0 on success, ENXIO if the interface is not paused, or the first
+ * error reported by a snapshot helper.
+ */
+int
+blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta)
+{
+	int ret;
+
+	pthread_mutex_lock(&bc->bc_mtx);
+
+	if (bc->bc_paused == 0) {
+		fprintf(stderr, "%s: Snapshot failed: "
+		    "interface not paused.\r\n", __func__);
+		ret = ENXIO;
+		goto done;
+	}
+
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done);
+
+done:
+	pthread_mutex_unlock(&bc->bc_mtx);
+	return (ret);
+}
+
+
Index: usr.sbin/bhyve/pci_ahci.c
===================================================================
--- usr.sbin/bhyve/pci_ahci.c
+++ usr.sbin/bhyve/pci_ahci.c
@@ -41,6 +41,8 @@
#include <sys/ata.h>
#include <sys/endian.h>
+#include <machine/vmm_snapshot.h>
+
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@@ -131,6 +133,7 @@
uint32_t done;
int slot;
int more;
+ int readop;
};
struct ahci_port {
@@ -724,6 +727,7 @@
aior->slot = slot;
aior->len = len;
aior->done = done;
+ aior->readop = readop;
breq = &aior->io_req;
breq->br_offset = lba + done;
ahci_build_iov(p, aior, prdt, hdr->prdtl);
@@ -1420,6 +1424,7 @@
aior->slot = slot;
aior->len = len;
aior->done = done;
+ aior->readop = 1;
breq = &aior->io_req;
breq->br_offset = lba + done;
ahci_build_iov(p, aior, prdt, hdr->prdtl);
@@ -2446,6 +2451,284 @@
return (pci_ahci_init(ctx, pi, opts, 1));
}
+/*
+ * Write the port's request queues to the snapshot.  Each queue is emitted
+ * as a list of indices into port->ioreq[] closed by a -1 terminator: first
+ * the free list, then the busy list.  Busy entries are additionally followed
+ * by their block request, since only busy requests hold valid data.
+ *
+ * Returns 0 on success or the first error reported by a snapshot helper.
+ */
+static int
+pci_ahci_snapshot_save_queues(struct ahci_port *port,
+    struct vm_snapshot_meta *meta)
+{
+	struct ahci_ioreq *req;
+	int idx, ret;
+
+	/* Free list: indices only. */
+	STAILQ_FOREACH(req, &port->iofhd, io_flist) {
+		idx = req - port->ioreq;
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+	}
+	idx = -1;
+	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+	/* Busy list: index followed by the block request itself. */
+	TAILQ_FOREACH(req, &port->iobhd, io_blist) {
+		idx = req - port->ioreq;
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+		ret = blockif_snapshot_req(&req->io_req, meta);
+		if (ret == 0)
+			continue;
+
+		fprintf(stderr, "%s: failed to snapshot req\r\n",
+		    __func__);
+		goto done;
+	}
+	idx = -1;
+	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+done:
+	return (ret);
+}
+
+/*
+ * Rebuild the port's request queues from the snapshot.  The stream holds
+ * the free list and then the busy list, each as indices into port->ioreq[]
+ * closed by a -1 terminator.  Every busy entry is followed by its block
+ * request, which is restored and re-submitted to the block interface as a
+ * read or a write according to the request's 'readop' flag.
+ *
+ * Indices come from the snapshot stream and are validated against
+ * port->ioqsz before they are used.
+ * NOTE(review): assumes port->ioreq[] holds port->ioqsz entries -- confirm
+ * against the port initialisation code.
+ *
+ * Returns 0 on success, EINVAL for an out-of-range index, or the first
+ * error reported by a snapshot or block interface call.
+ */
+static int
+pci_ahci_snapshot_restore_queues(struct ahci_port *port,
+    struct vm_snapshot_meta *meta)
+{
+	int ret;
+	int idx;
+	struct ahci_ioreq *ioreq;
+
+	/* empty the free queue before restoring */
+	while (!STAILQ_EMPTY(&port->iofhd))
+		STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
+
+	/* restore the free queue */
+	while (1) {
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+		if (idx == -1)
+			break;
+
+		if (idx < 0 || idx >= (int)port->ioqsz) {
+			fprintf(stderr, "%s: invalid free request index %d\r\n",
+			    __func__, idx);
+			ret = EINVAL;
+			goto done;
+		}
+
+		STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
+	}
+
+	/* restore the busy queue */
+	while (1) {
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+		if (idx == -1)
+			break;
+
+		if (idx < 0 || idx >= (int)port->ioqsz) {
+			fprintf(stderr, "%s: invalid busy request index %d\r\n",
+			    __func__, idx);
+			ret = EINVAL;
+			goto done;
+		}
+
+		ioreq = &port->ioreq[idx];
+		TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
+
+		/*
+		 * restore only the busy requests
+		 * other requests are not valid
+		 */
+		ret = blockif_snapshot_req(&ioreq->io_req, meta);
+		if (ret != 0) {
+			fprintf(stderr, "%s: failed to restore request\r\n",
+			    __func__);
+
+			goto done;
+		}
+
+		/* re-enqueue the requests in the block interface */
+		if (ioreq->readop)
+			ret = blockif_read(port->bctx, &ioreq->io_req);
+		else
+			ret = blockif_write(port->bctx, &ioreq->io_req);
+
+		if (ret != 0) {
+			fprintf(stderr,
+			    "%s: failed to re-enqueue request\r\n",
+			    __func__);
+
+			goto done;
+		}
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore an AHCI controller.  meta->dev_data is the pci_devinst of
+ * the controller; its pi_arg is the softc.
+ *
+ * Layout: the controller-wide registers, then for each of the MAX_PORTS
+ * ports the block-context pointer and port number (used only to check that
+ * the same ports are populated), and -- for populated ports -- the port
+ * registers, the per-slot request bookkeeping, the free/busy queues and
+ * finally the block interface state.
+ *
+ * On restore the queue size read from the snapshot must match the port's
+ * current one, because it bounds every access to port->ioreq[] below.
+ * NOTE(review): assumes port->ioreq[] was sized from port->ioqsz before
+ * this function runs -- confirm against the port initialisation code.
+ *
+ * Returns 0 on success, EINVAL on a port/queue mismatch or unknown
+ * operation, or the first error reported by a helper.
+ */
+static int
+pci_ahci_snapshot(struct vm_snapshot_meta *meta)
+{
+	int i, j, ret;
+	int ioqsz;
+	void *bctx;
+	struct pci_devinst *pi;
+	struct pci_ahci_softc *sc;
+	struct ahci_port *port;
+	struct ahci_cmd_hdr *hdr;
+	struct ahci_ioreq *ioreq;
+
+	pi = meta->dev_data;
+	sc = pi->pi_arg;
+
+	/* TODO: add mtx lock/unlock */
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
+
+	for (i = 0; i < MAX_PORTS; i++) {
+		port = &sc->port[i];
+
+		if (meta->op == VM_SNAPSHOT_SAVE)
+			bctx = port->bctx;
+
+		SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
+
+		/* mostly for restore; save is ensured by the lines above */
+		if (((bctx == NULL) && (port->bctx != NULL)) ||
+		    ((bctx != NULL) && (port->bctx == NULL))) {
+			fprintf(stderr, "%s: ports not matching\r\n", __func__);
+
+			ret = EINVAL;
+			goto done;
+		}
+
+		if (port->bctx == NULL)
+			continue;
+
+		if (port->port != i) {
+			fprintf(stderr, "%s: ports not matching: "
+			    "actual: %d expected: %d\r\n",
+			    __func__, port->port, i);
+
+			ret = EINVAL;
+			goto done;
+		}
+
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst,
+		    AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta,
+		    ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(port->ident, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->is, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
+
+		/*
+		 * Remember the live queue size: the value read back on
+		 * restore bounds every port->ioreq[] access below, so it must
+		 * not differ from the one the array was set up with.
+		 */
+		ioqsz = port->ioqsz;
+		SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
+		if ((int)port->ioqsz != ioqsz) {
+			fprintf(stderr, "%s: request queue size not matching: "
+			    "actual: %d expected: %d\r\n",
+			    __func__, (int)port->ioqsz, ioqsz);
+
+			port->ioqsz = ioqsz;
+			ret = EINVAL;
+			goto done;
+		}
+
+		for (j = 0; j < port->ioqsz; j++) {
+			ioreq = &port->ioreq[j];
+
+			/* blockif_req snapshot done only for busy requests */
+			hdr = (struct ahci_cmd_hdr *)(port->cmd_lst + j * AHCI_CL_SIZE);
+			SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
+			    0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
+			    false, meta, ret, done);
+
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
+			SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
+		}
+
+		/* Perform save / restore specific operations */
+		if (meta->op == VM_SNAPSHOT_SAVE) {
+			ret = pci_ahci_snapshot_save_queues(port, meta);
+			if (ret != 0)
+				goto done;
+		} else if (meta->op == VM_SNAPSHOT_RESTORE) {
+			ret = pci_ahci_snapshot_restore_queues(port, meta);
+			if (ret != 0)
+				goto done;
+		} else {
+			/* error */
+			ret = EINVAL;
+			goto done;
+		}
+
+		ret = blockif_snapshot(port->bctx, meta);
+		if (ret != 0) {
+			/* Reached on both the save and the restore path. */
+			fprintf(stderr, "%s: failed to snapshot blockif\r\n",
+			    __func__);
+
+			goto done;
+		}
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Pause the block interface behind every populated port of the controller
+ * attached to 'pi'.  Ports without a block context are skipped.  Always
+ * returns 0.
+ */
+static int
+pci_ahci_pause(struct vmctx *ctx, struct pci_devinst *pi)
+{
+	struct pci_ahci_softc *sc = pi->pi_arg;
+	int port_no;
+
+	for (port_no = 0; port_no < MAX_PORTS; port_no++) {
+		if (sc->port[port_no].bctx != NULL)
+			blockif_pause(sc->port[port_no].bctx);
+	}
+
+	return (0);
+}
+
+/*
+ * Resume the block interface behind every populated port of the controller
+ * attached to 'pi'.  Ports without a block context are skipped.  Always
+ * returns 0.
+ */
+static int
+pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
+{
+	struct pci_ahci_softc *sc = pi->pi_arg;
+	int port_no;
+
+	for (port_no = 0; port_no < MAX_PORTS; port_no++) {
+		if (sc->port[port_no].bctx != NULL)
+			blockif_resume(sc->port[port_no].bctx);
+	}
+
+	return (0);
+}
+
/*
* Use separate emulation names to distinguish drive and atapi devices
*/
@@ -2453,7 +2736,10 @@
.pe_emu = "ahci",
.pe_init = pci_ahci_hd_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
};
PCI_EMUL_SET(pci_de_ahci);
@@ -2461,7 +2747,10 @@
.pe_emu = "ahci-hd",
.pe_init = pci_ahci_hd_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
};
PCI_EMUL_SET(pci_de_ahci_hd);
@@ -2469,6 +2758,9 @@
.pe_emu = "ahci-cd",
.pe_init = pci_ahci_atapi_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
};
PCI_EMUL_SET(pci_de_ahci_cd);
Index: usr.sbin/bhyve/pci_e82545.c
===================================================================
--- usr.sbin/bhyve/pci_e82545.c
+++ usr.sbin/bhyve/pci_e82545.c
@@ -46,6 +46,8 @@
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
+#include <machine/vmm_snapshot.h>
+
#include <err.h>
#include <errno.h>
#include <fcntl.h>
@@ -2381,11 +2383,158 @@
return (0);
}
+/*
+ * Save or restore (depending on meta->op) the emulated e82545 state.
+ *
+ * meta->dev_data must point at the device's pci_devinst; the softc is taken
+ * from its pi_arg.  Fields are processed in a fixed order, which defines the
+ * layout of the snapshot stream, so entries must not be reordered.
+ *
+ * Returns the value left in 'ret' by the last SNAPSHOT_* macro
+ * (presumably 0 on success -- confirm against vmm_snapshot.h).
+ */
+static int
+e82545_snapshot(struct vm_snapshot_meta *meta)
+{
+	int i;
+	int ret;
+	struct e82545_softc *sc;
+	struct pci_devinst *pi;
+	uint64_t bitmap_value;
+
+	pi = meta->dev_data;
+	sc = pi->pi_arg;
+
+	/* esc_mevp and esc_mevpitr should be reinitialized at init */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_mac, meta, ret, done);
+
+	/* General */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_CTRL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCT, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_VET, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCTTV, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_LEDCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_PBA, meta, ret, done);
+
+	/* Interrupt control */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_irq_asserted, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICR, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ITR, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICS, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMS, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMC, meta, ret, done);
+
+	/* Transmit */
+	/*
+	 * The fields in the unions overlap so that individual bytes of the
+	 * larger uint variables can be accessed,
+	 * e.g., ip_config = [ipcss|ipcso|ipcse0|ipcse1]
+	 */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.lower_setup.ip_config, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.upper_setup.tcp_config, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.cmd_and_length, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.tcp_seg_setup.data, meta, ret, done);
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_enabled, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_active, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXCW, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIPG, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_AIT, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tdba, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDLEN, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDHr, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDT, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIDV, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXDCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TADV, meta, ret, done);
+
+	/* Has dependency on esc_TDLEN; reordered relative to the struct */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_txdesc, sc->esc_TDLEN,
+	    true, meta, ret, done);
+
+	/* L2 frame acceptance */
+	for (i = 0; i < nitems(sc->esc_uni); i++) {
+		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_valid, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_addrsel, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_eth, meta, ret, done);
+	}
+
+	SNAPSHOT_BUF_OR_LEAVE(sc->esc_fmcast, sizeof(sc->esc_fmcast),
+	    meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(sc->esc_fvlan, sizeof(sc->esc_fvlan),
+	    meta, ret, done);
+
+	/* Receive */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_enabled, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_active, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_loopback, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rdba, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDLEN, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDT, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDTR, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXDCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RADV, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RSRPD, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXCSUM, meta, ret, done);
+
+	/*
+	 * Has dependency on esc_RDLEN; reordered relative to the struct.
+	 * The RX ring must be sized by the RX length register, not by
+	 * esc_TDLEN.
+	 */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_rxdesc, sc->esc_RDLEN,
+	    true, meta, ret, done);
+
+	/* IO Port register access */
+	SNAPSHOT_VAR_OR_LEAVE(sc->io_addr, meta, ret, done);
+	/* Shadow copy of MDIC */
+	SNAPSHOT_VAR_OR_LEAVE(sc->mdi_control, meta, ret, done);
+	/* Shadow copy of EECD */
+	SNAPSHOT_VAR_OR_LEAVE(sc->eeprom_control, meta, ret, done);
+	/* Latest NVM in/out */
+	SNAPSHOT_VAR_OR_LEAVE(sc->nvm_data, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->nvm_opaddr, meta, ret, done);
+	/* stats */
+	SNAPSHOT_VAR_OR_LEAVE(sc->missed_pkt_count, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(sc->pkt_rx_by_size, sizeof(sc->pkt_rx_by_size),
+	    meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(sc->pkt_tx_by_size, sizeof(sc->pkt_tx_by_size),
+	    meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_rx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_rx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_rx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_tx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_tx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_tx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->oversize_rx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->tso_tx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_rx, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_tx, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->missed_octets, meta, ret, done);
+
+	/*
+	 * nvm_bits is copied through a temporary rather than passed to the
+	 * macro directly (presumably because it is a bit-field -- confirm
+	 * against the softc definition).
+	 */
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		bitmap_value = sc->nvm_bits;
+	SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done);
+	if (meta->op == VM_SNAPSHOT_RESTORE)
+		sc->nvm_bits = bitmap_value;
+
+	/*
+	 * NOTE(review): this second block is an exact duplicate of the one
+	 * above.  It looks like a copy/paste that was meant to cover a
+	 * different field; it is kept as-is so the snapshot stream layout
+	 * does not change.  Confirm which field was intended.
+	 */
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		bitmap_value = sc->nvm_bits;
+	SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done);
+	if (meta->op == VM_SNAPSHOT_RESTORE)
+		sc->nvm_bits = bitmap_value;
+
+	/* EEPROM data */
+	SNAPSHOT_BUF_OR_LEAVE(sc->eeprom_data, sizeof(sc->eeprom_data),
+	    meta, ret, done);
+
+done:
+	return (ret);
+}
+
struct pci_devemu pci_de_e82545 = {
.pe_emu = "e1000",
.pe_init = e82545_init,
.pe_barwrite = e82545_write,
- .pe_barread = e82545_read
+ .pe_barread = e82545_read,
+ .pe_snapshot = e82545_snapshot,
};
PCI_EMUL_SET(pci_de_e82545);
Index: usr.sbin/bhyve/pci_emul.h
===================================================================
--- usr.sbin/bhyve/pci_emul.h
+++ usr.sbin/bhyve/pci_emul.h
@@ -45,6 +45,7 @@
struct vmctx;
struct pci_devinst;
struct memory_region;
+struct vm_snapshot_meta;
struct pci_devemu {
char *pe_emu; /* Name of device emulation */
@@ -71,6 +72,11 @@
uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
+
+ /* Save/restore device state */
+ int (*pe_snapshot)(struct vm_snapshot_meta *meta);
+ int (*pe_pause)(struct vmctx *ctx, struct pci_devinst *pi);
+ int (*pe_resume)(struct vmctx *ctx, struct pci_devinst *pi);
};
#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x);
@@ -245,6 +251,9 @@
void pci_write_dsdt(void);
uint64_t pci_ecfg_base(void);
int pci_bus_configured(int bus);
+int pci_snapshot(struct vm_snapshot_meta *meta);
+int pci_pause(struct vmctx *ctx, const char *dev_name);
+int pci_resume(struct vmctx *ctx, const char *dev_name);
static __inline void
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
Index: usr.sbin/bhyve/pci_emul.c
===================================================================
--- usr.sbin/bhyve/pci_emul.c
+++ usr.sbin/bhyve/pci_emul.c
@@ -45,6 +45,7 @@
#include <stdbool.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
#include "acpi.h"
@@ -1936,6 +1937,187 @@
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
+/*
+ * Saves/restores PCI device emulated state. Returns 0 on success.
+ */
+/*
+ * Generic part of a PCI device snapshot: processes the MSI and MSI-X
+ * bookkeeping, the config-space image, the BAR descriptors and the MSI-X
+ * table of the pci_devinst found in meta->dev_data.
+ *
+ * The sequence of SNAPSHOT_* calls defines the on-disk layout, so it must
+ * stay in this order.  Returns the 'ret' value left by the last macro.
+ */
+static int
+pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
+{
+ struct pci_devinst *pi;
+ int i;
+ int ret;
+
+ pi = meta->dev_data;
+
+ /* MSI state */
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);
+
+ /* MSI-X bookkeeping (the table itself is handled further down) */
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_page_offset, meta, ret, done);
+
+ /* Whole PCI configuration space image */
+ SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
+ meta, ret, done);
+
+ for (i = 0; i < nitems(pi->pi_bar); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
+ }
+
+ /*
+  * Save/restore the MSI-X table.
+  * NOTE(review): on restore table_count has just been read from the
+  * snapshot; nothing here checks it against the size of the table
+  * allocated for this device -- confirm the two always agree.
+  */
+ for (i = 0; i < pi->pi_msix.table_count; i++) {
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
+ meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Look up a configured PCI function by its emulation name.
+ *
+ * Scans every bus/slot/function and stops at the first function whose
+ * fi_name equals dev_name.  On a match *pde receives the emulation found by
+ * pci_emul_finddev() and *pdi the function's device instance, and 0 is
+ * returned.  EINVAL is returned when no function carries that name.
+ */
+static int
+pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde,
+    struct pci_devinst **pdi)
+{
+	int b, s, f;
+
+	assert(dev_name != NULL);
+	assert(pde != NULL);
+	assert(pdi != NULL);
+
+	for (b = 0; b < MAXBUSES; b++) {
+		struct businfo *binfo = pci_businfo[b];
+
+		if (binfo == NULL)
+			continue;
+
+		for (s = 0; s < MAXSLOTS; s++) {
+			struct slotinfo *sinfo = &binfo->slotinfo[s];
+
+			for (f = 0; f < MAXFUNCS; f++) {
+				struct funcinfo *finfo = &sinfo->si_funcs[f];
+
+				/* Skip unused functions and other names. */
+				if (finfo->fi_name == NULL ||
+				    strcmp(dev_name, finfo->fi_name) != 0)
+					continue;
+
+				*pde = pci_emul_finddev(finfo->fi_name);
+				assert(*pde != NULL);
+
+				*pdi = finfo->fi_devi;
+				return (0);
+			}
+		}
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Snapshot entry point for a PCI device named by meta->dev_name.
+ *
+ * If no configured device has that name the snapshot buffer is zeroed and 0
+ * is returned.  Otherwise meta->dev_data is pointed at the device instance,
+ * the generic PCI state is processed and then the device model's own
+ * pe_snapshot callback, whose result is returned.  -1 is returned when the
+ * model has no pe_snapshot callback or the generic PCI part fails.
+ */
+int
+pci_snapshot(struct vm_snapshot_meta *meta)
+{
+	struct pci_devemu *emu;
+	struct pci_devinst *inst;
+
+	assert(meta->dev_name != NULL);
+
+	if (pci_find_slotted_dev(meta->dev_name, &emu, &inst) != 0) {
+		fprintf(stderr, "%s: no such name: %s\r\n",
+		    __func__, meta->dev_name);
+		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+		return (0);
+	}
+
+	meta->dev_data = inst;
+
+	if (emu->pe_snapshot == NULL) {
+		fprintf(stderr, "%s: not implemented yet for: %s\r\n",
+		    __func__, meta->dev_name);
+		return (-1);
+	}
+
+	/* Generic PCI state goes first, the model-specific state after it. */
+	if (pci_snapshot_pci_dev(meta) != 0) {
+		fprintf(stderr, "%s: failed to snapshot pci dev\r\n",
+		    __func__);
+		return (-1);
+	}
+
+	return ((*emu->pe_snapshot)(meta));
+}
+
+/*
+ * Pause the PCI device named dev_name through its pe_pause callback.
+ *
+ * An unknown name or a device model without pe_pause is only reported on
+ * stderr; both cases return 0.  Otherwise the callback's result is returned.
+ */
+int
+pci_pause(struct vmctx *ctx, const char *dev_name)
+{
+	struct pci_devemu *emu;
+	struct pci_devinst *inst;
+
+	assert(dev_name != NULL);
+
+	if (pci_find_slotted_dev(dev_name, &emu, &inst) != 0) {
+		/*
+		 * Callers may invoke this without first checking that the
+		 * device is actually present.
+		 */
+		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
+		return (0);
+	}
+
+	if (emu->pe_pause != NULL)
+		return ((*emu->pe_pause)(ctx, inst));
+
+	/* Pause/resume support is optional for a device model. */
+	fprintf(stderr, "%s: not implemented for: %s\n",
+	    __func__, dev_name);
+	return (0);
+}
+
+/*
+ * Resume the PCI device named dev_name through its pe_resume callback.
+ *
+ * An unknown name or a device model without pe_resume is only reported on
+ * stderr; both cases return 0.  Otherwise the callback's result is returned.
+ */
+int
+pci_resume(struct vmctx *ctx, const char *dev_name)
+{
+	struct pci_devemu *emu;
+	struct pci_devinst *inst;
+
+	assert(dev_name != NULL);
+
+	if (pci_find_slotted_dev(dev_name, &emu, &inst) != 0) {
+		/*
+		 * Callers may invoke this without first checking that the
+		 * device is actually present.
+		 */
+		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
+		return (0);
+	}
+
+	if (emu->pe_resume != NULL)
+		return ((*emu->pe_resume)(ctx, inst));
+
+	/* Pause/resume support is optional for a device model. */
+	fprintf(stderr, "%s: not implemented for: %s\n",
+	    __func__, dev_name);
+	return (0);
+}
+
#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
@@ -2105,11 +2287,18 @@
return (value);
}
+/*
+ * Snapshot callback for the "dummy" test device: saves and restores
+ * nothing and always reports success.
+ */
+int
+pci_emul_snapshot(struct vm_snapshot_meta *meta)
+{
+ return (0);
+}
+
struct pci_devemu pci_dummy = {
.pe_emu = "dummy",
.pe_init = pci_emul_dinit,
.pe_barwrite = pci_emul_diow,
- .pe_barread = pci_emul_dior
+ .pe_barread = pci_emul_dior,
+ .pe_snapshot = pci_emul_snapshot,
};
PCI_EMUL_SET(pci_dummy);
Index: usr.sbin/bhyve/pci_fbuf.c
===================================================================
--- usr.sbin/bhyve/pci_fbuf.c
+++ usr.sbin/bhyve/pci_fbuf.c
@@ -35,6 +35,7 @@
#include <sys/mman.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
#include <stdio.h>
@@ -440,10 +441,22 @@
return (error);
}
+/*
+ * Save or restore the framebuffer contents: FB_SIZE bytes starting at
+ * fbuf_sc->fb_base.  Operates on the file-global fbuf_sc rather than on
+ * meta->dev_data.  Returns the 'ret' value left by the macro.
+ */
+static int
+pci_fbuf_snapshot(struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_BUF_OR_LEAVE(fbuf_sc->fb_base, FB_SIZE, meta, ret, err);
+
+err:
+ return (ret);
+}
+
struct pci_devemu pci_fbuf = {
.pe_emu = "fbuf",
.pe_init = pci_fbuf_init,
.pe_barwrite = pci_fbuf_write,
- .pe_barread = pci_fbuf_read
+ .pe_barread = pci_fbuf_read,
+ .pe_snapshot = pci_fbuf_snapshot,
};
PCI_EMUL_SET(pci_fbuf);
Index: usr.sbin/bhyve/pci_lpc.c
===================================================================
--- usr.sbin/bhyve/pci_lpc.c
+++ usr.sbin/bhyve/pci_lpc.c
@@ -34,6 +34,7 @@
#include <sys/types.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <stdio.h>
#include <stdlib.h>
@@ -451,12 +452,31 @@
pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5));
}
+/*
+ * Save or restore the state of every LPC UART, in unit order.
+ *
+ * Stops at the first UART whose uart_snapshot() call fails and returns that
+ * error; returns 0 when all units succeed.  'ret' starts at 0 so the result
+ * is defined even if the loop body never runs.
+ */
+static int
+pci_lpc_snapshot(struct vm_snapshot_meta *meta)
+{
+	int unit, ret;
+
+	ret = 0;
+	for (unit = 0; unit < LPC_UART_NUM; unit++) {
+		ret = uart_snapshot(lpc_uart_softc[unit].uart_softc, meta);
+		if (ret != 0)
+			break;
+	}
+
+	return (ret);
+}
+
struct pci_devemu pci_de_lpc = {
.pe_emu = "lpc",
.pe_init = pci_lpc_init,
.pe_write_dsdt = pci_lpc_write_dsdt,
.pe_cfgwrite = pci_lpc_cfgwrite,
.pe_barwrite = pci_lpc_write,
- .pe_barread = pci_lpc_read
+ .pe_barread = pci_lpc_read,
+ .pe_snapshot = pci_lpc_snapshot,
};
PCI_EMUL_SET(pci_de_lpc);
Index: usr.sbin/bhyve/pci_virtio_block.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_block.c
+++ usr.sbin/bhyve/pci_virtio_block.c
@@ -38,6 +38,8 @@
#include <sys/ioctl.h>
#include <sys/disk.h>
+#include <machine/vmm_snapshot.h>
+
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@@ -144,6 +146,9 @@
};
static void pci_vtblk_reset(void *);
+static void pci_vtblk_pause(void *);
+static void pci_vtblk_resume(void *);
+static int pci_vtblk_snapshot(void *, struct vm_snapshot_meta *);
static void pci_vtblk_notify(void *, struct vqueue_info *);
static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
@@ -158,6 +163,9 @@
pci_vtblk_cfgwrite, /* write PCI config */
NULL, /* apply negotiated features */
VTBLK_S_HOSTCAPS, /* our capabilities */
+ pci_vtblk_pause, /* pause blockif threads */
+ pci_vtblk_resume, /* resume blockif threads */
+ pci_vtblk_snapshot, /* save / restore device state */
};
static void
@@ -169,6 +177,38 @@
vi_reset_dev(&sc->vbsc_vs);
}
+/*
+ * virtio-blk pause hook: pauses the block backend attached to this device.
+ * 'vsc' is the device's pci_vtblk_softc.
+ */
+static void
+pci_vtblk_pause(void *vsc)
+{
+	struct pci_vtblk_softc *blk;
+
+	blk = vsc;
+	DPRINTF(("vtblk: device pause requested !\n"));
+
+	blockif_pause(blk->bc);
+}
+
+/*
+ * virtio-blk resume hook: resumes the block backend attached to this device.
+ * 'vsc' is the device's pci_vtblk_softc.
+ */
+static void
+pci_vtblk_resume(void *vsc)
+{
+	struct pci_vtblk_softc *blk;
+
+	blk = vsc;
+	DPRINTF(("vtblk: device resume requested !\n"));
+
+	blockif_resume(blk->bc);
+}
+
+/*
+ * Device-specific part of the virtio-blk snapshot: the virtio block
+ * configuration (vbsc_cfg) followed by the identification buffer
+ * (vbsc_ident).  'vsc' is the device's pci_vtblk_softc.
+ * Returns the 'ret' value left by the last macro.
+ */
+static int
+pci_vtblk_snapshot(void *vsc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct pci_vtblk_softc *sc = vsc;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->vbsc_cfg, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->vbsc_ident, sizeof(sc->vbsc_ident),
+ meta, ret, done);
+
+done:
+ return (ret);
+}
+
static void
pci_vtblk_done(struct blockif_req *br, int err)
{
@@ -408,6 +448,7 @@
.pe_emu = "virtio-blk",
.pe_init = pci_vtblk_init,
.pe_barwrite = vi_pci_write,
- .pe_barread = vi_pci_read
+ .pe_barread = vi_pci_read,
+ .pe_snapshot = vi_pci_snapshot,
};
PCI_EMUL_SET(pci_de_vblk);
Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -49,6 +49,8 @@
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
+#include <machine/vmm_snapshot.h>
+
#include <err.h>
#include <errno.h>
#include <fcntl.h>
@@ -172,6 +174,9 @@
};
static void pci_vtnet_reset(void *);
+static void pci_vtnet_pause(void *);
+static void pci_vtnet_resume(void *);
+static int pci_vtnet_snapshot(void *, struct vm_snapshot_meta *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
@@ -187,6 +192,9 @@
pci_vtnet_cfgwrite, /* write PCI config */
pci_vtnet_neg_features, /* apply negotiated features */
VTNET_S_HOSTCAPS, /* our capabilities */
+ pci_vtnet_pause, /* pause rx/tx threads */
+ pci_vtnet_resume, /* resume rx/tx threads */
+ pci_vtnet_snapshot, /* save / restore device state */
};
/*
@@ -247,6 +255,65 @@
sc->resetting = 0;
}
+/*
+ * virtio-net pause hook.  Sets sc->resetting to 1 while holding both the
+ * tx and rx mutexes, then waits for the transmit and receive threads via
+ * pci_vtnet_txwait()/pci_vtnet_rxwait().  'vsc' is the pci_vtnet_softc.
+ */
+static void
+pci_vtnet_pause(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device pause requested !\n"));
+
+ /* Lock order: tx_mtx first, then rx_mtx; released in reverse. */
+ pthread_mutex_lock(&sc->tx_mtx);
+ pthread_mutex_lock(&sc->rx_mtx);
+ sc->resetting = 1;
+ pthread_mutex_unlock(&sc->rx_mtx);
+ pthread_mutex_unlock(&sc->tx_mtx);
+
+ /*
+ * Wait for the transmit and receive threads to finish their
+ * processing.
+ */
+ pci_vtnet_txwait(sc);
+ pci_vtnet_rxwait(sc);
+}
+
+/*
+ * virtio-net resume hook.  Clears sc->resetting while holding both the tx
+ * and rx mutexes, undoing what the pause hook set.
+ * 'vsc' is the pci_vtnet_softc.
+ */
+static void
+pci_vtnet_resume(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device resume requested !\n"));
+
+ /* Same lock order as the pause hook: tx_mtx first, then rx_mtx. */
+ pthread_mutex_lock(&sc->tx_mtx);
+ pthread_mutex_lock(&sc->rx_mtx);
+ sc->resetting = 0;
+ pthread_mutex_unlock(&sc->rx_mtx);
+ pthread_mutex_unlock(&sc->tx_mtx);
+}
+
+/*
+ * Device-specific part of the virtio-net snapshot.  Processes, in this
+ * order: vsc_features, vsc_config, rx_vhdrlen, rx_merge and vsc_rx_ready.
+ * 'vsc' is the pci_vtnet_softc.  Returns the 'ret' value left by the last
+ * macro.
+ */
+static int
+pci_vtnet_snapshot(void *vsc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device snapshot requested !\n"));
+
+ /*
+ * Queues and consts should have been saved by the more generic
+ * vi_pci_snapshot function. We need to save only our features and
+ * config.
+ */
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_features, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_config, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rx_vhdrlen, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rx_merge, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_rx_ready, meta, ret, done);
+
+done:
+ return (ret);
+}
+
/*
* Called to send a buffer chain out to the tap device
*/
@@ -990,6 +1057,7 @@
.pe_emu = "virtio-net",
.pe_init = pci_vtnet_init,
.pe_barwrite = vi_pci_write,
- .pe_barread = vi_pci_read
+ .pe_barread = vi_pci_read,
+ .pe_snapshot = vi_pci_snapshot,
};
PCI_EMUL_SET(pci_de_vnet);
Index: usr.sbin/bhyve/pci_xhci.c
===================================================================
--- usr.sbin/bhyve/pci_xhci.c
+++ usr.sbin/bhyve/pci_xhci.c
@@ -48,6 +48,8 @@
#include <pthread.h>
#include <unistd.h>
+#include <machine/vmm_snapshot.h>
+
#include <dev/usb/usbdi.h>
#include <dev/usb/usb.h>
#include <dev/usb/usb_freebsd.h>
@@ -150,6 +152,8 @@
#define FIELD_COPY(a,b,m,s) (((a) & ~((m) << (s))) | \
(((b) & ((m) << (s)))))
+#define SNAP_DEV_NAME_LEN 128
+
struct pci_xhci_trb_ring {
uint64_t ringaddr; /* current dequeue guest address */
uint32_t ccs; /* consumer cycle state */
@@ -285,9 +289,10 @@
#define XHCI_HALTED(sc) ((sc)->opregs.usbsts & XHCI_STS_HCH)
+#define XHCI_GADDR_SIZE(a) (XHCI_PADDR_SZ - \
+ (((uint64_t) (a)) & (XHCI_PADDR_SZ - 1)))
#define XHCI_GADDR(sc,a) paddr_guest2host((sc)->xsc_pi->pi_vmctx, \
- (a), \
- XHCI_PADDR_SZ - ((a) & (XHCI_PADDR_SZ-1)))
+ (a), XHCI_GADDR_SIZE(a))
static int xhci_in_use;
@@ -2827,12 +2832,261 @@
return (error);
}
+/*
+ * Build the slot -> device-index map used when saving the controller.
+ *
+ * For every slot i in 1..XHCI_MAX_SLOTS, maps[i] is set to the index j
+ * (1..XHCI_MAX_DEVS) of the device instance the slot points at, or left 0
+ * when the slot is empty or points at no known device.
+ *
+ * 'maps' must hold XHCI_MAX_SLOTS + 1 entries because slots are 1-based;
+ * entry 0 is cleared and otherwise unused.
+ */
+static void
+pci_xhci_map_devs_slots(struct pci_xhci_softc *sc, int maps[])
+{
+	int i, j;
+	struct pci_xhci_dev_emu *slot;
+
+	/* Clear the whole array, including the last (1-based) slot entry. */
+	memset(maps, 0, sizeof(maps[0]) * (XHCI_MAX_SLOTS + 1));
+
+	for (i = 1; i <= XHCI_MAX_SLOTS; i++) {
+		slot = XHCI_SLOTDEV_PTR(sc, i);
+
+		/*
+		 * An empty slot must stay mapped to 0; without this check a
+		 * NULL slot would compare equal to any unused device entry.
+		 */
+		if (slot == NULL)
+			continue;
+
+		for (j = 1; j <= XHCI_MAX_DEVS; j++) {
+			if (XHCI_DEVINST_PTR(sc, j) == slot)
+				maps[i] = j;
+		}
+	}
+}
+/*
+ * Save or restore the transfer state of endpoint 'idx' of device 'dev'.
+ *
+ * The endpoint's ep_xfer pointer is written to / read from the snapshot
+ * first and acts as a presence flag: a NULL value means the endpoint had no
+ * transfer state and nothing else follows for it.  On restore a non-NULL
+ * value causes the endpoint to be re-created with pci_xhci_init_ep(); the
+ * pointer value read from the snapshot is never dereferenced.
+ *
+ * Returns 0 on success, ENOMEM if the request buffer cannot be allocated
+ * on restore, or the error left in 'ret' by a SNAPSHOT_* macro.
+ */
+static int
+pci_xhci_snapshot_ep(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev,
+    int idx, struct vm_snapshot_meta *meta)
+{
+	int k;
+	int ret;
+	struct usb_data_xfer *xfer;
+	struct usb_data_xfer_block *xfer_block;
+
+	/* On save, record whether this endpoint has transfer state at all. */
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		xfer = dev->eps[idx].ep_xfer;
+
+	SNAPSHOT_VAR_OR_LEAVE(xfer, meta, ret, done);
+	if (xfer == NULL) {
+		ret = 0;
+		goto done;
+	}
+
+	if (meta->op == VM_SNAPSHOT_RESTORE) {
+		pci_xhci_init_ep(dev, idx);
+		xfer = dev->eps[idx].ep_xfer;
+	}
+
+	/* save / restore proper */
+	for (k = 0; k < USB_MAX_XFER_BLOCKS; k++) {
+		xfer_block = &xfer->data[k];
+
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(xfer_block->buf,
+		    XHCI_GADDR_SIZE(xfer_block->buf), true, meta, ret,
+		    done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->blen, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->bdone, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->processed, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->hci_data, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->ccs, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->streamid, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->trbnext, meta, ret, done);
+	}
+
+	SNAPSHOT_VAR_OR_LEAVE(xfer->ureq, meta, ret, done);
+	if (xfer->ureq) {
+		/*
+		 * On restore the value just read is a pointer from the saved
+		 * process and only tells us a request existed; allocate fresh
+		 * storage for it before reading its contents.
+		 */
+		if (meta->op == VM_SNAPSHOT_RESTORE) {
+			xfer->ureq = malloc(sizeof(struct usb_device_request));
+			if (xfer->ureq == NULL) {
+				ret = ENOMEM;
+				goto done;
+			}
+		}
+
+		SNAPSHOT_BUF_OR_LEAVE(xfer->ureq,
+		    sizeof(struct usb_device_request),
+		    meta, ret, done);
+	}
+
+	SNAPSHOT_VAR_OR_LEAVE(xfer->ndata, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(xfer->head, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(xfer->tail, meta, ret, done);
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore (depending on meta->op) the emulated xHCI controller.
+ *
+ * meta->dev_data must point at the controller's pci_devinst.  The stream
+ * contains, in order: capability registers, operational registers, runtime
+ * registers, a per-device identity check (index and emulation name), the
+ * port registers of every present device, and then for each slot the index
+ * of the device it maps to followed by that device's context, endpoints,
+ * slot state, device-model state and HCI address/port.
+ *
+ * Returns 0 on success, EINVAL when the snapshot does not match the
+ * configured devices or contains an out-of-range device index, or the error
+ * produced by a SNAPSHOT_* macro or a nested snapshot callback.
+ */
+static int
+pci_xhci_snapshot(struct vm_snapshot_meta *meta)
+{
+	int i, j;
+	int ret;
+	int restore_idx;
+	struct pci_devinst *pi;
+	struct pci_xhci_softc *sc;
+	struct pci_xhci_portregs *port;
+	struct pci_xhci_dev_emu *dev;
+	char dname[SNAP_DEV_NAME_LEN];
+	int maps[XHCI_MAX_SLOTS + 1];
+
+	pi = meta->dev_data;
+	sc = pi->pi_arg;
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->caplength, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams1, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams2, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams3, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hccparams1, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->dboff, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsoff, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hccparams2, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->regsend, meta, ret, done);
+
+	/* opregs */
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbcmd, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbsts, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.pgsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dnctrl, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.crcr, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dcbaap, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.config, meta, ret, done);
+
+	/* opregs.cr_p */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.cr_p,
+	    XHCI_GADDR_SIZE(sc->opregs.cr_p), false, meta, ret, done);
+
+	/* opregs.dcbaa_p */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.dcbaa_p,
+	    XHCI_GADDR_SIZE(sc->opregs.dcbaa_p), false, meta, ret, done);
+
+	/* rtsregs */
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.mfindex, meta, ret, done);
+
+	/* rtsregs.intrreg */
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.iman, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.imod, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.rsvd, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstba, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erdp, meta, ret, done);
+
+	/* rtsregs.erstba_p */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erstba_p,
+	    XHCI_GADDR_SIZE(sc->rtsregs.erstba_p), false, meta, ret, done);
+
+	/* rtsregs.erst_p */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erst_p,
+	    XHCI_GADDR_SIZE(sc->rtsregs.erst_p), false, meta, ret, done);
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_deq_seg, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_idx, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_seg, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_events_cnt, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.event_pcs, meta, ret, done);
+
+	/*
+	 * Sanity checking: every configured device records its index and
+	 * emulation name; on restore both must match what is configured now.
+	 */
+	for (i = 1; i <= XHCI_MAX_DEVS; i++) {
+		dev = XHCI_DEVINST_PTR(sc, i);
+		if (dev == NULL)
+			continue;
+
+		if (meta->op == VM_SNAPSHOT_SAVE)
+			restore_idx = i;
+		SNAPSHOT_VAR_OR_LEAVE(restore_idx, meta, ret, done);
+
+		/* check if the restored device (when restoring) is sane */
+		if (restore_idx != i) {
+			fprintf(stderr, "%s: idx not matching: actual: %d, "
+			    "expected: %d\r\n", __func__, restore_idx, i);
+			ret = EINVAL;
+			goto done;
+		}
+
+		if (meta->op == VM_SNAPSHOT_SAVE) {
+			memset(dname, 0, sizeof(dname));
+			strncpy(dname, dev->dev_ue->ue_emu, sizeof(dname) - 1);
+		}
+
+		SNAPSHOT_BUF_OR_LEAVE(dname, sizeof(dname), meta, ret, done);
+
+		if (meta->op == VM_SNAPSHOT_RESTORE) {
+			/* The name comes from the snapshot; force termination. */
+			dname[sizeof(dname) - 1] = '\0';
+			if (strcmp(dev->dev_ue->ue_emu, dname)) {
+				fprintf(stderr, "%s: device names mismatch: "
+				    "actual: %s, expected: %s\r\n",
+				    __func__, dname, dev->dev_ue->ue_emu);
+
+				ret = EINVAL;
+				goto done;
+			}
+		}
+	}
+
+	/* portregs */
+	for (i = 1; i <= XHCI_MAX_DEVS; i++) {
+		port = XHCI_PORTREG_PTR(sc, i);
+		dev = XHCI_DEVINST_PTR(sc, i);
+
+		if (dev == NULL)
+			continue;
+
+		SNAPSHOT_VAR_OR_LEAVE(port->portsc, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->portpmsc, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->portli, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->porthlpmc, meta, ret, done);
+	}
+
+	/* slots */
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		pci_xhci_map_devs_slots(sc, maps);
+
+	for (i = 1; i <= XHCI_MAX_SLOTS; i++) {
+		SNAPSHOT_VAR_OR_LEAVE(maps[i], meta, ret, done);
+
+		if (meta->op == VM_SNAPSHOT_SAVE) {
+			dev = XHCI_SLOTDEV_PTR(sc, i);
+		} else if (meta->op == VM_SNAPSHOT_RESTORE) {
+			/*
+			 * maps[i] was read from the snapshot; make sure it is
+			 * a valid device index before using it as one.
+			 */
+			if (maps[i] < 0 || maps[i] > XHCI_MAX_DEVS) {
+				fprintf(stderr, "%s: invalid device index %d "
+				    "for slot %d\r\n", __func__, maps[i], i);
+				ret = EINVAL;
+				goto done;
+			}
+
+			if (maps[i] != 0)
+				dev = XHCI_DEVINST_PTR(sc, maps[i]);
+			else
+				dev = NULL;
+
+			XHCI_SLOTDEV_PTR(sc, i) = dev;
+		} else {
+			/* error */
+			ret = EINVAL;
+			goto done;
+		}
+
+		if (dev == NULL)
+			continue;
+
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(dev->dev_ctx,
+		    XHCI_GADDR_SIZE(dev->dev_ctx), false, meta, ret, done);
+
+		for (j = 1; j < XHCI_MAX_ENDPOINTS; j++) {
+			ret = pci_xhci_snapshot_ep(sc, dev, j, meta);
+			if (ret != 0)
+				goto done;
+		}
+
+		SNAPSHOT_VAR_OR_LEAVE(dev->dev_slotstate, meta, ret, done);
+
+		/* devices[i]->dev_sc: a failing device model aborts the op */
+		ret = dev->dev_ue->ue_snapshot(dev->dev_sc, meta);
+		if (ret != 0)
+			goto done;
+
+		/* devices[i]->hci */
+		SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_address, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_port, meta, ret, done);
+	}
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->ndevices, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->usb2_port_start, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->usb3_port_start, meta, ret, done);
+
+done:
+	return (ret);
+}
struct pci_devemu pci_de_xhci = {
.pe_emu = "xhci",
.pe_init = pci_xhci_init,
.pe_barwrite = pci_xhci_write,
- .pe_barread = pci_xhci_read
+ .pe_barread = pci_xhci_read,
+ .pe_snapshot = pci_xhci_snapshot,
};
PCI_EMUL_SET(pci_de_xhci);
Index: usr.sbin/bhyve/ps2kbd.h
===================================================================
--- usr.sbin/bhyve/ps2kbd.h
+++ usr.sbin/bhyve/ps2kbd.h
@@ -32,10 +32,13 @@
#define _PS2KBD_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct ps2kbd_softc *ps2kbd_init(struct atkbdc_softc *sc);
int ps2kbd_read(struct ps2kbd_softc *sc, uint8_t *val);
void ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val);
+int ps2kbd_snapshot(struct ps2kbd_softc *sc, struct vm_snapshot_meta *meta);
+
#endif /* _PS2KBD_H_ */
Index: usr.sbin/bhyve/ps2kbd.c
===================================================================
--- usr.sbin/bhyve/ps2kbd.c
+++ usr.sbin/bhyve/ps2kbd.c
@@ -32,10 +32,13 @@
#include <sys/types.h>
+#include <machine/vmm_snapshot.h>
+
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
@@ -381,3 +384,14 @@
return (sc);
}
+/*
+ * Save or restore the PS/2 keyboard state: the 'enabled' flag followed by
+ * the current command (curcmd).  Returns the 'ret' value left by the last
+ * macro.
+ */
+int
+ps2kbd_snapshot(struct ps2kbd_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->curcmd, meta, ret, done);
+
+done:
+ return (ret);
+}
Index: usr.sbin/bhyve/ps2mouse.h
===================================================================
--- usr.sbin/bhyve/ps2mouse.h
+++ usr.sbin/bhyve/ps2mouse.h
@@ -32,6 +32,7 @@
#define _PS2MOUSE_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct ps2mouse_softc *ps2mouse_init(struct atkbdc_softc *sc);
@@ -40,4 +41,6 @@
void ps2mouse_toggle(struct ps2mouse_softc *sc, int enable);
int ps2mouse_fifocnt(struct ps2mouse_softc *sc);
+int ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta);
+
#endif /* _PS2MOUSE_H_ */
Index: usr.sbin/bhyve/ps2mouse.c
===================================================================
--- usr.sbin/bhyve/ps2mouse.c
+++ usr.sbin/bhyve/ps2mouse.c
@@ -32,10 +32,13 @@
#include <sys/types.h>
+#include <machine/vmm_snapshot.h>
+
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
@@ -415,4 +418,21 @@
return (sc);
}
-
+/*
+ * Save or restore the PS/2 mouse state.  Processes, in this order: status,
+ * resolution, sampling_rate, ctrlenable, curcmd, cur_x, cur_y, delta_x and
+ * delta_y.  Returns the 'ret' value left by the last macro.
+ */
+int
+ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->resolution, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->sampling_rate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ctrlenable, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->curcmd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->cur_x, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->cur_y, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->delta_x, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->delta_y, meta, ret, done);
+
+done:
+ return (ret);
+}
Index: usr.sbin/bhyve/snapshot.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/snapshot.h
@@ -0,0 +1,68 @@
+#ifndef _BHYVE_SNAPSHOT_
+#define _BHYVE_SNAPSHOT_
+
+#include <machine/vmm_snapshot.h>
+#include <libxo/xo.h>
+#include <ucl.h>
+
+struct vmctx;
+
+/*
+ * State carried while restoring a VM from a snapshot.
+ * Field meanings below are inferred from their names -- TODO confirm
+ * against load_restore_file() and its users.
+ * NOTE(review): the struct is declared packed although it only holds
+ * process-local descriptors and pointers; confirm packing is required.
+ */
+struct __attribute__((packed)) restore_state {
+ int kdata_fd; /* presumably fd of the saved kernel/device data file */
+ int vmmem_fd; /* presumably fd of the saved guest memory file */
+
+ void *kdata_map; /* presumably mapping of the kdata file */
+ size_t kdata_len; /* presumably length of that mapping */
+
+ size_t vmmem_len; /* presumably length of the guest memory file */
+
+ struct ucl_parser *meta_parser; /* UCL parser for the metadata */
+ ucl_object_t *meta_root_obj; /* root UCL object of the metadata */
+};
+
+/*
+ * Arguments for the checkpoint thread: the VM context, a socket descriptor
+ * and the socket's unix-domain address.
+ * NOTE(review): this also defines the object 'checkpoint_info' in a header,
+ * so every file including snapshot.h gets its own (tentative) definition;
+ * consider declaring it 'extern' here and defining it once in snapshot.c.
+ */
+struct checkpoint_thread_info {
+ struct vmctx *ctx;
+ int socket_fd;
+ struct sockaddr_un *addr;
+} checkpoint_info;
+
+typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *);
+typedef int (*vm_pause_dev_cb) (struct vmctx *, const char *);
+typedef int (*vm_resume_dev_cb) (struct vmctx *, const char *);
+
+/*
+ * One entry per userspace device that takes part in snapshots: its name
+ * and the callbacks used to snapshot, pause and resume it.
+ */
+struct vm_snapshot_dev_info {
+ const char *dev_name; /* device name */
+ vm_snapshot_dev_cb snapshot_cb; /* callback for device snapshot */
+ vm_pause_dev_cb pause_cb; /* callback for device pause */
+ vm_resume_dev_cb resume_cb; /* callback for device resume */
+};
+
+/*
+ * One entry per kernel-side structure that takes part in snapshots: its
+ * name and the snapshot request type that refers to it.
+ */
+struct vm_snapshot_kern_info {
+ const char *struct_name; /* kernel structure name */
+ enum snapshot_req req; /* request type */
+};
+
+
+void destroy_restore_state(struct restore_state *rstate);
+
+const char * lookup_vmname(struct restore_state *rstate);
+int lookup_memflags(struct restore_state *rstate);
+size_t lookup_memsize(struct restore_state *rstate);
+int lookup_guest_ncpus(struct restore_state *rstate);
+
+
+int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate);
+int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate);
+
+int vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate);
+int vm_pause_user_devs(struct vmctx *ctx);
+int vm_resume_user_devs(struct vmctx *ctx);
+
+int get_checkpoint_msg(int conn_fd, struct vmctx *ctx);
+void *checkpoint_thread(void *param);
+int init_checkpoint_thread(struct vmctx *ctx);
+
+
+int load_restore_file(const char *filename, struct restore_state *rstate);
+
+#endif
Index: usr.sbin/bhyve/snapshot.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/snapshot.c
@@ -0,0 +1,1423 @@
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/un.h>
+
+#include <machine/atomic.h>
+#include <machine/segments.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <sysexits.h>
+#include <stdbool.h>
+
+#include <machine/vmm.h>
+#ifndef WITHOUT_CAPSICUM
+#include <machine/vmm_dev.h>
+#endif
+#include <machine/vmm_snapshot.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "acpi.h"
+#include "atkbdc.h"
+#include "inout.h"
+#include "dbgport.h"
+#include "fwctl.h"
+#include "ioapic.h"
+#include "mem.h"
+#include "mevent.h"
+#include "mptbl.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+#include "smbiostbl.h"
+#include "snapshot.h"
+#include "xmsr.h"
+#include "spinup_ap.h"
+#include "rtc.h"
+
+#include <libxo/xo.h>
+#include <ucl.h>
+
+extern int guest_ncpus;
+
+#define MB (1024UL * 1024)
+#define GB (1024UL * MB)
+
+#define BHYVE_RUN_DIR "/var/run/bhyve"
+#define CHECKPOINT_RUN_DIR BHYVE_RUN_DIR "/checkpoint"
+#define MAX_VMNAME 100
+
+#define MAX_MSG_SIZE 1024
+
+#define SNAPSHOT_BUFFER_SIZE (20 * MB)
+
+#define JSON_STRUCT_ARR_KEY "structs"
+#define JSON_DEV_ARR_KEY "devices"
+#define JSON_BASIC_METADATA_KEY "basic metadata"
+#define JSON_SNAPSHOT_REQ_KEY "snapshot_req"
+#define JSON_SIZE_KEY "size"
+#define JSON_FILE_OFFSET_KEY "file_offset"
+
+#define JSON_NCPUS_KEY "ncpus"
+#define JSON_VMNAME_KEY "vmname"
+#define JSON_MEMSIZE_KEY "memsize"
+#define JSON_MEMFLAGS_KEY "memflags"
+
+/*
+ * Userspace device models that are saved and restored, in the order in which
+ * they are processed.  Columns: name, snapshot callback, pause callback,
+ * resume callback.  A NULL pause/resume callback means the device is skipped
+ * by vm_pause_user_devs()/vm_resume_user_devs().
+ */
+const struct vm_snapshot_dev_info snapshot_devs[] = {
+ { "atkbdc", atkbdc_snapshot, NULL, NULL },
+ { "virtio-net", pci_snapshot, NULL, NULL },
+ { "virtio-blk", pci_snapshot, NULL, NULL },
+ { "lpc", pci_snapshot, NULL, NULL },
+ { "fbuf", pci_snapshot, NULL, NULL },
+ { "xhci", pci_snapshot, NULL, NULL },
+ { "e1000", pci_snapshot, NULL, NULL },
+ { "ahci", pci_snapshot, pci_pause, pci_resume },
+ { "ahci-hd", pci_snapshot, pci_pause, pci_resume },
+ { "ahci-cd", pci_snapshot, NULL, NULL },
+};
+
+/*
+ * In-kernel structures that are saved and restored through vm_snapshot_req(),
+ * in the order in which they are processed.  Columns: name used in
+ * diagnostics and metadata, request identifier.
+ */
+const struct vm_snapshot_kern_info snapshot_kern_structs[] = {
+ { "vhpet", STRUCT_VHPET },
+ { "vm", STRUCT_VM },
+ { "vmx", STRUCT_VMX },
+ { "vioapic", STRUCT_VIOAPIC },
+ { "vlapic", STRUCT_VLAPIC },
+ { "vmcx", STRUCT_VMCX },
+ { "vatpit", STRUCT_VATPIT },
+ { "vatpic", STRUCT_VATPIC },
+ { "vpmtmr", STRUCT_VPMTMR },
+ { "vrtc", STRUCT_VRTC },
+};
+
+/*
+ * TODO: Harden this function and all of its callers since 'base_str' is a user
+ * provided string.
+ */
+static char *
+strcat_extension(const char *base_str, const char *ext)
+{
+	size_t stem_len, suffix_len, total_len;
+	char *joined;
+
+	/* Both inputs are measured with the same MAX_VMNAME cap. */
+	stem_len = strnlen(base_str, MAX_VMNAME);
+	suffix_len = strnlen(ext, MAX_VMNAME);
+	total_len = stem_len + suffix_len;
+
+	if (total_len > MAX_VMNAME) {
+		fprintf(stderr, "Filename exceeds maximum length.\n");
+		return (NULL);
+	}
+
+	/* One extra byte for the terminating NUL. */
+	joined = malloc(total_len + 1);
+	if (joined == NULL) {
+		perror("Failed to allocate memory.");
+		return (NULL);
+	}
+
+	memcpy(joined, base_str, stem_len);
+	memcpy(joined + stem_len, ext, suffix_len);
+	joined[total_len] = 0;
+
+	/* The caller owns the returned string and must free() it. */
+	return (joined);
+}
+
+/*
+ * Release everything a restore_state refers to: the kernel data mapping, both
+ * file descriptors, and the UCL root object and parser.  Fields that were
+ * never set up (MAP_FAILED, non-positive descriptors, NULL pointers) are
+ * skipped.  The structure itself is not freed.
+ */
+void
+destroy_restore_state(struct restore_state *rstate)
+{
+	if (rstate == NULL) {
+		fprintf(stderr, "Attempting to destroy NULL restore struct.\n");
+		return;
+	}
+
+	/* Kernel/device data: drop the mapping first, then its descriptor. */
+	if (rstate->kdata_map != MAP_FAILED) {
+		munmap(rstate->kdata_map, rstate->kdata_len);
+	}
+	if (rstate->kdata_fd > 0) {
+		close(rstate->kdata_fd);
+	}
+
+	/* Guest memory image. */
+	if (rstate->vmmem_fd > 0) {
+		close(rstate->vmmem_fd);
+	}
+
+	/* Metadata: the root object is released before the parser. */
+	if (rstate->meta_root_obj != NULL) {
+		ucl_object_unref(rstate->meta_root_obj);
+	}
+	if (rstate->meta_parser != NULL) {
+		ucl_parser_free(rstate->meta_parser);
+	}
+}
+
+/*
+ * Open the guest memory image 'filename' read-only and record its descriptor
+ * and size in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure (open/fstat error or empty file).
+ * On failure no descriptor is left open and rstate->vmmem_fd is reset to -1,
+ * so a later destroy_restore_state() cannot close the same descriptor twice.
+ */
+static int
+load_vmmem_file(const char *filename, struct restore_state *rstate)
+{
+	struct stat sb;
+	int err;
+
+	rstate->vmmem_fd = open(filename, O_RDONLY);
+	if (rstate->vmmem_fd < 0) {
+		perror("Failed to open restore file");
+		return (-1);
+	}
+
+	err = fstat(rstate->vmmem_fd, &sb);
+	if (err < 0) {
+		perror("Failed to stat restore file");
+		goto err_load_vmmem;
+	}
+
+	if (sb.st_size == 0) {
+		fprintf(stderr, "Restore file is empty.\n");
+		goto err_load_vmmem;
+	}
+
+	rstate->vmmem_len = sb.st_size;
+
+	return (0);
+
+err_load_vmmem:
+	/* open() succeeded above, so the descriptor is valid here. */
+	close(rstate->vmmem_fd);
+	rstate->vmmem_fd = -1;
+	return (-1);
+}
+
+/*
+ * Open the kernel/device data file 'filename' read-only, map it into memory
+ * and record descriptor, mapping and length in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure (open/fstat/mmap error or empty
+ * file).  On failure no descriptor is left open and rstate->kdata_fd is reset
+ * to -1, so a later destroy_restore_state() cannot close it a second time.
+ */
+static int
+load_kdata_file(const char *filename, struct restore_state *rstate)
+{
+	struct stat sb;
+	int err;
+
+	rstate->kdata_fd = open(filename, O_RDONLY);
+	if (rstate->kdata_fd < 0) {
+		perror("Failed to open kernel data file");
+		return (-1);
+	}
+
+	err = fstat(rstate->kdata_fd, &sb);
+	if (err < 0) {
+		perror("Failed to stat kernel data file");
+		goto err_load_kdata;
+	}
+
+	if (sb.st_size == 0) {
+		fprintf(stderr, "Kernel data file is empty.\n");
+		goto err_load_kdata;
+	}
+
+	rstate->kdata_len = sb.st_size;
+	rstate->kdata_map = mmap(NULL, rstate->kdata_len, PROT_READ,
+	    MAP_SHARED, rstate->kdata_fd, 0);
+	if (rstate->kdata_map == MAP_FAILED) {
+		perror("Failed to map restore file");
+		goto err_load_kdata;
+	}
+
+	return (0);
+
+err_load_kdata:
+	/* open() succeeded above, so the descriptor is valid here. */
+	close(rstate->kdata_fd);
+	rstate->kdata_fd = -1;
+	return (-1);
+}
+
+/*
+ * Parse the UCL/JSON metadata file 'filename' and store the parser and the
+ * root object in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure.  On failure the parser is freed and
+ * 'rstate' is left untouched.
+ */
+static int
+load_metadata_file(const char *filename, struct restore_state *rstate)
+{
+	const ucl_object_t *obj;
+	struct ucl_parser *parser;
+
+	parser = ucl_parser_new(UCL_PARSER_DEFAULT);
+	if (parser == NULL) {
+		/* Nothing to clean up yet; return a defined error code. */
+		fprintf(stderr, "Failed to initialize UCL parser.\n");
+		return (-1);
+	}
+
+	if (!ucl_parser_add_file(parser, filename)) {
+		fprintf(stderr, "Failed to parse metadata file: '%s'\n",
+		    filename);
+		goto err_load_metadata;
+	}
+
+	obj = ucl_parser_get_object(parser);
+	if (obj == NULL) {
+		fprintf(stderr, "Failed to parse object.\n");
+		goto err_load_metadata;
+	}
+
+	rstate->meta_parser = parser;
+	rstate->meta_root_obj = (ucl_object_t *)obj;
+
+	return (0);
+
+err_load_metadata:
+	ucl_parser_free(parser);
+	return (-1);
+}
+
+/*
+ * Load a saved VM image.  Three files are opened: 'filename' (guest memory),
+ * 'filename'.kern (kernel and device data) and 'filename'.meta (metadata).
+ *
+ * 'rstate' is zeroed first.  Returns 0 on success; on failure returns -1 after
+ * releasing whatever had been set up so far via destroy_restore_state().
+ * The temporary filename strings are freed on every path.
+ */
+int
+load_restore_file(const char *filename, struct restore_state *rstate)
+{
+	int ret = -1;
+	char *kdata_filename = NULL, *meta_filename = NULL;
+
+	assert(filename != NULL);
+	assert(rstate != NULL);
+
+	memset(rstate, 0, sizeof(*rstate));
+	rstate->kdata_map = MAP_FAILED;
+
+	if (load_vmmem_file(filename, rstate) != 0) {
+		fprintf(stderr, "Failed to load guest RAM file.\n");
+		goto done;
+	}
+
+	kdata_filename = strcat_extension(filename, ".kern");
+	if (kdata_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel data filename.\n");
+		goto done;
+	}
+
+	if (load_kdata_file(kdata_filename, rstate) != 0) {
+		fprintf(stderr, "Failed to load guest kernel data file.\n");
+		goto done;
+	}
+
+	meta_filename = strcat_extension(filename, ".meta");
+	if (meta_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel metadata filename.\n");
+		goto done;
+	}
+
+	if (load_metadata_file(meta_filename, rstate) != 0) {
+		fprintf(stderr, "Failed to load guest metadata file.\n");
+		goto done;
+	}
+
+	ret = 0;
+
+done:
+	if (ret != 0)
+		destroy_restore_state(rstate);
+	/* The names are only needed while opening; free them on all paths. */
+	free(kdata_filename);
+	free(meta_filename);
+	return (ret);
+}
+
+/*
+ * Look up 'key' in the UCL object 'obj' and store its integer value through
+ * 'result_ptr' (an int64_t *).  If the key is missing or is not convertible
+ * to an integer, print a diagnostic and *return 'ret' from the enclosing
+ * function*.
+ */
+#define JSON_GET_INT_OR_RETURN(key, obj, result_ptr, ret)			\
+do {									\
+	const ucl_object_t *obj__;					\
+	obj__ = ucl_object_lookup((obj), (key));			\
+	if (obj__ == NULL) {						\
+		fprintf(stderr, "Missing key: '%s'\n", (key));		\
+		return (ret);						\
+	}								\
+	if (!ucl_object_toint_safe(obj__, (result_ptr))) {		\
+		fprintf(stderr, "Cannot convert '%s' value to int.\n",	\
+		    (key));						\
+		return (ret);						\
+	}								\
+} while (0)
+
+/*
+ * Look up 'key' in the UCL object 'obj' and store its string value through
+ * 'result_ptr' (a const char **).  If the key is missing or is not
+ * convertible to a string, print a diagnostic and *return 'ret' from the
+ * enclosing function*.
+ */
+#define JSON_GET_STRING_OR_RETURN(key, obj, result_ptr, ret)			\
+do {									\
+	const ucl_object_t *obj__;					\
+	obj__ = ucl_object_lookup((obj), (key));			\
+	if (obj__ == NULL) {						\
+		fprintf(stderr, "Missing key: '%s'\n", (key));		\
+		return (ret);						\
+	}								\
+	if (!ucl_object_tostring_safe(obj__, (result_ptr))) {		\
+		fprintf(stderr, "Cannot convert '%s' value to string.\n", \
+		    (key));						\
+		return (ret);						\
+	}								\
+} while (0)
+
+/*
+ * Find the saved image of the kernel structure identified by 'struct_id'.
+ *
+ * The "structs" array of the metadata is scanned for an entry whose
+ * "snapshot_req" equals 'struct_id'.  On a match the entry's size is stored
+ * in '*struct_size' and a pointer into the mapped kernel data file is
+ * returned.
+ *
+ * Returns NULL if the array is missing or malformed, if a required key is
+ * missing, if no entry matches, or if the matching entry describes a region
+ * that does not lie inside the mapped file.  The metadata comes from disk, so
+ * it is validated with explicit checks rather than assert().
+ */
+static void *
+lookup_struct(enum snapshot_req struct_id, struct restore_state *rstate,
+    size_t *struct_size)
+{
+	const ucl_object_t *structs = NULL, *obj = NULL;
+	ucl_object_iter_t it = NULL;
+	int64_t snapshot_req, size, file_offset;
+
+	structs = ucl_object_lookup(rstate->meta_root_obj, JSON_STRUCT_ARR_KEY);
+	if (structs == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_STRUCT_ARR_KEY);
+		return (NULL);
+	}
+
+	if (ucl_object_type((ucl_object_t *)structs) != UCL_ARRAY) {
+		fprintf(stderr, "Object '%s' is not an array.\n",
+		    JSON_STRUCT_ARR_KEY);
+		return (NULL);
+	}
+
+	while ((obj = ucl_object_iterate(structs, &it, true)) != NULL) {
+		snapshot_req = -1;
+		JSON_GET_INT_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj,
+		    &snapshot_req, NULL);
+		if (snapshot_req < 0) {
+			fprintf(stderr, "Invalid '%s' value in metadata.\n",
+			    JSON_SNAPSHOT_REQ_KEY);
+			return (NULL);
+		}
+		if ((enum snapshot_req)snapshot_req != struct_id)
+			continue;
+
+		JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj, &size, NULL);
+		JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj,
+		    &file_offset, NULL);
+
+		/*
+		 * Bounds check written as a subtraction so that a huge
+		 * offset/size pair cannot overflow the comparison.
+		 */
+		if (size < 0 || file_offset < 0 ||
+		    (size_t)file_offset > rstate->kdata_len ||
+		    (size_t)size > rstate->kdata_len - (size_t)file_offset) {
+			fprintf(stderr, "Metadata for struct %lld is out of "
+			    "range of the kernel data file.\n",
+			    (long long)snapshot_req);
+			return (NULL);
+		}
+
+		*struct_size = (size_t)size;
+		return ((char *)rstate->kdata_map + file_offset);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Check whether the metadata entry 'obj' describes the device 'dev_name'.
+ *
+ * If its "snapshot_req" string equals 'dev_name', store the entry's size in
+ * '*data_size' and return a pointer to the device data inside the mapped
+ * kernel data file.  Otherwise, or when a required key is missing, return
+ * NULL.
+ */
+static void *
+lookup_check_dev(const char *dev_name, struct restore_state *rstate,
+    const ucl_object_t *obj, size_t *data_size)
+{
+	const char *snapshot_req = NULL;
+	int64_t size, file_offset;
+
+	JSON_GET_STRING_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj,
+	    &snapshot_req, NULL);
+	assert(snapshot_req != NULL);
+
+	/* Not the entry we are looking for. */
+	if (strcmp(snapshot_req, dev_name) != 0)
+		return (NULL);
+
+	JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj,
+	    &size, NULL);
+	assert(size >= 0);
+
+	JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj,
+	    &file_offset, NULL);
+	assert(file_offset >= 0);
+	assert(file_offset + size <= rstate->kdata_len);
+
+	*data_size = (size_t)size;
+	return (rstate->kdata_map + file_offset);
+}
+
+/*
+ * Find the saved data of device 'dev_name' by walking the "devices" array of
+ * the metadata.  Returns a pointer into the mapped kernel data file and sets
+ * '*data_size' on success; returns NULL if the array is missing, is not an
+ * array, or contains no matching entry.
+ */
+static void*
+lookup_dev(const char *dev_name, struct restore_state *rstate,
+    size_t *data_size)
+{
+	const ucl_object_t *dev_list, *entry;
+	ucl_object_iter_t iter = NULL;
+	void *found;
+
+	dev_list = ucl_object_lookup(rstate->meta_root_obj, JSON_DEV_ARR_KEY);
+	if (dev_list == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_DEV_ARR_KEY);
+		return (NULL);
+	}
+
+	if (ucl_object_type((ucl_object_t *)dev_list) != UCL_ARRAY) {
+		fprintf(stderr, "Object '%s' is not an array.\n",
+		    JSON_DEV_ARR_KEY);
+		return (NULL);
+	}
+
+	for (;;) {
+		entry = ucl_object_iterate(dev_list, &iter, true);
+		if (entry == NULL)
+			break;
+
+		found = lookup_check_dev(dev_name, rstate, entry, data_size);
+		if (found != NULL)
+			return (found);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the "basic metadata" object of the loaded metadata file, or NULL
+ * (after printing a diagnostic) if it is absent or is not a JSON object.
+ */
+static const ucl_object_t *
+lookup_basic_metadata_object(struct restore_state *rstate)
+{
+	const ucl_object_t *meta;
+
+	meta = ucl_object_lookup(rstate->meta_root_obj,
+	    JSON_BASIC_METADATA_KEY);
+
+	if (meta == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_BASIC_METADATA_KEY);
+		return (NULL);
+	}
+
+	if (ucl_object_type((ucl_object_t *)meta) == UCL_OBJECT)
+		return (meta);
+
+	fprintf(stderr, "Object '%s' is not a JSON object.\n",
+	    JSON_BASIC_METADATA_KEY);
+	return (NULL);
+}
+
+/*
+ * Return the VM name recorded in the snapshot's basic metadata, or NULL if
+ * the metadata object or the name key cannot be read.  The returned string
+ * comes from the parsed metadata and is not a copy.
+ */
+const char *
+lookup_vmname(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta = lookup_basic_metadata_object(rstate);
+	const char *name;
+
+	if (basic_meta == NULL)
+		return (NULL);
+
+	/* The macro returns NULL from this function on failure. */
+	JSON_GET_STRING_OR_RETURN(JSON_VMNAME_KEY, basic_meta, &name, NULL);
+	return (name);
+}
+
+/*
+ * Return the memory flags recorded in the snapshot's basic metadata.
+ * Returns 0 when the metadata object or the key cannot be read, which is
+ * indistinguishable from a recorded value of 0.
+ */
+int
+lookup_memflags(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta = lookup_basic_metadata_object(rstate);
+	int64_t flags;
+
+	if (basic_meta == NULL)
+		return (0);
+
+	/* The macro returns 0 from this function on failure. */
+	JSON_GET_INT_OR_RETURN(JSON_MEMFLAGS_KEY, basic_meta, &flags, 0);
+
+	return ((int)flags);
+}
+
+/*
+ * Return the guest memory size recorded in the snapshot's basic metadata.
+ * Returns 0 when the metadata object or the key cannot be read, or when the
+ * recorded value is negative.
+ */
+size_t
+lookup_memsize(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta = lookup_basic_metadata_object(rstate);
+	int64_t bytes;
+
+	if (basic_meta == NULL)
+		return (0);
+
+	/* The macro returns 0 from this function on failure. */
+	JSON_GET_INT_OR_RETURN(JSON_MEMSIZE_KEY, basic_meta, &bytes, 0);
+
+	return (bytes < 0 ? 0 : (size_t)bytes);
+}
+
+
+/*
+ * Return the number of guest CPUs recorded in the snapshot's basic metadata,
+ * or 0 when the metadata object or the key cannot be read.
+ */
+int
+lookup_guest_ncpus(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta = lookup_basic_metadata_object(rstate);
+	int64_t cpu_count;
+
+	if (basic_meta == NULL)
+		return (0);
+
+	/* The macro returns 0 from this function on failure. */
+	JSON_GET_INT_OR_RETURN(JSON_NCPUS_KEY, basic_meta, &cpu_count, 0);
+	return ((int)cpu_count);
+}
+
+/*
+ * Restore guest memory from the memory image opened by load_restore_file().
+ * Thin wrapper: the result of vm_restore_mem() is returned unchanged.
+ */
+int
+restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate)
+{
+	int rc;
+
+	rc = vm_restore_mem(ctx, rstate->vmmem_fd, rstate->vmmem_len);
+	return (rc);
+}
+
+/*
+ * Restore one in-kernel structure described by 'info'.
+ *
+ * The saved bytes are located with lookup_struct() and handed to
+ * vm_snapshot_req() in restore mode.  Returns 0 on success, -1 if the data
+ * cannot be found or has size 0, or the non-zero result of vm_snapshot_req().
+ */
+static int
+vm_restore_kern_struct(struct vmctx *ctx, struct restore_state *rstate,
+    const struct vm_snapshot_kern_info *info)
+{
+	struct vm_snapshot_meta meta;
+	size_t saved_len;
+	void *saved;
+	int rc;
+
+	saved = lookup_struct(info->req, rstate, &saved_len);
+	if (saved == NULL) {
+		fprintf(stderr, "%s: Failed to lookup struct %s\r\n",
+		    __func__, info->struct_name);
+		return (-1);
+	}
+
+	if (saved_len == 0) {
+		fprintf(stderr, "%s: Kernel struct size was 0 for: %s\r\n",
+		    __func__, info->struct_name);
+		return (-1);
+	}
+
+	/* The cursor starts at the beginning with the full size remaining. */
+	meta = (struct vm_snapshot_meta) {
+		.ctx = ctx,
+		.dev_name = info->struct_name,
+		.dev_req = info->req,
+
+		.buffer.buf_start = saved,
+		.buffer.buf_size = saved_len,
+
+		.buffer.buf = saved,
+		.buffer.buf_rem = saved_len,
+
+		.op = VM_SNAPSHOT_RESTORE,
+	};
+
+	rc = vm_snapshot_req(&meta);
+	if (rc != 0) {
+		fprintf(stderr, "%s: Failed to restore struct: %s\r\n",
+		    __func__, info->struct_name);
+	}
+
+	return (rc);
+}
+
+/*
+ * Restore every entry of snapshot_kern_structs in table order.  Stops at the
+ * first failure and returns its error code; returns 0 if all succeed.
+ */
+int
+vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate)
+{
+	const struct vm_snapshot_kern_info *info, *end;
+	int rc;
+
+	end = snapshot_kern_structs + nitems(snapshot_kern_structs);
+	for (info = snapshot_kern_structs; info < end; info++) {
+		rc = vm_restore_kern_struct(ctx, rstate, info);
+		if (rc != 0)
+			return (rc);
+	}
+
+	return (0);
+}
+
+/*
+ * Restore one userspace device described by 'info'.
+ *
+ * A device with no saved data, or with saved data of size 0, is treated as
+ * not in use: a message is printed and 0 is returned.  Otherwise the device's
+ * snapshot callback is invoked in restore mode; -1 is returned if it fails.
+ */
+int
+vm_restore_user_dev(struct vmctx *ctx, struct restore_state *rstate,
+    const struct vm_snapshot_dev_info *info)
+{
+	struct vm_snapshot_meta meta;
+	size_t saved_len;
+	void *saved;
+
+	saved = lookup_dev(info->dev_name, rstate, &saved_len);
+	if (saved == NULL) {
+		fprintf(stderr, "Failed to lookup dev: %s\r\n", info->dev_name);
+		fprintf(stderr, "Continuing the restore/migration process\r\n");
+		return (0);
+	}
+
+	if (saved_len == 0) {
+		fprintf(stderr, "%s: Device size is 0. "
+		    "Assuming %s is not used\r\n",
+		    __func__, info->dev_name);
+		return (0);
+	}
+
+	/* The cursor starts at the beginning with the full size remaining. */
+	meta = (struct vm_snapshot_meta) {
+		.ctx = ctx,
+		.dev_name = info->dev_name,
+
+		.buffer.buf_start = saved,
+		.buffer.buf_size = saved_len,
+
+		.buffer.buf = saved,
+		.buffer.buf_rem = saved_len,
+
+		.op = VM_SNAPSHOT_RESTORE,
+	};
+
+	if ((*info->snapshot_cb)(&meta) != 0) {
+		fprintf(stderr, "Failed to restore dev: %s\r\n",
+		    info->dev_name);
+		return (-1);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Restore every entry of snapshot_devs in table order.  Stops at the first
+ * failure and returns its error code; returns 0 if all succeed.
+ */
+int
+vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate)
+{
+	const struct vm_snapshot_dev_info *dev, *end;
+	int rc;
+
+	end = snapshot_devs + nitems(snapshot_devs);
+	for (dev = snapshot_devs; dev < end; dev++) {
+		rc = vm_restore_user_dev(ctx, rstate, dev);
+		if (rc != 0)
+			return (rc);
+	}
+
+	return (0);
+}
+
+/*
+ * Invoke the pause callback of every snapshot_devs entry that has one, in
+ * table order.  Stops at the first failure and returns its error code;
+ * returns 0 if all succeed.
+ */
+int
+vm_pause_user_devs(struct vmctx *ctx)
+{
+	const struct vm_snapshot_dev_info *dev, *end;
+	int rc;
+
+	end = snapshot_devs + nitems(snapshot_devs);
+	for (dev = snapshot_devs; dev < end; dev++) {
+		if (dev->pause_cb != NULL) {
+			rc = dev->pause_cb(ctx, dev->dev_name);
+			if (rc != 0)
+				return (rc);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Invoke the resume callback of every snapshot_devs entry that has one, in
+ * table order.  Stops at the first failure and returns its error code;
+ * returns 0 if all succeed.
+ */
+int
+vm_resume_user_devs(struct vmctx *ctx)
+{
+	const struct vm_snapshot_dev_info *dev, *end;
+	int rc;
+
+	end = snapshot_devs + nitems(snapshot_devs);
+	for (dev = snapshot_devs; dev < end; dev++) {
+		if (dev->resume_cb != NULL) {
+			rc = dev->resume_cb(ctx, dev->dev_name);
+			if (rc != 0)
+				return (rc);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Save one in-kernel structure.
+ *
+ * 'meta' must already name the structure and point at an empty buffer.  The
+ * structure is fetched with vm_snapshot_req(), appended to 'data_fd', and a
+ * metadata instance (debug name, request id, size, file offset) is emitted on
+ * 'xop' as a member of the list named 'array_key'.  '*offset' is advanced by
+ * the number of bytes written.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+vm_snapshot_kern_struct(int data_fd, xo_handle_t *xop, const char *array_key,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	int ret;
+	size_t data_size;
+	ssize_t write_cnt;
+
+	ret = vm_snapshot_req(meta);
+	if (ret != 0) {
+		fprintf(stderr, "%s: Failed to snapshot struct %s\r\n",
+		    __func__, meta->dev_name);
+		ret = -1;
+		goto done;
+	}
+
+	data_size = vm_get_snapshot_size(meta);
+
+	write_cnt = write(data_fd, meta->buffer.buf_start, data_size);
+	if (write_cnt < 0 || (size_t)write_cnt != data_size) {
+		perror("Failed to write all snapshotted data.");
+		ret = -1;
+		goto done;
+	}
+
+	/*
+	 * Write metadata.  The instance is opened and closed with the same
+	 * name so that libxo's open/close pairing stays balanced.
+	 */
+	xo_open_instance_h(xop, array_key);
+	xo_emit_h(xop, "{:debug_name/%s}\n", meta->dev_name);
+	xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%d}\n",
+	    meta->dev_req);
+	xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size);
+	xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset);
+	xo_close_instance_h(xop, array_key);
+
+	*offset += data_size;
+
+done:
+	return (ret);
+}
+
+/*
+ * Save every entry of snapshot_kern_structs.
+ *
+ * Each structure is fetched into a scratch buffer of SNAPSHOT_BUFFER_SIZE
+ * bytes, appended to 'data_fd', and described by one instance of the
+ * "structs" list emitted on 'xop'.  File offsets recorded in the metadata
+ * start at 0.
+ *
+ * Returns 0 on success, ENOMEM if the scratch buffer cannot be allocated and
+ * -1 if saving any structure fails.
+ */
+static int
+vm_snapshot_kern_structs(struct vmctx *ctx, int data_fd, xo_handle_t *xop)
+{
+	int ret, i, error;
+	off_t offset;		/* must match the off_t * taken by the helper */
+	size_t buf_size;
+	char *buffer;
+	struct vm_snapshot_meta *meta;
+
+	error = 0;
+	offset = 0;
+	buf_size = SNAPSHOT_BUFFER_SIZE;
+
+	buffer = malloc(buf_size);
+	if (buffer == NULL) {
+		error = ENOMEM;
+		perror("Failed to allocate memory for snapshot buffer");
+		goto err_vm_snapshot_kern_data;
+	}
+
+	meta = &(struct vm_snapshot_meta) {
+		.ctx = ctx,
+
+		.buffer.buf_start = buffer,
+		.buffer.buf_size = buf_size,
+
+		.op = VM_SNAPSHOT_SAVE,
+	};
+
+	xo_open_list_h(xop, JSON_STRUCT_ARR_KEY);
+	for (i = 0; i < nitems(snapshot_kern_structs); i++) {
+		meta->dev_name = snapshot_kern_structs[i].struct_name;
+		meta->dev_req = snapshot_kern_structs[i].req;
+
+		/* Reset the scratch buffer and its cursor for each struct. */
+		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+		meta->buffer.buf = meta->buffer.buf_start;
+		meta->buffer.buf_rem = meta->buffer.buf_size;
+
+		/* Instances belong to the "structs" list opened above. */
+		ret = vm_snapshot_kern_struct(data_fd, xop,
+		    JSON_STRUCT_ARR_KEY, meta, &offset);
+		if (ret != 0) {
+			error = -1;
+			goto err_vm_snapshot_kern_data;
+		}
+	}
+	xo_close_list_h(xop, JSON_STRUCT_ARR_KEY);
+
+err_vm_snapshot_kern_data:
+	free(buffer);
+	return (error);
+}
+
+/*
+ * Emit the "basic metadata" container on 'xop': number of guest CPUs, VM
+ * name, total guest memory size (low + high) and memory flags.
+ *
+ * Returns 0 on success or the non-zero result of vm_get_name().
+ */
+static int
+vm_snapshot_basic_metadata(struct vmctx *ctx, xo_handle_t *xop)
+{
+	int error;
+	size_t memsize;
+	int memflags;
+	char vmname_buf[MAX_VMNAME];
+
+	/* Zero the buffer first so the name is always NUL-terminated. */
+	memset(vmname_buf, 0, MAX_VMNAME);
+	error = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (error != 0) {
+		perror("Failed to get VM name");
+		goto err;
+	}
+
+	memsize = vm_get_lowmem_size(ctx) + vm_get_highmem_size(ctx);
+	memflags = vm_get_memflags(ctx);
+
+	xo_open_container_h(xop, JSON_BASIC_METADATA_KEY);
+	/* guest_ncpus is an int, so it is formatted with %d. */
+	xo_emit_h(xop, "{:" JSON_NCPUS_KEY "/%d}\n", guest_ncpus);
+	xo_emit_h(xop, "{:" JSON_VMNAME_KEY "/%s}\n", vmname_buf);
+	xo_emit_h(xop, "{:" JSON_MEMSIZE_KEY "/%lu}\n", memsize);
+	xo_emit_h(xop, "{:" JSON_MEMFLAGS_KEY "/%d}\n", memflags);
+	xo_close_container_h(xop, JSON_BASIC_METADATA_KEY);
+
+err:
+	return (error);
+}
+
+/*
+ * Append the data collected in 'meta' to 'data_fd' and emit a metadata
+ * instance (device name, size, file offset) on 'xop' as a member of the list
+ * named 'array_key'.  '*offset' is advanced by the number of bytes written.
+ *
+ * Returns 0 on success and -1 if the data could not be written completely.
+ */
+static int
+vm_snapshot_dev_write_data(int data_fd, xo_handle_t *xop, const char *array_key,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	ssize_t write_cnt;
+	size_t data_size;
+
+	data_size = vm_get_snapshot_size(meta);
+
+	/* write() returns ssize_t; keep the full width and test the sign. */
+	write_cnt = write(data_fd, meta->buffer.buf_start, data_size);
+	if (write_cnt < 0 || (size_t)write_cnt != data_size) {
+		perror("Failed to write all snapshotted data.");
+		return (-1);
+	}
+
+	/* Write metadata. */
+	xo_open_instance_h(xop, array_key);
+	xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%s}\n", meta->dev_name);
+	xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size);
+	xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset);
+	xo_close_instance_h(xop, array_key);
+
+	*offset += data_size;
+
+	return (0);
+}
+
+/*
+ * Save one userspace device: run its snapshot callback on 'meta', then write
+ * the collected data and its metadata entry.  Returns 0 on success or the
+ * non-zero result of whichever step failed.
+ */
+static int
+vm_snapshot_user_dev(const struct vm_snapshot_dev_info *info,
+    int data_fd, xo_handle_t *xop,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	int rc;
+
+	rc = (*info->snapshot_cb)(meta);
+	if (rc != 0) {
+		fprintf(stderr, "Failed to snapshot %s; ret=%d\r\n",
+		    meta->dev_name, rc);
+		return (rc);
+	}
+
+	rc = vm_snapshot_dev_write_data(data_fd, xop, JSON_DEV_ARR_KEY, meta,
+	    offset);
+	return (rc != 0 ? rc : 0);
+}
+
+/*
+ * Save every entry of snapshot_devs.
+ *
+ * Device data is appended to 'data_fd' starting at the file's current
+ * offset, and one instance per device is emitted in the "devices" list on
+ * 'xop'.  A scratch buffer of SNAPSHOT_BUFFER_SIZE bytes is reused for all
+ * devices.
+ *
+ * Returns 0 on success, -1 if the current file offset cannot be determined,
+ * ENOMEM if the scratch buffer cannot be allocated, or the non-zero result of
+ * the first device that fails.
+ */
+static int
+vm_snapshot_user_devs(struct vmctx *ctx, int data_fd, xo_handle_t *xop)
+{
+	int ret, i;
+	off_t offset;
+	void *buffer;
+	size_t buf_size;
+	struct vm_snapshot_meta *meta;
+
+	buf_size = SNAPSHOT_BUFFER_SIZE;
+
+	offset = lseek(data_fd, 0, SEEK_CUR);
+	if (offset < 0) {
+		perror("Failed to get data file current offset.");
+		return (-1);
+	}
+
+	buffer = malloc(buf_size);
+	if (buffer == NULL) {
+		perror("Failed to allocate memory for snapshot buffer");
+		/* Out of memory, not out of disk space. */
+		return (ENOMEM);
+	}
+
+	meta = &(struct vm_snapshot_meta) {
+		.ctx = ctx,
+
+		.buffer.buf_start = buffer,
+		.buffer.buf_size = buf_size,
+
+		.op = VM_SNAPSHOT_SAVE,
+	};
+
+	xo_open_list_h(xop, JSON_DEV_ARR_KEY);
+
+	/* Save all devices that support this feature. */
+	ret = 0;
+	for (i = 0; i < nitems(snapshot_devs); i++) {
+		meta->dev_name = snapshot_devs[i].dev_name;
+
+		/* Reset the scratch buffer and its cursor for each device. */
+		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+		meta->buffer.buf = meta->buffer.buf_start;
+		meta->buffer.buf_rem = meta->buffer.buf_size;
+
+		ret = vm_snapshot_user_dev(&snapshot_devs[i], data_fd, xop,
+		    meta, &offset);
+		if (ret != 0)
+			goto snapshot_err;
+	}
+
+	xo_close_list_h(xop, JSON_DEV_ARR_KEY);
+
+snapshot_err:
+	free(buffer);
+	return (ret);
+}
+
+/*
+ * Write 'len' bytes starting at 'src' to 'fd' at absolute file offset
+ * 'dst_offset'.  Short writes are continued until everything is written, and
+ * a write interrupted by a signal (EINTR) is retried.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+vm_mem_write_to_file(int fd, const void *src, size_t dst_offset, size_t len)
+{
+	const char *cursor;	/* byte pointer: no arithmetic on void * */
+	size_t remaining;
+	ssize_t cnt_write;
+
+	cursor = src;
+	remaining = len;
+
+	if (lseek(fd, dst_offset, SEEK_SET) < 0 ) {
+		perror("Failed to changed file offset");
+		return (-1);
+	}
+
+	while (remaining > 0) {
+		cnt_write = write(fd, cursor, remaining);
+		if (cnt_write < 0) {
+			if (errno == EINTR)
+				continue;
+			perror("Failed to write in file");
+			return (-1);
+		}
+		cursor += cnt_write;
+		remaining -= cnt_write;
+	}
+
+	return (0);
+}
+
+/*
+ * Write a checkpoint of the VM to disk.
+ *
+ * Three files are produced: 'checkpoint_file' (guest memory, low memory
+ * followed by high memory), 'checkpoint_file'.kern (kernel structures and
+ * device state) and 'checkpoint_file'.meta (JSON metadata).  Devices are
+ * paused and all vcpus are locked while state is collected.
+ *
+ * If 'stop_vm' is true the VM is suspended and destroyed after a successful
+ * checkpoint and the process exits; the function does not return in that
+ * case.
+ *
+ * Returns 0 on success and a non-zero value on failure.  Every failure path
+ * sets a non-zero result before jumping to the cleanup code.
+ */
+static int
+vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, bool stop_vm)
+{
+	int fd_checkpoint = 0, kdata_fd = 0;
+	int ret = 0;
+	int error = 0;
+	size_t guest_lowmem, guest_highmem, guest_memsize;
+	char *guest_baseaddr;
+	char *guest_lowmem_addr, *guest_highmem_addr = NULL;
+	xo_handle_t *xop = NULL;
+	char *meta_filename = NULL;
+	char *kdata_filename = NULL;
+	FILE *meta_file = NULL;
+
+	kdata_filename = strcat_extension(checkpoint_file, ".kern");
+	if (kdata_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel data filename.\n");
+		return (-1);
+	}
+
+	kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+	if (kdata_fd < 0) {
+		perror("Failed to open kernel data snapshot file.");
+		error = -1;
+		goto done;
+	}
+
+	fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
+
+	if (fd_checkpoint < 0) {
+		perror("Failed to create checkpoint file");
+		error = -1;
+		goto done;
+	}
+
+	ret = vm_get_guestmem_from_ctx(ctx, &guest_baseaddr, &guest_lowmem, &guest_highmem);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to get guest mem information (base, low, high)\n");
+		error = -1;
+		goto done;
+	}
+	/* Only meaningful once the sizes above have been filled in. */
+	guest_memsize = guest_lowmem + guest_highmem;
+
+	/* make space for VMs address space */
+	ret = ftruncate(fd_checkpoint, guest_memsize);
+	if (ret < 0) {
+		perror("Failed to truncate checkpoint file\n");
+		error = -1;
+		goto done;
+	}
+
+	meta_filename = strcat_extension(checkpoint_file, ".meta");
+	if (meta_filename == NULL) {
+		fprintf(stderr, "Failed to construct vm metadata filename.\n");
+		error = -1;
+		goto done;
+	}
+
+	meta_file = fopen(meta_filename, "w");
+	if (meta_file == NULL) {
+		perror("Failed to open vm metadata snapshot file.");
+		error = -1;
+		goto done;
+	}
+
+	xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY);
+	if (xop == NULL) {
+		perror("Failed to get libxo handle on metadata file.");
+		error = -1;
+		goto done;
+	}
+
+	ret = vm_snapshot_basic_metadata(ctx, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot vm basic metadata.\n");
+		error = -1;
+		goto done;
+	}
+
+	/* High memory, when present, is mapped 4GB above the base address. */
+	guest_lowmem_addr = guest_baseaddr;
+	if (guest_highmem > 0)
+		guest_highmem_addr = guest_baseaddr + 4*GB;
+
+	ret = vm_pause_user_devs(ctx);
+	if (ret != 0) {
+		fprintf(stderr, "Could not pause devices\r\n");
+		error = ret;
+		goto done;
+	}
+
+	vm_vcpu_lock_all(ctx);
+
+	ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot vm kernel data.\n");
+		error = -1;
+		goto done_unlock;
+	}
+
+	ret = vm_snapshot_user_devs(ctx, kdata_fd, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot device state.\n");
+		error = -1;
+		goto done_unlock;
+	}
+
+	if (vm_mem_write_to_file(fd_checkpoint, guest_lowmem_addr,
+	    0, guest_lowmem) != 0) {
+		perror("Could not write lowmem");
+		error = -1;
+		goto done_unlock;
+	}
+
+	if (guest_highmem > 0) {
+		if (vm_mem_write_to_file(fd_checkpoint, guest_highmem_addr,
+		    guest_lowmem, guest_highmem) != 0) {
+			perror("Could not write highmem");
+			error = -1;
+			goto done_unlock;
+		}
+	}
+
+	xo_finish_h(xop);
+
+	if (stop_vm) {
+		ret = vm_suspend(ctx, VM_SUSPEND_POWEROFF);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to suspend vm\n");
+		}
+		vm_vcpu_unlock_all(ctx);
+
+		ret = vm_resume_user_devs(ctx);
+		if (ret != 0)
+			fprintf(stderr, "Could not resume devices\r\n");
+
+		/* Wait for CPUs to suspend. TODO: write this properly. */
+		sleep(5);
+		vm_destroy(ctx);
+		exit(0);
+	}
+
+done_unlock:
+	vm_vcpu_unlock_all(ctx);
+done:
+	/*
+	 * NOTE(review): this also runs on paths where the devices were never
+	 * paused; confirm that resuming an unpaused device is harmless.
+	 */
+	ret = vm_resume_user_devs(ctx);
+	if (ret != 0)
+		fprintf(stderr, "Could not resume devices\r\n");
+	if (fd_checkpoint > 0)
+		close(fd_checkpoint);
+	if (meta_filename != NULL)
+		free(meta_filename);
+	if (kdata_filename != NULL)
+		free(kdata_filename);
+	if (xop != NULL)
+		xo_destroy(xop);
+	if (meta_file != NULL)
+		fclose(meta_file);
+	if (kdata_fd > 0)
+		close(kdata_fd);
+	return (error);
+}
+
+/*
+ * Read one 'struct checkpoint_op' request from 'conn_fd' and act on it.
+ *
+ * START_CHECKPOINT checkpoints the VM and keeps it running; START_SUSPEND
+ * checkpoints it and stops it.  The connection is closed before returning.
+ *
+ * Returns 0 on success and a non-zero value if the message cannot be read in
+ * full, names an unknown operation, or the checkpoint itself fails.
+ *
+ * NOTE(review): snapshot_filename is used as received from the peer;
+ * confirm that the sender always NUL-terminates it.
+ */
+int
+get_checkpoint_msg(int conn_fd, struct vmctx *ctx)
+{
+	unsigned char buf[MAX_MSG_SIZE];
+	struct checkpoint_op *checkpoint_op;
+	int len, recv_len, total_recv = 0;
+	int err = 0;
+
+	len = sizeof(struct checkpoint_op); /* expected length */
+	if (len > (int)sizeof(buf)) {
+		fprintf(stderr, "Checkpoint message does not fit in buffer.\n");
+		err = -1;
+		goto done;
+	}
+
+	recv_len = 0;
+	while (total_recv < len) {
+		recv_len = recv(conn_fd, buf + total_recv, len - total_recv, 0);
+		if (recv_len <= 0)
+			break;
+		total_recv += recv_len;
+	}
+	if (recv_len < 0) {
+		perror("Error while receiving data from bhyvectl");
+		err = -1;
+		goto done;
+	}
+	if (total_recv < len) {
+		/* Peer closed early: the rest of buf is uninitialized. */
+		fprintf(stderr, "Incomplete checkpoint message received.\n");
+		err = -1;
+		goto done;
+	}
+
+	checkpoint_op = (struct checkpoint_op *)buf;
+	switch (checkpoint_op->op) {
+	case START_CHECKPOINT:
+		err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, false);
+		break;
+	case START_SUSPEND:
+		err = vm_checkpoint(ctx, checkpoint_op->snapshot_filename, true);
+		break;
+	default:
+		fprintf(stderr, "Unrecognized checkpoint operation.\n");
+		err = -1;
+	}
+
+done:
+	close(conn_fd);
+	return (err);
+}
+
+/*
+ * Listen for commands from bhyvectl
+ */
+/*
+ * Thread body: accept connections on the checkpoint socket and handle one
+ * request per connection.  'param' is a 'struct checkpoint_thread_info *'.
+ *
+ * The peer address is received into a buffer local to this thread, so the
+ * thread does not depend on the lifetime of storage owned by its creator.
+ * The loop ends, after printing a diagnostic, when accept() fails for a
+ * reason other than EINTR.  Always returns NULL.
+ */
+void *
+checkpoint_thread(void *param)
+{
+	struct checkpoint_thread_info *thread_info;
+	struct sockaddr_un peer_addr;
+	socklen_t addr_len;
+	int conn_fd, ret;
+
+	thread_info = (struct checkpoint_thread_info *)param;
+
+	for (;;) {
+		/* accept() overwrites addr_len, so reset it every time. */
+		addr_len = sizeof(peer_addr);
+		conn_fd = accept(thread_info->socket_fd,
+		    (struct sockaddr *)&peer_addr, &addr_len);
+		if (conn_fd < 0) {
+			if (errno == EINTR)
+				continue;
+			break;
+		}
+
+		ret = get_checkpoint_msg(conn_fd, thread_info->ctx);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to read message on checkpoint "
+			    "socket. Retrying.\n");
+		}
+	}
+
+	perror("Failed to accept connection");
+
+	return (NULL);
+}
+
+/*
+ * Create directory tree to store runtime specific information:
+ * i.e. UNIX sockets for IPC with bhyvectl.
+ */
+static int
+make_checkpoint_dir()
+{
+	static const char *const run_dirs[] = {
+		BHYVE_RUN_DIR,
+		CHECKPOINT_RUN_DIR,
+	};
+	size_t idx;
+	int rc;
+
+	/* Parent first, then the child; an existing directory is fine. */
+	for (idx = 0; idx < nitems(run_dirs); idx++) {
+		rc = mkdir(run_dirs[idx], 0755);
+		if (rc < 0 && errno != EEXIST)
+			return (rc);
+	}
+
+	return (0);
+}
+
+/*
+ * Create the listening socket for IPC with bhyvectl
+ */
+/*
+ * Create the UNIX socket CHECKPOINT_RUN_DIR/<vmname>, start listening on it
+ * and spawn the thread that serves checkpoint requests.
+ *
+ * Returns 0 on success and a non-zero value on failure.  On failure the
+ * socket is closed and, if its path had already been built, unlinked.
+ */
+int
+init_checkpoint_thread(struct vmctx *ctx)
+{
+	/*
+	 * Static storage: a pointer to this address is published through
+	 * checkpoint_info and must stay valid after this function returns.
+	 */
+	static struct sockaddr_un addr;
+	int socket_fd;
+	pthread_t checkpoint_pthread;
+	char vmname_buf[MAX_VMNAME];
+	int ret, err = 0;
+
+	/* Initialize early so the failure path never sees garbage. */
+	memset(&addr, 0, sizeof(struct sockaddr_un));
+	addr.sun_family = AF_UNIX;
+
+	socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		perror("Socket creation failed (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	err = make_checkpoint_dir();
+	if (err < 0) {
+		perror("Failed to create checkpoint runtime directory");
+		goto fail;
+	}
+
+	/* Zero the buffer first so the name is always NUL-terminated. */
+	memset(vmname_buf, 0, sizeof(vmname_buf));
+	err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (err != 0) {
+		perror("Failed to get VM name");
+		goto fail;
+	}
+
+	/* sun_path is far smaller than PATH_MAX; bound by its real size. */
+	ret = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
+	    CHECKPOINT_RUN_DIR, vmname_buf);
+	if (ret < 0 || (size_t)ret >= sizeof(addr.sun_path)) {
+		fprintf(stderr, "Checkpoint socket path is too long.\n");
+		addr.sun_path[0] = '\0';
+		err = -1;
+		goto fail;
+	}
+	unlink(addr.sun_path);
+
+	if (bind(socket_fd, (struct sockaddr *)&addr,
+	    sizeof(struct sockaddr_un)) != 0) {
+		perror("Failed to bind socket (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	if (listen(socket_fd, 10) < 0) {
+		perror("Failed to listen on socket (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	memset(&checkpoint_info, 0, sizeof(struct checkpoint_thread_info));
+	checkpoint_info.ctx = ctx;
+	checkpoint_info.socket_fd = socket_fd;
+	checkpoint_info.addr = &addr;
+
+	/* pthread_create() reports failure as a positive error number. */
+	ret = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread,
+	    &checkpoint_info);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to create checkpoint thread: %s\n",
+		    strerror(ret));
+		err = -1;
+		goto fail;
+	}
+	/* The thread handle is only valid once creation has succeeded. */
+	pthread_set_name_np(checkpoint_pthread, "checkpoint thread");
+
+	return (0);
+fail:
+	if (socket_fd >= 0)
+		close(socket_fd);
+	if (addr.sun_path[0] != '\0')
+		unlink(addr.sun_path);
+
+	return (err);
+}
+
+/*
+ * Print a diagnostic saying that saving or restoring the buffer called
+ * 'bufname' failed.  'op' selects the wording ("save", "restore", or
+ * "unknown" for any other value).
+ */
+void
+vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op)
+{
+	const char *op_name;
+
+	switch (op) {
+	case VM_SNAPSHOT_SAVE:
+		op_name = "save";
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		op_name = "restore";
+		break;
+	default:
+		op_name = "unknown";
+		break;
+	}
+
+	fprintf(stderr, "%s: snapshot-%s failed for %s\r\n",
+	    __func__, op_name, bufname);
+}
+
+/*
+ * Transfer 'data_size' bytes between 'data' and the snapshot buffer in
+ * 'meta'.  In save mode the bytes are copied from 'data' into the buffer; in
+ * restore mode they are copied from the buffer into 'data'.  On success the
+ * buffer cursor advances by 'data_size'.
+ *
+ * Returns 0 on success, E2BIG if fewer than 'data_size' bytes remain in the
+ * buffer, and EINVAL for any other operation.
+ */
+int
+vm_snapshot_buf(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *sbuf = &meta->buffer;
+
+	if (data_size > sbuf->buf_rem) {
+		fprintf(stderr, "%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		memcpy(sbuf->buf, (uint8_t *) data, data_size);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		memcpy((uint8_t *) data, sbuf->buf, data_size);
+		break;
+	default:
+		/* Unknown operation: the cursor is left untouched. */
+		return (EINVAL);
+	}
+
+	sbuf->buf += data_size;
+	sbuf->buf_rem -= data_size;
+
+	return (0);
+}
+
+/*
+ * Return the number of bytes consumed so far in the snapshot buffer of
+ * 'meta' (total size minus remaining space).  If the remaining space exceeds
+ * the total size the buffer is inconsistent: a diagnostic is printed and 0 is
+ * returned.
+ */
+size_t
+vm_get_snapshot_size(struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *sbuf = &meta->buffer;
+
+	if (sbuf->buf_size >= sbuf->buf_rem)
+		return (sbuf->buf_size - sbuf->buf_rem);
+
+	fprintf(stderr, "%s: Invalid buffer: size = %zu, rem = %zu\r\n",
+	    __func__, sbuf->buf_size, sbuf->buf_rem);
+	return (0);
+}
+
+/*
+ * Save or restore a host pointer that refers to guest memory.
+ *
+ * In save mode '*addrp' is translated to a guest physical address with
+ * paddr_host2guest() and that address is snapshotted.  In restore mode the
+ * guest physical address is read back and translated to a host pointer with
+ * paddr_guest2host(), which is stored in '*addrp'.
+ *
+ * 'restore_null' controls whether an untranslatable address, represented as
+ * (vm_paddr_t)-1, is tolerated: when false it yields EFAULT; when true it is
+ * accepted in save mode only if '*addrp' is NULL.
+ *
+ * Returns EFAULT or EINVAL as set below; otherwise 'ret' is whatever
+ * SNAPSHOT_VAR_OR_LEAVE leaves in it (presumably 0 on success - the macro is
+ * defined outside this file, TODO confirm).
+ */
+int
+vm_snapshot_guest2host_addr(void **addrp, size_t len, int restore_null,
+ struct vm_snapshot_meta *meta)
+{
+ int ret;
+ vm_paddr_t gaddr;
+
+ if (meta->op == VM_SNAPSHOT_SAVE) {
+ gaddr = paddr_host2guest(meta->ctx, *addrp);
+ if (gaddr == (vm_paddr_t) -1) {
+ /* Only a NULL pointer may be saved, and only if allowed. */
+ if ((restore_null == false) ||
+ ((restore_null == true) && (*addrp != NULL))) {
+ ret = EFAULT;
+ goto done;
+ }
+ }
+
+ SNAPSHOT_VAR_OR_LEAVE(gaddr, meta, ret, done);
+ } else if (meta->op == VM_SNAPSHOT_RESTORE) {
+ SNAPSHOT_VAR_OR_LEAVE(gaddr, meta, ret, done);
+ if (gaddr == (vm_paddr_t) -1) {
+ if (restore_null == false) {
+ ret = EFAULT;
+ goto done;
+ }
+ }
+
+ /*
+ * NOTE(review): when gaddr is -1 and restore_null is set, the
+ * translation is still attempted; presumably it yields NULL.
+ */
+ *addrp = paddr_guest2host(meta->ctx, gaddr, len);
+ } else {
+ ret = EINVAL;
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Like vm_snapshot_buf(), except that restore mode compares instead of
+ * copying: in save mode 'data_size' bytes are copied from 'data' into the
+ * snapshot buffer and 0 is returned; in restore mode the result of memcmp()
+ * between 'data' and the buffer is returned.  In both modes the buffer
+ * cursor advances by 'data_size'.
+ *
+ * Returns E2BIG if fewer than 'data_size' bytes remain in the buffer and
+ * EINVAL for any other operation; the cursor is not advanced in those cases.
+ */
+int
+vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *sbuf = &meta->buffer;
+	int result;
+
+	if (data_size > sbuf->buf_rem) {
+		fprintf(stderr, "%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		result = 0;
+		memcpy(sbuf->buf, (uint8_t *) data, data_size);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		result = memcmp((uint8_t *) data, sbuf->buf, data_size);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/* Advance even when the comparison found a difference. */
+	sbuf->buf += data_size;
+	sbuf->buf_rem -= data_size;
+
+	return (result);
+}
Index: usr.sbin/bhyve/uart_emul.h
===================================================================
--- usr.sbin/bhyve/uart_emul.h
+++ usr.sbin/bhyve/uart_emul.h
@@ -31,10 +31,10 @@
#ifndef _UART_EMUL_H_
#define _UART_EMUL_H_
-
#define UART_IO_BAR_SIZE 8
struct uart_softc;
+struct vm_snapshot_meta;
typedef void (*uart_intr_func_t)(void *arg);
struct uart_softc *uart_init(uart_intr_func_t intr_assert,
@@ -44,4 +44,5 @@
uint8_t uart_read(struct uart_softc *sc, int offset);
void uart_write(struct uart_softc *sc, int offset, uint8_t value);
int uart_set_backend(struct uart_softc *sc, const char *opt);
+int uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta);
#endif
Index: usr.sbin/bhyve/uart_emul.c
===================================================================
--- usr.sbin/bhyve/uart_emul.c
+++ usr.sbin/bhyve/uart_emul.c
@@ -39,6 +39,8 @@
#include <capsicum_helpers.h>
#endif
+#include <machine/vmm_snapshot.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
@@ -699,3 +701,33 @@
return (retval);
}
+
+/*
+ * Pass the emulated UART's state through the snapshot macros: the scalar
+ * fields sc->data through sc->scr, then sc->dll and sc->dlh, then the
+ * receive FIFO indices, counters and buffer.
+ *
+ * Returns 'ret' as left by the last macro that ran.
+ * NOTE(review): the SNAPSHOT_*_OR_LEAVE macros are defined outside this
+ * hunk; presumably each one stores its status in 'ret' and jumps to 'done'
+ * on failure, copying in the direction selected by meta->op -- confirm in
+ * <machine/vmm_snapshot.h>.
+ */
+int
+uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->data, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ier, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->lcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->mcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->lsr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->msr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->fcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->scr, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->dll, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->dlh, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.rindex, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.windex, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.num, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.size, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->rxfifo.buf, sizeof(sc->rxfifo.buf),
+ meta, ret, done);
+
+ /*
+  * Not taken from the snapshot: set unconditionally, without checking
+  * meta->op, once every field above has gone through.
+  */
+ sc->thre_int_pending = 1;
+
+done:
+ return (ret);
+}
Index: usr.sbin/bhyve/usb_emul.h
===================================================================
--- usr.sbin/bhyve/usb_emul.h
+++ usr.sbin/bhyve/usb_emul.h
@@ -41,10 +41,10 @@
#define USB_XFER_IN 1
-
struct usb_hci;
struct usb_device_request;
struct usb_data_xfer;
+struct vm_snapshot_meta;
/* Device emulation handlers */
struct usb_devemu {
@@ -62,6 +62,7 @@
int (*ue_reset)(void *sc);
int (*ue_remove)(void *sc);
int (*ue_stop)(void *sc);
+ int (*ue_snapshot)(void *scarg, struct vm_snapshot_meta *meta);
};
#define USB_EMUL_SET(x) DATA_SET(usb_emu_set, x);
@@ -148,7 +149,6 @@
pthread_mutex_unlock(&((x)->mtx)); \
} while (0)
-
struct usb_devemu *usb_emu_finddev(char *name);
struct usb_data_xfer_block *usb_data_xfer_append(struct usb_data_xfer *xfer,
Index: usr.sbin/bhyve/usb_mouse.c
===================================================================
--- usr.sbin/bhyve/usb_mouse.c
+++ usr.sbin/bhyve/usb_mouse.c
@@ -31,6 +31,8 @@
#include <sys/time.h>
+#include <machine/vmm_snapshot.h>
+
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -787,6 +789,27 @@
return (0);
}
+/*
+ * ue_snapshot handler for the USB mouse/tablet emulation.
+ *
+ * 'scarg' is the opaque softc pointer and is used as a struct umouse_softc.
+ * The report, the newdata flag, the HID idle/protocol/feature fields, the
+ * polling flag and the two members of the prev_evt timestamp are each
+ * passed through SNAPSHOT_VAR_OR_LEAVE.
+ *
+ * Returns 'ret' as left by the last macro that ran.
+ * NOTE(review): the macro is defined outside this hunk; presumably it sets
+ * 'ret' and jumps to 'done' on failure -- confirm in
+ * <machine/vmm_snapshot.h>.
+ */
+static int
+umouse_snapshot(void *scarg, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct umouse_softc *sc;
+
+ sc = scarg;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->um_report, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->newdata, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.idle, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.protocol, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.feature, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->polling, meta, ret, done);
+ /* The timestamp is handled member by member rather than as a struct. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_sec, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_usec, meta, ret, done);
+
+done:
+ return (ret);
+}
struct usb_devemu ue_mouse = {
.ue_emu = "tablet",
@@ -797,6 +820,7 @@
.ue_data = umouse_data_handler,
.ue_reset = umouse_reset,
.ue_remove = umouse_remove,
- .ue_stop = umouse_stop
+ .ue_stop = umouse_stop,
+ .ue_snapshot = umouse_snapshot,
};
USB_EMUL_SET(ue_mouse);
Index: usr.sbin/bhyve/virtio.h
===================================================================
--- usr.sbin/bhyve/virtio.h
+++ usr.sbin/bhyve/virtio.h
@@ -285,6 +285,7 @@
struct vmctx;
struct pci_devinst;
struct vqueue_info;
+struct vm_snapshot_meta;
/*
* A virtual device, with some number (possibly 0) of virtual
@@ -359,6 +360,10 @@
void (*vc_apply_features)(void *, uint64_t);
/* called to apply negotiated features */
uint64_t vc_hv_caps; /* hypervisor-provided capabilities */
+ void (*vc_pause)(void *); /* called to pause device activity */
+ void (*vc_resume)(void *); /* called to resume device activity */
+ int (*vc_snapshot)(void *, struct vm_snapshot_meta *);
+ /* called to save / restore device state */
};
/*
@@ -465,4 +470,7 @@
int baridx, uint64_t offset, int size);
void vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size, uint64_t value);
+int vi_pci_snapshot(struct vm_snapshot_meta *meta);
+int vi_pci_pause(struct pci_devinst *pi);
+int vi_pci_resume(struct pci_devinst *pi);
#endif /* _VIRTIO_H_ */
Index: usr.sbin/bhyve/virtio.c
===================================================================
--- usr.sbin/bhyve/virtio.c
+++ usr.sbin/bhyve/virtio.c
@@ -32,6 +32,8 @@
#include <sys/param.h>
#include <sys/uio.h>
+#include <machine/vmm_snapshot.h>
+
#include <stdio.h>
#include <stdint.h>
#include <pthread.h>
@@ -777,3 +779,147 @@
if (vs->vs_mtx)
pthread_mutex_unlock(vs->vs_mtx);
}
+
+/*
+ * Pause the virtio device behind 'pi' by invoking its vc_pause callback on
+ * the device softc.
+ *
+ * The callback is mandatory here: a device whose virtio_consts has no
+ * vc_pause trips the assertion.  Always returns 0.
+ */
+int
+vi_pci_pause(struct pci_devinst *pi)
+{
+	struct virtio_softc *vs;
+	struct virtio_consts *vc;
+
+	vs = pi->pi_arg;
+	vc = vs->vs_vc;
+
+	assert(vc->vc_pause != NULL);
+	(*vc->vc_pause)(DEV_SOFTC(vs));
+
+	return (0);
+}
+
+/*
+ * Resume the virtio device behind 'pi' by invoking its vc_resume callback
+ * on the device softc.
+ *
+ * The callback is mandatory here: a device whose virtio_consts has no
+ * vc_resume trips the assertion.  Always returns 0.
+ */
+int
+vi_pci_resume(struct pci_devinst *pi)
+{
+	struct virtio_softc *vs;
+	struct virtio_consts *vc;
+
+	vs = pi->pi_arg;
+	vc = vs->vs_vc;
+
+	assert(vc->vc_resume != NULL);
+	(*vc->vc_resume)(DEV_SOFTC(vs));
+
+	return (0);
+}
+
+/*
+ * Pass the generic virtio softc fields (flags, negotiated capabilities,
+ * current queue selector, status, ISR and MSI-X config index) through
+ * SNAPSHOT_VAR_OR_LEAVE.
+ *
+ * Returns 'ret' as left by the last macro that ran.
+ * NOTE(review): the macro is defined outside this hunk; presumably it sets
+ * 'ret' and jumps to 'done' on failure -- confirm in
+ * <machine/vmm_snapshot.h>.
+ */
+static int
+vi_pci_snapshot_softc(struct virtio_softc *vs, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_flags, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_negotiated_caps, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_curq, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_isr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_msix_cfg_idx, meta, ret, done);
+
+done:
+ return (ret);
+}
+
+/*
+ * Pass the per-device-type constants (queue count, config space size and
+ * hypervisor capability mask) through SNAPSHOT_VAR_CMP_OR_LEAVE.
+ *
+ * Returns 'ret' as left by the last macro that ran.
+ * NOTE(review): the _CMP_ macro is defined outside this hunk; presumably it
+ * wraps vm_snapshot_buf_cmp(), i.e. it records the values on save and only
+ * compares them (without overwriting) on restore -- confirm in
+ * <machine/vmm_snapshot.h>.
+ */
+static int
+vi_pci_snapshot_consts(struct virtio_consts *vc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_nvq, meta, ret, done);
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_cfgsize, meta, ret, done);
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_hv_caps, meta, ret, done);
+
+done:
+ return (ret);
+}
+
+/*
+ * Run every virtqueue of 'vs' through the snapshot macros.
+ *
+ * For each of the vc_nvq queues this handles, in order: the queue size and
+ * index (via the _CMP_ macro), the queue flags / last_avail / save_used /
+ * MSI-X index, the ring PFN, the three host pointers into the ring
+ * (descriptor table, avail ring, used ring) via the guest<->host address
+ * macro, and finally vring_size() bytes starting at vq_desc.
+ *
+ * Returns 0 when the device has no queues or every step succeeded,
+ * otherwise the value the failing macro left in 'ret'.
+ * NOTE(review): the SNAPSHOT_* macros are defined outside this hunk;
+ * presumably each stores its status in 'ret' and jumps to 'done' on
+ * failure -- confirm in <machine/vmm_snapshot.h>.
+ */
+static int
+vi_pci_snapshot_queues(struct virtio_softc *vs, struct vm_snapshot_meta *meta)
+{
+	int i;
+	int ret;
+	struct virtio_consts *vc;
+	struct vqueue_info *vq;
+	uint64_t addr_size;
+
+	vc = vs->vs_vc;
+
+	/*
+	 * A device may legitimately have zero queues, in which case the loop
+	 * body never runs and no macro assigns 'ret'.
+	 */
+	ret = 0;
+
+	/* Save virtio queue info */
+	for (i = 0; i < vc->vc_nvq; i++) {
+		vq = &vs->vs_queues[i];
+
+		SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_qsize, meta, ret, done);
+		SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_num, meta, ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_flags, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_last_avail, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_save_used, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_msix_idx, meta, ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_pfn, meta, ret, done);
+
+		/* Descriptor table: one struct virtio_desc per queue entry. */
+		addr_size = vq->vq_qsize * sizeof(struct virtio_desc);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_desc, addr_size,
+		    false, meta, ret, done);
+
+		/* Avail ring: two header words, qsize entries, one trailer. */
+		addr_size = (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_avail, addr_size,
+		    false, meta, ret, done);
+
+		/*
+		 * NOTE(review): this counts two 16-bit words per used-ring
+		 * entry; verify against the layout of the used-ring element
+		 * type, which is not visible in this hunk.
+		 */
+		addr_size = (2 + 2 * vq->vq_qsize + 1) * sizeof(uint16_t);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_used, addr_size,
+		    false, meta, ret, done);
+
+		SNAPSHOT_BUF_OR_LEAVE(vq->vq_desc, vring_size(vq->vq_qsize),
+		    meta, ret, done);
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Snapshot entry point for a virtio PCI device.
+ *
+ * The PCI device instance is taken from meta->dev_data.  The generic virtio
+ * softc, the device-type constants and the virtqueues are handled in that
+ * order, followed by the device-specific vc_snapshot callback when the
+ * device provides one.
+ *
+ * Returns 0 on success or the first non-zero status from any stage.
+ */
+int
+vi_pci_snapshot(struct vm_snapshot_meta *meta)
+{
+	struct pci_devinst *pi;
+	struct virtio_softc *vs;
+	struct virtio_consts *vc;
+	int error;
+
+	pi = meta->dev_data;
+	vs = pi->pi_arg;
+	vc = vs->vs_vc;
+
+	/* Generic virtio softc state. */
+	error = vi_pci_snapshot_softc(vs, meta);
+	if (error != 0)
+		return (error);
+
+	/* Device-type constants. */
+	error = vi_pci_snapshot_consts(vc, meta);
+	if (error != 0)
+		return (error);
+
+	/* Per-queue state. */
+	error = vi_pci_snapshot_queues(vs, meta);
+	if (error != 0)
+		return (error);
+
+	/* Device-specific softc: optional. */
+	if (vc->vc_snapshot == NULL)
+		return (0);
+
+	return ((*vc->vc_snapshot)(DEV_SOFTC(vs), meta));
+}
Index: usr.sbin/bhyvectl/bhyvectl.c
===================================================================
--- usr.sbin/bhyvectl/bhyvectl.c
+++ usr.sbin/bhyvectl/bhyvectl.c
@@ -57,6 +57,9 @@
#include <machine/vmm_dev.h>
#include <vmmapi.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
#include "amd/vmcb.h"
#include "intel/vmcs.h"
@@ -67,6 +70,9 @@
#define NO_ARG no_argument
#define OPT_ARG optional_argument
+#define CHECKPOINT_RUN_DIR "/var/run/bhyve/checkpoint"
+#define MAX_VMNAME 100
+
static const char *progname;
static void
@@ -78,6 +84,8 @@
" [--cpu=<vcpu_number>]\n"
" [--create]\n"
" [--destroy]\n"
+ " [--checkpoint=<filename>]\n"
+ " [--suspend=<filename>]\n"
" [--get-all]\n"
" [--get-stats]\n"
" [--set-desc-ds]\n"
@@ -287,6 +295,10 @@
static int unassign_pptdev, bus, slot, func;
static int run;
static int get_cpu_topology;
+static int vm_checkpoint_opt;
+static int vm_suspend_opt;
+static int vcpu_lock_all_opt;
+static int vcpu_unlock_all_opt;
/*
* VMCB specific.
@@ -591,6 +603,8 @@
SET_RTC_TIME,
SET_RTC_NVRAM,
RTC_NVRAM_OFFSET,
+ SET_CHECKPOINT_FILE,
+ SET_SUSPEND_FILE,
};
static void
@@ -1459,6 +1473,10 @@
{ "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
{ "get-intinfo", NO_ARG, &get_intinfo, 1 },
{ "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 },
+ { "checkpoint", REQ_ARG, 0, SET_CHECKPOINT_FILE},
+ { "suspend", REQ_ARG, 0, SET_SUSPEND_FILE},
+ { "vcpu_lock_all", NO_ARG,&vcpu_lock_all_opt, 1 },
+ { "vcpu_unlock_all", NO_ARG,&vcpu_unlock_all_opt, 1 },
};
const struct option intel_opts[] = {
@@ -1676,6 +1694,80 @@
}
}
+/*
+ * Send a checkpoint request to the bhyve process that owns the VM.
+ *
+ * Connects to the UNIX stream socket CHECKPOINT_RUN_DIR/<vm name>, where
+ * the VM name is obtained with vm_get_name(), and writes the whole
+ * 'op' structure to it.
+ *
+ * Returns 0 on success.  Returns -1 if the socket cannot be created, the
+ * socket path does not fit in sun_path, connect() fails or the request
+ * cannot be sent in full; returns vm_get_name()'s status if that call
+ * fails.  A diagnostic is printed on stderr for every failure.
+ */
+static int
+send_checkpoint_op_req(struct vmctx *ctx, struct checkpoint_op *op)
+{
+	struct sockaddr_un addr;
+	char vmname_buf[MAX_VMNAME];
+	const char *req;
+	size_t len, total_sent;
+	ssize_t len_sent;
+	int socket_fd, path_len;
+	int err = 0;
+
+	socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		perror("Error creating bhyvectl socket");
+		err = -1;
+		goto done;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+
+	err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (err != 0) {
+		perror("Failed to get VM name");
+		goto done;
+	}
+
+	/*
+	 * Bound the write by the real size of sun_path, which is far
+	 * smaller than PATH_MAX, and refuse to connect to a truncated path.
+	 */
+	path_len = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
+	    CHECKPOINT_RUN_DIR, vmname_buf);
+	if (path_len < 0 || (size_t)path_len >= sizeof(addr.sun_path)) {
+		fprintf(stderr, "VM socket path is too long\n");
+		err = -1;
+		goto done;
+	}
+
+	if (connect(socket_fd, (struct sockaddr *)&addr,
+	    sizeof(addr)) != 0) {
+		perror("Connect to VM socket failed");
+		err = -1;
+		goto done;
+	}
+
+	/* send() may accept fewer bytes than asked; loop until all are out. */
+	req = (const char *)op;
+	len = sizeof(*op);
+	total_sent = 0;
+	while (total_sent < len) {
+		len_sent = send(socket_fd, req + total_sent,
+		    len - total_sent, 0);
+		if (len_sent < 0) {
+			perror("Failed to send checkpoint operation request");
+			err = -1;
+			break;
+		}
+		if (len_sent == 0) {
+			/* No progress; bail out instead of spinning. */
+			fprintf(stderr,
+			    "Checkpoint operation request was cut short\n");
+			err = -1;
+			break;
+		}
+		total_sent += (size_t)len_sent;
+	}
+
+done:
+	/* Descriptor 0 is a valid socket, so test against -1, not 0. */
+	if (socket_fd >= 0)
+		close(socket_fd);
+	return (err);
+}
+
+/*
+ * Build a START_CHECKPOINT request naming 'checkpoint_file' and hand it to
+ * send_checkpoint_op_req().  The file name is truncated, if necessary, to
+ * fit the request's snapshot_filename field including its terminating NUL.
+ *
+ * Returns whatever send_checkpoint_op_req() returns.
+ */
+static int
+send_start_checkpoint(struct vmctx *ctx, const char *checkpoint_file)
+{
+	struct checkpoint_op req;
+
+	req.op = START_CHECKPOINT;
+
+	/* strncpy() zero-fills the tail; the last byte is always a NUL. */
+	strncpy(req.snapshot_filename, checkpoint_file,
+	    MAX_SNAPSHOT_VMNAME - 1);
+	req.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = '\0';
+
+	return (send_checkpoint_op_req(ctx, &req));
+}
+
+/*
+ * Build a START_SUSPEND request naming 'suspend_file' and hand it to
+ * send_checkpoint_op_req().  The file name is truncated, if necessary, to
+ * fit the request's snapshot_filename field including its terminating NUL.
+ *
+ * Returns whatever send_checkpoint_op_req() returns.
+ */
+static int
+send_start_suspend(struct vmctx *ctx, const char *suspend_file)
+{
+	struct checkpoint_op req;
+
+	req.op = START_SUSPEND;
+
+	/* strncpy() zero-fills the tail; the last byte is always a NUL. */
+	strncpy(req.snapshot_filename, suspend_file,
+	    MAX_SNAPSHOT_VMNAME - 1);
+	req.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = '\0';
+
+	return (send_checkpoint_op_req(ctx, &req));
+}
+
int
main(int argc, char *argv[])
{
@@ -1692,6 +1784,7 @@
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
struct tm tm;
struct option *opts;
+ char *checkpoint_file, *suspend_file;
cpu_intel = cpu_vendor_intel();
opts = setup_options(cpu_intel);
@@ -1858,6 +1951,14 @@
case ASSERT_LAPIC_LVT:
assert_lapic_lvt = atoi(optarg);
break;
+ case SET_CHECKPOINT_FILE:
+ vm_checkpoint_opt = 1;
+ checkpoint_file = optarg;
+ break;
+ case SET_SUSPEND_FILE:
+ vm_suspend_opt = 1;
+ suspend_file = optarg;
+ break;
default:
usage(cpu_intel);
}
@@ -2343,6 +2444,18 @@
if (!error && destroy)
vm_destroy(ctx);
+ if (!error && vm_checkpoint_opt)
+ error = send_start_checkpoint(ctx, checkpoint_file);
+
+ if (!error && vm_suspend_opt)
+ error = send_start_suspend(ctx, suspend_file);
+
+ if (!error && vcpu_lock_all_opt)
+ error = vm_vcpu_lock_all(ctx);
+
+ if (!error && vcpu_unlock_all_opt)
+ error = vm_vcpu_unlock_all(ctx);
+
free (opts);
exit(error);
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Dec 22, 12:08 PM (5 h, 16 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15553010
Default Alt Text
D19495.id58265.diff (172 KB)
Attached To
Mode
D19495: bhyve - Snapshot Save and Restore
Attached
Detach File
Event Timeline
Log In to Comment