Page MenuHomeFreeBSD

D19495.id62983.diff
No OneTemporary

D19495.id62983.diff

Index: lib/libvmmapi/vmmapi.h
===================================================================
--- lib/libvmmapi/vmmapi.h
+++ lib/libvmmapi/vmmapi.h
@@ -33,6 +33,7 @@
#include <sys/param.h>
#include <sys/cpuset.h>
+#include <machine/vmm_dev.h>
/*
* API version for out-of-tree consumers like grub-bhyve for making compile
@@ -42,6 +43,7 @@
struct iovec;
struct vmctx;
+struct vm_snapshot_meta;
enum x2apic_state;
/*
@@ -88,6 +90,10 @@
*/
int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+
+int vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
+ size_t *lowmem_size, size_t *highmem_size);
+
/*
* Create a device memory segment identified by 'segid'.
*
@@ -110,6 +116,8 @@
int vm_parse_memsize(const char *optarg, size_t *memsize);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
+/* inverse operation to vm_map_gpa - extract guest address from host pointer */
+vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
uint64_t gla, int prot, uint64_t *gpa, int *fault);
@@ -120,6 +128,7 @@
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
void vm_set_memflags(struct vmctx *ctx, int flags);
int vm_get_memflags(struct vmctx *ctx);
+int vm_get_name(struct vmctx *ctx, char *buffer, size_t max_len);
size_t vm_get_lowmem_size(struct vmctx *ctx);
size_t vm_get_highmem_size(struct vmctx *ctx);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
@@ -237,4 +246,25 @@
uint32_t eip, uint32_t gdtbase,
uint32_t esp);
void vm_setup_freebsd_gdt(uint64_t *gdtr);
+
+/*
+ * Save and restore
+ */
+
+/* Size in bytes of the snapshot_filename buffer in struct checkpoint_op. */
+#define MAX_SNAPSHOT_VMNAME 100
+
+/* Operation codes for a checkpoint request. */
+enum checkpoint_opcodes {
+ START_CHECKPOINT = 0,
+ START_SUSPEND = 1,
+};
+
+/* A checkpoint request: an operation code plus a snapshot file name. */
+struct checkpoint_op {
+ /* NOTE(review): presumably one of enum checkpoint_opcodes -- confirm */
+ unsigned int op;
+ char snapshot_filename[MAX_SNAPSHOT_VMNAME];
+};
+
+int vm_snapshot_req(struct vm_snapshot_meta *meta);
+int vm_restore_time(struct vmctx *ctx);
+int vm_restore_mem(struct vmctx *ctx, int vmmem_fd, size_t size);
+
#endif /* _VMMAPI_H_ */
Index: lib/libvmmapi/vmmapi.c
===================================================================
--- lib/libvmmapi/vmmapi.c
+++ lib/libvmmapi/vmmapi.c
@@ -42,6 +42,7 @@
#include <machine/specialreg.h>
#include <errno.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
@@ -51,8 +52,10 @@
#include <libutil.h>
+#include <vm/vm.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_snapshot.h>
#include "vmmapi.h"
@@ -233,6 +236,17 @@
return (error);
}
+/*
+ * Report the guest memory layout recorded in 'ctx': its base address and the
+ * sizes of the low and high memory segments. Always returns 0.
+ */
+int
+vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
+    size_t *lowmem_size, size_t *highmem_size)
+{
+
+	*highmem_size = ctx->highmem;
+	*lowmem_size = ctx->lowmem;
+	*guest_baseaddr = ctx->baseaddr;
+
+	return (0);
+}
+
int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
@@ -444,6 +458,34 @@
return (NULL);
}
+/*
+ * Inverse of vm_map_gpa(): translate a host pointer into guest memory back to
+ * a guest physical address.
+ *
+ * Returns the offset of 'addr' from ctx->baseaddr when that offset lies in
+ * the lowmem range [0, lowmem) or in the highmem range
+ * [4GB, 4GB + highmem), and (vm_paddr_t)-1 otherwise.
+ */
+vm_paddr_t
+vm_rev_map_gpa(struct vmctx *ctx, void *addr)
+{
+	vm_paddr_t offaddr;
+
+	offaddr = (char *)addr - ctx->baseaddr;
+
+	/*
+	 * offaddr == lowmem is one byte past the end of low memory, so the
+	 * upper bound is exclusive, the same as the highmem check below.
+	 * No lower-bound test is needed: vm_paddr_t is unsigned, so a pointer
+	 * below baseaddr wraps to a huge offset that fails both range checks.
+	 */
+	if (ctx->lowmem > 0 && offaddr < ctx->lowmem)
+		return (offaddr);
+
+	if (ctx->highmem > 0 &&
+	    offaddr >= 4*GB && offaddr < 4*GB + ctx->highmem)
+		return (offaddr);
+
+	return ((vm_paddr_t)-1);
+}
+
+/* TODO: maximum size for vmname */
+/*
+ * Copy the VM name stored in 'ctx' into 'buf', which holds 'max_len' bytes.
+ * Returns 0 on success, or EINVAL when the name does not fit.
+ */
+int
+vm_get_name(struct vmctx *ctx, char *buf, size_t max_len)
+{
+	size_t needed;
+
+	needed = strlcpy(buf, ctx->name, max_len);
+
+	return (needed < max_len ? 0 : EINVAL);
+}
+
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{
@@ -1504,6 +1546,100 @@
return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
}
+/*
+ * Issue the VM_SNAPSHOT_REQ ioctl described by 'meta' on the descriptor held
+ * in meta->ctx. Returns 0 on success and -1 when the ioctl fails.
+ */
+int
+vm_snapshot_req(struct vm_snapshot_meta *meta)
+{
+	int rc;
+
+	rc = ioctl(meta->ctx->fd, VM_SNAPSHOT_REQ, meta);
+	if (rc != -1)
+		return (0);
+
+#ifdef SNAPSHOT_DEBUG
+	fprintf(stderr, "%s: snapshot failed for %s: %d\r\n",
+	    __func__, meta->dev_name, errno);
+#endif
+	return (-1);
+}
+
+/*
+ * Read exactly 'len' bytes from 'fd', starting at 'file_offset', into 'dest'.
+ *
+ * Returns true when the whole range was read. Returns false when the seek
+ * fails, when read() fails (errno is left as set by the failing call), or
+ * when end-of-file is hit before 'len' bytes arrived (errno is set to
+ * ENOSPC). A read() interrupted by a signal is retried.
+ */
+static bool
+vm_mem_read_from_file(int fd, void *dest, size_t file_offset, size_t len)
+{
+	char *p;
+	ssize_t cnt_read;
+
+	if (lseek(fd, file_offset, SEEK_SET) == -1) {
+#ifdef SNAPSHOT_DEBUG
+		fprintf(stderr,
+		    "%s: Could not change file offset errno = %d\r\n",
+		    __func__, errno);
+#endif
+		return (false);
+	}
+
+	p = dest;
+	while (len > 0) {
+		cnt_read = read(fd, p, len);
+		if (cnt_read < 0) {
+			/* Interrupted before any data was transferred. */
+			if (errno == EINTR)
+				continue;
+#ifdef SNAPSHOT_DEBUG
+			fprintf(stderr,"%s: read error: %d\r\n",
+			    __func__, errno);
+#endif
+			return (false);
+		}
+		if (cnt_read == 0) {
+			/* The file ended before the requested range did. */
+			errno = ENOSPC;
+#ifdef SNAPSHOT_DEBUG
+			fprintf(stderr, "%s: short read\r\n", __func__);
+#endif
+			return (false);
+		}
+		p += cnt_read;
+		len -= cnt_read;
+	}
+	return (true);
+}
+
+/*
+ * Fill guest memory from the snapshot file 'vmmem_fd'.
+ *
+ * 'size' must equal lowmem + highmem of 'ctx'. Low memory is read from file
+ * offset 0 into ctx->baseaddr; when high memory exists it is read from file
+ * offset 'lowmem' into ctx->baseaddr + 4GB.
+ *
+ * Returns 0 on success and -1 on a size mismatch or a failed read.
+ */
+int
+vm_restore_mem(struct vmctx *ctx, int vmmem_fd, size_t size)
+{
+
+	if (ctx->lowmem + ctx->highmem != size) {
+#ifdef SNAPSHOT_DEBUG
+		/* Both values are size_t, so %zu is the matching conversion. */
+		fprintf(stderr, "%s: mem size mismatch: %zu vs %zu\n",
+		    __func__, ctx->lowmem + ctx->highmem, size);
+#endif
+		return (-1);
+	}
+
+	if (!vm_mem_read_from_file(vmmem_fd, ctx->baseaddr, 0, ctx->lowmem)) {
+#ifdef SNAPSHOT_DEBUG
+		fprintf(stderr,
+		    "%s: Could not read lowmem from file\r\n", __func__);
+#endif
+		return (-1);
+	}
+
+	if (ctx->highmem > 0) {
+		if (!vm_mem_read_from_file(vmmem_fd, ctx->baseaddr + 4*GB,
+		    ctx->lowmem, ctx->highmem)) {
+#ifdef SNAPSHOT_DEBUG
+			fprintf(stderr,
+			    "%s: Could not read highmem from file\r\n",
+			    __func__);
+#endif
+			return (-1);
+		}
+	}
+	return (0);
+}
+
+/*
+ * Issue the VM_RESTORE_TIME ioctl on the VM's descriptor. The ioctl takes an
+ * int argument; a zeroed placeholder is passed. Returns the ioctl() result.
+ */
+int
+vm_restore_time(struct vmctx *ctx)
+{
+	int placeholder = 0;
+
+	return (ioctl(ctx->fd, VM_RESTORE_TIME, &placeholder));
+}
+
int
vm_set_topology(struct vmctx *ctx,
uint16_t sockets, uint16_t cores, uint16_t threads, uint16_t maxcpus)
Index: share/man/man5/src.conf.5
===================================================================
--- share/man/man5/src.conf.5
+++ share/man/man5/src.conf.5
@@ -170,6 +170,13 @@
associated utilities, and examples.
.Pp
This option only affects amd64/amd64.
+.It Va WITH_BHYVE_SNAPSHOT
+Set to include support for save and restore (snapshots) in
+.Xr bhyve 8
+and
+.Xr bhyvectl 8 .
+.Pp
+This option only affects amd64/amd64.
.It Va WITH_BIND_NOW
Build all binaries with the
.Dv DF_BIND_NOW
Index: share/mk/kmod.opts.mk
===================================================================
--- /dev/null
+++ share/mk/kmod.opts.mk
@@ -0,0 +1,20 @@
+# $FreeBSD$
+
+# Locate the kernel source tree (SYSDIR) when KERNBUILDDIR is empty, then
+# include conf/kmod.opts.mk from that tree.
+
+# Search for kernel source tree in standard places.
+.if empty(KERNBUILDDIR)
+.if !defined(SYSDIR)
+.for _dir in ${SRCTOP:D${SRCTOP}/sys} \
+ ${.CURDIR}/../.. ${.CURDIR}/../../.. ${.CURDIR}/../../../.. \
+ ${.CURDIR}/../../../../.. /sys /usr/src/sys
+.if !defined(SYSDIR) && exists(${_dir}/kern/) && exists(${_dir}/conf/kmod.opts.mk)
+SYSDIR= ${_dir:tA}
+.endif
+.endfor
+.endif
+.if !defined(SYSDIR) || !exists(${SYSDIR}/kern/) || \
+ !exists(${SYSDIR}/conf/kmod.opts.mk)
+.error Unable to locate the kernel source tree. Set SYSDIR to override.
+.endif
+.endif
+
+# NOTE(review): when KERNBUILDDIR is set the search above is skipped, so
+# SYSDIR must already be defined by the caller -- confirm.
+.include "${SYSDIR}/conf/kmod.opts.mk"
Index: share/mk/src.opts.mk
===================================================================
--- share/mk/src.opts.mk
+++ share/mk/src.opts.mk
@@ -193,6 +193,7 @@
__DEFAULT_NO_OPTIONS = \
BEARSSL \
+ BHYVE_SNAPSHOT \
BSD_GREP \
CLANG_EXTRAS \
DTRACE_TESTS \
Index: sys/amd64/conf/GENERIC
===================================================================
--- sys/amd64/conf/GENERIC
+++ sys/amd64/conf/GENERIC
@@ -356,6 +356,9 @@
device virtio_scsi # VirtIO SCSI device
device virtio_balloon # VirtIO Memory Balloon device
+# bhyve snapshot
+option BHYVE_SNAPSHOT
+
# HyperV drivers and enhancement support
device hyperv # HyperV drivers
Index: sys/amd64/include/vmm.h
===================================================================
--- sys/amd64/include/vmm.h
+++ sys/amd64/include/vmm.h
@@ -34,6 +34,8 @@
#include <sys/sdt.h>
#include <x86/segments.h>
+struct vm_snapshot_meta;
+
#ifdef _KERNEL
SDT_PROVIDER_DECLARE(vmm);
#endif
@@ -151,6 +153,7 @@
struct vm_object;
struct vm_guest_paging;
struct pmap;
+enum snapshot_req;
struct vm_eventinfo {
void *rptr; /* rendezvous cookie */
@@ -179,6 +182,10 @@
typedef void (*vmi_vmspace_free)(struct vmspace *vmspace);
typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
typedef void (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
+typedef int (*vmi_snapshot_t)(void *vmi, struct vm_snapshot_meta *meta);
+typedef int (*vmi_snapshot_vmcx_t)(void *vmi, struct vm_snapshot_meta *meta,
+ int vcpu);
+typedef int (*vmi_restore_tsc_t)(void *vmi, int vcpuid, uint64_t now);
struct vmm_ops {
vmm_init_func_t init; /* module wide initialization */
@@ -198,6 +205,11 @@
vmi_vmspace_free vmspace_free;
vmi_vlapic_init vlapic_init;
vmi_vlapic_cleanup vlapic_cleanup;
+
+ /* checkpoint operations */
+ vmi_snapshot_t vmsnapshot;
+ vmi_snapshot_vmcx_t vmcx_snapshot;
+ vmi_restore_tsc_t vm_restore_tsc;
};
extern struct vmm_ops vmm_ops_intel;
@@ -271,6 +283,9 @@
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
+int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
+int vm_restore_time(struct vm *vm);
+
#ifdef _SYS__CPUSET_H_
/*
@@ -408,6 +423,15 @@
int vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2);
+/*
+ * Function used to keep track of the guest's TSC offset. The
+ * offset is used by the virtualization extensions to provide a consistent
+ * value for the Time Stamp Counter to the guest.
+ *
+ * Return value is 0 on success and non-zero on failure.
+ */
+int vm_set_tsc_offset(struct vm *vm, int vcpu_id, uint64_t offset);
+
enum vm_reg_name vm_segment_name(int seg_encoding);
struct vm_copyinfo {
Index: sys/amd64/include/vmm_dev.h
===================================================================
--- sys/amd64/include/vmm_dev.h
+++ sys/amd64/include/vmm_dev.h
@@ -31,6 +31,8 @@
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
+struct vm_snapshot_meta;
+
#ifdef _KERNEL
void vmmdev_init(void);
int vmmdev_cleanup(void);
@@ -312,6 +314,11 @@
IOCNUM_RTC_WRITE = 101,
IOCNUM_RTC_SETTIME = 102,
IOCNUM_RTC_GETTIME = 103,
+
+ /* checkpoint */
+ IOCNUM_SNAPSHOT_REQ = 113,
+
+ IOCNUM_RESTORE_TIME = 115
};
#define VM_RUN \
@@ -422,4 +429,8 @@
_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
#define VM_RESTART_INSTRUCTION \
_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
+#define VM_SNAPSHOT_REQ \
+ _IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta)
+#define VM_RESTORE_TIME \
+ _IOWR('v', IOCNUM_RESTORE_TIME, int)
#endif
Index: sys/amd64/include/vmm_snapshot.h
===================================================================
--- /dev/null
+++ sys/amd64/include/vmm_snapshot.h
@@ -0,0 +1,119 @@
+#ifndef _VMM_SNAPSHOT_
+#define _VMM_SNAPSHOT_
+
+#include <sys/errno.h>
+#include <sys/types.h>
+#ifndef _KERNEL
+#include <stdbool.h>
+#endif
+
+struct vmctx;
+
+/*
+ * Selects which kernel-side structure a snapshot request refers to; carried
+ * in the dev_req field of struct vm_snapshot_meta.
+ */
+enum snapshot_req {
+ STRUCT_VMX,
+ STRUCT_VIOAPIC,
+ STRUCT_VM,
+ STRUCT_VLAPIC,
+ VM_MEM,
+ STRUCT_VHPET,
+ STRUCT_VMCX,
+ STRUCT_VATPIC,
+ STRUCT_VATPIT,
+ STRUCT_VPMTMR,
+ STRUCT_VRTC,
+};
+
+/*
+ * Buffer that snapshot data is written to or read from: a fixed start/size
+ * pair plus a moving cursor (buf) and the number of bytes left (buf_rem).
+ */
+struct vm_snapshot_buffer {
+ /*
+ * R/O for device-specific functions;
+ * written by generic snapshot functions.
+ */
+ uint8_t *const buf_start;
+ const size_t buf_size;
+
+ /*
+ * R/W for device-specific functions used to keep track of buffer
+ * current position and remaining size.
+ */
+ uint8_t *buf;
+ size_t buf_rem;
+
+ /*
+ * Length of the snapshot is either determined as (buf_size - buf_rem)
+ * or (buf - buf_start) -- the second variation returns a signed value
+ * so it may not be appropriate.
+ *
+ * Use vm_get_snapshot_size(meta).
+ */
+};
+
+/* Direction of a snapshot operation; stored in vm_snapshot_meta.op. */
+enum vm_snapshot_op {
+ VM_SNAPSHOT_SAVE,
+ VM_SNAPSHOT_RESTORE,
+};
+
+/*
+ * Describes one snapshot request: the VM context, which device or kernel
+ * structure is concerned, the data buffer, and whether the request saves
+ * or restores.
+ */
+struct vm_snapshot_meta {
+ struct vmctx *ctx;
+ /* NOTE(review): presumably opaque per-device state -- confirm with users */
+ void *dev_data;
+ const char *dev_name; /* identify userspace devices */
+ enum snapshot_req dev_req; /* identify kernel structs */
+
+ struct vm_snapshot_buffer buffer;
+
+ enum vm_snapshot_op op;
+};
+
+
+void vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op);
+int vm_snapshot_buf(volatile void *data, size_t data_size,
+ struct vm_snapshot_meta *meta);
+size_t vm_get_snapshot_size(struct vm_snapshot_meta *meta);
+int vm_snapshot_guest2host_addr(void **addrp, size_t len, bool restore_null,
+ struct vm_snapshot_meta *meta);
+int vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+ struct vm_snapshot_meta *meta);
+
+/*
+ * Pass LEN bytes at DATA to vm_snapshot_buf() and store the result in RES.
+ * On a non-zero result, report it through vm_snapshot_buf_err() -- using the
+ * spelled-out DATA expression as the buffer name -- and jump to LABEL.
+ */
+#define SNAPSHOT_BUF_OR_LEAVE(DATA, LEN, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_buf((DATA), (LEN), (META)); \
+ if ((RES) != 0) { \
+ vm_snapshot_buf_err(#DATA, (META)->op); \
+ goto LABEL; \
+ } \
+} while (0)
+
+/* Same as SNAPSHOT_BUF_OR_LEAVE, applied to the whole variable DATA. */
+#define SNAPSHOT_VAR_OR_LEAVE(DATA, META, RES, LABEL) \
+ SNAPSHOT_BUF_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL)
+
+/*
+ * Address variables are pointers to guest memory.
+ *
+ * When RNULL != 0, do not enforce invalid address checks; instead, make the
+ * pointer NULL at restore time.
+ *
+ * The result of vm_snapshot_guest2host_addr() is stored in RES; any non-zero
+ * result jumps to LABEL, and EFAULT additionally prints a diagnostic.
+ *
+ * NOTE(review): the diagnostic uses fprintf(stderr, ...), but this header
+ * includes no header that declares it; users of this macro must do so.
+ */
+#define SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ADDR, LEN, RNULL, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_guest2host_addr((void **)&(ADDR), (LEN), (RNULL), \
+ (META)); \
+ if ((RES) != 0) { \
+ if ((RES) == EFAULT) \
+ fprintf(stderr, "%s: invalid address: %s\r\n", \
+ __func__, #ADDR); \
+ goto LABEL; \
+ } \
+} while (0)
+
+/*
+ * compare the value in the meta buffer with the data
+ *
+ * The result of vm_snapshot_buf_cmp() is stored in RES; a non-zero result is
+ * reported through vm_snapshot_buf_err() and jumps to LABEL.
+ */
+#define SNAPSHOT_BUF_CMP_OR_LEAVE(DATA, LEN, META, RES, LABEL) \
+do { \
+ (RES) = vm_snapshot_buf_cmp((DATA), (LEN), (META)); \
+ if ((RES) != 0) { \
+ vm_snapshot_buf_err(#DATA, (META)->op); \
+ goto LABEL; \
+ } \
+} while (0)
+
+/* Same as SNAPSHOT_BUF_CMP_OR_LEAVE, applied to the whole variable DATA. */
+#define SNAPSHOT_VAR_CMP_OR_LEAVE(DATA, META, RES, LABEL) \
+ SNAPSHOT_BUF_CMP_OR_LEAVE(&(DATA), sizeof(DATA), (META), (RES), LABEL)
+
+#endif
Index: sys/amd64/vmm/amd/svm.h
===================================================================
--- sys/amd64/vmm/amd/svm.h
+++ sys/amd64/vmm/amd/svm.h
@@ -32,6 +32,7 @@
#define _SVM_H_
struct pcpu;
+struct svm_softc;
/*
* Guest register state that is saved outside the VMCB.
@@ -66,5 +67,8 @@
};
void svm_launch(uint64_t pa, struct svm_regctx *gctx, struct pcpu *pcpu);
+#ifdef BHYVE_SNAPSHOT
+int svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset);
+#endif
#endif /* _SVM_H_ */
Index: sys/amd64/vmm/amd/svm.c
===================================================================
--- sys/amd64/vmm/amd/svm.c
+++ sys/amd64/vmm/amd/svm.c
@@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smp.h>
@@ -50,6 +52,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -275,6 +278,25 @@
svm_enable(NULL);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Store 'offset' as the TSC offset in the vcpu's VMCB control area, mark the
+ * VMCB_CACHE_I state dirty, and then record the offset through
+ * vm_set_tsc_offset(). Returns the result of vm_set_tsc_offset().
+ */
+int
+svm_set_tsc_offset(struct svm_softc *sc, int vcpu, uint64_t offset)
+{
+	struct vmcb_ctrl *vmcb_ctrl;
+
+	vmcb_ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+	vmcb_ctrl->tsc_offset = offset;
+	svm_set_dirty(sc, vcpu, VMCB_CACHE_I);
+
+	VCPU_CTR1(sc->vm, vcpu, "tsc offset changed to %#lx", offset);
+
+	return (vm_set_tsc_offset(sc->vm, vcpu, offset));
+}
+#endif
+
/* Pentium compatible MSRs */
#define MSR_PENTIUM_START 0
#define MSR_PENTIUM_END 0x1FFF
@@ -2197,6 +2219,36 @@
return (EINVAL);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Save or restore one guest register, depending on meta->op.
+ *
+ * On save the register is read with svm_getreg() and appended to the
+ * snapshot buffer; on restore the value is taken from the buffer and written
+ * with svm_setreg(). Returns 0 on success, the first failing call's error
+ * otherwise, and EINVAL for an unknown operation.
+ */
+static int
+svm_snapshot_reg(void *arg, int vcpu, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int error;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		error = svm_getreg(arg, vcpu, ident, &val);
+		if (error != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, error, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, error, out);
+		error = svm_setreg(arg, vcpu, ident, val);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+out:
+	return (error);
+}
+#endif
+
static int
svm_setcap(void *arg, int vcpu, int type, int val)
{
@@ -2279,6 +2331,306 @@
free(vlapic, M_SVM_VLAPIC);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Save or restore (depending on meta->op) the per-VM SVM state held in the
+ * svm_softc passed as 'arg': the nptp field, and for each of the VM_MAXCPU
+ * vcpus a set of VMCB control and state fields, the software register
+ * context and the remaining svm_vcpu bookkeeping fields.
+ *
+ * The order of the SNAPSHOT_* calls defines the layout of the data in the
+ * snapshot buffer. Returns 0 on success or the first non-zero result.
+ */
+static int
+svm_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta)
+{
+ /* struct svm_softc is AMD's representation for SVM softc */
+ struct svm_softc *sc;
+ struct svm_vcpu *vcpu;
+ struct vmcb *vmcb;
+ uint64_t val;
+ int i;
+ int ret;
+
+ sc = arg;
+
+ KASSERT(sc != NULL, ("%s: arg was NULL", __func__));
+
+ /* NOTE(review): nptp itself is overwritten on restore -- confirm intended */
+ SNAPSHOT_VAR_OR_LEAVE(sc->nptp, meta, ret, done);
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vcpu = &sc->vcpu[i];
+ vmcb = &vcpu->vmcb;
+
+ /* VMCB fields for virtual cpu i */
+ /* v_tpr is recorded twice: once directly and once through 'val' */
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.v_tpr, meta, ret, done);
+ val = vmcb->ctrl.v_tpr;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.v_tpr = val;
+
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.asid, meta, ret, done);
+ val = vmcb->ctrl.np_enable;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.np_enable = val;
+
+ val = vmcb->ctrl.intr_shadow;
+ SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+ vmcb->ctrl.intr_shadow = val;
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->ctrl.tlb_ctrl, meta, ret, done);
+
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad1,
+ sizeof(vmcb->state.pad1),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cpl, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad2,
+ sizeof(vmcb->state.pad2),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.efer, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad3,
+ sizeof(vmcb->state.pad3),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr4, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr3, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr7, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dr6, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rflags, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rip, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad4,
+ sizeof(vmcb->state.pad4),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rsp, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad5,
+ sizeof(vmcb->state.pad5),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.rax, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.star, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.lstar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cstar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sfmask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.kernelgsbase,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_cs, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_esp,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.sysenter_eip,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.cr2, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad6,
+ sizeof(vmcb->state.pad6),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.g_pat, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.dbgctl, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_from, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.br_to, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_from, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmcb->state.int_to, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(vmcb->state.pad7,
+ sizeof(vmcb->state.pad7),
+ meta, ret, done);
+
+ /* Snapshot swctx for virtual cpu i */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbp, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rbx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rcx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rdi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_rsi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r8, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r9, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r10, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r11, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r12, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r13, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r14, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_r15, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.sctx_dr3, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr3, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr6, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_dr7, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->swctx.host_debugctl, meta, ret,
+ done);
+
+ /* Save/restore other svm_vcpu struct fields */
+
+ /* Save/restore NEXTRIP field */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
+
+ /* Save/restore lastcpu field */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->lastcpu, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->dirty, meta, ret, done);
+
+ /* Save/restore EPTGEN field - EPT is Extended Page Table */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->eptgen, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.gen, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->asid.num, meta, ret, done);
+
+ /* Set all caches dirty */
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ svm_set_dirty(sc, i, VMCB_CACHE_ASID);
+ svm_set_dirty(sc, i, VMCB_CACHE_IOPM);
+ svm_set_dirty(sc, i, VMCB_CACHE_I);
+ svm_set_dirty(sc, i, VMCB_CACHE_TPR);
+ svm_set_dirty(sc, i, VMCB_CACHE_CR2);
+ svm_set_dirty(sc, i, VMCB_CACHE_CR);
+ svm_set_dirty(sc, i, VMCB_CACHE_DT);
+ svm_set_dirty(sc, i, VMCB_CACHE_SEG);
+ svm_set_dirty(sc, i, VMCB_CACHE_NP);
+ }
+ }
+
+ /* Only reached when every vcpu was processed without error */
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ flush_by_asid();
+
+done:
+ return (ret);
+}
+
+/*
+ * Save or restore (depending on 'meta') the VMCB-held state of one vcpu:
+ * control registers, general registers kept in the VMCB, segment selectors
+ * and descriptors, EFER, the descriptor tables and a list of raw VMCB
+ * fields addressed by offset.
+ *
+ * Fails with EINVAL when the vcpu is currently running on a host cpu other
+ * than the calling one. Otherwise every step is attempted and the results
+ * are added together, so the return value is 0 only if all steps succeeded;
+ * a non-zero value is not necessarily a valid errno.
+ */
+static int
+svm_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu)
+{
+	struct vmcb *vmcb;
+	struct svm_softc *sc;
+	int err, running, hostcpu;
+
+	sc = (struct svm_softc *)arg;
+	err = 0;
+
+	KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+	vmcb = svm_get_vmcb(sc, vcpu);
+
+	running = vcpu_is_running(sc->vm, vcpu, &hostcpu);
+	if (running && hostcpu != curcpu) {
+		/* Terminate the message so it does not run into the next one. */
+		printf("%s: %s%d is running\n", __func__, vm_name(sc->vm),
+		    vcpu);
+		return (EINVAL);
+	}
+
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR0, meta);
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR2, meta);
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR3, meta);
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CR4, meta);
+
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DR7, meta);
+
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RAX, meta);
+
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RSP, meta);
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RIP, meta);
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_RFLAGS, meta);
+
+	/* Guest segments */
+	/* ES */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_ES, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_ES, meta);
+
+	/* CS */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_CS, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_CS, meta);
+
+	/* SS */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_SS, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_SS, meta);
+
+	/* DS */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_DS, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_DS, meta);
+
+	/* FS */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_FS, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_FS, meta);
+
+	/* GS */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_GS, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GS, meta);
+
+	/* TR */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_TR, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_TR, meta);
+
+	/* LDTR */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_LDTR, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_LDTR, meta);
+
+	/* EFER */
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_EFER, meta);
+
+	/* IDTR and GDTR */
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_IDTR, meta);
+	err += vmcb_snapshot_desc(sc, vcpu, VM_REG_GUEST_GDTR, meta);
+
+	/* Specific AMD registers */
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_SYSENTER_CS, 8), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_SYSENTER_ESP, 8), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_SYSENTER_EIP, 8), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_NPT_BASE, 8), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_CR_INTERCEPT, 4), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_DR_INTERCEPT, 4), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_EXC_INTERCEPT, 4), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_INST1_INTERCEPT, 4), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_INST2_INTERCEPT, 4), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_TLB_CTRL, 4), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_EXITINFO1, 8), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_EXITINFO2, 8), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_EXITINTINFO, 8), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_VIRQ, 8), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_GUEST_PAT, 8), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_AVIC_BAR, 8), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_AVIC_PAGE, 8), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_AVIC_LT, 8), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_AVIC_PT, 8), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_IO_PERM, 8), meta);
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_MSR_PERM, 8), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_ASID, 4), meta);
+
+	err += vmcb_snapshot_any(sc, vcpu,
+	    VMCB_ACCESS(VMCB_OFF_EXIT_REASON, 8), meta);
+
+	err += svm_snapshot_reg(sc, vcpu, VM_REG_GUEST_INTR_SHADOW, meta);
+
+	return (err);
+}
+
+/*
+ * vmm_ops hook for restoring a vcpu's TSC offset; forwards directly to
+ * svm_set_tsc_offset() and returns its result.
+ */
+static int
+svm_restore_tsc(void *arg, int vcpu, uint64_t offset)
+{
+
+	return (svm_set_tsc_offset(arg, vcpu, offset));
+}
+#endif
+
struct vmm_ops vmm_ops_amd = {
.init = svm_init,
.cleanup = svm_cleanup,
@@ -2296,4 +2648,9 @@
.vmspace_free = svm_npt_free,
.vlapic_init = svm_vlapic_init,
.vlapic_cleanup = svm_vlapic_cleanup,
+#ifdef BHYVE_SNAPSHOT
+ .vmsnapshot = svm_snapshot_vmi,
+ .vmcx_snapshot = svm_snapshot_vmcx,
+ .vm_restore_tsc = svm_restore_tsc,
+#endif
};
Index: sys/amd64/vmm/amd/svm_msr.c
===================================================================
--- sys/amd64/vmm/amd/svm_msr.c
+++ sys/amd64/vmm/amd/svm_msr.c
@@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
@@ -162,6 +164,11 @@
* Ignore writes to microcode update register.
*/
break;
+#ifdef BHYVE_SNAPSHOT
+ case MSR_TSC:
+ error = svm_set_tsc_offset(sc, vcpu, val - rdtsc());
+ break;
+#endif
case MSR_EXTFEATURES:
break;
default:
Index: sys/amd64/vmm/amd/vmcb.h
===================================================================
--- sys/amd64/vmm/amd/vmcb.h
+++ sys/amd64/vmm/amd/vmcb.h
@@ -31,8 +31,6 @@
#ifndef _VMCB_H_
#define _VMCB_H_
-struct svm_softc;
-
#define BIT(n) (1ULL << n)
/*
@@ -209,6 +207,10 @@
#define VMCB_ACCESS_OFFSET(v) ((v) & 0xFFF)
#ifdef _KERNEL
+
+struct svm_softc;
+struct vm_snapshot_meta;
+
/* VMCB save state area segment format */
struct vmcb_segment {
uint16_t selector;
@@ -331,6 +333,14 @@
int vmcb_setdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
int vmcb_getdesc(void *arg, int vcpu, int ident, struct seg_desc *desc);
int vmcb_seg(struct vmcb *vmcb, int ident, struct vmcb_segment *seg);
+#ifdef BHYVE_SNAPSHOT
+int vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val);
+int vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val);
+int vmcb_snapshot_desc(void *arg, int vcpu, int reg,
+ struct vm_snapshot_meta *meta);
+int vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident,
+ struct vm_snapshot_meta *meta);
+#endif
#endif /* _KERNEL */
#endif /* _VMCB_H_ */
Index: sys/amd64/vmm/amd/vmcb.c
===================================================================
--- sys/amd64/vmm/amd/vmcb.c
+++ sys/amd64/vmm/amd/vmcb.c
@@ -29,12 +29,15 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
@@ -452,3 +455,106 @@
return (0);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Read the register or field identified by 'ident' for 'vcpu' into *val via
+ * vm_get_register(). Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU)
+ * or 'ident' is VM_REG_LAST or greater; otherwise returns the result of
+ * vm_get_register().
+ */
+int
+vmcb_getany(struct svm_softc *sc, int vcpu, int ident, uint64_t *val)
+{
+
+	if (vcpu < 0 || vcpu >= VM_MAXCPU)
+		return (EINVAL);
+	if (ident >= VM_REG_LAST)
+		return (EINVAL);
+
+	return (vm_get_register(sc->vm, vcpu, ident, val));
+}
+
+/*
+ * Write 'val' to the register or field identified by 'ident' for 'vcpu' via
+ * vm_set_register(). Returns EINVAL when 'vcpu' is outside [0, VM_MAXCPU)
+ * or 'ident' is VM_REG_LAST or greater; otherwise returns the result of
+ * vm_set_register().
+ */
+int
+vmcb_setany(struct svm_softc *sc, int vcpu, int ident, uint64_t val)
+{
+
+	if (vcpu < 0 || vcpu >= VM_MAXCPU)
+		return (EINVAL);
+	if (ident >= VM_REG_LAST)
+		return (EINVAL);
+
+	return (vm_set_register(sc->vm, vcpu, ident, val));
+}
+
+/*
+ * Save or restore the segment descriptor 'reg' of 'vcpu', depending on
+ * meta->op. The base, limit and access fields are transferred in that
+ * order. On save the descriptor comes from vmcb_getdesc(); on restore it is
+ * rebuilt from the buffer and applied with vmcb_setdesc().
+ * Returns 0 on success, the first failing call's error otherwise, and
+ * EINVAL for an unknown operation.
+ */
+int
+vmcb_snapshot_desc(void *arg, int vcpu, int reg, struct vm_snapshot_meta *meta)
+{
+	struct seg_desc desc;
+	int error;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		error = vmcb_getdesc(arg, vcpu, reg, &desc);
+		if (error != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, error, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, error, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, error, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, error, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, error, out);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, error, out);
+		error = vmcb_setdesc(arg, vcpu, reg, &desc);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+out:
+	return (error);
+}
+
+/*
+ * Save or restore the 64-bit value identified by 'ident' for 'vcpu',
+ * depending on meta->op, using vmcb_getany() / vmcb_setany() for the access.
+ * Returns 0 on success, the first failing call's error otherwise, and
+ * EINVAL for an unknown operation.
+ */
+int
+vmcb_snapshot_any(struct svm_softc *sc, int vcpu, int ident,
+    struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int error;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		error = vmcb_getany(sc, vcpu, ident, &val);
+		if (error != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, error, out);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, error, out);
+		error = vmcb_setany(sc, vcpu, ident, val);
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+out:
+	return (error);
+}
+#endif
Index: sys/amd64/vmm/intel/vmcs.h
===================================================================
--- sys/amd64/vmm/intel/vmcs.h
+++ sys/amd64/vmm/intel/vmcs.h
@@ -32,6 +32,9 @@
#define _VMCS_H_
#ifdef _KERNEL
+
+struct vm_snapshot_meta;
+
struct vmcs {
uint32_t identifier;
uint32_t abort_code;
@@ -55,6 +58,16 @@
struct seg_desc *desc);
int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
+#ifdef BHYVE_SNAPSHOT
+int vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val);
+int vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val);
+int vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
+ struct vm_snapshot_meta *meta);
+int vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
+ struct vm_snapshot_meta *meta);
+int vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
+ struct vm_snapshot_meta *meta);
+#endif
/*
* Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h
Index: sys/amd64/vmm/intel/vmcs.c
===================================================================
--- sys/amd64/vmm/intel/vmcs.c
+++ sys/amd64/vmm/intel/vmcs.c
@@ -28,6 +28,7 @@
* $FreeBSD$
*/
+#include "opt_bhyve_snapshot.h"
#include "opt_ddb.h"
#include <sys/cdefs.h>
@@ -43,6 +44,7 @@
#include <machine/segments.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_host.h"
#include "vmx_cpufunc.h"
#include "vmcs.h"
@@ -428,6 +430,128 @@
return (error);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Read the VMCS field 'ident' into '*val'.
+ *
+ * When 'running' is zero the VMCS is loaded with VMPTRLD() before the read
+ * and released with VMCLEAR() afterwards; otherwise the read is issued
+ * directly.  Returns the result of vmread().
+ */
+int
+vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
+{
+	const int need_load = !running;
+	int rc;
+
+	if (need_load)
+		VMPTRLD(vmcs);
+	rc = vmread(ident, val);
+	if (need_load)
+		VMCLEAR(vmcs);
+
+	return (rc);
+}
+
+/*
+ * Write 'val' to the VMCS field 'ident'.
+ *
+ * When 'running' is zero the VMCS is loaded with VMPTRLD() before the write
+ * and released with VMCLEAR() afterwards; otherwise the write is issued
+ * directly.  Returns the result of vmwrite().
+ */
+int
+vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
+{
+	const int need_load = !running;
+	int rc;
+
+	if (need_load)
+		VMPTRLD(vmcs);
+	rc = vmwrite(ident, val);
+	if (need_load)
+		VMCLEAR(vmcs);
+
+	return (rc);
+}
+
+/*
+ * Save or restore a single guest register, named by 'ident', through the
+ * vmcs_getreg()/vmcs_setreg() accessors.
+ *
+ * Save: read the register, then push it into the snapshot buffer.
+ * Restore: pull the value from the snapshot buffer, then write the register.
+ * Any other meta->op value returns EINVAL.
+ */
+int
+vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
+	struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getreg(vmcs, running, ident, &val);
+		if (ret != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		ret = vmcs_setreg(vmcs, running, ident, val);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore the segment descriptor 'seg' (base, limit, access, in
+ * that order) through vmcs_getdesc()/vmcs_setdesc().
+ *
+ * Save: fetch the descriptor, then push its three members.
+ * Restore: pull the three members, then install the descriptor.
+ * Any other meta->op value returns EINVAL.
+ */
+int
+vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
+	struct vm_snapshot_meta *meta)
+{
+	struct seg_desc desc;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getdesc(vmcs, running, seg, &desc);
+		if (ret != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
+		ret = vmcs_setdesc(vmcs, running, seg, &desc);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore an arbitrary VMCS field, addressed by its raw encoding
+ * 'ident', through vmcs_getany()/vmcs_setany().
+ *
+ * Save: read the field, then push it into the snapshot buffer.
+ * Restore: pull the value from the snapshot buffer, then write the field.
+ * Any other meta->op value returns EINVAL.
+ */
+int
+vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
+	struct vm_snapshot_meta *meta)
+{
+	uint64_t val;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		ret = vmcs_getany(vmcs, running, ident, &val);
+		if (ret != 0)
+			break;
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
+		ret = vmcs_setany(vmcs, running, ident, val);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+#endif
+
#ifdef DDB
extern int vmxon_enabled[];
Index: sys/amd64/vmm/intel/vmx.c
===================================================================
--- sys/amd64/vmm/intel/vmx.c
+++ sys/amd64/vmm/intel/vmx.c
@@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/smp.h>
@@ -56,6 +58,8 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
+
#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
@@ -288,6 +292,9 @@
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
static void vmx_inject_pir(struct vlapic *vlapic);
+#ifdef BHYVE_SNAPSHOT
+static int vmx_restore_tsc(void *arg, int vcpu, uint64_t now);
+#endif
#ifdef KTR
static const char *
@@ -1281,7 +1288,10 @@
}
error = vmwrite(VMCS_TSC_OFFSET, offset);
-
+#ifdef BHYVE_SNAPSHOT
+ if (error == 0)
+ error = vm_set_tsc_offset(vmx->vm, vcpu, offset);
+#endif
return (error);
}
@@ -3789,6 +3799,153 @@
free(vlapic, M_VLAPIC);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the per-vCPU state kept in the vmx softc itself.
+ *
+ * For each of the VM_MAXCPU slots this processes guest_msrs[i] followed by
+ * the vmxctx guest_rdi..guest_r15, guest_cr2 and guest_dr0-3/guest_dr6
+ * fields, always in the order listed below; that order is the layout of the
+ * snapshot stream and must not change.
+ *
+ * 'arg' is the struct vmx cookie.  The SNAPSHOT_*_OR_LEAVE macros are
+ * defined elsewhere; presumably they store their status in 'ret' and jump
+ * to 'done' on failure -- confirm against vmm_snapshot.h.
+ */
+static int
+vmx_snapshot_vmi(void *arg, struct vm_snapshot_meta *meta)
+{
+ struct vmx *vmx;
+ struct vmxctx *vmxctx;
+ int i;
+ int ret;
+
+ vmx = arg;
+
+ KASSERT(vmx != NULL, ("%s: arg was NULL", __func__));
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ /* The whole per-vCPU guest MSR array goes first, as one buffer. */
+ SNAPSHOT_BUF_OR_LEAVE(vmx->guest_msrs[i],
+ sizeof(vmx->guest_msrs[i]), meta, ret, done);
+
+ /* Then the register context of the same vCPU, field by field. */
+ vmxctx = &vmx->ctx[i];
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rsi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rdx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rcx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r8, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r9, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rax, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_rbp, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r10, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r11, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r12, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r13, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r14, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_r15, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_cr2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr1, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr3, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vmxctx->guest_dr6, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Save or restore the VMCS-resident guest state of one vCPU.
+ *
+ * 'arg' is the struct vmx cookie and 'vcpu' indexes its vmcs[] array.  The
+ * request is refused with EINVAL when the vCPU is currently running on a
+ * host CPU other than the caller's.
+ *
+ * The fields are processed strictly in the order of the table below; that
+ * order is the layout of the snapshot stream and must not change.
+ *
+ * Processing stops at the first failing field and that field's error code
+ * is returned unchanged.  Summing the individual results would yield a
+ * number that is not a meaningful errno, and carrying on after a failed
+ * field would read or write the stream at the wrong position.
+ */
+static int
+vmx_snapshot_vmcx(void *arg, struct vm_snapshot_meta *meta, int vcpu)
+{
+	enum vmx_snap_kind { VMX_SNAP_REG, VMX_SNAP_DESC, VMX_SNAP_ANY };
+	static const struct {
+		enum vmx_snap_kind kind;
+		int ident;
+	} fields[] = {
+		{ VMX_SNAP_REG, VM_REG_GUEST_CR0 },
+		{ VMX_SNAP_REG, VM_REG_GUEST_CR3 },
+		{ VMX_SNAP_REG, VM_REG_GUEST_CR4 },
+		{ VMX_SNAP_REG, VM_REG_GUEST_DR7 },
+		{ VMX_SNAP_REG, VM_REG_GUEST_RSP },
+		{ VMX_SNAP_REG, VM_REG_GUEST_RIP },
+		{ VMX_SNAP_REG, VM_REG_GUEST_RFLAGS },
+
+		/* Guest segments: selector first, then descriptor. */
+		{ VMX_SNAP_REG, VM_REG_GUEST_ES },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_ES },
+		{ VMX_SNAP_REG, VM_REG_GUEST_CS },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_CS },
+		{ VMX_SNAP_REG, VM_REG_GUEST_SS },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_SS },
+		{ VMX_SNAP_REG, VM_REG_GUEST_DS },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_DS },
+		{ VMX_SNAP_REG, VM_REG_GUEST_FS },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_FS },
+		{ VMX_SNAP_REG, VM_REG_GUEST_GS },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_GS },
+		{ VMX_SNAP_REG, VM_REG_GUEST_TR },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_TR },
+		{ VMX_SNAP_REG, VM_REG_GUEST_LDTR },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_LDTR },
+
+		{ VMX_SNAP_REG, VM_REG_GUEST_EFER },
+
+		{ VMX_SNAP_DESC, VM_REG_GUEST_IDTR },
+		{ VMX_SNAP_DESC, VM_REG_GUEST_GDTR },
+
+		/* Guest page tables */
+		{ VMX_SNAP_REG, VM_REG_GUEST_PDPTE0 },
+		{ VMX_SNAP_REG, VM_REG_GUEST_PDPTE1 },
+		{ VMX_SNAP_REG, VM_REG_GUEST_PDPTE2 },
+		{ VMX_SNAP_REG, VM_REG_GUEST_PDPTE3 },
+
+		/* Other guest state */
+		{ VMX_SNAP_ANY, VMCS_GUEST_IA32_SYSENTER_CS },
+		{ VMX_SNAP_ANY, VMCS_GUEST_IA32_SYSENTER_ESP },
+		{ VMX_SNAP_ANY, VMCS_GUEST_IA32_SYSENTER_EIP },
+		{ VMX_SNAP_ANY, VMCS_GUEST_INTERRUPTIBILITY },
+		{ VMX_SNAP_ANY, VMCS_GUEST_ACTIVITY },
+		{ VMX_SNAP_ANY, VMCS_ENTRY_CTLS },
+		{ VMX_SNAP_ANY, VMCS_EXIT_CTLS },
+	};
+	struct vmcs *vmcs;
+	struct vmx *vmx;
+	size_t i;
+	int err, run, hostcpu;
+
+	vmx = (struct vmx *)arg;
+
+	KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+	vmcs = &vmx->vmcs[vcpu];
+
+	run = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (run && hostcpu != curcpu) {
+		printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu);
+		return (EINVAL);
+	}
+
+	for (i = 0; i < nitems(fields); i++) {
+		switch (fields[i].kind) {
+		case VMX_SNAP_REG:
+			err = vmcs_snapshot_reg(vmcs, run, fields[i].ident,
+			    meta);
+			break;
+		case VMX_SNAP_DESC:
+			err = vmcs_snapshot_desc(vmcs, run, fields[i].ident,
+			    meta);
+			break;
+		default:
+			err = vmcs_snapshot_any(vmcs, run, fields[i].ident,
+			    meta);
+			break;
+		}
+
+		/* Report the first failure as-is; do not touch later fields. */
+		if (err != 0)
+			return (err);
+	}
+
+	return (0);
+}
+
+/*
+ * Program the TSC offset of 'vcpu' via vmx_set_tsc_offset().
+ *
+ * Refused with EINVAL if the vCPU is running on a different host CPU.  If
+ * the vCPU is not running at all, its VMCS is loaded around the update and
+ * cleared again afterwards.
+ */
+static int
+vmx_restore_tsc(void *arg, int vcpu, uint64_t offset)
+{
+	struct vmx *vmx;
+	struct vmcs *vmcs;
+	int active, hostcpu, rc;
+
+	vmx = (struct vmx *)arg;
+	KASSERT(arg != NULL, ("%s: arg was NULL", __func__));
+	vmcs = &vmx->vmcs[vcpu];
+
+	active = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+	if (active && hostcpu != curcpu) {
+		printf("%s: %s%d is running", __func__, vm_name(vmx->vm), vcpu);
+		return (EINVAL);
+	}
+
+	/* Running on this CPU: no explicit load/clear around the update. */
+	if (active)
+		return (vmx_set_tsc_offset(vmx, vcpu, offset));
+
+	VMPTRLD(vmcs);
+	rc = vmx_set_tsc_offset(vmx, vcpu, offset);
+	VMCLEAR(vmcs);
+
+	return (rc);
+}
+#endif
+
struct vmm_ops vmm_ops_intel = {
.init = vmx_init,
.cleanup = vmx_cleanup,
@@ -3806,4 +3963,9 @@
.vmspace_free = ept_vmspace_free,
.vlapic_init = vmx_vlapic_init,
.vlapic_cleanup = vmx_vlapic_cleanup,
+#ifdef BHYVE_SNAPSHOT
+ .vmsnapshot = vmx_snapshot_vmi,
+ .vmcx_snapshot = vmx_snapshot_vmcx,
+ .vm_restore_tsc = vmx_restore_tsc,
+#endif
};
Index: sys/amd64/vmm/io/vatpic.h
===================================================================
--- sys/amd64/vmm/io/vatpic.h
+++ sys/amd64/vmm/io/vatpic.h
@@ -36,6 +36,8 @@
#define IO_ELCR1 0x4d0
#define IO_ELCR2 0x4d1
+struct vm_snapshot_meta;
+
struct vatpic *vatpic_init(struct vm *vm);
void vatpic_cleanup(struct vatpic *vatpic);
@@ -54,4 +56,8 @@
void vatpic_pending_intr(struct vm *vm, int *vecptr);
void vatpic_intr_accepted(struct vm *vm, int vector);
+#ifdef BHYVE_SNAPSHOT
+int vatpic_snapshot(struct vatpic *vatpic, struct vm_snapshot_meta *meta);
+#endif
+
#endif /* _VATPIC_H_ */
Index: sys/amd64/vmm/io/vatpic.c
===================================================================
--- sys/amd64/vmm/io/vatpic.c
+++ sys/amd64/vmm/io/vatpic.c
@@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/queue.h>
@@ -42,6 +44,7 @@
#include <dev/ic/i8259.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vmm_lapic.h"
@@ -808,3 +811,43 @@
{
free(vatpic, M_VATPIC);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the state of the virtual 8259 PIC pair.
+ *
+ * Every element of vatpic->atpic[] is processed with its fields in the
+ * order listed below, followed by the vatpic->elc array.  The field order
+ * is the layout of the snapshot stream and must not change.
+ *
+ * The SNAPSHOT_*_OR_LEAVE macros are defined elsewhere; presumably they
+ * store their status in 'ret' and jump to 'done' on failure -- confirm
+ * against vmm_snapshot.h.
+ */
+int
+vatpic_snapshot(struct vatpic *vatpic, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct atpic *atpic;
+
+ for (i = 0; i < nitems(vatpic->atpic); i++) {
+ atpic = &vatpic->atpic[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(atpic->ready, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->icw_num, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->rd_cmd_reg, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(atpic->aeoi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->poll, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->rotate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->sfn, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->irq_base, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->request, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->service, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->mask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->smm, meta, ret, done);
+
+ /* acnt is an array and is therefore copied as a whole buffer. */
+ SNAPSHOT_BUF_OR_LEAVE(atpic->acnt, sizeof(atpic->acnt),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->lowprio, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atpic->intr_raised, meta, ret, done);
+
+ }
+
+ /* State that belongs to the vatpic as a whole, not to one atpic. */
+ SNAPSHOT_BUF_OR_LEAVE(vatpic->elc, sizeof(vatpic->elc),
+ meta, ret, done);
+
+done:
+ return (ret);
+}
+#endif
Index: sys/amd64/vmm/io/vatpit.h
===================================================================
--- sys/amd64/vmm/io/vatpit.h
+++ sys/amd64/vmm/io/vatpit.h
@@ -36,6 +36,8 @@
#define NMISC_PORT 0x61
+struct vm_snapshot_meta;
+
struct vatpit *vatpit_init(struct vm *vm);
void vatpit_cleanup(struct vatpit *vatpit);
@@ -43,5 +45,8 @@
uint32_t *eax);
int vatpit_nmisc_handler(struct vm *vm, int vcpuid, bool in, int port,
int bytes, uint32_t *eax);
+#ifdef BHYVE_SNAPSHOT
+int vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta);
+#endif
#endif /* _VATPIT_H_ */
Index: sys/amd64/vmm/io/vatpit.c
===================================================================
--- sys/amd64/vmm/io/vatpit.c
+++ sys/amd64/vmm/io/vatpit.c
@@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/types.h>
#include <sys/queue.h>
@@ -39,6 +41,7 @@
#include <sys/systm.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vatpic.h"
@@ -472,3 +475,42 @@
free(vatpit, M_VATPIT);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the state of the virtual 8254 PIT.
+ *
+ * Processes vatpit->freq_bt and then, for every element of
+ * vatpit->channel[], the fields listed below in that order.  The field
+ * order is the layout of the snapshot stream and must not change.
+ *
+ * No callout is re-armed here, so timers are not restarted after a
+ * restore; the printf below warns about that.
+ *
+ * The SNAPSHOT_*_OR_LEAVE macros are defined elsewhere; presumably they
+ * store their status in 'ret' and jump to 'done' on failure -- confirm
+ * against vmm_snapshot.h.
+ */
+int
+vatpit_snapshot(struct vatpit *vatpit, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct channel *channel;
+
+ SNAPSHOT_VAR_OR_LEAVE(vatpit->freq_bt.sec, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vatpit->freq_bt.frac, meta, ret, done);
+
+ /*
+ * TODO: properly restore timers; they will NOT work currently.
+ * NOTE(review): this message is printed unconditionally, i.e. on save
+ * as well as on restore.
+ */
+ printf("%s: snapshot restore does not reset timers!\r\n", __func__);
+
+ for (i = 0; i < nitems(vatpit->channel); i++) {
+ channel = &vatpit->channel[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(channel->mode, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->initial, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->now_bt.sec, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->now_bt.frac, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(channel->cr, sizeof(channel->cr),
+ meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(channel->ol, sizeof(channel->ol),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->slatched, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->crbyte, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->frbyte, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->callout_bt.sec, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(channel->callout_bt.frac, meta, ret,
+ done);
+ }
+
+done:
+ return (ret);
+}
+#endif
Index: sys/amd64/vmm/io/vhpet.h
===================================================================
--- sys/amd64/vmm/io/vhpet.h
+++ sys/amd64/vmm/io/vhpet.h
@@ -35,6 +35,8 @@
#define VHPET_BASE 0xfed00000
#define VHPET_SIZE 1024
+struct vm_snapshot_meta;
+
struct vhpet *vhpet_init(struct vm *vm);
void vhpet_cleanup(struct vhpet *vhpet);
int vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val,
@@ -42,5 +44,9 @@
int vhpet_mmio_read(void *vm, int vcpuid, uint64_t gpa, uint64_t *val,
int size, void *arg);
int vhpet_getcap(struct vm_hpet_cap *cap);
+#ifdef BHYVE_SNAPSHOT
+int vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta);
+int vhpet_restore_time(struct vhpet *vhpet);
+#endif
#endif /* _VHPET_H_ */
Index: sys/amd64/vmm/io/vhpet.c
===================================================================
--- sys/amd64/vmm/io/vhpet.c
+++ sys/amd64/vmm/io/vhpet.c
@@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -43,6 +45,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vatpic.h"
@@ -761,3 +764,49 @@
cap->capabilities = vhpet_capabilities();
return (0);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the state of the virtual HPET.
+ *
+ * Processes freq_sbt, config and isr, then the counter value, then the
+ * listed fields of every element of vhpet->timer[].  The field order is the
+ * layout of the snapshot stream and must not change.
+ *
+ * The SNAPSHOT_*_OR_LEAVE macros are defined elsewhere; presumably they
+ * store their status in 'ret' and jump to 'done' on failure -- confirm
+ * against vmm_snapshot.h.
+ */
+int
+vhpet_snapshot(struct vhpet *vhpet, struct vm_snapshot_meta *meta)
+{
+ int i, ret;
+ uint32_t countbase;
+
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->freq_sbt, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->config, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->isr, meta, ret, done);
+
+ /*
+ * At restore time the countbase should have the value it had when the
+ * snapshot was created. Since that value is not kept directly in
+ * vhpet->countbase, but rather computed relative to the current system
+ * uptime using countbase_sbt, save the value returned by vhpet_counter()
+ * and, on restore, store the recorded value into vhpet->countbase.
+ */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ countbase = vhpet_counter(vhpet, NULL);
+ SNAPSHOT_VAR_OR_LEAVE(countbase, meta, ret, done);
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ vhpet->countbase = countbase;
+
+ for (i = 0; i < nitems(vhpet->timer); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].cap_config,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].msireg, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].compval, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].comprate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vhpet->timer[i].callout_sbt,
+ meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Restart the HPET main counter if the restored configuration has it
+ * enabled.  Always returns 0.
+ */
+int
+vhpet_restore_time(struct vhpet *vhpet)
+{
+	if (!vhpet_counter_enabled(vhpet))
+		return (0);
+
+	vhpet_start_counting(vhpet);
+	return (0);
+}
+#endif
Index: sys/amd64/vmm/io/vioapic.h
===================================================================
--- sys/amd64/vmm/io/vioapic.h
+++ sys/amd64/vmm/io/vioapic.h
@@ -32,6 +32,8 @@
#ifndef _VIOAPIC_H_
#define _VIOAPIC_H_
+struct vm_snapshot_meta;
+
#define VIOAPIC_BASE 0xFEC00000
#define VIOAPIC_SIZE 4096
@@ -49,4 +51,9 @@
int vioapic_pincount(struct vm *vm);
void vioapic_process_eoi(struct vm *vm, int vcpuid, int vector);
+#ifdef BHYVE_SNAPSHOT
+int vioapic_snapshot(struct vioapic *vioapic,
+ struct vm_snapshot_meta *meta);
+#endif
+
#endif
Index: sys/amd64/vmm/io/vioapic.c
===================================================================
--- sys/amd64/vmm/io/vioapic.c
+++ sys/amd64/vmm/io/vioapic.c
@@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/lock.h>
@@ -42,6 +44,7 @@
#include <x86/apicreg.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ktr.h"
#include "vmm_lapic.h"
@@ -499,3 +502,22 @@
return (REDIR_ENTRIES);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the state of the virtual I/O APIC: ioregsel first, then the
+ * 'reg' and 'acnt' members of every entry of vioapic->rtbl[].  The field
+ * order is the layout of the snapshot stream and must not change.
+ *
+ * The SNAPSHOT_VAR_OR_LEAVE macro is defined elsewhere; presumably it
+ * stores its status in 'ret' and jumps to 'done' on failure -- confirm
+ * against vmm_snapshot.h.
+ */
+int
+vioapic_snapshot(struct vioapic *vioapic, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->ioregsel, meta, ret, done);
+
+ for (i = 0; i < nitems(vioapic->rtbl); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->rtbl[i].reg, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vioapic->rtbl[i].acnt, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+#endif
Index: sys/amd64/vmm/io/vlapic.h
===================================================================
--- sys/amd64/vmm/io/vlapic.h
+++ sys/amd64/vmm/io/vlapic.h
@@ -32,6 +32,7 @@
#define _VLAPIC_H_
struct vm;
+struct vm_snapshot_meta;
enum x2apic_state;
int vlapic_write(struct vlapic *vlapic, int mmio_access, uint64_t offset,
@@ -107,4 +108,9 @@
void vlapic_dcr_write_handler(struct vlapic *vlapic);
void vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset);
void vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val);
+
+#ifdef BHYVE_SNAPSHOT
+int vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta);
+#endif
+
#endif /* _VLAPIC_H_ */
Index: sys/amd64/vmm/io/vlapic.c
===================================================================
--- sys/amd64/vmm/io/vlapic.c
+++ sys/amd64/vmm/io/vlapic.c
@@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
@@ -47,6 +49,7 @@
#include <machine/smp.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_ktr.h"
@@ -1643,3 +1646,106 @@
VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector);
vlapic_set_tmr(vlapic, vector, true);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Re-arm the local APIC timer callout after a restore, given the current
+ * count register value 'ccr' recorded in the snapshot.  Modelled on
+ * vlapic_icrtmr_write_handler().
+ *
+ *  - ccr != 0: the timer fires after 'ccr' ticks of timer_freq_bt.
+ *  - ccr == 0 and the timer is periodic: the pending callout is stopped
+ *    and the timer is restarted with a full timer_period_bt.
+ *  - otherwise nothing is scheduled.
+ *
+ * The whole update runs under the vlapic timer lock.
+ */
+static void
+vlapic_reset_callout(struct vlapic *vlapic, uint32_t ccr)
+{
+	struct bintime delay;
+
+	VLAPIC_TIMER_LOCK(vlapic);
+
+	if (ccr != 0) {
+		delay = vlapic->timer_freq_bt;
+		bintime_mul(&delay, ccr);
+	} else if (vlapic_periodic_timer(vlapic)) {
+		/* even if the CCR was 0, periodic timers should be reset */
+		delay = vlapic->timer_period_bt;
+	} else {
+		goto unlock;
+	}
+
+	/* Expiry time is "now" plus the chosen delay. */
+	binuptime(&vlapic->timer_fire_bt);
+	bintime_add(&vlapic->timer_fire_bt, &delay);
+
+	if (ccr == 0)
+		callout_stop(&vlapic->callout);
+	callout_reset_sbt(&vlapic->callout, bttosbt(delay), 0,
+	    vlapic_callout_handler, vlapic, 0);
+
+unlock:
+	VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+/*
+ * Snapshot the local APIC of every vCPU slot (0 .. VM_MAXCPU - 1).
+ *
+ * Per vCPU the stream holds, in this order: the whole APIC page,
+ * esr_pending, timer_freq_bt (sec, frac), isrvec_stk, isrvec_stk_top,
+ * boot_state, lvt_last and the timer's current count register value.  The
+ * order is the layout of the snapshot stream and must not change.
+ *
+ * On restore, timer_period_bt is recomputed from the restored values and
+ * the timer callout is re-armed from the recorded current count.
+ *
+ * The SNAPSHOT_*_OR_LEAVE macros are defined elsewhere; presumably they
+ * store their status in 'ret' and jump to 'done' on failure -- confirm
+ * against vmm_snapshot.h.
+ */
+int
+vlapic_snapshot(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+ int i, ret;
+ struct vlapic *vlapic;
+ struct LAPIC *lapic;
+ uint32_t ccr;
+
+ KASSERT(vm != NULL, ("%s: arg was NULL", __func__));
+
+ ret = 0;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vlapic = vm_lapic(vm, i);
+
+ /* snapshot the page first; timer period depends on icr_timer */
+ lapic = vlapic->apic_page;
+ SNAPSHOT_BUF_OR_LEAVE(lapic, PAGE_SIZE, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->esr_pending, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.sec,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->timer_freq_bt.frac,
+ meta, ret, done);
+
+ /*
+ * Timer period is equal to 'icr_timer' ticks at a frequency of
+ * 'timer_freq_bt'.
+ */
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ vlapic->timer_period_bt = vlapic->timer_freq_bt;
+ bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+ }
+
+ SNAPSHOT_BUF_OR_LEAVE(vlapic->isrvec_stk,
+ sizeof(vlapic->isrvec_stk),
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->isrvec_stk_top, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vlapic->boot_state, meta, ret, done);
+
+ SNAPSHOT_BUF_OR_LEAVE(vlapic->lvt_last,
+ sizeof(vlapic->lvt_last),
+ meta, ret, done);
+
+ /*
+ * The current count is obtained from vlapic_get_ccr() on save; on
+ * restore 'ccr' is filled in from the snapshot stream instead.
+ */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ ccr = vlapic_get_ccr(vlapic);
+
+ SNAPSHOT_VAR_OR_LEAVE(ccr, meta, ret, done);
+
+ if (meta->op == VM_SNAPSHOT_RESTORE) {
+ /* Reset the value of the 'timer_fire_bt' and the vlapic
+ * callout based on the value of the current count
+ * register saved when the VM snapshot was created
+ */
+ vlapic_reset_callout(vlapic, ccr);
+ }
+ }
+
+done:
+ return (ret);
+}
+#endif
Index: sys/amd64/vmm/io/vpmtmr.h
===================================================================
--- sys/amd64/vmm/io/vpmtmr.h
+++ sys/amd64/vmm/io/vpmtmr.h
@@ -34,6 +34,7 @@
#define IO_PMTMR 0x408
struct vpmtmr;
+struct vm_snapshot_meta;
struct vpmtmr *vpmtmr_init(struct vm *vm);
void vpmtmr_cleanup(struct vpmtmr *pmtmr);
@@ -41,4 +42,8 @@
int vpmtmr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
+#ifdef BHYVE_SNAPSHOT
+int vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta);
+#endif
+
#endif
Index: sys/amd64/vmm/io/vpmtmr.c
===================================================================
--- sys/amd64/vmm/io/vpmtmr.c
+++ sys/amd64/vmm/io/vpmtmr.c
@@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/kernel.h>
@@ -36,6 +38,7 @@
#include <sys/systm.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include "vpmtmr.h"
@@ -103,3 +106,16 @@
return (0);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the virtual ACPI PM timer; only vpmtmr->baseval is processed.
+ *
+ * The SNAPSHOT_VAR_OR_LEAVE macro is defined elsewhere; presumably it
+ * stores its status in 'ret' and jumps to 'done' on failure -- confirm
+ * against vmm_snapshot.h.
+ */
+int
+vpmtmr_snapshot(struct vpmtmr *vpmtmr, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(vpmtmr->baseval, meta, ret, done);
+
+done:
+ return (ret);
+}
+#endif
Index: sys/amd64/vmm/io/vrtc.h
===================================================================
--- sys/amd64/vmm/io/vrtc.h
+++ sys/amd64/vmm/io/vrtc.h
@@ -34,6 +34,7 @@
#include <isa/isareg.h>
struct vrtc;
+struct vm_snapshot_meta;
struct vrtc *vrtc_init(struct vm *vm);
void vrtc_cleanup(struct vrtc *vrtc);
@@ -49,4 +50,8 @@
int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
+#ifdef BHYVE_SNAPSHOT
+int vrtc_snapshot(struct vrtc *vrtc, struct vm_snapshot_meta *meta);
+#endif
+
#endif
Index: sys/amd64/vmm/io/vrtc.c
===================================================================
--- sys/amd64/vmm/io/vrtc.c
+++ sys/amd64/vmm/io/vrtc.c
@@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/queue.h>
@@ -40,6 +42,7 @@
#include <sys/sysctl.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <isa/rtc.h>
@@ -1019,3 +1022,45 @@
callout_drain(&vrtc->callout);
free(vrtc, M_VRTC);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the state of the virtual RTC.
+ *
+ * Processes the index register, base_rtctime and every member of rtcdev in
+ * the order listed below; that order is the layout of the snapshot stream
+ * and must not change.  On restore, base_uptime is reset to the current
+ * uptime so that base_rtctime is interpreted relative to "now".  After all
+ * fields have been processed the periodic callout is re-armed.
+ *
+ * The whole operation runs under the vrtc lock.  The lock is released on
+ * every exit path, including the early exits taken by the
+ * SNAPSHOT_*_OR_LEAVE macros when a field cannot be copied.
+ *
+ * Returns 0 on success or the error left in 'ret' by the failing field.
+ */
+int
+vrtc_snapshot(struct vrtc *vrtc, struct vm_snapshot_meta *meta)
+{
+	int ret;
+
+	VRTC_LOCK(vrtc);
+
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->addr, meta, ret, done);
+	if (meta->op == VM_SNAPSHOT_RESTORE)
+		vrtc->base_uptime = sbinuptime();
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->base_rtctime, meta, ret, done);
+
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.sec, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_sec, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.min, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_min, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.hour, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.alarm_hour, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_week, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.day_of_month, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.month, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.year, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_a, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_b, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_c, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.reg_d, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram, sizeof(vrtc->rtcdev.nvram),
+	    meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(vrtc->rtcdev.century, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(vrtc->rtcdev.nvram2, sizeof(vrtc->rtcdev.nvram2),
+	    meta, ret, done);
+
+	vrtc_callout_reset(vrtc, vrtc_freq(vrtc));
+
+done:
+	/*
+	 * Success and failure paths both end up here.  The label must stay
+	 * ahead of the unlock, otherwise a failed field leaves the vrtc
+	 * mutex held.
+	 */
+	VRTC_UNLOCK(vrtc);
+
+	return (ret);
+}
+#endif
Index: sys/amd64/vmm/vmm.c
===================================================================
--- sys/amd64/vmm/vmm.c
+++ sys/amd64/vmm/vmm.c
@@ -31,6 +31,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -44,7 +46,7 @@
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
-#include <sys/systm.h>
+#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -53,6 +55,11 @@
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_kern.h>
+#include <vm/vnode_pager.h>
+#include <vm/swap_pager.h>
+#include <vm/uma.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
@@ -64,6 +71,7 @@
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -111,6 +119,7 @@
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
uint64_t nextrip; /* (x) next instruction to execute */
+ uint64_t tsc_offset; /* (o) TSC offsetting */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@@ -204,6 +213,14 @@
(ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
#define VLAPIC_CLEANUP(vmi, vlapic) \
(ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
+#ifdef BHYVE_SNAPSHOT
+#define VM_SNAPSHOT_VMI(vmi, meta) \
+ (ops != NULL ? (*ops->vmsnapshot)(vmi, meta) : ENXIO)
+#define VM_SNAPSHOT_VMCX(vmi, meta, vcpuid) \
+ (ops != NULL ? (*ops->vmcx_snapshot)(vmi, meta, vcpuid) : ENXIO)
+#define VM_RESTORE_TSC(vmi, vcpuid, offset) \
+ (ops != NULL ? (*ops->vm_restore_tsc)(vmi, vcpuid, offset) : ENXIO)
+#endif
#define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
#define fpu_stop_emulating() clts()
@@ -289,6 +306,7 @@
vcpu->hostcpu = NOCPU;
vcpu->guestfpu = fpu_save_area_alloc();
vcpu->stats = vmm_stat_alloc();
+ vcpu->tsc_offset = 0;
}
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
@@ -2715,3 +2733,177 @@
VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the generic (hardware-independent) per-vCPU state of every vCPU
+ * slot (0 .. VM_MAXCPU - 1), with the fields in the order listed below.
+ * That order is the layout of the snapshot stream and must not change.
+ *
+ * The SNAPSHOT_VAR_OR_LEAVE macro is defined elsewhere; presumably it
+ * stores its status in 'ret' and jumps to 'done' on failure -- confirm
+ * against vmm_snapshot.h.
+ */
+static int
+vm_snapshot_vcpus(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ int i;
+ struct vcpu *vcpu;
+
+ for (i = 0; i < VM_MAXCPU; i++) {
+ vcpu = &vm->vcpu[i];
+
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->x2apic_state, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exitintinfo, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_vector, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode_valid, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exc_errcode, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->guest_xcr0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->exitinfo, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->nextrip, meta, ret, done);
+ /*
+ * XXX we're cheating here: the value stored in tsc_offset at this
+ * point is actually the guest's TSC value, not an offset (the
+ * caller adds the host TSC before a save).
+ *
+ * It will be turned back into an actual offset when the TSC
+ * restore function is called.
+ */
+ SNAPSHOT_VAR_OR_LEAVE(vcpu->tsc_offset, meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Snapshot the generic per-vCPU state of the VM (see vm_snapshot_vcpus()).
+ *
+ * For a save, each vCPU's tsc_offset is temporarily turned into the TSC
+ * value seen by the guest (offset + host TSC) so that this absolute value
+ * ends up in the snapshot; it is turned back into an offset afterwards.
+ *
+ * The adjustment is undone on every path, including when copying the data
+ * fails; otherwise a failed save would leave each vCPU's tsc_offset
+ * permanently skewed by the host TSC value.
+ *
+ * Returns 0 on success or the error reported by vm_snapshot_vcpus().
+ */
+static int
+vm_snapshot_vm(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	int ret;
+	int i;
+	uint64_t now;
+
+	now = rdtsc();
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		/*
+		 * XXX make tsc_offset take the value of the TSC proper as
+		 * seen by the guest
+		 */
+		for (i = 0; i < VM_MAXCPU; i++)
+			vm->vcpu[i].tsc_offset += now;
+	}
+
+	ret = vm_snapshot_vcpus(vm, meta);
+	if (ret != 0)
+		printf("%s: failed to copy vm data to user buffer", __func__);
+
+	if (meta->op == VM_SNAPSHOT_SAVE) {
+		/*
+		 * XXX turn tsc_offset back into an offset; the absolute value
+		 * is only required for restore, using it otherwise would be
+		 * wrong.  This runs even if the copy above failed.
+		 */
+		for (i = 0; i < VM_MAXCPU; i++)
+			vm->vcpu[i].tsc_offset -= now;
+	}
+
+	return (ret);
+}
+
+/*
+ * Snapshot the hardware control structure (VMCS/VMCB) of every vCPU slot
+ * through the backend's vmcx_snapshot hook.  Stops at the first vCPU that
+ * fails, logs it and returns that error; returns 0 if all succeed.
+ */
+static int
+vm_snapshot_vmcx(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	int error, vcpuid;
+
+	for (vcpuid = 0; vcpuid < VM_MAXCPU; vcpuid++) {
+		error = VM_SNAPSHOT_VMCX(vm->cookie, meta, vcpuid);
+		if (error == 0)
+			continue;
+
+		printf("%s: failed to snapshot vmcs/vmcb data for "
+		    "vCPU: %d; error: %d\n", __func__, vcpuid, error);
+		return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Entry point for snapshot requests: dispatch on meta->dev_req to the
+ * snapshot routine of the requested kernel-side structure and return its
+ * result.  Unknown request types are logged and rejected with EINVAL.
+ */
+int
+vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta)
+{
+	switch (meta->dev_req) {
+	case STRUCT_VMX:
+		return (VM_SNAPSHOT_VMI(vm->cookie, meta));
+	case STRUCT_VMCX:
+		return (vm_snapshot_vmcx(vm, meta));
+	case STRUCT_VM:
+		return (vm_snapshot_vm(vm, meta));
+	case STRUCT_VIOAPIC:
+		return (vioapic_snapshot(vm_ioapic(vm), meta));
+	case STRUCT_VLAPIC:
+		return (vlapic_snapshot(vm, meta));
+	case STRUCT_VHPET:
+		return (vhpet_snapshot(vm_hpet(vm), meta));
+	case STRUCT_VATPIC:
+		return (vatpic_snapshot(vm_atpic(vm), meta));
+	case STRUCT_VATPIT:
+		return (vatpit_snapshot(vm_atpit(vm), meta));
+	case STRUCT_VPMTMR:
+		return (vpmtmr_snapshot(vm_pmtmr(vm), meta));
+	case STRUCT_VRTC:
+		return (vrtc_snapshot(vm_rtc(vm), meta));
+	default:
+		printf("%s: failed to find the requested type %#x\n",
+		    __func__, meta->dev_req);
+		return (EINVAL);
+	}
+}
+
+/*
+ * Record 'offset' as the TSC offset of vCPU 'vcpuid' in the generic vcpu
+ * structure.  Returns EINVAL for an out-of-range vcpuid, 0 otherwise.
+ */
+int
+vm_set_tsc_offset(struct vm *vm, int vcpuid, uint64_t offset)
+{
+	if (vcpuid >= 0 && vcpuid < VM_MAXCPU) {
+		vm->vcpu[vcpuid].tsc_offset = offset;
+		return (0);
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Re-synchronise time sources after a restore.
+ *
+ * The host TSC is sampled first, then the HPET counter is restarted, and
+ * finally each vCPU's TSC offset is programmed through the backend as
+ * (stored tsc_offset - sampled host TSC).  Returns the first error
+ * encountered, or 0.
+ */
+int
+vm_restore_time(struct vm *vm)
+{
+	uint64_t host_tsc;
+	int cpu, rc;
+
+	host_tsc = rdtsc();
+
+	rc = vhpet_restore_time(vm_hpet(vm));
+	if (rc != 0)
+		return (rc);
+
+	for (cpu = 0; cpu < nitems(vm->vcpu); cpu++) {
+		rc = VM_RESTORE_TSC(vm->cookie, cpu,
+		    vm->vcpu[cpu].tsc_offset - host_tsc);
+		if (rc != 0)
+			return (rc);
+	}
+
+	return (0);
+}
+#endif
Index: sys/amd64/vmm/vmm_dev.c
===================================================================
--- sys/amd64/vmm/vmm_dev.c
+++ sys/amd64/vmm/vmm_dev.c
@@ -31,6 +31,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_bhyve_snapshot.h"
+
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/jail.h>
@@ -53,8 +55,9 @@
#include <machine/vmparam.h>
#include <machine/vmm.h>
-#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_dev.h>
+#include <machine/vmm_instruction_emul.h>
+#include <machine/vmm_snapshot.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -381,6 +384,9 @@
struct vm_cpu_topology *topology;
uint64_t *regvals;
int *regnums;
+#ifdef BHYVE_SNAPSHOT
+ struct vm_snapshot_meta *snapshot_meta;
+#endif
error = vmm_priv_check(curthread->td_ucred);
if (error)
@@ -784,6 +790,15 @@
&topology->threads, &topology->maxcpus);
error = 0;
break;
+#ifdef BHYVE_SNAPSHOT
+ case VM_SNAPSHOT_REQ:
+ snapshot_meta = (struct vm_snapshot_meta *)data;
+ error = vm_snapshot_req(sc->vm, snapshot_meta);
+ break;
+ case VM_RESTORE_TIME:
+ error = vm_restore_time(sc->vm);
+ break;
+#endif
default:
error = ENOTTY;
break;
Index: sys/amd64/vmm/vmm_snapshot.c
===================================================================
--- /dev/null
+++ sys/amd64/vmm/vmm_snapshot.c
@@ -0,0 +1,103 @@
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/vmm_snapshot.h>
+
+/*
+ * Log a failed snapshot operation on the buffer called 'bufname'; 'op' is
+ * rendered as "save", "restore" or "unknown".
+ */
+void
+vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op)
+{
+	const char *opstr;
+
+	switch (op) {
+	case VM_SNAPSHOT_SAVE:
+		opstr = "save";
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		opstr = "restore";
+		break;
+	default:
+		opstr = "unknown";
+		break;
+	}
+
+	printf("%s: snapshot-%s failed for %s\r\n", __func__, opstr, bufname);
+}
+
+/*
+ * Transfer 'data_size' bytes between the kernel object 'data' and the
+ * snapshot buffer described by 'meta'.
+ *
+ * VM_SNAPSHOT_SAVE copies data out to the buffer, VM_SNAPSHOT_RESTORE copies
+ * the buffer into data. On success the buffer cursor is advanced and the
+ * remaining size reduced by data_size.
+ *
+ * Returns 0 on success, E2BIG when fewer than data_size bytes remain, EINVAL
+ * for an unknown operation, or the error reported by copyin()/copyout().
+ * The buffer cursor is left untouched on every error.
+ */
+int
+vm_snapshot_buf(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *buffer;
+	int error, op;
+	void *nv_data;
+
+	nv_data = __DEVOLATILE(void *, data);
+	buffer = &meta->buffer;
+	op = meta->op;
+
+	if (buffer->buf_rem < data_size) {
+		printf("%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	if (op == VM_SNAPSHOT_SAVE)
+		error = copyout(nv_data, buffer->buf, data_size);
+	else if (op == VM_SNAPSHOT_RESTORE)
+		error = copyin(buffer->buf, nv_data, data_size);
+	else
+		error = EINVAL;
+
+	/*
+	 * A failed copy must not be reported as success: the caller would
+	 * otherwise continue with a partially written or stale object.
+	 */
+	if (error != 0)
+		return (error);
+
+	buffer->buf += data_size;
+	buffer->buf_rem -= data_size;
+
+	return (0);
+}
+
+/*
+ * Return the number of bytes consumed so far in the snapshot buffer of
+ * 'meta' (buf_size - buf_rem), or 0 after logging a message when the
+ * bookkeeping is inconsistent (buf_rem larger than buf_size).
+ */
+size_t
+vm_get_snapshot_size(struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *buffer = &meta->buffer;
+
+	if (buffer->buf_size >= buffer->buf_rem)
+		return (buffer->buf_size - buffer->buf_rem);
+
+	printf("%s: Invalid buffer: size = %zu, rem = %zu\r\n",
+	    __func__, buffer->buf_size, buffer->buf_rem);
+	return (0);
+}
+
+/*
+ * Save 'data' to the snapshot buffer (VM_SNAPSHOT_SAVE) or compare it with
+ * the buffer contents (VM_SNAPSHOT_RESTORE).
+ *
+ * Returns 0 on a successful save or a matching comparison, the non-zero
+ * memcmp() result on a mismatch, E2BIG when fewer than data_size bytes
+ * remain, EINVAL for an unknown operation, or the copyin()/copyout() error
+ * when the buffer cannot be accessed.
+ *
+ * The buffer cursor is advanced after a completed save or comparison (also
+ * after a mismatch, as before) and left untouched on every other error.
+ */
+int
+vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *buffer;
+	const uint8_t *kdata;
+	uint8_t chunk[64];
+	size_t len, off;
+	int error, op, ret;
+
+	kdata = __DEVOLATILE(const uint8_t *, data);
+	buffer = &meta->buffer;
+	op = meta->op;
+
+	if (buffer->buf_rem < data_size) {
+		printf("%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	ret = 0;
+	if (op == VM_SNAPSHOT_SAVE) {
+		error = copyout(kdata, buffer->buf, data_size);
+		if (error != 0)
+			return (error);
+	} else if (op == VM_SNAPSHOT_RESTORE) {
+		/*
+		 * buffer->buf is the address handed to copyout() on the save
+		 * path, i.e. a user-space address, so memcmp() must not
+		 * dereference it directly. Stage it through a small kernel
+		 * buffer and stop at the first differing chunk.
+		 */
+		for (off = 0; off < data_size && ret == 0; off += len) {
+			len = data_size - off;
+			if (len > sizeof(chunk))
+				len = sizeof(chunk);
+
+			error = copyin((const uint8_t *)buffer->buf + off,
+			    chunk, len);
+			if (error != 0)
+				return (error);
+
+			ret = memcmp(kdata + off, chunk, len);
+		}
+	} else {
+		return (EINVAL);
+	}
+
+	buffer->buf += data_size;
+	buffer->buf_rem -= data_size;
+
+	return (ret);
+}
Index: sys/conf/config.mk
===================================================================
--- sys/conf/config.mk
+++ sys/conf/config.mk
@@ -8,6 +8,10 @@
# the code here when they all produce identical results
# (or should)
.if !defined(KERNBUILDDIR)
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+opt_bhyve_snapshot.h:
+ @echo "#define BHYVE_SNAPSHOT 1" > ${.TARGET}
+.endif
opt_bpf.h:
echo "#define DEV_BPF 1" > ${.TARGET}
.if ${MK_INET_SUPPORT} != "no"
@@ -38,6 +42,9 @@
KERN_OPTS=MROUTING IEEE80211_DEBUG \
IEEE80211_SUPPORT_MESH DEV_BPF \
${KERN_OPTS.${MACHINE}} ${KERN_OPTS_EXTRA}
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+KERN_OPTS+= BHYVE_SNAPSHOT
+.endif
.if ${MK_INET_SUPPORT} != "no"
KERN_OPTS+= INET TCP_OFFLOAD
.endif
Index: sys/conf/kern.opts.mk
===================================================================
--- sys/conf/kern.opts.mk
+++ sys/conf/kern.opts.mk
@@ -49,6 +49,7 @@
ZFS
__DEFAULT_NO_OPTIONS = \
+ BHYVE_SNAPSHOT \
EXTRA_TCP_STACKS \
KERNEL_RETPOLINE \
OFED \
Index: sys/conf/kmod.mk
===================================================================
--- sys/conf/kmod.mk
+++ sys/conf/kmod.mk
@@ -73,12 +73,7 @@
KMODISLOADED?= /sbin/kldstat -q -n
OBJCOPY?= objcopy
-.include <bsd.init.mk>
-# Grab all the options for a kernel build. For backwards compat, we need to
-# do this after bsd.own.mk.
-.include "kern.opts.mk"
-.include <bsd.compiler.mk>
-.include "config.mk"
+.include "kmod.opts.mk"
# Search for kernel source tree in standard places.
.if empty(KERNBUILDDIR)
Index: sys/conf/kmod.opts.mk
===================================================================
--- /dev/null
+++ sys/conf/kmod.opts.mk
@@ -0,0 +1,16 @@
+# $FreeBSD$
+#
+# Handle kernel options (KERN_OPTS) for kernel modules. This file can be
+# included early in a kmod Makefile to allow KERN_OPTS to control SRCS, etc.
+
+.if !target(__<kmod.opts.mk>__)
+__<kmod.opts.mk>__:
+
+.include <bsd.init.mk>
+# Grab all the options for a kernel build. For backwards compat, we need to
+# do this after bsd.own.mk.
+.include "kern.opts.mk"
+.include <bsd.compiler.mk>
+.include "config.mk"
+
+.endif # !target(__<kmod.opts.mk>__)
Index: sys/conf/options.amd64
===================================================================
--- sys/conf/options.amd64
+++ sys/conf/options.amd64
@@ -3,6 +3,7 @@
AUTO_EOI_1 opt_auto_eoi.h
AUTO_EOI_2 opt_auto_eoi.h
+BHYVE_SNAPSHOT
COUNT_XINVLTLB_HITS opt_smp.h
COUNT_IPIS opt_smp.h
MAXMEM
Index: sys/modules/vmm/Makefile
===================================================================
--- sys/modules/vmm/Makefile
+++ sys/modules/vmm/Makefile
@@ -1,8 +1,11 @@
# $FreeBSD$
+.include <kmod.opts.mk>
+
KMOD= vmm
-SRCS= opt_acpi.h opt_ddb.h device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h
+SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h
+SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h
DPSRCS+= vmx_assym.h svm_assym.h
DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc
@@ -55,6 +58,10 @@
amdvi_hw.c \
svm_msr.c
+.if ${KERN_OPTS:MBHYVE_SNAPSHOT} != ""
+SRCS+= vmm_snapshot.c
+.endif
+
CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o
OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h
Index: tools/build/options/WITH_BHYVE_SNAPSHOT
===================================================================
--- /dev/null
+++ tools/build/options/WITH_BHYVE_SNAPSHOT
@@ -0,0 +1,7 @@
+.\" $FreeBSD$
+Set to include support for save and restore (snapshots) in
+.Xr bhyve 8
+and
+.Xr bhyvectl 8 .
+.Pp
+This option only affects amd64/amd64.
Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -71,10 +71,17 @@
spinup_ap.c \
iov.c
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+SRCS+= snapshot.c
+.endif
+
.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm
SRCS+= vmm_instruction_emul.c
LIBADD= vmmapi md pthread z util sbuf cam
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+LIBADD+= ucl xo
+.endif
.if ${MK_INET_SUPPORT} != "no"
CFLAGS+=-DINET
@@ -91,6 +98,14 @@
CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+CFLAGS+= -I${SRCTOP}/contrib/libucl/include
+
+# Temporary disable capsicum, until we integrate checkpoint code with it.
+CFLAGS+= -DWITHOUT_CAPSICUM
+
+CFLAGS+= -DBHYVE_SNAPSHOT
+.endif
.ifdef GDB_LOG
CFLAGS+=-DGDB_LOG
Index: usr.sbin/bhyve/Makefile.depend
===================================================================
--- usr.sbin/bhyve/Makefile.depend
+++ usr.sbin/bhyve/Makefile.depend
@@ -12,8 +12,10 @@
lib/libcompiler_rt \
lib/libmd \
lib/libthr \
+ lib/libucl \
lib/libutil \
lib/libvmmapi \
+ lib/libxo \
lib/libz \
secure/lib/libcrypto \
Index: usr.sbin/bhyve/atkbdc.h
===================================================================
--- usr.sbin/bhyve/atkbdc.h
+++ usr.sbin/bhyve/atkbdc.h
@@ -30,9 +30,14 @@
#define _ATKBDC_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct vmctx;
void atkbdc_init(struct vmctx *ctx);
void atkbdc_event(struct atkbdc_softc *sc, int iskbd);
+#ifdef BHYVE_SNAPSHOT
+int atkbdc_snapshot(struct vm_snapshot_meta *meta);
+#endif
+
#endif /* _ATKBDC_H_ */
Index: usr.sbin/bhyve/atkbdc.c
===================================================================
--- usr.sbin/bhyve/atkbdc.c
+++ usr.sbin/bhyve/atkbdc.c
@@ -33,6 +33,7 @@
#include <sys/types.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
@@ -137,6 +138,10 @@
struct aux_dev aux;
};
+#ifdef BHYVE_SNAPSHOT
+static struct atkbdc_softc *atkbdc_sc = NULL;
+#endif
+
static void
atkbdc_assert_kbd_intr(struct atkbdc_softc *sc)
{
@@ -548,7 +553,48 @@
sc->ps2kbd_sc = ps2kbd_init(sc);
sc->ps2mouse_sc = ps2mouse_init(sc);
+
+#ifdef BHYVE_SNAPSHOT
+ assert(atkbdc_sc == NULL);
+ atkbdc_sc = sc;
+#endif
+}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot the AT keyboard controller state held in the file-scope
+ * atkbdc_sc, followed by the PS/2 keyboard and PS/2 mouse it owns.
+ *
+ * The SNAPSHOT_*_OR_LEAVE macros are defined elsewhere; they are assumed to
+ * store their status in 'ret' and jump to 'done' on failure -- TODO confirm.
+ * The order of the fields below is the order in which they appear in the
+ * snapshot stream, so it must not be changed casually.
+ *
+ * Returns the status of the last operation performed.
+ */
+int
+atkbdc_snapshot(struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->outport, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(atkbdc_sc->ram,
+ sizeof(atkbdc_sc->ram), meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->curcmd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->ctrlbyte, meta, ret, done);
+ /*
+ * NOTE(review): the whole 'kbd' member is processed here and its
+ * fields are processed again individually right below; this looks
+ * redundant -- confirm whether both are intended.
+ */
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.irq_active, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.irq, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(atkbdc_sc->kbd.buffer,
+ sizeof(atkbdc_sc->kbd.buffer), meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.brd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.bwr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->kbd.bcnt, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq_active, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(atkbdc_sc->aux.irq, meta, ret, done);
+
+ /* Delegate to the attached PS/2 devices, keyboard first. */
+ ret = ps2kbd_snapshot(atkbdc_sc->ps2kbd_sc, meta);
+ if (ret != 0)
+ goto done;
+
+ ret = ps2mouse_snapshot(atkbdc_sc->ps2mouse_sc, meta);
+
+done:
+ return (ret);
}
+#endif
static void
atkbdc_dsdt(void)
Index: usr.sbin/bhyve/bhyve.8
===================================================================
--- usr.sbin/bhyve/bhyve.8
+++ usr.sbin/bhyve/bhyve.8
@@ -47,6 +47,7 @@
.Op Fl l Ar help|lpcdev Ns Op , Ns Ar conf
.Op Fl m Ar memsize Ns Op Ar K|k|M|m|G|g|T|t
.Op Fl p Ar vcpu:hostcpu
+.Op Fl r Ar file
.Op Fl s Ar help|slot,emulation Ns Op , Ns Ar conf
.Op Fl G Ar port
.Op Fl U Ar uuid
@@ -174,6 +175,21 @@
.Em hostcpu .
.It Fl P
Force the guest virtual CPU to exit when a PAUSE instruction is detected.
+.It Fl r Ar file
+Resume a guest from a snapshot.
+The guest memory contents are restored from
+.Ar file ,
+and the guest device and vCPU state are restored from the file
+.Dq Ar file Ns .kern .
+.Pp
+Note that the current snapshot file format requires that the configuration of
+devices in the new VM match the VM from which the snapshot was taken by specifying the
+same
+.Op Fl s
+and
+.Op Fl l
+options.
+The count of vCPUs and memory configuration are read from the snapshot.
.It Fl s Op Ar help|slot,emulation Ns Op , Ns Ar conf
Configure a virtual PCI slot and function.
.Pp
Index: usr.sbin/bhyve/bhyverun.h
===================================================================
--- usr.sbin/bhyve/bhyverun.h
+++ usr.sbin/bhyve/bhyverun.h
@@ -38,9 +38,12 @@
extern int guest_ncpus;
extern uint16_t cores, sockets, threads;
extern char *guest_uuid_str;
-extern char *vmname;
+extern const char *vmname;
void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len);
+#ifdef BHYVE_SNAPSHOT
+uintptr_t paddr_host2guest(struct vmctx *ctx, void *addr);
+#endif
void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu);
void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip);
Index: usr.sbin/bhyve/bhyverun.c
===================================================================
--- usr.sbin/bhyve/bhyverun.c
+++ usr.sbin/bhyve/bhyverun.c
@@ -36,7 +36,14 @@
#include <sys/capsicum.h>
#endif
#include <sys/mman.h>
+#ifdef BHYVE_SNAPSHOT
+#include <sys/socket.h>
+#include <sys/stat.h>
+#endif
#include <sys/time.h>
+#ifdef BHYVE_SNAPSHOT
+#include <sys/un.h>
+#endif
#include <amd64/vmm/intel/vmcs.h>
@@ -51,6 +58,9 @@
#include <string.h>
#include <err.h>
#include <errno.h>
+#ifdef BHYVE_SNAPSHOT
+#include <fcntl.h>
+#endif
#include <libgen.h>
#include <unistd.h>
#include <assert.h>
@@ -59,6 +69,12 @@
#include <sysexits.h>
#include <stdbool.h>
#include <stdint.h>
+#ifdef BHYVE_SNAPSHOT
+#include <ucl.h>
+#include <unistd.h>
+
+#include <libxo/xo.h>
+#endif
#include <machine/vmm.h>
#ifndef WITHOUT_CAPSICUM
@@ -81,6 +97,9 @@
#include "pci_irq.h"
#include "pci_lpc.h"
#include "smbiostbl.h"
+#ifdef BHYVE_SNAPSHOT
+#include "snapshot.h"
+#endif
#include "xmsr.h"
#include "spinup_ap.h"
#include "rtc.h"
@@ -160,7 +179,7 @@
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
-char *vmname;
+const char *vmname;
int guest_ncpus;
uint16_t cores, maxcpus, sockets, threads;
@@ -223,6 +242,9 @@
" -H: vmexit from the guest on hlt\n"
" -l: LPC device configuration\n"
" -m: memory size in MB\n"
+#ifdef BHYVE_SNAPSHOT
+ " -r: path to checkpoint file\n"
+#endif
" -p: pin 'vcpu' to 'hostcpu'\n"
" -P: vmexit from the guest on pause\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
@@ -382,6 +404,14 @@
return (vm_map_gpa(ctx, gaddr, len));
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Counterpart of paddr_guest2host(): forwards 'addr' to vm_rev_map_gpa() and
+ * returns its result.
+ */
+uintptr_t
+paddr_host2guest(struct vmctx *ctx, void *addr)
+{
+ return (vm_rev_map_gpa(ctx, addr));
+}
+#endif
+
int
fbsdrun_vmexit_on_pause(void)
{
@@ -416,6 +446,9 @@
snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
pthread_set_name_np(mtp->mt_thr, tname);
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_add(vcpu);
+#endif
gdb_cpu_add(vcpu);
vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip);
@@ -690,7 +723,13 @@
stats.vmexit_mtrap++;
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_suspend(*pvcpu);
+#endif
gdb_cpu_mtrap(*pvcpu);
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_resume(*pvcpu);
+#endif
return (VMEXIT_CONTINUE);
}
@@ -770,7 +809,13 @@
vmexit_debug(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_suspend(*pvcpu);
+#endif
gdb_cpu_suspend(*pvcpu);
+#ifdef BHYVE_SNAPSHOT
+ checkpoint_cpu_resume(*pvcpu);
+#endif
return (VMEXIT_CONTINUE);
}
@@ -972,6 +1017,22 @@
return (ctx);
}
+/*
+ * Start vCPU 'vcpu': read its current guest RIP, apply the standard
+ * capabilities plus VM_CAP_UNRESTRICTED_GUEST, and hand it to
+ * fbsdrun_addcpu() with the BSP as the originating CPU.
+ * Failures of the register read or capability set trip an assert.
+ */
+void
+spinup_vcpu(struct vmctx *ctx, int vcpu)
+{
+	uint64_t start_rip;
+	int rc;
+
+	rc = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &start_rip);
+	assert(rc == 0);
+
+	fbsdrun_set_capabilities(ctx, vcpu);
+
+	rc = vm_set_capability(ctx, vcpu, VM_CAP_UNRESTRICTED_GUEST, 1);
+	assert(rc == 0);
+
+	fbsdrun_addcpu(ctx, BSP, vcpu, start_rip);
+}
+
int
main(int argc, char *argv[])
{
@@ -983,6 +1044,13 @@
uint64_t rip;
size_t memsize;
char *optstr;
+#ifdef BHYVE_SNAPSHOT
+ char *restore_file;
+ struct restore_state rstate;
+ int vcpu;
+
+ restore_file = NULL;
+#endif
bvmcons = 0;
progname = basename(argv[0]);
@@ -997,7 +1065,11 @@
rtc_localtime = 1;
memflags = 0;
+#ifdef BHYVE_SNAPSHOT
+ optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:r:";
+#else
optstr = "abehuwxACHIPSWYp:g:G:c:s:m:l:U:";
+#endif
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
case 'a':
@@ -1043,6 +1115,11 @@
"configuration '%s'", optarg);
}
break;
+#ifdef BHYVE_SNAPSHOT
+ case 'r':
+ restore_file = optarg;
+ break;
+#endif
case 's':
if (strncmp(optarg, "help", strlen(optarg)) == 0) {
pci_print_supported_devices();
@@ -1104,12 +1181,50 @@
argc -= optind;
argv += optind;
+#ifdef BHYVE_SNAPSHOT
+ if (argc > 1 || (argc == 0 && restore_file == NULL))
+ usage(1);
+
+ if (restore_file != NULL) {
+ error = load_restore_file(restore_file, &rstate);
+ if (error) {
+ fprintf(stderr, "Failed to read checkpoint info from "
+ "file: '%s'.\n", restore_file);
+ exit(1);
+ }
+ }
+
+ if (argc == 1) {
+ vmname = argv[0];
+ } else {
+ vmname = lookup_vmname(&rstate);
+ if (vmname == NULL) {
+ fprintf(stderr, "Cannot find VM name in restore file. "
+ "Please specify one.\n");
+ exit(1);
+ }
+ }
+#else
if (argc != 1)
usage(1);
vmname = argv[0];
+#endif
ctx = do_open(vmname);
+#ifdef BHYVE_SNAPSHOT
+ if (restore_file != NULL) {
+ guest_ncpus = lookup_guest_ncpus(&rstate);
+ memflags = lookup_memflags(&rstate);
+ memsize = lookup_memsize(&rstate);
+ }
+
+ if (guest_ncpus < 1) {
+ fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
+ exit(1);
+ }
+#endif
+
max_vcpus = num_vcpus_allowed(ctx);
if (guest_ncpus > max_vcpus) {
fprintf(stderr, "%d vCPUs requested but only %d available\n",
@@ -1168,6 +1283,40 @@
assert(error == 0);
}
+#ifdef BHYVE_SNAPSHOT
+ if (restore_file != NULL) {
+ fprintf(stdout, "Pausing pci devs...\r\n");
+ if (vm_pause_user_devs(ctx) != 0) {
+ fprintf(stderr, "Failed to pause PCI device state.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring vm mem...\r\n");
+ if (restore_vm_mem(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore VM memory.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring pci devs...\r\n");
+ if (vm_restore_user_devs(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore PCI device state.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Restoring kernel structs...\r\n");
+ if (vm_restore_kern_structs(ctx, &rstate) != 0) {
+ fprintf(stderr, "Failed to restore kernel structs.\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "Resuming pci devs...\r\n");
+ if (vm_resume_user_devs(ctx) != 0) {
+ fprintf(stderr, "Failed to resume PCI device state.\n");
+ exit(1);
+ }
+ }
+#endif
+
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
assert(error == 0);
@@ -1208,11 +1357,41 @@
errx(EX_OSERR, "cap_enter() failed");
#endif
+#ifdef BHYVE_SNAPSHOT
+ if (restore_file != NULL)
+ destroy_restore_state(&rstate);
+
+ /*
+ * checkpointing thread for communication with bhyvectl
+ */
+ if (init_checkpoint_thread(ctx) < 0)
+ printf("Failed to start checkpoint thread!\r\n");
+
+ if (restore_file != NULL)
+ vm_restore_time(ctx);
+#endif
+
/*
* Add CPU 0
*/
fbsdrun_addcpu(ctx, BSP, BSP, rip);
+#ifdef BHYVE_SNAPSHOT
+ /*
+ * If we restore a VM, start all vCPUs now (including APs), otherwise,
+ * let the guest OS spin them up later via vmexits.
+ */
+ if (restore_file != NULL) {
+ for (vcpu = 0; vcpu < guest_ncpus; vcpu++) {
+ if (vcpu == BSP)
+ continue;
+
+ fprintf(stdout, "spinning up vcpu no %d...\r\n", vcpu);
+ spinup_vcpu(ctx, vcpu);
+ }
+ }
+#endif
+
/*
* Head off to the main event dispatch loop
*/
Index: usr.sbin/bhyve/block_if.h
===================================================================
--- usr.sbin/bhyve/block_if.h
+++ usr.sbin/bhyve/block_if.h
@@ -41,6 +41,9 @@
#include <sys/uio.h>
#include <sys/unistd.h>
+struct vm_snapshot_meta;
+
+
/*
* BLOCKIF_IOV_MAX is the maximum number of scatter/gather entries in
* a single request. BLOCKIF_RING_MAX is the maxmimum number of
@@ -74,5 +77,13 @@
int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq);
int blockif_close(struct blockif_ctxt *bc);
+#ifdef BHYVE_SNAPSHOT
+void blockif_pause(struct blockif_ctxt *bc);
+void blockif_resume(struct blockif_ctxt *bc);
+int blockif_snapshot_req(struct blockif_req *br,
+ struct vm_snapshot_meta *meta);
+int blockif_snapshot(struct blockif_ctxt *bc,
+ struct vm_snapshot_meta *meta);
+#endif
#endif /* _BLOCK_IF_H_ */
Index: usr.sbin/bhyve/block_if.c
===================================================================
--- usr.sbin/bhyve/block_if.c
+++ usr.sbin/bhyve/block_if.c
@@ -57,6 +57,7 @@
#include <unistd.h>
#include <machine/atomic.h>
+#include <machine/vmm_snapshot.h>
#include "bhyverun.h"
#include "mevent.h"
@@ -103,9 +104,13 @@
int bc_psectsz;
int bc_psectoff;
int bc_closing;
+ int bc_paused;
+ int bc_work_count;
pthread_t bc_btid[BLOCKIF_NUMTHR];
pthread_mutex_t bc_mtx;
pthread_cond_t bc_cond;
+ pthread_cond_t bc_paused_cond;
+ pthread_cond_t bc_work_done_cond;
/* Request elements and free/pending/busy queues */
TAILQ_HEAD(, blockif_elem) bc_freeq;
@@ -208,6 +213,18 @@
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
}
+/*
+ * Flush the backing store of 'bc': DIOCGFLUSH for a character device,
+ * fsync() otherwise. Returns 0 on success or errno on failure.
+ */
+static int
+blockif_flush_bc(struct blockif_ctxt *bc)
+{
+	int rc;
+
+	if (bc->bc_ischr)
+		rc = ioctl(bc->bc_fd, DIOCGFLUSH);
+	else
+		rc = fsync(bc->bc_fd);
+
+	return (rc != 0 ? errno : 0);
+}
+
static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
@@ -298,11 +315,7 @@
}
break;
case BOP_FLUSH:
- if (bc->bc_ischr) {
- if (ioctl(bc->bc_fd, DIOCGFLUSH))
- err = errno;
- } else if (fsync(bc->bc_fd))
- err = errno;
+ err = blockif_flush_bc(bc);
break;
case BOP_DELETE:
if (!bc->bc_candelete)
@@ -346,15 +359,30 @@
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
- while (blockif_dequeue(bc, t, &be)) {
+ bc->bc_work_count++;
+
+ /* We cannot process work if the interface is paused */
+ while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
blockif_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
+
+ bc->bc_work_count--;
+
+ /* If none of the workers are busy, notify the main thread */
+ if (bc->bc_work_count == 0)
+ pthread_cond_broadcast(&bc->bc_work_done_cond);
+
/* Check ctxt status here to see if exit requested */
if (bc->bc_closing)
break;
+
+ /* Make all worker threads wait here if the device is paused */
+ while (bc->bc_paused)
+ pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx);
+
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
}
pthread_mutex_unlock(&bc->bc_mtx);
@@ -558,6 +586,10 @@
bc->bc_psectoff = psectoff;
pthread_mutex_init(&bc->bc_mtx, NULL);
pthread_cond_init(&bc->bc_cond, NULL);
+ bc->bc_paused = 0;
+ bc->bc_work_count = 0;
+ pthread_cond_init(&bc->bc_paused_cond, NULL);
+ pthread_cond_init(&bc->bc_work_done_cond, NULL);
TAILQ_INIT(&bc->bc_freeq);
TAILQ_INIT(&bc->bc_pendq);
TAILQ_INIT(&bc->bc_busyq);
@@ -650,6 +682,8 @@
assert(bc->bc_magic == BLOCKIF_SIG);
pthread_mutex_lock(&bc->bc_mtx);
+ /* XXX: not waiting while paused */
+
/*
* Check pending requests.
*/
@@ -848,3 +882,100 @@
assert(bc->bc_magic == BLOCKIF_SIG);
return (bc->bc_candelete);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Pause the block interface 'bc': set bc_paused under bc_mtx, wait on
+ * bc_work_done_cond until bc_work_count drops to zero, then flush the
+ * backing store. A failed flush is only reported as a warning on stderr.
+ */
+void
+blockif_pause(struct blockif_ctxt *bc)
+{
+ assert(bc != NULL);
+ assert(bc->bc_magic == BLOCKIF_SIG);
+
+ pthread_mutex_lock(&bc->bc_mtx);
+ bc->bc_paused = 1;
+
+ /* The interface is paused. Wait for workers to finish their work */
+ while (bc->bc_work_count)
+ pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx);
+ pthread_mutex_unlock(&bc->bc_mtx);
+
+ /* The flush runs after the mutex has been released. */
+ if (blockif_flush_bc(bc))
+ fprintf(stderr, "%s: [WARN] failed to flush backing file.\r\n",
+ __func__);
+}
+
+/*
+ * Resume the block interface 'bc': clear bc_paused under bc_mtx and
+ * broadcast on both bc_paused_cond and bc_cond.
+ */
+void
+blockif_resume(struct blockif_ctxt *bc)
+{
+ assert(bc != NULL);
+ assert(bc->bc_magic == BLOCKIF_SIG);
+
+ pthread_mutex_lock(&bc->bc_mtx);
+ bc->bc_paused = 0;
+ /* resume the threads waiting for paused */
+ pthread_cond_broadcast(&bc->bc_paused_cond);
+ /* kick the threads after restore */
+ pthread_cond_broadcast(&bc->bc_cond);
+ pthread_mutex_unlock(&bc->bc_mtx);
+}
+
+/*
+ * Snapshot a single block request: its iovec count, offset and residual,
+ * followed by the length and guest-mapped base address of every iovec.
+ *
+ * The callback and its parameter are not touched; they must be filled in by
+ * the virtualized device that uses the interface, during its init.
+ *
+ * Returns 0 on success, EINVAL when the iovec count is outside
+ * [0, BLOCKIF_IOV_MAX], or the error left in 'ret' by a failing
+ * SNAPSHOT_* step.
+ */
+int
+blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta)
+{
+	int i;
+	struct iovec *iov;
+	int ret;
+
+	SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done);
+
+	/*
+	 * br_iovcnt may just have been read from a snapshot; never index
+	 * br_iov[] with a count larger than a request can hold.
+	 */
+	if (br->br_iovcnt < 0 || br->br_iovcnt > BLOCKIF_IOV_MAX) {
+		fprintf(stderr, "%s: invalid iovec count %d\r\n",
+		    __func__, br->br_iovcnt);
+		ret = EINVAL;
+		goto done;
+	}
+
+	/* Snapshot the iovecs. */
+	for (i = 0; i < br->br_iovcnt; i++) {
+		iov = &br->br_iov[i];
+
+		SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done);
+
+		/* We assume the iov is a guest-mapped address. */
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len,
+		    false, meta, ret, done);
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Snapshot the scalar state of the block interface 'bc'. The interface must
+ * have been paused first.
+ *
+ * Returns 0 on success, ENXIO when the interface is not paused, or the error
+ * left in 'ret' by a failing SNAPSHOT_* step.
+ */
+int
+blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta)
+{
+	int ret;
+
+	pthread_mutex_lock(&bc->bc_mtx);
+
+	/* bc_paused is written under bc_mtx, so it is read under it too. */
+	if (bc->bc_paused == 0) {
+		fprintf(stderr, "%s: Snapshot failed: "
+		    "interface not paused.\r\n", __func__);
+		ret = ENXIO;
+		goto done;
+	}
+
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done);
+
+done:
+	pthread_mutex_unlock(&bc->bc_mtx);
+	return (ret);
+}
+#endif
Index: usr.sbin/bhyve/mevent.c
===================================================================
--- usr.sbin/bhyve/mevent.c
+++ usr.sbin/bhyve/mevent.c
@@ -68,7 +68,7 @@
#define MEV_DEL_PENDING 4
#define MEV_ADD_DISABLED 5
-extern char *vmname;
+extern const char *vmname;
static pthread_t mevent_tid;
static int mevent_timid = 43;
Index: usr.sbin/bhyve/pci_ahci.c
===================================================================
--- usr.sbin/bhyve/pci_ahci.c
+++ usr.sbin/bhyve/pci_ahci.c
@@ -41,6 +41,8 @@
#include <sys/ata.h>
#include <sys/endian.h>
+#include <machine/vmm_snapshot.h>
+
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@@ -131,6 +133,7 @@
uint32_t done;
int slot;
int more;
+ int readop;
};
struct ahci_port {
@@ -724,6 +727,7 @@
aior->slot = slot;
aior->len = len;
aior->done = done;
+ aior->readop = readop;
breq = &aior->io_req;
breq->br_offset = lba + done;
ahci_build_iov(p, aior, prdt, hdr->prdtl);
@@ -1420,6 +1424,7 @@
aior->slot = slot;
aior->len = len;
aior->done = done;
+ aior->readop = 1;
breq = &aior->io_req;
breq->br_offset = lba + done;
ahci_build_iov(p, aior, prdt, hdr->prdtl);
@@ -2446,6 +2451,283 @@
return (pci_ahci_init(ctx, pi, opts, 1));
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Write the free and busy request queues of 'port' to the snapshot.
+ *
+ * Each queue is emitted as a sequence of indices into port->ioreq[]
+ * terminated by -1. Every busy entry is additionally followed by its
+ * blockif request; free entries carry no request data.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int
+pci_ahci_snapshot_save_queues(struct ahci_port *port,
+    struct vm_snapshot_meta *meta)
+{
+	struct ahci_ioreq *req;
+	int idx, ret;
+
+	/* Free queue: indices only. */
+	STAILQ_FOREACH(req, &port->iofhd, io_flist) {
+		idx = (int)(req - port->ioreq);
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+	}
+
+	/* End-of-queue marker. */
+	idx = -1;
+	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+	/* Busy queue: index followed by the request itself. */
+	TAILQ_FOREACH(req, &port->iobhd, io_blist) {
+		idx = (int)(req - port->ioreq);
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+		ret = blockif_snapshot_req(&req->io_req, meta);
+		if (ret != 0) {
+			fprintf(stderr, "%s: failed to snapshot req\r\n",
+			    __func__);
+			goto done;
+		}
+	}
+
+	/* End-of-queue marker. */
+	idx = -1;
+	SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+
+done:
+	return (ret);
+}
+
+/*
+ * Rebuild the free and busy request queues of 'port' from the snapshot.
+ *
+ * Each queue is read as a sequence of indices into port->ioreq[] terminated
+ * by -1. Every busy entry is followed by its blockif request, which is
+ * restored and re-enqueued in the block interface as a read or a write
+ * according to ioreq->readop.
+ *
+ * Returns 0 on success, EINVAL when the snapshot contains an index outside
+ * [0, port->ioqsz), or the first error reported by a callee.
+ */
+static int
+pci_ahci_snapshot_restore_queues(struct ahci_port *port,
+    struct vm_snapshot_meta *meta)
+{
+	int ret;
+	int idx;
+	struct ahci_ioreq *ioreq;
+
+	/* Empty the free queue before restoring. */
+	while (!STAILQ_EMPTY(&port->iofhd))
+		STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
+
+	/* Restore the free queue. */
+	while (1) {
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+		if (idx == -1)
+			break;
+
+		/* The index comes from the snapshot; do not trust it. */
+		if (idx < 0 || idx >= port->ioqsz) {
+			fprintf(stderr, "%s: invalid free request index %d\r\n",
+			    __func__, idx);
+			ret = EINVAL;
+			goto done;
+		}
+
+		STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
+	}
+
+	/* Restore the busy queue. */
+	while (1) {
+		SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
+		if (idx == -1)
+			break;
+
+		/* The index comes from the snapshot; do not trust it. */
+		if (idx < 0 || idx >= port->ioqsz) {
+			fprintf(stderr, "%s: invalid busy request index %d\r\n",
+			    __func__, idx);
+			ret = EINVAL;
+			goto done;
+		}
+
+		ioreq = &port->ioreq[idx];
+		TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
+
+		/*
+		 * Restore only the busy requests; other requests are
+		 * not valid.
+		 */
+		ret = blockif_snapshot_req(&ioreq->io_req, meta);
+		if (ret != 0) {
+			fprintf(stderr, "%s: failed to restore request\r\n",
+			    __func__);
+			goto done;
+		}
+
+		/* Re-enqueue the requests in the block interface. */
+		if (ioreq->readop)
+			ret = blockif_read(port->bctx, &ioreq->io_req);
+		else
+			ret = blockif_write(port->bctx, &ioreq->io_req);
+
+		if (ret != 0) {
+			fprintf(stderr,
+			    "%s: failed to re-enqueue request\r\n",
+			    __func__);
+			goto done;
+		}
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Snapshot callback of the AHCI controller emulation.
+ *
+ * The device instance is taken from meta->dev_data and the controller state
+ * from its pi_arg. The controller-wide registers are processed first, then
+ * each of the MAX_PORTS ports: the presence of a block backend and the port
+ * number are checked, and for ports with a backend the guest-mapped command
+ * list and received-FIS area, the port registers, the request array, the
+ * free/busy queues and finally the block interface are processed.
+ *
+ * The SNAPSHOT_*_OR_LEAVE macros are defined elsewhere; they are assumed to
+ * store their status in 'ret' and jump to 'done' on failure -- TODO confirm.
+ * The order of the fields below is the order in which they appear in the
+ * snapshot stream.
+ *
+ * Returns 0 on success, EINVAL on a port mismatch or an unknown meta->op,
+ * or the first error reported by a callee.
+ */
+static int
+pci_ahci_snapshot(struct vm_snapshot_meta *meta)
+{
+ int i, j, ret;
+ void *bctx;
+ struct pci_devinst *pi;
+ struct pci_ahci_softc *sc;
+ struct ahci_port *port;
+ struct ahci_cmd_hdr *hdr;
+ struct ahci_ioreq *ioreq;
+
+ pi = meta->dev_data;
+ sc = pi->pi_arg;
+
+ /* TODO: add mtx lock/unlock */
+
+ /* Controller-wide state. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->ports, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->cap, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ghc, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->is, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->pi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->vs, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ccc_ctl, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ccc_pts, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->em_loc, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->em_ctl, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->cap2, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->bohc, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->lintr, meta, ret, done);
+
+ for (i = 0; i < MAX_PORTS; i++) {
+ port = &sc->port[i];
+
+ /*
+ * The local 'bctx' is only used to compare NULL-ness with the
+ * live port->bctx below. On save it is seeded from the port; on
+ * restore it is presumably filled in from the snapshot by the
+ * macro -- TODO confirm.
+ */
+ if (meta->op == VM_SNAPSHOT_SAVE)
+ bctx = port->bctx;
+
+ SNAPSHOT_VAR_OR_LEAVE(bctx, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->port, meta, ret, done);
+
+ /* Mostly for restore; save is ensured by the lines above. */
+ if (((bctx == NULL) && (port->bctx != NULL)) ||
+ ((bctx != NULL) && (port->bctx == NULL))) {
+ fprintf(stderr, "%s: ports not matching\r\n", __func__);
+ ret = EINVAL;
+ goto done;
+ }
+
+ /* Ports without a block backend carry no further state. */
+ if (port->bctx == NULL)
+ continue;
+
+ if (port->port != i) {
+ fprintf(stderr, "%s: ports not matching: "
+ "actual: %d expected: %d\r\n",
+ __func__, port->port, i);
+ ret = EINVAL;
+ goto done;
+ }
+
+ /* Guest-mapped command list and received-FIS area. */
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->cmd_lst,
+ AHCI_CL_SIZE * AHCI_MAX_SLOTS, false, meta, ret, done);
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(port->rfis, 256, false, meta,
+ ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(port->ident, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->atapi, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->reset, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->waitforclear, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->mult_sectors, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->xfermode, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->err_cfis, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->sense_key, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->asc, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->ccs, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->pending, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(port->clb, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->clbu, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->fb, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->fbu, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->is, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->ie, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->cmd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->unused0, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->tfd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->sig, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->ssts, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->sctl, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->serr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->sact, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->ci, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->sntf, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
+
+ for (j = 0; j < port->ioqsz; j++) {
+ ioreq = &port->ioreq[j];
+
+ /* blockif_req snapshot done only for busy requests. */
+ /*
+ * NOTE(review): 'hdr' is derived from ioreq->slot before
+ * ioreq->slot itself is processed a few lines below, so on
+ * restore this presumably uses the pre-restore slot value --
+ * confirm that the resulting cfis length is the intended one.
+ */
+ hdr = (struct ahci_cmd_hdr *)(port->cmd_lst +
+ ioreq->slot * AHCI_CL_SIZE);
+ SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
+ 0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry),
+ false, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
+ }
+
+ /* Perform save / restore specific operations. */
+ if (meta->op == VM_SNAPSHOT_SAVE) {
+ ret = pci_ahci_snapshot_save_queues(port, meta);
+ if (ret != 0)
+ goto done;
+ } else if (meta->op == VM_SNAPSHOT_RESTORE) {
+ ret = pci_ahci_snapshot_restore_queues(port, meta);
+ if (ret != 0)
+ goto done;
+ } else {
+ ret = EINVAL;
+ goto done;
+ }
+
+ /*
+ * NOTE(review): this path runs for both save and restore, but
+ * the message below always says "restore".
+ */
+ ret = blockif_snapshot(port->bctx, meta);
+ if (ret != 0) {
+ fprintf(stderr, "%s: failed to restore blockif\r\n",
+ __func__);
+ goto done;
+ }
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Pause callback: pause the block interface of every port that has one.
+ * Always returns 0.
+ */
+static int
+pci_ahci_pause(struct vmctx *ctx, struct pci_devinst *pi)
+{
+	struct pci_ahci_softc *sc = pi->pi_arg;
+	struct blockif_ctxt *backend;
+	int portno;
+
+	for (portno = 0; portno < MAX_PORTS; portno++) {
+		backend = sc->port[portno].bctx;
+		if (backend != NULL)
+			blockif_pause(backend);
+	}
+
+	return (0);
+}
+
+/*
+ * Resume callback: resume the block interface of every port that has one.
+ * Always returns 0.
+ */
+static int
+pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
+{
+	struct pci_ahci_softc *sc = pi->pi_arg;
+	struct blockif_ctxt *backend;
+	int portno;
+
+	for (portno = 0; portno < MAX_PORTS; portno++) {
+		backend = sc->port[portno].bctx;
+		if (backend != NULL)
+			blockif_resume(backend);
+	}
+
+	return (0);
+}
+#endif
+
/*
* Use separate emulation names to distinguish drive and atapi devices
*/
@@ -2453,7 +2735,12 @@
.pe_emu = "ahci",
.pe_init = pci_ahci_hd_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
+#endif
};
PCI_EMUL_SET(pci_de_ahci);
@@ -2461,7 +2748,12 @@
.pe_emu = "ahci-hd",
.pe_init = pci_ahci_hd_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
+#endif
};
PCI_EMUL_SET(pci_de_ahci_hd);
@@ -2469,6 +2761,11 @@
.pe_emu = "ahci-cd",
.pe_init = pci_ahci_atapi_init,
.pe_barwrite = pci_ahci_write,
- .pe_barread = pci_ahci_read
+ .pe_barread = pci_ahci_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = pci_ahci_snapshot,
+ .pe_pause = pci_ahci_pause,
+ .pe_resume = pci_ahci_resume,
+#endif
};
PCI_EMUL_SET(pci_de_ahci_cd);
Index: usr.sbin/bhyve/pci_e82545.c
===================================================================
--- usr.sbin/bhyve/pci_e82545.c
+++ usr.sbin/bhyve/pci_e82545.c
@@ -46,6 +46,8 @@
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
+#include <machine/vmm_snapshot.h>
+
#include <err.h>
#include <errno.h>
#include <fcntl.h>
@@ -2357,11 +2359,168 @@
return (0);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Save or restore the e82545 device-model state kept in the softc that
+ * hangs off meta->dev_data.
+ *
+ * The fields are streamed in a fixed order, which is the snapshot layout:
+ * do not reorder them.  The descriptor ring pointers go through
+ * SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE and are sized by the ring length
+ * registers, so those registers are streamed before the pointers.
+ *
+ * Returns 0 on success, or the error left in 'ret' by the first failing
+ * SNAPSHOT_* step.
+ */
+static int
+e82545_snapshot(struct vm_snapshot_meta *meta)
+{
+	int i;
+	int ret;
+	struct e82545_softc *sc;
+	struct pci_devinst *pi;
+	uint64_t bitmap_value;
+
+	pi = meta->dev_data;
+	sc = pi->pi_arg;
+
+	/* esc_mevp and esc_mevpitr should be reinitiated at init. */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_mac, meta, ret, done);
+
+	/* General */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_CTRL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCAH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCT, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_VET, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCTTV, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_LEDCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_PBA, meta, ret, done);
+
+	/* Interrupt control */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_irq_asserted, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICR, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ITR, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_ICS, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMS, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_IMC, meta, ret, done);
+
+	/*
+	 * Transmit
+	 *
+	 * The fields in the unions are in superposition to access certain
+	 * bytes in the larger uint variables.
+	 * e.g., ip_config = [ipcss|ipcso|ipcse0|ipcse1]
+	 */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.lower_setup.ip_config,
+	    meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.upper_setup.tcp_config,
+	    meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.cmd_and_length, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_txctx.tcp_seg_setup.data,
+	    meta, ret, done);
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_enabled, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tx_active, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXCW, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIPG, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_AIT, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_tdba, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDBAH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDLEN, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDHr, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TDT, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TIDV, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TXDCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_TADV, meta, ret, done);
+
+	/* Has dependency on esc_TDLEN; reorder of fields from struct. */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_txdesc, sc->esc_TDLEN,
+	    true, meta, ret, done);
+
+	/* L2 frame acceptance */
+	for (i = 0; i < nitems(sc->esc_uni); i++) {
+		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_valid,
+		    meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_addrsel,
+		    meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(sc->esc_uni[i].eu_eth, meta, ret, done);
+	}
+
+	SNAPSHOT_BUF_OR_LEAVE(sc->esc_fmcast, sizeof(sc->esc_fmcast),
+	    meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(sc->esc_fvlan, sizeof(sc->esc_fvlan),
+	    meta, ret, done);
+
+	/* Receive */
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_enabled, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_active, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rx_loopback, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_FCRTH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_rdba, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDBAH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDLEN, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDH, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDT, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RDTR, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXDCTL, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RADV, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RSRPD, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->esc_RXCSUM, meta, ret, done);
+
+	/*
+	 * Has dependency on esc_RDLEN; reorder of fields from struct.
+	 * The receive ring must be sized by the receive length register,
+	 * not by esc_TDLEN.
+	 */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->esc_rxdesc, sc->esc_RDLEN,
+	    true, meta, ret, done);
+
+	/* IO Port register access */
+	SNAPSHOT_VAR_OR_LEAVE(sc->io_addr, meta, ret, done);
+
+	/* Shadow copy of MDIC */
+	SNAPSHOT_VAR_OR_LEAVE(sc->mdi_control, meta, ret, done);
+
+	/* Shadow copy of EECD */
+	SNAPSHOT_VAR_OR_LEAVE(sc->eeprom_control, meta, ret, done);
+
+	/* Latest NVM in/out */
+	SNAPSHOT_VAR_OR_LEAVE(sc->nvm_data, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->nvm_opaddr, meta, ret, done);
+
+	/* Stats */
+	SNAPSHOT_VAR_OR_LEAVE(sc->missed_pkt_count, meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(sc->pkt_rx_by_size, sizeof(sc->pkt_rx_by_size),
+	    meta, ret, done);
+	SNAPSHOT_BUF_OR_LEAVE(sc->pkt_tx_by_size, sizeof(sc->pkt_tx_by_size),
+	    meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_rx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_rx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_rx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->good_pkt_tx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->bcast_pkt_tx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->mcast_pkt_tx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->oversize_rx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->tso_tx_count, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_rx, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->good_octets_tx, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->missed_octets, meta, ret, done);
+
+	/*
+	 * NVM state is streamed through a temporary, one slot per field.
+	 * NOTE(review): the first slot is assumed to belong to nvm_mode
+	 * (streaming nvm_bits in both slots would never save nvm_mode) --
+	 * confirm against struct e82545_softc.
+	 */
+	bitmap_value = 0;
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		bitmap_value = sc->nvm_mode;
+	SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done);
+	if (meta->op == VM_SNAPSHOT_RESTORE)
+		sc->nvm_mode = bitmap_value;
+
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		bitmap_value = sc->nvm_bits;
+	SNAPSHOT_VAR_OR_LEAVE(bitmap_value, meta, ret, done);
+	if (meta->op == VM_SNAPSHOT_RESTORE)
+		sc->nvm_bits = bitmap_value;
+
+	/* EEPROM data */
+	SNAPSHOT_BUF_OR_LEAVE(sc->eeprom_data, sizeof(sc->eeprom_data),
+	    meta, ret, done);
+
+done:
+	return (ret);
+}
+#endif
+
struct pci_devemu pci_de_e82545 = {
.pe_emu = "e1000",
.pe_init = e82545_init,
.pe_barwrite = e82545_write,
- .pe_barread = e82545_read
+ .pe_barread = e82545_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = e82545_snapshot,
+#endif
};
PCI_EMUL_SET(pci_de_e82545);
Index: usr.sbin/bhyve/pci_emul.h
===================================================================
--- usr.sbin/bhyve/pci_emul.h
+++ usr.sbin/bhyve/pci_emul.h
@@ -45,6 +45,7 @@
struct vmctx;
struct pci_devinst;
struct memory_region;
+struct vm_snapshot_meta;
struct pci_devemu {
char *pe_emu; /* Name of device emulation */
@@ -71,6 +72,11 @@
uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
+
+ /* Save/restore device state */
+ int (*pe_snapshot)(struct vm_snapshot_meta *meta);
+ int (*pe_pause)(struct vmctx *ctx, struct pci_devinst *pi);
+ int (*pe_resume)(struct vmctx *ctx, struct pci_devinst *pi);
};
#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x);
@@ -246,6 +252,11 @@
void pci_write_dsdt(void);
uint64_t pci_ecfg_base(void);
int pci_bus_configured(int bus);
+#ifdef BHYVE_SNAPSHOT
+int pci_snapshot(struct vm_snapshot_meta *meta);
+int pci_pause(struct vmctx *ctx, const char *dev_name);
+int pci_resume(struct vmctx *ctx, const char *dev_name);
+#endif
static __inline void
pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val)
Index: usr.sbin/bhyve/pci_emul.c
===================================================================
--- usr.sbin/bhyve/pci_emul.c
+++ usr.sbin/bhyve/pci_emul.c
@@ -45,6 +45,7 @@
#include <stdbool.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
#include "acpi.h"
@@ -1962,6 +1963,191 @@
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata);
INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata);
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Saves/restores the generic PCI state of the device instance found in
+ * meta->dev_data: MSI and MSI-X configuration, config space, BARs and the
+ * MSI-X table entries. Returns 0 on success.
+ */
+static int
+pci_snapshot_pci_dev(struct vm_snapshot_meta *meta)
+{
+ struct pci_devinst *pi;
+ int i;
+ int ret;
+
+ pi = meta->dev_data;
+
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.addr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.msg_data, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msi.maxmsgnum, meta, ret, done);
+
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_bar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_bar, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_offset, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table_count, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_offset, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_size, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.function_mask, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.pba_page_offset, meta, ret, done);
+
+ SNAPSHOT_BUF_OR_LEAVE(pi->pi_cfgdata, sizeof(pi->pi_cfgdata),
+ meta, ret, done);
+
+ for (i = 0; i < nitems(pi->pi_bar); i++) {
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].type, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].size, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_bar[i].addr, meta, ret, done);
+ }
+
+ /*
+  * Save/restore the MSI-X table entries. The loop bound is the
+  * table_count that was itself saved/restored above.
+  */
+ for (i = 0; i < pi->pi_msix.table_count; i++) {
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].addr,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].msg_data,
+ meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(pi->pi_msix.table[i].vector_control,
+ meta, ret, done);
+ }
+
+done:
+ return (ret);
+}
+
+/*
+ * Walk every configured bus/slot/function looking for the first function
+ * whose fi_name equals 'dev_name'.  On a match, store the emulation
+ * descriptor in '*pde' and the device instance in '*pdi' and return 0.
+ * Returns EINVAL when no function carries that name.
+ */
+static int
+pci_find_slotted_dev(const char *dev_name, struct pci_devemu **pde,
+    struct pci_devinst **pdi)
+{
+	struct businfo *businfo;
+	struct funcinfo *finfo;
+	int b, s, f;
+
+	assert(dev_name != NULL);
+	assert(pde != NULL);
+	assert(pdi != NULL);
+
+	for (b = 0; b < MAXBUSES; b++) {
+		businfo = pci_businfo[b];
+		if (businfo == NULL)
+			continue;
+
+		for (s = 0; s < MAXSLOTS; s++) {
+			for (f = 0; f < MAXFUNCS; f++) {
+				finfo = &businfo->slotinfo[s].si_funcs[f];
+				if (finfo->fi_name == NULL ||
+				    strcmp(dev_name, finfo->fi_name) != 0)
+					continue;
+
+				*pde = pci_emul_finddev(finfo->fi_name);
+				assert(*pde != NULL);
+
+				*pdi = finfo->fi_devi;
+				return (0);
+			}
+		}
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Save or restore the PCI device named by meta->dev_name: first the
+ * generic PCI state, then the device model's own state through its
+ * pe_snapshot callback.
+ *
+ * When no device has that name the snapshot buffer is zeroed and 0 is
+ * returned.  Returns -1 when the device model has no pe_snapshot callback
+ * or the generic PCI state fails; otherwise returns the callback's result.
+ */
+int
+pci_snapshot(struct vm_snapshot_meta *meta)
+{
+	struct pci_devemu *emu;
+	struct pci_devinst *inst;
+
+	assert(meta->dev_name != NULL);
+
+	if (pci_find_slotted_dev(meta->dev_name, &emu, &inst) != 0) {
+		fprintf(stderr, "%s: no such name: %s\r\n",
+		    __func__, meta->dev_name);
+		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+		return (0);
+	}
+
+	/* The per-device callbacks locate their state through dev_data. */
+	meta->dev_data = inst;
+
+	if (emu->pe_snapshot == NULL) {
+		fprintf(stderr, "%s: not implemented yet for: %s\r\n",
+		    __func__, meta->dev_name);
+		return (-1);
+	}
+
+	if (pci_snapshot_pci_dev(meta) != 0) {
+		fprintf(stderr, "%s: failed to snapshot pci dev\r\n",
+		    __func__);
+		return (-1);
+	}
+
+	return ((*emu->pe_snapshot)(meta));
+}
+
+/*
+ * Pause the PCI device named 'dev_name' through its pe_pause callback.
+ * An unknown name or a device model without pe_pause is only logged and
+ * reported as success (0); otherwise the callback's result is returned.
+ */
+int
+pci_pause(struct vmctx *ctx, const char *dev_name)
+{
+	struct pci_devemu *emu;
+	struct pci_devinst *inst;
+
+	assert(dev_name != NULL);
+
+	/*
+	 * It is possible to call this function without
+	 * checking that the device is inserted first.
+	 */
+	if (pci_find_slotted_dev(dev_name, &emu, &inst) != 0) {
+		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
+		return (0);
+	}
+
+	/* The pause/resume functionality is optional. */
+	if (emu->pe_pause == NULL) {
+		fprintf(stderr, "%s: not implemented for: %s\n",
+		    __func__, dev_name);
+		return (0);
+	}
+
+	return ((*emu->pe_pause)(ctx, inst));
+}
+
+/*
+ * Resume the PCI device named 'dev_name' through its pe_resume callback.
+ * An unknown name or a device model without pe_resume is only logged and
+ * reported as success (0); otherwise the callback's result is returned.
+ */
+int
+pci_resume(struct vmctx *ctx, const char *dev_name)
+{
+	struct pci_devemu *emu;
+	struct pci_devinst *inst;
+
+	assert(dev_name != NULL);
+
+	/*
+	 * It is possible to call this function without
+	 * checking that the device is inserted first.
+	 */
+	if (pci_find_slotted_dev(dev_name, &emu, &inst) != 0) {
+		fprintf(stderr, "%s: no such name: %s\n", __func__, dev_name);
+		return (0);
+	}
+
+	/* The pause/resume functionality is optional. */
+	if (emu->pe_resume == NULL) {
+		fprintf(stderr, "%s: not implemented for: %s\n",
+		    __func__, dev_name);
+		return (0);
+	}
+
+	return ((*emu->pe_resume)(ctx, inst));
+}
+#endif
+
#define PCI_EMUL_TEST
#ifdef PCI_EMUL_TEST
/*
@@ -1970,7 +2156,7 @@
#define DIOSZ 8
#define DMEMSZ 4096
struct pci_emul_dsoftc {
- uint8_t ioregs[DIOSZ];
+ uint8_t ioregs[DIOSZ];
uint8_t memregs[2][DMEMSZ];
};
@@ -2062,7 +2248,7 @@
} else {
printf("diow: memw unknown size %d\n", size);
}
-
+
/*
* magic interrupt ??
*/
@@ -2087,7 +2273,7 @@
offset, size);
return (0);
}
-
+
value = 0;
if (size == 1) {
value = sc->ioregs[offset];
@@ -2106,7 +2292,7 @@
offset, size);
return (0);
}
-
+
i = baridx - 1; /* 'memregs' index */
if (size == 1) {
@@ -2131,11 +2317,23 @@
return (value);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot callback for the "dummy" test device: no state is saved or
+ * restored, and 0 is always returned.  File-local, like the other
+ * per-device snapshot callbacks, since it is only referenced from the
+ * pci_dummy descriptor below.
+ */
+static int
+pci_emul_snapshot(struct vm_snapshot_meta *meta)
+{
+
+	return (0);
+}
+#endif
+
struct pci_devemu pci_dummy = {
.pe_emu = "dummy",
.pe_init = pci_emul_dinit,
.pe_barwrite = pci_emul_diow,
- .pe_barread = pci_emul_dior
+ .pe_barread = pci_emul_dior,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = pci_emul_snapshot,
+#endif
};
PCI_EMUL_SET(pci_dummy);
Index: usr.sbin/bhyve/pci_fbuf.c
===================================================================
--- usr.sbin/bhyve/pci_fbuf.c
+++ usr.sbin/bhyve/pci_fbuf.c
@@ -35,6 +35,7 @@
#include <sys/mman.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
#include <stdio.h>
@@ -439,10 +440,26 @@
return (error);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Saves/restores FB_SIZE bytes starting at fbuf_sc->fb_base.
+ * Returns the result left in 'ret' by SNAPSHOT_BUF_OR_LEAVE.
+ */
+static int
+pci_fbuf_snapshot(struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_BUF_OR_LEAVE(fbuf_sc->fb_base, FB_SIZE, meta, ret, err);
+
+err:
+ return (ret);
+}
+#endif
+
struct pci_devemu pci_fbuf = {
.pe_emu = "fbuf",
.pe_init = pci_fbuf_init,
.pe_barwrite = pci_fbuf_write,
- .pe_barread = pci_fbuf_read
+ .pe_barread = pci_fbuf_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = pci_fbuf_snapshot,
+#endif
};
PCI_EMUL_SET(pci_fbuf);
Index: usr.sbin/bhyve/pci_lpc.c
===================================================================
--- usr.sbin/bhyve/pci_lpc.c
+++ usr.sbin/bhyve/pci_lpc.c
@@ -34,6 +34,7 @@
#include <sys/types.h>
#include <machine/vmm.h>
+#include <machine/vmm_snapshot.h>
#include <stdio.h>
#include <stdlib.h>
@@ -451,12 +452,35 @@
pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5));
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Save or restore every LPC UART, in unit order, through uart_snapshot().
+ * Stops at the first failure and returns its error; returns 0 when all
+ * units succeed (or when there are no units to process).
+ */
+static int
+pci_lpc_snapshot(struct vm_snapshot_meta *meta)
+{
+	int unit, ret;
+
+	/* Defined result even if the loop body never executes. */
+	ret = 0;
+	for (unit = 0; unit < LPC_UART_NUM; unit++) {
+		ret = uart_snapshot(lpc_uart_softc[unit].uart_softc, meta);
+		if (ret != 0)
+			break;
+	}
+
+	return (ret);
+}
+#endif
+
struct pci_devemu pci_de_lpc = {
.pe_emu = "lpc",
.pe_init = pci_lpc_init,
.pe_write_dsdt = pci_lpc_write_dsdt,
.pe_cfgwrite = pci_lpc_cfgwrite,
.pe_barwrite = pci_lpc_write,
- .pe_barread = pci_lpc_read
+ .pe_barread = pci_lpc_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = pci_lpc_snapshot,
+#endif
};
PCI_EMUL_SET(pci_de_lpc);
Index: usr.sbin/bhyve/pci_virtio_block.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_block.c
+++ usr.sbin/bhyve/pci_virtio_block.c
@@ -39,6 +39,8 @@
#include <sys/ioctl.h>
#include <sys/disk.h>
+#include <machine/vmm_snapshot.h>
+
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@@ -150,6 +152,11 @@
static void pci_vtblk_notify(void *, struct vqueue_info *);
static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
+#ifdef BHYVE_SNAPSHOT
+static void pci_vtblk_pause(void *);
+static void pci_vtblk_resume(void *);
+static int pci_vtblk_snapshot(void *, struct vm_snapshot_meta *);
+#endif
static struct virtio_consts vtblk_vi_consts = {
"vtblk", /* our name */
@@ -161,6 +168,11 @@
pci_vtblk_cfgwrite, /* write PCI config */
NULL, /* apply negotiated features */
VTBLK_S_HOSTCAPS, /* our capabilities */
+#ifdef BHYVE_SNAPSHOT
+ pci_vtblk_pause, /* pause blockif threads */
+ pci_vtblk_resume, /* resume blockif threads */
+ pci_vtblk_snapshot, /* save / restore device state */
+#endif
};
static void
@@ -172,6 +184,40 @@
vi_reset_dev(&sc->vbsc_vs);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * virtio-blk pause hook: pauses the block backend attached to the device.
+ * 'vsc' is the device's struct pci_vtblk_softc.
+ */
+static void
+pci_vtblk_pause(void *vsc)
+{
+	struct pci_vtblk_softc *blk;
+
+	blk = vsc;
+
+	DPRINTF(("vtblk: device pause requested !\n"));
+	blockif_pause(blk->bc);
+}
+
+/*
+ * virtio-blk resume hook: resumes the block backend attached to the
+ * device.  'vsc' is the device's struct pci_vtblk_softc.
+ */
+static void
+pci_vtblk_resume(void *vsc)
+{
+	struct pci_vtblk_softc *blk;
+
+	blk = vsc;
+
+	DPRINTF(("vtblk: device resume requested !\n"));
+	blockif_resume(blk->bc);
+}
+
+/*
+ * Saves/restores the virtio-blk specific state: the vbsc_cfg and
+ * vbsc_ident fields of the softc passed in 'vsc'. Returns 0 on success.
+ */
+static int
+pci_vtblk_snapshot(void *vsc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct pci_vtblk_softc *sc = vsc;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->vbsc_cfg, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->vbsc_ident, sizeof(sc->vbsc_ident),
+ meta, ret, done);
+
+done:
+ return (ret);
+}
+#endif
+
static void
pci_vtblk_done(struct blockif_req *br, int err)
{
@@ -419,6 +465,9 @@
.pe_emu = "virtio-blk",
.pe_init = pci_vtblk_init,
.pe_barwrite = vi_pci_write,
- .pe_barread = vi_pci_read
+ .pe_barread = vi_pci_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = vi_pci_snapshot,
+#endif
};
PCI_EMUL_SET(pci_de_vblk);
Index: usr.sbin/bhyve/pci_virtio_net.c
===================================================================
--- usr.sbin/bhyve/pci_virtio_net.c
+++ usr.sbin/bhyve/pci_virtio_net.c
@@ -36,6 +36,7 @@
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
+#include <machine/vmm_snapshot.h>
#include <net/ethernet.h>
#include <net/if.h> /* IFNAMSIZ */
@@ -124,6 +125,11 @@
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);
+#ifdef BHYVE_SNAPSHOT
+static void pci_vtnet_pause(void *);
+static void pci_vtnet_resume(void *);
+static int pci_vtnet_snapshot(void *, struct vm_snapshot_meta *);
+#endif
static struct virtio_consts vtnet_vi_consts = {
"vtnet", /* our name */
@@ -135,6 +141,11 @@
pci_vtnet_cfgwrite, /* write PCI config */
pci_vtnet_neg_features, /* apply negotiated features */
VTNET_S_HOSTCAPS, /* our capabilities */
+#ifdef BHYVE_SNAPSHOT
+ pci_vtnet_pause, /* pause rx/tx threads */
+ pci_vtnet_resume, /* resume rx/tx threads */
+ pci_vtnet_snapshot, /* save / restore device state */
+#endif
};
static void
@@ -172,6 +183,68 @@
pthread_mutex_unlock(&sc->rx_mtx);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * virtio-net pause hook. Takes rx_mtx, then polls (dropping and retaking
+ * tx_mtx around a 10 ms sleep) until tx_in_progress is clear.
+ * Returns with BOTH rx_mtx and tx_mtx held; pci_vtnet_resume releases them.
+ */
+static void
+pci_vtnet_pause(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device pause requested !\n"));
+
+ /* Acquire the RX lock to block RX processing. */
+ pthread_mutex_lock(&sc->rx_mtx);
+
+ /* Wait for the transmit thread to finish its processing. */
+ pthread_mutex_lock(&sc->tx_mtx);
+ while (sc->tx_in_progress) {
+ pthread_mutex_unlock(&sc->tx_mtx);
+ usleep(10000);
+ pthread_mutex_lock(&sc->tx_mtx);
+ }
+}
+
+/*
+ * virtio-net resume hook. Releases tx_mtx and then rx_mtx, the two locks
+ * that pci_vtnet_pause leaves held; it must only be called after a pause.
+ */
+static void
+pci_vtnet_resume(void *vsc)
+{
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device resume requested !\n"));
+
+ pthread_mutex_unlock(&sc->tx_mtx);
+ /* The RX lock should have been acquired in vtnet_pause. */
+ pthread_mutex_unlock(&sc->rx_mtx);
+}
+
+/*
+ * Saves/restores the virtio-net specific state of the softc passed in
+ * 'vsc'. Returns 0 on success.
+ */
+static int
+pci_vtnet_snapshot(void *vsc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct pci_vtnet_softc *sc = vsc;
+
+ DPRINTF(("vtnet: device snapshot requested !\n"));
+
+ /*
+ * Queues and consts should have been saved by the more generic
+ * vi_pci_snapshot function. We need to save only our features and
+ * config.
+ */
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_features, meta, ret, done);
+
+ /* Force reapplying the negotiated features at restore time */
+ if (meta->op == VM_SNAPSHOT_RESTORE)
+ pci_vtnet_neg_features(sc, sc->vsc_features);
+
+ /*
+  * These fields are streamed after the features were reapplied, so on
+  * restore the values from the snapshot are the ones left in the softc.
+  */
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_config, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rx_vhdrlen, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rx_merge, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->vsc_rx_ready, meta, ret, done);
+
+done:
+ return (ret);
+}
+#endif
+
static void
pci_vtnet_rx(struct pci_vtnet_softc *sc)
{
@@ -529,6 +602,11 @@
.pe_emu = "virtio-net",
.pe_init = pci_vtnet_init,
.pe_barwrite = vi_pci_write,
- .pe_barread = vi_pci_read
+ .pe_barread = vi_pci_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = vi_pci_snapshot,
+ .pe_pause = vi_pci_pause,
+ .pe_resume = vi_pci_resume,
+#endif
};
PCI_EMUL_SET(pci_de_vnet);
Index: usr.sbin/bhyve/pci_xhci.c
===================================================================
--- usr.sbin/bhyve/pci_xhci.c
+++ usr.sbin/bhyve/pci_xhci.c
@@ -48,6 +48,8 @@
#include <pthread.h>
#include <unistd.h>
+#include <machine/vmm_snapshot.h>
+
#include <dev/usb/usbdi.h>
#include <dev/usb/usb.h>
#include <dev/usb/usb_freebsd.h>
@@ -150,6 +152,8 @@
#define FIELD_COPY(a,b,m,s) (((a) & ~((m) << (s))) | \
(((b) & ((m) << (s)))))
+#define SNAP_DEV_NAME_LEN 128
+
struct pci_xhci_trb_ring {
uint64_t ringaddr; /* current dequeue guest address */
uint32_t ccs; /* consumer cycle state */
@@ -285,9 +289,10 @@
#define XHCI_HALTED(sc) ((sc)->opregs.usbsts & XHCI_STS_HCH)
+#define XHCI_GADDR_SIZE(a) (XHCI_PADDR_SZ - \
+ (((uint64_t) (a)) & (XHCI_PADDR_SZ - 1)))
#define XHCI_GADDR(sc,a) paddr_guest2host((sc)->xsc_pi->pi_vmctx, \
- (a), \
- XHCI_PADDR_SZ - ((a) & (XHCI_PADDR_SZ-1)))
+ (a), XHCI_GADDR_SIZE(a))
static int xhci_in_use;
@@ -2853,12 +2858,265 @@
return (error);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Build the slot -> device-instance map used when saving.
+ *
+ * For every slot index i in [1, XHCI_MAX_SLOTS], maps[i] receives the
+ * index j in [1, XHCI_MAX_DEVS] of the device instance the slot points
+ * at, or 0 when the slot is empty or matches no instance.  Both tables
+ * are indexed from 1, so 'maps' must hold XHCI_MAX_SLOTS + 1 entries.
+ */
+static void
+pci_xhci_map_devs_slots(struct pci_xhci_softc *sc, int maps[])
+{
+	int i, j;
+	struct pci_xhci_dev_emu *slot;
+
+	/* Indices run 1..XHCI_MAX_SLOTS, so the last entry is cleared too. */
+	memset(maps, 0, sizeof(maps[0]) * (XHCI_MAX_SLOTS + 1));
+
+	for (i = 1; i <= XHCI_MAX_SLOTS; i++) {
+		slot = XHCI_SLOTDEV_PTR(sc, i);
+
+		/* An empty slot must not "match" an empty device instance. */
+		if (slot == NULL)
+			continue;
+
+		for (j = 1; j <= XHCI_MAX_DEVS; j++) {
+			if (slot == XHCI_DEVINST_PTR(sc, j))
+				maps[i] = j;
+		}
+	}
+}
+/*
+ * Save or restore the transfer state of endpoint 'idx' of 'dev'.
+ *
+ * The endpoint's ep_xfer pointer value is streamed first and acts as a
+ * presence flag: when it is NULL nothing else is streamed for the endpoint.
+ * On restore the endpoint is re-initialised with pci_xhci_init_ep() and the
+ * saved contents are read into the resulting transfer.
+ *
+ * Returns 0 on success, ENOMEM when the usb_device_request buffer cannot
+ * be allocated on restore, or the error set by a failing SNAPSHOT_* step.
+ */
+static int
+pci_xhci_snapshot_ep(struct pci_xhci_softc *sc, struct pci_xhci_dev_emu *dev,
+    int idx, struct vm_snapshot_meta *meta)
+{
+	int k;
+	int ret;
+	struct usb_data_xfer *xfer;
+	struct usb_data_xfer_block *xfer_block;
+
+	/* On restore 'xfer' is filled in from the snapshot stream below. */
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		xfer = dev->eps[idx].ep_xfer;
+
+	SNAPSHOT_VAR_OR_LEAVE(xfer, meta, ret, done);
+	if (xfer == NULL) {
+		ret = 0;
+		goto done;
+	}
+
+	/* The streamed pointer is only a flag; use the live transfer. */
+	if (meta->op == VM_SNAPSHOT_RESTORE) {
+		pci_xhci_init_ep(dev, idx);
+		xfer = dev->eps[idx].ep_xfer;
+	}
+
+	/* save / restore proper */
+	for (k = 0; k < USB_MAX_XFER_BLOCKS; k++) {
+		xfer_block = &xfer->data[k];
+
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(xfer_block->buf,
+		    XHCI_GADDR_SIZE(xfer_block->buf), true, meta, ret,
+		    done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->blen, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->bdone, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->processed, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->hci_data, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->ccs, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->streamid, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(xfer_block->trbnext, meta, ret, done);
+	}
+
+	SNAPSHOT_VAR_OR_LEAVE(xfer->ureq, meta, ret, done);
+	if (xfer->ureq) {
+		/*
+		 * xfer->ureq is not allocated at restore time: the value
+		 * just read only says that a request existed, so allocate
+		 * storage for its contents and fail cleanly if that is
+		 * not possible.
+		 */
+		if (meta->op == VM_SNAPSHOT_RESTORE) {
+			xfer->ureq = malloc(sizeof(struct usb_device_request));
+			if (xfer->ureq == NULL) {
+				ret = ENOMEM;
+				goto done;
+			}
+		}
+
+		SNAPSHOT_BUF_OR_LEAVE(xfer->ureq,
+		    sizeof(struct usb_device_request),
+		    meta, ret, done);
+	}
+
+	SNAPSHOT_VAR_OR_LEAVE(xfer->ndata, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(xfer->head, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(xfer->tail, meta, ret, done);
+
+done:
+	return (ret);
+}
+
+/*
+ * Save or restore the xHCI controller state of the device instance in
+ * meta->dev_data.
+ *
+ * Streamed in order: capability/operational/runtime registers, a sanity
+ * record (index and emulation name) for every configured device, the port
+ * registers of configured devices, and finally, per slot, the slot ->
+ * device mapping followed by the device context, endpoints, slot state,
+ * the USB device model's own state and the hci address/port.
+ *
+ * Returns 0 on success, EINVAL when a restored device index or name does
+ * not match the running configuration or the operation is unknown, or the
+ * error of the first failing step.
+ */
+static int
+pci_xhci_snapshot(struct vm_snapshot_meta *meta)
+{
+	int i, j;
+	int ret;
+	int restore_idx;
+	struct pci_devinst *pi;
+	struct pci_xhci_softc *sc;
+	struct pci_xhci_portregs *port;
+	struct pci_xhci_dev_emu *dev;
+	char dname[SNAP_DEV_NAME_LEN];
+	int maps[XHCI_MAX_SLOTS + 1];
+
+	pi = meta->dev_data;
+	sc = pi->pi_arg;
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->caplength, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams1, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams2, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hcsparams3, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hccparams1, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->dboff, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsoff, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->hccparams2, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->regsend, meta, ret, done);
+
+	/* opregs */
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbcmd, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.usbsts, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.pgsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dnctrl, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.crcr, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.dcbaap, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->opregs.config, meta, ret, done);
+
+	/* opregs.cr_p */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.cr_p,
+	    XHCI_GADDR_SIZE(sc->opregs.cr_p), false, meta, ret, done);
+
+	/* opregs.dcbaa_p */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->opregs.dcbaa_p,
+	    XHCI_GADDR_SIZE(sc->opregs.dcbaa_p), false, meta, ret, done);
+
+	/* rtsregs */
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.mfindex, meta, ret, done);
+
+	/* rtsregs.intrreg */
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.iman, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.imod, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstsz, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.rsvd, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erstba, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.intrreg.erdp, meta, ret, done);
+
+	/* rtsregs.erstba_p */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erstba_p,
+	    XHCI_GADDR_SIZE(sc->rtsregs.erstba_p), false, meta, ret, done);
+
+	/* rtsregs.erst_p */
+	SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(sc->rtsregs.erst_p,
+	    XHCI_GADDR_SIZE(sc->rtsregs.erst_p), false, meta, ret, done);
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_deq_seg, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_idx, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_enq_seg, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.er_events_cnt, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->rtsregs.event_pcs, meta, ret, done);
+
+	/*
+	 * sanity checking: every configured device records its index and
+	 * emulation name; on restore both must match the running setup.
+	 */
+	for (i = 1; i <= XHCI_MAX_DEVS; i++) {
+		dev = XHCI_DEVINST_PTR(sc, i);
+		if (dev == NULL)
+			continue;
+
+		if (meta->op == VM_SNAPSHOT_SAVE)
+			restore_idx = i;
+		SNAPSHOT_VAR_OR_LEAVE(restore_idx, meta, ret, done);
+
+		/* check if the restored device (when restoring) is sane */
+		if (restore_idx != i) {
+			fprintf(stderr, "%s: idx not matching: actual: %d, "
+			    "expected: %d\r\n", __func__, restore_idx, i);
+			ret = EINVAL;
+			goto done;
+		}
+
+		if (meta->op == VM_SNAPSHOT_SAVE) {
+			memset(dname, 0, sizeof(dname));
+			strncpy(dname, dev->dev_ue->ue_emu, sizeof(dname) - 1);
+		}
+
+		SNAPSHOT_BUF_OR_LEAVE(dname, sizeof(dname), meta, ret, done);
+
+		if (meta->op == VM_SNAPSHOT_RESTORE) {
+			/* Never trust the stream to be NUL-terminated. */
+			dname[sizeof(dname) - 1] = '\0';
+			if (strcmp(dev->dev_ue->ue_emu, dname)) {
+				fprintf(stderr, "%s: device names mismatch: "
+				    "actual: %s, expected: %s\r\n",
+				    __func__, dname, dev->dev_ue->ue_emu);
+
+				ret = EINVAL;
+				goto done;
+			}
+		}
+	}
+
+	/* portregs */
+	for (i = 1; i <= XHCI_MAX_DEVS; i++) {
+		port = XHCI_PORTREG_PTR(sc, i);
+		dev = XHCI_DEVINST_PTR(sc, i);
+
+		if (dev == NULL)
+			continue;
+
+		SNAPSHOT_VAR_OR_LEAVE(port->portsc, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->portpmsc, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->portli, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(port->porthlpmc, meta, ret, done);
+	}
+
+	/* slots */
+	if (meta->op == VM_SNAPSHOT_SAVE)
+		pci_xhci_map_devs_slots(sc, maps);
+
+	for (i = 1; i <= XHCI_MAX_SLOTS; i++) {
+		SNAPSHOT_VAR_OR_LEAVE(maps[i], meta, ret, done);
+
+		if (meta->op == VM_SNAPSHOT_SAVE) {
+			dev = XHCI_SLOTDEV_PTR(sc, i);
+		} else if (meta->op == VM_SNAPSHOT_RESTORE) {
+			/* Rebuild the slot pointer from the saved mapping. */
+			if (maps[i] != 0)
+				dev = XHCI_DEVINST_PTR(sc, maps[i]);
+			else
+				dev = NULL;
+
+			XHCI_SLOTDEV_PTR(sc, i) = dev;
+		} else {
+			/* error */
+			ret = EINVAL;
+			goto done;
+		}
+
+		if (dev == NULL)
+			continue;
+
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(dev->dev_ctx,
+		    XHCI_GADDR_SIZE(dev->dev_ctx), false, meta, ret, done);
+
+		for (j = 1; j < XHCI_MAX_ENDPOINTS; j++) {
+			ret = pci_xhci_snapshot_ep(sc, dev, j, meta);
+			if (ret != 0)
+				goto done;
+		}
+
+		SNAPSHOT_VAR_OR_LEAVE(dev->dev_slotstate, meta, ret, done);
+
+		/*
+		 * devices[i]->dev_sc
+		 * A failure in the USB device model must abort the whole
+		 * snapshot instead of being silently dropped.
+		 */
+		ret = dev->dev_ue->ue_snapshot(dev->dev_sc, meta);
+		if (ret != 0)
+			goto done;
+
+		/* devices[i]->hci */
+		SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_address, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(dev->hci.hci_port, meta, ret, done);
+	}
+
+	SNAPSHOT_VAR_OR_LEAVE(sc->ndevices, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->usb2_port_start, meta, ret, done);
+	SNAPSHOT_VAR_OR_LEAVE(sc->usb3_port_start, meta, ret, done);
+
+done:
+	return (ret);
+}
+#endif
struct pci_devemu pci_de_xhci = {
.pe_emu = "xhci",
.pe_init = pci_xhci_init,
.pe_barwrite = pci_xhci_write,
- .pe_barread = pci_xhci_read
+ .pe_barread = pci_xhci_read,
+#ifdef BHYVE_SNAPSHOT
+ .pe_snapshot = pci_xhci_snapshot,
+#endif
};
PCI_EMUL_SET(pci_de_xhci);
Index: usr.sbin/bhyve/ps2kbd.h
===================================================================
--- usr.sbin/bhyve/ps2kbd.h
+++ usr.sbin/bhyve/ps2kbd.h
@@ -32,10 +32,15 @@
#define _PS2KBD_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct ps2kbd_softc *ps2kbd_init(struct atkbdc_softc *sc);
int ps2kbd_read(struct ps2kbd_softc *sc, uint8_t *val);
void ps2kbd_write(struct ps2kbd_softc *sc, uint8_t val);
+#ifdef BHYVE_SNAPSHOT
+int ps2kbd_snapshot(struct ps2kbd_softc *sc, struct vm_snapshot_meta *meta);
+#endif
+
#endif /* _PS2KBD_H_ */
Index: usr.sbin/bhyve/ps2kbd.c
===================================================================
--- usr.sbin/bhyve/ps2kbd.c
+++ usr.sbin/bhyve/ps2kbd.c
@@ -32,10 +32,13 @@
#include <sys/types.h>
+#include <machine/vmm_snapshot.h>
+
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
@@ -381,3 +384,17 @@
return (sc);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Pass the PS/2 keyboard softc fields 'enabled' and 'curcmd' through the
+ * snapshot buffer described by 'meta'.
+ *
+ * NOTE(review): SNAPSHOT_VAR_OR_LEAVE is defined outside this file
+ * (presumably in <machine/vmm_snapshot.h>); it is expected to set 'ret'
+ * and jump to 'done' on failure, and to pick save vs. restore from
+ * 'meta' -- confirm against the macro definition.
+ *
+ * Returns the value the macro left in 'ret'.
+ */
+int
+ps2kbd_snapshot(struct ps2kbd_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->enabled, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->curcmd, meta, ret, done);
+
+done:
+ return (ret);
+}
+#endif
+
Index: usr.sbin/bhyve/ps2mouse.h
===================================================================
--- usr.sbin/bhyve/ps2mouse.h
+++ usr.sbin/bhyve/ps2mouse.h
@@ -32,6 +32,7 @@
#define _PS2MOUSE_H_
struct atkbdc_softc;
+struct vm_snapshot_meta;
struct ps2mouse_softc *ps2mouse_init(struct atkbdc_softc *sc);
@@ -40,4 +41,8 @@
void ps2mouse_toggle(struct ps2mouse_softc *sc, int enable);
int ps2mouse_fifocnt(struct ps2mouse_softc *sc);
+#ifdef BHYVE_SNAPSHOT
+int ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta);
+#endif
+
#endif /* _PS2MOUSE_H_ */
Index: usr.sbin/bhyve/ps2mouse.c
===================================================================
--- usr.sbin/bhyve/ps2mouse.c
+++ usr.sbin/bhyve/ps2mouse.c
@@ -32,10 +32,13 @@
#include <sys/types.h>
+#include <machine/vmm_snapshot.h>
+
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
@@ -415,4 +418,23 @@
return (sc);
}
-
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Pass the PS/2 mouse softc state (status, resolution, sampling rate,
+ * control enable flag, current command, absolute position and pending
+ * deltas) through the snapshot buffer described by 'meta'.
+ *
+ * NOTE(review): SNAPSHOT_VAR_OR_LEAVE is defined outside this file
+ * (presumably in <machine/vmm_snapshot.h>); it is expected to set 'ret'
+ * and jump to 'done' on failure -- confirm against the macro definition.
+ *
+ * Returns the value the macro left in 'ret'.
+ */
+int
+ps2mouse_snapshot(struct ps2mouse_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(sc->status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->resolution, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->sampling_rate, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ctrlenable, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->curcmd, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->cur_x, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->cur_y, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->delta_x, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->delta_y, meta, ret, done);
+
+done:
+ return (ret);
+}
+#endif
Index: usr.sbin/bhyve/snapshot.h
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/snapshot.h
@@ -0,0 +1,71 @@
+#ifndef _BHYVE_SNAPSHOT_
+#define _BHYVE_SNAPSHOT_
+
+#include <machine/vmm_snapshot.h>
+#include <libxo/xo.h>
+#include <ucl.h>
+
+struct vmctx;
+
+/*
+ * Everything loaded from an on-disk checkpoint that is needed to restore a
+ * guest.  Filled in by load_restore_file() and released by
+ * destroy_restore_state().
+ */
+struct restore_state {
+ int kdata_fd; /* fd of the "<file>.kern" kernel/device data file */
+ int vmmem_fd; /* fd of the guest memory file */
+
+ void *kdata_map; /* read-only mapping of kdata_fd, or MAP_FAILED */
+ size_t kdata_len; /* size of the kernel/device data file */
+
+ size_t vmmem_len; /* size of the guest memory file */
+
+ struct ucl_parser *meta_parser; /* UCL parser for "<file>.meta" */
+ ucl_object_t *meta_root_obj; /* top-level object from meta_parser */
+};
+
+/*
+ * Argument block handed to checkpoint_thread(): the VM context, the
+ * listening socket and the address storage used with accept().
+ *
+ * NOTE(review): the trailing 'checkpoint_info' declarator defines an
+ * object in this header, so every translation unit that includes it gets
+ * its own (tentative) definition.  Verify whether the object is used at
+ * all; if so it should be declared 'extern' here and defined once in a
+ * .c file.
+ */
+struct checkpoint_thread_info {
+ struct vmctx *ctx;
+ int socket_fd;
+ struct sockaddr_un *addr;
+} checkpoint_info;
+
+/*
+ * Callback types used by the user-space device table.  The snapshot
+ * callback receives the snapshot descriptor; the pause/resume callbacks
+ * receive the VM context and the device name.  All return 0 on success
+ * (callers treat any non-zero value as failure).
+ */
+typedef int (*vm_snapshot_dev_cb)(struct vm_snapshot_meta *);
+typedef int (*vm_pause_dev_cb) (struct vmctx *, const char *);
+typedef int (*vm_resume_dev_cb) (struct vmctx *, const char *);
+
+/*
+ * One entry of the table of user-space devices that take part in
+ * checkpoint/restore.  pause_cb and resume_cb may be NULL when the device
+ * needs no pause/resume handling.
+ */
+struct vm_snapshot_dev_info {
+ const char *dev_name; /* device name */
+ vm_snapshot_dev_cb snapshot_cb; /* callback for device snapshot */
+ vm_pause_dev_cb pause_cb; /* callback for device pause */
+ vm_resume_dev_cb resume_cb; /* callback for device resume */
+};
+
+/*
+ * One entry of the table of kernel (vmm) structures that are saved and
+ * restored through vm_snapshot_req().
+ */
+struct vm_snapshot_kern_info {
+ const char *struct_name; /* kernel structure name */
+ enum snapshot_req req; /* request type */
+};
+
+
+void destroy_restore_state(struct restore_state *rstate);
+
+const char *lookup_vmname(struct restore_state *rstate);
+int lookup_memflags(struct restore_state *rstate);
+size_t lookup_memsize(struct restore_state *rstate);
+int lookup_guest_ncpus(struct restore_state *rstate);
+
+void checkpoint_cpu_add(int vcpu);
+void checkpoint_cpu_resume(int vcpu);
+void checkpoint_cpu_suspend(int vcpu);
+
+int restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate);
+int vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate);
+
+int vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate);
+int vm_pause_user_devs(struct vmctx *ctx);
+int vm_resume_user_devs(struct vmctx *ctx);
+
+int get_checkpoint_msg(int conn_fd, struct vmctx *ctx);
+void *checkpoint_thread(void *param);
+int init_checkpoint_thread(struct vmctx *ctx);
+
+
+int load_restore_file(const char *filename, struct restore_state *rstate);
+
+#endif
Index: usr.sbin/bhyve/snapshot.c
===================================================================
--- /dev/null
+++ usr.sbin/bhyve/snapshot.c
@@ -0,0 +1,1498 @@
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/un.h>
+
+#include <machine/atomic.h>
+#include <machine/segments.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <pthread.h>
+#include <pthread_np.h>
+#include <sysexits.h>
+#include <stdbool.h>
+
+#include <machine/vmm.h>
+#ifndef WITHOUT_CAPSICUM
+#include <machine/vmm_dev.h>
+#endif
+#include <machine/vmm_snapshot.h>
+#include <vmmapi.h>
+
+#include "bhyverun.h"
+#include "acpi.h"
+#include "atkbdc.h"
+#include "inout.h"
+#include "dbgport.h"
+#include "fwctl.h"
+#include "ioapic.h"
+#include "mem.h"
+#include "mevent.h"
+#include "mptbl.h"
+#include "pci_emul.h"
+#include "pci_irq.h"
+#include "pci_lpc.h"
+#include "smbiostbl.h"
+#include "snapshot.h"
+#include "xmsr.h"
+#include "spinup_ap.h"
+#include "rtc.h"
+
+#include <libxo/xo.h>
+#include <ucl.h>
+
+extern int guest_ncpus;
+
+#define MB (1024UL * 1024)
+#define GB (1024UL * MB)
+
+#define BHYVE_RUN_DIR "/var/run/bhyve"
+#define CHECKPOINT_RUN_DIR BHYVE_RUN_DIR "/checkpoint"
+#define MAX_VMNAME 100
+
+#define MAX_MSG_SIZE 1024
+
+#define SNAPSHOT_BUFFER_SIZE (20 * MB)
+
+#define JSON_STRUCT_ARR_KEY "structs"
+#define JSON_DEV_ARR_KEY "devices"
+#define JSON_BASIC_METADATA_KEY "basic metadata"
+#define JSON_SNAPSHOT_REQ_KEY "snapshot_req"
+#define JSON_SIZE_KEY "size"
+#define JSON_FILE_OFFSET_KEY "file_offset"
+
+#define JSON_NCPUS_KEY "ncpus"
+#define JSON_VMNAME_KEY "vmname"
+#define JSON_MEMSIZE_KEY "memsize"
+#define JSON_MEMFLAGS_KEY "memflags"
+
+/*
+ * User-space devices handled by checkpoint/restore, in the order in which
+ * they are snapshotted, restored, paused and resumed.  Columns are:
+ * device name, snapshot callback, pause callback, resume callback.  A NULL
+ * pause/resume callback means the device is skipped by
+ * vm_pause_user_devs()/vm_resume_user_devs().
+ */
+const struct vm_snapshot_dev_info snapshot_devs[] = {
+ { "atkbdc", atkbdc_snapshot, NULL, NULL },
+ { "virtio-net", pci_snapshot, pci_pause, pci_resume },
+ { "virtio-blk", pci_snapshot, pci_pause, pci_resume },
+ { "lpc", pci_snapshot, NULL, NULL },
+ { "fbuf", pci_snapshot, NULL, NULL },
+ { "xhci", pci_snapshot, NULL, NULL },
+ { "e1000", pci_snapshot, NULL, NULL },
+ { "ahci", pci_snapshot, pci_pause, pci_resume },
+ { "ahci-hd", pci_snapshot, pci_pause, pci_resume },
+ { "ahci-cd", pci_snapshot, NULL, NULL },
+};
+
+/*
+ * Kernel structures saved and restored via vm_snapshot_req(), in the
+ * order in which they are processed.  The string is only used for
+ * diagnostics and as the "debug_name" in the metadata file; the enum
+ * value selects the structure.
+ */
+const struct vm_snapshot_kern_info snapshot_kern_structs[] = {
+ { "vhpet", STRUCT_VHPET },
+ { "vm", STRUCT_VM },
+ { "vmx", STRUCT_VMX },
+ { "vioapic", STRUCT_VIOAPIC },
+ { "vlapic", STRUCT_VLAPIC },
+ { "vmcx", STRUCT_VMCX },
+ { "vatpit", STRUCT_VATPIT },
+ { "vatpic", STRUCT_VATPIC },
+ { "vpmtmr", STRUCT_VPMTMR },
+ { "vrtc", STRUCT_VRTC },
+};
+
+/*
+ * vCPU bookkeeping for checkpointing, all protected by vcpu_lock:
+ *  vcpus_active     - vCPUs registered through checkpoint_cpu_add()
+ *  vcpus_suspended  - vCPUs currently parked (not running guest code)
+ *  vcpus_idle       - signalled when every active vCPU is suspended
+ *  vcpus_can_run    - broadcast when the checkpoint is over
+ *  checkpoint_active - true while a checkpoint is in progress
+ *
+ * NOTE(review): the mutex and condition variables have no static
+ * initializer here; presumably they are initialized elsewhere before
+ * first use -- confirm.
+ */
+static cpuset_t vcpus_active, vcpus_suspended;
+static pthread_mutex_t vcpu_lock;
+static pthread_cond_t vcpus_idle, vcpus_can_run;
+static bool checkpoint_active;
+
+/*
+ * Return a freshly allocated string holding 'base_str' immediately
+ * followed by 'ext'.  Each input is measured with a MAX_VMNAME cap and the
+ * combined length may not exceed MAX_VMNAME; otherwise NULL is returned.
+ * NULL is also returned when the allocation fails.  The caller owns (and
+ * must free) the result.
+ *
+ * TODO: Harden this function and all of its callers since 'base_str' is a
+ * user provided string.
+ */
+static char *
+strcat_extension(const char *base_str, const char *ext)
+{
+	size_t prefix_len, suffix_len, total_len;
+	char *joined;
+
+	prefix_len = strnlen(base_str, MAX_VMNAME);
+	suffix_len = strnlen(ext, MAX_VMNAME);
+	total_len = prefix_len + suffix_len;
+
+	if (total_len > MAX_VMNAME) {
+		fprintf(stderr, "Filename exceeds maximum length.\n");
+		return (NULL);
+	}
+
+	joined = malloc(total_len + 1);
+	if (joined == NULL) {
+		perror("Failed to allocate memory.");
+		return (NULL);
+	}
+
+	/* Copy both pieces back to back and terminate the result. */
+	memcpy(joined, base_str, prefix_len);
+	memcpy(joined + prefix_len, ext, suffix_len);
+	joined[total_len] = 0;
+
+	return (joined);
+}
+
+/*
+ * Release every resource held by a restore_state: the kernel data
+ * mapping, both file descriptors and the UCL metadata objects.  A NULL
+ * argument is reported on stderr and otherwise ignored.
+ */
+void
+destroy_restore_state(struct restore_state *rstate)
+{
+	if (rstate != NULL) {
+		/* The mapping is only valid when mmap() succeeded. */
+		if (rstate->kdata_map != MAP_FAILED)
+			munmap(rstate->kdata_map, rstate->kdata_len);
+
+		/* A descriptor value of 0 or less means "never opened". */
+		if (rstate->kdata_fd > 0)
+			close(rstate->kdata_fd);
+		if (rstate->vmmem_fd > 0)
+			close(rstate->vmmem_fd);
+
+		if (rstate->meta_root_obj != NULL)
+			ucl_object_unref(rstate->meta_root_obj);
+		if (rstate->meta_parser != NULL)
+			ucl_parser_free(rstate->meta_parser);
+	} else {
+		fprintf(stderr, "Attempting to destroy NULL restore struct.\n");
+	}
+}
+
+/*
+ * Open the guest memory file 'filename' read-only and record its
+ * descriptor and size in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure.  On failure no descriptor is
+ * left open and rstate->vmmem_fd is negative, so a later
+ * destroy_restore_state() will not close it a second time.
+ */
+static int
+load_vmmem_file(const char *filename, struct restore_state *rstate)
+{
+	struct stat sb;
+
+	rstate->vmmem_fd = open(filename, O_RDONLY);
+	if (rstate->vmmem_fd < 0) {
+		perror("Failed to open restore file");
+		return (-1);
+	}
+
+	if (fstat(rstate->vmmem_fd, &sb) < 0) {
+		perror("Failed to stat restore file");
+		goto err_load_vmmem;
+	}
+
+	if (sb.st_size == 0) {
+		fprintf(stderr, "Restore file is empty.\n");
+		goto err_load_vmmem;
+	}
+
+	rstate->vmmem_len = sb.st_size;
+
+	return (0);
+
+err_load_vmmem:
+	/*
+	 * The descriptor is known to be valid here.  Invalidate the stored
+	 * copy so that the caller's cleanup does not close it again.
+	 */
+	close(rstate->vmmem_fd);
+	rstate->vmmem_fd = -1;
+	return (-1);
+}
+
+/*
+ * Open the kernel/device data file 'filename' read-only, map it into
+ * memory and record descriptor, mapping and length in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure.  On failure no descriptor is
+ * left open and rstate->kdata_fd is negative, so a later
+ * destroy_restore_state() will not close it a second time.
+ */
+static int
+load_kdata_file(const char *filename, struct restore_state *rstate)
+{
+	struct stat sb;
+
+	rstate->kdata_fd = open(filename, O_RDONLY);
+	if (rstate->kdata_fd < 0) {
+		perror("Failed to open kernel data file");
+		return (-1);
+	}
+
+	if (fstat(rstate->kdata_fd, &sb) < 0) {
+		perror("Failed to stat kernel data file");
+		goto err_load_kdata;
+	}
+
+	if (sb.st_size == 0) {
+		fprintf(stderr, "Kernel data file is empty.\n");
+		goto err_load_kdata;
+	}
+
+	rstate->kdata_len = sb.st_size;
+	rstate->kdata_map = mmap(NULL, rstate->kdata_len, PROT_READ,
+	    MAP_SHARED, rstate->kdata_fd, 0);
+	if (rstate->kdata_map == MAP_FAILED) {
+		perror("Failed to map restore file");
+		goto err_load_kdata;
+	}
+
+	return (0);
+
+err_load_kdata:
+	/*
+	 * The descriptor is known to be valid here.  Invalidate the stored
+	 * copy so that the caller's cleanup does not close it again.
+	 */
+	close(rstate->kdata_fd);
+	rstate->kdata_fd = -1;
+	return (-1);
+}
+
+/*
+ * Parse the JSON/UCL metadata file 'filename' and store the parser and
+ * its top-level object in 'rstate'.
+ *
+ * Returns 0 on success and -1 on failure; on failure the parser (if any)
+ * is freed and 'rstate' is left untouched.
+ */
+static int
+load_metadata_file(const char *filename, struct restore_state *rstate)
+{
+	const ucl_object_t *obj;
+	struct ucl_parser *parser;
+
+	parser = ucl_parser_new(UCL_PARSER_DEFAULT);
+	if (parser == NULL) {
+		fprintf(stderr, "Failed to initialize UCL parser.\n");
+		return (-1);
+	}
+
+	/* ucl_parser_add_file() reports success as true (non-zero). */
+	if (!ucl_parser_add_file(parser, filename)) {
+		fprintf(stderr, "Failed to parse metadata file: '%s'\n",
+		    filename);
+		goto err_load_metadata;
+	}
+
+	obj = ucl_parser_get_object(parser);
+	if (obj == NULL) {
+		fprintf(stderr, "Failed to parse object.\n");
+		goto err_load_metadata;
+	}
+
+	rstate->meta_parser = parser;
+	rstate->meta_root_obj = (ucl_object_t *)obj;
+
+	return (0);
+
+err_load_metadata:
+	ucl_parser_free(parser);
+	return (-1);
+}
+
+/*
+ * Load a complete checkpoint: the guest memory file 'filename', the
+ * kernel/device data file "<filename>.kern" and the metadata file
+ * "<filename>.meta".
+ *
+ * On success returns 0 and 'rstate' owns the opened resources (release
+ * them with destroy_restore_state()).  On failure returns -1 after
+ * releasing everything that had been acquired.
+ */
+int
+load_restore_file(const char *filename, struct restore_state *rstate)
+{
+	int err = 0;
+	char *kdata_filename = NULL, *meta_filename = NULL;
+
+	assert(filename != NULL);
+	assert(rstate != NULL);
+
+	memset(rstate, 0, sizeof(*rstate));
+	rstate->kdata_map = MAP_FAILED;
+
+	err = load_vmmem_file(filename, rstate);
+	if (err != 0) {
+		fprintf(stderr, "Failed to load guest RAM file.\n");
+		goto err_restore;
+	}
+
+	kdata_filename = strcat_extension(filename, ".kern");
+	if (kdata_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel data filename.\n");
+		goto err_restore;
+	}
+
+	err = load_kdata_file(kdata_filename, rstate);
+	if (err != 0) {
+		fprintf(stderr, "Failed to load guest kernel data file.\n");
+		goto err_restore;
+	}
+
+	meta_filename = strcat_extension(filename, ".meta");
+	if (meta_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel metadata filename.\n");
+		goto err_restore;
+	}
+
+	err = load_metadata_file(meta_filename, rstate);
+	if (err != 0) {
+		fprintf(stderr, "Failed to load guest metadata file.\n");
+		goto err_restore;
+	}
+
+	/* The derived names are only needed while opening the files. */
+	free(kdata_filename);
+	free(meta_filename);
+
+	return (0);
+
+err_restore:
+	destroy_restore_state(rstate);
+	free(kdata_filename);
+	free(meta_filename);
+	return (-1);
+}
+
+/*
+ * Look up 'key' in the UCL object 'obj' and store its integer value
+ * through 'result_ptr' (an int64_t pointer).  If the key is missing or
+ * is not convertible to an integer, print a diagnostic and make the
+ * ENCLOSING FUNCTION return 'ret'.
+ */
+#define JSON_GET_INT_OR_RETURN(key, obj, result_ptr, ret)			\
+do {									\
+	const ucl_object_t *obj__;					\
+	obj__ = ucl_object_lookup((obj), (key));			\
+	if (obj__ == NULL) {						\
+		fprintf(stderr, "Missing key: '%s'\n", (key));		\
+		return (ret);						\
+	}								\
+	if (!ucl_object_toint_safe(obj__, (result_ptr))) {		\
+		fprintf(stderr, "Cannot convert '%s' value to int.\n",	\
+		    (key));						\
+		return (ret);						\
+	}								\
+} while(0)
+
+/*
+ * Look up 'key' in the UCL object 'obj' and store its string value
+ * through 'result_ptr' (a 'const char **').  If the key is missing or is
+ * not convertible to a string, print a diagnostic and make the ENCLOSING
+ * FUNCTION return 'ret'.
+ */
+#define JSON_GET_STRING_OR_RETURN(key, obj, result_ptr, ret)			\
+do {									\
+	const ucl_object_t *obj__;					\
+	obj__ = ucl_object_lookup((obj), (key));			\
+	if (obj__ == NULL) {						\
+		fprintf(stderr, "Missing key: '%s'\n", (key));		\
+		return (ret);						\
+	}								\
+	if (!ucl_object_tostring_safe(obj__, (result_ptr))) {		\
+		fprintf(stderr, "Cannot convert '%s' value to string.\n", \
+		    (key));						\
+		return (ret);						\
+	}								\
+} while(0)
+
+/*
+ * Find the saved kernel structure identified by 'struct_id' in the
+ * metadata "structs" array.
+ *
+ * On success returns a pointer into the mapped kernel data file and
+ * stores the structure's size in '*struct_size'.  Returns NULL when the
+ * array or the entry is missing, when a field cannot be read, or when
+ * the recorded size/offset do not fit inside the mapped file.
+ *
+ * The size and offset come from the metadata file, so they are validated
+ * explicitly instead of with assert(), which is compiled out under
+ * NDEBUG.
+ */
+static void *
+lookup_struct(enum snapshot_req struct_id, struct restore_state *rstate,
+    size_t *struct_size)
+{
+	const ucl_object_t *structs = NULL, *obj = NULL;
+	ucl_object_iter_t it = NULL;
+	int64_t snapshot_req, size, file_offset;
+
+	structs = ucl_object_lookup(rstate->meta_root_obj, JSON_STRUCT_ARR_KEY);
+	if (structs == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_STRUCT_ARR_KEY);
+		return (NULL);
+	}
+
+	if (ucl_object_type((ucl_object_t *)structs) != UCL_ARRAY) {
+		fprintf(stderr, "Object '%s' is not an array.\n",
+		    JSON_STRUCT_ARR_KEY);
+		return (NULL);
+	}
+
+	while ((obj = ucl_object_iterate(structs, &it, true)) != NULL) {
+		snapshot_req = -1;
+		JSON_GET_INT_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj,
+		    &snapshot_req, NULL);
+		if (snapshot_req < 0) {
+			fprintf(stderr, "Invalid '%s' value in metadata.\n",
+			    JSON_SNAPSHOT_REQ_KEY);
+			return (NULL);
+		}
+		if ((enum snapshot_req)snapshot_req != struct_id)
+			continue;
+
+		JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj, &size, NULL);
+		JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj,
+		    &file_offset, NULL);
+
+		/*
+		 * Reject negative values first, then check the range
+		 * without forming 'file_offset + size', which could
+		 * overflow.
+		 */
+		if (size < 0 || file_offset < 0 ||
+		    (size_t)size > rstate->kdata_len ||
+		    (size_t)file_offset > rstate->kdata_len - (size_t)size) {
+			fprintf(stderr,
+			    "Struct data lies outside the kernel data file.\n");
+			return (NULL);
+		}
+
+		*struct_size = (size_t)size;
+		return ((char *)rstate->kdata_map + file_offset);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Check whether metadata entry 'obj' describes device 'dev_name'.  If it
+ * does, store the saved data size in '*data_size' and return a pointer to
+ * the data inside the mapped kernel data file; otherwise return NULL.
+ * NULL is also returned when a required key cannot be read.
+ */
+static void *
+lookup_check_dev(const char *dev_name, struct restore_state *rstate,
+    const ucl_object_t *obj, size_t *data_size)
+{
+	const char *entry_name = NULL;
+	int64_t entry_size, entry_offset;
+
+	JSON_GET_STRING_OR_RETURN(JSON_SNAPSHOT_REQ_KEY, obj,
+	    &entry_name, NULL);
+	assert(entry_name != NULL);
+
+	/* Not the device we are looking for. */
+	if (strcmp(entry_name, dev_name) != 0)
+		return (NULL);
+
+	JSON_GET_INT_OR_RETURN(JSON_SIZE_KEY, obj, &entry_size, NULL);
+	assert(entry_size >= 0);
+
+	JSON_GET_INT_OR_RETURN(JSON_FILE_OFFSET_KEY, obj,
+	    &entry_offset, NULL);
+	assert(entry_offset >= 0);
+	assert(entry_offset + entry_size <= rstate->kdata_len);
+
+	*data_size = (size_t)entry_size;
+	return ((char *)rstate->kdata_map + entry_offset);
+}
+
+/*
+ * Search the metadata "devices" array for 'dev_name'.  Returns a pointer
+ * to the device's saved data (and its size through '*data_size'), or
+ * NULL when the array is missing/malformed or no entry matches.
+ */
+static void*
+lookup_dev(const char *dev_name, struct restore_state *rstate,
+    size_t *data_size)
+{
+	const ucl_object_t *dev_array, *entry;
+	ucl_object_iter_t iter = NULL;
+	void *found;
+
+	dev_array = ucl_object_lookup(rstate->meta_root_obj, JSON_DEV_ARR_KEY);
+	if (dev_array == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_DEV_ARR_KEY);
+		return (NULL);
+	}
+
+	if (ucl_object_type((ucl_object_t *)dev_array) != UCL_ARRAY) {
+		fprintf(stderr, "Object '%s' is not an array.\n",
+		    JSON_DEV_ARR_KEY);
+		return (NULL);
+	}
+
+	/* Walk the entries until one matches the requested device. */
+	for (;;) {
+		entry = ucl_object_iterate(dev_array, &iter, true);
+		if (entry == NULL)
+			break;
+
+		found = lookup_check_dev(dev_name, rstate, entry, data_size);
+		if (found != NULL)
+			return (found);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the "basic metadata" object from the parsed metadata file, or
+ * NULL (after printing a diagnostic) when it is absent or is not a JSON
+ * object.
+ */
+static const ucl_object_t *
+lookup_basic_metadata_object(struct restore_state *rstate)
+{
+	const ucl_object_t *meta;
+
+	meta = ucl_object_lookup(rstate->meta_root_obj,
+	    JSON_BASIC_METADATA_KEY);
+
+	if (meta == NULL) {
+		fprintf(stderr, "Failed to find '%s' object.\n",
+		    JSON_BASIC_METADATA_KEY);
+	} else if (ucl_object_type((ucl_object_t *)meta) != UCL_OBJECT) {
+		fprintf(stderr, "Object '%s' is not a JSON object.\n",
+		    JSON_BASIC_METADATA_KEY);
+		meta = NULL;
+	}
+
+	return (meta);
+}
+
+/*
+ * Return the VM name recorded in the checkpoint's basic metadata, or NULL
+ * when the metadata object or the name key cannot be read.  The string is
+ * the one handed back by the UCL library.
+ */
+const char *
+lookup_vmname(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta;
+	const char *name;
+
+	basic_meta = lookup_basic_metadata_object(rstate);
+	if (basic_meta != NULL) {
+		/* The macro returns NULL from this function on failure. */
+		JSON_GET_STRING_OR_RETURN(JSON_VMNAME_KEY, basic_meta, &name,
+		    NULL);
+		return (name);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the memory flags recorded in the checkpoint's basic metadata, or
+ * 0 when the metadata object or the key cannot be read.
+ */
+int
+lookup_memflags(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta;
+	int64_t flags;
+
+	basic_meta = lookup_basic_metadata_object(rstate);
+	if (basic_meta != NULL) {
+		/* The macro returns 0 from this function on failure. */
+		JSON_GET_INT_OR_RETURN(JSON_MEMFLAGS_KEY, basic_meta, &flags,
+		    0);
+		return ((int)flags);
+	}
+
+	return (0);
+}
+
+/*
+ * Return the guest memory size recorded in the checkpoint's basic
+ * metadata.  Returns 0 when the metadata object or the key cannot be
+ * read, and also when the recorded value is negative.
+ */
+size_t
+lookup_memsize(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta;
+	int64_t bytes;
+
+	basic_meta = lookup_basic_metadata_object(rstate);
+	if (basic_meta == NULL)
+		return (0);
+
+	/* The macro returns 0 from this function on failure. */
+	JSON_GET_INT_OR_RETURN(JSON_MEMSIZE_KEY, basic_meta, &bytes, 0);
+
+	return (bytes < 0 ? (size_t)0 : (size_t)bytes);
+}
+
+
+/*
+ * Return the number of guest vCPUs recorded in the checkpoint's basic
+ * metadata, or 0 when the metadata object or the key cannot be read.
+ */
+int
+lookup_guest_ncpus(struct restore_state *rstate)
+{
+	const ucl_object_t *basic_meta;
+	int64_t cpu_count;
+
+	basic_meta = lookup_basic_metadata_object(rstate);
+	if (basic_meta != NULL) {
+		/* The macro returns 0 from this function on failure. */
+		JSON_GET_INT_OR_RETURN(JSON_NCPUS_KEY, basic_meta, &cpu_count,
+		    0);
+		return ((int)cpu_count);
+	}
+
+	return (0);
+}
+
+/*
+ * Restore guest memory from the checkpoint's memory file by handing the
+ * descriptor and length recorded in 'rstate' to vm_restore_mem().
+ * Returns whatever vm_restore_mem() returns.
+ */
+int
+restore_vm_mem(struct vmctx *ctx, struct restore_state *rstate)
+{
+	int error;
+
+	error = vm_restore_mem(ctx, rstate->vmmem_fd, rstate->vmmem_len);
+	return (error);
+}
+
+/*
+ * Restore one kernel structure described by 'info': locate its saved
+ * bytes in the checkpoint and pass them to vm_snapshot_req() with the
+ * operation set to VM_SNAPSHOT_RESTORE.
+ *
+ * Returns 0 on success, -1 when the structure is missing or empty, or
+ * the non-zero value returned by vm_snapshot_req().
+ */
+static int
+vm_restore_kern_struct(struct vmctx *ctx, struct restore_state *rstate,
+    const struct vm_snapshot_kern_info *info)
+{
+	struct vm_snapshot_meta meta;
+	void *saved;
+	size_t saved_len;
+	int error;
+
+	saved = lookup_struct(info->req, rstate, &saved_len);
+	if (saved == NULL) {
+		fprintf(stderr, "%s: Failed to lookup struct %s\r\n",
+		    __func__, info->struct_name);
+		return (-1);
+	}
+
+	if (saved_len == 0) {
+		fprintf(stderr, "%s: Kernel struct size was 0 for: %s\r\n",
+		    __func__, info->struct_name);
+		return (-1);
+	}
+
+	/* The read cursor starts at the beginning of the saved bytes. */
+	meta = (struct vm_snapshot_meta) {
+		.ctx = ctx,
+		.dev_name = info->struct_name,
+		.dev_req = info->req,
+
+		.buffer.buf_start = saved,
+		.buffer.buf_size = saved_len,
+
+		.buffer.buf = saved,
+		.buffer.buf_rem = saved_len,
+
+		.op = VM_SNAPSHOT_RESTORE,
+	};
+
+	error = vm_snapshot_req(&meta);
+	if (error != 0)
+		fprintf(stderr, "%s: Failed to restore struct: %s\r\n",
+		    __func__, info->struct_name);
+
+	return (error);
+}
+
+/*
+ * Restore every kernel structure listed in snapshot_kern_structs, in
+ * table order.  Stops at the first failure and returns its error code;
+ * returns 0 when all structures were restored.
+ */
+int
+vm_restore_kern_structs(struct vmctx *ctx, struct restore_state *rstate)
+{
+	const struct vm_snapshot_kern_info *entry, *end;
+	int error;
+
+	end = snapshot_kern_structs + nitems(snapshot_kern_structs);
+	for (entry = snapshot_kern_structs; entry < end; entry++) {
+		error = vm_restore_kern_struct(ctx, rstate, entry);
+		if (error != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Restore one user-space device described by 'info' by passing its saved
+ * bytes to the device's snapshot callback with the operation set to
+ * VM_SNAPSHOT_RESTORE.
+ *
+ * A device that is absent from the checkpoint, or whose saved size is 0,
+ * is reported on stderr and treated as success.  Returns -1 when the
+ * callback fails and 0 otherwise.
+ */
+int
+vm_restore_user_dev(struct vmctx *ctx, struct restore_state *rstate,
+    const struct vm_snapshot_dev_info *info)
+{
+	struct vm_snapshot_meta meta;
+	void *saved;
+	size_t saved_len;
+
+	saved = lookup_dev(info->dev_name, rstate, &saved_len);
+	if (saved == NULL) {
+		fprintf(stderr, "Failed to lookup dev: %s\r\n", info->dev_name);
+		fprintf(stderr, "Continuing the restore/migration process\r\n");
+		return (0);
+	}
+
+	if (saved_len == 0) {
+		fprintf(stderr, "%s: Device size is 0. "
+		    "Assuming %s is not used\r\n",
+		    __func__, info->dev_name);
+		return (0);
+	}
+
+	/* The read cursor starts at the beginning of the saved bytes. */
+	meta = (struct vm_snapshot_meta) {
+		.ctx = ctx,
+		.dev_name = info->dev_name,
+
+		.buffer.buf_start = saved,
+		.buffer.buf_size = saved_len,
+
+		.buffer.buf = saved,
+		.buffer.buf_rem = saved_len,
+
+		.op = VM_SNAPSHOT_RESTORE,
+	};
+
+	if ((*info->snapshot_cb)(&meta) != 0) {
+		fprintf(stderr, "Failed to restore dev: %s\r\n",
+		    info->dev_name);
+		return (-1);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Restore every user-space device listed in snapshot_devs, in table
+ * order.  Stops at the first failure and returns its error code; returns
+ * 0 when all devices were processed.
+ */
+int
+vm_restore_user_devs(struct vmctx *ctx, struct restore_state *rstate)
+{
+	const struct vm_snapshot_dev_info *entry, *end;
+	int error;
+
+	end = snapshot_devs + nitems(snapshot_devs);
+	for (entry = snapshot_devs; entry < end; entry++) {
+		error = vm_restore_user_dev(ctx, rstate, entry);
+		if (error != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Invoke the pause callback of every device in snapshot_devs that has
+ * one, in table order.  Stops at the first failure and returns its error
+ * code; returns 0 when every callback succeeded.
+ */
+int
+vm_pause_user_devs(struct vmctx *ctx)
+{
+	const struct vm_snapshot_dev_info *entry, *end;
+	int error;
+
+	end = snapshot_devs + nitems(snapshot_devs);
+	for (entry = snapshot_devs; entry < end; entry++) {
+		/* Devices without a pause callback are skipped. */
+		if (entry->pause_cb != NULL) {
+			error = entry->pause_cb(ctx, entry->dev_name);
+			if (error != 0)
+				return (error);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Invoke the resume callback of every device in snapshot_devs that has
+ * one, in table order.  Stops at the first failure and returns its error
+ * code; returns 0 when every callback succeeded.
+ */
+int
+vm_resume_user_devs(struct vmctx *ctx)
+{
+	const struct vm_snapshot_dev_info *entry, *end;
+	int error;
+
+	end = snapshot_devs + nitems(snapshot_devs);
+	for (entry = snapshot_devs; entry < end; entry++) {
+		/* Devices without a resume callback are skipped. */
+		if (entry->resume_cb != NULL) {
+			error = entry->resume_cb(ctx, entry->dev_name);
+			if (error != 0)
+				return (error);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Snapshot one kernel structure: ask the kernel to fill the buffer in
+ * 'meta', append the bytes to 'data_fd' and emit a metadata instance
+ * (debug name, request id, size, file offset) under 'array_key'.
+ * '*offset' is advanced by the number of bytes written.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+vm_snapshot_kern_struct(int data_fd, xo_handle_t *xop, const char *array_key,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	size_t data_size;
+	ssize_t write_cnt;
+
+	if (vm_snapshot_req(meta) != 0) {
+		fprintf(stderr, "%s: Failed to snapshot struct %s\r\n",
+		    __func__, meta->dev_name);
+		return (-1);
+	}
+
+	data_size = vm_get_snapshot_size(meta);
+
+	/* A short write is treated as a failure. */
+	write_cnt = write(data_fd, meta->buffer.buf_start, data_size);
+	if (write_cnt < 0 || (size_t)write_cnt != data_size) {
+		perror("Failed to write all snapshotted data.");
+		return (-1);
+	}
+
+	/*
+	 * Write metadata.  The instance must be closed with the same name
+	 * it was opened with.
+	 */
+	xo_open_instance_h(xop, array_key);
+	xo_emit_h(xop, "{:debug_name/%s}\n", meta->dev_name);
+	xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%d}\n",
+	    meta->dev_req);
+	xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size);
+	xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset);
+	xo_close_instance_h(xop, array_key);
+
+	*offset += data_size;
+
+	return (0);
+}
+
+/*
+ * Snapshot every kernel structure listed in snapshot_kern_structs into
+ * 'data_fd', starting at file offset 0, and describe each one in the
+ * metadata "structs" list.
+ *
+ * Returns 0 on success, ENOMEM when the staging buffer cannot be
+ * allocated, and -1 when snapshotting a structure fails.
+ */
+static int
+vm_snapshot_kern_structs(struct vmctx *ctx, int data_fd, xo_handle_t *xop)
+{
+	int ret, i, error;
+	off_t offset;
+	size_t buf_size;
+	char *buffer;
+	struct vm_snapshot_meta *meta;
+
+	error = 0;
+	offset = 0;
+	buf_size = SNAPSHOT_BUFFER_SIZE;
+
+	buffer = malloc(SNAPSHOT_BUFFER_SIZE * sizeof(char));
+	if (buffer == NULL) {
+		error = ENOMEM;
+		perror("Failed to allocate memory for snapshot buffer");
+		goto err_vm_snapshot_kern_data;
+	}
+
+	meta = &(struct vm_snapshot_meta) {
+		.ctx = ctx,
+
+		.buffer.buf_start = buffer,
+		.buffer.buf_size = buf_size,
+
+		.op = VM_SNAPSHOT_SAVE,
+	};
+
+	xo_open_list_h(xop, JSON_STRUCT_ARR_KEY);
+	for (i = 0; i < nitems(snapshot_kern_structs); i++) {
+		meta->dev_name = snapshot_kern_structs[i].struct_name;
+		meta->dev_req = snapshot_kern_structs[i].req;
+
+		/* Start every structure with a clean, rewound buffer. */
+		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+		meta->buffer.buf = meta->buffer.buf_start;
+		meta->buffer.buf_rem = meta->buffer.buf_size;
+
+		/* Instances belong to the "structs" list opened above. */
+		ret = vm_snapshot_kern_struct(data_fd, xop,
+		    JSON_STRUCT_ARR_KEY, meta, &offset);
+		if (ret != 0) {
+			error = -1;
+			goto err_vm_snapshot_kern_data;
+		}
+	}
+	xo_close_list_h(xop, JSON_STRUCT_ARR_KEY);
+
+err_vm_snapshot_kern_data:
+	free(buffer);
+	return (error);
+}
+
+/*
+ * Emit the "basic metadata" container (vCPU count, VM name, total guest
+ * memory size and memory flags) to the metadata handle 'xop'.
+ *
+ * Returns 0 on success or the non-zero value returned by vm_get_name().
+ */
+static int
+vm_snapshot_basic_metadata(struct vmctx *ctx, xo_handle_t *xop)
+{
+	int error;
+	size_t memsize;
+	int memflags;
+	char vmname_buf[MAX_VMNAME];
+
+	/* Zero-fill so the name is always NUL-terminated. */
+	memset(vmname_buf, 0, MAX_VMNAME);
+	error = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (error != 0) {
+		perror("Failed to get VM name");
+		return (error);
+	}
+
+	memsize = vm_get_lowmem_size(ctx) + vm_get_highmem_size(ctx);
+	memflags = vm_get_memflags(ctx);
+
+	xo_open_container_h(xop, JSON_BASIC_METADATA_KEY);
+	/* guest_ncpus is an int, so it must be formatted with %d. */
+	xo_emit_h(xop, "{:" JSON_NCPUS_KEY "/%d}\n", guest_ncpus);
+	xo_emit_h(xop, "{:" JSON_VMNAME_KEY "/%s}\n", vmname_buf);
+	xo_emit_h(xop, "{:" JSON_MEMSIZE_KEY "/%lu}\n", memsize);
+	xo_emit_h(xop, "{:" JSON_MEMFLAGS_KEY "/%d}\n", memflags);
+	xo_close_container_h(xop, JSON_BASIC_METADATA_KEY);
+
+	return (0);
+}
+
+/*
+ * Append the snapshot bytes held in 'meta' to 'data_fd' and emit a
+ * metadata instance (device name, size, file offset) under 'array_key'.
+ * '*offset' is advanced by the number of bytes written.
+ *
+ * Returns 0 on success and -1 when not all bytes could be written.
+ */
+static int
+vm_snapshot_dev_write_data(int data_fd, xo_handle_t *xop, const char *array_key,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	ssize_t write_cnt;
+	size_t data_size;
+
+	data_size = vm_get_snapshot_size(meta);
+
+	/*
+	 * Keep the result in an ssize_t (what write() returns) and check
+	 * for failure before the unsigned comparison.  A short write is
+	 * treated as a failure.
+	 */
+	write_cnt = write(data_fd, meta->buffer.buf_start, data_size);
+	if (write_cnt < 0 || (size_t)write_cnt != data_size) {
+		perror("Failed to write all snapshotted data.");
+		return (-1);
+	}
+
+	/* Write metadata. */
+	xo_open_instance_h(xop, array_key);
+	xo_emit_h(xop, "{:" JSON_SNAPSHOT_REQ_KEY "/%s}\n", meta->dev_name);
+	xo_emit_h(xop, "{:" JSON_SIZE_KEY "/%lu}\n", data_size);
+	xo_emit_h(xop, "{:" JSON_FILE_OFFSET_KEY "/%lu}\n", *offset);
+	xo_close_instance_h(xop, array_key);
+
+	*offset += data_size;
+
+	return (0);
+}
+
+/*
+ * Snapshot one user-space device: run its snapshot callback to fill the
+ * buffer in 'meta', then write the bytes and the matching metadata entry
+ * under the "devices" list.
+ *
+ * Returns 0 on success, the callback's non-zero result when it fails, or
+ * the result of vm_snapshot_dev_write_data() when writing fails.
+ */
+static int
+vm_snapshot_user_dev(const struct vm_snapshot_dev_info *info,
+    int data_fd, xo_handle_t *xop,
+    struct vm_snapshot_meta *meta, off_t *offset)
+{
+	int error;
+
+	error = (*info->snapshot_cb)(meta);
+	if (error != 0) {
+		fprintf(stderr, "Failed to snapshot %s; ret=%d\r\n",
+		    meta->dev_name, error);
+		return (error);
+	}
+
+	return (vm_snapshot_dev_write_data(data_fd, xop, JSON_DEV_ARR_KEY,
+	    meta, offset));
+}
+
+/*
+ * Snapshot every user-space device listed in snapshot_devs.  Device data
+ * is appended to 'data_fd' starting at its current file offset and each
+ * device is described in the metadata "devices" list.
+ *
+ * Returns 0 on success, -1 when the file offset cannot be determined,
+ * ENOMEM when the staging buffer cannot be allocated, or the non-zero
+ * result of the failing device snapshot.
+ */
+static int
+vm_snapshot_user_devs(struct vmctx *ctx, int data_fd, xo_handle_t *xop)
+{
+	int ret, i;
+	off_t offset;
+	void *buffer;
+	size_t buf_size;
+	struct vm_snapshot_meta *meta;
+
+	ret = 0;
+	buf_size = SNAPSHOT_BUFFER_SIZE;
+
+	/* Device data follows whatever is already in the data file. */
+	offset = lseek(data_fd, 0, SEEK_CUR);
+	if (offset < 0) {
+		perror("Failed to get data file current offset.");
+		return (-1);
+	}
+
+	buffer = malloc(buf_size);
+	if (buffer == NULL) {
+		perror("Failed to allocate memory for snapshot buffer");
+		return (ENOMEM);
+	}
+
+	meta = &(struct vm_snapshot_meta) {
+		.ctx = ctx,
+
+		.buffer.buf_start = buffer,
+		.buffer.buf_size = buf_size,
+
+		.op = VM_SNAPSHOT_SAVE,
+	};
+
+	xo_open_list_h(xop, JSON_DEV_ARR_KEY);
+
+	/* Save the state of every device that supports snapshotting. */
+	for (i = 0; i < nitems(snapshot_devs); i++) {
+		meta->dev_name = snapshot_devs[i].dev_name;
+
+		/* Start every device with a clean, rewound buffer. */
+		memset(meta->buffer.buf_start, 0, meta->buffer.buf_size);
+		meta->buffer.buf = meta->buffer.buf_start;
+		meta->buffer.buf_rem = meta->buffer.buf_size;
+
+		ret = vm_snapshot_user_dev(&snapshot_devs[i], data_fd, xop,
+		    meta, &offset);
+		if (ret != 0)
+			goto snapshot_err;
+	}
+
+	xo_close_list_h(xop, JSON_DEV_ARR_KEY);
+
+snapshot_err:
+	free(buffer);
+	return (ret);
+}
+
+/*
+ * Write 'len' bytes starting at 'src' to file 'fd' at absolute offset
+ * 'dst_offset'.  Short writes are continued until everything has been
+ * written, and a write interrupted by a signal (EINTR) is retried
+ * instead of failing the whole dump.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+vm_mem_write_to_file(int fd, const void *src, size_t dst_offset, size_t len)
+{
+	const char *cursor;
+	size_t remaining;
+	ssize_t cnt_write;
+
+	/* Byte-wise cursor: arithmetic on 'void *' is not standard C. */
+	cursor = src;
+	remaining = len;
+
+	if (lseek(fd, dst_offset, SEEK_SET) < 0 ) {
+		perror("Failed to changed file offset");
+		return (-1);
+	}
+
+	while (remaining > 0) {
+		cnt_write = write(fd, cursor, remaining);
+		if (cnt_write < 0) {
+			if (errno == EINTR)
+				continue;
+			perror("Failed to write in file");
+			return (-1);
+		}
+		cursor += cnt_write;
+		remaining -= cnt_write;
+	}
+
+	return (0);
+}
+
+/*
+ * Register 'vcpu' as active.  If a checkpoint is in progress at that
+ * moment, the vCPU is also marked suspended and the calling thread
+ * blocks on vcpus_can_run until the checkpoint is over, after which the
+ * suspended mark is cleared again.
+ */
+void
+checkpoint_cpu_add(int vcpu)
+{
+
+ pthread_mutex_lock(&vcpu_lock);
+ CPU_SET(vcpu, &vcpus_active);
+
+ if (checkpoint_active) {
+ CPU_SET(vcpu, &vcpus_suspended);
+ /* Loop: a condition wait may wake up spuriously. */
+ while (checkpoint_active)
+ pthread_cond_wait(&vcpus_can_run, &vcpu_lock);
+ CPU_CLR(vcpu, &vcpus_suspended);
+ }
+ pthread_mutex_unlock(&vcpu_lock);
+}
+
+/*
+ * When a vCPU is suspended for any reason, it calls
+ * checkpoint_cpu_suspend(). This records that the vCPU is idle.
+ * Before returning from suspension, checkpoint_cpu_resume() is
+ * called. In suspend we note that the vCPU is idle. In resume we
+ * pause the vCPU thread until the checkpoint is complete. The reason
+ * for the two-step process is that vCPUs might already be stopped in
+ * the debug server when a checkpoint is requested. This approach
+ * allows us to account for and handle those vCPUs.
+ */
+void
+checkpoint_cpu_suspend(int vcpu)
+{
+
+ pthread_mutex_lock(&vcpu_lock);
+ CPU_SET(vcpu, &vcpus_suspended);
+ /*
+  * CPU_CMP() returns 0 when both sets are equal, i.e. when every
+  * active vCPU is now suspended; wake the thread waiting in
+  * vm_vcpu_pause().
+  */
+ if (checkpoint_active && CPU_CMP(&vcpus_active, &vcpus_suspended) == 0)
+ pthread_cond_signal(&vcpus_idle);
+ pthread_mutex_unlock(&vcpu_lock);
+}
+
+/*
+ * Called before a vCPU leaves suspension: block on vcpus_can_run while a
+ * checkpoint is in progress, then clear the vCPU's suspended mark.
+ */
+void
+checkpoint_cpu_resume(int vcpu)
+{
+
+ pthread_mutex_lock(&vcpu_lock);
+ /* Loop: a condition wait may wake up spuriously. */
+ while (checkpoint_active)
+ pthread_cond_wait(&vcpus_can_run, &vcpu_lock);
+ CPU_CLR(vcpu, &vcpus_suspended);
+ pthread_mutex_unlock(&vcpu_lock);
+}
+
+/*
+ * Begin a checkpoint: set checkpoint_active, request suspension of the
+ * vCPUs via vm_suspend_cpu(ctx, -1) and wait on vcpus_idle until the set
+ * of suspended vCPUs equals the set of active vCPUs.
+ *
+ * NOTE(review): -1 presumably selects all vCPUs -- confirm against
+ * vm_suspend_cpu().
+ */
+static void
+vm_vcpu_pause(struct vmctx *ctx)
+{
+
+ pthread_mutex_lock(&vcpu_lock);
+ checkpoint_active = true;
+ vm_suspend_cpu(ctx, -1);
+ /* CPU_CMP() is non-zero while the two sets still differ. */
+ while (CPU_CMP(&vcpus_active, &vcpus_suspended) != 0)
+ pthread_cond_wait(&vcpus_idle, &vcpu_lock);
+ pthread_mutex_unlock(&vcpu_lock);
+}
+
+/*
+ * End a checkpoint: clear checkpoint_active under the lock, call
+ * vm_resume_cpu(ctx, -1) and wake every thread blocked on vcpus_can_run.
+ *
+ * NOTE(review): -1 presumably selects all vCPUs -- confirm against
+ * vm_resume_cpu().
+ */
+static void
+vm_vcpu_resume(struct vmctx *ctx)
+{
+
+ pthread_mutex_lock(&vcpu_lock);
+ checkpoint_active = false;
+ pthread_mutex_unlock(&vcpu_lock);
+ vm_resume_cpu(ctx, -1);
+ pthread_cond_broadcast(&vcpus_can_run);
+}
+
+/*
+ * Write a checkpoint of the running guest.
+ *
+ * Three files are produced: 'checkpoint_file' (guest memory, low memory
+ * first, then high memory), "<checkpoint_file>.kern" (kernel structures
+ * followed by user-space device state) and "<checkpoint_file>.meta"
+ * (JSON metadata describing the other two).
+ *
+ * The vCPUs and the pausable devices are stopped while state and memory
+ * are written.  When 'stop_vm' is true the VM is destroyed and the
+ * process exits after a successful checkpoint; otherwise the guest is
+ * resumed.
+ *
+ * Returns 0 on success and a non-zero value on failure.  Every failure
+ * path reports an error, and devices/vCPUs are resumed only if they were
+ * actually paused.
+ */
+static int
+vm_checkpoint(struct vmctx *ctx, char *checkpoint_file, bool stop_vm)
+{
+	int fd_checkpoint = -1, kdata_fd = -1;
+	int ret = 0;
+	int error = 0;
+	bool vm_paused = false;
+	size_t guest_lowmem, guest_highmem, guest_memsize;
+	char *guest_baseaddr;
+	char *guest_lowmem_addr, *guest_highmem_addr = NULL;
+	xo_handle_t *xop = NULL;
+	char *meta_filename = NULL;
+	char *kdata_filename = NULL;
+	FILE *meta_file = NULL;
+
+	kdata_filename = strcat_extension(checkpoint_file, ".kern");
+	if (kdata_filename == NULL) {
+		fprintf(stderr, "Failed to construct kernel data filename.\n");
+		return (-1);
+	}
+
+	kdata_fd = open(kdata_filename, O_WRONLY | O_CREAT | O_TRUNC, 0700);
+	if (kdata_fd < 0) {
+		perror("Failed to open kernel data snapshot file.");
+		error = -1;
+		goto done;
+	}
+
+	fd_checkpoint = open(checkpoint_file, O_RDWR | O_CREAT | O_TRUNC, 0700);
+
+	if (fd_checkpoint < 0) {
+		perror("Failed to create checkpoint file");
+		error = -1;
+		goto done;
+	}
+
+	ret = vm_get_guestmem_from_ctx(ctx, &guest_baseaddr, &guest_lowmem, &guest_highmem);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to get guest mem information (base, low, high)\n");
+		error = -1;
+		goto done;
+	}
+	guest_memsize = guest_lowmem + guest_highmem;
+
+	/* make space for VMs address space */
+	ret = ftruncate(fd_checkpoint, guest_memsize);
+	if (ret < 0) {
+		perror("Failed to truncate checkpoint file\n");
+		error = -1;
+		goto done;
+	}
+
+	meta_filename = strcat_extension(checkpoint_file, ".meta");
+	if (meta_filename == NULL) {
+		fprintf(stderr, "Failed to construct vm metadata filename.\n");
+		error = -1;
+		goto done;
+	}
+
+	meta_file = fopen(meta_filename, "w");
+	if (meta_file == NULL) {
+		perror("Failed to open vm metadata snapshot file.");
+		error = -1;
+		goto done;
+	}
+
+	xop = xo_create_to_file(meta_file, XO_STYLE_JSON, XOF_PRETTY);
+	if (xop == NULL) {
+		perror("Failed to get libxo handle on metadata file.");
+		error = -1;
+		goto done;
+	}
+
+	ret = vm_snapshot_basic_metadata(ctx, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot vm basic metadata.\n");
+		error = -1;
+		goto done;
+	}
+
+	/* High memory, when present, is mapped 4GB above the base. */
+	guest_lowmem_addr = guest_baseaddr;
+	if (guest_highmem > 0)
+		guest_highmem_addr = guest_baseaddr + 4*GB;
+
+	/* From here on the guest must be resumed on every exit path. */
+	vm_vcpu_pause(ctx);
+	vm_paused = true;
+
+	ret = vm_pause_user_devs(ctx);
+	if (ret != 0) {
+		fprintf(stderr, "Could not pause devices\r\n");
+		error = ret;
+		goto done;
+	}
+
+	ret = vm_snapshot_kern_structs(ctx, kdata_fd, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot vm kernel data.\n");
+		error = -1;
+		goto done;
+	}
+
+	ret = vm_snapshot_user_devs(ctx, kdata_fd, xop);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to snapshot device state.\n");
+		error = -1;
+		goto done;
+	}
+
+	if (vm_mem_write_to_file(fd_checkpoint, guest_lowmem_addr,
+	    0, guest_lowmem) != 0) {
+		perror("Could not write lowmem");
+		error = -1;
+		goto done;
+	}
+
+	if (guest_highmem > 0) {
+		if (vm_mem_write_to_file(fd_checkpoint, guest_highmem_addr,
+		    guest_lowmem, guest_highmem) != 0) {
+			perror("Could not write highmem");
+			error = -1;
+			goto done;
+		}
+	}
+
+	xo_finish_h(xop);
+
+	if (stop_vm) {
+		vm_destroy(ctx);
+		exit(0);
+	}
+
+done:
+	if (vm_paused) {
+		ret = vm_resume_user_devs(ctx);
+		if (ret != 0)
+			fprintf(stderr, "Could not resume devices\r\n");
+		vm_vcpu_resume(ctx);
+	}
+	if (fd_checkpoint >= 0)
+		close(fd_checkpoint);
+	free(meta_filename);
+	free(kdata_filename);
+	if (xop != NULL)
+		xo_destroy(xop);
+	if (meta_file != NULL)
+		fclose(meta_file);
+	if (kdata_fd >= 0)
+		close(kdata_fd);
+	return (error);
+}
+
+/*
+ * Receive one checkpoint request from bhyvectl on 'conn_fd' and execute it.
+ *
+ * Exactly sizeof(struct checkpoint_op) bytes are expected.  START_CHECKPOINT
+ * calls vm_checkpoint() with its last argument false, START_SUSPEND calls it
+ * with true.  The connection is always closed before returning.
+ *
+ * Returns -1 on a receive error, a truncated request or an unknown opcode;
+ * otherwise the value returned by vm_checkpoint().
+ */
+int
+get_checkpoint_msg(int conn_fd, struct vmctx *ctx)
+{
+	struct checkpoint_op checkpoint_op;
+	unsigned char *buf;
+	size_t len, total_recv;
+	ssize_t recv_len;
+	int err = 0;
+
+	/* Receive straight into the structure: correct size and alignment. */
+	buf = (unsigned char *)&checkpoint_op;
+	len = sizeof(checkpoint_op);
+	total_recv = 0;
+	while (total_recv < len) {
+		recv_len = recv(conn_fd, buf + total_recv, len - total_recv, 0);
+		if (recv_len < 0) {
+			perror("Error while receiving data from bhyvectl");
+			err = -1;
+			goto done;
+		}
+		if (recv_len == 0)
+			break;		/* peer closed the connection */
+		total_recv += (size_t)recv_len;
+	}
+
+	/* Never act on a partially received (partly uninitialized) request. */
+	if (total_recv != len) {
+		fprintf(stderr, "Truncated checkpoint request from bhyvectl.\n");
+		err = -1;
+		goto done;
+	}
+
+	/* The filename comes from another process: force NUL termination. */
+	checkpoint_op.snapshot_filename[
+	    sizeof(checkpoint_op.snapshot_filename) - 1] = '\0';
+
+	switch (checkpoint_op.op) {
+	case START_CHECKPOINT:
+		err = vm_checkpoint(ctx, checkpoint_op.snapshot_filename, false);
+		break;
+	case START_SUSPEND:
+		err = vm_checkpoint(ctx, checkpoint_op.snapshot_filename, true);
+		break;
+	default:
+		fprintf(stderr, "Unrecognized checkpoint operation.\n");
+		err = -1;
+	}
+
+done:
+	close(conn_fd);
+	return (err);
+}
+
+/*
+ * Listen for commands from bhyvectl.
+ *
+ * Thread entry point; 'param' is a struct checkpoint_thread_info.  Each
+ * accepted connection is handed to get_checkpoint_msg().  The loop only
+ * ends when accept(2) fails, at which point the error is reported and the
+ * thread returns NULL.
+ */
+void *
+checkpoint_thread(void *param)
+{
+	struct checkpoint_thread_info *thread_info;
+	int conn_fd, ret;
+
+	thread_info = (struct checkpoint_thread_info *)param;
+
+	/* The peer address is never used, so do not ask accept(2) for it. */
+	while ((conn_fd = accept(thread_info->socket_fd, NULL, NULL)) >= 0) {
+		ret = get_checkpoint_msg(conn_fd, thread_info->ctx);
+		if (ret != 0) {
+			fprintf(stderr, "Failed to read message on checkpoint "
+			    "socket. Retrying.\n");
+		}
+	}
+
+	/* accept(2) returns -1 on failure; reaching this point means it did. */
+	perror("Failed to accept connection");
+
+	return (NULL);
+}
+
+/*
+ * Make sure the runtime directories exist (BHYVE_RUN_DIR first, then
+ * CHECKPOINT_RUN_DIR); they hold runtime specific information such as the
+ * UNIX sockets used for IPC with bhyvectl.  A directory that already exists
+ * is not an error.  Returns 0 on success or the failing mkdir(2) result.
+ */
+static int
+make_checkpoint_dir(void)
+{
+	const char *run_dirs[] = { BHYVE_RUN_DIR, CHECKPOINT_RUN_DIR };
+	size_t idx;
+	int rc;
+
+	for (idx = 0; idx < sizeof(run_dirs) / sizeof(run_dirs[0]); idx++) {
+		rc = mkdir(run_dirs[idx], 0755);
+		if (rc < 0 && errno != EEXIST)
+			return (rc);
+	}
+
+	return (0);
+}
+
+/*
+ * Create the listening socket for IPC with bhyvectl and start the thread
+ * that serves it.
+ *
+ * The socket is bound to CHECKPOINT_RUN_DIR/<vm name>; anything already
+ * present at that path is unlinked first.  Failure to initialize the vCPU
+ * lock or condition variables is fatal (errc).  Returns 0 on success and a
+ * non-zero value on any other failure, in which case the socket is closed
+ * and its path, if one was built, is unlinked.
+ */
+int
+init_checkpoint_thread(struct vmctx *ctx)
+{
+	/*
+	 * Static storage: checkpoint_info.addr keeps pointing at this address
+	 * after the function has returned.
+	 */
+	static struct sockaddr_un addr;
+	pthread_t checkpoint_pthread;
+	char vmname_buf[MAX_VMNAME];
+	int socket_fd, path_len, ret, err;
+
+	/* An empty sun_path tells the failure path there is nothing to unlink. */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+
+	err = pthread_mutex_init(&vcpu_lock, NULL);
+	if (err != 0)
+		errc(1, err, "checkpoint mutex init");
+	err = pthread_cond_init(&vcpus_idle, NULL);
+	if (err == 0)
+		err = pthread_cond_init(&vcpus_can_run, NULL);
+	if (err != 0)
+		errc(1, err, "checkpoint cv init");
+
+	socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		perror("Socket creation failed (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	err = make_checkpoint_dir();
+	if (err < 0) {
+		perror("Failed to create checkpoint runtime directory");
+		goto fail;
+	}
+
+	err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (err != 0) {
+		perror("Failed to get VM name");
+		goto fail;
+	}
+
+	path_len = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
+	    CHECKPOINT_RUN_DIR, vmname_buf);
+	if (path_len < 0 || (size_t)path_len >= sizeof(addr.sun_path)) {
+		fprintf(stderr, "Checkpoint socket path is too long.\n");
+		/* Do not unlink a truncated path on the way out. */
+		addr.sun_path[0] = '\0';
+		err = -1;
+		goto fail;
+	}
+	unlink(addr.sun_path);
+
+	if (bind(socket_fd, (struct sockaddr *)&addr,
+	    sizeof(struct sockaddr_un)) != 0) {
+		perror("Failed to bind socket (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	if (listen(socket_fd, 10) < 0) {
+		perror("Failed to listen on socket (IPC with bhyvectl)");
+		err = -1;
+		goto fail;
+	}
+
+	memset(&checkpoint_info, 0, sizeof(struct checkpoint_thread_info));
+	checkpoint_info.ctx = ctx;
+	checkpoint_info.socket_fd = socket_fd;
+	checkpoint_info.addr = &addr;
+
+	/* pthread_create() reports failure with a positive error number. */
+	ret = pthread_create(&checkpoint_pthread, NULL, checkpoint_thread,
+	    &checkpoint_info);
+	if (ret != 0) {
+		fprintf(stderr, "Failed to create checkpoint thread: %s\n",
+		    strerror(ret));
+		err = -1;
+		goto fail;
+	}
+	/* Name the thread only once its handle is valid. */
+	pthread_set_name_np(checkpoint_pthread, "checkpoint thread");
+
+	return (0);
+fail:
+	if (socket_fd >= 0)
+		close(socket_fd);
+	if (addr.sun_path[0] != '\0')
+		unlink(addr.sun_path);
+
+	return (err);
+}
+
+/*
+ * Report on stderr that a snapshot save/restore step failed for the item
+ * named 'bufname'.  Operations other than save and restore are printed as
+ * "unknown".
+ */
+void
+vm_snapshot_buf_err(const char *bufname, const enum vm_snapshot_op op)
+{
+	const char *opname;
+
+	switch (op) {
+	case VM_SNAPSHOT_SAVE:
+		opname = "save";
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		opname = "restore";
+		break;
+	default:
+		opname = "unknown";
+		break;
+	}
+
+	fprintf(stderr, "%s: snapshot-%s failed for %s\r\n",
+	    __func__, opname, bufname);
+}
+
+/*
+ * Move 'data_size' bytes between 'data' and the snapshot buffer in 'meta'.
+ *
+ * On VM_SNAPSHOT_SAVE the bytes are copied from 'data' into the buffer; on
+ * VM_SNAPSHOT_RESTORE they are copied from the buffer into 'data'.  On
+ * success the buffer cursor advances and the remaining space shrinks by
+ * 'data_size'.
+ *
+ * Returns 0 on success, E2BIG if fewer than 'data_size' bytes remain in the
+ * buffer, or EINVAL for any other operation.
+ */
+int
+vm_snapshot_buf(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *sbuf = &meta->buffer;
+
+	if (sbuf->buf_rem < data_size) {
+		fprintf(stderr, "%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		memcpy(sbuf->buf, (uint8_t *) data, data_size);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		memcpy((uint8_t *) data, sbuf->buf, data_size);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	sbuf->buf += data_size;
+	sbuf->buf_rem -= data_size;
+
+	return (0);
+}
+
+/*
+ * Return the number of bytes consumed so far in the snapshot buffer of
+ * 'meta' (total size minus remaining space).  If the remaining space is
+ * larger than the total size the buffer is reported as invalid on stderr
+ * and 0 is returned.
+ */
+size_t
+vm_get_snapshot_size(struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *sbuf = &meta->buffer;
+
+	if (sbuf->buf_size < sbuf->buf_rem) {
+		fprintf(stderr, "%s: Invalid buffer: size = %zu, rem = %zu\r\n",
+		    __func__, sbuf->buf_size, sbuf->buf_rem);
+		return (0);
+	}
+
+	return (sbuf->buf_size - sbuf->buf_rem);
+}
+
+/*
+ * Snapshot a host pointer into guest memory as a guest physical address.
+ *
+ * Save: '*addrp' is translated with paddr_host2guest() and the resulting
+ * address is written to the snapshot.  A translation result of
+ * (vm_paddr_t)-1 is only accepted when 'restore_null' is set and '*addrp'
+ * is NULL; otherwise EFAULT is returned.
+ *
+ * Restore: the guest address is read from the snapshot and '*addrp' is set
+ * to paddr_guest2host() of it for 'len' bytes.  A stored (vm_paddr_t)-1 is
+ * rejected with EFAULT unless 'restore_null' is set.
+ *
+ * Any other operation yields EINVAL.
+ */
+int
+vm_snapshot_guest2host_addr(void **addrp, size_t len, bool restore_null,
+    struct vm_snapshot_meta *meta)
+{
+	vm_paddr_t gaddr;
+	int ret;
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		gaddr = paddr_host2guest(meta->ctx, *addrp);
+		if (gaddr == (vm_paddr_t) -1 &&
+		    (!restore_null || *addrp != NULL)) {
+			ret = EFAULT;
+			goto done;
+		}
+
+		SNAPSHOT_VAR_OR_LEAVE(gaddr, meta, ret, done);
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		SNAPSHOT_VAR_OR_LEAVE(gaddr, meta, ret, done);
+		if (gaddr == (vm_paddr_t) -1 && !restore_null) {
+			ret = EFAULT;
+			goto done;
+		}
+
+		*addrp = paddr_guest2host(meta->ctx, gaddr, len);
+		break;
+	default:
+		ret = EINVAL;
+		break;
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Like vm_snapshot_buf(), but on restore the snapshot bytes are compared
+ * with 'data' instead of being copied over it.
+ *
+ * Save: copies 'data_size' bytes from 'data' into the buffer, returns 0.
+ * Restore: returns the memcmp() result of 'data' against the buffer, so a
+ * non-zero value means the stored bytes differ from the current ones.
+ * In both cases the buffer cursor advances by 'data_size'.
+ *
+ * Returns E2BIG if the buffer has fewer than 'data_size' bytes left and
+ * EINVAL for any other operation; the cursor is not moved in those cases.
+ */
+int
+vm_snapshot_buf_cmp(volatile void *data, size_t data_size,
+    struct vm_snapshot_meta *meta)
+{
+	struct vm_snapshot_buffer *sbuf = &meta->buffer;
+	int ret;
+
+	if (sbuf->buf_rem < data_size) {
+		fprintf(stderr, "%s: buffer too small\r\n", __func__);
+		return (E2BIG);
+	}
+
+	switch (meta->op) {
+	case VM_SNAPSHOT_SAVE:
+		memcpy(sbuf->buf, (uint8_t *) data, data_size);
+		ret = 0;
+		break;
+	case VM_SNAPSHOT_RESTORE:
+		ret = memcmp((uint8_t *) data, sbuf->buf, data_size);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	sbuf->buf += data_size;
+	sbuf->buf_rem -= data_size;
+
+	return (ret);
+}
Index: usr.sbin/bhyve/uart_emul.h
===================================================================
--- usr.sbin/bhyve/uart_emul.h
+++ usr.sbin/bhyve/uart_emul.h
@@ -31,10 +31,10 @@
#ifndef _UART_EMUL_H_
#define _UART_EMUL_H_
-
#define UART_IO_BAR_SIZE 8
struct uart_softc;
+struct vm_snapshot_meta;
typedef void (*uart_intr_func_t)(void *arg);
struct uart_softc *uart_init(uart_intr_func_t intr_assert,
@@ -44,4 +44,7 @@
uint8_t uart_read(struct uart_softc *sc, int offset);
void uart_write(struct uart_softc *sc, int offset, uint8_t value);
int uart_set_backend(struct uart_softc *sc, const char *opt);
+#ifdef BHYVE_SNAPSHOT
+int uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta);
+#endif
#endif
Index: usr.sbin/bhyve/uart_emul.c
===================================================================
--- usr.sbin/bhyve/uart_emul.c
+++ usr.sbin/bhyve/uart_emul.c
@@ -39,6 +39,8 @@
#include <capsicum_helpers.h>
#endif
+#include <machine/vmm_snapshot.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
@@ -717,3 +719,35 @@
return (retval);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot handler for the emulated UART: passes the register fields, the
+ * divisor latch bytes and the receive FIFO of 'sc' through the SNAPSHOT_*
+ * macros, in a fixed order that defines the layout of this device's
+ * snapshot data.
+ *
+ * The macros are defined elsewhere; presumably they save or restore the
+ * field depending on meta->op, store the result in 'ret' and jump to 'done'
+ * on failure - TODO confirm against <machine/vmm_snapshot.h>.
+ *
+ * Returns the value left in 'ret' by the last macro that ran.
+ */
+int
+uart_snapshot(struct uart_softc *sc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ /* Register state. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->data, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->ier, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->lcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->mcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->lsr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->msr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->fcr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->scr, meta, ret, done);
+
+ /* Divisor latch, low and high byte. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->dll, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->dlh, meta, ret, done);
+
+ /* Receive FIFO: indices, counters, then the whole buffer. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.rindex, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.windex, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.num, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->rxfifo.size, meta, ret, done);
+ SNAPSHOT_BUF_OR_LEAVE(sc->rxfifo.buf, sizeof(sc->rxfifo.buf),
+ meta, ret, done);
+
+ /* Not taken from the snapshot: always set once every field succeeded. */
+ sc->thre_int_pending = 1;
+
+done:
+ return (ret);
+}
+#endif
Index: usr.sbin/bhyve/usb_emul.h
===================================================================
--- usr.sbin/bhyve/usb_emul.h
+++ usr.sbin/bhyve/usb_emul.h
@@ -41,10 +41,10 @@
#define USB_XFER_IN 1
-
struct usb_hci;
struct usb_device_request;
struct usb_data_xfer;
+struct vm_snapshot_meta;
/* Device emulation handlers */
struct usb_devemu {
@@ -62,6 +62,7 @@
int (*ue_reset)(void *sc);
int (*ue_remove)(void *sc);
int (*ue_stop)(void *sc);
+ int (*ue_snapshot)(void *scarg, struct vm_snapshot_meta *meta);
};
#define USB_EMUL_SET(x) DATA_SET(usb_emu_set, x);
@@ -148,7 +149,6 @@
pthread_mutex_unlock(&((x)->mtx)); \
} while (0)
-
struct usb_devemu *usb_emu_finddev(char *name);
struct usb_data_xfer_block *usb_data_xfer_append(struct usb_data_xfer *xfer,
Index: usr.sbin/bhyve/usb_mouse.c
===================================================================
--- usr.sbin/bhyve/usb_mouse.c
+++ usr.sbin/bhyve/usb_mouse.c
@@ -31,6 +31,8 @@
#include <sys/time.h>
+#include <machine/vmm_snapshot.h>
+
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
@@ -787,6 +789,29 @@
return (0);
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Snapshot handler for the USB mouse/tablet emulation (installed as
+ * .ue_snapshot in ue_mouse).  'scarg' is the device's struct umouse_softc.
+ *
+ * Passes the current report, the HID settings and the polling state through
+ * SNAPSHOT_VAR_OR_LEAVE in a fixed order.  The macro is defined elsewhere;
+ * presumably it saves or restores the field depending on meta->op, stores
+ * the result in 'ret' and jumps to 'done' on failure - TODO confirm.
+ *
+ * Returns the value left in 'ret' by the last macro that ran.
+ */
+static int
+umouse_snapshot(void *scarg, struct vm_snapshot_meta *meta)
+{
+ int ret;
+ struct umouse_softc *sc;
+
+ sc = scarg;
+
+ /* Report data and HID settings. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->um_report, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->newdata, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.idle, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.protocol, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->hid.feature, meta, ret, done);
+
+ /* Polling flag and the two fields of the previous event time. */
+ SNAPSHOT_VAR_OR_LEAVE(sc->polling, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_sec, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(sc->prev_evt.tv_usec, meta, ret, done);
+
+done:
+ return (ret);
+}
+#endif
struct usb_devemu ue_mouse = {
.ue_emu = "tablet",
@@ -797,6 +822,9 @@
.ue_data = umouse_data_handler,
.ue_reset = umouse_reset,
.ue_remove = umouse_remove,
- .ue_stop = umouse_stop
+ .ue_stop = umouse_stop,
+#ifdef BHYVE_SNAPSHOT
+ .ue_snapshot = umouse_snapshot,
+#endif
};
USB_EMUL_SET(ue_mouse);
Index: usr.sbin/bhyve/virtio.h
===================================================================
--- usr.sbin/bhyve/virtio.h
+++ usr.sbin/bhyve/virtio.h
@@ -287,6 +287,7 @@
struct vmctx;
struct pci_devinst;
struct vqueue_info;
+struct vm_snapshot_meta;
/*
* A virtual device, with some number (possibly 0) of virtual
@@ -361,6 +362,10 @@
void (*vc_apply_features)(void *, uint64_t);
/* called to apply negotiated features */
uint64_t vc_hv_caps; /* hypervisor-provided capabilities */
+ void (*vc_pause)(void *); /* called to pause device activity */
+ void (*vc_resume)(void *); /* called to resume device activity */
+ int (*vc_snapshot)(void *, struct vm_snapshot_meta *);
+ /* called to save / restore device state */
};
/*
@@ -487,4 +492,9 @@
int baridx, uint64_t offset, int size);
void vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size, uint64_t value);
+#ifdef BHYVE_SNAPSHOT
+int vi_pci_snapshot(struct vm_snapshot_meta *meta);
+int vi_pci_pause(struct vmctx *ctx, struct pci_devinst *pi);
+int vi_pci_resume(struct vmctx *ctx, struct pci_devinst *pi);
+#endif
#endif /* _VIRTIO_H_ */
Index: usr.sbin/bhyve/virtio.c
===================================================================
--- usr.sbin/bhyve/virtio.c
+++ usr.sbin/bhyve/virtio.c
@@ -34,6 +34,7 @@
#include <sys/uio.h>
#include <machine/atomic.h>
+#include <machine/vmm_snapshot.h>
#include <stdio.h>
#include <stdint.h>
@@ -794,3 +795,149 @@
if (vs->vs_mtx)
pthread_mutex_unlock(vs->vs_mtx);
}
+
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Pause a virtio PCI device by invoking the vc_pause callback of its
+ * virtio_consts on the device softc.  The callback must be set (asserted).
+ * 'ctx' is not used.  Always returns 0.
+ */
+int
+vi_pci_pause(struct vmctx *ctx, struct pci_devinst *pi)
+{
+	struct virtio_softc *vs = pi->pi_arg;
+	struct virtio_consts *vc = vs->vs_vc;
+
+	assert(vc->vc_pause != NULL);
+	vc->vc_pause(DEV_SOFTC(vs));
+
+	return (0);
+}
+
+/*
+ * Resume a virtio PCI device by invoking the vc_resume callback of its
+ * virtio_consts on the device softc.  The callback must be set (asserted).
+ * 'ctx' is not used.  Always returns 0.
+ */
+int
+vi_pci_resume(struct vmctx *ctx, struct pci_devinst *pi)
+{
+	struct virtio_softc *vs = pi->pi_arg;
+	struct virtio_consts *vc = vs->vs_vc;
+
+	assert(vc->vc_resume != NULL);
+	vc->vc_resume(DEV_SOFTC(vs));
+
+	return (0);
+}
+
+/*
+ * Pass the generic virtio softc fields of 'vs' (flags, negotiated
+ * capabilities, current queue, status, ISR and MSI-X config index) through
+ * SNAPSHOT_VAR_OR_LEAVE, in this fixed order.
+ *
+ * The macro is defined elsewhere; presumably it saves or restores the field
+ * depending on meta->op, stores the result in 'ret' and jumps to 'done' on
+ * failure - TODO confirm.  Returns the value left in 'ret'.
+ */
+static int
+vi_pci_snapshot_softc(struct virtio_softc *vs, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_flags, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_negotiated_caps, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_curq, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_status, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_isr, meta, ret, done);
+ SNAPSHOT_VAR_OR_LEAVE(vs->vs_msix_cfg_idx, meta, ret, done);
+
+done:
+ return (ret);
+}
+
+/*
+ * Pass the per-device-type constants of 'vc' (number of queues, config
+ * space size, hypervisor capabilities) through SNAPSHOT_VAR_CMP_OR_LEAVE.
+ *
+ * The macro is defined elsewhere; presumably it is the compare-on-restore
+ * variant built on vm_snapshot_buf_cmp(), so a restore fails when the
+ * stored values differ from the running device - TODO confirm.
+ * Returns the value left in 'ret'.
+ */
+static int
+vi_pci_snapshot_consts(struct virtio_consts *vc, struct vm_snapshot_meta *meta)
+{
+ int ret;
+
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_nvq, meta, ret, done);
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_cfgsize, meta, ret, done);
+ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_hv_caps, meta, ret, done);
+
+done:
+ return (ret);
+}
+
+/*
+ * Snapshot every virtqueue of 'vs' (vc_nvq queues, in index order).
+ *
+ * For each queue the size fields go through the compare variant of the
+ * snapshot macros, the remaining bookkeeping fields through the plain
+ * variant, the three ring pointers through the guest<->host address
+ * translation macro, and finally vring_size(vq_qsize) bytes starting at
+ * vq_desc are snapshotted.
+ *
+ * Returns 0 when there are no queues or all of them succeeded, otherwise
+ * the error left in 'ret' by the failing step.
+ */
+static int
+vi_pci_snapshot_queues(struct virtio_softc *vs, struct vm_snapshot_meta *meta)
+{
+	int i;
+	int ret;
+	struct virtio_consts *vc;
+	struct vqueue_info *vq;
+	uint64_t addr_size;
+
+	vc = vs->vs_vc;
+
+	/* Nothing can fail (or even run) for a device without queues. */
+	ret = 0;
+
+	/* Save virtio queue info */
+	for (i = 0; i < vc->vc_nvq; i++) {
+		vq = &vs->vs_queues[i];
+
+		SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_qsize, meta, ret, done);
+		SNAPSHOT_VAR_CMP_OR_LEAVE(vq->vq_num, meta, ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_flags, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_last_avail, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_save_used, meta, ret, done);
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_msix_idx, meta, ret, done);
+
+		SNAPSHOT_VAR_OR_LEAVE(vq->vq_pfn, meta, ret, done);
+
+		/* Descriptor table: one virtio_desc per queue entry. */
+		addr_size = vq->vq_qsize * sizeof(struct virtio_desc);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_desc, addr_size,
+			false, meta, ret, done);
+
+		/* Available ring. */
+		addr_size = (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_avail, addr_size,
+			false, meta, ret, done);
+
+		/* Used ring. */
+		addr_size = (2 + 2 * vq->vq_qsize + 1) * sizeof(uint16_t);
+		SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_used, addr_size,
+			false, meta, ret, done);
+
+		/* Ring contents, starting at the descriptor table. */
+		SNAPSHOT_BUF_OR_LEAVE(vq->vq_desc, vring_size(vq->vq_qsize),
+			meta, ret, done);
+	}
+
+done:
+	return (ret);
+}
+
+/*
+ * Snapshot a virtio PCI device.  The pci_devinst is taken from
+ * meta->dev_data.  The generic softc, the virtio constants and the queues
+ * are handled in that order, followed by the device-specific vc_snapshot
+ * callback when the device provides one.
+ *
+ * Returns 0 on success or the first non-zero result of any step.
+ */
+int
+vi_pci_snapshot(struct vm_snapshot_meta *meta)
+{
+	struct pci_devinst *pi = meta->dev_data;
+	struct virtio_softc *vs = pi->pi_arg;
+	struct virtio_consts *vc = vs->vs_vc;
+	int ret;
+
+	/* Save virtio softc */
+	if ((ret = vi_pci_snapshot_softc(vs, meta)) != 0)
+		return (ret);
+
+	/* Save virtio consts */
+	if ((ret = vi_pci_snapshot_consts(vc, meta)) != 0)
+		return (ret);
+
+	/* Save virtio queue info */
+	if ((ret = vi_pci_snapshot_queues(vs, meta)) != 0)
+		return (ret);
+
+	/* Save device softc, if needed */
+	if (vc->vc_snapshot == NULL)
+		return (0);
+
+	return ((*vc->vc_snapshot)(DEV_SOFTC(vs), meta));
+}
+#endif
Index: usr.sbin/bhyvectl/Makefile
===================================================================
--- usr.sbin/bhyvectl/Makefile
+++ usr.sbin/bhyvectl/Makefile
@@ -2,6 +2,8 @@
# $FreeBSD$
#
+.include <src.opts.mk>
+
PROG= bhyvectl
SRCS= bhyvectl.c
PACKAGE= bhyve
@@ -14,4 +16,8 @@
CFLAGS+= -I${SRCTOP}/sys/amd64/vmm
+.if ${MK_BHYVE_SNAPSHOT} != "no"
+CFLAGS+= -DBHYVE_SNAPSHOT
+.endif
+
.include <bsd.prog.mk>
Index: usr.sbin/bhyvectl/bhyvectl.8
===================================================================
--- usr.sbin/bhyvectl/bhyvectl.8
+++ usr.sbin/bhyvectl/bhyvectl.8
@@ -39,6 +39,8 @@
.Op Fl -inject-nmi
.Op Fl -force-reset
.Op Fl -force-poweroff
+.Op Fl -checkpoint= Ns Ar <filename>
+.Op Fl -suspend= Ns Ar <filename>
.Sh DESCRIPTION
The
.Nm
@@ -72,6 +74,17 @@
Force the VM to reset.
.It Fl -force-poweroff
Force the VM to power off.
+.It Fl -checkpoint= Ns Ar <filename>
+Save a snapshot of a virtual machine.
+The guest memory contents are saved in the file given in
+.Ar <filename> .
+The guest device and vCPU state are saved in the file
+.Ar <filename>.kern .
+The snapshot metadata is saved in the file
+.Ar <filename>.meta .
+.It Fl -suspend= Ns Ar <filename>
+Save a snapshot of a virtual machine similar to
+.Fl -checkpoint .
+The virtual machine will terminate after the snapshot has been
+saved.
.El
.Sh EXIT STATUS
.Ex -std
@@ -79,6 +92,10 @@
Destroy the VM called fbsd10:
.Pp
.Dl "bhyvectl --vm=fbsd10 --destroy"
+.Sh COMPATIBILITY
+The snapshot file format is not yet stable and is subject to future changes.
+Backwards compatibility support for the current snapshot file format is not
+guaranteed when future changes are made.
.Sh SEE ALSO
.Xr bhyve 8 ,
.Xr bhyveload 8
Index: usr.sbin/bhyvectl/bhyvectl.c
===================================================================
--- usr.sbin/bhyvectl/bhyvectl.c
+++ usr.sbin/bhyvectl/bhyvectl.c
@@ -57,6 +57,9 @@
#include <machine/vmm_dev.h>
#include <vmmapi.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
#include "amd/vmcb.h"
#include "intel/vmcs.h"
@@ -67,6 +70,9 @@
#define NO_ARG no_argument
#define OPT_ARG optional_argument
+#define CHECKPOINT_RUN_DIR "/var/run/bhyve/checkpoint"
+#define MAX_VMNAME 100
+
static const char *progname;
static void
@@ -78,6 +84,10 @@
" [--cpu=<vcpu_number>]\n"
" [--create]\n"
" [--destroy]\n"
+#ifdef BHYVE_SNAPSHOT
+ " [--checkpoint=<filename>]\n"
+ " [--suspend=<filename>]\n"
+#endif
" [--get-all]\n"
" [--get-stats]\n"
" [--set-desc-ds]\n"
@@ -287,6 +297,10 @@
static int unassign_pptdev, bus, slot, func;
static int run;
static int get_cpu_topology;
+#ifdef BHYVE_SNAPSHOT
+static int vm_checkpoint_opt;
+static int vm_suspend_opt;
+#endif
/*
* VMCB specific.
@@ -591,6 +605,10 @@
SET_RTC_TIME,
SET_RTC_NVRAM,
RTC_NVRAM_OFFSET,
+#ifdef BHYVE_SNAPSHOT
+ SET_CHECKPOINT_FILE,
+ SET_SUSPEND_FILE,
+#endif
};
static void
@@ -1459,6 +1477,10 @@
{ "get-suspended-cpus", NO_ARG, &get_suspended_cpus, 1 },
{ "get-intinfo", NO_ARG, &get_intinfo, 1 },
{ "get-cpu-topology", NO_ARG, &get_cpu_topology, 1 },
+#ifdef BHYVE_SNAPSHOT
+ { "checkpoint", REQ_ARG, 0, SET_CHECKPOINT_FILE},
+ { "suspend", REQ_ARG, 0, SET_SUSPEND_FILE},
+#endif
};
const struct option intel_opts[] = {
@@ -1676,6 +1698,82 @@
}
}
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Send the checkpoint request 'op' to the bhyve process running the VM of
+ * 'ctx', over the UNIX socket CHECKPOINT_RUN_DIR/<vm name>.
+ *
+ * Returns 0 once the whole request has been sent and a non-zero value on
+ * any failure (socket creation, VM name lookup, over-long socket path,
+ * connect or send).
+ */
+static int
+send_checkpoint_op_req(struct vmctx *ctx, struct checkpoint_op *op)
+{
+	struct sockaddr_un addr;
+	char vmname_buf[MAX_VMNAME];
+	size_t len, total_sent;
+	ssize_t len_sent;
+	int socket_fd, path_len;
+	int err = 0;
+
+	socket_fd = socket(PF_UNIX, SOCK_STREAM, 0);
+	if (socket_fd < 0) {
+		perror("Error creating bhyvectl socket");
+		err = -1;
+		goto done;
+	}
+
+	memset(&addr, 0, sizeof(struct sockaddr_un));
+	addr.sun_family = AF_UNIX;
+
+	err = vm_get_name(ctx, vmname_buf, MAX_VMNAME - 1);
+	if (err != 0) {
+		perror("Failed to get VM name");
+		goto done;
+	}
+
+	/* Bound by the real size of sun_path, which is far below PATH_MAX. */
+	path_len = snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s",
+	    CHECKPOINT_RUN_DIR, vmname_buf);
+	if (path_len < 0 || (size_t)path_len >= sizeof(addr.sun_path)) {
+		fprintf(stderr, "VM socket path is too long\n");
+		err = -1;
+		goto done;
+	}
+
+	if (connect(socket_fd, (struct sockaddr *)&addr,
+	    sizeof(struct sockaddr_un)) != 0) {
+		perror("Connect to VM socket failed");
+		err = -1;
+		goto done;
+	}
+
+	/* A stream socket may accept fewer bytes than asked: keep sending. */
+	len = sizeof(*op);
+	total_sent = 0;
+	while (total_sent < len) {
+		len_sent = send(socket_fd, (char *)op + total_sent,
+		    len - total_sent, 0);
+		if (len_sent < 0) {
+			perror("Failed to send checkpoint operation request");
+			err = -1;
+			break;
+		}
+		if (len_sent == 0)
+			break;
+		total_sent += (size_t)len_sent;
+	}
+
+done:
+	if (socket_fd >= 0)
+		close(socket_fd);
+	return (err);
+}
+
+/*
+ * Ask the running VM to write a checkpoint to 'checkpoint_file' and keep
+ * running.  The file name is truncated to fit the request's fixed-size
+ * snapshot_filename field.  Returns the result of send_checkpoint_op_req().
+ */
+static int
+send_start_checkpoint(struct vmctx *ctx, const char *checkpoint_file)
+{
+	/* Everything but the opcode starts out zeroed. */
+	struct checkpoint_op op = { .op = START_CHECKPOINT };
+
+	/* The last byte is left alone by strncpy and set to NUL below. */
+	strncpy(op.snapshot_filename, checkpoint_file,
+	    MAX_SNAPSHOT_VMNAME - 1);
+	op.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = '\0';
+
+	return (send_checkpoint_op_req(ctx, &op));
+}
+
+/*
+ * Ask the running VM to write a snapshot to 'suspend_file' using the
+ * START_SUSPEND operation.  The file name is truncated to fit the request's
+ * fixed-size snapshot_filename field.  Returns the result of
+ * send_checkpoint_op_req().
+ */
+static int
+send_start_suspend(struct vmctx *ctx, const char *suspend_file)
+{
+	/* Everything but the opcode starts out zeroed. */
+	struct checkpoint_op op = { .op = START_SUSPEND };
+
+	/* The last byte is left alone by strncpy and set to NUL below. */
+	strncpy(op.snapshot_filename, suspend_file,
+	    MAX_SNAPSHOT_VMNAME - 1);
+	op.snapshot_filename[MAX_SNAPSHOT_VMNAME - 1] = '\0';
+
+	return (send_checkpoint_op_req(ctx, &op));
+}
+#endif
+
int
main(int argc, char *argv[])
{
@@ -1692,6 +1790,9 @@
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
struct tm tm;
struct option *opts;
+#ifdef BHYVE_SNAPSHOT
+ char *checkpoint_file, *suspend_file;
+#endif
cpu_intel = cpu_vendor_intel();
opts = setup_options(cpu_intel);
@@ -1858,6 +1959,16 @@
case ASSERT_LAPIC_LVT:
assert_lapic_lvt = atoi(optarg);
break;
+#ifdef BHYVE_SNAPSHOT
+ case SET_CHECKPOINT_FILE:
+ vm_checkpoint_opt = 1;
+ checkpoint_file = optarg;
+ break;
+ case SET_SUSPEND_FILE:
+ vm_suspend_opt = 1;
+ suspend_file = optarg;
+ break;
+#endif
default:
usage(cpu_intel);
}
@@ -2343,6 +2454,14 @@
if (!error && destroy)
vm_destroy(ctx);
+#ifdef BHYVE_SNAPSHOT
+ if (!error && vm_checkpoint_opt)
+ error = send_start_checkpoint(ctx, checkpoint_file);
+
+ if (!error && vm_suspend_opt)
+ error = send_start_suspend(ctx, suspend_file);
+#endif
+
free (opts);
exit(error);
}

File Metadata

Mime Type
text/plain
Expires
Tue, Jan 14, 11:13 PM (7 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15802091
Default Alt Text
D19495.id62983.diff (184 KB)

Event Timeline