Page MenuHomeFreeBSD

D34811.id105516.diff
No OneTemporary

D34811.id105516.diff

diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -35,6 +35,7 @@
#include <x86/segments.h>
struct vm_snapshot_meta;
+struct vm_get_dirty_page_list;
#ifdef _KERNEL
SDT_PROVIDER_DECLARE(vmm);
@@ -287,6 +288,7 @@
void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
int vm_restore_time(struct vm *vm);
+int vm_get_dirty_page_list(struct vm *vm, struct vm_get_dirty_page_list *list);
#ifdef _SYS__CPUSET_H_
/*
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -31,6 +31,8 @@
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
+#include "vmm_migration.h"
+
struct vm_snapshot_meta;
#ifdef _KERNEL
@@ -257,6 +259,13 @@
};
_Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
+/*
+ * Argument of the VM_GET_DIRTY_PAGE_LIST ioctl.
+ */
+struct vm_get_dirty_page_list {
+ /*
+  * User-space buffer that vm_get_dirty_page_list() fills with copyout(),
+  * one uint8_t per page; a NULL pointer is rejected.
+  */
+ uint8_t *page_list;
+ /* Presumably the number of entries in page_list -- TODO confirm. */
+ size_t num_pages;
+ /* Range matched against a map entry's start/end to find lowmem. */
+ struct vmm_migration_segment lowmem;
+ /* Range matched against a map entry's start/end to find highmem. */
+ struct vmm_migration_segment highmem;
+};
+
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@@ -345,7 +354,8 @@
/* checkpoint */
IOCNUM_SNAPSHOT_REQ = 113,
- IOCNUM_RESTORE_TIME = 115
+ IOCNUM_RESTORE_TIME = 115,
+ IOCNUM_VM_GET_DIRTY_PAGE_LIST = 117,
};
#define VM_RUN \
@@ -476,4 +486,6 @@
_IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta)
#define VM_RESTORE_TIME \
_IOWR('v', IOCNUM_RESTORE_TIME, int)
+#define VM_GET_DIRTY_PAGE_LIST \
+ _IOWR('v', IOCNUM_VM_GET_DIRTY_PAGE_LIST, struct vm_get_dirty_page_list)
#endif
diff --git a/sys/amd64/include/vmm_migration.h b/sys/amd64/include/vmm_migration.h
new file mode 100644
--- /dev/null
+++ b/sys/amd64/include/vmm_migration.h
@@ -0,0 +1,46 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#ifndef _VMM_MIGRATION_H_
+#define _VMM_MIGRATION_H_
+
+#include <sys/types.h>
+
+/*
+ * A bhyve guest has two memory segments:
+ * - lowmem segment: mapped from 0GB to 3GB (which is lowmem_limit)
+ * - highmem segment: mapped starting from 4GB
+ * The object that represents a segment is identified by start and end values.
+ */
+struct vmm_migration_segment {
+ /* First address of the segment; compared with a map entry's start. */
+ vm_offset_t start;
+ /*
+  * End address of the segment; compared with a map entry's end and
+  * used as an exclusive bound when walking the segment's pages.
+  */
+ vm_offset_t end;
+};
+
+#endif
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -65,6 +65,7 @@
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/md_var.h>
+#include <machine/vmparam.h>
#include <x86/psl.h>
#include <x86/apicreg.h>
#include <x86/ifunc.h>
@@ -73,6 +74,7 @@
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
+#include <machine/vmm_migration.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -146,6 +148,9 @@
};
#define VM_MAX_MEMMAPS 8
+#define MB (1024UL * 1024)
+#define GB (1024UL * MB)
+
/*
* Initialization:
* (o) initialized the first time the VM is created
@@ -2950,4 +2955,90 @@
return (0);
}
-#endif
+
+/*
+ * Report the migration-dirty state of the resident pages of 'object' whose
+ * page index lies in [start / PAGE_SIZE, end / PAGE_SIZE).
+ *
+ * For every resident page the result of vm_page_test_vmm_dirty() is copied
+ * out to page_list[pindex - offset].  Non-resident pages are skipped and
+ * their entries in page_list are left untouched.
+ *
+ * 'num_pages' is the number of entries in the user buffer; a page whose
+ * entry would fall outside of it yields EINVAL instead of a write past the
+ * buffer.  Returns 0 on success or the copyout() error.
+ *
+ * NOTE(review): the object page index is derived directly from the
+ * start/end values rather than from the map entry's offset into the
+ * object -- confirm that this matches how the segments are mapped.
+ */
+static inline int
+vm_search_dirty_pages_in_object(vm_object_t object, size_t start, size_t end,
+    size_t offset, uint8_t *page_list, size_t num_pages)
+{
+	vm_pindex_t pindex;
+	vm_page_t m;
+	uint8_t result;
+	int error;
+
+	for (pindex = start / PAGE_SIZE; pindex < end / PAGE_SIZE; pindex++) {
+		/*
+		 * Look the page up and test it under the same hold of the
+		 * object lock, so the page is not used after the lock that
+		 * protected the lookup has been dropped.
+		 */
+		result = 0;
+		VM_OBJECT_WLOCK(object);
+		m = vm_page_lookup(object, pindex);
+		if (m != NULL)
+			result = vm_page_test_vmm_dirty(m);
+		VM_OBJECT_WUNLOCK(object);
+		if (m == NULL)
+			continue;
+
+		/* Never write outside of the buffer the caller described. */
+		if (pindex < offset || pindex - offset >= num_pages)
+			return (EINVAL);
+
+		/* copyout() may fault, so it runs with the object unlocked. */
+		error = copyout(&result, page_list + (pindex - offset),
+		    sizeof(result));
+		if (error != 0)
+			return (error);
+	}
+
+	return (0);
+}
+
+/*
+ * Fill the user buffer list->page_list with one byte per resident guest
+ * page telling whether the page is dirty for migration purposes.
+ *
+ * The VM's address-space map is searched for the entries whose start/end
+ * match list->lowmem and list->highmem exactly.  lowmem pages are stored
+ * at their page index; highmem pages are stored at their page index minus
+ * the number of pages between lowmem.end and highmem.start.
+ *
+ * Returns 0 on success or an errno value: EINVAL for a NULL page_list, a
+ * missing vmspace, a highmem segment that starts below lowmem.end or an
+ * entry that would not fit in list->num_pages, and the copyout() error if
+ * the user buffer cannot be written.  The value is used as the error of
+ * the VM_GET_DIRTY_PAGE_LIST ioctl, so it must not be -1: that is ERESTART
+ * in the kernel.
+ */
+int
+vm_get_dirty_page_list(struct vm *vm, struct vm_get_dirty_page_list *list)
+{
+	struct vmspace *vm_vmspace;
+	struct vm_map *vmmap;
+	struct vm_map_entry *entry;
+	struct vm_object *object;
+	uint8_t *page_list;
+	size_t offset;
+	int error;
+
+	page_list = list->page_list;
+	if (page_list == NULL)
+		return (EINVAL);
+
+	vm_vmspace = vm->vmspace;
+	if (vm_vmspace == NULL) {
+		printf("%s: vm_vmspace is null\r\n", __func__);
+		return (EINVAL);
+	}
+
+	vmmap = &vm_vmspace->vm_map;
+	error = 0;
+
+	vm_map_lock(vmmap);
+	if (vmmap->busy)
+		vm_map_wait_busy(vmmap);
+
+	/*
+	 * Use the map iterator: following entry->right by hand does not
+	 * visit the entries in order once the map is a threaded tree.
+	 */
+	VM_MAP_ENTRY_FOREACH(entry, vmmap) {
+		object = entry->object.vm_object;
+		if (object == NULL)
+			continue;
+
+		if (entry->start == list->lowmem.start &&
+		    entry->end == list->lowmem.end) {
+			/* lowmem: page_list is indexed by page index. */
+			error = vm_search_dirty_pages_in_object(object,
+			    list->lowmem.start, list->lowmem.end, 0,
+			    page_list, list->num_pages);
+			if (error != 0)
+				break;
+		}
+
+		if (entry->start == list->highmem.start &&
+		    entry->end == list->highmem.end) {
+			/* The subtraction below must not wrap around. */
+			if (list->highmem.start < list->lowmem.end) {
+				error = EINVAL;
+				break;
+			}
+			/*
+			 * highmem: skip the hole between the segments so
+			 * that its entries directly follow the lowmem ones.
+			 */
+			offset = (list->highmem.start - list->lowmem.end) /
+			    PAGE_SIZE;
+			error = vm_search_dirty_pages_in_object(object,
+			    list->highmem.start, list->highmem.end, offset,
+			    page_list, list->num_pages);
+			if (error != 0)
+				break;
+		}
+	}
+
+	vm_map_unlock(vmmap);
+
+	return (error);
+}
+
+#endif /* BHYVE_SNAPSHOT */
+
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -59,6 +59,7 @@
#include <machine/vmm_instruction_emul.h>
#include <machine/vmm_snapshot.h>
#include <x86/apicreg.h>
+#include <machine/vmm_migration.h>
#include "vmm_lapic.h"
#include "vmm_stat.h"
@@ -410,6 +411,7 @@
int *regnums;
#ifdef BHYVE_SNAPSHOT
struct vm_snapshot_meta *snapshot_meta;
+ struct vm_get_dirty_page_list *page_list;
#endif
error = vmm_priv_check(curthread->td_ucred);
@@ -903,6 +905,10 @@
case VM_RESTORE_TIME:
error = vm_restore_time(sc->vm);
break;
+ case VM_GET_DIRTY_PAGE_LIST:
+ page_list = (struct vm_get_dirty_page_list *)data;
+ error = vm_get_dirty_page_list(sc->vm, page_list);
+ break;
#endif
default:
error = ENOTTY;
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -392,6 +392,8 @@
vm_size_t length, uint8_t queue);
struct vnode *vm_object_vnode(vm_object_t object);
bool vm_object_is_active(vm_object_t obj);
+int vm_object_get_page(vm_object_t object, vm_pindex_t pindex, void *dst);
+int vm_object_set_page(vm_object_t object, vm_pindex_t pindex, void *src);
#endif /* _KERNEL */
#endif /* _VM_OBJECT_ */
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -2686,6 +2686,51 @@
CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_MPSAFE, NULL, 0,
sysctl_vm_object_list_swap, "S,kinfo_vmobject",
"List of swap VM objects");
+/*
+ * Copy the page at index 'pindex' of 'object' out to the user buffer 'dst'
+ * (PAGE_SIZE bytes) and clear its migration-dirty state.
+ *
+ * The object must be write-locked by the caller.
+ *
+ * Returns 0 on success, -1 if no page is resident at 'pindex', EBUSY if
+ * the page could not be exclusively busied, or the copyout() error.
+ *
+ * NOTE(review): copyout() can fault on the user address while the object
+ * lock asserted above is held -- confirm this is safe for the callers.
+ */
+int
+vm_object_get_page(vm_object_t object, vm_pindex_t pindex, void *dst)
+{
+	vm_page_t page;
+	vm_offset_t page_src;
+	int error;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+
+	page = vm_page_lookup(object, pindex);
+	if (page == NULL) {
+		/* No resident page at this index. */
+		return (-1);
+	}
+
+	/*
+	 * Only continue when the busy lock was really taken; otherwise the
+	 * vm_page_xunbusy() below would drop a lock this thread does not own.
+	 */
+	if (!vm_page_tryxbusy(page))
+		return (EBUSY);
+
+	/*
+	 * Clear the tracking state before copying, so that a write landing
+	 * during the copy can mark the page dirty again.
+	 */
+	page->oflags &= ~VPO_VMM_DIRTY;
+	pmap_clear_modify(page);
+
+	page_src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(page));
+	error = copyout((void *)page_src, dst, PAGE_SIZE);
+	if (error != 0) {
+		/* The caller got no data: keep the page marked dirty. */
+		page->oflags |= VPO_VMM_DIRTY;
+	}
+	vm_page_xunbusy(page);
+
+	return (error);
+}
+
+/*
+ * Overwrite the page at index 'pindex' of 'object' with PAGE_SIZE bytes
+ * read from the user buffer 'src'.
+ *
+ * The object must be write-locked by the caller.
+ *
+ * Returns 0 on success, -1 if no page is resident at 'pindex', or the
+ * copyin() error if the user buffer could not be read.
+ *
+ * NOTE(review): copyin() can fault on the user address while the object
+ * lock asserted above is held -- confirm this is safe for the callers.
+ */
+int
+vm_object_set_page(vm_object_t object, vm_pindex_t pindex, void *src)
+{
+	vm_page_t page;
+	vm_offset_t page_dst;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+
+	page = vm_page_lookup(object, pindex);
+	if (page == NULL) {
+		/* No resident page at this index. */
+		return (-1);
+	}
+
+	/* The page is written through its direct-map address. */
+	page_dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(page));
+
+	/* Report a failed copy instead of claiming success. */
+	return (copyin(src, (void *)page_dst, PAGE_SIZE));
+}
#include "opt_ddb.h"
#ifdef DDB
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -295,6 +295,7 @@
#define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */
#define VPO_UNMANAGED 0x04 /* no PV management for page */
#define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */
+#define VPO_VMM_DIRTY 0x80 /* dirty bit used for bhyve migration */
/*
* Busy page implementation details.
@@ -705,6 +706,7 @@
void vm_page_valid(vm_page_t m);
int vm_page_is_valid(vm_page_t, int, int);
void vm_page_test_dirty(vm_page_t);
+uint8_t vm_page_test_vmm_dirty(vm_page_t m);
vm_page_bits_t vm_page_bits(int base, int size);
void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
void vm_page_free_pages_toq(struct spglist *free, bool update_wire_count);
@@ -890,6 +892,7 @@
vm_page_dirty_KBI(m);
#else
m->dirty = VM_PAGE_BITS_ALL;
+ m->oflags |= VPO_VMM_DIRTY;
#endif
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1416,6 +1416,25 @@
/* Refer to this operation by its public name. */
KASSERT(vm_page_all_valid(m), ("vm_page_dirty: page is invalid!"));
m->dirty = VM_PAGE_BITS_ALL;
+ m->oflags |= VPO_VMM_DIRTY;
+}
+
+/*
+ * Tell whether page 'm' is dirty for bhyve migration purposes.
+ *
+ * Returns 1 when VPO_VMM_DIRTY is set in the page's oflags or when the
+ * pmap layer reports the page as modified, 0 otherwise.
+ */
+uint8_t
+vm_page_test_vmm_dirty(vm_page_t m)
+{
+
+	/*
+	 * Refresh the page's dirty state from the pmap only when the
+	 * exclusive busy lock was actually acquired; unbusying a page that
+	 * this thread did not busy would release someone else's lock.
+	 */
+	if (vm_page_tryxbusy(m)) {
+		vm_page_test_dirty(m);
+		vm_page_xunbusy(m);
+	}
+
+	if ((m->oflags & VPO_VMM_DIRTY) != 0)
+		return (1);
+
+	/* The flag may be clear while the pmap already saw a write. */
+	return (pmap_is_modified(m) ? 1 : 0);
}
/*

File Metadata

Mime Type
text/plain
Expires
Tue, Jun 23, 9:34 PM (15 h, 53 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34259528
Default Alt Text
D34811.id105516.diff (10 KB)

Event Timeline