diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -37,6 +37,7 @@
 
 struct vcpu;
 struct vm_snapshot_meta;
+struct vm_get_dirty_page_list;
 
 #ifdef _KERNEL
 SDT_PROVIDER_DECLARE(vmm);
@@ -304,6 +305,7 @@
 void vm_exit_reqidle(struct vcpu *vcpu, uint64_t rip);
 int vm_snapshot_req(struct vm *vm, struct vm_snapshot_meta *meta);
 int vm_restore_time(struct vm *vm);
+int vm_get_dirty_page_list(struct vm *vm, struct vm_get_dirty_page_list *list);
 
 #ifdef _SYS__CPUSET_H_
 /*
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -257,6 +257,16 @@
 };
 _Static_assert(sizeof(struct vm_readwrite_kernemu_device) == 24, "ABI");
 
+struct vm_get_dirty_page_list {
+	uint8_t *page_list;
+	size_t num_pages;
+	uint8_t is_all_dirty;
+	vm_offset_t lowmem_start;
+	vm_offset_t lowmem_end;
+	vm_offset_t highmem_start;
+	vm_offset_t highmem_end;
+};
+
 enum {
 	/* general routines */
 	IOCNUM_ABIVERS = 0,
@@ -345,7 +355,8 @@
 
 	/* checkpoint */
 	IOCNUM_SNAPSHOT_REQ = 113,
-	IOCNUM_RESTORE_TIME = 115
+	IOCNUM_RESTORE_TIME = 115,
+	IOCNUM_VM_GET_DIRTY_PAGE_LIST = 117,
 };
 
 #define	VM_RUN \
@@ -476,4 +487,6 @@
 	_IOWR('v', IOCNUM_SNAPSHOT_REQ, struct vm_snapshot_meta)
 #define	VM_RESTORE_TIME \
 	_IOWR('v', IOCNUM_RESTORE_TIME, int)
+#define	VM_GET_DIRTY_PAGE_LIST \
+	_IOWR('v', IOCNUM_VM_GET_DIRTY_PAGE_LIST, struct vm_get_dirty_page_list)
 #endif
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -2985,4 +2985,147 @@
 
 	return (0);
 }
-#endif
+
+/*
+ * Scan 'object' for pages dirtied since the last call and record one
+ * byte per page into the user buffer 'page_list'.  'start'/'end'
+ * bound the range in guest-physical bytes; 'offset' rebases the page
+ * index into the buffer.  With 'is_all_dirty' every resident page is
+ * reported dirty.  Returns 0 or an errno value.
+ */
+static int
+vm_search_dirty_pages_in_object(vm_object_t object, size_t start, size_t end,
+    size_t offset, uint8_t *page_list, bool is_all_dirty)
+{
+	vm_pindex_t pindex, swap_pindex;
+	vm_page_t m;
+	int error = 0;
+	uint8_t result;
+
+	VM_OBJECT_WLOCK(object);
+
+	pindex = start / PAGE_SIZE;
+	swap_pindex = swap_pager_find_least(object, pindex);
+
+	for (; pindex < end / PAGE_SIZE; pindex++) {
+		if (pindex == swap_pindex) {
+			/* Swapped-out pages must be paged in to be tested. */
+			swap_pindex = swap_pager_find_least(object, pindex + 1);
+			error = vm_page_grab_valid(&m, object, pindex,
+			    VM_ALLOC_NORMAL | VM_ALLOC_COUNT(VM_ALLOC_COUNT_MAX));
+			if (error != VM_PAGER_OK) {
+				error = EINVAL;
+				break;
+			}
+		} else {
+			m = vm_page_grab(object, pindex, VM_ALLOC_NOCREAT);
+		}
+
+		if (m != NULL) {
+			result = vm_page_test_vmm_dirty(m) || is_all_dirty;
+			vm_page_xunbusy(m);
+			/*
+			 * NOTE(review): subyte() stores to a user address and
+			 * can fault with the object lock held; consider
+			 * staging locally and copying out after unlock.
+			 */
+			if (subyte(page_list + pindex - offset, result) != 0) {
+				error = EFAULT;
+				break;
+			}
+		}
+	}
+	VM_OBJECT_WUNLOCK(object);
+
+	return (error);
+}
+
+/*
+ * Fill the caller-supplied dirty-page list (one byte per guest page)
+ * for the guest's lowmem and highmem segments.  Returns 0 on success
+ * or an errno value.
+ */
+int
+vm_get_dirty_page_list(struct vm *vm, struct vm_get_dirty_page_list *list)
+{
+	struct vmspace *vm_vmspace;
+	struct vm_map *vmmap;
+	struct vm_map_entry *entry;
+	struct vm_object *object;
+	int error = 0;
+	uint8_t *page_list;
+	size_t offset;
+
+	page_list = list->page_list;
+
+	if (page_list == NULL)
+		return (EINVAL);
+
+	if (list->lowmem_end < list->lowmem_start ||
+	    list->highmem_end < list->highmem_start)
+		return (EINVAL);
+
+	/*
+	 * One byte is written per guest page; reject ranges that do not
+	 * fit in the user buffer ('num_pages' was previously unchecked).
+	 */
+	if (list->lowmem_end / PAGE_SIZE > list->num_pages ||
+	    (list->highmem_end - list->highmem_start + list->lowmem_end) /
+	    PAGE_SIZE > list->num_pages)
+		return (EINVAL);
+
+	vm_vmspace = vm->vmspace;
+
+	if (vm_vmspace == NULL) {
+		printf("%s: vm_vmspace is null\n", __func__);
+		return (EINVAL);
+	}
+
+	vmmap = &vm_vmspace->vm_map;
+
+	vm_map_lock(vmmap);
+	if (vmmap->busy)
+		vm_map_wait_busy(vmmap);
+
+	for (entry = vmmap->header.right; entry != &vmmap->header; entry = entry->right) {
+		object = entry->object.vm_object;
+
+		/* Entry exactly covering guest lowmem. */
+		if (entry->start == list->lowmem_start && entry->end == list->lowmem_end) {
+			if (object == NULL)
+				continue;
+
+			error = vm_search_dirty_pages_in_object(object,
+			    list->lowmem_start,
+			    list->lowmem_end,
+			    0,
+			    page_list,
+			    list->is_all_dirty);
+			if (error != 0)
+				break;
+		}
+
+		/* Entry exactly covering guest highmem. */
+		if (entry->start == list->highmem_start && entry->end == list->highmem_end) {
+			if (object == NULL)
+				continue;
+
+			/* Rebase so highmem follows lowmem in the buffer. */
+			offset = (list->highmem_start - list->lowmem_end) / PAGE_SIZE;
+			error = vm_search_dirty_pages_in_object(object,
+			    list->highmem_start,
+			    list->highmem_end,
+			    offset,
+			    page_list,
+			    list->is_all_dirty);
+			if (error != 0)
+				break;
+		}
+	}
+
+	vm_map_unlock(vmmap);
+
+	return (error);
+}
+#endif /* BHYVE_SNAPSHOT */
+
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -433,6 +433,7 @@
 #ifdef COMPAT_FREEBSD13
 	struct vm_snapshot_meta_old *snapshot_old;
 #endif
+	struct vm_get_dirty_page_list *page_list;
 #endif
 
 	error = vmm_priv_check(curthread->td_ucred);
@@ -986,6 +987,10 @@
 	case VM_RESTORE_TIME:
 		error = vm_restore_time(sc->vm);
 		break;
+	case VM_GET_DIRTY_PAGE_LIST:
+		page_list = (struct vm_get_dirty_page_list *)data;
+		error = vm_get_dirty_page_list(sc->vm, page_list);
+		break;
 #endif
 	default:
 		error = ENOTTY;
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -295,6 +295,13 @@
 #define	VPO_SWAPSLEEP	0x02		/* waiting for swap to finish */
 #define	VPO_UNMANAGED	0x04		/* no PV management for page */
 #define	VPO_SWAPINPROG	0x08		/* swap I/O in progress on page */
+#ifdef BHYVE_SNAPSHOT
+/*
+ * NOTE(review): every file that inlines vm_page_dirty() must include
+ * "opt_bhyve_snapshot.h", or this flag is updated inconsistently.
+ */
+#define	VPO_VMM_DIRTY	0x80		/* dirty bit used for bhyve migration */
+#endif
 
 /*
  * Busy page implementation details.
@@ -708,6 +711,9 @@
 void vm_page_valid(vm_page_t m);
 int vm_page_is_valid(vm_page_t, int, int);
 void vm_page_test_dirty(vm_page_t);
+#ifdef BHYVE_SNAPSHOT
+bool vm_page_test_vmm_dirty(vm_page_t m);
+#endif
 vm_page_bits_t vm_page_bits(int base, int size);
 void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
 void vm_page_free_pages_toq(struct spglist *free, bool update_wire_count);
@@ -893,6 +899,9 @@
 	vm_page_dirty_KBI(m);
 #else
 	m->dirty = VM_PAGE_BITS_ALL;
+#ifdef BHYVE_SNAPSHOT
+	m->oflags |= VPO_VMM_DIRTY;
+#endif
 #endif
 }
 
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -69,6 +69,7 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_bhyve_snapshot.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
@@ -1416,8 +1417,39 @@
 	/* Refer to this operation by its public name. */
 	KASSERT(vm_page_all_valid(m), ("vm_page_dirty: page is invalid!"));
 	m->dirty = VM_PAGE_BITS_ALL;
+#ifdef BHYVE_SNAPSHOT
+	m->oflags |= VPO_VMM_DIRTY;
+#endif
 }
 
+#ifdef BHYVE_SNAPSHOT
+/*
+ * Test-and-clear the migration dirty state of a busied, object-locked
+ * page: returns true when the page was dirtied (VPO_VMM_DIRTY set or
+ * pmap modified bit on) since the previous call, resetting both.
+ */
+bool
+vm_page_test_vmm_dirty(vm_page_t m)
+{
+	bool modified;
+
+	VM_OBJECT_ASSERT_WLOCKED(m->object);
+	vm_page_assert_busied(m);
+
+	/* Fold the pmap modified bit into m->dirty / VPO_VMM_DIRTY. */
+	vm_page_test_dirty(m);
+
+	modified = (m->oflags & VPO_VMM_DIRTY) != 0;
+	if (!modified && pmap_is_modified(m))
+		modified = true;
+
+	m->oflags &= ~VPO_VMM_DIRTY;
+	pmap_clear_modify(m);
+
+	return (modified);
+}
+#endif
+
 /*
  * vm_page_insert:	[ internal use only ]
  *