Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3144,6 +3144,9 @@
 geom/raid3/g_raid3_ctl.c	optional geom_raid3
 geom/shsec/g_shsec.c		optional geom_shsec
 geom/stripe/g_stripe.c		optional geom_stripe
+contrib/ck/src/ck_epoch.c	standard \
+	compile-with "${NORMAL_C} -I$S/contrib/ck/include"
+
 contrib/xz-embedded/freebsd/xz_malloc.c	\
 	optional xz_embedded | geom_uzip \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
@@ -4311,6 +4314,9 @@
 vm/uma_core.c			standard
 vm/uma_dbg.c			standard
 vm/memguard.c			optional DEBUG_MEMGUARD
+vm/vm_eventhandler.c		standard \
+	compile-with "${NORMAL_C} -I$S/contrib/ck/include"
+
 vm/vm_fault.c			standard
 vm/vm_glue.c			standard
 vm/vm_init.c			standard
Index: sys/sys/queue.h
===================================================================
--- sys/sys/queue.h
+++ sys/sys/queue.h
@@ -520,6 +520,27 @@
 	TRASHIT(*oldprev);						\
 } while (0)
 
+#define	LIST_UNLINKED(elm, field)	((elm)->field.le_prev == NULL)
+
+
+/*
+ * Must preserve the next pointer for current referents
+ * as well as prevent any potential re-ordering.
+ */
+#define	LIST_REMOVE_EBR(elm, field) do {				\
+	QMD_SAVELINK(oldprev, (elm)->field.le_prev);			\
+	QMD_LIST_CHECK_NEXT(elm, field);				\
+	QMD_LIST_CHECK_PREV(elm, field);				\
+	__compiler_membar();						\
+	*(elm)->field.le_prev = LIST_NEXT((elm), field);		\
+	__compiler_membar();						\
+	if (LIST_NEXT((elm), field) != NULL)				\
+		LIST_NEXT((elm), field)->field.le_prev =		\
+		    (elm)->field.le_prev;				\
+	(elm)->field.le_prev = NULL;					\
+} while (0)
+
+
 #define LIST_SWAP(head1, head2, type, field) do {			\
 	QUEUE_TYPEOF(type) *swap_tmp = LIST_FIRST(head1);		\
 	LIST_FIRST((head1)) = LIST_FIRST((head2));			\
Index: sys/vm/vm_eventhandler.h
===================================================================
--- /dev/null
+++ sys/vm/vm_eventhandler.h
@@ -0,0 +1,88 @@
+
+#ifndef _VM_EVENTHANDLER_H_
+#define _VM_EVENTHANDLER_H_
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+struct vm_eventhandler_map {
+	LIST_HEAD(, vm_eventhandler) vem_head;
+	struct mtx vem_mtx;
+};
+
+struct vm_eventhandler_ops {
+
+	void (*vme_exit)(struct vm_eventhandler *vme,
+			 vm_map_t map);
+
+	void (*vme_invalidate_page)(struct vm_eventhandler *vme,
+				    vm_map_t map,
+				    vm_offset_t addr);
+
+	void (*vme_invalidate_range_start)(struct vm_eventhandler *vme,
+					   vm_map_t map,
+					   vm_offset_t start,
+					   vm_offset_t end);
+
+	void (*vme_invalidate_range_end)(struct vm_eventhandler *vme,
+					 vm_map_t map,
+					 vm_offset_t start,
+					 vm_offset_t end);
+#ifdef __notyet__
+	/* needed for the Intel Shared Virtual Memory driver (not GPU) */
+	void (*vme_update_mapping)(struct vm_eventhandler *vme,
+				   vm_map_t map,
+				   vm_offset_t addr,
+				   pte_t pte);
+#endif
+};
+
+struct vm_eventhandler {
+	LIST_ENTRY(vm_eventhandler) vme_entry;
+	const struct vm_eventhandler_ops vme_ops;
+};
+
+
+static inline int
+vme_map_has_eh(vm_map_t map)
+{
+	return (__predict_false(map->vem_map != NULL));
+}
+
+void vm_eventhandler_register(vm_map_t map, struct vm_eventhandler *ve);
+void vm_eventhandler_deregister(vm_map_t map, struct vm_eventhandler *ve);
+
+int vme_map_has_invalidate_page(vm_map_t map);
+void vme_invalidate_range_start_impl(vm_map_t map, vm_offset_t start, vm_offset_t end);
+void vme_invalidate_range_end_impl(vm_map_t map, vm_offset_t start, vm_offset_t end);
+void vme_invalidate_page_impl(vm_map_t map, vm_offset_t addr);
+void vme_exit_impl(vm_map_t map);
+
+
+static inline void
+vme_invalidate_range_start(vm_map_t map, vm_offset_t start, vm_offset_t end)
+{
+	vm_offset_t addr;
+
+	if (vme_map_has_eh(map))
+		vme_invalidate_range_start_impl(map, start, end);
+	if (vme_map_has_eh(map) && vme_map_has_invalidate_page(map))
+		for (addr = start; addr < end; addr += PAGE_SIZE)
+			vme_invalidate_page_impl(map, addr);
+}
+
+static inline void
+vme_invalidate_range_end(vm_map_t map, vm_offset_t start, vm_offset_t end)
+{
+	if (vme_map_has_eh(map))
+		vme_invalidate_range_end_impl(map, start, end);
+}
+
+static inline void
+vme_exit(vm_map_t map)
+{
+	if (vme_map_has_eh(map))
+		vme_exit_impl(map);
+}
+
+#endif
Index: sys/vm/vm_eventhandler.c
===================================================================
--- /dev/null
+++ sys/vm/vm_eventhandler.c
@@ -0,0 +1,223 @@
+
+/*
+ * Copyright (c) 2016 Matt Macy (mmacy@nextbsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+
+#include <vm/vm.h>
+#include <vm/vm_map.h>
+
+#include <ck_epoch.h>
+#include <vm/vm_eventhandler.h>
+
+static MALLOC_DEFINE(M_VMEVENTHANDLER, "vme", "VM eventhandler");
+
+static ck_epoch_t vme_epoch;
+
+static void
+vme_runtime_init(void *arg __unused)
+{
+	ck_epoch_record_t *record;
+	int i;
+
+	ck_epoch_init(&vme_epoch);
+
+	/*
+	 * Populate the epoch with 2*ncpus # of records.
+	 */
+	for (i = 0; i < 2*mp_ncpus; i++) {
+		record = malloc(sizeof *record, M_VMEVENTHANDLER, M_WAITOK);
+		ck_epoch_register(&vme_epoch, record);
+		ck_epoch_unregister(record);
+	}
+}
+SYSINIT(vm_eventhandler, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, vme_runtime_init, NULL);
+
+
+static ck_epoch_record_t *
+vme_get_record(void)
+{
+	ck_epoch_record_t *record;
+
+	if (__predict_true((record = ck_epoch_recycle(&vme_epoch)) != NULL))
+		return (record);
+
+
+	/*
+	 * In order to get here every CPU has to have
+	 * 2 outstanding operations in the VM eventhandler.
+	 */
+	record = malloc(sizeof *record, M_VMEVENTHANDLER, M_WAITOK);
+	ck_epoch_register(&vme_epoch, record);
+	return (record);
+}
+
+
+void
+vm_eventhandler_register(vm_map_t map, struct vm_eventhandler *ve)
+{
+	struct vm_eventhandler_map *vem;
+
+	vem = malloc(sizeof(*vem), M_VMEVENTHANDLER, M_WAITOK|M_ZERO);
+	vm_map_lock(map);
+	if (!vme_map_has_eh(map)) {
+		mtx_init(&vem->vem_mtx, "vem lock", NULL, MTX_DEF);
+		map->vem_map = vem;
+		vem = NULL;
+	}
+	mtx_lock(&map->vem_map->vem_mtx);
+	LIST_INSERT_HEAD(&map->vem_map->vem_head, ve, vme_entry);
+	mtx_unlock(&map->vem_map->vem_mtx);
+	vm_map_unlock(map);
+
+
+	/* XXX How do we track the fact that we hold a reference to the map? */
+	free(vem, M_VMEVENTHANDLER);
+}
+
+void
+vm_eventhandler_deregister(vm_map_t map, struct vm_eventhandler *ve)
+{
+	ck_epoch_record_t *record;
+
+	record = vme_get_record();
+	if (!LIST_UNLINKED(ve, vme_entry)) {
+		ck_epoch_begin(record, NULL);
+		if (ve->vme_ops.vme_exit)
+			ve->vme_ops.vme_exit(ve, map);
+		ck_epoch_end(record, NULL);
+
+		mtx_lock(&map->vem_map->vem_mtx);
+		LIST_REMOVE_EBR(ve, vme_entry);
+		mtx_unlock(&map->vem_map->vem_mtx);
+	}
+
+	ck_epoch_barrier(record);
+	ck_epoch_unregister(record);
+}
+
+int
+vme_map_has_invalidate_page(vm_map_t map)
+{
+	ck_epoch_record_t *record;
+	struct vm_eventhandler *vme;
+	int found;
+
+	found = 0;
+
+	record = vme_get_record();
+	ck_epoch_begin(record, NULL);
+	LIST_FOREACH(vme, &map->vem_map->vem_head, vme_entry) {
+		if (vme->vme_ops.vme_invalidate_page) {
+			found = 1;
+			break;
+		}
+	}
+	ck_epoch_end(record, NULL);
+	ck_epoch_unregister(record);
+	return (found);
+}
+
+void
+vme_exit_impl(vm_map_t map)
+{
+	ck_epoch_record_t *record;
+	struct vm_eventhandler *vme;
+
+	record = vme_get_record();
+	ck_epoch_begin(record, NULL);
+	LIST_FOREACH(vme, &map->vem_map->vem_head, vme_entry) {
+		if (vme->vme_ops.vme_exit)
+			vme->vme_ops.vme_exit(vme, map);
+	}
+	ck_epoch_end(record, NULL);
+
+	mtx_lock(&map->vem_map->vem_mtx);
+	while (__predict_false(!LIST_EMPTY(&map->vem_map->vem_head))) {
+		vme = LIST_FIRST(&map->vem_map->vem_head);
+
+		LIST_REMOVE_EBR(vme, vme_entry);
+	}
+	mtx_unlock(&map->vem_map->vem_mtx);
+	ck_epoch_barrier(record);
+	ck_epoch_unregister(record);
+}
+
+void
+vme_invalidate_page_impl(vm_map_t map, vm_offset_t addr)
+{
+	ck_epoch_record_t *record;
+	struct vm_eventhandler *vme;
+
+	record = vme_get_record();
+	ck_epoch_begin(record, NULL);
+	LIST_FOREACH(vme, &map->vem_map->vem_head, vme_entry) {
+		if (vme->vme_ops.vme_invalidate_page)
+			vme->vme_ops.vme_invalidate_page(vme, map, addr);
+	}
+	ck_epoch_end(record, NULL);
+	ck_epoch_unregister(record);
+}
+
+void
+vme_invalidate_range_start_impl(vm_map_t map, vm_offset_t start, vm_offset_t end)
+{
+	ck_epoch_record_t *record;
+	struct vm_eventhandler *vme;
+
+	record = vme_get_record();
+	ck_epoch_begin(record, NULL);
+	LIST_FOREACH(vme, &map->vem_map->vem_head, vme_entry) {
+		if (vme->vme_ops.vme_invalidate_range_start)
+			vme->vme_ops.vme_invalidate_range_start(vme, map, start, end);
+	}
+	ck_epoch_end(record, NULL);
+	ck_epoch_unregister(record);
+}
+
+void
+vme_invalidate_range_end_impl(vm_map_t map, vm_offset_t start, vm_offset_t end)
+{
+	ck_epoch_record_t *record;
+	struct vm_eventhandler *vme;
+
+	record = vme_get_record();
+	ck_epoch_begin(record, NULL);
+	LIST_FOREACH(vme, &map->vem_map->vem_head, vme_entry) {
+		if (vme->vme_ops.vme_invalidate_range_end)
+			vme->vme_ops.vme_invalidate_range_end(vme, map, start, end);
+	}
+	ck_epoch_end(record, NULL);
+	ck_epoch_unregister(record);
+}
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -86,6 +86,7 @@
 #include <vm/vm_pageout.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
+#include <vm/vm_eventhandler.h>
 
 vm_map_t kernel_map;
 vm_map_t exec_map;
@@ -391,7 +392,10 @@
 	KASSERT(object == kmem_object || object == kernel_object,
 	    ("kmem_unback: only supports kernel objects."));
 
+	vme_invalidate_range_start(kernel_map, addr, addr + size);
 	pmap_remove(kernel_pmap, addr, addr + size);
+	vme_invalidate_range_end(kernel_map, addr, addr + size);
+
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	VM_OBJECT_WLOCK(object);
 	for (i = 0; i < size; i += PAGE_SIZE) {
Index: sys/vm/vm_map.h
===================================================================
--- sys/vm/vm_map.h
+++ sys/vm/vm_map.h
@@ -191,6 +191,7 @@
 #define	min_offset	header.start	/* (c) */
 #define	max_offset	header.end	/* (c) */
 	int busy;
+	struct vm_eventhandler_map *vem_map;
 };
 
 /*
Index: sys/vm/vm_map.c
===================================================================
--- sys/vm/vm_map.c
+++ sys/vm/vm_map.c
@@ -95,6 +95,7 @@
 #include <vm/vnode_pager.h>
 #include <vm/swap_pager.h>
 #include <vm/uma.h>
+#include <vm/vm_eventhandler.h>
 
 /*
  * Virtual memory maps provide for the mapping, protection,
@@ -388,6 +389,7 @@
 	p = td->td_proc;
 	vm = p->p_vmspace;
+	vme_exit(&vm->vm_map);
 	atomic_add_int(&vmspace0.vm_refcnt, 1);
 	do {
 		refcnt = vm->vm_refcnt;
@@ -2080,9 +2082,13 @@
 		if ((old_prot & ~current->protection) != 0) {
 #define	MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
 							VM_PROT_ALL)
+			if ((current->protection & MASK(current)) == VM_PROT_NONE)
+				vme_invalidate_range_start(map, current->start, current->end);
 			pmap_protect(map->pmap, current->start,
 			    current->end,
 			    current->protection & MASK(current));
+			if ((current->protection & MASK(current)) == VM_PROT_NONE)
+				vme_invalidate_range_end(map, current->start, current->end);
 #undef	MASK
 		}
 		vm_map_simplify_entry(map, current);
@@ -2823,8 +2829,11 @@
 		}
 	}
 
-	if (invalidate)
+	if (invalidate) {
+		vme_invalidate_range_start(map, start, end);
 		pmap_remove(map->pmap, start, end);
+		vme_invalidate_range_end(map, start, end);
+	}
 	failed = FALSE;
 
 	/*
@@ -3046,8 +3055,9 @@
 		if (entry->wired_count != 0) {
 			vm_map_entry_unwire(map, entry);
 		}
-
+		vme_invalidate_range_start(map, entry->start, entry->end);
 		pmap_remove(map->pmap, entry->start, entry->end);
+		vme_invalidate_range_end(map, entry->start, entry->end);
 
 		/*
 		 * Delete the entry only after removing all pmap
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -111,6 +111,7 @@
 #include <vm/swap_pager.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
+#include <vm/vm_eventhandler.h>
 
 /*
  * System initialization
@@ -751,8 +752,10 @@
 	 * table pages.
 	 */
 	if (desired == 0 && nothingwired) {
+		vme_invalidate_range_start(map, vm_map_min(map), vm_map_max(map));
 		pmap_remove(vm_map_pmap(map), vm_map_min(map),
-		    vm_map_max(map));
+		    vm_map_max(map));
+		vme_invalidate_range_end(map, vm_map_min(map), vm_map_max(map));
 	}
 	vm_map_unlock(map);
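
Usage note: a consumer of this interface embeds a struct vm_eventhandler, binds its callbacks through vme_ops, and registers the handler against the vm_map of the process whose address space it mirrors, much like a Linux mmu_notifier user. Because the *_impl functions invoke the callbacks inside a ck_epoch read-side section, the callbacks must not sleep. The sketch below is illustrative only and is not part of the patch; the svm_softc structure, the svm_* callbacks, and the use of curproc's map are hypothetical names chosen for the example.

/*
 * Hypothetical consumer: a driver that shadows a process address space
 * and needs to shoot down its own translations whenever the VM system
 * removes or restricts mappings.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_eventhandler.h>

struct svm_softc {
	struct vm_eventhandler sc_vme;	/* first member: a cast recovers the softc */
	/* ... device page-table state would live here ... */
};

static void
svm_invalidate_range_start(struct vm_eventhandler *vme, vm_map_t map,
    vm_offset_t start, vm_offset_t end)
{
	struct svm_softc *sc = (struct svm_softc *)vme;

	/* Invalidate the device's cached translations for [start, end). */
	(void)sc;
}

static void
svm_exit(struct vm_eventhandler *vme, vm_map_t map)
{
	/* The address space is being torn down; drop all device mappings. */
}

/* vme_ops is const, so the callbacks are bound when the handler is defined. */
static struct svm_softc svm_sc = {
	.sc_vme = {
		.vme_ops = {
			.vme_exit = svm_exit,
			.vme_invalidate_range_start = svm_invalidate_range_start,
		},
	},
};

static void
svm_attach_curproc(void)
{
	/* The handler must remain valid until deregistration completes. */
	vm_eventhandler_register(&curproc->p_vmspace->vm_map, &svm_sc.sc_vme);
}

static void
svm_detach_curproc(void)
{
	/* Blocks in ck_epoch_barrier() until concurrent callbacks drain. */
	vm_eventhandler_deregister(&curproc->p_vmspace->vm_map, &svm_sc.sc_vme);
}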