Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -136,7 +136,7 @@
 #define	WITNESS_COUNT		1536
 #endif
 #define	WITNESS_HASH_SIZE	251	/* Prime, gives load factor < 2 */
-#define	WITNESS_PENDLIST	(1024 + MAXCPU)
+#define	WITNESS_PENDLIST	(1280 + MAXCPU)

 /* Allocate 256 KB of stack data space */
 #define	WITNESS_LO_DATA_COUNT	2048
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -121,6 +121,15 @@
 #include

 /*
+ * Isilon: The per-cpu page cache only works when we do not require the
+ * vm_page_queue_free_mtx to protect the object's cache queue.  The cache
+ * queue gives us no real advantage and so it is disabled by default.
+ */
+#ifndef VM_ENABLE_CACHE
+#define	VM_PERCPU_FREE	1
+#endif
+
+/*
  *	Associated with page of user-allocatable memory is a
  *	page structure.
  */
@@ -152,8 +161,10 @@

 static uma_zone_t fakepg_zone;

-static struct vnode *vm_page_alloc_init(vm_page_t m);
+#ifdef VM_ENABLE_CACHE
 static void vm_page_cache_turn_free(vm_page_t m);
+#endif
+static struct vnode *vm_page_alloc_init(vm_page_t m);
 static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
 static void vm_page_enqueue(uint8_t queue, vm_page_t m);
 static void vm_page_init_fakepg(void *dummy);
@@ -179,6 +190,107 @@
 #endif
 #endif

+#ifndef VM_ENABLE_CACHE
+
+struct vm_page_percpu {
+	struct mtx	vpp_lock;
+	struct pglist	vpp_pages;
+	int		vpp_cnt;
+} __aligned(CACHE_LINE_SIZE);
+
+struct vm_page_percpu page_percpu[MAXCPU] __aligned(CACHE_LINE_SIZE);
+
+#define	VM_PERCPU_MIN		128
+#define	VM_PERCPU_TARGET	(VM_PERCPU_MIN * 2)
+#define	VM_PERCPU_MAX		(VM_PERCPU_MIN * 3)
+
+static void
+vm_page_percpu_init(void)
+{
+	int i;
+
+	for (i = 0; i < MAXCPU; i++) {
+		mtx_init(&page_percpu[i].vpp_lock, "per-cpu free mtx", NULL,
+		    MTX_DEF);
+		TAILQ_INIT(&page_percpu[i].vpp_pages);
+		page_percpu[i].vpp_cnt = 0;
+	}
+}
+
+static vm_page_t
+vm_page_percpu_alloc(vm_object_t object)
+{
+	struct vm_page_percpu *ppcpu = &page_percpu[PCPU_GET(cpuid)];
+	vm_page_t m;
+
+#if VM_NRESERVLEVEL > 0
+	/*
+	 * Skip the cache of free pages for objects that have reservations
+	 * so that they can still get superpages.  This will never be set
+	 * for objects populated via the filesystem buffercache.
+	 */
+	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
+		return (NULL);
+#endif
+
+	mtx_lock(&ppcpu->vpp_lock);
+	if (ppcpu->vpp_cnt < VM_PERCPU_MIN) {
+		mtx_lock(&vm_page_queue_free_mtx);
+		while (!vm_page_count_min() &&
+		    ppcpu->vpp_cnt < VM_PERCPU_TARGET) {
+			m = vm_phys_alloc_pages(object != NULL ?
+			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
+			if (m == NULL)
+				break;
+			vm_phys_freecnt_adj(m, -1);
+			ppcpu->vpp_cnt++;
+			TAILQ_INSERT_TAIL(&ppcpu->vpp_pages, m, plinks.q);
+		}
+		mtx_unlock(&vm_page_queue_free_mtx);
+	}
+	m = NULL;
+	if (ppcpu->vpp_cnt > 0) {
+		m = TAILQ_FIRST(&ppcpu->vpp_pages);
+		TAILQ_REMOVE(&ppcpu->vpp_pages, m, plinks.q);
+		ppcpu->vpp_cnt--;
+	}
+	mtx_unlock(&ppcpu->vpp_lock);
+
+	return (m);
+}
+
+static inline void vm_page_free_wakeup(void);
+
+static void
+vm_page_percpu_free(vm_page_t m)
+{
+	struct vm_page_percpu *ppcpu = &page_percpu[PCPU_GET(cpuid)];
+
+	mtx_lock(&ppcpu->vpp_lock);
+	TAILQ_INSERT_HEAD(&ppcpu->vpp_pages, m, plinks.q);
+	ppcpu->vpp_cnt++;
+	if (ppcpu->vpp_cnt > VM_PERCPU_MAX) {
+		mtx_lock(&vm_page_queue_free_mtx);
+		while (ppcpu->vpp_cnt > VM_PERCPU_TARGET) {
+			m = TAILQ_FIRST(&ppcpu->vpp_pages);
+			TAILQ_REMOVE(&ppcpu->vpp_pages, m, plinks.q);
+			ppcpu->vpp_cnt--;
+			vm_phys_freecnt_adj(m, 1);
+#if VM_NRESERVLEVEL > 0
+			if (!vm_reserv_free_page(m))
+#else
+			if (TRUE)
+#endif
+				vm_phys_free_pages(m, 0);
+		}
+		vm_page_free_wakeup();
+		mtx_unlock(&vm_page_queue_free_mtx);
+	}
+	mtx_unlock(&ppcpu->vpp_lock);
+}
+
+#endif /* VM_ENABLE_CACHE */
+
 /*
  * Try to acquire a physical address lock while a pmap is locked.  If we
  * fail to trylock we unlock and lock the pmap directly and cache the
@@ -618,6 +730,9 @@
 	 */
 	vm_reserv_init();
 #endif
+#ifdef VM_PERCPU_FREE
+	vm_page_percpu_init();
+#endif
 	return (vaddr);
 }

@@ -1433,6 +1548,7 @@
 void
 vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
 {
+#ifdef VM_ENABLE_CACHE
 	vm_page_t m;
 	boolean_t empty;

@@ -1451,6 +1567,7 @@
 	mtx_unlock(&vm_page_queue_free_mtx);
 	if (object->type == OBJT_VNODE && empty)
 		vdrop(object->handle);
+#endif
 }

 /*
@@ -1459,6 +1576,7 @@
  *
  *	The free page queue must be locked.
  */
+#ifdef VM_ENABLE_CACHE
 static inline vm_page_t
 vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
 {
@@ -1466,7 +1584,7 @@
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	return (vm_radix_lookup(&object->cache, pindex));
 }
-
+#endif
 /*
  * Remove the given cached page from its containing object's
  * collection of cached pages.
@@ -1500,6 +1618,7 @@
 vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
     vm_object_t new_object)
 {
+#ifdef VM_ENABLE_CACHE
 	vm_page_t m;

 	/*
@@ -1529,6 +1648,7 @@
 			vm_page_cache_turn_free(m);
 	}
 	mtx_unlock(&vm_page_queue_free_mtx);
+#endif
 }

 /*
@@ -1540,7 +1660,9 @@
 boolean_t
 vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
 {
+#ifdef VM_ENABLE_CACHE
 	vm_page_t m;
+#endif

 	/*
 	 * Insertion into an object's collection of cached pages requires the
@@ -1552,10 +1674,15 @@
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	if (__predict_true(vm_object_cache_is_empty(object)))
 		return (FALSE);
+#ifdef VM_ENABLE_CACHE
 	mtx_lock(&vm_page_queue_free_mtx);
 	m = vm_page_cache_lookup(object, pindex);
 	mtx_unlock(&vm_page_queue_free_mtx);
 	return (m != NULL);
+#else
+	panic("Found unexpected cached page.");
+	return (FALSE);
+#endif
 }

 /*
@@ -1591,7 +1718,9 @@
 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 {
 	struct vnode *vp = NULL;
+#ifdef VM_ENABLE_CACHE
 	vm_object_t m_object;
+#endif
 	vm_page_t m, mpred;
 	int flags, req_class;

@@ -1619,6 +1748,12 @@
 		    ("vm_page_alloc: pindex already allocated"));
 	}

+#ifdef VM_PERCPU_FREE
+	if ((m = vm_page_percpu_alloc(object)) != NULL) {
+		flags = 0;
+		goto gotit;
+	}
+#endif
 	/*
 	 * The page allocation request can came from consumers which already
 	 * hold the free page queue mutex, like vm_page_insert() in
@@ -1629,7 +1764,8 @@
 	    (req_class == VM_ALLOC_SYSTEM &&
 	    vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) ||
 	    (req_class == VM_ALLOC_INTERRUPT &&
-	    vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) {
+	    vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) {
+#ifdef VM_ENABLE_CACHE
 		/*
 		 * Allocate from the free queue if the number of free pages
 		 * exceeds the minimum for the request class.
@@ -1648,8 +1784,10 @@
 			else
 #endif
 				panic("vm_page_alloc: cache page %p is missing"
-				    " from the free queue", m);
-		} else if ((req & VM_ALLOC_IFCACHED) != 0) {
+				    " from the free queue", m);
+		} else
+#endif
+		if ((req & VM_ALLOC_IFCACHED) != 0) {
 			mtx_unlock(&vm_page_queue_free_mtx);
 			return (NULL);
 #if VM_NRESERVLEVEL > 0
@@ -1695,6 +1833,7 @@
 	    ("vm_page_alloc: page %p has unexpected memattr %d", m,
 	    pmap_page_get_memattr(m)));
 	if ((m->flags & PG_CACHED) != 0) {
+#ifdef VM_ENABLE_CACHE
 		KASSERT((m->flags & PG_ZERO) == 0,
 		    ("vm_page_alloc: cached page %p is PG_ZERO", m));
 		KASSERT(m->valid != 0,
@@ -1708,6 +1847,9 @@
 		if (m_object->type == OBJT_VNODE &&
 		    vm_object_cache_is_empty(m_object))
 			vp = m_object->handle;
+#else
+		panic("Found unexpected cached page.");
+#endif
 	} else {
 		KASSERT(m->valid == 0,
 		    ("vm_page_alloc: free page %p is valid", m));
@@ -1724,6 +1866,9 @@
 	if ((req & VM_ALLOC_ZERO) != 0)
 		flags = PG_ZERO;
 	flags &= m->flags;
+#ifdef VM_PERCPU_FREE
+gotit:
+#endif
 	if ((req & VM_ALLOC_NODUMP) != 0)
 		flags |= PG_NODUMP;
 	m->flags = flags;
@@ -2065,7 +2210,7 @@
 	    (req_class == VM_ALLOC_SYSTEM &&
 	    vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) ||
 	    (req_class == VM_ALLOC_INTERRUPT &&
-	    vm_cnt.v_free_count + vm_cnt.v_cache_count > 0))
+	    vm_cnt.v_free_count + vm_cnt.v_cache_count > 0))
 		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
 	else {
 		mtx_unlock(&vm_page_queue_free_mtx);
@@ -2335,6 +2480,7 @@
 	}
 }

+#ifdef VM_ENABLE_CACHE
 /*
  * Turn a cached page into a free page, by changing its attributes.
  * Keep the statistics up-to-date.
@@ -2355,6 +2501,7 @@
 	vm_cnt.v_cache_count--;
 	vm_phys_freecnt_adj(m, 1);
 }
+#endif

 /*
  *	vm_page_free_toq:
@@ -2367,6 +2514,9 @@
 void
 vm_page_free_toq(vm_page_t m)
 {
+#ifdef VM_PERCPU_FREE
+	int can_cache;
+#endif

 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		vm_page_lock_assert(m, MA_OWNED);
@@ -2380,6 +2530,13 @@
 	if (vm_page_sbusied(m))
 		panic("vm_page_free: freeing busy page %p", m);

+#ifdef VM_PERCPU_FREE
+	can_cache = 0;
+	if (m->object != NULL) {
+		VM_OBJECT_ASSERT_LOCKED(m->object);
+		can_cache = ((m->object->flags & OBJ_COLORED) == 0);
+	}
+#endif
 	/*
 	 * Unqueue, then remove page.  Note that we cannot destroy
 	 * the page here because we do not want to call the pager's
@@ -2414,6 +2571,12 @@
 	if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
 		pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);

+#ifdef VM_PERCPU_FREE
+	if (can_cache) {
+		vm_page_percpu_free(m);
+		return;
+	}
+#endif
 	/*
 	 * Insert the page into the physical memory allocator's
 	 * cache/free page queues.
@@ -2491,7 +2654,7 @@
 vm_page_unwire(vm_page_t m, uint8_t queue)
 {

-	KASSERT(queue < PQ_COUNT,
+	KASSERT(queue < PQ_COUNT || queue == PQ_NONE,
 	    ("vm_page_unwire: invalid queue %u request for page %p",
 	    queue, m));
 	if ((m->oflags & VPO_UNMANAGED) == 0)
@@ -2510,7 +2673,8 @@
 				return;
 			if (queue == PQ_INACTIVE)
 				m->flags &= ~PG_WINATCFLS;
-			vm_page_enqueue(queue, m);
+			if (queue != PQ_NONE)
+				vm_page_enqueue(queue, m);
 		}
 	} else
 		panic("vm_page_unwire: page %p's wire count is zero", m);
@@ -2556,6 +2720,7 @@
 		pq = &vm_phys_domain(m)->vmd_pagequeues[PQ_INACTIVE];
 		vm_pagequeue_lock(pq);
 		m->queue = PQ_INACTIVE;
+		if (athead)
 			TAILQ_INSERT_HEAD(&pq->pq_pl, m, plinks.q);
 		else
 			TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
@@ -2632,7 +2797,9 @@
 vm_page_cache(vm_page_t m)
 {
 	vm_object_t object;
+#ifdef VM_ENABLE_CACHE
 	boolean_t cache_was_empty;
+#endif

 	vm_page_lock_assert(m, MA_OWNED);
 	object = m->object;
@@ -2654,6 +2821,7 @@
 		vm_page_free(m);
 		return;
 	}
+#ifdef VM_ENABLE_CACHE
 	KASSERT((m->flags & PG_CACHED) == 0,
 	    ("vm_page_cache: page %p is already cached", m));

@@ -2724,6 +2892,9 @@
 		else if (!cache_was_empty && object->resident_page_count == 0)
 			vdrop(object->handle);
 	}
+#else /* !VM_ENABLE_CACHE */
+	vm_page_free(m);
+#endif
 }

 /*
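Note on the per-cpu free page cache:

The core of the vm_page.c change is the refill/drain hysteresis built around VM_PERCPU_MIN, VM_PERCPU_TARGET and VM_PERCPU_MAX. A CPU only takes vm_page_queue_free_mtx when its private cache drops below MIN (refilling up to TARGET) or grows past MAX (draining back down to TARGET), so in steady state at least VM_PERCPU_MIN allocations or frees happen between visits to the global lock. The stand-alone C program below models only that policy; it is an illustrative sketch, not part of the patch. The names pcpu_alloc, pcpu_free and global_free are invented for the example, the global pool is a bare counter standing in for vm_phys, and locking, vm_page structures and the OBJ_COLORED/reservation check are all omitted.

#include <stdio.h>

#define PCPU_MIN	128			/* refill when the cache drops below this */
#define PCPU_TARGET	(PCPU_MIN * 2)		/* refill and drain both stop at this level */
#define PCPU_MAX	(PCPU_MIN * 3)		/* drain back to TARGET once this is exceeded */

static int global_free = 1024;	/* pages in the global allocator (stand-in for vm_phys) */
static int pcpu_cnt;		/* pages held by one per-CPU cache */

/* Take one page, refilling the per-CPU cache from the global pool when it is low. */
static int
pcpu_alloc(void)
{
	if (pcpu_cnt < PCPU_MIN) {
		while (global_free > 0 && pcpu_cnt < PCPU_TARGET) {
			global_free--;	/* models vm_phys_alloc_pages() + vm_phys_freecnt_adj(m, -1) */
			pcpu_cnt++;
		}
	}
	if (pcpu_cnt == 0)
		return (-1);		/* both pools exhausted */
	pcpu_cnt--;
	return (0);
}

/* Return one page, draining the excess back to the global pool when over MAX. */
static void
pcpu_free(void)
{
	pcpu_cnt++;
	if (pcpu_cnt > PCPU_MAX) {
		while (pcpu_cnt > PCPU_TARGET) {
			pcpu_cnt--;
			global_free++;	/* models vm_phys_free_pages() + vm_phys_freecnt_adj(m, 1) */
		}
	}
}

int
main(void)
{
	int i;

	for (i = 0; i < 200; i++)
		(void)pcpu_alloc();
	printf("after 200 allocs: pcpu=%d global=%d\n", pcpu_cnt, global_free);
	for (i = 0; i < 200; i++)
		pcpu_free();
	printf("after 200 frees:  pcpu=%d global=%d\n", pcpu_cnt, global_free);
	return (0);
}

Because both loops stop at the same TARGET level, a cache that has just refilled can absorb another VM_PERCPU_MIN frees before it needs to drain, and one that has just drained can absorb another VM_PERCPU_MIN allocations before it needs to refill; that gap is what amortizes vm_page_queue_free_mtx acquisitions, at the cost of up to roughly VM_PERCPU_MAX pages per CPU being held outside the global free count.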