Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -613,11 +613,16 @@
 void vm_page_bits_set(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t set);
 bool vm_page_blacklist_add(vm_paddr_t pa, bool verbose);
 void vm_page_change_lock(vm_page_t m, struct mtx **mtx);
-vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_grab(vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_grab_unlocked(vm_object_t, vm_pindex_t, int);
 int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count);
+int vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
+    int allocflags, vm_page_t *ma, int count);
 int vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
     int allocflags);
+int vm_page_grab_valid_unlocked(vm_page_t *mp, vm_object_t object,
+    vm_pindex_t pindex, int allocflags);
 void vm_page_deactivate(vm_page_t);
 void vm_page_deactivate_noreuse(vm_page_t);
 void vm_page_dequeue(vm_page_t m);
@@ -628,8 +633,10 @@
 int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
 void vm_page_invalid(vm_page_t m);
 void vm_page_launder(vm_page_t m);
-vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
+vm_page_t vm_page_lookup(vm_object_t, vm_pindex_t);
+vm_page_t vm_page_lookup_unlocked(vm_object_t, vm_pindex_t, int allocflags);
 vm_page_t vm_page_next(vm_page_t m);
+vm_page_t vm_page_next_unlocked(vm_page_t m, int allocflags);
 void vm_page_pqbatch_drain(void);
 void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue);
 bool vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old,
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -830,19 +830,38 @@
 }
 
 static bool
-vm_page_acquire_flags(vm_page_t m, int allocflags)
+vm_page_trybusy_flags(vm_page_t m, int allocflags)
 {
-	bool locked;
 
 	if ((allocflags & (VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY)) != 0)
-		locked = vm_page_trysbusy(m);
+		return (vm_page_trysbusy(m));
 	else
-		locked = vm_page_tryxbusy(m);
+		return (vm_page_tryxbusy(m));
+}
+
+static inline bool
+vm_page_acquire_flags(vm_page_t m, int allocflags)
+{
+	bool locked;
+
+	locked = vm_page_trybusy_flags(m, allocflags);
 	if (locked && (allocflags & VM_ALLOC_WIRED) != 0)
 		vm_page_wire(m);
 	return (locked);
 }
 
+static inline void
+vm_page_busy_return(vm_page_t m, int allocflags)
+{
+
+	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
+		if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
+			vm_page_sunbusy(m);
+		else
+			vm_page_xunbusy(m);
+	}
+}
+
 /*
  * vm_page_busy_sleep_flags
  *
@@ -1650,6 +1669,59 @@
 	return (vm_radix_lookup(&object->rtree, pindex));
 }
 
+/*
+ * This should only be used by lockless functions for releasing short-term
+ * incorrect acquires.  The page may have been freed after we acquired a
+ * busy lock.  In this case busy_lock == VPB_FREED and we have nothing
+ * further to do.
+ */
+static void
+vm_page_busy_release(vm_page_t m)
+{
+	u_int x;
+
+	x = m->busy_lock;
+	for (;;) {
+		if (x == VPB_FREED)
+			break;
+		if ((x & VPB_BIT_SHARED) != 0 && VPB_SHARERS(x) > 1) {
+			if (atomic_fcmpset_int(&m->busy_lock, &x,
+			    x - VPB_ONE_SHARER))
+				break;
+			continue;
+		}
+		if (!atomic_fcmpset_rel_int(&m->busy_lock, &x, VPB_UNBUSIED))
+			continue;
+		if ((x & VPB_BIT_WAITERS) != 0)
+			wakeup(m);
+		break;
+	}
+}
+
+vm_page_t
+vm_page_lookup_unlocked(vm_object_t object, vm_pindex_t pindex, int allocflags)
+{
+	vm_page_t m;
+
+	for (;;) {
+		m = vm_radix_lookup_unlocked(&object->rtree, pindex);
+		if (m == NULL)
+			break;
+		if (!vm_page_trybusy_flags(m, allocflags)) {
+			m = NULL;
+			break;
+		}
+		if (m->object == object && m->pindex == pindex) {
+			if ((allocflags & VM_ALLOC_WIRED) != 0)
+				vm_page_wire(m);
+			vm_page_busy_return(m, allocflags);
+			return (m);
+		}
+		vm_page_busy_release(m);
+	}
+	return (NULL);
+}
+
 /*
  * vm_page_find_least:
  *
@@ -1689,6 +1761,35 @@
 	return (next);
 }
 
+vm_page_t
+vm_page_next_unlocked(vm_page_t m, int allocflags)
+{
+	vm_object_t object;
+	vm_page_t next;
+	vm_pindex_t pindex;
+
+	MPASS(vm_page_busied(m) || vm_page_wired(m));
+
+	object = m->object;
+	pindex = m->pindex + 1;
+	for (;;) {
+		next = TAILQ_NEXT(m, listq);
+		if (next == NULL || next->pindex != pindex)
+			break;
+		if (!vm_page_trybusy_flags(next, allocflags))
+			break;
+		if (next->object == object && next->pindex == pindex) {
+			if ((allocflags & VM_ALLOC_WIRED) != 0)
+				vm_page_wire(next);
+			vm_page_busy_return(next, allocflags);
+			return (next);
+		}
+		vm_page_busy_release(next);
+		cpu_spinwait();
+	}
+	return (NULL);
+}
+
 /*
  * Returns the given page's predecessor (by pindex) within the object if it is
  * resident; if none is found, NULL is returned.
@@ -4313,12 +4414,23 @@
 		pmap_zero_page(m);
 
 out:
-	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
-		if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
-			vm_page_sunbusy(m);
-		else
-			vm_page_xunbusy(m);
-	}
+	vm_page_busy_return(m, allocflags);
+
 	return (m);
+}
+
+vm_page_t
+vm_page_grab_unlocked(vm_object_t object, vm_pindex_t pindex, int allocflags)
+{
+	vm_page_t m;
+
+	m = vm_page_lookup_unlocked(object, pindex, allocflags);
+	if (m != NULL)
+		return (m);
+	VM_OBJECT_WLOCK(object);
+	m = vm_page_grab(object, pindex, allocflags);
+	VM_OBJECT_WUNLOCK(object);
+	return (m);
 }
 
@@ -4334,7 +4446,6 @@
 {
 	vm_page_t m;
 	vm_page_t ma[VM_INITIAL_PAGEIN];
-	bool sleep, xbusy;
 	int after, i, pflags, rv;
 
 	KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
@@ -4348,7 +4459,6 @@
 	pflags |= VM_ALLOC_WAITFAIL;
 
 retrylookup:
-	xbusy = false;
 	if ((m = vm_page_lookup(object, pindex)) != NULL) {
 		/*
 		 * If the page is fully valid it can only become invalid
@@ -4359,41 +4469,27 @@
 		 * However, we will not end up with an invalid page and a
 		 * shared lock.
 		 */
-		if (!vm_page_all_valid(m) ||
-		    (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) {
-			sleep = !vm_page_tryxbusy(m);
-			xbusy = true;
-		} else
-			sleep = !vm_page_trysbusy(m);
-		if (sleep) {
+		if (!vm_page_trybusy_flags(m,
+		    vm_page_all_valid(m) ? allocflags : 0)) {
 			(void)vm_page_busy_sleep_flags(object, m, "pgrbwt",
 			    allocflags);
 			goto retrylookup;
 		}
-		if ((allocflags & VM_ALLOC_NOCREAT) != 0 &&
-		    !vm_page_all_valid(m)) {
-			if (xbusy)
-				vm_page_xunbusy(m);
-			else
-				vm_page_sunbusy(m);
+		if (vm_page_all_valid(m))
+			goto out;
+		if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
+			vm_page_busy_release(m);
 			*mp = NULL;
 			return (VM_PAGER_FAIL);
 		}
-		if ((allocflags & VM_ALLOC_WIRED) != 0)
-			vm_page_wire(m);
-		if (vm_page_all_valid(m))
-			goto out;
 	} else if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
 		*mp = NULL;
 		return (VM_PAGER_FAIL);
-	} else if ((m = vm_page_alloc(object, pindex, pflags)) != NULL) {
-		xbusy = true;
-	} else {
+	} else if ((m = vm_page_alloc(object, pindex, pflags)) == NULL) {
 		goto retrylookup;
 	}
 
 	vm_page_assert_xbusied(m);
-	MPASS(xbusy);
 	if (vm_pager_has_page(object, pindex, NULL, &after)) {
 		after = MIN(after, VM_INITIAL_PAGEIN);
 		after = MIN(after, allocflags >> VM_ALLOC_COUNT_SHIFT);
@@ -4419,8 +4515,6 @@
 	/* Pager may have replaced a page. */
 	m = ma[0];
 	if (rv != VM_PAGER_OK) {
-		if ((allocflags & VM_ALLOC_WIRED) != 0)
-			vm_page_unwire_noq(m);
 		for (i = 0; i < after; i++) {
 			if (!vm_page_wired(ma[i]))
 				vm_page_free(ma[i]);
@@ -4437,18 +4531,50 @@
 		vm_page_zero_invalid(m, TRUE);
 	}
 out:
-	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
-		if (xbusy)
-			vm_page_xunbusy(m);
-		else
-			vm_page_sunbusy(m);
-	}
-	if ((allocflags & VM_ALLOC_SBUSY) != 0 && xbusy)
+	if ((allocflags & VM_ALLOC_WIRED) != 0)
+		vm_page_wire(m);
+	if ((allocflags & VM_ALLOC_SBUSY) != 0 && vm_page_xbusied(m))
 		vm_page_busy_downgrade(m);
+	else if ((allocflags & VM_ALLOC_NOBUSY) != 0)
+		vm_page_busy_release(m);
 	*mp = m;
 	return (VM_PAGER_OK);
 }
 
+int
+vm_page_grab_valid_unlocked(vm_page_t *mp, vm_object_t object,
+    vm_pindex_t pindex, int allocflags)
+{
+	vm_page_t m;
+	int flags;
+	int error;
+
+	/*
+	 * Attempt a lockless lookup and busy.  We need at least an sbusy
+	 * before we can inspect the valid field and return a wired page.
+	 */
+	if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) != 0)
+		flags = VM_ALLOC_SBUSY;
+	else
+		flags = 0;
+	m = vm_page_lookup_unlocked(object, pindex, flags);
+	if (m != NULL) {
+		if (vm_page_all_valid(m)) {
+			if ((allocflags & VM_ALLOC_WIRED) != 0)
+				vm_page_wire(m);
+			if ((allocflags & VM_ALLOC_NOBUSY) != 0)
+				vm_page_sunbusy(m);
+			*mp = m;
+			return (VM_PAGER_OK);
+		}
+		vm_page_busy_release(m);
+	}
+	VM_OBJECT_WLOCK(object);
+	error = vm_page_grab_valid(mp, object, pindex, allocflags);
+	VM_OBJECT_WUNLOCK(object);
+	return (error);
+}
+
 /*
  * Return the specified range of pages from the given object.  For each
  * page offset within the range, if a page already exists within the object
@@ -4524,18 +4650,54 @@
 			pmap_zero_page(m);
 			vm_page_valid(m);
 		}
-		if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
-			if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0)
-				vm_page_sunbusy(m);
-			else
-				vm_page_xunbusy(m);
-		}
+		vm_page_busy_return(m, allocflags);
 		ma[i] = mpred = m;
 		m = vm_page_next(m);
 	}
 	return (i);
 }
 
+int
+vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex,
+    int allocflags, vm_page_t *ma, int count)
+{
+	vm_page_t m;
+	int flags;
+	int i;
+
+	i = 0;
+	flags = allocflags;
+	if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
+		flags &= ~VM_ALLOC_NOBUSY;
+		if ((flags & VM_ALLOC_IGN_SBUSY) != 0)
+			flags |= VM_ALLOC_SBUSY;
+	}
+	m = vm_page_lookup_unlocked(object, pindex, flags);
+	while (m != NULL) {
+		if (vm_page_none_valid(m) &&
+		    (allocflags & VM_ALLOC_ZERO) != 0) {
+			if ((m->flags & PG_ZERO) == 0)
+				pmap_zero_page(m);
+			vm_page_valid(m);
+		}
+		/* m will still be wired or busy according to flags. */
+		ma[i] = m;
+		vm_page_busy_return(m, allocflags);
+		if (++i == count)
+			break;
+		m = vm_page_next_unlocked(m, flags);
+	}
+	if (i < count) {
+		count -= i;
+		pindex += i;
+		VM_OBJECT_WLOCK(object);
+		i += vm_page_grab_pages(object, pindex, allocflags, &ma[i],
+		    count);
+		VM_OBJECT_WUNLOCK(object);
+	}
+	return (i);
+}
+
 /*
  * Mapping function for valid or dirty bits in a page.
  *
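
Two usage sketches follow.  They are reviewer commentary, not part of the
patch; the consumer functions and error mappings are hypothetical, and only
the vm_page_grab_*_unlocked() calls and their flags come from the diff above.

First, vm_page_grab_valid_unlocked().  On the common path no object lock is
taken: the lockless lookup returns a busied, fully valid page, and the
function falls back to the locked vm_page_grab_valid() only on a lookup miss,
a busy collision, or a page that still needs pagein.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>

/* Hypothetical consumer: fetch one valid page, wired for later use. */
static int
example_get_page(vm_object_t object, vm_pindex_t pindex, vm_page_t *mp)
{
	int rv;

	/* VM_ALLOC_WIRED | VM_ALLOC_NOBUSY: returned wired and unbusied. */
	rv = vm_page_grab_valid_unlocked(mp, object, pindex,
	    VM_ALLOC_WIRED | VM_ALLOC_NOBUSY);
	if (rv != VM_PAGER_OK)
		return (EIO);
	/* Use *mp, then vm_page_unwire(*mp, PQ_ACTIVE) when done. */
	return (0);
}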
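Second, vm_page_grab_pages_unlocked().  VM_ALLOC_WIRED is combined with
VM_ALLOC_NOBUSY here because, per the comment in the new function, each page
must remain wired or busy once it is handed back in ma[] and while the
iteration steps forward with vm_page_next_unlocked(), which asserts exactly
that with MPASS(vm_page_busied(m) || vm_page_wired(m)).

/* Hypothetical consumer: grab and wire a zero-filled run of pages. */
static int
example_get_run(vm_object_t object, vm_pindex_t start, vm_page_t *ma,
    int count)
{
	int n;

	n = vm_page_grab_pages_unlocked(object, start,
	    VM_ALLOC_WIRED | VM_ALLOC_NOBUSY | VM_ALLOC_ZERO, ma, count);
	/*
	 * Without VM_ALLOC_NOWAIT or VM_ALLOC_WAITFAIL the locked fallback
	 * should fill the whole run, so n == count is expected; check
	 * defensively anyway.
	 */
	return (n == count ? 0 : ENOMEM);
}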