Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -412,33 +412,14 @@ obj = vp->v_object; zfs_vmobject_assert_wlocked(obj); - for (;;) { - if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && - pp->valid) { - if (vm_page_xbusied(pp)) { - /* - * Reference the page before unlocking and - * sleeping so that the page daemon is less - * likely to reclaim it. - */ - vm_page_reference(pp); - vm_page_sleep_if_xbusy(pp, "zfsmwb"); - continue; - } - vm_page_sbusy(pp); - } else if (pp != NULL) { - ASSERT(!pp->valid); - pp = NULL; - } - - if (pp != NULL) { - ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_object_pip_add(obj, 1); - pmap_remove_write(pp); - if (nbytes != 0) - vm_page_clear_dirty(pp, off, nbytes); - } - break; + vm_page_grab_valid(&pp, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT | + VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); + if (pp != NULL) { + ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); + vm_object_pip_add(obj, 1); + pmap_remove_write(pp); + if (nbytes != 0) + vm_page_clear_dirty(pp, off, nbytes); } return (pp); } @@ -455,32 +436,14 @@ page_wire(vnode_t *vp, int64_t start) { vm_object_t obj; - vm_page_t pp; + vm_page_t m; obj = vp->v_object; zfs_vmobject_assert_wlocked(obj); - for (;;) { - if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL && - pp->valid) { - if (vm_page_xbusied(pp)) { - /* - * Reference the page before unlocking and - * sleeping so that the page daemon is less - * likely to reclaim it. - */ - vm_page_reference(pp); - vm_page_sleep_if_xbusy(pp, "zfsmwb"); - continue; - } - - ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_page_wire(pp); - } else - pp = NULL; - break; - } - return (pp); + vm_page_grab_valid(&m, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT | + VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOBUSY); + return (m); } static void Index: head/sys/compat/linuxkpi/common/src/linux_page.c =================================================================== --- head/sys/compat/linuxkpi/common/src/linux_page.c +++ head/sys/compat/linuxkpi/common/src/linux_page.c @@ -286,27 +286,11 @@ panic("GFP_NOWAIT is unimplemented"); VM_OBJECT_WLOCK(obj); - page = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | - VM_ALLOC_WIRED); - if (page->valid != VM_PAGE_BITS_ALL) { - vm_page_xbusy(page); - if (vm_pager_has_page(obj, pindex, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &page, 1, NULL, NULL); - if (rv != VM_PAGER_OK) { - vm_page_unwire_noq(page); - vm_page_free(page); - VM_OBJECT_WUNLOCK(obj); - return (ERR_PTR(-EINVAL)); - } - MPASS(page->valid == VM_PAGE_BITS_ALL); - } else { - pmap_zero_page(page); - page->valid = VM_PAGE_BITS_ALL; - page->dirty = 0; - } - vm_page_xunbusy(page); - } + rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL | + VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); VM_OBJECT_WUNLOCK(obj); + if (rv != VM_PAGER_OK) + return (ERR_PTR(-EINVAL)); return (page); } Index: head/sys/dev/drm2/ttm/ttm_bo_vm.c =================================================================== --- head/sys/dev/drm2/ttm/ttm_bo_vm.c +++ head/sys/dev/drm2/ttm/ttm_bo_vm.c @@ -231,8 +231,7 @@ } VM_OBJECT_WLOCK(vm_obj); - if (vm_page_busied(m)) { - vm_page_sleep_if_busy(m, "ttmpbs"); + if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) { ttm_mem_io_unlock(man); ttm_bo_unreserve(bo); goto retry; @@ -240,6 +239,7 @@ m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); if (m1 == NULL) { if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) { + vm_page_xunbusy(m); VM_OBJECT_WUNLOCK(vm_obj); vm_wait(vm_obj); VM_OBJECT_WLOCK(vm_obj); @@ -253,7 +253,6 @@ bo, m, m1, (uintmax_t)offset)); } m->valid = VM_PAGE_BITS_ALL; - vm_page_xbusy(m); if (*mres != NULL) { KASSERT(*mres != m, ("losing %p %p", *mres, m)); vm_page_free(*mres); @@ -375,7 +374,7 @@ m = vm_page_lookup(vm_obj, i); if (m == NULL) continue; - if (vm_page_sleep_if_busy(m, "ttm_unm")) + if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; cdev_pager_free_page(vm_obj, m); } Index: head/sys/dev/drm2/ttm/ttm_tt.c =================================================================== --- head/sys/dev/drm2/ttm/ttm_tt.c +++ head/sys/dev/drm2/ttm/ttm_tt.c @@ -288,20 +288,12 @@ VM_OBJECT_WLOCK(obj); vm_object_pip_add(obj, 1); for (i = 0; i < ttm->num_pages; ++i) { - from_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL); - if (from_page->valid != VM_PAGE_BITS_ALL) { - if (vm_pager_has_page(obj, i, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &from_page, 1, - NULL, NULL); - if (rv != VM_PAGER_OK) { - vm_page_free(from_page); - ret = -EIO; - goto err_ret; - } - } else - vm_page_zero_invalid(from_page, TRUE); + rv = vm_page_grab_valid(&from_page, obj, i, + VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY); + if (rv != VM_PAGER_OK) { + ret = -EIO; + goto err_ret; } - vm_page_xunbusy(from_page); to_page = ttm->pages[i]; if (unlikely(to_page == NULL)) { ret = -ENOMEM; Index: head/sys/dev/xen/gntdev/gntdev.c =================================================================== --- head/sys/dev/xen/gntdev/gntdev.c +++ head/sys/dev/xen/gntdev/gntdev.c @@ -606,7 +606,7 @@ m = vm_page_lookup(gmap->map->mem, i); if (m == NULL) continue; - if (vm_page_sleep_if_busy(m, "pcmdum")) + if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; cdev_pager_free_page(gmap->map->mem, m); } Index: head/sys/dev/xen/privcmd/privcmd.c =================================================================== --- head/sys/dev/xen/privcmd/privcmd.c +++ head/sys/dev/xen/privcmd/privcmd.c @@ -128,7 +128,7 @@ m = vm_page_lookup(map->mem, i); if (m == NULL) continue; - if (vm_page_sleep_if_busy(m, "pcmdum")) + if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) goto retry; cdev_pager_free_page(map->mem, m); } Index: head/sys/kern/uipc_shm.c =================================================================== --- head/sys/kern/uipc_shm.c +++ head/sys/kern/uipc_shm.c @@ -188,24 +188,13 @@ * lock to page out tobj's pages because tobj is a OBJT_SWAP * type object. */ - m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | - VM_ALLOC_WIRED); - if (m->valid != VM_PAGE_BITS_ALL) { - vm_page_xbusy(m); - if (vm_pager_has_page(obj, idx, NULL, NULL)) { - rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL); - if (rv != VM_PAGER_OK) { - printf( - "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", - obj, idx, m->valid, rv); - vm_page_unwire_noq(m); - vm_page_free(m); - VM_OBJECT_WUNLOCK(obj); - return (EIO); - } - } else - vm_page_zero_invalid(m, TRUE); - vm_page_xunbusy(m); + rv = vm_page_grab_valid(&m, obj, idx, + VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_NOBUSY); + if (rv != VM_PAGER_OK) { + VM_OBJECT_WUNLOCK(obj); + printf("uiomove_object: vm_obj %p idx %jd pager error %d\n", + obj, idx, rv); + return (EIO); } VM_OBJECT_WUNLOCK(obj); error = uiomove_fromphys(&m, offset, tlen, uio); Index: head/sys/vm/vm_glue.c =================================================================== --- head/sys/vm/vm_glue.c +++ head/sys/vm/vm_glue.c @@ -219,24 +219,11 @@ { vm_page_t m; vm_pindex_t pindex; - int rv; - VM_OBJECT_WLOCK(object); pindex = OFF_TO_IDX(offset); - m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | - VM_ALLOC_WIRED); - if (m->valid != VM_PAGE_BITS_ALL) { - vm_page_xbusy(m); - rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); - if (rv != VM_PAGER_OK) { - vm_page_unwire_noq(m); - vm_page_free(m); - m = NULL; - goto out; - } - vm_page_xunbusy(m); - } -out: + VM_OBJECT_WLOCK(object); + (void)vm_page_grab_valid(&m, object, pindex, + VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); VM_OBJECT_WUNLOCK(object); return (m); } Index: head/sys/vm/vm_object.c =================================================================== --- head/sys/vm/vm_object.c +++ head/sys/vm/vm_object.c @@ -1954,14 +1954,10 @@ VM_OBJECT_ASSERT_WLOCKED(object); for (pindex = start; pindex < end; pindex++) { - m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); - if (m->valid != VM_PAGE_BITS_ALL) { - rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); - if (rv != VM_PAGER_OK) { - vm_page_free(m); - break; - } - } + rv = vm_page_grab_valid(&m, object, pindex, VM_ALLOC_NORMAL); + if (rv != VM_PAGER_OK) + break; + /* * Keep "m" busy because a subsequent iteration may unlock * the object. Index: head/sys/vm/vm_page.h =================================================================== --- head/sys/vm/vm_page.h +++ head/sys/vm/vm_page.h @@ -497,6 +497,7 @@ #define VM_ALLOC_ZERO 0x0040 /* (acfgp) Allocate a prezeroed page */ #define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */ #define VM_ALLOC_NOBUSY 0x0200 /* (acgp) Do not excl busy the page */ +#define VM_ALLOC_NOCREAT 0x0400 /* (gp) Don't create a page */ #define VM_ALLOC_IGN_SBUSY 0x1000 /* (gp) Ignore shared busy flag */ #define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */ #define VM_ALLOC_SBUSY 0x4000 /* (acgp) Shared busy the page */ @@ -539,6 +540,7 @@ #define PS_ALL_VALID 0x2 #define PS_NONE_BUSY 0x4 +int vm_page_busy_acquire(vm_page_t m, int allocflags); void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared); void vm_page_free(vm_page_t m); @@ -565,6 +567,8 @@ vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, vm_page_t *ma, int count); +int vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, + int allocflags); void vm_page_deactivate(vm_page_t); void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); Index: head/sys/vm/vm_page.c =================================================================== --- head/sys/vm/vm_page.c +++ head/sys/vm/vm_page.c @@ -866,6 +866,66 @@ } /* + * vm_page_busy_acquire: + * + * Acquire the busy lock as described by VM_ALLOC_* flags. Will loop + * and drop the object lock if necessary. + */ +int +vm_page_busy_acquire(vm_page_t m, int allocflags) +{ + vm_object_t obj; + u_int x; + bool locked; + + /* + * The page-specific object must be cached because page + * identity can change during the sleep, causing the + * re-lock of a different object. + * It is assumed that a reference to the object is already + * held by the callers. + */ + obj = m->object; + for (;;) { + if ((allocflags & VM_ALLOC_SBUSY) == 0) { + if (vm_page_tryxbusy(m)) + return (TRUE); + } else { + if (vm_page_trysbusy(m)) + return (TRUE); + } + if ((allocflags & VM_ALLOC_NOWAIT) != 0) + return (FALSE); + if (obj != NULL) { + locked = VM_OBJECT_WOWNED(obj); + } else { + MPASS(vm_page_wired(m)); + locked = FALSE; + } + sleepq_lock(m); + x = m->busy_lock; + if (x == VPB_UNBUSIED || + ((allocflags & VM_ALLOC_SBUSY) != 0 && + (x & VPB_BIT_SHARED) != 0) || + ((x & VPB_BIT_WAITERS) == 0 && + !atomic_cmpset_int(&m->busy_lock, x, + x | VPB_BIT_WAITERS))) { + sleepq_release(m); + continue; + } + if (locked) + VM_OBJECT_WUNLOCK(obj); + sleepq_add(m, NULL, "vmpba", 0, 0); + sleepq_wait(m, PVM); + if (locked) + VM_OBJECT_WLOCK(obj); + MPASS(m->object == obj || m->object == NULL); + if ((allocflags & VM_ALLOC_WAITFAIL) != 0) + return (FALSE); + } +} + +/* * vm_page_busy_downgrade: * * Downgrade an exclusive busy page into a single shared busy page. @@ -4046,10 +4106,13 @@ * sleeping so that the page daemon is less * likely to reclaim it. */ - vm_page_aflag_set(m, PGA_REFERENCED); + if ((allocflags & VM_ALLOC_NOCREAT) == 0) + vm_page_aflag_set(m, PGA_REFERENCED); vm_page_busy_sleep(m, "pgrbwt", (allocflags & VM_ALLOC_IGN_SBUSY) != 0); VM_OBJECT_WLOCK(object); + if ((allocflags & VM_ALLOC_WAITFAIL) != 0) + return (NULL); goto retrylookup; } else { if ((allocflags & VM_ALLOC_WIRED) != 0) @@ -4057,11 +4120,13 @@ if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); - if ((allocflags & VM_ALLOC_SBUSY) != 0) + else if ((allocflags & VM_ALLOC_SBUSY) != 0) vm_page_sbusy(m); return (m); } } + if ((allocflags & VM_ALLOC_NOCREAT) != 0) + return (NULL); m = vm_page_alloc(object, pindex, pflags); if (m == NULL) { if ((allocflags & VM_ALLOC_NOWAIT) != 0) @@ -4074,6 +4139,109 @@ } /* + * Grab a page and make it valid, paging in if necessary. Pages missing from + * their pager are zero filled and validated. + */ +int +vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int allocflags) +{ + vm_page_t m; + bool sleep, xbusy; + int pflags; + int rv; + + KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || + (allocflags & VM_ALLOC_IGN_SBUSY) != 0, + ("vm_page_grab_valid: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch")); + KASSERT((allocflags & + (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0, + ("vm_page_grab_valid: Invalid flags 0x%X", allocflags)); + VM_OBJECT_ASSERT_WLOCKED(object); + pflags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY); + pflags |= VM_ALLOC_WAITFAIL; + +retrylookup: + xbusy = false; + if ((m = vm_page_lookup(object, pindex)) != NULL) { + /* + * If the page is fully valid it can only become invalid + * with the object lock held. If it is not valid it can + * become valid with the busy lock held. Therefore, we + * may unnecessarily lock the exclusive busy here if we + * race with I/O completion not using the object lock. + * However, we will not end up with an invalid page and a + * shared lock. + */ + if (m->valid != VM_PAGE_BITS_ALL || + (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) { + sleep = !vm_page_tryxbusy(m); + xbusy = true; + } else + sleep = !vm_page_trysbusy(m); + if (sleep) { + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + if ((allocflags & VM_ALLOC_NOCREAT) == 0) + vm_page_aflag_set(m, PGA_REFERENCED); + vm_page_busy_sleep(m, "pgrbwt", (allocflags & + VM_ALLOC_IGN_SBUSY) != 0); + VM_OBJECT_WLOCK(object); + goto retrylookup; + } + if ((allocflags & VM_ALLOC_NOCREAT) != 0 && + m->valid != VM_PAGE_BITS_ALL) { + if (xbusy) + vm_page_xunbusy(m); + else + vm_page_sunbusy(m); + *mp = NULL; + return (VM_PAGER_FAIL); + } + if ((allocflags & VM_ALLOC_WIRED) != 0) + vm_page_wire(m); + if (m->valid == VM_PAGE_BITS_ALL) + goto out; + } else if ((allocflags & VM_ALLOC_NOCREAT) != 0) { + *mp = NULL; + return (VM_PAGER_FAIL); + } else if ((m = vm_page_alloc(object, pindex, pflags)) != NULL) { + xbusy = true; + } else { + goto retrylookup; + } + + vm_page_assert_xbusied(m); + MPASS(xbusy); + if (vm_pager_has_page(object, pindex, NULL, NULL)) { + rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); + if (rv != VM_PAGER_OK) { + if (allocflags & VM_ALLOC_WIRED) + vm_page_unwire_noq(m); + vm_page_free(m); + *mp = NULL; + return (rv); + } + MPASS(m->valid == VM_PAGE_BITS_ALL); + } else { + vm_page_zero_invalid(m, TRUE); + } +out: + if ((allocflags & VM_ALLOC_NOBUSY) != 0) { + if (xbusy) + vm_page_xunbusy(m); + else + vm_page_sunbusy(m); + } + if ((allocflags & VM_ALLOC_SBUSY) != 0 && xbusy) + vm_page_busy_downgrade(m); + *mp = m; + return (VM_PAGER_OK); +} + +/* * Return the specified range of pages from the given object. For each * page offset within the range, if a page already exists within the object * at that offset and it is busy, then wait for it to change state. If, @@ -4143,7 +4311,8 @@ * sleeping so that the page daemon is less * likely to reclaim it. */ - vm_page_aflag_set(m, PGA_REFERENCED); + if ((allocflags & VM_ALLOC_NOCREAT) == 0) + vm_page_aflag_set(m, PGA_REFERENCED); vm_page_busy_sleep(m, "grbmaw", (allocflags & VM_ALLOC_IGN_SBUSY) != 0); VM_OBJECT_WLOCK(object); @@ -4157,6 +4326,8 @@ if ((allocflags & VM_ALLOC_SBUSY) != 0) vm_page_sbusy(m); } else { + if ((allocflags & VM_ALLOC_NOCREAT) != 0) + break; m = vm_page_alloc_after(object, pindex + i, pflags | VM_ALLOC_COUNT(count - i), mpred); if (m == NULL) {