Index: sys/amd64/sgx/sgx.c =================================================================== --- sys/amd64/sgx/sgx.c +++ sys/amd64/sgx/sgx.c @@ -220,8 +220,8 @@ page = PHYS_TO_VM_PAGE(epc->phys); - vm_page_insert(page, object, idx); page->valid = VM_PAGE_BITS_ALL; + vm_page_insert(page, object, idx); } return (0); @@ -610,8 +610,8 @@ VM_OBJECT_ASSERT_WLOCKED(object); - vm_page_insert(page, object, pidx); page->valid = VM_PAGE_BITS_ALL; + vm_page_insert(page, object, pidx); } static void Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1702,11 +1702,13 @@ db = dbp[0]; for (i = 0; i < *rbehind; i++) { m = vm_page_grab(vmobj, ma[0]->pindex - 1 - i, - VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_NOBUSY); + VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | + VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY); if (m == NULL) break; - if (m->valid != 0) { + if (!vm_page_none_valid(m)) { ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); + vm_page_sunbusy(m); break; } ASSERT(m->dirty == 0); @@ -1717,13 +1719,14 @@ va = zfs_map_page(m, &sf); bcopy((char *)db->db_data + bufoff, va, PAGESIZE); zfs_unmap_page(sf); - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); vm_page_unlock(m); + vm_page_sunbusy(m); } *rbehind = i; @@ -1734,7 +1737,7 @@ m = ma[mi]; if (m != bogus_page) { vm_page_assert_xbusied(m); - ASSERT(m->valid == 0); + ASSERT(vm_page_none_valid(m)); ASSERT(m->dirty == 0); ASSERT(!pmap_page_is_mapped(m)); va = zfs_map_page(m, &sf); @@ -1762,7 +1765,7 @@ if (pgoff == PAGESIZE) { if (m != bogus_page) { zfs_unmap_page(sf); - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); } ASSERT(mi < count); mi++; @@ -1811,16 +1814,18 @@ ASSERT(m != bogus_page); bzero(va + pgoff, PAGESIZE - pgoff); zfs_unmap_page(sf); - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); } for (i = 0; i < *rahead; i++) { m = vm_page_grab(vmobj, ma[count - 1]->pindex + 1 + i, - VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_NOBUSY); + VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | + VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY); if (m == NULL) break; - if (m->valid != 0) { + if (!vm_page_none_valid(m)) { ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); + vm_page_sunbusy(m); break; } ASSERT(m->dirty == 0); @@ -1837,13 +1842,14 @@ bzero(va + tocpy, PAGESIZE - tocpy); } zfs_unmap_page(sf); - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); vm_page_lock(m); if ((m->busy_lock & VPB_BIT_WAITERS) != 0) vm_page_activate(m); else vm_page_deactivate(m); vm_page_unlock(m); + vm_page_sunbusy(m); } *rahead = i; zfs_vmobject_wunlock(vmobj); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -534,7 +534,7 @@ pp = vm_page_grab(obj, OFF_TO_IDX(start), VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY); - if (pp->valid == 0) { + if (vm_page_none_valid(pp)) { zfs_vmobject_wunlock(obj); va = zfs_map_page(pp, &sf); error = dmu_read(os, zp->z_id, start, bytes, va, @@ -543,17 +543,16 @@ bzero(va + bytes, PAGESIZE - bytes); zfs_unmap_page(sf); zfs_vmobject_wlock(obj); - vm_page_sunbusy(pp); - if (error) { - if (!vm_page_busied(pp) && !vm_page_wired(pp) && - pp->valid == 0) - vm_page_free(pp); - } else { 
- pp->valid = VM_PAGE_BITS_ALL; + if (error == 0) { + vm_page_valid(pp); vm_page_lock(pp); vm_page_activate(pp); vm_page_unlock(pp); } + vm_page_sunbusy(pp); + if (error != 0 && !vm_page_wired(pp) && + pp->valid == 0 && vm_page_tryxbusy(pp)) + vm_page_free(pp); } else { ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); vm_page_sunbusy(pp); Index: sys/compat/linuxkpi/common/src/linux_compat.c =================================================================== --- sys/compat/linuxkpi/common/src/linux_compat.c +++ sys/compat/linuxkpi/common/src/linux_compat.c @@ -514,7 +514,7 @@ vm_page_free(*mres); *mres = page; } - page->valid = VM_PAGE_BITS_ALL; + vm_page_valid(page); return (VM_PAGER_OK); } return (VM_PAGER_FAIL); Index: sys/dev/drm2/ttm/ttm_bo_vm.c =================================================================== --- sys/dev/drm2/ttm/ttm_bo_vm.c +++ sys/dev/drm2/ttm/ttm_bo_vm.c @@ -252,7 +252,7 @@ ("inconsistent insert bo %p m %p m1 %p offset %jx", bo, m, m1, (uintmax_t)offset)); } - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); if (*mres != NULL) { KASSERT(*mres != m, ("losing %p %p", *mres, m)); vm_page_free(*mres); Index: sys/dev/drm2/ttm/ttm_tt.c =================================================================== --- sys/dev/drm2/ttm/ttm_tt.c +++ sys/dev/drm2/ttm/ttm_tt.c @@ -344,7 +344,7 @@ continue; to_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL); pmap_copy_page(from_page, to_page); - to_page->valid = VM_PAGE_BITS_ALL; + vm_page_valid(to_page); vm_page_dirty(to_page); vm_page_xunbusy(to_page); } Index: sys/dev/md/md.c =================================================================== --- sys/dev/md/md.c +++ sys/dev/md/md.c @@ -1074,7 +1074,7 @@ len = ((i == lastp) ? lastend : PAGE_SIZE) - offs; m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM); if (bp->bio_cmd == BIO_READ) { - if (m->valid == VM_PAGE_BITS_ALL) + if (vm_page_all_valid(m)) rv = VM_PAGER_OK; else rv = vm_pager_get_pages(sc->object, &m, 1, @@ -1090,7 +1090,7 @@ * can be recreated if thrown out.
*/ pmap_zero_page(m); - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); } if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pmap_copy_pages(&m, offs, bp->bio_ma, @@ -1104,7 +1104,7 @@ cpu_flush_dcache(p, len); } } else if (bp->bio_cmd == BIO_WRITE) { - if (len == PAGE_SIZE || m->valid == VM_PAGE_BITS_ALL) + if (len == PAGE_SIZE || vm_page_all_valid(m)) rv = VM_PAGER_OK; else rv = vm_pager_get_pages(sc->object, &m, 1, @@ -1125,13 +1125,13 @@ physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len); } - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); if (m->dirty != VM_PAGE_BITS_ALL) { vm_page_dirty(m); vm_pager_page_unswapped(m); } } else if (bp->bio_cmd == BIO_DELETE) { - if (len == PAGE_SIZE || m->valid == VM_PAGE_BITS_ALL) + if (len == PAGE_SIZE || vm_page_all_valid(m)) rv = VM_PAGER_OK; else rv = vm_pager_get_pages(sc->object, &m, 1, Index: sys/dev/netmap/netmap_freebsd.c =================================================================== --- sys/dev/netmap/netmap_freebsd.c +++ sys/dev/netmap/netmap_freebsd.c @@ -1056,7 +1056,7 @@ *mres = page; vm_page_insert(page, object, pidx); } - page->valid = VM_PAGE_BITS_ALL; + vm_page_valid(page); return (VM_PAGER_OK); } Index: sys/dev/xen/gntdev/gntdev.c =================================================================== --- sys/dev/xen/gntdev/gntdev.c +++ sys/dev/xen/gntdev/gntdev.c @@ -836,8 +836,8 @@ } vm_page_busy_acquire(page, 0); + vm_page_valid(page); vm_page_insert(page, object, pidx); - page->valid = VM_PAGE_BITS_ALL; *mres = page; return (VM_PAGER_OK); } Index: sys/dev/xen/privcmd/privcmd.c =================================================================== --- sys/dev/xen/privcmd/privcmd.c +++ sys/dev/xen/privcmd/privcmd.c @@ -179,8 +179,8 @@ } vm_page_busy_acquire(page, 0); + vm_page_valid(page); vm_page_insert(page, object, pidx); - page->valid = VM_PAGE_BITS_ALL; *mres = page; return (VM_PAGER_OK); } Index: sys/fs/nfsclient/nfs_clbio.c =================================================================== --- sys/fs/nfsclient/nfs_clbio.c +++ sys/fs/nfsclient/nfs_clbio.c @@ -174,7 +174,7 @@ * XXXGL: is that true for NFS, where short read can occur??? */ VM_OBJECT_WLOCK(object); - if (pages[npages - 1]->valid != 0 && --npages == 0) + if (!vm_page_none_valid(pages[npages - 1]) && --npages == 0) goto out; VM_OBJECT_WUNLOCK(object); @@ -227,14 +227,14 @@ /* * Read operation filled an entire page */ - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); KASSERT(m->dirty == 0, ("nfs_getpages: page %p is dirty", m)); } else if (size > toff) { /* * Read operation filled a partial page. */ - m->valid = 0; + vm_page_invalid(m); vm_page_set_valid_range(m, 0, size - toff); KASSERT(m->dirty == 0, ("nfs_getpages: page %p is dirty", m)); Index: sys/fs/smbfs/smbfs_io.c =================================================================== --- sys/fs/smbfs/smbfs_io.c +++ sys/fs/smbfs/smbfs_io.c @@ -457,7 +457,7 @@ * XXXGL: is that true for SMB filesystem? */ VM_OBJECT_WLOCK(object); - if (pages[npages - 1]->valid != 0 && --npages == 0) + if (!vm_page_none_valid(pages[npages - 1]) && --npages == 0) goto out; VM_OBJECT_WUNLOCK(object); @@ -505,14 +505,14 @@ /* * Read operation filled an entire page */ - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); KASSERT(m->dirty == 0, ("smbfs_getpages: page %p is dirty", m)); } else if (size > toff) { /* * Read operation filled a partial page. 
*/ - m->valid = 0; + vm_page_invalid(m); vm_page_set_valid_range(m, 0, size - toff); KASSERT(m->dirty == 0, ("smbfs_getpages: page %p is dirty", m)); Index: sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- sys/fs/tmpfs/tmpfs_subr.c +++ sys/fs/tmpfs/tmpfs_subr.c @@ -1408,7 +1408,7 @@ retry: m = vm_page_grab(uobj, idx, VM_ALLOC_NOCREAT); if (m != NULL) { - MPASS(m->valid == VM_PAGE_BITS_ALL); + MPASS(vm_page_all_valid(m)); } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) { m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL); Index: sys/kern/kern_exec.c =================================================================== --- sys/kern/kern_exec.c +++ sys/kern/kern_exec.c @@ -979,11 +979,15 @@ retry: ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); - if (ma[0]->valid != VM_PAGE_BITS_ALL) { + if (!vm_page_all_valid(ma[0])) { if (vm_page_busy_acquire(ma[0], VM_ALLOC_WAITFAIL) == 0) { vm_page_unwire_noq(ma[0]); goto retry; } + if (vm_page_all_valid(ma[0])) { + vm_page_xunbusy(ma[0]); + goto out; + } if (!vm_pager_has_page(object, 0, NULL, &after)) { if (vm_page_unwire_noq(ma[0])) vm_page_free(ma[0]); @@ -1029,6 +1033,8 @@ for (i = 1; i < initial_pagein; i++) vm_page_readahead_finish(ma[i]); } + +out: VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -459,7 +459,7 @@ retry: m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT); if (m != NULL) { - MPASS(m->valid == VM_PAGE_BITS_ALL); + MPASS(vm_page_all_valid(m)); } else if (vm_pager_has_page(object, idx, NULL, NULL)) { m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL); @@ -485,7 +485,7 @@ } if (m != NULL) { pmap_zero_page_area(m, base, PAGE_SIZE - base); - KASSERT(m->valid == VM_PAGE_BITS_ALL, + KASSERT(vm_page_all_valid(m), ("shm_dotruncate: page %p is invalid", m)); vm_page_dirty(m); vm_page_xunbusy(m); Index: sys/kern/vfs_bio.c =================================================================== --- sys/kern/vfs_bio.c +++ sys/kern/vfs_bio.c @@ -956,6 +956,12 @@ { VM_OBJECT_ASSERT_LOCKED(m->object); + + /* + * This function and its results are protected by higher level + * synchronization requiring vnode and buf locks to page in and + * validate pages. + */ if (bp->b_flags & B_CACHE) { int base = (foff + off) & PAGE_MASK; if (vm_page_is_valid(m, base, size) == 0) @@ -4639,7 +4645,7 @@ if (clear_modify) { pmap_remove_write(m); vfs_page_set_validclean(bp, foff, m); - } else if (m->valid == VM_PAGE_BITS_ALL && + } else if (vm_page_all_valid(m) && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; bogus = true; @@ -4680,6 +4686,14 @@ n = PAGE_SIZE - (base & PAGE_MASK); VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); + + /* + * Busy may not be strictly necessary here because the pages are + * unlikely to be fully valid and the vnode lock will synchronize + * their access via getpages. It is grabbed for consistency with + * other page validation. 
+ */ + vfs_busy_pages_acquire(bp); for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { m = bp->b_pages[i]; if (n > size) @@ -4689,6 +4703,7 @@ size -= n; n = PAGE_SIZE; } + vfs_busy_pages_release(bp); VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); } @@ -4716,6 +4731,7 @@ bp->b_flags &= ~B_INVAL; bp->b_ioflags &= ~BIO_ERROR; VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); + vfs_busy_pages_acquire(bp); if ((bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) && (bp->b_offset & PAGE_MASK) == 0) { if (bp->b_pages[0] == bogus_page) @@ -4757,6 +4773,7 @@ bp->b_pages[i]->valid |= mask; } unlock: + vfs_busy_pages_release(bp); VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); bp->b_resid = 0; } @@ -5188,7 +5205,7 @@ * the end of the function catches the race in a * reliable way (protected by the object lock). */ - if (m->valid == VM_PAGE_BITS_ALL) + if (vm_page_all_valid(m)) continue; poff = IDX_TO_OFF(m->pindex); @@ -5218,7 +5235,7 @@ * cache pressure. */ if (buf_pager_relbuf || - m->valid != VM_PAGE_BITS_ALL) + !vm_page_all_valid(m)) bp->b_flags |= B_RELBUF; bp->b_flags &= ~B_NOCACHE; @@ -5228,12 +5245,12 @@ } } KASSERT(1 /* racy, enable for debugging */ || - m->valid == VM_PAGE_BITS_ALL || i == count - 1, + vm_page_all_valid(m) || i == count - 1, ("buf %d %p invalid", i, m)); if (i == count - 1 && lpart) { VM_OBJECT_WLOCK(object); - if (m->valid != 0 && - m->valid != VM_PAGE_BITS_ALL) + if (!vm_page_none_valid(m) && + !vm_page_all_valid(m)) vm_page_zero_invalid(m, TRUE); VM_OBJECT_WUNLOCK(object); } @@ -5260,7 +5277,7 @@ * invalidated or removed, so we must restart for * safety as well. */ - if (ma[i]->valid != VM_PAGE_BITS_ALL) + if (!vm_page_all_valid(ma[i])) redo = true; } if (redo && error == 0) Index: sys/kern/vfs_cluster.c =================================================================== --- sys/kern/vfs_cluster.c +++ sys/kern/vfs_cluster.c @@ -465,11 +465,13 @@ if (toff + tinc > PAGE_SIZE) tinc = PAGE_SIZE - toff; VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object); - if ((tbp->b_pages[j]->valid & - vm_page_bits(toff, tinc)) != 0) - break; if (vm_page_trysbusy(tbp->b_pages[j]) == 0) break; + if ((tbp->b_pages[j]->valid & + vm_page_bits(toff, tinc)) != 0) { + vm_page_sunbusy(tbp->b_pages[j]); + break; + } vm_object_pip_add(tbp->b_bufobj->bo_object, 1); off += tinc; tsize -= tinc; @@ -524,7 +526,7 @@ bp->b_pages[bp->b_npages] = m; bp->b_npages++; } - if (m->valid == VM_PAGE_BITS_ALL) + if (vm_page_all_valid(m)) tbp->b_pages[j] = bogus_page; } VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object); @@ -548,7 +550,7 @@ VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); for (j = 0; j < bp->b_npages; j++) { VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object); - if (bp->b_pages[j]->valid == VM_PAGE_BITS_ALL) + if (vm_page_all_valid(bp->b_pages[j])) bp->b_pages[j] = bogus_page; } VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); Index: sys/vm/device_pager.c =================================================================== --- sys/vm/device_pager.c +++ sys/vm/device_pager.c @@ -395,7 +395,7 @@ vm_page_free(*mres); *mres = page; } - page->valid = VM_PAGE_BITS_ALL; + vm_page_valid(page); return (VM_PAGER_OK); } Index: sys/vm/phys_pager.c =================================================================== --- sys/vm/phys_pager.c +++ sys/vm/phys_pager.c @@ -145,12 +145,12 @@ VM_OBJECT_ASSERT_WLOCKED(object); for (i = 0; i < count; i++) { - if (m[i]->valid == 0) { + if (vm_page_none_valid(m[i])) { if ((m[i]->flags & PG_ZERO) == 0) pmap_zero_page(m[i]); - m[i]->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m[i]); } - KASSERT(m[i]->valid 
== VM_PAGE_BITS_ALL, + KASSERT(vm_page_all_valid(m[i]), ("phys_pager_getpages: partially valid page %p", m[i])); KASSERT(m[i]->dirty == 0, ("phys_pager_getpages: dirty page %p", m[i])); @@ -209,10 +209,8 @@ ahead = MIN(end - i, PHYSALLOC); m = vm_page_grab(object, i, VM_ALLOC_NORMAL | VM_ALLOC_COUNT(ahead)); - if (m->valid != VM_PAGE_BITS_ALL) { + if (!vm_page_all_valid(m)) vm_page_zero_invalid(m, TRUE); - m->valid = VM_PAGE_BITS_ALL; - } KASSERT(m->dirty == 0, ("phys_pager_populate: dirty page %p", m)); } Index: sys/vm/sg_pager.c =================================================================== --- sys/vm/sg_pager.c +++ sys/vm/sg_pager.c @@ -198,7 +198,7 @@ vm_page_free(m[0]); vm_page_unlock(m[0]); m[0] = page; - page->valid = VM_PAGE_BITS_ALL; + vm_page_valid(page); if (rbehind) *rbehind = 0; Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -1554,7 +1554,7 @@ * be overridden by the original caller of * getpages so don't play cute tricks here. */ - m->valid = 0; + vm_page_invalid(m); } else { /* * If a write error occurs, reactivate page @@ -1582,7 +1582,7 @@ KASSERT(m->dirty == 0, ("swp_pager_async_iodone: page %p is dirty", m)); - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); if (i < bp->b_pgbefore || i >= bp->b_npages - bp->b_pgafter) vm_page_readahead_finish(m); Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -211,6 +211,7 @@ return; VM_OBJECT_ASSERT_LOCKED(m->object); + VM_PAGE_OBJECT_BUSY_ASSERT(m); need_dirty = ((fault_type & VM_PROT_WRITE) != 0 && (fault_flags & VM_FAULT_WIRE) == 0) || @@ -285,7 +286,7 @@ m = vm_page_lookup(fs->first_object, fs->first_pindex); /* A busy page can be mapped for read|execute access. */ if (m == NULL || ((prot & VM_PROT_WRITE) != 0 && - vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL) { + vm_page_busied(m)) || !vm_page_all_valid(m)) { rv = KERN_FAILURE; goto out; } @@ -368,7 +369,7 @@ * valid, and exclusively busied. */ MPASS(m != NULL); - MPASS(m->valid == VM_PAGE_BITS_ALL); + MPASS(vm_page_all_valid(m)); MPASS(vm_page_xbusied(m)); } @@ -830,7 +831,7 @@ * (readable), jump to readrest, else break-out ( we * found the page ). */ - if (fs.m->valid != VM_PAGE_BITS_ALL) + if (!vm_page_all_valid(fs.m)) goto readrest; break; /* break to PAGE HAS BEEN FOUND */ } @@ -1154,7 +1155,7 @@ VM_CNT_INC(v_ozfod); } VM_CNT_INC(v_zfod); - fs.m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(fs.m); /* Don't try to prefault neighboring pages. */ faultcount = 1; break; /* break to PAGE HAS BEEN FOUND */ @@ -1245,7 +1246,7 @@ * Oh, well, lets copy it. */ pmap_copy_page(fs.m, fs.first_m); - fs.first_m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(fs.first_m); if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { vm_page_wire(fs.first_m); @@ -1364,7 +1365,7 @@ * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. 
*/ - KASSERT(fs.m->valid == VM_PAGE_BITS_ALL, + KASSERT(vm_page_all_valid(fs.m), ("vm_fault: page %p partially invalid", fs.m)); VM_OBJECT_WUNLOCK(fs.object); @@ -1480,7 +1481,7 @@ entry->start); while ((m = m_next) != NULL && m->pindex < pend) { m_next = TAILQ_NEXT(m, listq); - if (m->valid != VM_PAGE_BITS_ALL || + if (!vm_page_all_valid(m) || vm_page_busied(m)) continue; @@ -1577,7 +1578,7 @@ VM_OBJECT_RUNLOCK(lobject); break; } - if (m->valid == VM_PAGE_BITS_ALL && + if (vm_page_all_valid(m) && (m->flags & PG_FICTITIOUS) == 0) pmap_enter_quick(pmap, addr, m, entry->protection); if (!obj_locked || lobject != entry->object.vm_object) @@ -1852,7 +1853,7 @@ * all copies of the wired map entry have similar * backing pages. */ - if (dst_m->valid == VM_PAGE_BITS_ALL) { + if (vm_page_all_valid(dst_m)) { pmap_enter(dst_map->pmap, vaddr, dst_m, prot, access | (upgrade ? PMAP_ENTER_WIRED : 0), 0); } Index: sys/vm/vm_map.c =================================================================== --- sys/vm/vm_map.c +++ sys/vm/vm_map.c @@ -2397,7 +2397,7 @@ psize = tmpidx; break; } - if (p->valid == VM_PAGE_BITS_ALL) { + if (vm_page_all_valid(p)) { if (p_start == NULL) { start = addr + ptoa(tmpidx); p_start = p; Index: sys/vm/vm_mmap.c =================================================================== --- sys/vm/vm_mmap.c +++ sys/vm/vm_mmap.c @@ -893,7 +893,7 @@ } } else vm_page_unlock(m); - KASSERT(m->valid == VM_PAGE_BITS_ALL, + KASSERT(vm_page_all_valid(m), ("mincore: page %p is mapped but invalid", m)); } else if (mincoreinfo == 0) { @@ -915,7 +915,7 @@ pindex = OFF_TO_IDX(current->offset + (addr - current->start)); m = vm_page_lookup(object, pindex); - if (m != NULL && m->valid == 0) + if (m != NULL && vm_page_none_valid(m)) m = NULL; if (m != NULL) mincoreinfo = MINCORE_INCORE; Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -841,7 +841,7 @@ if (pi >= tend) break; np = TAILQ_NEXT(p, listq); - if (p->valid == 0) + if (vm_page_none_valid(p)) continue; if (vm_page_busy_acquire(p, VM_ALLOC_WAITFAIL) == 0) { if (object->generation != curgeneration) { @@ -1161,10 +1161,10 @@ } /* - * If the page is not in a normal state, skip it. + * If the page is not in a normal state, skip it. The page + * cannot be invalidated while the object lock is held. */ - if (tm->valid != VM_PAGE_BITS_ALL || - vm_page_wired(tm)) + if (!vm_page_all_valid(tm) || vm_page_wired(tm)) goto next_pindex; KASSERT((tm->flags & PG_FICTITIOUS) == 0, ("vm_object_madvise: page %p is fictitious", tm)); @@ -1488,7 +1488,11 @@ * object and we might as well give up now. */ pp = vm_page_lookup(object, new_pindex); - if ((pp == NULL || pp->valid == 0) && + /* + * The valid check here is stable due to the object lock being + * required to clear valid and initiate paging.
+ */ + if ((pp == NULL || vm_page_none_valid(pp)) && !vm_pager_has_page(object, new_pindex, NULL, NULL)) return (false); } @@ -1567,7 +1571,7 @@ continue; } - KASSERT(pp == NULL || pp->valid != 0, + KASSERT(pp == NULL || !vm_page_none_valid(pp), ("unbusy invalid page %p", pp)); if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL, @@ -1894,7 +1898,7 @@ object->ref_count != 0) pmap_remove_all(p); if ((options & OBJPR_CLEANONLY) == 0) { - p->valid = 0; + vm_page_invalid(p); vm_page_undirty(p); } vm_page_xunbusy(p); @@ -1902,7 +1906,8 @@ } KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); - if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { + if ((options & OBJPR_CLEANONLY) != 0 && + !vm_page_none_valid(p)) { if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0 && !vm_page_try_remove_write(p)) Index: sys/vm/vm_page.h =================================================================== --- sys/vm/vm_page.h +++ sys/vm/vm_page.h @@ -92,31 +92,66 @@ * and sundry status bits. * * In general, operations on this structure's mutable fields are - * synchronized using either one of or a combination of the lock on the - * object that the page belongs to (O), the page lock (P), - * the per-domain lock for the free queues (F), or the page's queue - * lock (Q). The physical address of a page is used to select its page - * lock from a pool. The queue lock for a page depends on the value of - * its queue field and described in detail below. If a field is - * annotated below with two of these locks, then holding either lock is - * sufficient for read access, but both locks are required for write - * access. An annotation of (C) indicates that the field is immutable. - * An annotation of (A) indicates that modifications to the field must - * be atomic. Accesses to such fields may require additional - * synchronization depending on the context. + * synchronized using either one of or a combination of locks. If a + * field is annotated with two of these locks then holding either is + * sufficient for read access but both are required for write access. + * The physical address of a page is used to select its page lock from + * a pool. The queue lock for a page depends on the value of its queue + * field and is described in detail below. + * + * The following annotations are possible: + * (A) the field is atomic and may require additional synchronization. + * (B) the page busy lock. + * (C) the field is immutable. + * (F) the per-domain lock for the free queues. + * (M) Machine dependent, defined by pmap layer. + * (O) the object that the page belongs to. + * (P) the page lock. + * (Q) the page's queue lock. + * + * The busy lock is an embedded reader-writer lock that protects the + * page's contents and identity (i.e., its <object, pindex> tuple) as + * well as certain valid/dirty modifications. To avoid bloating the + * page structure, the busy lock lacks some of the features available + * to the kernel's general-purpose synchronization primitives. As a result, + * busy lock ordering rules are not verified, lock recursion is not + * detected, and an attempt to xbusy a busy page or sbusy an xbusy page + * will trigger a panic rather than causing the thread to block. + * vm_page_sleep_if_busy() can be used to sleep until the page's busy + * state changes, after which the caller must re-lookup the page and + * re-evaluate its state. vm_page_busy_acquire() will block until + * the lock is acquired.
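A minimal sketch of the fill protocol the comment above describes, assuming the vm_page_valid()/vm_page_all_valid() KPI added by this patch; the helper name and the zero-fill step are hypothetical, and error handling is omitted:

static void
example_fill_page(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	/* vm_page_grab() returns the page exclusively busied. */
	VM_OBJECT_WLOCK(object);
	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL);
	VM_OBJECT_WUNLOCK(object);

	if (!vm_page_all_valid(m)) {
		/* Fill the page (zeroed here) while it is busied. */
		pmap_zero_page(m);
		/* The valid transition is covered by the busy lock alone. */
		vm_page_valid(m);
	}
	vm_page_xunbusy(m);
}

Under the new rules the object lock is needed only for the lookup or allocation step; marking the page valid no longer requires it.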
+ * + * The valid field is protected by the page busy lock (B) and object + * lock (O). Transitions from invalid to valid are generally done + * via I/O or zero filling and do not require the object lock. + * These transitions must be protected with the busy lock to prevent page-in or + * creation races. Page invalidation generally happens as a result + * of truncate or msync. When invalidated, pages must not be present + * in pmap and the object lock must be held to prevent concurrent + * speculative read-only mappings that do not require busy. I/O + * routines may check for validity without a lock if they are prepared + * to handle invalidation races with higher level locks (vnode) or are + * unconcerned with races so long as they hold a reference to prevent + * recycling. When a valid bit is set while holding a shared busy + * lock (A), atomic operations are used to protect against concurrent + * modification. * * In contrast, the synchronization of accesses to the page's - * dirty field is machine dependent (M). In the - * machine-independent layer, the lock on the object that the - * page belongs to must be held in order to operate on the field. - * However, the pmap layer is permitted to set all bits within - * the field without holding that lock. If the underlying - * architecture does not support atomic read-modify-write + * dirty field is a mix of machine dependent (M) and busy (B). In + * the machine-independent layer, the page busy lock must be held to + * operate on the field. However, the pmap layer is permitted to + * set all bits within the field without holding that lock. If the + * underlying architecture does not support atomic read-modify-write * operations on the field's type, then the machine-independent * layer uses a 32-bit atomic on the aligned 32-bit word that * contains the dirty field. In the machine-independent layer, * the implementation of read-modify-write operations on the - * field is encapsulated in vm_page_clear_dirty_mask(). + * field is encapsulated in vm_page_clear_dirty_mask(). An + * exclusive busy lock combined with pmap_remove_{write/all}() is the + * only way to ensure a page cannot become dirty. I/O generally + * removes the page from pmap to ensure exclusive access and atomic + * writes. * * The ref_count field tracks references to the page. References that * prevent the page from being reclaimable are called wirings and are @@ -136,19 +171,6 @@ * The page daemon must therefore handle the possibility of a concurrent * free of the page. * - * The busy lock is an embedded reader-writer lock which protects the - * page's contents and identity (i.e., its <object, pindex> tuple) and - * interlocks with the object lock (O). In particular, a page may be - * busied or unbusied only with the object write lock held. To avoid - * bloating the page structure, the busy lock lacks some of the - * features available to the kernel's general-purpose synchronization - * primitives. As a result, busy lock ordering rules are not verified, - * lock recursion is not detected, and an attempt to xbusy a busy page - * or sbusy an xbusy page results will trigger a panic rather than - * causing the thread to block. vm_page_sleep_if_busy() can be used to - * sleep until the page's busy state changes, after which the caller - * must re-lookup the page and re-evaluate its state. - * * The queue field is the index of the page queue containing the page, * or PQ_NONE if the page is not enqueued.
The queue lock of a page is * the page queue lock corresponding to the page queue index, or the @@ -215,7 +237,7 @@ uint16_t flags; /* page PG_* flags (P) */ uint8_t order; /* index of the buddy queue (F) */ uint8_t pool; /* vm_phys freepool index (F) */ - uint8_t aflags; /* access is atomic */ + uint8_t aflags; /* atomic flags (A) */ uint8_t oflags; /* page VPO_* flags (O) */ uint8_t queue; /* page queue index (Q) */ int8_t psind; /* pagesizes[] index (O) */ @@ -223,8 +245,8 @@ u_char act_count; /* page usage count (P) */ /* NOTE that these must support one bit per DEV_BSIZE in a page */ /* so, on normal X86 kernels, they must be at least 8 bits wide */ - vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ - vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ + vm_page_bits_t valid; /* valid DEV_BSIZE chunk map (O,B) */ + vm_page_bits_t dirty; /* dirty DEV_BSIZE chunk map (M,B) */ }; /* @@ -579,6 +601,7 @@ vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); +void vm_page_invalid(vm_page_t m); void vm_page_launder(vm_page_t m); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); vm_page_t vm_page_next(vm_page_t m); @@ -625,10 +648,11 @@ bool vm_page_wire_mapped(vm_page_t m); void vm_page_xunbusy_hard(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); -void vm_page_clear_dirty (vm_page_t, int, int); -void vm_page_set_invalid (vm_page_t, int, int); -int vm_page_is_valid (vm_page_t, int, int); -void vm_page_test_dirty (vm_page_t); +void vm_page_clear_dirty(vm_page_t, int, int); +void vm_page_set_invalid(vm_page_t, int, int); +void vm_page_valid(vm_page_t m); +int vm_page_is_valid(vm_page_t, int, int); +void vm_page_test_dirty(vm_page_t); vm_page_bits_t vm_page_bits(int base, int size); void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); void vm_page_free_toq(vm_page_t m); @@ -928,5 +952,19 @@ return (VPRC_WIRE_COUNT(m->ref_count) > 0); } +static inline bool +vm_page_all_valid(vm_page_t m) +{ + + return (m->valid == VM_PAGE_BITS_ALL); +} + +static inline bool +vm_page_none_valid(vm_page_t m) +{ + + return (m->valid == 0); +} + #endif /* _KERNEL */ #endif /* !_VM_PAGE_ */ Index: sys/vm/vm_page.c =================================================================== --- sys/vm/vm_page.c +++ sys/vm/vm_page.c @@ -1329,7 +1329,7 @@ { /* We shouldn't put invalid pages on queues. */ - KASSERT(m->valid != 0, ("%s: %p is invalid", __func__, m)); + KASSERT(!vm_page_none_valid(m), ("%s: %p is invalid", __func__, m)); /* * Since the page is not the actually needed one, whether it should @@ -1429,8 +1429,7 @@ { /* Refer to this operation by its public name. */ - KASSERT(m->valid == VM_PAGE_BITS_ALL, - ("vm_page_dirty: page is invalid!")); + KASSERT(vm_page_all_valid(m), ("vm_page_dirty: page is invalid!")); m->dirty = VM_PAGE_BITS_ALL; } @@ -2656,7 +2655,7 @@ VPO_SWAPSLEEP | VPO_UNMANAGED)) == 0, ("page %p has unexpected oflags", m)); /* Don't care: VPO_NOSYNC. */ - if (m->valid != 0) { + if (!vm_page_none_valid(m)) { /* * First, try to allocate a new page * that is above "high". Failing @@ -4332,7 +4331,7 @@ * However, we will not end up with an invalid page and a * shared lock. 
*/ - if (m->valid != VM_PAGE_BITS_ALL || + if (!vm_page_all_valid(m) || (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) { sleep = !vm_page_tryxbusy(m); xbusy = true; @@ -4352,7 +4351,7 @@ goto retrylookup; } if ((allocflags & VM_ALLOC_NOCREAT) != 0 && - m->valid != VM_PAGE_BITS_ALL) { + !vm_page_all_valid(m)) { if (xbusy) vm_page_xunbusy(m); else @@ -4362,7 +4361,7 @@ } if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - if (m->valid == VM_PAGE_BITS_ALL) + if (vm_page_all_valid(m)) goto out; } else if ((allocflags & VM_ALLOC_NOCREAT) != 0) { *mp = NULL; @@ -4384,7 +4383,7 @@ *mp = NULL; return (rv); } - MPASS(m->valid == VM_PAGE_BITS_ALL); + MPASS(vm_page_all_valid(m)); } else { vm_page_zero_invalid(m, TRUE); } @@ -4497,10 +4496,11 @@ goto retrylookup; } } - if (m->valid == 0 && (allocflags & VM_ALLOC_ZERO) != 0) { + if (vm_page_none_valid(m) && + (allocflags & VM_ALLOC_ZERO) != 0) { if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); } if ((allocflags & VM_ALLOC_NOBUSY) != 0) { if ((allocflags & VM_ALLOC_IGN_SBUSY) != 0) @@ -4540,6 +4540,72 @@ ((vm_page_bits_t)1 << first_bit)); } +static inline void +vm_page_bits_set(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t set) +{ + +#if PAGE_SIZE == 32768 + atomic_set_64((uint64_t *)bits, set); +#elif PAGE_SIZE == 16384 + atomic_set_32((uint32_t *)bits, set); +#elif (PAGE_SIZE == 8192) && defined(atomic_set_16) + atomic_set_16((uint16_t *)bits, set); +#elif (PAGE_SIZE == 4096) && defined(atomic_set_8) + atomic_set_8((uint8_t *)bits, set); +#else /* PAGE_SIZE <= 8192 */ + uintptr_t addr; + int shift; + + addr = (uintptr_t)bits; + /* + * Use a trick to perform a 32-bit atomic on the + * containing aligned word, to not depend on the existence + * of atomic_{set, clear}_{8, 16}. + */ + shift = addr & (sizeof(uint32_t) - 1); +#if BYTE_ORDER == BIG_ENDIAN + shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY; +#else + shift *= NBBY; +#endif + addr &= ~(sizeof(uint32_t) - 1); + atomic_set_32((uint32_t *)addr, set << shift); +#endif /* PAGE_SIZE */ +} + +static inline void +vm_page_bits_clear(vm_page_t m, vm_page_bits_t *bits, vm_page_bits_t clear) +{ + +#if PAGE_SIZE == 32768 + atomic_clear_64((uint64_t *)bits, clear); +#elif PAGE_SIZE == 16384 + atomic_clear_32((uint32_t *)bits, clear); +#elif (PAGE_SIZE == 8192) && defined(atomic_clear_16) + atomic_clear_16((uint16_t *)bits, clear); +#elif (PAGE_SIZE == 4096) && defined(atomic_clear_8) + atomic_clear_8((uint8_t *)bits, clear); +#else /* PAGE_SIZE <= 8192 */ + uintptr_t addr; + int shift; + + addr = (uintptr_t)bits; + /* + * Use a trick to perform a 32-bit atomic on the + * containing aligned word, to not depend on the existence + * of atomic_{set, clear}_{8, 16}. + */ + shift = addr & (sizeof(uint32_t) - 1); +#if BYTE_ORDER == BIG_ENDIAN + shift = (sizeof(uint32_t) - sizeof(vm_page_bits_t) - shift) * NBBY; +#else + shift *= NBBY; +#endif + addr &= ~(sizeof(uint32_t) - 1); + atomic_clear_32((uint32_t *)addr, clear << shift); +#endif /* PAGE_SIZE */ +} + /* * vm_page_set_valid_range: * @@ -4554,8 +4620,9 @@ vm_page_set_valid_range(vm_page_t m, int base, int size) { int endoff, frag; + vm_page_bits_t pagebits; - VM_OBJECT_ASSERT_WLOCKED(m->object); + vm_page_assert_busied(m); if (size == 0) /* handle degenerate case */ return; @@ -4589,7 +4656,11 @@ /* * Set valid bits inclusive of any overlap. 
*/ - m->valid |= vm_page_bits(base, size); + pagebits = vm_page_bits(base, size); + if (vm_page_xbusied(m)) + m->valid |= pagebits; + else + vm_page_bits_set(m, &m->valid, pagebits); } /* @@ -4598,52 +4669,20 @@ static __inline void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) { - uintptr_t addr; -#if PAGE_SIZE < 16384 - int shift; -#endif + + vm_page_assert_busied(m); /* - * If the object is locked and the page is neither exclusive busy nor - * write mapped, then the page's dirty field cannot possibly be - * set by a concurrent pmap operation. + * If the page is xbusied and not write mapped we are the + * only thread that can modify dirty bits. Otherwise, The pmap + * layer can call vm_page_dirty() without holding a distinguished + * lock. The combination of page busy and atomic operations + * suffice to guarantee consistency of the page dirty field. */ - VM_OBJECT_ASSERT_WLOCKED(m->object); - if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) + if (vm_page_xbusied(m) && !pmap_page_is_write_mapped(m)) m->dirty &= ~pagebits; - else { - /* - * The pmap layer can call vm_page_dirty() without - * holding a distinguished lock. The combination of - * the object's lock and an atomic operation suffice - * to guarantee consistency of the page dirty field. - * - * For PAGE_SIZE == 32768 case, compiler already - * properly aligns the dirty field, so no forcible - * alignment is needed. Only require existence of - * atomic_clear_64 when page size is 32768. - */ - addr = (uintptr_t)&m->dirty; -#if PAGE_SIZE == 32768 - atomic_clear_64((uint64_t *)addr, pagebits); -#elif PAGE_SIZE == 16384 - atomic_clear_32((uint32_t *)addr, pagebits); -#else /* PAGE_SIZE <= 8192 */ - /* - * Use a trick to perform a 32-bit atomic on the - * containing aligned word, to not depend on the existence - * of atomic_clear_{8, 16}. - */ - shift = addr & (sizeof(uint32_t) - 1); -#if BYTE_ORDER == BIG_ENDIAN - shift = (sizeof(uint32_t) - sizeof(m->dirty) - shift) * NBBY; -#else - shift *= NBBY; -#endif - addr &= ~(sizeof(uint32_t) - 1); - atomic_clear_32((uint32_t *)addr, pagebits << shift); -#endif /* PAGE_SIZE */ - } + else + vm_page_bits_clear(m, &m->dirty, pagebits); } /* @@ -4662,7 +4701,9 @@ vm_page_bits_t oldvalid, pagebits; int endoff, frag; + /* Object lock for VPO_NOSYNC */ VM_OBJECT_ASSERT_WLOCKED(m->object); + vm_page_assert_busied(m); if (size == 0) /* handle degenerate case */ return; @@ -4699,7 +4740,10 @@ */ oldvalid = m->valid; pagebits = vm_page_bits(base, size); - m->valid |= pagebits; + if (vm_page_xbusied(m)) + m->valid |= pagebits; + else + vm_page_bits_set(m, &m->valid, pagebits); #if 0 /* NOT YET */ if ((frag = base & (DEV_BSIZE - 1)) != 0) { frag = DEV_BSIZE - frag; @@ -4728,7 +4772,7 @@ pmap_clear_modify(m); m->dirty = 0; m->oflags &= ~VPO_NOSYNC; - } else if (oldvalid != VM_PAGE_BITS_ALL) + } else if (oldvalid != VM_PAGE_BITS_ALL && vm_page_xbusied(m)) m->dirty &= ~pagebits; else vm_page_clear_dirty_mask(m, pagebits); @@ -4753,21 +4797,53 @@ vm_page_bits_t bits; vm_object_t object; + /* + * The object lock is required so that pages can't be mapped + * read-only while we're in the process of invalidating them. 
+ */ object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); + vm_page_assert_busied(m); + if (object->type == OBJT_VNODE && base == 0 && IDX_TO_OFF(m->pindex) + size >= object->un_pager.vnp.vnp_size) bits = VM_PAGE_BITS_ALL; else bits = vm_page_bits(base, size); - if (object->ref_count != 0 && m->valid == VM_PAGE_BITS_ALL && - bits != 0) + if (object->ref_count != 0 && vm_page_all_valid(m) && bits != 0) pmap_remove_all(m); - KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) || + KASSERT((bits == 0 && vm_page_all_valid(m)) || !pmap_page_is_mapped(m), ("vm_page_set_invalid: page %p is mapped", m)); - m->valid &= ~bits; - m->dirty &= ~bits; + if (vm_page_xbusied(m)) { + m->valid &= ~bits; + m->dirty &= ~bits; + } else { + vm_page_bits_clear(m, &m->valid, bits); + vm_page_bits_clear(m, &m->dirty, bits); + } +} + +/* + * vm_page_invalid: + * + * Invalidates the entire page. The page must be busy, unmapped, and + * the enclosing object must be locked. The object lock protects + * against concurrent read-only pmap enter, which is done without + * busy. + */ +void +vm_page_invalid(vm_page_t m) +{ + + vm_page_assert_busied(m); + VM_OBJECT_ASSERT_LOCKED(m->object); + MPASS(!pmap_page_is_mapped(m)); + + if (vm_page_xbusied(m)) + m->valid = 0; + else + vm_page_bits_clear(m, &m->valid, VM_PAGE_BITS_ALL); } /* @@ -4787,7 +4863,6 @@ int b; int i; - VM_OBJECT_ASSERT_WLOCKED(m->object); /* * Scan the valid bits looking for invalid sections that * must be zeroed. Invalid sub-DEV_BSIZE'd areas ( where the @@ -4811,7 +4886,7 @@ * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. */ if (setvalid) - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); } /* @@ -4820,13 +4895,16 @@ * Is (partial) page valid? Note that the case where size == 0 * will return FALSE in the degenerate case where the page is * entirely invalid, and TRUE otherwise. + * + * Some callers invoke this routine without the busy lock held and + * handle races via higher level locks. Typical callers should + * hold a busy lock to prevent invalidation. */ int vm_page_is_valid(vm_page_t m, int base, int size) { vm_page_bits_t bits; - VM_OBJECT_ASSERT_LOCKED(m->object); bits = vm_page_bits(base, size); return (m->valid != 0 && (m->valid & bits) == bits); } @@ -4884,11 +4962,22 @@ vm_page_test_dirty(vm_page_t m) { - VM_OBJECT_ASSERT_WLOCKED(m->object); + vm_page_assert_busied(m); if (m->dirty != VM_PAGE_BITS_ALL && pmap_is_modified(m)) vm_page_dirty(m); } +void +vm_page_valid(vm_page_t m) +{ + + vm_page_assert_busied(m); + if (vm_page_xbusied(m)) + m->valid = VM_PAGE_BITS_ALL; + else + vm_page_bits_set(m, &m->valid, VM_PAGE_BITS_ALL); +} + void vm_page_lock_KBI(vm_page_t m, const char *file, int line) { Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c +++ sys/vm/vm_pageout.c @@ -469,7 +469,7 @@ * edge case with file fragments. */ for (i = 0; i < count; i++) { - KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, + KASSERT(vm_page_all_valid(mc[i]), ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); KASSERT((mc[i]->aflags & PGA_WRITEABLE) == 0, @@ -829,7 +829,7 @@ * Invalid pages can be easily freed. They cannot be * mapped; vm_page_free() asserts this. */ - if (m->valid == 0) + if (vm_page_none_valid(m)) goto free_page; /* @@ -1560,7 +1560,7 @@ * Invalid pages can be easily freed. They cannot be * mapped, vm_page_free() asserts this.
*/ - if (m->valid == 0) + if (vm_page_none_valid(m)) goto free_page; /* Index: sys/vm/vm_swapout.c =================================================================== --- sys/vm/vm_swapout.c +++ sys/vm/vm_swapout.c @@ -586,14 +586,14 @@ pages); for (i = 0; i < pages;) { vm_page_assert_xbusied(ma[i]); - if (ma[i]->valid == VM_PAGE_BITS_ALL) { + if (vm_page_all_valid(ma[i])) { vm_page_xunbusy(ma[i]); i++; continue; } vm_object_pip_add(ksobj, 1); for (j = i + 1; j < pages; j++) - if (ma[j]->valid == VM_PAGE_BITS_ALL) + if (vm_page_all_valid(ma[j])) break; rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a); KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i])); Index: sys/vm/vnode_pager.c =================================================================== --- sys/vm/vnode_pager.c +++ sys/vm/vnode_pager.c @@ -471,9 +471,12 @@ * completely invalid page and mark it partially valid * it can screw up NFS reads, so we don't allow the case. */ - if ((nsize & PAGE_MASK) && - (m = vm_page_lookup(object, OFF_TO_IDX(nsize))) != NULL && - m->valid != 0) { + if (!(nsize & PAGE_MASK)) + goto out; + m = vm_page_grab(object, OFF_TO_IDX(nsize), VM_ALLOC_NOCREAT); + if (m == NULL) + goto out; + if (!vm_page_none_valid(m)) { int base = (int)nsize & PAGE_MASK; int size = PAGE_SIZE - base; @@ -506,7 +509,9 @@ */ vm_page_clear_dirty(m, base, PAGE_SIZE - base); } + vm_page_xunbusy(m); } +out: object->un_pager.vnp.vnp_size = nsize; object->size = nobjsize; VM_OBJECT_WUNLOCK(object); @@ -701,7 +706,7 @@ } KASSERT(m->dirty == 0, ("vnode_pager_input_old: page %p is dirty", m)); if (!error) - m->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m); return error ? VM_PAGER_ERROR : VM_PAGER_OK; } @@ -810,7 +815,7 @@ * exist at the end of file, and the page is made fully valid * by zeroing in vm_pager_get_pages(). */ - if (m[count - 1]->valid != 0 && --count == 0) { + if (!vm_page_none_valid(m[count - 1]) && --count == 0) { if (iodone != NULL) iodone(arg, m, 1, 0); return (VM_PAGER_OK); @@ -870,7 +875,7 @@ KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty", __func__, m[0])); VM_OBJECT_WLOCK(object); - m[0]->valid = VM_PAGE_BITS_ALL; + vm_page_valid(m[0]); VM_OBJECT_WUNLOCK(object); return (VM_PAGER_OK); } @@ -1136,7 +1141,7 @@ /* * Read filled up entire page. */ - mt->valid = VM_PAGE_BITS_ALL; + vm_page_valid(mt); KASSERT(mt->dirty == 0, ("%s: page %p is dirty", __func__, mt)); KASSERT(!pmap_page_is_mapped(mt),