diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -2429,6 +2429,10 @@
 	return (setfown(td, active_cred, vp, uid, gid));
 }
 
+/*
+ * Remove pages in the range ["start", "end") from the vnode's VM object.  If
+ * "end" is 0, then the range extends to the end of the object.
+ */
 void
 vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
 {
@@ -2441,6 +2445,24 @@
 	VM_OBJECT_WUNLOCK(object);
 }
 
+/*
+ * Like vn_pages_remove(), but skips invalid pages, which by definition are not
+ * mapped into any process' address space.  Filesystems may use this in
+ * preference to vn_pages_remove() to avoid blocking on pages busied in
+ * preparation for a VOP_GETPAGES.
+ */
+void
+vn_pages_remove_valid(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
+{
+	vm_object_t object;
+
+	if ((object = vp->v_object) == NULL)
+		return;
+	VM_OBJECT_WLOCK(object);
+	vm_object_page_remove(object, start, end, OBJPR_VALIDONLY);
+	VM_OBJECT_WUNLOCK(object);
+}
+
 int
 vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
     struct ucred *cred)
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -770,6 +770,8 @@
 int	vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
 	    struct thread *td, struct file *fp);
 void	vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end);
+void	vn_pages_remove_valid(struct vnode *vp, vm_pindex_t start,
+	    vm_pindex_t end);
 int	vn_pollrecord(struct vnode *vp, struct thread *p, int events);
 int	vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len,
 	    off_t offset, enum uio_seg segflg, int ioflg,
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -232,6 +232,7 @@
  */
#define	OBJPR_CLEANONLY	0x1		/* Don't remove dirty pages. */
#define	OBJPR_NOTMAPPED	0x2		/* Don't unmap pages. */
+#define	OBJPR_VALIDONLY	0x4		/* Ignore invalid pages. */
 
 TAILQ_HEAD(object_q, vm_object);
 
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -2099,6 +2099,21 @@
 	for (; p != NULL && (p->pindex < end || end == 0); p = next) {
 		next = TAILQ_NEXT(p, listq);
 
+		/*
+		 * Skip invalid pages if asked to do so.  Try to avoid acquiring
+		 * the busy lock, as some consumers rely on this to avoid
+		 * deadlocks.
+		 *
+		 * A thread may concurrently transition the page from invalid to
+		 * valid using only the busy lock, so the result of this check
+		 * is immediately stale.  It is up to consumers to handle this,
+		 * for instance by ensuring that all invalid->valid transitions
+		 * happen with a mutex held, as may be possible for a
+		 * filesystem.
+		 */
+		if ((options & OBJPR_VALIDONLY) != 0 && vm_page_none_valid(p))
+			continue;
+
 		/*
 		 * If the page is wired for any reason besides the existence
 		 * of managed, wired mappings, then it cannot be freed.  For
@@ -2112,6 +2127,10 @@
 				VM_OBJECT_WLOCK(object);
 			goto again;
 		}
+		if ((options & OBJPR_VALIDONLY) != 0 && vm_page_none_valid(p)) {
+			vm_page_xunbusy(p);
+			continue;
+		}
 		if (vm_page_wired(p)) {
wired:
 			if ((options & OBJPR_NOTMAPPED) == 0 &&
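
A minimal usage sketch (not part of the diff; the helper name and byte-range arguments are hypothetical): a filesystem that serializes its own invalid->valid page transitions, as described in the comment above, could drop cached valid pages for a byte range without blocking on pages busied for an in-flight VOP_GETPAGES. OFF_TO_IDX() and round_page() are the existing kernel macros for byte-to-page conversion.

#include <sys/param.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>

/*
 * Hypothetical helper: invalidate cached pages backing the byte range
 * [start, end) of "vp".  Only valid pages are removed, so pages busied in
 * preparation for VOP_GETPAGES are skipped rather than waited on.
 */
static void
examplefs_invalidate_range(struct vnode *vp, off_t start, off_t end)
{
	/*
	 * Convert byte offsets to page indices; the end offset is rounded up
	 * so that a partially covered final page is removed as well.
	 */
	vn_pages_remove_valid(vp, OFF_TO_IDX(start),
	    OFF_TO_IDX(round_page(end)));
}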