Index: sys/fs/nfsclient/nfs_clbio.c =================================================================== --- sys/fs/nfsclient/nfs_clbio.c +++ sys/fs/nfsclient/nfs_clbio.c @@ -266,9 +266,7 @@ { struct uio uio; struct iovec iov; - vm_offset_t kva; - struct buf *bp; - int iomode, must_commit, i, error, npages, count; + int i, error, npages, count; off_t offset; int *rtvals; struct vnode *vp; @@ -322,44 +320,26 @@ } mtx_unlock(&np->n_mtx); - /* - * We use only the kva address for the buffer, but this is extremely - * convenient and fast. - */ - bp = getpbuf(&ncl_pbuf_freecnt); - - kva = (vm_offset_t) bp->b_data; - pmap_qenter(kva, pages, npages); PCPU_INC(cnt.v_vnodeout); PCPU_ADD(cnt.v_vnodepgsout, count); - iov.iov_base = (caddr_t) kva; + iov.iov_base = unmapped_buf; iov.iov_len = count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = offset; uio.uio_resid = count; - uio.uio_segflg = UIO_SYSSPACE; + uio.uio_segflg = UIO_NOCOPY; uio.uio_rw = UIO_WRITE; uio.uio_td = td; - if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) - iomode = NFSWRITE_UNSTABLE; - else - iomode = NFSWRITE_FILESYNC; - - error = ncl_writerpc(vp, &uio, cred, &iomode, &must_commit, 0); + error = VOP_WRITE(vp, &uio, vnode_pager_putpages_ioflags(ap->a_sync), + cred); crfree(cred); - pmap_qremove(kva, npages); - relpbuf(bp, &ncl_pbuf_freecnt); - - if (error == 0 || !nfs_keep_dirty_on_error) { + if (error == 0 || !nfs_keep_dirty_on_error) vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid); - if (must_commit) - ncl_clearcommit(vp->v_mount); - } - return rtvals[0]; + return (rtvals[0]); } /* @@ -394,8 +374,8 @@ */ old_lock = ncl_upgrade_vnlock(vp); if (vp->v_iflag & VI_DOOMED) { - ncl_downgrade_vnlock(vp, old_lock); - return (EBADF); + error = EBADF; + goto out; } mtx_lock(&np->n_mtx); @@ -406,7 +386,9 @@ panic("nfs: bioread, not dir"); ncl_invaldir(vp); error = ncl_vinvalbuf(vp, V_SAVE, td, 1); - if (error) + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + error = EBADF; + if (error != 0) goto out; } np->n_attrstamp = 0; @@ -429,7 +411,9 @@ if (vp->v_type == VDIR) ncl_invaldir(vp); error = ncl_vinvalbuf(vp, V_SAVE, td, 1); - if (error) + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + error = EBADF; + if (error != 0) goto out; mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; @@ -439,7 +423,7 @@ } out: ncl_downgrade_vnlock(vp, old_lock); - return error; + return (error); } /* @@ -623,6 +607,9 @@ while (error == NFSERR_BAD_COOKIE) { ncl_invaldir(vp); error = ncl_vinvalbuf(vp, 0, td, 1); + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + return (EBADF); + /* * Yuck! The directory has been modified on the * server. The only way to get the block is by @@ -943,8 +930,11 @@ #endif np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); - error = ncl_vinvalbuf(vp, V_SAVE, td, 1); - if (error) + error = ncl_vinvalbuf(vp, V_SAVE | ((ioflag & + IO_VMIO) != 0 ? V_VMIO : 0), td, 1); + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + error = EBADF; + if (error != 0) return (error); } else mtx_unlock(&np->n_mtx); @@ -1023,8 +1013,12 @@ if (wouldcommit > nmp->nm_wcommitsize) { np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); - error = ncl_vinvalbuf(vp, V_SAVE, td, 1); - if (error) + error = ncl_vinvalbuf(vp, V_SAVE | ((ioflag & + IO_VMIO) != 0 ? V_VMIO : 0), td, 1); + if (error == 0 && + (vp->v_iflag & VI_DOOMED) != 0) + error = EBADF; + if (error != 0) return (error); wouldcommit = biosize; } @@ -1261,7 +1255,7 @@ error = error1; break; } - } else if ((n + on) == biosize) { + } else if ((n + on) == biosize || (ioflag & IO_ASYNC) != 0) { bp->b_flags |= B_ASYNC; (void) ncl_writebp(bp, 0, NULL); } else { @@ -1371,7 +1365,8 @@ /* * Now, flush as required. */ - if ((flags & V_SAVE) && (vp->v_bufobj.bo_object != NULL)) { + if ((flags & (V_SAVE | V_VMIO)) == V_SAVE && + vp->v_bufobj.bo_object != NULL) { VM_OBJECT_WLOCK(vp->v_bufobj.bo_object); vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object); Index: sys/fs/nfsclient/nfs_clvnops.c =================================================================== --- sys/fs/nfsclient/nfs_clvnops.c +++ sys/fs/nfsclient/nfs_clvnops.c @@ -520,6 +520,8 @@ if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + return (EBADF); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); @@ -556,6 +558,8 @@ np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + return (EBADF); if (error == EINTR || error == EIO) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); @@ -576,6 +580,8 @@ if (np->n_directio_opens == 0) { mtx_unlock(&np->n_mtx); error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + return (EBADF); if (error) { if (NFS_ISV4(vp)) (void) nfsrpc_close(vp, 0, ap->a_td); @@ -714,8 +720,11 @@ * np->n_flag &= ~NMODIFIED; */ } - } else - error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); + } else { + error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1); + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + return (EBADF); + } mtx_lock(&np->n_mtx); } /* @@ -931,13 +940,13 @@ if (np->n_flag & NMODIFIED) { tsize = np->n_size; mtx_unlock(&np->n_mtx); - if (vap->va_size == 0) - error = ncl_vinvalbuf(vp, 0, td, 1); - else - error = ncl_vinvalbuf(vp, V_SAVE, td, 1); - if (error) { - vnode_pager_setsize(vp, tsize); - return (error); + error = ncl_vinvalbuf(vp, vap->va_size == 0 ? + 0 : V_SAVE, td, 1); + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + error = EBADF; + if (error != 0) { + vnode_pager_setsize(vp, tsize); + return (error); } /* * Call nfscl_delegmodtime() to set the modify time @@ -961,8 +970,10 @@ if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG) { mtx_unlock(&np->n_mtx); - if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && - (error == EINTR || error == EIO)) + error = ncl_vinvalbuf(vp, V_SAVE, td, 1); + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + return (EBADF); + if (error == EINTR || error == EIO) return (error); } else mtx_unlock(&np->n_mtx); @@ -1665,8 +1676,10 @@ * unnecessary delayed writes later. */ error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1); - /* Do the rpc */ - if (error != EINTR && error != EIO) + if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0) + error = EBADF; + else if (error != EINTR && error != EIO) + /* Do the rpc */ error = nfs_removerpc(dvp, vp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); /* @@ -3055,6 +3068,10 @@ if ((np->n_flag & NMODIFIED) || ret || np->n_change != va.va_filerev) { (void) ncl_vinvalbuf(vp, V_SAVE, td, 1); + if ((vp->v_iflag & VI_DOOMED) != 0) { + NFSVOPUNLOCK(vp, 0); + return (EBADF); + } np->n_attrstamp = 0; KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp); ret = VOP_GETATTR(vp, &va, cred); Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -1673,13 +1673,15 @@ */ do { bufobj_wwait(bo, 0, 0); - BO_UNLOCK(bo); - if (bo->bo_object != NULL) { - VM_OBJECT_WLOCK(bo->bo_object); - vm_object_pip_wait(bo->bo_object, "bovlbx"); - VM_OBJECT_WUNLOCK(bo->bo_object); + if ((flags & V_VMIO) == 0) { + BO_UNLOCK(bo); + if (bo->bo_object != NULL) { + VM_OBJECT_WLOCK(bo->bo_object); + vm_object_pip_wait(bo->bo_object, "bovlbx"); + VM_OBJECT_WUNLOCK(bo->bo_object); + } + BO_LOCK(bo); } - BO_LOCK(bo); } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); @@ -1687,7 +1689,7 @@ * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL && - (flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0) { + (flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0) { VM_OBJECT_WLOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? OBJPR_CLEANONLY : 0); @@ -1696,7 +1698,7 @@ #ifdef INVARIANTS BO_LOCK(bo); - if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0 && + if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); BO_UNLOCK(bo); Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -402,6 +402,7 @@ #define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */ #define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */ #define V_CLEANONLY 0x0008 /* vinvalbuf: invalidate only clean bufs */ +#define V_VMIO 0x0010 /* vinvalbuf: called during pageout */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */ #define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */ Index: sys/vm/vnode_pager.h =================================================================== --- sys/vm/vnode_pager.h +++ sys/vm/vnode_pager.h @@ -44,11 +44,10 @@ int count, int *rbehind, int *rahead, vop_getpages_iodone_t iodone, void *arg); int vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *m, - int count, boolean_t sync, - int *rtvals); + int count, int flags, int *rtvals); int vnode_pager_local_getpages(struct vop_getpages_args *ap); int vnode_pager_local_getpages_async(struct vop_getpages_async_args *ap); - +int vnode_pager_putpages_ioflags(int pager_flags); void vnode_pager_release_writecount(vm_object_t object, vm_offset_t start, vm_offset_t end); void vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written); Index: sys/vm/vnode_pager.c =================================================================== --- sys/vm/vnode_pager.c +++ sys/vm/vnode_pager.c @@ -1193,18 +1193,12 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, int flags, int *rtvals) { - int i; vm_object_t object; vm_page_t m; - int count; - - int maxsize, ncount; vm_ooffset_t poffset; struct uio auio; struct iovec aiov; - int error; - int ioflags; - int ppscheck = 0; + int count, error, i, maxsize, ncount, ppscheck; static struct timeval lastfail; static int curfail; @@ -1271,22 +1265,6 @@ } VM_OBJECT_WUNLOCK(object); - /* - * pageouts are already clustered, use IO_ASYNC to force a bawrite() - * rather then a bdwrite() to prevent paging I/O from saturating - * the buffer cache. Dummy-up the sequential heuristic to cause - * large ranges to cluster. If neither IO_SYNC or IO_ASYNC is set, - * the system decides how to cluster. - */ - ioflags = IO_VMIO; - if (flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) - ioflags |= IO_SYNC; - else if ((flags & VM_PAGER_CLUSTER_OK) == 0) - ioflags |= IO_ASYNC; - ioflags |= (flags & VM_PAGER_PUT_INVAL) ? IO_INVAL: 0; - ioflags |= (flags & VM_PAGER_PUT_NOREUSE) ? IO_NOREUSE : 0; - ioflags |= IO_SEQMAX << IO_SEQSHIFT; - aiov.iov_base = (caddr_t) 0; aiov.iov_len = maxsize; auio.uio_iov = &aiov; @@ -1296,10 +1274,12 @@ auio.uio_rw = UIO_WRITE; auio.uio_resid = maxsize; auio.uio_td = (struct thread *) 0; - error = VOP_WRITE(vp, &auio, ioflags, curthread->td_ucred); + error = VOP_WRITE(vp, &auio, vnode_pager_putpages_ioflags(flags), + curthread->td_ucred); PCPU_INC(cnt.v_vnodeout); PCPU_ADD(cnt.v_vnodepgsout, ncount); + ppscheck = 0; if (error) { if ((ppscheck = ppsratecheck(&lastfail, &curfail, 1))) printf("vnode_pager_putpages: I/O error %d\n", error); @@ -1315,6 +1295,30 @@ return rtvals[0]; } +int +vnode_pager_putpages_ioflags(int pager_flags) +{ + int ioflags; + + /* + * Pageouts are already clustered, use IO_ASYNC to force a + * bawrite() rather then a bdwrite() to prevent paging I/O + * from saturating the buffer cache. Dummy-up the sequential + * heuristic to cause large ranges to cluster. If neither + * IO_SYNC or IO_ASYNC is set, the system decides how to + * cluster. + */ + ioflags = IO_VMIO; + if ((pager_flags & (VM_PAGER_PUT_SYNC | VM_PAGER_PUT_INVAL)) != 0) + ioflags |= IO_SYNC; + else if ((pager_flags & VM_PAGER_CLUSTER_OK) == 0) + ioflags |= IO_ASYNC; + ioflags |= (pager_flags & VM_PAGER_PUT_INVAL) != 0 ? IO_INVAL: 0; + ioflags |= (pager_flags & VM_PAGER_PUT_NOREUSE) != 0 ? IO_NOREUSE : 0; + ioflags |= IO_SEQMAX << IO_SEQSHIFT; + return (ioflags); +} + void vnode_pager_undirty_pages(vm_page_t *ma, int *rtvals, int written) {