Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -118,6 +118,8 @@
 static void vfs_knl_assert_unlocked(void *arg);
 static void vnlru_return_batches(struct vfsops *mnt_op);
 static void destroy_vpollinfo(struct vpollinfo *vi);
+static int v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
+	    daddr_t startlbn, daddr_t endlbn);
 
 /*
  * These fences are intended for cases where some synchronization is
@@ -1954,9 +1956,8 @@
 vtruncbuf(struct vnode *vp, off_t length, int blksize)
 {
 	struct buf *bp, *nbp;
-	int anyfreed;
-	daddr_t trunclbn;
 	struct bufobj *bo;
+	daddr_t startlbn;
 
 	CTR4(KTR_VFS, "%s: vp %p with block %d:%ju", __func__,
 	    vp, blksize, (uintmax_t)length);
@@ -1964,91 +1965,158 @@
 	/*
 	 * Round up to the *next* lbn.
 	 */
-	trunclbn = howmany(length, blksize);
+	startlbn = howmany(length, blksize);
 
 	ASSERT_VOP_LOCKED(vp, "vtruncbuf");
-restart:
+
 	bo = &vp->v_bufobj;
+restart_unlocked:
 	BO_LOCK(bo);
-	anyfreed = 1;
-	for (;anyfreed;) {
-		anyfreed = 0;
-		TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno < trunclbn)
+
+	while (v_inval_buf_range1(vp, bo, startlbn, INT64_MAX) == EAGAIN)
+		;
+
+	if (length > 0) {
+restartsync:
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
+			if (bp->b_lblkno > 0)
 				continue;
+			/*
+			 * Since we hold the vnode lock this should only
+			 * fail if we're racing with the buf daemon.
+			 */
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK)
-				goto restart;
+				goto restart_unlocked;
 
-			bremfree(bp);
-			bp->b_flags |= (B_INVAL | B_RELBUF);
-			bp->b_flags &= ~B_ASYNC;
-			brelse(bp);
-			anyfreed = 1;
+			VNASSERT((bp->b_flags & B_DELWRI), vp,
+			    ("buf(%p) on dirty queue without DELWRI", bp));
 
+			bremfree(bp);
+			bawrite(bp);
 			BO_LOCK(bo);
-			if (nbp != NULL &&
-			    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
-			    (nbp->b_vp != vp) ||
-			    (nbp->b_flags & B_DELWRI))) {
-				BO_UNLOCK(bo);
-				goto restart;
-			}
+			goto restartsync;
 		}
+	}
+
+	bufobj_wwait(bo, 0, 0);
+	BO_UNLOCK(bo);
+	vnode_pager_setsize(vp, length);
+
+	return (0);
+}
+
+/*
+ * Invalidate the cached pages of a file's buffer within the range of file
+ * offsets [start, end).  Every buffer that overlaps that range will be
+ * invalidated.  This must not result in any dirty data being lost.
+ */
+void
+v_inval_buf_range(struct vnode *vp, off_t start, off_t end, int blksize)
+{
+	struct bufobj *bo;
+	daddr_t startlbn, endlbn;
+	vm_pindex_t startp, endp;
+
+	/* Round "outwards" */
+	startlbn = start / blksize;
+	endlbn = howmany(end, blksize);
+	startp = OFF_TO_IDX(start);
+	endp = OFF_TO_IDX(end + PAGE_SIZE - 1);
+
+	ASSERT_VOP_LOCKED(vp, "v_inval_buf_range");
+	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
+	MPASS(blksize == bo->bo_bsize);
+
+	do {
+#ifdef INVARIANTS
+		struct buf *bp, *nbp;
+
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno < trunclbn)
+			/*
+			 * Disallow invalidating dirty data outside of the
+			 * requested offsets.  Assume that data within the
+			 * requested offsets is being invalidated for a good
+			 * reason.
+			 */
+			off_t blkstart, blkend;
+
+			blkstart = bp->b_offset;
+			blkend = bp->b_offset + bp->b_bcount;
+			KASSERT(
+			    blkstart >= start && blkend <= end ||
+			    blkstart >= end ||
+			    blkend <= start,
+			    ("Invalidating extra dirty data!"));
+		}
+#endif
+	} while (v_inval_buf_range1(vp, bo, startlbn, endlbn) == EAGAIN);
+
+	BO_UNLOCK(bo);
+	vn_pages_remove(vp, startp, endp);
+}
+
+/* Like v_inval_buf_range, but operates on whole buffers instead of offsets */
+static int
+v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
+    daddr_t startlbn, daddr_t endlbn)
+{
+	struct buf *bp, *nbp;
+	bool anyfreed;
+
+	ASSERT_VOP_LOCKED(vp, "v_inval_buf_range1");
+	ASSERT_BO_LOCKED(bo);
+
+	do {
+		anyfreed = false;
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
+			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
-			    BO_LOCKPTR(bo)) == ENOLCK)
-				goto restart;
+			    BO_LOCKPTR(bo)) == ENOLCK) {
+				BO_LOCK(bo);
+				return (EAGAIN);
+			}
+
 			bremfree(bp);
-			bp->b_flags |= (B_INVAL | B_RELBUF);
+			bp->b_flags |= B_INVAL | B_RELBUF;
			bp->b_flags &= ~B_ASYNC;
 			brelse(bp);
-			anyfreed = 1;
+			anyfreed = true;
 
 			BO_LOCK(bo);
 			if (nbp != NULL &&
-			    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
-			    (nbp->b_vp != vp) ||
-			    (nbp->b_flags & B_DELWRI) == 0)) {
-				BO_UNLOCK(bo);
-				goto restart;
-			}
+			    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
+			    nbp->b_vp != vp ||
+			    (nbp->b_flags & B_DELWRI) != 0))
+				return (EAGAIN);
 		}
-	}
 
-	if (length > 0) {
-restartsync:
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno > 0)
+			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
 				continue;
-			/*
-			 * Since we hold the vnode lock this should only
-			 * fail if we're racing with the buf daemon.
-			 */
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK) {
-				goto restart;
+				BO_LOCK(bo);
+				return (EAGAIN);
 			}
-			VNASSERT((bp->b_flags & B_DELWRI), vp,
-			    ("buf(%p) on dirty queue without DELWRI", bp));
-			bremfree(bp);
-			bawrite(bp);
+			bp->b_flags |= B_INVAL | B_RELBUF;
+			bp->b_flags &= ~B_ASYNC;
+			brelse(bp);
+			anyfreed = true;
 
 			BO_LOCK(bo);
-			goto restartsync;
+			if (nbp != NULL &&
+			    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
+			    (nbp->b_vp != vp) ||
+			    (nbp->b_flags & B_DELWRI) == 0))
+				return (EAGAIN);
 		}
-	}
-
-	bufobj_wwait(bo, 0, 0);
-	BO_UNLOCK(bo);
-	vnode_pager_setsize(vp, length);
-
+	} while (anyfreed);
 	return (0);
 }
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -659,6 +659,8 @@
 void vinactive(struct vnode *, struct thread *);
 int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
 int vtruncbuf(struct vnode *vp, off_t length, int blksize);
+void v_inval_buf_range(struct vnode *vp, off_t start, off_t end,
+	    int blksize);
 void vunref(struct vnode *);
 void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
 int vrecycle(struct vnode *vp);
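
For context, a minimal sketch of how a filesystem might consume the new
interface. This caller is not part of the patch; the function name
example_range_changed and the assumption that the vnode lock is already held
are illustrative only. The idea is that when a caller learns a byte range of
a file has changed on the backing store, it can drop the stale buffers and
pages for just that range without discarding unrelated dirty data, which
vinvalbuf() or vtruncbuf() could not do.

	/*
	 * Hypothetical caller, not part of this change: invalidate locally
	 * cached data for [start, end) after an external modification is
	 * detected.  Assumes the vnode lock is held, as v_inval_buf_range()
	 * asserts.
	 */
	static void
	example_range_changed(struct vnode *vp, off_t start, off_t end)
	{

		ASSERT_VOP_LOCKED(vp, "example_range_changed");

		/*
		 * Drop every buffer overlapping [start, end) and the backing
		 * VM pages.  Pass the bufobj's block size so the MPASS in
		 * v_inval_buf_range() is satisfied.  Dirty buffers outside
		 * the range are left alone.
		 */
		v_inval_buf_range(vp, start, end, vp->v_bufobj.bo_bsize);
	}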