Index: sys/kern/vfs_bio.c =================================================================== --- sys/kern/vfs_bio.c +++ sys/kern/vfs_bio.c @@ -2135,30 +2135,39 @@ void (*ckhashfunc)(struct buf *), struct buf **bpp) { struct buf *bp; + struct thread *td; int readwait, rv; CTR3(KTR_BUF, "breadn(%p, %jd, %d)", vp, blkno, size); + td = curthread; /* - * Can only return NULL if GB_LOCK_NOWAIT flag is specified. + * Can only return NULL if GB_LOCK_NOWAIT or GB_NOSPARSE flags + * are specified. */ + if ((flags & GB_NOSPARSE) != 0) + td->td_errno = 0; *bpp = bp = getblk(vp, blkno, size, 0, 0, flags); - if (bp == NULL) + if (bp == NULL) { + if ((flags & GB_NOSPARSE) != 0 && td->td_errno == EJUSTRETURN) + return (EJUSTRETURN); return (EBUSY); + } + flags &= ~GB_NOSPARSE; /* * If not found in cache, do some I/O */ readwait = 0; if ((bp->b_flags & B_CACHE) == 0) { - if (!TD_IS_IDLETHREAD(curthread)) { + if (!TD_IS_IDLETHREAD(td)) { #ifdef RACCT if (racct_enable) { - PROC_LOCK(curproc); - racct_add_buf(curproc, bp, 0); - PROC_UNLOCK(curproc); + PROC_LOCK(td->td_proc); + racct_add_buf(td->td_proc, bp, 0); + PROC_UNLOCK(td->td_proc); } #endif /* RACCT */ - curthread->td_ru.ru_inblock++; + td->td_ru.ru_inblock++; } bp->b_iocmd = BIO_READ; bp->b_flags &= ~B_INVAL; @@ -3861,6 +3870,7 @@ { struct buf *bp; struct bufobj *bo; + daddr_t d_blkno; int bsize, error, maxsize, vmio; off_t offset; @@ -3875,6 +3885,7 @@ flags &= ~(GB_UNMAPPED | GB_KVAALLOC); bo = &vp->v_bufobj; + d_blkno = blkno; loop: BO_RLOCK(bo); bp = gbincore(bo, blkno); @@ -3886,7 +3897,7 @@ */ lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK; - if (flags & GB_LOCK_NOWAIT) + if ((flags & GB_LOCK_NOWAIT) != 0) lockflags |= LK_NOWAIT; error = BUF_TIMELOCK(bp, lockflags, @@ -4005,10 +4016,10 @@ * here. */ if (flags & GB_NOCREAT) - return NULL; + return (NULL); if (bdomain[bo->bo_domain].bd_freebuffers == 0 && TD_IS_IDLETHREAD(curthread)) - return NULL; + return (NULL); bsize = vn_isdisk(vp, NULL) ? 
DEV_BSIZE : bo->bo_bsize; KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); @@ -4022,11 +4033,24 @@ flags &= ~(GB_UNMAPPED | GB_KVAALLOC); } maxsize = imax(maxsize, bsize); + if ((flags & GB_NOSPARSE) != 0 && vmio && + !vn_isdisk(vp, NULL)) { + error = VOP_BMAP(vp, blkno, NULL, &d_blkno, 0, 0); + KASSERT(error != EOPNOTSUPP, + ("GB_NOSPARSE from fs not supporting bmap, vp %p", + vp)); + if (error != 0) + return (NULL); + if (d_blkno == -1) { + curthread->td_errno = EJUSTRETURN; + return (NULL); + } + } bp = getnewbuf(vp, slpflag, slptimeo, maxsize, flags); if (bp == NULL) { if (slpflag || slptimeo) - return NULL; + return (NULL); /* * XXX This is here until the sleep path is diagnosed * enough to work under very low memory conditions. @@ -4072,7 +4096,8 @@ * Insert the buffer into the hash, so that it can * be found by incore. */ - bp->b_blkno = bp->b_lblkno = blkno; + bp->b_lblkno = blkno; + bp->b_blkno = d_blkno; bp->b_offset = offset; bgetvp(vp, bp); BO_UNLOCK(bo); Index: sys/kern/vfs_cluster.c =================================================================== --- sys/kern/vfs_cluster.c +++ sys/kern/vfs_cluster.c @@ -94,12 +94,14 @@ { struct buf *bp, *rbp, *reqbp; struct bufobj *bo; + struct thread *td; daddr_t blkno, origblkno; int maxra, racluster; int error, ncontig; int i; error = 0; + td = curthread; bo = &vp->v_bufobj; if (!unmapped_buf_allowed) gbflags &= ~GB_UNMAPPED; @@ -118,9 +120,15 @@ /* * get the requested block */ + if ((gbflags & GB_NOSPARSE) != 0) + td->td_errno = 0; *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags); - if (bp == NULL) + if (bp == NULL) { + if ((gbflags & GB_NOSPARSE) != 0 && td->td_errno == EJUSTRETURN) + return (EJUSTRETURN); return (EBUSY); + } + gbflags &= ~GB_NOSPARSE; origblkno = lblkno; /* @@ -243,12 +251,12 @@ bstrategy(bp); #ifdef RACCT if (racct_enable) { - PROC_LOCK(curproc); - racct_add_buf(curproc, bp, 0); - PROC_UNLOCK(curproc); + PROC_LOCK(td->td_proc); + racct_add_buf(td->td_proc, bp, 0); + 
PROC_UNLOCK(td->td_proc); } #endif /* RACCT */ - curthread->td_ru.ru_inblock++; + td->td_ru.ru_inblock++; } /* @@ -303,12 +311,12 @@ bstrategy(rbp); #ifdef RACCT if (racct_enable) { - PROC_LOCK(curproc); - racct_add_buf(curproc, rbp, 0); - PROC_UNLOCK(curproc); + PROC_LOCK(td->td_proc); + racct_add_buf(td->td_proc, rbp, 0); + PROC_UNLOCK(td->td_proc); } #endif /* RACCT */ - curthread->td_ru.ru_inblock++; + td->td_ru.ru_inblock++; } if (reqbp) { Index: sys/sys/buf.h =================================================================== --- sys/sys/buf.h +++ sys/sys/buf.h @@ -479,6 +479,7 @@ #define GB_UNMAPPED 0x0008 /* Do not mmap buffer pages. */ #define GB_KVAALLOC 0x0010 /* But allocate KVA. */ #define GB_CKHASH 0x0020 /* If reading, calc checksum hash */ +#define GB_NOSPARSE 0x0040 /* Do not instantiate holes */ #ifdef _KERNEL extern int nbuf; /* The number of buffer headers */ Index: sys/ufs/ffs/ffs_vnops.c =================================================================== --- sys/ufs/ffs/ffs_vnops.c +++ sys/ufs/ffs/ffs_vnops.c @@ -462,6 +462,26 @@ #endif } +static int +ffs_read_hole(struct uio *uio, long xfersize, long *size) +{ + ssize_t saved_resid, tlen; + int error; + + while (xfersize > 0) { + tlen = min(xfersize, ZERO_REGION_SIZE); + saved_resid = uio->uio_resid; + error = vn_io_fault_uiomove(__DECONST(void *, zero_region), + tlen, uio); + if (error != 0) + return (error); + tlen = saved_resid - uio->uio_resid; + xfersize -= tlen; + *size -= tlen; + } + return (0); +} + /* * Vnode op for reading. */ @@ -566,7 +586,7 @@ * Don't do readahead if this is the end of the file. 
*/ error = bread_gb(vp, lbn, size, NOCRED, - GB_UNMAPPED, &bp); + GB_UNMAPPED | GB_NOSPARSE, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { /* * Otherwise if we are allowed to cluster, @@ -577,7 +597,7 @@ */ error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, blkoffset + uio->uio_resid, - seqcount, GB_UNMAPPED, &bp); + seqcount, GB_UNMAPPED | GB_NOSPARSE, &bp); } else if (seqcount > 1) { /* * If we are NOT allowed to cluster, then @@ -589,7 +609,8 @@ */ u_int nextsize = blksize(fs, ip, nextlbn); error = breadn_flags(vp, lbn, size, &nextlbn, - &nextsize, 1, NOCRED, GB_UNMAPPED, NULL, &bp); + &nextsize, 1, NOCRED, GB_UNMAPPED | GB_NOSPARSE, + NULL, &bp); } else { /* * Failing all of the above, just read what the @@ -597,9 +618,14 @@ * the first option above. */ error = bread_gb(vp, lbn, size, NOCRED, - GB_UNMAPPED, &bp); + GB_UNMAPPED | GB_NOSPARSE, &bp); } - if (error) { + if (error == EJUSTRETURN) { + error = ffs_read_hole(uio, xfersize, &size); + if (error == 0) + continue; + } + if (error != 0) { brelse(bp); bp = NULL; break;