Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -3864,6 +3864,35 @@
 	bo = &vp->v_bufobj;
 	d_blkno = dblkno;
+
+	/* Attempt lockless lookup first. */
+	bp = gbincore_unlocked(bo, blkno);
+	if (bp == NULL)
+		goto newbuf_unlocked;
+	else {
+		int lockflags;
+
+		lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL |
+		    ((flags & GB_LOCK_NOWAIT) ? LK_NOWAIT : 0);
+
+		error = BUF_TIMELOCK(bp, lockflags, NULL, "getblku", slpflag,
+		    slptimeo);
+		if (error == ENOLCK)
+			goto loop;
+		else if (error != 0)
+			return (error);
+		/* XXX: Should we retry with BO_RLOCK for LK_NOWAIT/EBUSY? */
+
+		/* Verify buf identity has not changed since lookup. */
+		if (bp->b_bufobj != bo || bp->b_lblkno != blkno) {
+			/* It changed, fall back to locked lookup. */
+			/* XXX: Do we need to handle bremfreef()? */
+			BUF_UNLOCK(bp);
+			goto loop;
+		}
+		goto existingbuf_unlocked;
+	}
+
 loop:
 	BO_RLOCK(bo);
 	bp = gbincore(bo, blkno);
@@ -3890,8 +3919,10 @@
 		/* We timed out or were interrupted. */
 		else if (error != 0)
 			return (error);
+
+existingbuf_unlocked:
 		/* If recursed, assume caller knows the rules. */
-		else if (BUF_LOCKRECURSED(bp))
+		if (BUF_LOCKRECURSED(bp))
 			goto end;
 
 		/*
@@ -3989,6 +4020,7 @@
 	 * buffer is also considered valid (not marked B_INVAL).
 	 */
 	BO_RUNLOCK(bo);
+newbuf_unlocked:
 	/*
 	 * If the user does not want us to create the buffer, bail out
 	 * here.
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -234,6 +234,7 @@
 struct nfs_public nfs_pub;
 
 static uma_zone_t buf_trie_zone;
+static smr_t buf_trie_smr;
 
 /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
 static uma_zone_t vnode_zone;
@@ -491,17 +492,16 @@
 static void *
 buf_trie_alloc(struct pctrie *ptree)
 {
-
-	return uma_zalloc(buf_trie_zone, M_NOWAIT);
+	return uma_zalloc_smr(buf_trie_zone, M_NOWAIT);
 }
 
 static void
 buf_trie_free(struct pctrie *ptree, void *node)
 {
-
-	uma_zfree(buf_trie_zone, node);
+	uma_zfree_smr(buf_trie_zone, node);
 }
-PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free);
+PCTRIE_DEFINE_SMR(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free,
+    buf_trie_smr);
 
 /*
  * Initialize the vnode management data structures.
@@ -675,7 +675,8 @@
 	 */
 	buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(),
 	    NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR,
-	    UMA_ZONE_NOFREE);
+	    UMA_ZONE_NOFREE | UMA_ZONE_SMR);
+	buf_trie_smr = uma_zone_get_smr(buf_trie_zone);
 	uma_prealloc(buf_trie_zone, nbuf);
 
 	vnodes_created = counter_u64_alloc(M_WAITOK);
@@ -2333,6 +2334,24 @@
 	return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno);
 }
 
+/*
+ * Look up a buf using the buffer tries, without the bufobj lock. This relies
+ * on SMR for safe lookup, and bufs being in a no-free zone to provide type
+ * stability of the result. Like other lockless lookups, the found buf may
+ * already be invalid by the time this function returns.
+ */
+struct buf *
+gbincore_unlocked(struct bufobj *bo, daddr_t lblkno)
+{
+	struct buf *bp;
+
+	ASSERT_BO_UNLOCKED(bo);
+	bp = BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_clean.bv_root, lblkno);
+	if (bp != NULL)
+		return (bp);
+	return BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_dirty.bv_root, lblkno);
+}
+
 /*
  * Associate a buffer with a vnode.
  */
Index: sys/sys/buf.h
===================================================================
--- sys/sys/buf.h
+++ sys/sys/buf.h
@@ -547,6 +547,7 @@
 void	vfs_busy_pages_release(struct buf *bp);
 struct buf *incore(struct bufobj *, daddr_t);
 struct buf *gbincore(struct bufobj *, daddr_t);
+struct buf *gbincore_unlocked(struct bufobj *, daddr_t);
 struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
 int	getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size,
 	    int slpflag, int slptimeo, int flags, struct buf **bpp);
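
Editor's note, not part of the patch: the vfs_bio.c hunk implements the usual
lockless-lookup protocol: find the buf under SMR, lock it, then revalidate its
identity, because the buf may have been recycled between the unlocked lookup
and the lock acquisition. Below is a minimal sketch of that protocol in
consumer form. It assumes only the interfaces visible in the diff
(gbincore_unlocked(), BUF_TIMELOCK(), BUF_UNLOCK(), b_bufobj, b_lblkno);
my_lookup_buf() is a hypothetical helper, and unlike the real getblkx() it
retries the unlocked lookup on a race instead of falling back to the
BO_RLOCK'ed gbincore() path.

/* Sketch only; in-kernel code, assuming <sys/param.h> and <sys/buf.h>. */
static struct buf *
my_lookup_buf(struct bufobj *bo, daddr_t lblkno)
{
	struct buf *bp;
	int error;

	for (;;) {
		/* 1. SMR-protected lookup; no bufobj lock is held. */
		bp = gbincore_unlocked(bo, lblkno);
		if (bp == NULL)
			return (NULL);	/* Caller allocates a new buf. */

		/*
		 * 2. Lock the candidate. LK_SLEEPFAIL makes an acquisition
		 * that had to sleep fail with ENOLCK, since the buf may
		 * have been reused while we slept.
		 */
		error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, NULL,
		    "mybuf", 0, 0);
		if (error != 0)
			continue;	/* Lost a race; look up again. */

		/*
		 * 3. Revalidate identity. The buf's memory is type-stable,
		 * but it may now describe a different bufobj or logical
		 * block number.
		 */
		if (bp->b_bufobj == bo && bp->b_lblkno == lblkno)
			return (bp);
		BUF_UNLOCK(bp);
	}
}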
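
On the vfs_subr.c side, the trie nodes (not the bufs) are what SMR protects:
uma_zfree_smr() defers reuse of a freed node until every reader section that
was open at the time of the free has exited, so an unlocked trie walk never
steps on recycled nodes. A sketch of that read-side discipline, assuming the
smr_enter()/smr_exit() interfaces from sys/smr.h; my_trie_walk() is a
hypothetical stand-in for the descent that the PCTRIE_DEFINE_SMR-generated
BUF_PCTRIE_LOOKUP_UNLOCKED() performs:

static struct buf *
my_unlocked_lookup(struct pctrie *ptree, uint64_t key)
{
	struct buf *bp;

	/* Open an SMR read section on the trie zone's SMR domain. */
	smr_enter(buf_trie_smr);
	/*
	 * Nodes freed with uma_zfree_smr() cannot be reused while this
	 * section is open, so the walk is safe without the bufobj lock.
	 */
	bp = my_trie_walk(ptree, key);
	smr_exit(buf_trie_smr);
	return (bp);
}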