Index: head/sys/kern/vfs_bio.c =================================================================== --- head/sys/kern/vfs_bio.c +++ head/sys/kern/vfs_bio.c @@ -3849,7 +3849,7 @@ struct buf *bp; struct bufobj *bo; daddr_t d_blkno; - int bsize, error, maxsize, vmio; + int bsize, error, maxsize, vmio, lockflags; off_t offset; CTR3(KTR_BUF, "getblk(%p, %ld, %d)", vp, (long)blkno, size); @@ -3864,11 +3864,33 @@ bo = &vp->v_bufobj; d_blkno = dblkno; + + /* Attempt lockless lookup first. */ + bp = gbincore_unlocked(bo, blkno); + if (bp == NULL) + goto newbuf_unlocked; + + lockflags = LK_EXCLUSIVE | LK_SLEEPFAIL | + ((flags & GB_LOCK_NOWAIT) ? LK_NOWAIT : 0); + + error = BUF_TIMELOCK(bp, lockflags, NULL, "getblku", slpflag, + slptimeo); + if (error == EINTR || error == ERESTART) + return (error); + else if (error != 0) + goto loop; + + /* Verify buf identity has not changed since lookup. */ + if (bp->b_bufobj == bo && bp->b_lblkno == blkno) + goto foundbuf_fastpath; + + /* It changed, fall back to locked lookup. */ + BUF_UNLOCK_RAW(bp); + loop: BO_RLOCK(bo); bp = gbincore(bo, blkno); if (bp != NULL) { - int lockflags; /* * Buffer is in-core. If the buffer is not busy nor managed, * it must be on a queue. @@ -3890,8 +3912,10 @@ /* We timed out or were interrupted. */ else if (error != 0) return (error); + +foundbuf_fastpath: /* If recursed, assume caller knows the rules. */ - else if (BUF_LOCKRECURSED(bp)) + if (BUF_LOCKRECURSED(bp)) goto end; /* @@ -3989,6 +4013,7 @@ * buffer is also considered valid (not marked B_INVAL). */ BO_RUNLOCK(bo); +newbuf_unlocked: /* * If the user does not want us to create the buffer, bail out * here. 
Index: head/sys/kern/vfs_subr.c =================================================================== --- head/sys/kern/vfs_subr.c +++ head/sys/kern/vfs_subr.c @@ -234,6 +234,7 @@ struct nfs_public nfs_pub; static uma_zone_t buf_trie_zone; +static smr_t buf_trie_smr; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; @@ -491,17 +492,16 @@ static void * buf_trie_alloc(struct pctrie *ptree) { - - return uma_zalloc(buf_trie_zone, M_NOWAIT); + return (uma_zalloc_smr(buf_trie_zone, M_NOWAIT)); } static void buf_trie_free(struct pctrie *ptree, void *node) { - - uma_zfree(buf_trie_zone, node); + uma_zfree_smr(buf_trie_zone, node); } -PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); +PCTRIE_DEFINE_SMR(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free, + buf_trie_smr); /* * Initialize the vnode management data structures. @@ -675,7 +675,8 @@ */ buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, - UMA_ZONE_NOFREE); + UMA_ZONE_NOFREE | UMA_ZONE_SMR); + buf_trie_smr = uma_zone_get_smr(buf_trie_zone); uma_prealloc(buf_trie_zone, nbuf); vnodes_created = counter_u64_alloc(M_WAITOK); @@ -2330,7 +2331,25 @@ bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno); if (bp != NULL) return (bp); - return BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno); + return (BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno)); +} + +/* + * Look up a buf using the buffer tries, without the bufobj lock. This relies + * on SMR for safe lookup, and bufs being in a no-free zone to provide type + * stability of the result. Like other lockless lookups, the found buf may + * have changed identity by return; lock it and recheck b_bufobj/b_lblkno. 
+ */ +struct buf * +gbincore_unlocked(struct bufobj *bo, daddr_t lblkno) +{ + struct buf *bp; + + ASSERT_BO_UNLOCKED(bo); + bp = BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_clean.bv_root, lblkno); + if (bp != NULL) + return (bp); + return (BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_dirty.bv_root, lblkno)); } /* Index: head/sys/sys/buf.h =================================================================== --- head/sys/sys/buf.h +++ head/sys/sys/buf.h @@ -326,6 +326,9 @@ KASSERT(((bp)->b_flags & B_REMFREE) == 0, \ ("BUF_UNLOCK %p while B_REMFREE is still set.", (bp))); \ \ + BUF_UNLOCK_RAW((bp)); \ +} while (0) +#define BUF_UNLOCK_RAW(bp) do { \ (void)_lockmgr_args(&(bp)->b_lock, LK_RELEASE, NULL, \ LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, \ LOCK_FILE, LOCK_LINE); \ @@ -547,6 +550,7 @@ void vfs_busy_pages_release(struct buf *bp); struct buf *incore(struct bufobj *, daddr_t); struct buf *gbincore(struct bufobj *, daddr_t); +struct buf *gbincore_unlocked(struct bufobj *, daddr_t); struct buf *getblk(struct vnode *, daddr_t, int, int, int, int); int getblkx(struct vnode *vp, daddr_t blkno, daddr_t dblkno, int size, int slpflag, int slptimeo, int flags, struct buf **bpp);