Page MenuHomeFreeBSD

D45395.diff
No OneTemporary

D45395.diff

diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -4231,35 +4231,29 @@
}
/*
- * This code is used to make sure that a buffer is not
- * created while the getnewbuf routine is blocked.
- * This can be a problem whether the vnode is locked or not.
- * If the buffer is created out from under us, we have to
- * throw away the one we just created.
*
- * Note: this must occur before we associate the buffer
- * with the vp especially considering limitations in
- * the splay tree implementation when dealing with duplicate
- * lblkno's.
- */
- BO_LOCK(bo);
- if (gbincore(bo, blkno)) {
- BO_UNLOCK(bo);
- bp->b_flags |= B_INVAL;
- bufspace_release(bufdomain(bp), maxsize);
- brelse(bp);
- goto loop;
- }
-
- /*
* Insert the buffer into the hash, so that it can
* be found by incore.
+ *
+ * We don't hold the bufobj interlock while allocating the new
+ * buffer. Consequently, we can race on buffer creation. This
+ * can be a problem whether the vnode is locked or not. If the
+ * buffer is created out from under us, we have to throw away
+ * the one we just created.
*/
bp->b_lblkno = blkno;
bp->b_blkno = d_blkno;
bp->b_offset = offset;
- bgetvp(vp, bp);
- BO_UNLOCK(bo);
+ error = bgetvp(vp, bp);
+ if (error != 0) {
+ KASSERT(error == EEXIST,
+ ("getblk: unexpected error %d from bgetvp",
+ error));
+ bp->b_flags |= B_INVAL;
+ bufspace_release(bufdomain(bp), maxsize);
+ brelse(bp);
+ goto loop;
+ }
/*
* set B_VMIO bit. allocbuf() the buffer bigger. Since the
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2697,12 +2697,12 @@
}
/*
- * Add the buffer to the sorted clean or dirty block list.
- *
- * NOTE: xflags is passed as a constant, optimizing this inline function!
+ * Add the buffer to the sorted clean or dirty block list. Return zero on
+ * success, EEXIST if a buffer with this identity already exists, or another
+ * error on allocation failure.
*/
-static void
-buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
+static inline int
+buf_vlist_find_or_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
{
struct bufv *bv;
struct buf *n;
@@ -2713,30 +2713,69 @@
("buf_vlist_add: bo %p does not allow bufs", bo));
KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0,
("dead bo %p", bo));
- KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
- ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags));
- bp->b_xflags |= xflags;
+ KASSERT((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == xflags,
+ ("buf_vlist_add: b_xflags %#x not set on bp %p", xflags, bp));
+
if (xflags & BX_VNDIRTY)
bv = &bo->bo_dirty;
else
bv = &bo->bo_clean;
- /*
- * Keep the list ordered. Optimize empty list insertion. Assume
- * we tend to grow at the tail so lookup_le should usually be cheaper
- * than _ge.
- */
- if (bv->bv_cnt == 0 ||
- bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno)
- TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs);
- else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL)
+ error = BUF_PCTRIE_INSERT_LOOKUP_LE(&bv->bv_root, bp, &n);
+ if (n == NULL) {
+ KASSERT(error != EEXIST,
+ ("buf_vlist_add: EEXIST but no existing buf found: bp %p",
+ bp));
+ } else {
+ KASSERT((uint64_t)n->b_lblkno <= (uint64_t)bp->b_lblkno,
+ ("buf_vlist_add: out of order insert/lookup: bp %p n %p",
+ bp, n));
+ KASSERT((n->b_lblkno == bp->b_lblkno) == (error == EEXIST),
+ ("buf_vlist_add: inconsistent result for existing buf: "
+ "error %d bp %p n %p", error, bp, n));
+ }
+ if (error != 0)
+ return (error);
+
+ /* Keep the list ordered. */
+ if (n == NULL) {
+ KASSERT(TAILQ_EMPTY(&bv->bv_hd) ||
+ (uint64_t)bp->b_lblkno <
+ (uint64_t)TAILQ_FIRST(&bv->bv_hd)->b_lblkno,
+ ("buf_vlist_add: queue order: "
+ "%p should be before first %p",
+ bp, TAILQ_FIRST(&bv->bv_hd)));
TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs);
- else
+ } else {
+ KASSERT(TAILQ_NEXT(n, b_bobufs) == NULL ||
+ (uint64_t)bp->b_lblkno <
+ (uint64_t)TAILQ_NEXT(n, b_bobufs)->b_lblkno,
+ ("buf_vlist_add: queue order: "
+ "%p should be before next %p",
+ bp, TAILQ_NEXT(n, b_bobufs)));
TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs);
- error = BUF_PCTRIE_INSERT(&bv->bv_root, bp);
- if (error)
- panic("buf_vlist_add: Preallocated nodes insufficient.");
+ }
+
bv->bv_cnt++;
+ return (0);
+}
+
+/*
+ * Add the buffer to the sorted clean or dirty block list.
+ *
+ * NOTE: xflags is passed as a constant, optimizing this inline function!
+ */
+static void
+buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
+{
+ int error;
+
+ KASSERT((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == 0,
+ ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags));
+ bp->b_xflags |= xflags;
+ error = buf_vlist_find_or_add(bp, bo, xflags);
+ if (error)
+ panic("buf_vlist_add: error=%d", error);
}
/*
@@ -2775,26 +2814,42 @@
/*
* Associate a buffer with a vnode.
*/
-void
+int
bgetvp(struct vnode *vp, struct buf *bp)
{
struct bufobj *bo;
+ int error;
bo = &vp->v_bufobj;
- ASSERT_BO_WLOCKED(bo);
+ ASSERT_BO_UNLOCKED(bo);
VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free"));
CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags);
VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp,
("bgetvp: bp already attached! %p", bp));
- vhold(vp);
- bp->b_vp = vp;
- bp->b_bufobj = bo;
/*
- * Insert onto list for new vnode.
+ * Add the buf to the vnode's clean list unless we lost a race and find
+ * an existing buf in either dirty or clean.
*/
- buf_vlist_add(bp, bo, BX_VNCLEAN);
+ bp->b_vp = vp;
+ bp->b_bufobj = bo;
+ bp->b_xflags |= BX_VNCLEAN;
+ error = EEXIST;
+ BO_LOCK(bo);
+ if (BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, bp->b_lblkno) == NULL)
+ error = buf_vlist_find_or_add(bp, bo, BX_VNCLEAN);
+ BO_UNLOCK(bo);
+ if (__predict_true(error == 0)) {
+ vhold(vp);
+ return (0);
+ }
+ if (error != EEXIST)
+ panic("bgetvp: buf_vlist_add error: %d", error);
+ bp->b_vp = NULL;
+ bp->b_bufobj = NULL;
+ bp->b_xflags &= ~BX_VNCLEAN;
+ return (error);
}
/*
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -603,7 +603,7 @@
int vmapbuf(struct buf *, void *, size_t, int);
void vunmapbuf(struct buf *);
void brelvp(struct buf *);
-void bgetvp(struct vnode *, struct buf *);
+int bgetvp(struct vnode *, struct buf *) __result_use_check;
void pbgetbo(struct bufobj *bo, struct buf *bp);
void pbgetvp(struct vnode *, struct buf *);
void pbrelbo(struct buf *);

File Metadata

Mime Type
text/plain
Expires
Sun, Dec 29, 5:38 AM (8 h, 43 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15628022
Default Alt Text
D45395.diff (6 KB)

Event Timeline