Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F152031813
D3726.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
15 KB
Referenced Files
None
Subscribers
None
D3726.id.diff
View Options
Index: head/sys/kern/vfs_bio.c
===================================================================
--- head/sys/kern/vfs_bio.c
+++ head/sys/kern/vfs_bio.c
@@ -1785,6 +1785,8 @@
bp, bp->b_vp, bp->b_flags);
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
+ KASSERT((bp->b_flags & B_VMIO) != 0 || (bp->b_flags & B_NOREUSE) == 0,
+ ("brelse: non-VMIO buffer marked NOREUSE"));
if (BUF_LOCKRECURSED(bp)) {
/*
@@ -1873,8 +1875,10 @@
allocbuf(bp, 0);
}
- if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0) {
+ if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0 ||
+ (bp->b_flags & (B_DELWRI | B_NOREUSE)) == B_NOREUSE) {
allocbuf(bp, 0);
+ bp->b_flags &= ~B_NOREUSE;
if (bp->b_vp != NULL)
brelvp(bp);
}
@@ -1969,6 +1973,10 @@
if ((bp->b_flags & B_DELWRI) == 0 &&
(bp->b_xflags & BX_VNDIRTY))
panic("bqrelse: not dirty");
+ if ((bp->b_flags & B_NOREUSE) != 0) {
+ brelse(bp);
+ return;
+ }
qindex = QUEUE_CLEAN;
}
binsfree(bp, qindex);
@@ -2079,10 +2087,15 @@
freed = false;
if (!freed) {
/*
- * In order to maintain LRU page ordering, put
- * the page at the tail of the inactive queue.
+ * If the page is unlikely to be reused, let the
+ * VM know. Otherwise, maintain LRU page
+ * ordering and put the page at the tail of the
+ * inactive queue.
*/
- vm_page_deactivate(m);
+ if ((bp->b_flags & B_NOREUSE) != 0)
+ vm_page_deactivate_noreuse(m);
+ else
+ vm_page_deactivate(m);
}
}
vm_page_unlock(m);
@@ -2456,8 +2469,9 @@
* Note: we no longer distinguish between VMIO and non-VMIO
* buffers.
*/
- KASSERT((bp->b_flags & B_DELWRI) == 0,
- ("delwri buffer %p found in queue %d", bp, qindex));
+ KASSERT((bp->b_flags & (B_DELWRI | B_NOREUSE)) == 0,
+ ("invalid buffer %p flags %#x found in queue %d", bp, bp->b_flags,
+ qindex));
/*
* When recycling a clean buffer we have to truncate it and
Index: head/sys/kern/vfs_default.c
===================================================================
--- head/sys/kern/vfs_default.c
+++ head/sys/kern/vfs_default.c
@@ -1034,9 +1034,12 @@
int
vop_stdadvise(struct vop_advise_args *ap)
{
+ struct buf *bp;
+ struct buflists *bl;
struct vnode *vp;
+ daddr_t bn, startn, endn;
off_t start, end;
- int error;
+ int bsize, error;
vp = ap->a_vp;
switch (ap->a_advice) {
@@ -1049,28 +1052,59 @@
error = 0;
break;
case POSIX_FADV_DONTNEED:
- /*
- * Flush any open FS buffers and then remove pages
- * from the backing VM object. Using vinvalbuf() here
- * is a bit heavy-handed as it flushes all buffers for
- * the given vnode, not just the buffers covering the
- * requested range.
- */
error = 0;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_iflag & VI_DOOMED) {
VOP_UNLOCK(vp, 0);
break;
}
- vinvalbuf(vp, V_CLEANONLY, 0, 0);
+
+ /*
+ * Deactivate pages in the specified range from the backing VM
+ * object. Pages that are resident in the buffer cache will
+ * remain wired until their corresponding buffers are released
+ * below.
+ */
if (vp->v_object != NULL) {
start = trunc_page(ap->a_start);
end = round_page(ap->a_end);
VM_OBJECT_WLOCK(vp->v_object);
- vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
+ vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
OFF_TO_IDX(end));
VM_OBJECT_WUNLOCK(vp->v_object);
}
+
+ BO_RLOCK(&vp->v_bufobj);
+ bsize = vp->v_bufobj.bo_bsize;
+ startn = ap->a_start / bsize;
+ if (ap->a_end == OFF_MAX) {
+ endn = -1;
+ bl = &vp->v_bufobj.bo_clean.bv_hd;
+ if (!TAILQ_EMPTY(bl))
+ endn = TAILQ_LAST(bl, buflists)->b_lblkno;
+ bl = &vp->v_bufobj.bo_dirty.bv_hd;
+ if (!TAILQ_EMPTY(bl) &&
+ endn < TAILQ_LAST(bl, buflists)->b_lblkno)
+ endn = TAILQ_LAST(bl, buflists)->b_lblkno;
+ } else
+ endn = ap->a_end / bsize;
+ BO_RUNLOCK(&vp->v_bufobj);
+ /*
+ * In the VMIO case, use the B_NOREUSE flag to hint that the
+ * pages backing each buffer in the range are unlikely to be
+ * reused. Dirty buffers will have the hint applied once
+ * they've been written.
+ */
+ for (bn = startn; bn <= endn; bn++) {
+ bp = getblk(vp, bn, bsize, 0, 0, GB_NOCREAT |
+ GB_UNMAPPED);
+ if (bp == NULL)
+ continue;
+ bp->b_flags |= B_RELBUF;
+ if (vp->v_object != NULL)
+ bp->b_flags |= B_NOREUSE;
+ brelse(bp);
+ }
VOP_UNLOCK(vp, 0);
break;
default:
Index: head/sys/kern/vfs_syscalls.c
===================================================================
--- head/sys/kern/vfs_syscalls.c
+++ head/sys/kern/vfs_syscalls.c
@@ -4610,8 +4610,6 @@
new->fa_advice = advice;
new->fa_start = offset;
new->fa_end = end;
- new->fa_prevstart = 0;
- new->fa_prevend = 0;
fp->f_advice = new;
new = fa;
}
Index: head/sys/kern/vfs_vnops.c
===================================================================
--- head/sys/kern/vfs_vnops.c
+++ head/sys/kern/vfs_vnops.c
@@ -770,10 +770,9 @@
struct thread *td;
{
struct vnode *vp;
- struct mtx *mtxp;
+ off_t orig_offset;
int error, ioflag;
int advice;
- off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
@@ -797,7 +796,7 @@
/* Disable read-ahead for random I/O. */
break;
}
- offset = uio->uio_offset;
+ orig_offset = uio->uio_offset;
#ifdef MAC
error = mac_vnode_check_read(active_cred, fp->f_cred, vp);
@@ -807,39 +806,14 @@
fp->f_nextoff = uio->uio_offset;
VOP_UNLOCK(vp, 0);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
- offset != uio->uio_offset) {
+ orig_offset != uio->uio_offset)
/*
- * Use POSIX_FADV_DONTNEED to flush clean pages and
- * buffers for the backing file after a
- * POSIX_FADV_NOREUSE read(2). To optimize the common
- * case of using POSIX_FADV_NOREUSE with sequential
- * access, track the previous implicit DONTNEED
- * request and grow this request to include the
- * current read(2) in addition to the previous
- * DONTNEED. With purely sequential access this will
- * cause the DONTNEED requests to continously grow to
- * cover all of the previously read regions of the
- * file. This allows filesystem blocks that are
- * accessed by multiple calls to read(2) to be flushed
- * once the last read(2) finishes.
+ * Use POSIX_FADV_DONTNEED to flush pages and buffers
+ * for the backing file after a POSIX_FADV_NOREUSE
+ * read(2).
*/
- start = offset;
- end = uio->uio_offset - 1;
- mtxp = mtx_pool_find(mtxpool_sleep, fp);
- mtx_lock(mtxp);
- if (fp->f_advice != NULL &&
- fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
- if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
- start = fp->f_advice->fa_prevstart;
- else if (fp->f_advice->fa_prevstart != 0 &&
- fp->f_advice->fa_prevstart == end + 1)
- end = fp->f_advice->fa_prevend;
- fp->f_advice->fa_prevstart = start;
- fp->f_advice->fa_prevend = end;
- }
- mtx_unlock(mtxp);
- error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
- }
+ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1,
+ POSIX_FADV_DONTNEED);
return (error);
}
@@ -856,10 +830,9 @@
{
struct vnode *vp;
struct mount *mp;
- struct mtx *mtxp;
+ off_t orig_offset;
int error, ioflag, lock_flags;
int advice;
- off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
@@ -902,7 +875,7 @@
/* XXX: Is this correct? */
break;
}
- offset = uio->uio_offset;
+ orig_offset = uio->uio_offset;
#ifdef MAC
error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
@@ -914,55 +887,14 @@
if (vp->v_type != VCHR)
vn_finished_write(mp);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
- offset != uio->uio_offset) {
+ orig_offset != uio->uio_offset)
/*
- * Use POSIX_FADV_DONTNEED to flush clean pages and
- * buffers for the backing file after a
- * POSIX_FADV_NOREUSE write(2). To optimize the
- * common case of using POSIX_FADV_NOREUSE with
- * sequential access, track the previous implicit
- * DONTNEED request and grow this request to include
- * the current write(2) in addition to the previous
- * DONTNEED. With purely sequential access this will
- * cause the DONTNEED requests to continously grow to
- * cover all of the previously written regions of the
- * file.
- *
- * Note that the blocks just written are almost
- * certainly still dirty, so this only works when
- * VOP_ADVISE() calls from subsequent writes push out
- * the data written by this write(2) once the backing
- * buffers are clean. However, as compared to forcing
- * IO_DIRECT, this gives much saner behavior. Write
- * clustering is still allowed, and clean pages are
- * merely moved to the cache page queue rather than
- * outright thrown away. This means a subsequent
- * read(2) can still avoid hitting the disk if the
- * pages have not been reclaimed.
- *
- * This does make POSIX_FADV_NOREUSE largely useless
- * with non-sequential access. However, sequential
- * access is the more common use case and the flag is
- * merely advisory.
+ * Use POSIX_FADV_DONTNEED to flush pages and buffers
+ * for the backing file after a POSIX_FADV_NOREUSE
+ * write(2).
*/
- start = offset;
- end = uio->uio_offset - 1;
- mtxp = mtx_pool_find(mtxpool_sleep, fp);
- mtx_lock(mtxp);
- if (fp->f_advice != NULL &&
- fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
- if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
- start = fp->f_advice->fa_prevstart;
- else if (fp->f_advice->fa_prevstart != 0 &&
- fp->f_advice->fa_prevstart == end + 1)
- end = fp->f_advice->fa_prevend;
- fp->f_advice->fa_prevstart = start;
- fp->f_advice->fa_prevend = end;
- }
- mtx_unlock(mtxp);
- error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
- }
-
+ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1,
+ POSIX_FADV_DONTNEED);
unlock:
return (error);
}
Index: head/sys/sys/buf.h
===================================================================
--- head/sys/sys/buf.h
+++ head/sys/sys/buf.h
@@ -204,7 +204,7 @@
#define B_PERSISTENT 0x00000100 /* Perm. ref'ed while EXT2FS mounted. */
#define B_DONE 0x00000200 /* I/O completed. */
#define B_EINTR 0x00000400 /* I/O was interrupted */
-#define B_00000800 0x00000800 /* Available flag. */
+#define B_NOREUSE 0x00000800 /* Contents not reused once released. */
#define B_00001000 0x00001000 /* Available flag. */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
#define B_BARRIER 0x00004000 /* Write this and all preceeding first. */
@@ -229,7 +229,7 @@
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
"\33paging\32infreecnt\31nocopy\30b23\27relbuf\26dirty\25b20" \
"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
- "\15b12\14b11\13eintr\12done\11persist\10delwri" \
+ "\15b12\14noreuse\13eintr\12done\11persist\10delwri" \
"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
/*
Index: head/sys/sys/file.h
===================================================================
--- head/sys/sys/file.h
+++ head/sys/sys/file.h
@@ -160,8 +160,6 @@
int fa_advice; /* (f) FADV_* type. */
off_t fa_start; /* (f) Region start. */
off_t fa_end; /* (f) Region end. */
- off_t fa_prevstart; /* (f) Previous NOREUSE start. */
- off_t fa_prevend; /* (f) Previous NOREUSE end. */
};
struct file {
Index: head/sys/vm/vm_object.h
===================================================================
--- head/sys/vm/vm_object.h
+++ head/sys/vm/vm_object.h
@@ -304,10 +304,10 @@
void vm_object_set_writeable_dirty (vm_object_t);
void vm_object_init (void);
void vm_object_madvise(vm_object_t, vm_pindex_t, vm_pindex_t, int);
-void vm_object_page_cache(vm_object_t object, vm_pindex_t start,
- vm_pindex_t end);
boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start,
vm_ooffset_t end, int flags);
+void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start,
+ vm_pindex_t end);
void vm_object_page_remove(vm_object_t object, vm_pindex_t start,
vm_pindex_t end, int options);
boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t);
Index: head/sys/vm/vm_object.c
===================================================================
--- head/sys/vm/vm_object.c
+++ head/sys/vm/vm_object.c
@@ -1963,15 +1963,15 @@
}
/*
- * vm_object_page_cache:
+ * vm_object_page_noreuse:
*
- * For the given object, attempt to move the specified clean
- * pages to the cache queue. If a page is wired for any reason,
- * then it will not be changed. Pages are specified by the given
- * range ["start", "end"). As a special case, if "end" is zero,
- * then the range extends from "start" to the end of the object.
- * Any mappings to the specified pages are removed before the
- * pages are moved to the cache queue.
+ * For the given object, attempt to move the specified pages to
+ * the head of the inactive queue. This bypasses regular LRU
+ * operation and allows the pages to be reused quickly under memory
+ * pressure. If a page is wired for any reason, then it will not
+ * be queued. Pages are specified by the range ["start", "end").
+ * As a special case, if "end" is zero, then the range extends from
+ * "start" to the end of the object.
*
* This operation should only be performed on objects that
* contain non-fictitious, managed pages.
@@ -1979,14 +1979,14 @@
* The object must be locked.
*/
void
-vm_object_page_cache(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
+vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
struct mtx *mtx, *new_mtx;
vm_page_t p, next;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0,
- ("vm_object_page_cache: illegal object %p", object));
+ ("vm_object_page_noreuse: illegal object %p", object));
if (object->resident_page_count == 0)
return;
p = vm_page_find_least(object, start);
@@ -2009,7 +2009,7 @@
mtx = new_mtx;
mtx_lock(mtx);
}
- vm_page_try_to_cache(p);
+ vm_page_deactivate_noreuse(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
Index: head/sys/vm/vm_page.h
===================================================================
--- head/sys/vm/vm_page.h
+++ head/sys/vm/vm_page.h
@@ -451,6 +451,7 @@
int vm_page_try_to_cache (vm_page_t);
int vm_page_try_to_free (vm_page_t);
void vm_page_deactivate (vm_page_t);
+void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);
void vm_page_dequeue_locked(vm_page_t m);
vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
Index: head/sys/vm/vm_page.c
===================================================================
--- head/sys/vm/vm_page.c
+++ head/sys/vm/vm_page.c
@@ -2589,6 +2589,19 @@
}
/*
+ * Move the specified page to the inactive queue with the expectation
+ * that it is unlikely to be reused.
+ *
+ * The page must be locked.
+ */
+void
+vm_page_deactivate_noreuse(vm_page_t m)
+{
+
+ _vm_page_deactivate(m, 1);
+}
+
+/*
* vm_page_try_to_cache:
*
* Returns 0 on failure, 1 on success
@@ -2740,8 +2753,7 @@
/*
* vm_page_advise
*
- * Deactivate or do nothing, as appropriate. This routine is used
- * by madvise() and vop_stdadvise().
+ * Deactivate or do nothing, as appropriate.
*
* The object and page must be locked.
*/
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Apr 13, 5:48 AM (1 h, 33 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31396879
Default Alt Text
D3726.id.diff (15 KB)
Attached To
Mode
D3726: rework handling of POSIX_FADV_{DONTNEED,NOREUSE}
Attached
Detach File
Event Timeline
Log In to Comment