Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F146216687
D3726.id8915.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
D3726.id8915.diff
View Options
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -56,6 +56,7 @@
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
@@ -1786,6 +1787,8 @@
bp, bp->b_vp, bp->b_flags);
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
+ KASSERT((bp->b_flags & B_VMIO) != 0 || (bp->b_flags & B_NOREUSE) == 0,
+ ("brelse: non-VMIO buffer marked as NOREUSE"));
if (BUF_LOCKRECURSED(bp)) {
/*
@@ -2143,10 +2146,15 @@
freed = false;
if (!freed) {
/*
- * In order to maintain LRU page ordering, put
- * the page at the tail of the inactive queue.
+ * If the page is unlikely to be reused, let the
+ * VM know. Otherwise, maintain LRU page
+ * ordering and put the page at the tail of the
+ * inactive queue.
*/
- vm_page_deactivate(m);
+ if ((bp->b_flags & B_NOREUSE) != 0)
+ vm_page_deactivate_noreuse(m);
+ else
+ vm_page_deactivate(m);
}
}
vm_page_unlock(m);
@@ -2157,7 +2165,7 @@
if (bp->b_bufsize)
bufspaceadjust(bp, 0);
bp->b_npages = 0;
- bp->b_flags &= ~B_VMIO;
+ bp->b_flags &= ~(B_VMIO | B_NOREUSE);
}
/*
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -1034,9 +1034,11 @@
int
vop_stdadvise(struct vop_advise_args *ap)
{
+ struct buf *bp;
+ struct buflists *bl;
struct vnode *vp;
- off_t start, end;
- int error;
+ daddr_t bn, startn, endn;
+ int bsize, error;
vp = ap->a_vp;
switch (ap->a_advice) {
@@ -1049,27 +1051,42 @@
error = 0;
break;
case POSIX_FADV_DONTNEED:
- /*
- * Flush any open FS buffers and then remove pages
- * from the backing VM object. Using vinvalbuf() here
- * is a bit heavy-handed as it flushes all buffers for
- * the given vnode, not just the buffers covering the
- * requested range.
- */
error = 0;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_iflag & VI_DOOMED) {
VOP_UNLOCK(vp, 0);
break;
}
- vinvalbuf(vp, V_CLEANONLY, 0, 0);
- if (vp->v_object != NULL) {
- start = trunc_page(ap->a_start);
- end = round_page(ap->a_end);
- VM_OBJECT_WLOCK(vp->v_object);
- vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
- OFF_TO_IDX(end));
- VM_OBJECT_WUNLOCK(vp->v_object);
+
+ BO_RLOCK(&vp->v_bufobj);
+ bsize = vp->v_bufobj.bo_bsize;
+ startn = ap->a_start / bsize;
+ if (ap->a_end == OFF_MAX) {
+ endn = -1;
+ bl = &vp->v_bufobj.bo_clean.bv_hd;
+ if (!TAILQ_EMPTY(bl))
+ endn = TAILQ_LAST(bl, buflists)->b_lblkno;
+ bl = &vp->v_bufobj.bo_dirty.bv_hd;
+ if (!TAILQ_EMPTY(bl) &&
+ endn < TAILQ_LAST(bl, buflists)->b_lblkno)
+ endn = TAILQ_LAST(bl, buflists)->b_lblkno;
+ } else
+ endn = ap->a_end / bsize;
+ BO_RUNLOCK(&vp->v_bufobj);
+ /*
+ * Use the B_NOREUSE flag to hint that the pages backing each
+ * buffer in the range are unlikely to be reused. Dirty buffers
+ * will have the hint applied once they've been written.
+ */
+ for (bn = startn; bn <= endn; bn++) {
+ bp = getblk(vp, bn, bsize, 0, 0, GB_NOCREAT |
+ GB_UNMAPPED);
+ if (bp == NULL)
+ continue;
+ bp->b_flags |= B_NOREUSE;
+ if ((bp->b_flags & B_DELWRI) == 0)
+ bp->b_flags |= B_INVAL | B_RELBUF;
+ brelse(bp);
}
VOP_UNLOCK(vp, 0);
break;
Index: sys/kern/vfs_syscalls.c
===================================================================
--- sys/kern/vfs_syscalls.c
+++ sys/kern/vfs_syscalls.c
@@ -4610,8 +4610,6 @@
new->fa_advice = advice;
new->fa_start = offset;
new->fa_end = end;
- new->fa_prevstart = 0;
- new->fa_prevend = 0;
fp->f_advice = new;
new = fa;
}
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -762,18 +762,13 @@
* File table vnode read routine.
*/
static int
-vn_read(fp, uio, active_cred, flags, td)
- struct file *fp;
- struct uio *uio;
- struct ucred *active_cred;
- int flags;
- struct thread *td;
+vn_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
+ struct thread *td)
{
struct vnode *vp;
- struct mtx *mtxp;
+ off_t offset;
int error, ioflag;
int advice;
- off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
@@ -807,39 +802,14 @@
fp->f_nextoff = uio->uio_offset;
VOP_UNLOCK(vp, 0);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
- offset != uio->uio_offset) {
+ offset != uio->uio_offset)
/*
* Use POSIX_FADV_DONTNEED to flush clean pages and
* buffers for the backing file after a
- * POSIX_FADV_NOREUSE read(2). To optimize the common
- * case of using POSIX_FADV_NOREUSE with sequential
- * access, track the previous implicit DONTNEED
- * request and grow this request to include the
- * current read(2) in addition to the previous
- * DONTNEED. With purely sequential access this will
- * cause the DONTNEED requests to continously grow to
- * cover all of the previously read regions of the
- * file. This allows filesystem blocks that are
- * accessed by multiple calls to read(2) to be flushed
- * once the last read(2) finishes.
+ * POSIX_FADV_NOREUSE read(2).
*/
- start = offset;
- end = uio->uio_offset - 1;
- mtxp = mtx_pool_find(mtxpool_sleep, fp);
- mtx_lock(mtxp);
- if (fp->f_advice != NULL &&
- fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
- if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
- start = fp->f_advice->fa_prevstart;
- else if (fp->f_advice->fa_prevstart != 0 &&
- fp->f_advice->fa_prevstart == end + 1)
- end = fp->f_advice->fa_prevend;
- fp->f_advice->fa_prevstart = start;
- fp->f_advice->fa_prevend = end;
- }
- mtx_unlock(mtxp);
- error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
- }
+ error = VOP_ADVISE(vp, offset, uio->uio_offset - 1,
+ POSIX_FADV_DONTNEED);
return (error);
}
@@ -847,19 +817,14 @@
* File table vnode write routine.
*/
static int
-vn_write(fp, uio, active_cred, flags, td)
- struct file *fp;
- struct uio *uio;
- struct ucred *active_cred;
- int flags;
- struct thread *td;
+vn_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags,
+ struct thread *td)
{
struct vnode *vp;
struct mount *mp;
- struct mtx *mtxp;
+ off_t offset;
int error, ioflag, lock_flags;
int advice;
- off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
@@ -914,55 +879,14 @@
if (vp->v_type != VCHR)
vn_finished_write(mp);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
- offset != uio->uio_offset) {
+ offset != uio->uio_offset)
/*
* Use POSIX_FADV_DONTNEED to flush clean pages and
* buffers for the backing file after a
- * POSIX_FADV_NOREUSE write(2). To optimize the
- * common case of using POSIX_FADV_NOREUSE with
- * sequential access, track the previous implicit
- * DONTNEED request and grow this request to include
- * the current write(2) in addition to the previous
- * DONTNEED. With purely sequential access this will
- * cause the DONTNEED requests to continously grow to
- * cover all of the previously written regions of the
- * file.
- *
- * Note that the blocks just written are almost
- * certainly still dirty, so this only works when
- * VOP_ADVISE() calls from subsequent writes push out
- * the data written by this write(2) once the backing
- * buffers are clean. However, as compared to forcing
- * IO_DIRECT, this gives much saner behavior. Write
- * clustering is still allowed, and clean pages are
- * merely moved to the cache page queue rather than
- * outright thrown away. This means a subsequent
- * read(2) can still avoid hitting the disk if the
- * pages have not been reclaimed.
- *
- * This does make POSIX_FADV_NOREUSE largely useless
- * with non-sequential access. However, sequential
- * access is the more common use case and the flag is
- * merely advisory.
+ * POSIX_FADV_NOREUSE write(2).
*/
- start = offset;
- end = uio->uio_offset - 1;
- mtxp = mtx_pool_find(mtxpool_sleep, fp);
- mtx_lock(mtxp);
- if (fp->f_advice != NULL &&
- fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
- if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
- start = fp->f_advice->fa_prevstart;
- else if (fp->f_advice->fa_prevstart != 0 &&
- fp->f_advice->fa_prevstart == end + 1)
- end = fp->f_advice->fa_prevend;
- fp->f_advice->fa_prevstart = start;
- fp->f_advice->fa_prevend = end;
- }
- mtx_unlock(mtxp);
- error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
- }
-
+ error = VOP_ADVISE(vp, offset, uio->uio_offset - 1,
+ POSIX_FADV_DONTNEED);
unlock:
return (error);
}
Index: sys/sys/buf.h
===================================================================
--- sys/sys/buf.h
+++ sys/sys/buf.h
@@ -204,7 +204,7 @@
#define B_PERSISTENT 0x00000100 /* Perm. ref'ed while EXT2FS mounted. */
#define B_DONE 0x00000200 /* I/O completed. */
#define B_EINTR 0x00000400 /* I/O was interrupted */
-#define B_00000800 0x00000800 /* Available flag. */
+#define B_NOREUSE 0x00000800 /* Won't be reused once released. */
#define B_00001000 0x00001000 /* Available flag. */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
#define B_BARRIER 0x00004000 /* Write this and all preceeding first. */
@@ -229,7 +229,7 @@
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
"\33paging\32infreecnt\31nocopy\30b23\27relbuf\26dirty\25b20" \
"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
- "\15b12\14b11\13eintr\12done\11persist\10delwri" \
+ "\15b12\14noreuse\13eintr\12done\11persist\10delwri" \
"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
/*
Index: sys/sys/file.h
===================================================================
--- sys/sys/file.h
+++ sys/sys/file.h
@@ -160,8 +160,6 @@
int fa_advice; /* (f) FADV_* type. */
off_t fa_start; /* (f) Region start. */
off_t fa_end; /* (f) Region end. */
- off_t fa_prevstart; /* (f) Previous NOREUSE start. */
- off_t fa_prevend; /* (f) Previous NOREUSE end. */
};
struct file {
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -304,8 +304,6 @@
void vm_object_set_writeable_dirty (vm_object_t);
void vm_object_init (void);
void vm_object_madvise(vm_object_t, vm_pindex_t, vm_pindex_t, int);
-void vm_object_page_cache(vm_object_t object, vm_pindex_t start,
- vm_pindex_t end);
boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start,
vm_ooffset_t end, int flags);
void vm_object_page_remove(vm_object_t object, vm_pindex_t start,
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -1963,59 +1963,6 @@
}
/*
- * vm_object_page_cache:
- *
- * For the given object, attempt to move the specified clean
- * pages to the cache queue. If a page is wired for any reason,
- * then it will not be changed. Pages are specified by the given
- * range ["start", "end"). As a special case, if "end" is zero,
- * then the range extends from "start" to the end of the object.
- * Any mappings to the specified pages are removed before the
- * pages are moved to the cache queue.
- *
- * This operation should only be performed on objects that
- * contain non-fictitious, managed pages.
- *
- * The object must be locked.
- */
-void
-vm_object_page_cache(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
-{
- struct mtx *mtx, *new_mtx;
- vm_page_t p, next;
-
- VM_OBJECT_ASSERT_WLOCKED(object);
- KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0,
- ("vm_object_page_cache: illegal object %p", object));
- if (object->resident_page_count == 0)
- return;
- p = vm_page_find_least(object, start);
-
- /*
- * Here, the variable "p" is either (1) the page with the least pindex
- * greater than or equal to the parameter "start" or (2) NULL.
- */
- mtx = NULL;
- for (; p != NULL && (p->pindex < end || end == 0); p = next) {
- next = TAILQ_NEXT(p, listq);
-
- /*
- * Avoid releasing and reacquiring the same page lock.
- */
- new_mtx = vm_page_lockptr(p);
- if (mtx != new_mtx) {
- if (mtx != NULL)
- mtx_unlock(mtx);
- mtx = new_mtx;
- mtx_lock(mtx);
- }
- vm_page_try_to_cache(p);
- }
- if (mtx != NULL)
- mtx_unlock(mtx);
-}
-
-/*
* Populate the specified range of the object with valid pages. Returns
* TRUE if the range is successfully populated and FALSE otherwise.
*
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -451,6 +451,7 @@
int vm_page_try_to_cache (vm_page_t);
int vm_page_try_to_free (vm_page_t);
void vm_page_deactivate (vm_page_t);
+void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);
void vm_page_dequeue_locked(vm_page_t m);
vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -2589,6 +2589,19 @@
}
/*
+ * Move the specified page to the inactive queue with the expectation
+ * that it is unlikely to be reused.
+ *
+ * The page must be locked.
+ */
+void
+vm_page_deactivate_noreuse(vm_page_t m)
+{
+
+ _vm_page_deactivate(m, 1);
+}
+
+/*
* vm_page_try_to_cache:
*
* Returns 0 on failure, 1 on success
@@ -2740,8 +2753,7 @@
/*
* vm_page_advise
*
- * Deactivate or do nothing, as appropriate. This routine is used
- * by madvise() and vop_stdadvise().
+ * Deactivate or do nothing, as appropriate.
*
* The object and page must be locked.
*/
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Mar 1, 8:16 PM (21 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29124818
Default Alt Text
D3726.id8915.diff (13 KB)
Attached To
Mode
D3726: rework handling of POSIX_FADV_{DONTNEED,NOREUSE}
Attached
Detach File
Event Timeline
Log In to Comment