D25968: VMIO read
D25968.id75751.diff
Index: sys/fs/nullfs/null_subr.c
===================================================================
--- sys/fs/nullfs/null_subr.c
+++ sys/fs/nullfs/null_subr.c
@@ -258,6 +258,26 @@
if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
vp->v_vflag |= VV_ROOT;
+ /*
+ * We might miss the case where the lower vnode sets VIRF_PGREAD
+ * some time after construction, which is the typical case.
+ * null_open() rechecks.
+ */
+ if ((lowervp->v_irflag & VIRF_PGREAD) != 0) {
+ MPASS(lowervp->v_object != NULL);
+ if ((vp->v_irflag & VIRF_PGREAD) == 0) {
+ if (vp->v_object == NULL)
+ vp->v_object = lowervp->v_object;
+ else
+ MPASS(vp->v_object == lowervp->v_object);
+ VI_LOCK(vp);
+ vp->v_irflag |= VIRF_PGREAD;
+ VI_UNLOCK(vp);
+ } else {
+ MPASS(vp->v_object != NULL);
+ }
+ }
+
/*
* Atomically insert our new node into the hash or vget existing
* if someone else has beaten us to it.
Index: sys/fs/nullfs/null_vnops.c
===================================================================
--- sys/fs/nullfs/null_vnops.c
+++ sys/fs/nullfs/null_vnops.c
@@ -439,8 +439,17 @@
vp = ap->a_vp;
ldvp = NULLVPTOLOWERVP(vp);
retval = null_bypass(&ap->a_gen);
- if (retval == 0)
+ if (retval == 0) {
vp->v_object = ldvp->v_object;
+ if ((ldvp->v_irflag & VIRF_PGREAD) != 0) {
+ MPASS(vp->v_object != NULL);
+ if ((vp->v_irflag & VIRF_PGREAD) == 0) {
+ VI_LOCK(vp);
+ vp->v_irflag |= VIRF_PGREAD;
+ VI_UNLOCK(vp);
+ }
+ }
+ }
return (retval);
}
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -4211,7 +4211,9 @@
buf[1] = '\0';
if (vp->v_irflag & VIRF_DOOMED)
strlcat(buf, "|VIRF_DOOMED", sizeof(buf));
- flags = vp->v_irflag & ~(VIRF_DOOMED);
+ if (vp->v_irflag & VIRF_PGREAD)
+ strlcat(buf, "|VIRF_PGREAD", sizeof(buf));
+ flags = vp->v_irflag & ~(VIRF_DOOMED | VIRF_PGREAD);
if (flags != 0) {
snprintf(buf2, sizeof(buf2), "|VIRF(0x%lx)", flags);
strlcat(buf, buf2, sizeof(buf));
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -127,11 +127,15 @@
static const int io_hold_cnt = 16;
static int vn_io_fault_enable = 1;
-SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RW,
+SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RWTUN,
&vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance");
static int vn_io_fault_prefault = 0;
-SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_prefault, CTLFLAG_RW,
+SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_prefault, CTLFLAG_RWTUN,
&vn_io_fault_prefault, 0, "Enable vn_io_fault prefaulting");
+static int vn_io_pgcache_read_enable = 1;
+SYSCTL_INT(_debug, OID_AUTO, vn_io_pgcache_read_enable, CTLFLAG_RWTUN,
+ &vn_io_pgcache_read_enable, 0,
+ "Enable copying from page cache for reads, avoiding fs");
static u_long vn_io_faults_cnt;
SYSCTL_ULONG(_debug, OID_AUTO, vn_io_faults, CTLFLAG_RD,
&vn_io_faults_cnt, 0, "Count of vn_io_fault lock avoidance triggers");
@@ -844,6 +848,118 @@
return (ret);
}
+static int
+vn_read_from_obj(struct vnode *vp, struct uio *uio)
+{
+ vm_object_t obj;
+ vm_page_t ma[io_hold_cnt + 2];
+ off_t off, vsz;
+ ssize_t resid;
+ int error, i, j;
+
+ obj = vp->v_object;
+ MPASS(uio->uio_resid <= ptoa(io_hold_cnt + 2));
+ MPASS(obj != NULL);
+ MPASS(obj->type == VREG);
+
+ /*
+ * Depends on type stability of vm_objects.
+ */
+ vm_object_pip_add(obj, 1);
+ if ((obj->flags & OBJ_DEAD) != 0) {
+ /*
+ * Note that the object might already have been reused from
+ * the vnode, and the OBJ_DEAD flag cleared. This is fine,
+ * because we recheck for the DOOMED vnode state after all
+ * pages are busied, and back out then.
+ *
+ * But we check for OBJ_DEAD to ensure that we do not
+ * busy pages while vm_object_terminate_pages()
+ * processes the queue.
+ */
+ error = EJUSTRETURN;
+ goto out_pip;
+ }
+
+ resid = uio->uio_resid;
+ off = uio->uio_offset;
+ for (i = 0; resid > 0; i++) {
+ MPASS(i < io_hold_cnt + 2);
+ ma[i] = vm_page_grab_unlocked(obj, atop(off),
+ VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_IGN_SBUSY |
+ VM_ALLOC_NOWAIT);
+ if (ma[i] == NULL)
+ break;
+
+ /*
+ * Skip invalid pages. Valid mask can be partial only
+ * at EOF, and we clip later.
+ */
+ if (vm_page_none_valid(ma[i])) {
+ vm_page_sunbusy(ma[i]);
+ break;
+ }
+
+ resid -= PAGE_SIZE;
+ off += PAGE_SIZE;
+ }
+ if (i == 0) {
+ error = EJUSTRETURN;
+ goto out_pip;
+ }
+
+ /*
+ * Check VIRF_DOOMED after we busied our pages. Since
+ * vgonel() terminates the vnode's vm_object, it cannot
+ * process past pages busied by us.
+ */
+ if (VN_IS_DOOMED(vp)) {
+ error = EJUSTRETURN;
+ goto out;
+ }
+
+ resid = PAGE_SIZE - (uio->uio_offset & PAGE_MASK) + ptoa(i - 1);
+ if (resid > uio->uio_resid)
+ resid = uio->uio_resid;
+
+ /*
+ * Unlocked read of vnp_size is safe because truncation cannot
+ * pass a busied page. But we load vnp_size into a local
+ * variable so that a possible concurrent extension does not
+ * break the calculation.
+ */
+#if defined(__powerpc__) && !defined(__powerpc64__)
+ vsz = obj->un_pager.vnp.vnp_size;
+#else
+ vsz = atomic_load_64(&obj->un_pager.vnp.vnp_size);
+#endif
+ if (uio->uio_offset + resid > vsz)
+ resid = vsz - uio->uio_offset;
+
+ error = vn_io_fault_pgmove(ma, uio->uio_offset & PAGE_MASK, resid, uio);
+
+out:
+ for (j = 0; j < i; j++) {
+ if (error == 0)
+ vm_page_reference(ma[j]);
+ vm_page_sunbusy(ma[j]);
+ }
+out_pip:
+ vm_object_pip_wakeup(obj);
+ if (error != 0)
+ return (error);
+ return (uio->uio_resid == 0 ? 0 : EJUSTRETURN);
+}
+
+static bool
+do_vn_read_from_pgcache(struct vnode *vp, struct uio *uio, struct file *fp)
+{
+ return ((vp->v_irflag & VIRF_PGREAD) != 0 &&
+ !mac_vnode_check_read_enabled() &&
+ uio->uio_resid <= ptoa(io_hold_cnt) && uio->uio_offset >= 0 &&
+ (fp->f_flag & O_DIRECT) == 0 && vn_io_pgcache_read_enable);
+}
+
/*
* File table vnode read routine.
*/
@@ -860,6 +976,15 @@
uio->uio_td, td));
KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET"));
vp = fp->f_vnode;
+ if (do_vn_read_from_pgcache(vp, uio, fp)) {
+ error = vn_read_from_obj(vp, uio);
+ if (error == 0) {
+ fp->f_nextoff[UIO_READ] = uio->uio_offset;
+ return (0);
+ }
+ if (error != EJUSTRETURN)
+ return (error);
+ }
ioflag = 0;
if (fp->f_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
@@ -1164,8 +1289,8 @@
uio_clone->uio_iovcnt--;
continue;
}
- if (len > io_hold_cnt * PAGE_SIZE)
- len = io_hold_cnt * PAGE_SIZE;
+ if (len > ptoa(io_hold_cnt))
+ len = ptoa(io_hold_cnt);
addr = (uintptr_t)uio_clone->uio_iov->iov_base;
end = round_page(addr + len);
if (end < addr) {
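For reference, a worked instance of the length computation in vn_read_from_obj() above; the concrete numbers are illustrative only and assume 4 KB pages:

	With uio_offset = 4660, the offset within the first busied page is
	4660 & PAGE_MASK = 564, so that page can supply PAGE_SIZE - 564 = 3532
	bytes.  If the grab loop busied i = 3 pages, the computed length is
	resid = 3532 + ptoa(3 - 1) = 11724 bytes, which is then clipped to
	uio_resid and, near EOF, to vsz - uio_offset.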
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -244,6 +244,8 @@
#define VHOLD_ALL_FLAGS (VHOLD_NO_SMR)
#define VIRF_DOOMED 0x0001 /* This vnode is being recycled */
+#define VIRF_PGREAD 0x0002 /* Direct reads from the page cache are permitted,
+ never cleared once set */
#define VI_TEXT_REF 0x0001 /* Text ref grabbed use ref */
#define VI_MOUNT 0x0002 /* Mount in progress */
Index: sys/ufs/ufs/ufs_vnops.c
===================================================================
--- sys/ufs/ufs/ufs_vnops.c
+++ sys/ufs/ufs/ufs_vnops.c
@@ -282,13 +282,20 @@
return (EOPNOTSUPP);
ip = VTOI(vp);
+ vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td);
+ if (vp->v_type == VREG && (vp->v_irflag & VIRF_PGREAD) == 0) {
+ VI_LOCK(vp);
+ vp->v_irflag |= VIRF_PGREAD;
+ VI_UNLOCK(vp);
+ }
+
/*
* Files marked append-only must be opened for appending.
*/
if ((ip->i_flags & APPEND) &&
(ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
return (EPERM);
- vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td);
+
return (0);
}
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -192,9 +192,11 @@
("object %p has reservations",
object));
#endif
+#if 0
KASSERT(blockcount_read(&object->paging_in_progress) == 0,
("object %p paging_in_progress = %d",
object, blockcount_read(&object->paging_in_progress)));
+#endif
KASSERT(!vm_object_busied(object),
("object %p busy = %d", object, blockcount_read(&object->busy)));
KASSERT(object->resident_page_count == 0,
@@ -294,6 +296,9 @@
* The lock portion of struct vm_object must be type stable due
* to vm_pageout_fallback_object_lock locking a vm object
* without holding any references to it.
+ *
+ * paging_in_progress is always valid. Lockless references to
+ * the objects may acquire pip and then check OBJ_DEAD.
*/
obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
#ifdef INVARIANTS
@@ -936,13 +941,14 @@
("terminating shadow obj %p", object));
/*
- * wait for the pageout daemon to be done with the object
+ * Wait for the pageout daemon and other current users to be
+ * done with the object. Note that new paging_in_progress
+ * users can come after this wait, but they must check that
+ * the OBJ_DEAD flag is set (without unlocking the object),
+ * and avoid an object that is being terminated.
*/
vm_object_pip_wait(object, "objtrm");
- KASSERT(!blockcount_read(&object->paging_in_progress),
- ("vm_object_terminate: pageout in progress"));
-
KASSERT(object->ref_count == 0,
("vm_object_terminate: object with references, ref_count=%d",
object->ref_count));
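For reference, a minimal sketch of the lockless pip/OBJ_DEAD protocol that the comments above rely on; the helper name is hypothetical, and vn_read_from_obj() in the vfs_vnops.c hunk is the real consumer of this pattern:

static int
object_lockless_user_sketch(vm_object_t obj)
{
	int error;

	/* Pin the object; termination cannot proceed while pip is held. */
	vm_object_pip_add(obj, 1);
	if ((obj->flags & OBJ_DEAD) != 0) {
		/* Being terminated or reused; tell the caller to fall back. */
		error = EJUSTRETURN;
	} else {
		/*
		 * Busy the pages of interest and operate on them here,
		 * rechecking VN_IS_DOOMED() on the owning vnode afterwards.
		 */
		error = 0;
	}
	vm_object_pip_wakeup(obj);
	return (error);
}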
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -520,7 +520,11 @@
vm_page_xunbusy(m);
}
out:
+#if defined(__powerpc__) && !defined(__powerpc64__)
object->un_pager.vnp.vnp_size = nsize;
+#else
+ atomic_store_64(&object->un_pager.vnp.vnp_size, nsize);
+#endif
object->size = nobjsize;
VM_OBJECT_WUNLOCK(object);
}
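As a usage illustration, a filesystem opts into the fast path by setting VIRF_PGREAD on its regular vnodes once a VM object exists, as the ufs_open() hunk above does. A minimal sketch for a hypothetical filesystem (myfs_open() and myfs_getsize() are not real functions):

static int
myfs_open(struct vop_open_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	/* Ensure the vnode has a VM object backing the page cache. */
	vnode_create_vobject(vp, myfs_getsize(vp), ap->a_td);
	/* Advertise that reads may be served directly from resident pages. */
	if (vp->v_type == VREG && (vp->v_irflag & VIRF_PGREAD) == 0) {
		VI_LOCK(vp);
		vp->v_irflag |= VIRF_PGREAD;
		VI_UNLOCK(vp);
	}
	return (0);
}

The debug.vn_io_pgcache_read_enable knob added in the vfs_vnops.c hunk can be set to 0 to disable the fast path globally and force all reads through VOP_READ.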