Index: sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- sys/fs/tmpfs/tmpfs_subr.c +++ sys/fs/tmpfs/tmpfs_subr.c @@ -700,6 +700,7 @@ vp->v_object = object; object->un_pager.swp.swp_tmpfs = vp; vm_object_set_flag(object, OBJ_TMPFS); + vp->v_irflag |= VIRF_PGREAD; VI_UNLOCK(vp); VM_OBJECT_WUNLOCK(object); break; Index: sys/fs/tmpfs/tmpfs_vnops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vnops.c +++ sys/fs/tmpfs/tmpfs_vnops.c @@ -566,6 +566,52 @@ return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio)); } +static int +tmpfs_read_pgcache(struct vop_read_pgcache_args *v) +{ + struct vnode *vp; + struct tmpfs_node *node; + vm_object_t object; + off_t size; + int error; + bool in_smr; + + vp = v->a_vp; + if ((vp->v_irflag & (VIRF_DOOMED | VIRF_PGREAD)) != VIRF_PGREAD || + v->a_uio->uio_offset < 0) + return (EJUSTRETURN); + + error = EJUSTRETURN; + vfs_smr_enter(); + in_smr = true; + node = VP_TO_TMPFS_NODE_SMR(vp); + if (node == NULL) + goto out_smr; + + MPASS(node->tn_type == VREG); + object = node->tn_reg.tn_aobj; + if (object == NULL || + !refcount_acquire_if_gt(&object->ref_count, 0)) + goto out_smr; + + vm_object_pip_add(object, 1); + MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_TMPFS_NODE)) == + OBJ_TMPFS_NODE); + if (!VN_IS_DOOMED(vp)) { + /* size cannot become shorter due to rangelock. 
*/ + size = node->tn_size; + vfs_smr_exit(); + in_smr = false; + error = uiomove_object(object, size, v->a_uio); + } + vm_object_pip_wakeup(object); + vm_object_deallocate(object); +out_smr: + if (in_smr) + vfs_smr_exit(); + return (error); +} + static int tmpfs_write(struct vop_write_args *v) { Index: sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c +++ sys/kern/vfs_default.c @@ -90,6 +90,7 @@ static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); static int vop_stdfdatasync(struct vop_fdatasync_args *ap); static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); +static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap); static int vop_stdstat(struct vop_stat_args *ap); /* @@ -135,6 +136,7 @@ .vop_poll = vop_nopoll, .vop_putpages = vop_stdputpages, .vop_readlink = VOP_EINVAL, + .vop_read_pgcache = vop_stdread_pgcache, .vop_rename = vop_norename, .vop_revoke = VOP_PANIC, .vop_strategy = vop_nostrategy, @@ -1575,3 +1577,9 @@ out: return (vop_stat_helper_post(a, error)); } + +static int +vop_stdread_pgcache(struct vop_read_pgcache_args *ap __unused) +{ + return (EJUSTRETURN); +} Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -5841,6 +5841,15 @@ VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } +void +vop_read_pgcache_post(void *ap, int rc) +{ + struct vop_read_pgcache_args *a = ap; + + if (!rc) + VFS_KNOTE_UNLOCKED(a->a_vp, NOTE_READ); +} + void vop_readdir_post(void *ap, int rc) { Index: sys/kern/vfs_vnops.c =================================================================== --- sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -125,7 +125,7 @@ .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; -static const int io_hold_cnt = 16; +const u_int io_hold_cnt = 16; static int vn_io_fault_enable = 1; SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RWTUN, &vn_io_fault_enable, 
0, "Enable vn_io_fault lock avoidance"); @@ -848,7 +848,7 @@ return (ret); } -static int +int vn_read_from_obj(struct vnode *vp, struct uio *uio) { vm_object_t obj; @@ -951,15 +951,6 @@ return (uio->uio_resid == 0 ? 0 : EJUSTRETURN); } -static bool -do_vn_read_from_pgcache(struct vnode *vp, struct uio *uio, struct file *fp) -{ - return ((vp->v_irflag & (VIRF_DOOMED | VIRF_PGREAD)) == VIRF_PGREAD && - !mac_vnode_check_read_enabled() && - uio->uio_resid <= ptoa(io_hold_cnt) && uio->uio_offset >= 0 && - (fp->f_flag & O_DIRECT) == 0 && vn_io_pgcache_read_enable); -} - /* * File table vnode read routine. */ @@ -976,8 +967,14 @@ uio->uio_td, td)); KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET")); vp = fp->f_vnode; - if (do_vn_read_from_pgcache(vp, uio, fp)) { - error = vn_read_from_obj(vp, uio); + ioflag = 0; + if (fp->f_flag & FNONBLOCK) + ioflag |= IO_NDELAY; + if (fp->f_flag & O_DIRECT) + ioflag |= IO_DIRECT; + + if (vn_io_pgcache_read_enable && !mac_vnode_check_read_enabled()) { + error = VOP_READ_PGCACHE(vp, uio, ioflag, fp->f_cred); if (error == 0) { fp->f_nextoff[UIO_READ] = uio->uio_offset; return (0); @@ -985,11 +982,7 @@ if (error != EJUSTRETURN) return (error); } - ioflag = 0; - if (fp->f_flag & FNONBLOCK) - ioflag |= IO_NDELAY; - if (fp->f_flag & O_DIRECT) - ioflag |= IO_DIRECT; + advice = get_advice(fp, uio); vn_lock(vp, LK_SHARED | LK_RETRY); Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -225,6 +225,17 @@ }; +%% read_pgcache vp - - - +%! read_pgcache post vop_read_pgcache_post + +vop_read_pgcache { + IN struct vnode *vp; + INOUT struct uio *uio; + IN int ioflag; + IN struct ucred *cred; +}; + + %% write vp L L L %! write pre VOP_WRITE_PRE %! 
write post VOP_WRITE_POST Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -392,6 +392,7 @@ #endif extern u_int ncsizefactor; +extern const u_int io_hold_cnt; /* * Convert between vnode types and inode formats (since POSIX.1 @@ -734,7 +735,8 @@ size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td); -int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio, +int vn_read_from_obj(struct vnode *vp, struct uio *uio); +int vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, struct thread *td); int vn_start_write(struct vnode *vp, struct mount **mpp, int flags); int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, @@ -850,6 +852,7 @@ void vop_mknod_post(void *a, int rc); void vop_open_post(void *a, int rc); void vop_read_post(void *a, int rc); +void vop_read_pgcache_post(void *ap, int rc); void vop_readdir_post(void *a, int rc); void vop_reclaim_post(void *a, int rc); void vop_remove_pre(void *a); Index: sys/ufs/ufs/ufs_vnops.c =================================================================== --- sys/ufs/ufs/ufs_vnops.c +++ sys/ufs/ufs/ufs_vnops.c @@ -2874,6 +2874,21 @@ } } +static int +ufs_read_pgcache(struct vop_read_pgcache_args *ap) +{ + struct uio *uio; + struct vnode *vp; + + uio = ap->a_uio; + vp = ap->a_vp; + if ((vp->v_irflag & (VIRF_DOOMED | VIRF_PGREAD)) != VIRF_PGREAD || + uio->uio_resid > ptoa(io_hold_cnt) || uio->uio_offset < 0 || + (ap->a_ioflag & IO_DIRECT) != 0) + return (EJUSTRETURN); + return (vn_read_from_obj(vp, uio)); +} + /* Global vfs data structures for ufs. 
*/ struct vop_vector ufs_vnodeops = { .vop_default = &default_vnodeops, @@ -2901,6 +2916,7 @@ .vop_pathconf = ufs_pathconf, .vop_poll = vop_stdpoll, .vop_print = ufs_print, + .vop_read_pgcache = ufs_read_pgcache, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_reclaim = ufs_reclaim,