Index: sys/fs/tmpfs/tmpfs.h =================================================================== --- sys/fs/tmpfs/tmpfs.h +++ sys/fs/tmpfs/tmpfs.h @@ -39,6 +39,7 @@ #include #include +#include #include #ifdef _SYS_MALLOC_H_ @@ -228,7 +229,7 @@ int tn_vpstate; /* (i) */ /* Transient refcounter on this node. */ - u_int tn_refcount; /* (m) + (i) */ + u_int tn_refcount; /* 0<->1 (m) + (i) */ /* misc data field for different tn_type node */ union { @@ -287,6 +288,7 @@ * a position within the file is accessed. */ vm_object_t tn_aobj; /* (c) */ + struct tmpfs_mount *tn_tmp; /* (c) */ } tn_reg; } tn_spec; /* (v) */ }; @@ -412,10 +414,10 @@ */ void tmpfs_ref_node(struct tmpfs_node *node); -void tmpfs_ref_node_locked(struct tmpfs_node *node); int tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *, enum vtype, uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *, const char *, dev_t, struct tmpfs_node **); +int tmpfs_fo_close(struct file *fp, struct thread *td); void tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *); bool tmpfs_free_node_locked(struct tmpfs_mount *, struct tmpfs_node *, bool); void tmpfs_free_tmp(struct tmpfs_mount *); @@ -559,6 +561,8 @@ tmpfs_update(vp); } +extern struct fileops tmpfs_fnops; + #endif /* _KERNEL */ #endif /* _FS_TMPFS_TMPFS_H_ */ Index: sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- sys/fs/tmpfs/tmpfs_subr.c +++ sys/fs/tmpfs/tmpfs_subr.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -212,20 +213,8 @@ tmpfs_ref_node(struct tmpfs_node *node) { - TMPFS_NODE_LOCK(node); - tmpfs_ref_node_locked(node); - TMPFS_NODE_UNLOCK(node); -} - -void -tmpfs_ref_node_locked(struct tmpfs_node *node) -{ - - TMPFS_NODE_ASSERT_LOCKED(node); KASSERT(node->tn_refcount > 0, ("node %p zero refcount", node)); - KASSERT(node->tn_refcount < UINT_MAX, ("node %p refcount %u", node, - node->tn_refcount)); - node->tn_refcount++; + refcount_acquire(&node->tn_refcount); } 
/* @@ -345,6 +334,7 @@ /* OBJ_TMPFS is set together with the setting of vp->v_object */ vm_object_set_flag(obj, OBJ_TMPFS_NODE); VM_OBJECT_WUNLOCK(obj); + nnode->tn_reg.tn_tmp = tmp; break; default: @@ -370,6 +360,8 @@ void tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) { + if (refcount_release_if_not_last(&node->tn_refcount)) + return; TMPFS_LOCK(tmp); TMPFS_NODE_LOCK(node); @@ -384,19 +376,20 @@ bool detach) { vm_object_t uobj; + bool last; TMPFS_MP_ASSERT_LOCKED(tmp); TMPFS_NODE_ASSERT_LOCKED(node); KASSERT(node->tn_refcount > 0, ("node %p refcount zero", node)); - node->tn_refcount--; - if (node->tn_attached && (detach || node->tn_refcount == 0)) { + last = refcount_release(&node->tn_refcount); + if (node->tn_attached && (detach || last)) { MPASS(tmp->tm_nodes_inuse > 0); tmp->tm_nodes_inuse--; LIST_REMOVE(node, tn_entries); node->tn_attached = false; } - if (node->tn_refcount > 0) + if (!last) return (false); #ifdef INVARIANTS @@ -596,7 +589,7 @@ error = 0; tm = VFS_TO_TMPFS(mp); TMPFS_NODE_LOCK(node); - tmpfs_ref_node_locked(node); + tmpfs_ref_node(node); loop: TMPFS_NODE_ASSERT_LOCKED(node); if ((vp = node->tn_vnode) != NULL) { @@ -700,6 +693,7 @@ vp->v_object = object; object->un_pager.swp.swp_tmpfs = vp; vm_object_set_flag(object, OBJ_TMPFS); + vp->v_irflag |= VIRF_PGREAD; VI_UNLOCK(vp); VM_OBJECT_WUNLOCK(object); break; Index: sys/fs/tmpfs/tmpfs_vfsops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vfsops.c +++ sys/fs/tmpfs/tmpfs_vfsops.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -662,6 +663,8 @@ tmpfs_init(struct vfsconf *conf) { tmpfs_subr_init(); + memcpy(&tmpfs_fnops, &vnops, sizeof(struct fileops)); + tmpfs_fnops.fo_close = tmpfs_fo_close; return (0); } Index: sys/fs/tmpfs/tmpfs_vnops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vnops.c +++ sys/fs/tmpfs/tmpfs_vnops.c @@ -42,6 +42,7 @@ #include 
#include #include +#include #include #include #include @@ -276,22 +277,25 @@ return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL); } +struct fileops tmpfs_fnops; + static int tmpfs_open(struct vop_open_args *v) { - struct vnode *vp = v->a_vp; - int mode = v->a_mode; - - int error; + struct vnode *vp; struct tmpfs_node *node; + struct file *fp; + int error, mode; - MPASS(VOP_ISLOCKED(vp)); - + vp = v->a_vp; + mode = v->a_mode; node = VP_TO_TMPFS_NODE(vp); - /* The file is still active but all its names have been removed + /* + * The file is still active but all its names have been removed * (e.g. by a "rmdir $(pwd)"). It cannot be opened any more as - * it is about to die. */ + * it is about to die. + */ if (node->tn_links < 1) return (ENOENT); @@ -306,8 +310,13 @@ vnode_create_vobject(vp, node->tn_size, v->a_td); } - MPASS(VOP_ISLOCKED(vp)); - return error; + fp = v->a_fp; + if (error == 0 && fp != NULL && vp->v_type == VREG) { + tmpfs_ref_node(node); + finit_fops(fp, node, &tmpfs_fnops); + } + + return (error); } static int @@ -321,6 +330,19 @@ return (0); } +int +tmpfs_fo_close(struct file *fp, struct thread *td) +{ + struct tmpfs_node *node; + + node = fp->f_data; + if (node != NULL) { + MPASS(node->tn_type == VREG); + tmpfs_free_node(node->tn_reg.tn_tmp, node); + } + return (vnops.fo_close(fp, td)); +} + /* * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see * the comment above cache_fplookup for details. 
@@ -566,6 +588,47 @@ return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio)); } +static int +tmpfs_read_pgcache(struct vop_read_pgcache_args *v) +{ + struct vnode *vp; + struct tmpfs_node *node; + vm_object_t object; + off_t size; + int error; + + vp = v->a_vp; + if ((vp->v_irflag & (VIRF_DOOMED | VIRF_PGREAD)) != VIRF_PGREAD) + return (EJUSTRETURN); + if (v->a_uio->uio_offset < 0) + return (EINVAL); + + error = EJUSTRETURN; + vfs_smr_enter(); + + node = VP_TO_TMPFS_NODE_SMR(vp); + if (node == NULL) + goto out_smr; + MPASS(node->tn_type == VREG); + MPASS(node->tn_refcount >= 1); + object = node->tn_reg.tn_aobj; + if (object == NULL) + goto out_smr; + + MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_TMPFS_NODE)) == + OBJ_TMPFS_NODE); + if (!VN_IS_DOOMED(vp)) { + /* size cannot become shorter due to rangelock. */ + size = node->tn_size; + vfs_smr_exit(); + error = uiomove_object(object, size, v->a_uio); + return (error); + } +out_smr: + vfs_smr_exit(); + return (error); +} + static int tmpfs_write(struct vop_write_args *v) { @@ -1663,7 +1726,7 @@ if (tnp->tn_type != VDIR) continue; TMPFS_NODE_LOCK(tnp); - tmpfs_ref_node_locked(tnp); + tmpfs_ref_node(tnp); /* * tn_vnode cannot be instantiated while we hold the @@ -1721,6 +1784,7 @@ .vop_getattr = tmpfs_getattr, .vop_setattr = tmpfs_setattr, .vop_read = tmpfs_read, + .vop_read_pgcache = tmpfs_read_pgcache, .vop_write = tmpfs_write, .vop_fsync = tmpfs_fsync, .vop_remove = tmpfs_remove, Index: sys/kern/kern_descrip.c =================================================================== --- sys/kern/kern_descrip.c +++ sys/kern/kern_descrip.c @@ -2616,9 +2616,15 @@ void finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) { - fp->f_data = data; fp->f_flag = flag; fp->f_type = type; + finit_fops(fp, data, ops); +} + +void +finit_fops(struct file *fp, void *data, struct fileops *ops) +{ + fp->f_data = data; atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } Index:
sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c +++ sys/kern/vfs_default.c @@ -90,6 +90,7 @@ static int vop_stdcopy_file_range(struct vop_copy_file_range_args *ap); static int vop_stdfdatasync(struct vop_fdatasync_args *ap); static int vop_stdgetpages_async(struct vop_getpages_async_args *ap); +static int vop_stdread_pgcache(struct vop_read_pgcache_args *ap); static int vop_stdstat(struct vop_stat_args *ap); /* @@ -135,6 +136,7 @@ .vop_poll = vop_nopoll, .vop_putpages = vop_stdputpages, .vop_readlink = VOP_EINVAL, + .vop_read_pgcache = vop_stdread_pgcache, .vop_rename = vop_norename, .vop_revoke = VOP_PANIC, .vop_strategy = vop_nostrategy, @@ -1575,3 +1577,9 @@ out: return (vop_stat_helper_post(a, error)); } + +static int +vop_stdread_pgcache(struct vop_read_pgcache_args *ap __unused) +{ + return (EJUSTRETURN); +} Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -5841,6 +5841,15 @@ VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); } +void +vop_read_pgcache_post(void *ap, int rc) +{ + struct vop_read_pgcache_args *a = ap; + + if (!rc) + VFS_KNOTE_UNLOCKED(a->a_vp, NOTE_READ); +} + void vop_readdir_post(void *ap, int rc) { Index: sys/kern/vfs_syscalls.c =================================================================== --- sys/kern/vfs_syscalls.c +++ sys/kern/vfs_syscalls.c @@ -4138,7 +4138,6 @@ vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; - fp->f_data = vp; foffset = 0; vput(tvp); goto unionread; @@ -4504,7 +4503,7 @@ fp->f_vnode = vp; fp->f_seqcount[UIO_READ] = 1; fp->f_seqcount[UIO_WRITE] = 1; - finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, + finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, NULL, &vnops); VOP_UNLOCK(vp); if ((fmode & O_TRUNC) != 0) { Index: sys/kern/vfs_vnops.c =================================================================== --- 
sys/kern/vfs_vnops.c +++ sys/kern/vfs_vnops.c @@ -125,7 +125,7 @@ .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; -static const int io_hold_cnt = 16; +const u_int io_hold_cnt = 16; static int vn_io_fault_enable = 1; SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RWTUN, &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance"); @@ -848,7 +848,7 @@ return (ret); } -static int +int vn_read_from_obj(struct vnode *vp, struct uio *uio) { vm_object_t obj; @@ -951,15 +951,6 @@ return (uio->uio_resid == 0 ? 0 : EJUSTRETURN); } -static bool -do_vn_read_from_pgcache(struct vnode *vp, struct uio *uio, struct file *fp) -{ - return ((vp->v_irflag & (VIRF_DOOMED | VIRF_PGREAD)) == VIRF_PGREAD && - !mac_vnode_check_read_enabled() && - uio->uio_resid <= ptoa(io_hold_cnt) && uio->uio_offset >= 0 && - (fp->f_flag & O_DIRECT) == 0 && vn_io_pgcache_read_enable); -} - /* * File table vnode read routine. */ @@ -976,8 +967,14 @@ uio->uio_td, td)); KASSERT(flags & FOF_OFFSET, ("No FOF_OFFSET")); vp = fp->f_vnode; - if (do_vn_read_from_pgcache(vp, uio, fp)) { - error = vn_read_from_obj(vp, uio); + ioflag = 0; + if (fp->f_flag & FNONBLOCK) + ioflag |= IO_NDELAY; + if (fp->f_flag & O_DIRECT) + ioflag |= IO_DIRECT; + + if (vn_io_pgcache_read_enable && !mac_vnode_check_read_enabled()) { + error = VOP_READ_PGCACHE(vp, uio, ioflag, fp->f_cred); if (error == 0) { fp->f_nextoff[UIO_READ] = uio->uio_offset; return (0); @@ -985,11 +982,7 @@ if (error != EJUSTRETURN) return (error); } - ioflag = 0; - if (fp->f_flag & FNONBLOCK) - ioflag |= IO_NDELAY; - if (fp->f_flag & O_DIRECT) - ioflag |= IO_DIRECT; + advice = get_advice(fp, uio); vn_lock(vp, LK_SHARED | LK_RETRY); Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -225,6 +225,17 @@ }; +%% read_pgcache vp - - - +%! 
read_pgcache post vop_read_pgcache_post + +vop_read_pgcache { + IN struct vnode *vp; + INOUT struct uio *uio; + IN int ioflag; + IN struct ucred *cred; +}; + + %% write vp L L L %! write pre VOP_WRITE_PRE %! write post VOP_WRITE_POST Index: sys/sys/file.h =================================================================== --- sys/sys/file.h +++ sys/sys/file.h @@ -268,6 +268,7 @@ int vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif); void finit(struct file *, u_int, short, void *, struct fileops *); +void finit_fops(struct file *, void *, struct fileops *); int fgetvp(struct thread *td, int fd, cap_rights_t *rightsp, struct vnode **vpp); int fgetvp_exec(struct thread *td, int fd, cap_rights_t *rightsp, Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -392,6 +392,7 @@ #endif extern u_int ncsizefactor; +extern const u_int io_hold_cnt; /* * Convert between vnode types and inode formats (since POSIX.1 @@ -734,7 +735,8 @@ size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td); -int vn_rlimit_fsize(const struct vnode *vn, const struct uio *uio, +int vn_read_from_obj(struct vnode *vp, struct uio *uio); +int vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, struct thread *td); int vn_start_write(struct vnode *vp, struct mount **mpp, int flags); int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, @@ -850,6 +852,7 @@ void vop_mknod_post(void *a, int rc); void vop_open_post(void *a, int rc); void vop_read_post(void *a, int rc); +void vop_read_pgcache_post(void *ap, int rc); void vop_readdir_post(void *a, int rc); void vop_reclaim_post(void *a, int rc); void vop_remove_pre(void *a); Index: sys/ufs/ffs/ffs_alloc.c =================================================================== --- sys/ufs/ffs/ffs_alloc.c +++ sys/ufs/ffs/ffs_alloc.c @@ -3220,7 +3220,7 @@ if ((error 
= getvnode(td, cmd.handle, cap_rights_init(&rights, CAP_FSCK), &fp)) != 0) return (error); - vp = fp->f_data; + vp = fp->f_vnode; if (vp->v_type != VREG && vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); Index: sys/ufs/ufs/ufs_vnops.c =================================================================== --- sys/ufs/ufs/ufs_vnops.c +++ sys/ufs/ufs/ufs_vnops.c @@ -2874,6 +2874,21 @@ } } +static int +ufs_read_pgcache(struct vop_read_pgcache_args *ap) +{ + struct uio *uio; + struct vnode *vp; + + uio = ap->a_uio; + vp = ap->a_vp; + if ((vp->v_irflag & (VIRF_DOOMED | VIRF_PGREAD)) != VIRF_PGREAD || + uio->uio_resid > ptoa(io_hold_cnt) || uio->uio_offset < 0 || + (ap->a_ioflag & IO_DIRECT) != 0) + return (EJUSTRETURN); + return (vn_read_from_obj(vp, uio)); +} + /* Global vfs data structures for ufs. */ struct vop_vector ufs_vnodeops = { .vop_default = &default_vnodeops, @@ -2901,6 +2916,7 @@ .vop_pathconf = ufs_pathconf, .vop_poll = vop_stdpoll, .vop_print = ufs_print, + .vop_read_pgcache = ufs_read_pgcache, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_reclaim = ufs_reclaim,