Changeset View
Standalone View
sys/fs/unionfs/union_vnops.c
Show First 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | |||||
#include <sys/stat.h> | #include <sys/stat.h> | ||||
#include <sys/dirent.h> | #include <sys/dirent.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/bio.h> | #include <sys/bio.h> | ||||
#include <sys/buf.h> | #include <sys/buf.h> | ||||
#include <fs/unionfs/union.h> | #include <fs/unionfs/union.h> | ||||
#include <machine/atomic.h> | |||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
#include <vm/vm_extern.h> | #include <vm/vm_extern.h> | ||||
#include <vm/vm_object.h> | #include <vm/vm_object.h> | ||||
#include <vm/vnode_pager.h> | #include <vm/vnode_pager.h> | ||||
#if 0 | #if 0 | ||||
#define UNIONFS_INTERNAL_DEBUG(msg, args...) printf(msg, ## args) | #define UNIONFS_INTERNAL_DEBUG(msg, args...) printf(msg, ## args) | ||||
#define UNIONFS_IDBG_RENAME | #define UNIONFS_IDBG_RENAME | ||||
▲ Show 20 Lines • Show All 2,446 Lines • ▼ Show 20 Lines | unionfs_vptofh(struct vop_vptofh_args *ap) | ||||
return (EOPNOTSUPP); | return (EOPNOTSUPP); | ||||
} | } | ||||
static int | static int | ||||
unionfs_add_writecount(struct vop_add_writecount_args *ap) | unionfs_add_writecount(struct vop_add_writecount_args *ap) | ||||
{ | { | ||||
struct vnode *tvp, *vp; | struct vnode *tvp, *vp; | ||||
struct unionfs_node *unp; | struct unionfs_node *unp; | ||||
int error; | int error, writerefs; | ||||
vp = ap->a_vp; | vp = ap->a_vp; | ||||
unp = VTOUNIONFS(vp); | unp = VTOUNIONFS(vp); | ||||
tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp; | tvp = unp->un_uppervp; | ||||
kib: Hm, I probably miss something. Isn't goal of unionfs is to always write to the upper? If yes… | |||||
Done Inline ActionsI noticed the same thing when I wrote the patch, but decided against changing the logic in case there was something I missed. As far as I can tell, and from the testing I've done so far, write references should always be taken on the upper. Shadow directory creation and file copying generally happen during lookup/open, before the write reference is taken. On second thought though, I think I really do want to change this to always use the upper and assert that it's non-NULL. The reason is that if I've missed something, and there is a situation in which we can take a write ref on the lower, and the code path that holds the ref does a unionfs copy-on-write operation, then the write ref will be undone on the wrong vnode. It seems better to catch this case immediately than to wait for some later call to vop_stdadd_writecount() to panic. jah: I noticed the same thing when I wrote the patch, but decided against changing the logic in case… | |||||
Not Done Inline ActionsYes, I agree. Unionfs should only put write reference on the upper vnode. BTW, isn't a situation possible where lower vnode is executed, so gained text references, and then unionfs still must allow writes by CoW? Or rather, it should allow writes if execution is done through lower vnode, but sould not if unionfs vnode was executed. For me, this sounds as a need to maintain text refs on unionfs vnode. kib: Yes, I agree. Unionfs should only put write reference on the upper vnode.
BTW, isn't a… | |||||
Done Inline ActionsIf a text ref is taken through the unionfs vnode at a time when only the lower vnode is available, won't the executable mapping then always be backed by the lower vnode? If so, then wouldn't it be fine to still allow copy-on-write? Any modified file contents would belong to the newly-created upper vnode. jah: If a text ref is taken through the unionfs vnode at a time when only the lower vnode is… | |||||
Not Done Inline ActionsI mean that this is not logical, although possibly acceptable. You executed a, then attempts to write to a should result in ETXTBUSY. Unionfs behavior is to allow write due to CoW. kib: I mean that this is not logical, although possibly acceptable. You executed a, then attempts… | |||||
Not Done Inline ActionsBy my reading of do_execve(), the vnode should be locked shared across the setting of the text ref and establishment of the backing object for the executable image. Unionfs copy-on-write paths should execute with the vnode locked exclusive (we assert this in unionfs_node_update()). Therefore it seems that vnode locking should prevent any case in which the text ref would be set on the lower vnode but the executable mapping would be backed by the upper vnode. Given that, I see this added bit of flexibility on the part of unionfs as being more feature than bug. Since tracking text refs on the unionfs vnode would add complexity, I'd prefer to avoid it unless/until lack of it causes a problem. jah: By my reading of do_execve(), the vnode should be locked shared across the setting of the text… | |||||
Not Done Inline ActionsOk. But do you still want to change the code to avoid passing writecount VOPs to lowervp? kib: Ok. But do you still want to change the code to avoid passing writecount VOPs to lowervp? | |||||
VI_LOCK(vp); | KASSERT(tvp != NULL, | ||||
/* text refs are bypassed to lowervp */ | ("%s: adding write ref without upper vnode", __func__)); | ||||
VNASSERT(vp->v_writecount >= 0, vp, ("wrong null writecount")); | |||||
VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp, | |||||
("wrong writecount inc %d", ap->a_inc)); | |||||
if (tvp != NULL) | |||||
error = VOP_ADD_WRITECOUNT(tvp, ap->a_inc); | error = VOP_ADD_WRITECOUNT(tvp, ap->a_inc); | ||||
else if (vp->v_writecount < 0) | if (error != 0) | ||||
error = ETXTBSY; | |||||
else | |||||
error = 0; | |||||
if (error == 0) | |||||
vp->v_writecount += ap->a_inc; | |||||
VI_UNLOCK(vp); | |||||
return (error); | return (error); | ||||
/* | |||||
* We need to track the write refs we've passed to the underlying | |||||
* vnodes so that we can undo them in case we are forcibly unmounted. | |||||
*/ | |||||
writerefs = atomic_fetchadd_int(&vp->v_writecount, ap->a_inc); | |||||
/* text refs are bypassed to lowervp */ | |||||
VNASSERT(writerefs >= 0, vp, | |||||
("%s: invalid write count %d", __func__, writerefs)); | |||||
VNASSERT(writerefs + ap->a_inc >= 0, vp, | |||||
("%s: invalid write count inc %d + %d", __func__, | |||||
writerefs, ap->a_inc)); | |||||
return (0); | |||||
} | } | ||||
static int | static int | ||||
unionfs_vput_pair(struct vop_vput_pair_args *ap) | unionfs_vput_pair(struct vop_vput_pair_args *ap) | ||||
{ | { | ||||
struct mount *mp; | struct mount *mp; | ||||
struct vnode *dvp, *vp, **vpp, *lvp, *ldvp, *uvp, *udvp, *tempvp; | struct vnode *dvp, *vp, **vpp, *lvp, *ldvp, *uvp, *udvp, *tempvp; | ||||
struct unionfs_node *dunp, *unp; | struct unionfs_node *dunp, *unp; | ||||
▲ Show 20 Lines • Show All 108 Lines • ▼ Show 20 Lines | unionfs_vput_pair(struct vop_vput_pair_args *ap) | ||||
if (uvp != NULLVP) | if (uvp != NULLVP) | ||||
vdrop(uvp); | vdrop(uvp); | ||||
vdrop(vp); | vdrop(vp); | ||||
vfs_rel(mp); | vfs_rel(mp); | ||||
return (res); | return (res); | ||||
} | } | ||||
static int | |||||
unionfs_set_text(struct vop_set_text_args *ap) | |||||
{ | |||||
struct vnode *tvp; | |||||
struct unionfs_node *unp; | |||||
int error; | |||||
/* | |||||
* We assume text refs are managed against lvp/uvp through the | |||||
* executable mapping backed by its VM object. We therefore don't | |||||
* need to track leased text refs in the case of a forcible unmount. | |||||
*/ | |||||
unp = VTOUNIONFS(ap->a_vp); | |||||
ASSERT_VOP_LOCKED(ap->a_vp, __func__); | |||||
tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp; | |||||
error = VOP_SET_TEXT(tvp); | |||||
return (error); | |||||
} | |||||
static int | |||||
unionfs_unset_text(struct vop_unset_text_args *ap) | |||||
{ | |||||
struct vnode *tvp; | |||||
struct unionfs_node *unp; | |||||
ASSERT_VOP_LOCKED(ap->a_vp, __func__); | |||||
unp = VTOUNIONFS(ap->a_vp); | |||||
tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp; | |||||
VOP_UNSET_TEXT_CHECKED(tvp); | |||||
return (0); | |||||
} | |||||
struct vop_vector unionfs_vnodeops = { | struct vop_vector unionfs_vnodeops = { | ||||
.vop_default = &default_vnodeops, | .vop_default = &default_vnodeops, | ||||
.vop_access = unionfs_access, | .vop_access = unionfs_access, | ||||
.vop_aclcheck = unionfs_aclcheck, | .vop_aclcheck = unionfs_aclcheck, | ||||
.vop_advlock = unionfs_advlock, | .vop_advlock = unionfs_advlock, | ||||
.vop_bmap = VOP_EOPNOTSUPP, | .vop_bmap = VOP_EOPNOTSUPP, | ||||
.vop_cachedlookup = unionfs_lookup, | .vop_cachedlookup = unionfs_lookup, | ||||
Show All 35 Lines | struct vop_vector unionfs_vnodeops = { | ||||
.vop_strategy = unionfs_strategy, | .vop_strategy = unionfs_strategy, | ||||
.vop_symlink = unionfs_symlink, | .vop_symlink = unionfs_symlink, | ||||
.vop_unlock = unionfs_unlock, | .vop_unlock = unionfs_unlock, | ||||
.vop_whiteout = unionfs_whiteout, | .vop_whiteout = unionfs_whiteout, | ||||
.vop_write = unionfs_write, | .vop_write = unionfs_write, | ||||
.vop_vptofh = unionfs_vptofh, | .vop_vptofh = unionfs_vptofh, | ||||
.vop_add_writecount = unionfs_add_writecount, | .vop_add_writecount = unionfs_add_writecount, | ||||
.vop_vput_pair = unionfs_vput_pair, | .vop_vput_pair = unionfs_vput_pair, | ||||
.vop_set_text = unionfs_set_text, | |||||
.vop_unset_text = unionfs_unset_text, | |||||
}; | }; | ||||
VFS_VOP_VECTOR_REGISTER(unionfs_vnodeops); | VFS_VOP_VECTOR_REGISTER(unionfs_vnodeops); |
Hm, I probably miss something. Isn't goal of unionfs is to always write to the upper? If yes, how could it be that we put a write reference on lowervp?