Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F147976866
D55230.id171715.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
31 KB
Referenced Files
None
Subscribers
None
D55230.id171715.diff
View Options
diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c
--- a/sys/fs/fuse/fuse_file.c
+++ b/sys/fs/fuse/fuse_file.c
@@ -194,6 +194,8 @@
int err = 0;
int op = FUSE_RELEASE;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
if (fuse_isdeadfs(vp)) {
goto out;
}
@@ -381,7 +383,11 @@
} else {
if ((foo->open_flags & FOPEN_KEEP_CACHE) == 0)
fuse_io_invalbuf(vp, td);
- VTOFUD(vp)->flag &= ~FN_DIRECTIO;
+ /*
+ * XXX Update the flag without the lock for now. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
+ VTOFUD(vp)->flag &= ~FN_DIRECTIO;
}
}
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -262,7 +262,7 @@
fvdat = VTOFUD(vp);
data = fuse_get_mpdata(mp);
- ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
&fvdat->attr_cache_timeout);
@@ -478,7 +478,9 @@
cn.cn_namelen = fnieo.namelen;
err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
MPASS(err == 0);
+ CACHED_ATTR_LOCK(dvp);
fuse_vnode_clear_attr_cache(dvp);
+ CACHED_ATTR_UNLOCK(dvp);
vput(dvp);
return (0);
}
@@ -498,8 +500,8 @@
if (fniio.ino == FUSE_ROOT_ID)
err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
else
- err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
- &vp);
+ err = fuse_internal_get_cached_vnode(mp, fniio.ino,
+ LK_EXCLUSIVE, &vp);
SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
if (err != 0 || vp == NULL)
return (err);
@@ -694,6 +696,8 @@
nlink_t nlink;
int err = 0;
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
fdisp_init(&fdi, cnp->cn_namelen + 1);
fdisp_make_vp(&fdi, op, dvp, curthread, cnp->cn_cred);
@@ -891,15 +895,9 @@
struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct fuse_getattr_in *fgai;
struct fuse_attr_out *fao;
- off_t old_filesize = fvdat->cached_attrs.va_size;
- struct timespec old_atime = fvdat->cached_attrs.va_atime;
- struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
- struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
__enum_uint8(vtype) vtyp;
int err;
- ASSERT_VOP_LOCKED(vp, __func__);
-
fdisp_init(&fdi, sizeof(*fgai));
fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
fgai = fdi.indata;
@@ -917,22 +915,27 @@
fao = (struct fuse_attr_out *)fdi.answ;
vtyp = IFTOVT(fao->attr.mode);
+
+ CACHED_ATTR_LOCK(vp);
if (fvdat->flag & FN_SIZECHANGE)
- fao->attr.size = old_filesize;
+ fao->attr.size = fvdat->cached_attrs.va_size;
if (fvdat->flag & FN_ATIMECHANGE) {
- fao->attr.atime = old_atime.tv_sec;
- fao->attr.atimensec = old_atime.tv_nsec;
+ fao->attr.atime = fvdat->cached_attrs.va_atime.tv_sec;
+ fao->attr.atimensec = fvdat->cached_attrs.va_atime.tv_nsec;
}
if (fvdat->flag & FN_CTIMECHANGE) {
- fao->attr.ctime = old_ctime.tv_sec;
- fao->attr.ctimensec = old_ctime.tv_nsec;
+ fao->attr.ctime = fvdat->cached_attrs.va_ctime.tv_sec;
+ fao->attr.ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
}
if (fvdat->flag & FN_MTIMECHANGE) {
- fao->attr.mtime = old_mtime.tv_sec;
- fao->attr.mtimensec = old_mtime.tv_nsec;
+ fao->attr.mtime = fvdat->cached_attrs.va_mtime.tv_sec;
+ fao->attr.mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
}
+
fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
fao->attr_valid_nsec, vap, true);
+
+ CACHED_ATTR_UNLOCK(vp);
if (vtyp != vnode_vtype(vp)) {
fuse_internal_vnode_disappear(vp);
err = ENOENT;
@@ -950,10 +953,13 @@
{
struct vattr *attrs;
+ CACHED_ATTR_LOCK(vp);
if ((attrs = VTOVA(vp)) != NULL) {
*vap = *attrs; /* struct copy */
+ CACHED_ATTR_UNLOCK(vp);
return 0;
- }
+ } else
+ CACHED_ATTR_UNLOCK(vp);
return fuse_internal_do_getattr(vp, vap, cred, td);
}
@@ -1140,7 +1146,7 @@
int err = 0;
__enum_uint8(vtype) vtyp;
- ASSERT_VOP_ELOCKED(vp, __func__);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
mp = vnode_mount(vp);
fvdat = VTOFUD(vp);
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -401,6 +401,7 @@
fuse_warn(data, FSESS_WARN_WROTE_LONG,
"wrote more data than we provided it.");
/* This is bonkers. Clear attr cache. */
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag &= ~FN_SIZECHANGE;
fuse_vnode_clear_attr_cache(vp);
err = EINVAL;
@@ -416,8 +417,10 @@
fuse_vnode_setsize(vp, as_written_offset, false);
getnanouptime(&fvdat->last_local_modify);
}
- if (as_written_offset - diff >= filesize)
+ if (as_written_offset - diff >= filesize) {
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag &= ~FN_SIZECHANGE;
+ }
if (diff > 0) {
/* Short write */
@@ -454,8 +457,11 @@
fdisp_destroy(&fdi);
- if (wrote_anything)
+ if (wrote_anything) {
+ CACHED_ATTR_LOCK(vp);
fuse_vnode_undirty_cached_timestamps(vp, false);
+ CACHED_ATTR_UNLOCK(vp);
+ }
vn_rlimit_fsizex_res(uio, r);
return (err);
@@ -556,6 +562,7 @@
err = fuse_vnode_setsize(vp, uio->uio_offset + n, false);
filesize = uio->uio_offset + n;
getnanouptime(&fvdat->last_local_modify);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag |= FN_SIZECHANGE;
if (err) {
brelse(bp);
@@ -806,6 +813,7 @@
left = uiop->uio_resid;
bzero((char *)bp->b_data + nread, left);
+ CACHED_ATTR_LOCK(vp);
if ((fvdat->flag & FN_SIZECHANGE) == 0) {
/*
* A short read with no error, when not using
@@ -838,6 +846,7 @@
"Short read of a dirty file");
uiop->uio_resid = 0;
}
+ CACHED_ATTR_UNLOCK(vp);
}
if (error) {
bp->b_ioflags |= BIO_ERROR;
@@ -855,10 +864,18 @@
* anything about it. In particular, we can't invalidate any
* part of the file's buffers because VOP_STRATEGY is called
* with them already locked.
+ *
+ * Normally the vnode should be exclusively locked at this
+ * point. However, if clustered reads are in use, then in a
+ * mixed read-write workload getblkx may need to flush a
+ * partially written buffer to disk during a read. In such a
+ * case, the vnode may only have a shared lock at this point.
*/
+ CACHED_ATTR_LOCK(vp);
filesize = fvdat->cached_attrs.va_size;
/* filesize must've been cached by fuse_vnop_open. */
KASSERT(filesize != VNOVAL, ("filesize should've been cached"));
+ CACHED_ATTR_UNLOCK(vp);
if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize)
bp->b_dirtyend = filesize -
diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h
--- a/sys/fs/fuse/fuse_node.h
+++ b/sys/fs/fuse/fuse_node.h
@@ -79,6 +79,11 @@
* cache_attrs.va_size field does not time out.
*/
#define FN_SIZECHANGE 0x00000100
+/*
+ * Whether I/O to this vnode should bypass the cache.
+ * XXX BUG: this should be part of the file handle, not the vnode data.
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
#define FN_DIRECTIO 0x00000200
/* Indicates that parent_nid is valid */
#define FN_PARENT_NID 0x00000400
@@ -92,38 +97,76 @@
#define FN_CTIMECHANGE 0x00001000
#define FN_ATIMECHANGE 0x00002000
+#define CACHED_ATTR_LOCK(vp) \
+do { \
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) \
+ mtx_lock(&VTOFUD(vp)->cached_attr_mtx); \
+} while(0)
+
+#define CACHED_ATTR_UNLOCK(vp) \
+do { \
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) \
+ mtx_unlock(&VTOFUD(vp)->cached_attr_mtx); \
+} while(0)
+
struct fuse_vnode_data {
- /** self **/
+ /* self's node id, similar to an inode number. Immutable. */
uint64_t nid;
+ /*
+ * Generation number. Distinguishes files with same nid but that don't
+ * overlap in time. Immutable.
+ */
uint64_t generation;
- /** parent **/
+ /* parent's node id. Protected by the vnode lock. */
uint64_t parent_nid;
/** I/O **/
- /* List of file handles for all of the vnode's open file descriptors */
+ /*
+ * List of file handles for all of the vnode's open file descriptors.
+ * Protected by the vnode lock.
+ */
LIST_HEAD(, fuse_filehandle) handles;
- /** flags **/
- uint32_t flag;
+ /* Protects flag, attr_cache_timeout and cached_attrs */
+ struct mtx cached_attr_mtx;
- /** meta **/
- /* The monotonic time after which the attr cache is invalid */
+ /*
+ * The monotonic time after which the attr cache is invalid
+ * Protected by an exclusive vnode lock or the cached_attr_mtx
+ */
struct bintime attr_cache_timeout;
- /*
+
+ /*
* Monotonic time after which the entry is invalid. Used for lookups
- * by nodeid instead of pathname.
+ * by nodeid instead of pathname. Protected by the vnode lock.
*/
struct bintime entry_cache_timeout;
+
/*
* Monotonic time of the last FUSE operation that modified the file
* size. Used to avoid races between mutator ops like VOP_SETATTR and
- * unlocked accessor ops like VOP_LOOKUP.
+ * unlocked accessor ops like VOP_LOOKUP. Protected by the vnode lock.
*/
struct timespec last_local_modify;
+
+ /* Protected by an exclusive vnode lock or the cached_attr_mtx */
struct vattr cached_attrs;
+
+ /* Number of FUSE_LOOKUPs minus FUSE_FORGETs. Protected by vnode lock */
uint64_t nlookup;
+
+ /*
+ * Misc flags. Protected by an exclusive vnode lock or the
+ * cached_attr_mtx, because some of the flags reflect the contents of
+ * cached_attrs.
+ */
+ uint32_t flag;
+
+ /* Vnode type. Immutable */
__enum_uint8(vtype) vtype;
+
+ /* State for clustered writes. Protected by vnode lock */
struct vn_clusterw clusterw;
};
@@ -141,18 +184,29 @@
#define VTOFUD(vp) \
((struct fuse_vnode_data *)((vp)->v_data))
#define VTOI(vp) (VTOFUD(vp)->nid)
+
+#define ASSERT_CACHED_ATTRS_LOCKED(vp) \
+ VNASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE || \
+ mtx_owned(&VTOFUD(vp)->cached_attr_mtx), vp, \
+ ("cached attrs not locked"))
+
static inline bool
fuse_vnode_attr_cache_valid(struct vnode *vp)
{
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct bintime now;
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
getbinuptime(&now);
- return (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >));
+ return (bintime_cmp(&fvdat->attr_cache_timeout, &now, >));
}
static inline struct vattr*
VTOVA(struct vnode *vp)
{
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
if (fuse_vnode_attr_cache_valid(vp))
return &(VTOFUD(vp)->cached_attrs);
else
@@ -162,6 +216,8 @@
static inline void
fuse_vnode_clear_attr_cache(struct vnode *vp)
{
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
bintime_clear(&VTOFUD(vp)->attr_cache_timeout);
}
@@ -184,10 +240,14 @@
fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp)
{
if (dvp != NULL && vp->v_type == VDIR) {
+ ASSERT_VOP_ELOCKED(vp, __func__); /* for parent_nid */
+
MPASS(dvp->v_type == VDIR);
VTOFUD(vp)->parent_nid = VTOI(dvp);
VTOFUD(vp)->flag |= FN_PARENT_NID;
} else {
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
VTOFUD(vp)->flag &= ~FN_PARENT_NID;
}
}
diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c
--- a/sys/fs/fuse/fuse_node.c
+++ b/sys/fs/fuse/fuse_node.c
@@ -157,6 +157,8 @@
fvdat->nid = nodeid;
LIST_INIT(&fvdat->handles);
+ mtx_init(&fvdat->cached_attr_mtx, "fuse attr cache mutex", NULL,
+ MTX_DEF);
vattr_null(&fvdat->cached_attrs);
fvdat->cached_attrs.va_birthtime.tv_sec = -1;
fvdat->cached_attrs.va_birthtime.tv_nsec = 0;
@@ -181,6 +183,7 @@
struct fuse_vnode_data *fvdat = vp->v_data;
vp->v_data = NULL;
+ mtx_destroy(&fvdat->cached_attr_mtx);
KASSERT(LIST_EMPTY(&fvdat->handles),
("Destroying fuse vnode with open files!"));
free(fvdat, M_FUSEVN);
@@ -386,7 +389,8 @@
struct fuse_setattr_in *fsai;
int err = 0;
- ASSERT_VOP_ELOCKED(vp, "fuse_io_extend");
+ ASSERT_VOP_ELOCKED(vp, __func__); /* For flag and last_local_modify */
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
if (fuse_isdeadfs(vp)) {
return EBADF;
@@ -425,6 +429,8 @@
return err;
}
+SDT_PROBE_DEFINE3(fusefs, , node, setsize_lockupgrade, "struct vnode*", "off_t",
+ "bool");
/*
* Adjust the vnode's size to a new value.
*
@@ -441,8 +447,26 @@
size_t iosize;
struct buf *bp = NULL;
int err = 0;
+ bool downgrade = false;
- ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize");
+ ASSERT_VOP_LOCKED(vp, __func__);
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
+ SDT_PROBE3(fusefs, , node, setsize_lockupgrade, vp, newsize,
+ from_server);
+ /*
+ * vnode must be exclusively locked for vtruncbuf and
+ * vnode_pager_setsize.
+ */
+ vn_lock(vp, LK_UPGRADE | LK_RETRY);
+ if (VN_IS_DOOMED(vp)) {
+ /*
+ * Probably a forced unmount while vn_lock dropped the
+ * shared vnode lock during the upgrade.
+ */
+ return (ENOENT);
+ }
+ downgrade = true;
+ }
iosize = fuse_iosize(vp);
oldsize = fvdat->cached_attrs.va_size;
@@ -488,6 +512,10 @@
if (bp)
brelse(bp);
vnode_pager_setsize(vp, newsize);
+
+ if (downgrade)
+ vn_lock(vp, LK_DOWNGRADE);
+
return err;
}
@@ -497,15 +525,28 @@
struct thread *td)
{
struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ struct vattr va;
int error = 0;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
+ CACHED_ATTR_LOCK(vp);
if (!(fvdat->flag & FN_SIZECHANGE) &&
(!fuse_vnode_attr_cache_valid(vp) ||
- fvdat->cached_attrs.va_size == VNOVAL))
- error = fuse_internal_do_getattr(vp, NULL, cred, td);
-
- if (!error)
+ fvdat->cached_attrs.va_size == VNOVAL)) {
+ CACHED_ATTR_UNLOCK(vp);
+ /*
+ * It incurs a large struct copy, but we supply &va so we don't
+ * have to acquire the lock a second time after
+ * fuse_internal_do_getattr returns.
+ */
+ error = fuse_internal_do_getattr(vp, &va, cred, td);
+ if (!error)
+ *filesize = va.va_size;
+ } else {
*filesize = fvdat->cached_attrs.va_size;
+ CACHED_ATTR_UNLOCK(vp);
+ }
return error;
}
@@ -515,6 +556,8 @@
{
struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE);
if (atime)
fvdat->flag &= ~FN_ATIMECHANGE;
@@ -537,6 +580,8 @@
if (mp->mnt_flag & MNT_NOATIME)
flags &= ~FN_ATIMECHANGE;
+ CACHED_ATTR_LOCK(vp);
+
if (flags & FN_ATIMECHANGE)
fvdat->cached_attrs.va_atime = ts;
if (flags & FN_MTIMECHANGE)
@@ -545,6 +590,8 @@
fvdat->cached_attrs.va_ctime = ts;
fvdat->flag |= flags;
+
+ CACHED_ATTR_UNLOCK(vp);
}
void
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -601,6 +601,8 @@
error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp);
if (error)
goto out;
+ /* for last_local_modify and fuse_internal_cache_attrs */
+ ASSERT_VOP_ELOCKED(*vpp, __func__);
fvdat = VTOFUD(*vpp);
if (timespeccmp(&now, &fvdat->last_local_modify, >)) {
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -707,6 +707,8 @@
return (EXTERROR(EOPNOTSUPP, "This server does not implement "
"FUSE_FALLOCATE"));
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
io.uio_offset = *offset;
io.uio_resid = *len;
err = vn_rlimit_fsize(vp, &io, curthread);
@@ -779,7 +781,7 @@
struct fuse_data *data;
struct fuse_vnode_data *fvdat = VTOFUD(vp);
uint64_t biosize;
- off_t fsize;
+ off_t fsize = VNOVAL;
daddr_t lbn = ap->a_bn;
daddr_t *pbn = ap->a_bnp;
int *runp = ap->a_runp;
@@ -822,9 +824,10 @@
* and the risk of getting it wrong is not worth the cost of
* another upcall.
*/
- if (fvdat->cached_attrs.va_size != VNOVAL)
- fsize = fvdat->cached_attrs.va_size;
- else
+ CACHED_ATTR_LOCK(vp);
+ fsize = fvdat->cached_attrs.va_size;
+ CACHED_ATTR_UNLOCK(vp);
+ if (fsize == VNOVAL)
error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
if (error == 0)
*runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
@@ -894,6 +897,7 @@
cred = td->td_ucred;
err = fuse_flush(vp, cred, pid, fflag);
+ ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */
if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) {
struct vattr vap;
struct fuse_data *data;
@@ -911,6 +915,7 @@
}
if (access_e == 0) {
VATTR_NULL(&vap);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
vap.va_atime = fvdat->cached_attrs.va_atime;
/*
* Ignore errors setting when setting atime. That
@@ -1035,6 +1040,7 @@
*ap->a_inoffp += fwo->size;
*ap->a_outoffp += fwo->size;
fuse_internal_clear_suid_on_write(outvp, outcred, td);
+ ASSERT_CACHED_ATTRS_LOCKED(outvp);
if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) {
fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
getnanouptime(&outfvdat->last_local_modify);
@@ -1337,6 +1343,7 @@
int need_flush = 1;
+ ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */
LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
if (need_flush && vp->v_type == VREG) {
if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
@@ -1557,6 +1564,7 @@
else if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
return err;
+ ASSERT_CACHED_ATTRS_LOCKED(dvp); /* For flag */
is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
if (isdotdot && !(data->dataflags & FSESS_EXPORT_SUPPORT)) {
if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
@@ -1960,6 +1968,10 @@
"to be closed"));
}
+ /*
+ * XXX Check this flag without the lock. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
if (VTOFUD(vp)->flag & FN_DIRECTIO) {
ioflag |= IO_DIRECT;
}
@@ -2220,6 +2232,8 @@
return err;
}
+SDT_PROBE_DEFINE4(fusefs, , vnops, erelookup, "struct vnode*",
+ "struct vnode*", "struct vnode*", "struct vnode*");
/*
struct vnop_rename_args {
struct vnode *a_fdvp;
@@ -2242,6 +2256,7 @@
struct fuse_data *data;
bool newparent = fdvp != tdvp;
bool isdir = fvp->v_type == VDIR;
+ int locktype;
int err = 0;
if (fuse_isdeadfs(fdvp)) {
@@ -2266,12 +2281,34 @@
* have write permission to it, so ".." can be modified.
*/
data = fuse_get_mpdata(vnode_mount(tdvp));
+
+ if (tdvp != fdvp)
+ locktype = LK_EXCLUSIVE; /* for fuse_vnode_setparent */
+ else
+ locktype = LK_SHARED;
+
+ /*
+ * Must use LK_NOWAIT to prevent LORs between fvp and tdvp or
+ * tvp
+ */
+ if (vn_lock(fvp, locktype | LK_NOWAIT) != 0) {
+ /*
+ * Can't release tdvp or tvp to try avoiding the LOR.
+ * Must return instead.
+ */
+ SDT_PROBE4(fusefs, , vnops, erelookup, fdvp, fvp, tdvp,
+ tvp);
+ err = ERELOOKUP;
+ goto out;
+ }
+
if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
err = fuse_internal_access(fvp, VWRITE,
curthread, tcnp->cn_cred);
if (err)
- goto out;
+ goto unlock;
}
+
sx_xlock(&data->rename_lock);
err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
if (err == 0) {
@@ -2291,6 +2328,8 @@
}
cache_purge(fdvp);
}
+unlock:
+ VOP_UNLOCK(fvp);
out:
if (tdvp == tvp) {
vrele(tdvp);
@@ -2566,6 +2605,7 @@
fuse_vnop_write(struct vop_write_args *ap)
{
struct vnode *vp = ap->a_vp;
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct uio *uio = ap->a_uio;
int ioflag = ap->a_ioflag;
struct ucred *cred = ap->a_cred;
@@ -2581,9 +2621,12 @@
"to be closed"));
}
- if (VTOFUD(vp)->flag & FN_DIRECTIO) {
+ /*
+ * XXX Check this flag without the lock. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
+ if (fvdat->flag & FN_DIRECTIO)
ioflag |= IO_DIRECT;
- }
err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
diff --git a/tests/sys/fs/fusefs/bmap.cc b/tests/sys/fs/fusefs/bmap.cc
--- a/tests/sys/fs/fusefs/bmap.cc
+++ b/tests/sys/fs/fusefs/bmap.cc
@@ -44,10 +44,13 @@
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
-class Bmap: public FuseTest {
+class Bmap: public FuseTest,
+ public WithParamInterface<tuple<int, int>>
+{
public:
virtual void SetUp() {
m_maxreadahead = UINT32_MAX;
+ m_init_flags |= get<0>(GetParam());
FuseTest::SetUp();
}
void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn)
@@ -73,12 +76,12 @@
}
};
-class BmapEof: public Bmap, public WithParamInterface<int> {};
+class BmapEof: public Bmap {};
/*
* Test FUSE_BMAP
*/
-TEST_F(Bmap, bmap)
+TEST_P(Bmap, bmap)
{
struct fiobmap2_arg arg;
/*
@@ -124,7 +127,7 @@
* If the daemon does not implement VOP_BMAP, fusefs should return sensible
* defaults.
*/
-TEST_F(Bmap, default_)
+TEST_P(Bmap, default_)
{
struct fiobmap2_arg arg;
const off_t filesize = 1 << 30;
@@ -181,7 +184,7 @@
* The server returns an error for some reason for FUSE_BMAP. fusefs should
* faithfully report that error up to the caller.
*/
-TEST_F(Bmap, einval)
+TEST_P(Bmap, einval)
{
struct fiobmap2_arg arg;
const off_t filesize = 1 << 30;
@@ -217,7 +220,7 @@
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=264196 . The bug did not
* lie in fusefs, but this is a convenient place for a regression test.
*/
-TEST_F(Bmap, spurious_einval)
+TEST_P(Bmap, spurious_einval)
{
const off_t filesize = 4ull << 30;
const ino_t ino = 42;
@@ -288,7 +291,7 @@
int fd;
int ngetattrs;
- ngetattrs = GetParam();
+ ngetattrs = get<1>(GetParam());
FuseTest::expect_lookup(RELPATH, ino, mode, filesize, 1, 0);
expect_open(ino, 0, 1);
// Depending on ngetattrs, FUSE_READ could be called with either
@@ -348,6 +351,17 @@
leak(fd);
}
-INSTANTIATE_TEST_SUITE_P(BE, BmapEof,
- Values(1, 2, 3)
-);
+/*
+ * Try with and without async reads, because it affects the type of vnode lock
+ * on entry to fuse_vnop_bmap.
+ */
+INSTANTIATE_TEST_SUITE_P(B, Bmap, Values(
+ tuple(0, 0),
+ tuple(FUSE_ASYNC_READ, 0)
+));
+
+INSTANTIATE_TEST_SUITE_P(BE, BmapEof, Values(
+ tuple(0, 1),
+ tuple(0, 2),
+ tuple(0, 3)
+));
diff --git a/tests/sys/fs/fusefs/notify.cc b/tests/sys/fs/fusefs/notify.cc
--- a/tests/sys/fs/fusefs/notify.cc
+++ b/tests/sys/fs/fusefs/notify.cc
@@ -47,8 +47,15 @@
* invalidation. This file tests our client's handling of those messages.
*/
-class Notify: public FuseTest {
+class Notify: public FuseTest,
+ public WithParamInterface<int>
+{
public:
+virtual void SetUp() {
+ m_init_flags |= GetParam();
+ FuseTest::SetUp();
+}
+
/* Ignore an optional FUSE_FSYNC */
void maybe_expect_fsync(uint64_t ino)
{
@@ -154,7 +161,7 @@
}
/* Invalidate a nonexistent entry */
-TEST_F(Notify, inval_entry_nonexistent)
+TEST_P(Notify, inval_entry_nonexistent)
{
const static char *name = "foo";
struct inval_entry_args iea;
@@ -173,7 +180,7 @@
}
/* Invalidate a cached entry */
-TEST_F(Notify, inval_entry)
+TEST_P(Notify, inval_entry)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -211,7 +218,7 @@
* Invalidate a cached entry beneath the root, which uses a slightly different
* code path.
*/
-TEST_F(Notify, inval_entry_below_root)
+TEST_P(Notify, inval_entry_below_root)
{
const static char FULLPATH[] = "mountpoint/some_dir/foo";
const static char DNAME[] = "some_dir";
@@ -258,7 +265,7 @@
}
/* Invalidating an entry invalidates the parent directory's attributes */
-TEST_F(Notify, inval_entry_invalidates_parent_attrs)
+TEST_P(Notify, inval_entry_invalidates_parent_attrs)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -302,7 +309,7 @@
}
-TEST_F(Notify, inval_inode_nonexistent)
+TEST_P(Notify, inval_inode_nonexistent)
{
struct inval_inode_args iia;
ino_t ino = 42;
@@ -320,7 +327,7 @@
EXPECT_EQ(0, (intptr_t)thr0_value);
}
-TEST_F(Notify, inval_inode_with_clean_cache)
+TEST_P(Notify, inval_inode_with_clean_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -390,7 +397,7 @@
* nothing bad should happen.
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=290519
*/
-TEST_F(Notify, notify_after_unmount)
+TEST_P(Notify, notify_after_unmount)
{
const static char *name = "foo";
struct inval_entry_args iea;
@@ -408,7 +415,7 @@
/* FUSE_NOTIFY_STORE with a file that's not in the entry cache */
/* disabled because FUSE_NOTIFY_STORE is not yet implemented */
-TEST_F(Notify, DISABLED_store_nonexistent)
+TEST_P(Notify, DISABLED_store_nonexistent)
{
struct store_args sa;
ino_t ino = 42;
@@ -427,7 +434,7 @@
/* Store data into for a file that does not yet have anything cached */
/* disabled because FUSE_NOTIFY_STORE is not yet implemented */
-TEST_F(Notify, DISABLED_store_with_blank_cache)
+TEST_P(Notify, DISABLED_store_with_blank_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -465,7 +472,7 @@
leak(fd);
}
-TEST_F(NotifyWriteback, inval_inode_with_dirty_cache)
+TEST_P(NotifyWriteback, inval_inode_with_dirty_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -506,7 +513,7 @@
leak(fd);
}
-TEST_F(NotifyWriteback, inval_inode_attrs_only)
+TEST_P(NotifyWriteback, inval_inode_attrs_only)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -600,3 +607,10 @@
EXPECT_EQ(ENODEV, errno);
delete out;
}
+
+/*
+ * Try with and without async reads, because it affects the type of vnode lock
+ * acquired in fuse_internal_invalidate_entry.
+ */
+INSTANTIATE_TEST_SUITE_P(N, Notify, Values(0, FUSE_ASYNC_READ));
+INSTANTIATE_TEST_SUITE_P(N, NotifyWriteback, Values(0, FUSE_ASYNC_READ));
diff --git a/tests/sys/fs/fusefs/read.cc b/tests/sys/fs/fusefs/read.cc
--- a/tests/sys/fs/fusefs/read.cc
+++ b/tests/sys/fs/fusefs/read.cc
@@ -95,6 +95,19 @@
}
};
+class AsyncReadNoAttrCache: public Read {
+virtual void SetUp() {
+ m_init_flags = FUSE_ASYNC_READ;
+ Read::SetUp();
+}
+public:
+void expect_lookup(const char *relpath, uint64_t ino)
+{
+ // Don't return size, and set attr_valid=0
+ FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, 0, 1, 0);
+}
+};
+
class ReadAhead: public Read,
public WithParamInterface<tuple<bool, int>>
{
@@ -351,6 +364,90 @@
leak(fd);
}
+/*
+ * Regression test for a VFS locking bug: as of
+ * 22bb70a6b3bb7799276ab480e40665b7d6e4ce25 (17-December-2024), fusefs did not
+ * obtain an exclusive vnode lock before attempting to clear the attr cache
+ * after an unexpected eof. The vnode lock would already be exclusive except
+ * when FUSE_ASYNC_READ is set.
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=283391
+ */
+ TEST_F(AsyncRead, eof)
+{
+ const char FULLPATH[] = "mountpoint/some_file.txt";
+ const char RELPATH[] = "some_file.txt";
+ const char *CONTENTS = "abcdefghijklmnop";
+ uint64_t ino = 42;
+ int fd;
+ uint64_t offset = 100;
+ ssize_t bufsize = strlen(CONTENTS);
+ ssize_t partbufsize = 3 * bufsize / 4;
+ ssize_t r;
+ uint8_t buf[bufsize];
+ struct stat sb;
+
+ expect_lookup(RELPATH, ino, offset + bufsize);
+ expect_open(ino, 0, 1);
+ expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS);
+ expect_getattr(ino, offset + partbufsize);
+
+ fd = open(FULLPATH, O_RDONLY);
+ ASSERT_LE(0, fd) << strerror(errno);
+
+ r = pread(fd, buf, bufsize, offset);
+ ASSERT_LE(0, r) << strerror(errno);
+ EXPECT_EQ(partbufsize, r) << strerror(errno);
+ ASSERT_EQ(0, fstat(fd, &sb));
+ EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size);
+ leak(fd);
+}
+
+/*
+ * If the daemon disables the attribute cache (or if it has expired), then the
+ * kernel must fetch attributes during VOP_READ. If async reads are enabled,
+ * then fuse_internal_cache_attrs will be called without the vnode exclusively
+ * locked. Regression test for
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=291064
+ */
+TEST_F(AsyncReadNoAttrCache, read)
+{
+ const char FULLPATH[] = "mountpoint/some_file.txt";
+ const char RELPATH[] = "some_file.txt";
+ const char *CONTENTS = "abcdefgh";
+ uint64_t ino = 42;
+ mode_t mode = S_IFREG | 0644;
+ int fd;
+ ssize_t bufsize = strlen(CONTENTS);
+ uint8_t buf[bufsize];
+ Sequence seq;
+
+ expect_lookup(RELPATH, ino);
+ expect_open(ino, 0, 1);
+ EXPECT_CALL(*m_mock, process(
+ ResultOf([=](auto in) {
+ return (in.header.opcode == FUSE_GETATTR &&
+ in.header.nodeid == ino);
+ }, Eq(true)),
+ _)
+ ).WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out)
+ {
+ SET_OUT_HEADER_LEN(out, attr);
+ out.body.attr.attr.ino = ino;
+ out.body.attr.attr.mode = mode;
+ out.body.attr.attr.size = bufsize;
+ out.body.attr.attr_valid = 0;
+ })));
+ expect_read(ino, 0, bufsize, bufsize, CONTENTS);
+
+ fd = open(FULLPATH, O_RDONLY);
+ ASSERT_LE(0, fd) << strerror(errno);
+
+ ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
+ ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
+
+ leak(fd);
+}
+
/* The kernel should update the cached atime attribute during a read */
TEST_F(Read, atime)
{
diff --git a/tests/sys/fs/fusefs/rename.cc b/tests/sys/fs/fusefs/rename.cc
--- a/tests/sys/fs/fusefs/rename.cc
+++ b/tests/sys/fs/fusefs/rename.cc
@@ -163,6 +163,96 @@
ASSERT_EQ(0, access(FULLDST, F_OK)) << strerror(errno);
}
+static volatile int stopit = 0;
+
+static void* setattr_th(void* arg) {
+ char *path = (char*)arg;
+
+ while (stopit == 0)
+ chmod(path, 0777);
+ return 0;
+}
+
+/*
+ * Rename restarts the syscall to avoid a LOR
+ *
+ * This test triggers a race: the chmod() calls VOP_SETATTR, which locks its
+ * vnode, but fuse_vnop_rename also tries to lock the same vnode. The result
+ * is that, in order to avoid a LOR, fuse_vnop_rename returns ERELOOKUP to
+ * restart the syscall.
+ *
+ * To verify that the race is hit, watch the fusefs:fusefs:vnops:erelookup
+ * dtrace probe while the test is running. On my system, that probe fires 1 - 5
+ * times per second during this test.
+ */
+TEST_F(Rename, erelookup)
+{
+ const char FULLDST[] = "mountpoint/dstdir/dst";
+ const char RELDSTDIR[] = "dstdir";
+ const char RELDST[] = "dst";
+ const char FULLSRC[] = "mountpoint/src";
+ const char RELSRC[] = "src";
+ pthread_t th0;
+ uint64_t ino = 42;
+ uint64_t dst_dir_ino = 43;
+ uint32_t mode = S_IFDIR | 0644;
+ struct timespec now, timeout;
+
+ EXPECT_LOOKUP(FUSE_ROOT_ID, RELSRC)
+ .WillRepeatedly(Invoke(
+ ReturnImmediate([=](auto i __unused, auto& out) {
+ SET_OUT_HEADER_LEN(out, entry);
+ out.body.entry.attr.mode = mode;
+ out.body.entry.nodeid = ino;
+ out.body.entry.attr.nlink = 2;
+ out.body.entry.attr_valid = UINT64_MAX;
+ out.body.entry.attr.uid = 0;
+ out.body.entry.attr.gid = 0;
+ out.body.entry.entry_valid = UINT64_MAX;
+ }))
+ );
+ EXPECT_LOOKUP(FUSE_ROOT_ID, RELDSTDIR)
+ .WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out)
+ {
+ SET_OUT_HEADER_LEN(out, entry);
+ out.body.entry.nodeid = dst_dir_ino;
+ out.body.entry.entry_valid = UINT64_MAX;
+ out.body.entry.attr_valid = UINT64_MAX;
+ out.body.entry.attr.mode = S_IFDIR | 0755;
+ out.body.entry.attr.ino = dst_dir_ino;
+ out.body.entry.attr.uid = geteuid();
+ out.body.entry.attr.gid = getegid();
+ })));
+ EXPECT_LOOKUP(dst_dir_ino, RELDST)
+ .WillRepeatedly(Invoke(ReturnErrno(ENOENT)));
+ EXPECT_CALL(*m_mock, process(
+ ResultOf([=](auto in) {
+ return (in.header.opcode == FUSE_SETATTR &&
+ in.header.nodeid == ino);
+ }, Eq(true)),
+ _)
+ ).WillRepeatedly(Invoke(ReturnErrno(EIO)));
+ EXPECT_CALL(*m_mock, process(
+ ResultOf([=](auto in) {
+ return (in.header.opcode == FUSE_RENAME);
+ }, Eq(true)),
+ _)
+ ).WillRepeatedly(Invoke(ReturnErrno(EIO)));
+
+ ASSERT_EQ(0, pthread_create(&th0, NULL, setattr_th, (void*)FULLSRC))
+ << strerror(errno);
+
+ ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &timeout));
+ timeout.tv_sec += 1;
+ do {
+ ASSERT_EQ(-1, rename(FULLSRC, FULLDST));
+ EXPECT_EQ(EIO, errno);
+ ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &now));
+ } while (timespeccmp(&now, &timeout, <));
+ stopit = 1;
+ pthread_join(th0, NULL);
+}
+
TEST_F(Rename, exdev)
{
const char FULLB[] = "mountpoint/src";
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Mar 15, 11:35 PM (25 m, 58 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29741606
Default Alt Text
D55230.id171715.diff (31 KB)
Attached To
Mode
D55230: fusefs: redo vnode attribute locking
Attached
Detach File
Event Timeline
Log In to Comment