Page MenuHomeFreeBSD

D55230.id171715.diff
No OneTemporary

D55230.id171715.diff

diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c
--- a/sys/fs/fuse/fuse_file.c
+++ b/sys/fs/fuse/fuse_file.c
@@ -194,6 +194,8 @@
int err = 0;
int op = FUSE_RELEASE;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
if (fuse_isdeadfs(vp)) {
goto out;
}
@@ -381,7 +383,11 @@
} else {
if ((foo->open_flags & FOPEN_KEEP_CACHE) == 0)
fuse_io_invalbuf(vp, td);
- VTOFUD(vp)->flag &= ~FN_DIRECTIO;
+ /*
+ * XXX Update the flag without the lock for now. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
+ VTOFUD(vp)->flag &= ~FN_DIRECTIO;
}
}
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -262,7 +262,7 @@
fvdat = VTOFUD(vp);
data = fuse_get_mpdata(mp);
- ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
&fvdat->attr_cache_timeout);
@@ -478,7 +478,9 @@
cn.cn_namelen = fnieo.namelen;
err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
MPASS(err == 0);
+ CACHED_ATTR_LOCK(dvp);
fuse_vnode_clear_attr_cache(dvp);
+ CACHED_ATTR_UNLOCK(dvp);
vput(dvp);
return (0);
}
@@ -498,8 +500,8 @@
if (fniio.ino == FUSE_ROOT_ID)
err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
else
- err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
- &vp);
+ err = fuse_internal_get_cached_vnode(mp, fniio.ino,
+ LK_EXCLUSIVE, &vp);
SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
if (err != 0 || vp == NULL)
return (err);
@@ -694,6 +696,8 @@
nlink_t nlink;
int err = 0;
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
fdisp_init(&fdi, cnp->cn_namelen + 1);
fdisp_make_vp(&fdi, op, dvp, curthread, cnp->cn_cred);
@@ -891,15 +895,9 @@
struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct fuse_getattr_in *fgai;
struct fuse_attr_out *fao;
- off_t old_filesize = fvdat->cached_attrs.va_size;
- struct timespec old_atime = fvdat->cached_attrs.va_atime;
- struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
- struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
__enum_uint8(vtype) vtyp;
int err;
- ASSERT_VOP_LOCKED(vp, __func__);
-
fdisp_init(&fdi, sizeof(*fgai));
fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
fgai = fdi.indata;
@@ -917,22 +915,27 @@
fao = (struct fuse_attr_out *)fdi.answ;
vtyp = IFTOVT(fao->attr.mode);
+
+ CACHED_ATTR_LOCK(vp);
if (fvdat->flag & FN_SIZECHANGE)
- fao->attr.size = old_filesize;
+ fao->attr.size = fvdat->cached_attrs.va_size;
if (fvdat->flag & FN_ATIMECHANGE) {
- fao->attr.atime = old_atime.tv_sec;
- fao->attr.atimensec = old_atime.tv_nsec;
+ fao->attr.atime = fvdat->cached_attrs.va_atime.tv_sec;
+ fao->attr.atimensec = fvdat->cached_attrs.va_atime.tv_nsec;
}
if (fvdat->flag & FN_CTIMECHANGE) {
- fao->attr.ctime = old_ctime.tv_sec;
- fao->attr.ctimensec = old_ctime.tv_nsec;
+ fao->attr.ctime = fvdat->cached_attrs.va_ctime.tv_sec;
+ fao->attr.ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
}
if (fvdat->flag & FN_MTIMECHANGE) {
- fao->attr.mtime = old_mtime.tv_sec;
- fao->attr.mtimensec = old_mtime.tv_nsec;
+ fao->attr.mtime = fvdat->cached_attrs.va_mtime.tv_sec;
+ fao->attr.mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
}
+
fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
fao->attr_valid_nsec, vap, true);
+
+ CACHED_ATTR_UNLOCK(vp);
if (vtyp != vnode_vtype(vp)) {
fuse_internal_vnode_disappear(vp);
err = ENOENT;
@@ -950,10 +953,13 @@
{
struct vattr *attrs;
+ CACHED_ATTR_LOCK(vp);
if ((attrs = VTOVA(vp)) != NULL) {
*vap = *attrs; /* struct copy */
+ CACHED_ATTR_UNLOCK(vp);
return 0;
- }
+ } else
+ CACHED_ATTR_UNLOCK(vp);
return fuse_internal_do_getattr(vp, vap, cred, td);
}
@@ -1140,7 +1146,7 @@
int err = 0;
__enum_uint8(vtype) vtyp;
- ASSERT_VOP_ELOCKED(vp, __func__);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
mp = vnode_mount(vp);
fvdat = VTOFUD(vp);
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -401,6 +401,7 @@
fuse_warn(data, FSESS_WARN_WROTE_LONG,
"wrote more data than we provided it.");
/* This is bonkers. Clear attr cache. */
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag &= ~FN_SIZECHANGE;
fuse_vnode_clear_attr_cache(vp);
err = EINVAL;
@@ -416,8 +417,10 @@
fuse_vnode_setsize(vp, as_written_offset, false);
getnanouptime(&fvdat->last_local_modify);
}
- if (as_written_offset - diff >= filesize)
+ if (as_written_offset - diff >= filesize) {
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag &= ~FN_SIZECHANGE;
+ }
if (diff > 0) {
/* Short write */
@@ -454,8 +457,11 @@
fdisp_destroy(&fdi);
- if (wrote_anything)
+ if (wrote_anything) {
+ CACHED_ATTR_LOCK(vp);
fuse_vnode_undirty_cached_timestamps(vp, false);
+ CACHED_ATTR_UNLOCK(vp);
+ }
vn_rlimit_fsizex_res(uio, r);
return (err);
@@ -556,6 +562,7 @@
err = fuse_vnode_setsize(vp, uio->uio_offset + n, false);
filesize = uio->uio_offset + n;
getnanouptime(&fvdat->last_local_modify);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
fvdat->flag |= FN_SIZECHANGE;
if (err) {
brelse(bp);
@@ -806,6 +813,7 @@
left = uiop->uio_resid;
bzero((char *)bp->b_data + nread, left);
+ CACHED_ATTR_LOCK(vp);
if ((fvdat->flag & FN_SIZECHANGE) == 0) {
/*
* A short read with no error, when not using
@@ -838,6 +846,7 @@
"Short read of a dirty file");
uiop->uio_resid = 0;
}
+ CACHED_ATTR_UNLOCK(vp);
}
if (error) {
bp->b_ioflags |= BIO_ERROR;
@@ -855,10 +864,18 @@
* anything about it. In particular, we can't invalidate any
* part of the file's buffers because VOP_STRATEGY is called
* with them already locked.
+ *
+ * Normally the vnode should be exclusively locked at this
+ * point. However, if clustered reads are in use, then in a
+ * mixed read-write workload getblkx may need to flush a
+ * partially written buffer to disk during a read. In such a
+ * case, the vnode may only have a shared lock at this point.
*/
+ CACHED_ATTR_LOCK(vp);
filesize = fvdat->cached_attrs.va_size;
/* filesize must've been cached by fuse_vnop_open. */
KASSERT(filesize != VNOVAL, ("filesize should've been cached"));
+ CACHED_ATTR_UNLOCK(vp);
if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize)
bp->b_dirtyend = filesize -
diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h
--- a/sys/fs/fuse/fuse_node.h
+++ b/sys/fs/fuse/fuse_node.h
@@ -79,6 +79,11 @@
* cache_attrs.va_size field does not time out.
*/
#define FN_SIZECHANGE 0x00000100
+/*
+ * Whether I/O to this vnode should bypass the cache.
+ * XXX BUG: this should be part of the file handle, not the vnode data.
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
#define FN_DIRECTIO 0x00000200
/* Indicates that parent_nid is valid */
#define FN_PARENT_NID 0x00000400
@@ -92,38 +97,76 @@
#define FN_CTIMECHANGE 0x00001000
#define FN_ATIMECHANGE 0x00002000
+#define CACHED_ATTR_LOCK(vp) \
+do { \
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) \
+ mtx_lock(&VTOFUD(vp)->cached_attr_mtx); \
+} while(0)
+
+#define CACHED_ATTR_UNLOCK(vp) \
+do { \
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) \
+ mtx_unlock(&VTOFUD(vp)->cached_attr_mtx); \
+} while(0)
+
struct fuse_vnode_data {
- /** self **/
+ /* self's node id, similar to an inode number. Immutable. */
uint64_t nid;
+ /*
+ * Generation number. Distinguishes files with same nid but that don't
+ * overlap in time. Immutable.
+ */
uint64_t generation;
- /** parent **/
+ /* parent's node id. Protected by the vnode lock. */
uint64_t parent_nid;
/** I/O **/
- /* List of file handles for all of the vnode's open file descriptors */
+ /*
+ * List of file handles for all of the vnode's open file descriptors.
+ * Protected by the vnode lock.
+ */
LIST_HEAD(, fuse_filehandle) handles;
- /** flags **/
- uint32_t flag;
+ /* Protects flag, attr_cache_timeout and cached_attrs */
+ struct mtx cached_attr_mtx;
- /** meta **/
- /* The monotonic time after which the attr cache is invalid */
+ /*
+ * The monotonic time after which the attr cache is invalid
+ * Protected by an exclusive vnode lock or the cached_attr_mtx
+ */
struct bintime attr_cache_timeout;
- /*
+
+ /*
* Monotonic time after which the entry is invalid. Used for lookups
- * by nodeid instead of pathname.
+ * by nodeid instead of pathname. Protected by the vnode lock.
*/
struct bintime entry_cache_timeout;
+
/*
* Monotonic time of the last FUSE operation that modified the file
* size. Used to avoid races between mutator ops like VOP_SETATTR and
- * unlocked accessor ops like VOP_LOOKUP.
+ * unlocked accessor ops like VOP_LOOKUP. Protected by the vnode lock.
*/
struct timespec last_local_modify;
+
+ /* Protected by an exclusive vnode lock or the cached_attr_mtx */
struct vattr cached_attrs;
+
+ /* Number of FUSE_LOOKUPs minus FUSE_FORGETs. Protected by vnode lock */
uint64_t nlookup;
+
+ /*
+ * Misc flags. Protected by an exclusive vnode lock or the
+ * cached_attr_mtx, because some of the flags reflect the contents of
+ * cached_attrs.
+ */
+ uint32_t flag;
+
+ /* Vnode type. Immutable */
__enum_uint8(vtype) vtype;
+
+ /* State for clustered writes. Protected by vnode lock */
struct vn_clusterw clusterw;
};
@@ -141,18 +184,29 @@
#define VTOFUD(vp) \
((struct fuse_vnode_data *)((vp)->v_data))
#define VTOI(vp) (VTOFUD(vp)->nid)
+
+#define ASSERT_CACHED_ATTRS_LOCKED(vp) \
+ VNASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE || \
+ mtx_owned(&VTOFUD(vp)->cached_attr_mtx), vp, \
+ ("cached attrs not locked"))
+
static inline bool
fuse_vnode_attr_cache_valid(struct vnode *vp)
{
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct bintime now;
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
getbinuptime(&now);
- return (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >));
+ return (bintime_cmp(&fvdat->attr_cache_timeout, &now, >));
}
static inline struct vattr*
VTOVA(struct vnode *vp)
{
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
if (fuse_vnode_attr_cache_valid(vp))
return &(VTOFUD(vp)->cached_attrs);
else
@@ -162,6 +216,8 @@
static inline void
fuse_vnode_clear_attr_cache(struct vnode *vp)
{
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
bintime_clear(&VTOFUD(vp)->attr_cache_timeout);
}
@@ -184,10 +240,14 @@
fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp)
{
if (dvp != NULL && vp->v_type == VDIR) {
+ ASSERT_VOP_ELOCKED(vp, __func__); /* for parent_nid */
+
MPASS(dvp->v_type == VDIR);
VTOFUD(vp)->parent_nid = VTOI(dvp);
VTOFUD(vp)->flag |= FN_PARENT_NID;
} else {
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
VTOFUD(vp)->flag &= ~FN_PARENT_NID;
}
}
diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c
--- a/sys/fs/fuse/fuse_node.c
+++ b/sys/fs/fuse/fuse_node.c
@@ -157,6 +157,8 @@
fvdat->nid = nodeid;
LIST_INIT(&fvdat->handles);
+ mtx_init(&fvdat->cached_attr_mtx, "fuse attr cache mutex", NULL,
+ MTX_DEF);
vattr_null(&fvdat->cached_attrs);
fvdat->cached_attrs.va_birthtime.tv_sec = -1;
fvdat->cached_attrs.va_birthtime.tv_nsec = 0;
@@ -181,6 +183,7 @@
struct fuse_vnode_data *fvdat = vp->v_data;
vp->v_data = NULL;
+ mtx_destroy(&fvdat->cached_attr_mtx);
KASSERT(LIST_EMPTY(&fvdat->handles),
("Destroying fuse vnode with open files!"));
free(fvdat, M_FUSEVN);
@@ -386,7 +389,8 @@
struct fuse_setattr_in *fsai;
int err = 0;
- ASSERT_VOP_ELOCKED(vp, "fuse_io_extend");
+ ASSERT_VOP_ELOCKED(vp, __func__); /* For flag and last_local_modify */
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
if (fuse_isdeadfs(vp)) {
return EBADF;
@@ -425,6 +429,8 @@
return err;
}
+SDT_PROBE_DEFINE3(fusefs, , node, setsize_lockupgrade, "struct vnode*", "off_t",
+ "bool");
/*
* Adjust the vnode's size to a new value.
*
@@ -441,8 +447,26 @@
size_t iosize;
struct buf *bp = NULL;
int err = 0;
+ bool downgrade = false;
- ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize");
+ ASSERT_VOP_LOCKED(vp, __func__);
+ if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
+ SDT_PROBE3(fusefs, , node, setsize_lockupgrade, vp, newsize,
+ from_server);
+ /*
+ * vnode must be exclusively locked for vtruncbuf and
+ * vnode_pager_setsize.
+ */
+ vn_lock(vp, LK_UPGRADE | LK_RETRY);
+ if (VN_IS_DOOMED(vp)) {
+ /*
+ * Probably a forced unmount while vn_lock dropped the
+ * shared vnode lock during the upgrade.
+ */
+ return (ENOENT);
+ }
+ downgrade = true;
+ }
iosize = fuse_iosize(vp);
oldsize = fvdat->cached_attrs.va_size;
@@ -488,6 +512,10 @@
if (bp)
brelse(bp);
vnode_pager_setsize(vp, newsize);
+
+ if (downgrade)
+ vn_lock(vp, LK_DOWNGRADE);
+
return err;
}
@@ -497,15 +525,28 @@
struct thread *td)
{
struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ struct vattr va;
int error = 0;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
+ CACHED_ATTR_LOCK(vp);
if (!(fvdat->flag & FN_SIZECHANGE) &&
(!fuse_vnode_attr_cache_valid(vp) ||
- fvdat->cached_attrs.va_size == VNOVAL))
- error = fuse_internal_do_getattr(vp, NULL, cred, td);
-
- if (!error)
+ fvdat->cached_attrs.va_size == VNOVAL)) {
+ CACHED_ATTR_UNLOCK(vp);
+ /*
+ * It incurs a large struct copy, but we supply &va so we don't
+ * have to acquire the lock a second time after
+ * fuse_internal_do_getattr returns.
+ */
+ error = fuse_internal_do_getattr(vp, &va, cred, td);
+ if (!error)
+ *filesize = va.va_size;
+ } else {
*filesize = fvdat->cached_attrs.va_size;
+ CACHED_ATTR_UNLOCK(vp);
+ }
return error;
}
@@ -515,6 +556,8 @@
{
struct fuse_vnode_data *fvdat = VTOFUD(vp);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE);
if (atime)
fvdat->flag &= ~FN_ATIMECHANGE;
@@ -537,6 +580,8 @@
if (mp->mnt_flag & MNT_NOATIME)
flags &= ~FN_ATIMECHANGE;
+ CACHED_ATTR_LOCK(vp);
+
if (flags & FN_ATIMECHANGE)
fvdat->cached_attrs.va_atime = ts;
if (flags & FN_MTIMECHANGE)
@@ -545,6 +590,8 @@
fvdat->cached_attrs.va_ctime = ts;
fvdat->flag |= flags;
+
+ CACHED_ATTR_UNLOCK(vp);
}
void
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -601,6 +601,8 @@
error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp);
if (error)
goto out;
+ /* for last_local_modify and fuse_internal_cache_attrs */
+ ASSERT_VOP_ELOCKED(*vpp, __func__);
fvdat = VTOFUD(*vpp);
if (timespeccmp(&now, &fvdat->last_local_modify, >)) {
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -707,6 +707,8 @@
return (EXTERROR(EOPNOTSUPP, "This server does not implement "
"FUSE_FALLOCATE"));
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
+
io.uio_offset = *offset;
io.uio_resid = *len;
err = vn_rlimit_fsize(vp, &io, curthread);
@@ -779,7 +781,7 @@
struct fuse_data *data;
struct fuse_vnode_data *fvdat = VTOFUD(vp);
uint64_t biosize;
- off_t fsize;
+ off_t fsize = VNOVAL;
daddr_t lbn = ap->a_bn;
daddr_t *pbn = ap->a_bnp;
int *runp = ap->a_runp;
@@ -822,9 +824,10 @@
* and the risk of getting it wrong is not worth the cost of
* another upcall.
*/
- if (fvdat->cached_attrs.va_size != VNOVAL)
- fsize = fvdat->cached_attrs.va_size;
- else
+ CACHED_ATTR_LOCK(vp);
+ fsize = fvdat->cached_attrs.va_size;
+ CACHED_ATTR_UNLOCK(vp);
+ if (fsize == VNOVAL)
error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
if (error == 0)
*runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
@@ -894,6 +897,7 @@
cred = td->td_ucred;
err = fuse_flush(vp, cred, pid, fflag);
+ ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */
if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) {
struct vattr vap;
struct fuse_data *data;
@@ -911,6 +915,7 @@
}
if (access_e == 0) {
VATTR_NULL(&vap);
+ ASSERT_CACHED_ATTRS_LOCKED(vp);
vap.va_atime = fvdat->cached_attrs.va_atime;
/*
* Ignore errors setting when setting atime. That
@@ -1035,6 +1040,7 @@
*ap->a_inoffp += fwo->size;
*ap->a_outoffp += fwo->size;
fuse_internal_clear_suid_on_write(outvp, outcred, td);
+ ASSERT_CACHED_ATTRS_LOCKED(outvp);
if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) {
fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
getnanouptime(&outfvdat->last_local_modify);
@@ -1337,6 +1343,7 @@
int need_flush = 1;
+ ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */
LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
if (need_flush && vp->v_type == VREG) {
if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
@@ -1557,6 +1564,7 @@
else if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
return err;
+ ASSERT_CACHED_ATTRS_LOCKED(dvp); /* For flag */
is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
if (isdotdot && !(data->dataflags & FSESS_EXPORT_SUPPORT)) {
if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
@@ -1960,6 +1968,10 @@
"to be closed"));
}
+ /*
+ * XXX Check this flag without the lock. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
if (VTOFUD(vp)->flag & FN_DIRECTIO) {
ioflag |= IO_DIRECT;
}
@@ -2220,6 +2232,8 @@
return err;
}
+SDT_PROBE_DEFINE4(fusefs, , vnops, erelookup, "struct vnode*",
+ "struct vnode*", "struct vnode*", "struct vnode*");
/*
struct vnop_rename_args {
struct vnode *a_fdvp;
@@ -2242,6 +2256,7 @@
struct fuse_data *data;
bool newparent = fdvp != tdvp;
bool isdir = fvp->v_type == VDIR;
+ int locktype;
int err = 0;
if (fuse_isdeadfs(fdvp)) {
@@ -2266,12 +2281,34 @@
* have write permission to it, so ".." can be modified.
*/
data = fuse_get_mpdata(vnode_mount(tdvp));
+
+ if (tdvp != fdvp)
+ locktype = LK_EXCLUSIVE; /* for fuse_vnode_setparent */
+ else
+ locktype = LK_SHARED;
+
+ /*
+ * Must use LK_NOWAIT to prevent LORs between fvp and tdvp or
+ * tvp
+ */
+ if (vn_lock(fvp, locktype | LK_NOWAIT) != 0) {
+ /*
+ * Can't release tdvp or tvp to try avoiding the LOR.
+ * Must return instead.
+ */
+ SDT_PROBE4(fusefs, , vnops, erelookup, fdvp, fvp, tdvp,
+ tvp);
+ err = ERELOOKUP;
+ goto out;
+ }
+
if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
err = fuse_internal_access(fvp, VWRITE,
curthread, tcnp->cn_cred);
if (err)
- goto out;
+ goto unlock;
}
+
sx_xlock(&data->rename_lock);
err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
if (err == 0) {
@@ -2291,6 +2328,8 @@
}
cache_purge(fdvp);
}
+unlock:
+ VOP_UNLOCK(fvp);
out:
if (tdvp == tvp) {
vrele(tdvp);
@@ -2566,6 +2605,7 @@
fuse_vnop_write(struct vop_write_args *ap)
{
struct vnode *vp = ap->a_vp;
+ struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct uio *uio = ap->a_uio;
int ioflag = ap->a_ioflag;
struct ucred *cred = ap->a_cred;
@@ -2581,9 +2621,12 @@
"to be closed"));
}
- if (VTOFUD(vp)->flag & FN_DIRECTIO) {
+ /*
+ * XXX Check this flag without the lock. See
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
+ if (fvdat->flag & FN_DIRECTIO)
ioflag |= IO_DIRECT;
- }
err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
diff --git a/tests/sys/fs/fusefs/bmap.cc b/tests/sys/fs/fusefs/bmap.cc
--- a/tests/sys/fs/fusefs/bmap.cc
+++ b/tests/sys/fs/fusefs/bmap.cc
@@ -44,10 +44,13 @@
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
-class Bmap: public FuseTest {
+class Bmap: public FuseTest,
+ public WithParamInterface<tuple<int, int>>
+{
public:
virtual void SetUp() {
m_maxreadahead = UINT32_MAX;
+ m_init_flags |= get<0>(GetParam());
FuseTest::SetUp();
}
void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn)
@@ -73,12 +76,12 @@
}
};
-class BmapEof: public Bmap, public WithParamInterface<int> {};
+class BmapEof: public Bmap {};
/*
* Test FUSE_BMAP
*/
-TEST_F(Bmap, bmap)
+TEST_P(Bmap, bmap)
{
struct fiobmap2_arg arg;
/*
@@ -124,7 +127,7 @@
* If the daemon does not implement VOP_BMAP, fusefs should return sensible
* defaults.
*/
-TEST_F(Bmap, default_)
+TEST_P(Bmap, default_)
{
struct fiobmap2_arg arg;
const off_t filesize = 1 << 30;
@@ -181,7 +184,7 @@
* The server returns an error for some reason for FUSE_BMAP. fusefs should
* faithfully report that error up to the caller.
*/
-TEST_F(Bmap, einval)
+TEST_P(Bmap, einval)
{
struct fiobmap2_arg arg;
const off_t filesize = 1 << 30;
@@ -217,7 +220,7 @@
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=264196 . The bug did not
* lie in fusefs, but this is a convenient place for a regression test.
*/
-TEST_F(Bmap, spurious_einval)
+TEST_P(Bmap, spurious_einval)
{
const off_t filesize = 4ull << 30;
const ino_t ino = 42;
@@ -288,7 +291,7 @@
int fd;
int ngetattrs;
- ngetattrs = GetParam();
+ ngetattrs = get<1>(GetParam());
FuseTest::expect_lookup(RELPATH, ino, mode, filesize, 1, 0);
expect_open(ino, 0, 1);
// Depending on ngetattrs, FUSE_READ could be called with either
@@ -348,6 +351,17 @@
leak(fd);
}
-INSTANTIATE_TEST_SUITE_P(BE, BmapEof,
- Values(1, 2, 3)
-);
+/*
+ * Try with and without async reads, because it affects the type of vnode lock
+ * on entry to fuse_vnop_bmap.
+ */
+INSTANTIATE_TEST_SUITE_P(B, Bmap, Values(
+ tuple(0, 0),
+ tuple(FUSE_ASYNC_READ, 0)
+));
+
+INSTANTIATE_TEST_SUITE_P(BE, BmapEof, Values(
+ tuple(0, 1),
+ tuple(0, 2),
+ tuple(0, 3)
+));
diff --git a/tests/sys/fs/fusefs/notify.cc b/tests/sys/fs/fusefs/notify.cc
--- a/tests/sys/fs/fusefs/notify.cc
+++ b/tests/sys/fs/fusefs/notify.cc
@@ -47,8 +47,15 @@
* invalidation. This file tests our client's handling of those messages.
*/
-class Notify: public FuseTest {
+class Notify: public FuseTest,
+ public WithParamInterface<int>
+{
public:
+virtual void SetUp() {
+ m_init_flags |= GetParam();
+ FuseTest::SetUp();
+}
+
/* Ignore an optional FUSE_FSYNC */
void maybe_expect_fsync(uint64_t ino)
{
@@ -154,7 +161,7 @@
}
/* Invalidate a nonexistent entry */
-TEST_F(Notify, inval_entry_nonexistent)
+TEST_P(Notify, inval_entry_nonexistent)
{
const static char *name = "foo";
struct inval_entry_args iea;
@@ -173,7 +180,7 @@
}
/* Invalidate a cached entry */
-TEST_F(Notify, inval_entry)
+TEST_P(Notify, inval_entry)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -211,7 +218,7 @@
* Invalidate a cached entry beneath the root, which uses a slightly different
* code path.
*/
-TEST_F(Notify, inval_entry_below_root)
+TEST_P(Notify, inval_entry_below_root)
{
const static char FULLPATH[] = "mountpoint/some_dir/foo";
const static char DNAME[] = "some_dir";
@@ -258,7 +265,7 @@
}
/* Invalidating an entry invalidates the parent directory's attributes */
-TEST_F(Notify, inval_entry_invalidates_parent_attrs)
+TEST_P(Notify, inval_entry_invalidates_parent_attrs)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -302,7 +309,7 @@
}
-TEST_F(Notify, inval_inode_nonexistent)
+TEST_P(Notify, inval_inode_nonexistent)
{
struct inval_inode_args iia;
ino_t ino = 42;
@@ -320,7 +327,7 @@
EXPECT_EQ(0, (intptr_t)thr0_value);
}
-TEST_F(Notify, inval_inode_with_clean_cache)
+TEST_P(Notify, inval_inode_with_clean_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -390,7 +397,7 @@
* nothing bad should happen.
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=290519
*/
-TEST_F(Notify, notify_after_unmount)
+TEST_P(Notify, notify_after_unmount)
{
const static char *name = "foo";
struct inval_entry_args iea;
@@ -408,7 +415,7 @@
/* FUSE_NOTIFY_STORE with a file that's not in the entry cache */
/* disabled because FUSE_NOTIFY_STORE is not yet implemented */
-TEST_F(Notify, DISABLED_store_nonexistent)
+TEST_P(Notify, DISABLED_store_nonexistent)
{
struct store_args sa;
ino_t ino = 42;
@@ -427,7 +434,7 @@
/* Store data into for a file that does not yet have anything cached */
/* disabled because FUSE_NOTIFY_STORE is not yet implemented */
-TEST_F(Notify, DISABLED_store_with_blank_cache)
+TEST_P(Notify, DISABLED_store_with_blank_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -465,7 +472,7 @@
leak(fd);
}
-TEST_F(NotifyWriteback, inval_inode_with_dirty_cache)
+TEST_P(NotifyWriteback, inval_inode_with_dirty_cache)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -506,7 +513,7 @@
leak(fd);
}
-TEST_F(NotifyWriteback, inval_inode_attrs_only)
+TEST_P(NotifyWriteback, inval_inode_attrs_only)
{
const static char FULLPATH[] = "mountpoint/foo";
const static char RELPATH[] = "foo";
@@ -600,3 +607,10 @@
EXPECT_EQ(ENODEV, errno);
delete out;
}
+
+/*
+ * Try with and without async reads, because it affects the type of vnode lock
+ * acquired in fuse_internal_invalidate_entry.
+ */
+INSTANTIATE_TEST_SUITE_P(N, Notify, Values(0, FUSE_ASYNC_READ));
+INSTANTIATE_TEST_SUITE_P(N, NotifyWriteback, Values(0, FUSE_ASYNC_READ));
diff --git a/tests/sys/fs/fusefs/read.cc b/tests/sys/fs/fusefs/read.cc
--- a/tests/sys/fs/fusefs/read.cc
+++ b/tests/sys/fs/fusefs/read.cc
@@ -95,6 +95,19 @@
}
};
+class AsyncReadNoAttrCache: public Read {
+virtual void SetUp() {
+ m_init_flags = FUSE_ASYNC_READ;
+ Read::SetUp();
+}
+public:
+void expect_lookup(const char *relpath, uint64_t ino)
+{
+ // Don't return size, and set attr_valid=0
+ FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, 0, 1, 0);
+}
+};
+
class ReadAhead: public Read,
public WithParamInterface<tuple<bool, int>>
{
@@ -351,6 +364,90 @@
leak(fd);
}
+/*
+ * Regression test for a VFS locking bug: as of
+ * 22bb70a6b3bb7799276ab480e40665b7d6e4ce25 (17-December-2024), fusefs did not
+ * obtain an exclusive vnode lock before attempting to clear the attr cache
+ * after an unexpected eof. The vnode lock would already be exclusive except
+ * when FUSE_ASYNC_READ is set.
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=283391
+ */
+ TEST_F(AsyncRead, eof)
+{
+ const char FULLPATH[] = "mountpoint/some_file.txt";
+ const char RELPATH[] = "some_file.txt";
+ const char *CONTENTS = "abcdefghijklmnop";
+ uint64_t ino = 42;
+ int fd;
+ uint64_t offset = 100;
+ ssize_t bufsize = strlen(CONTENTS);
+ ssize_t partbufsize = 3 * bufsize / 4;
+ ssize_t r;
+ uint8_t buf[bufsize];
+ struct stat sb;
+
+ expect_lookup(RELPATH, ino, offset + bufsize);
+ expect_open(ino, 0, 1);
+ expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS);
+ expect_getattr(ino, offset + partbufsize);
+
+ fd = open(FULLPATH, O_RDONLY);
+ ASSERT_LE(0, fd) << strerror(errno);
+
+ r = pread(fd, buf, bufsize, offset);
+ ASSERT_LE(0, r) << strerror(errno);
+ EXPECT_EQ(partbufsize, r) << strerror(errno);
+ ASSERT_EQ(0, fstat(fd, &sb));
+ EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size);
+ leak(fd);
+}
+
+/*
+ * If the daemon disables the attribute cache (or if it has expired), then the
+ * kernel must fetch attributes during VOP_READ. If async reads are enabled,
+ * then fuse_internal_cache_attrs will be called without the vnode exclusively
+ * locked. Regression test for
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=291064
+ */
+TEST_F(AsyncReadNoAttrCache, read)
+{
+ const char FULLPATH[] = "mountpoint/some_file.txt";
+ const char RELPATH[] = "some_file.txt";
+ const char *CONTENTS = "abcdefgh";
+ uint64_t ino = 42;
+ mode_t mode = S_IFREG | 0644;
+ int fd;
+ ssize_t bufsize = strlen(CONTENTS);
+ uint8_t buf[bufsize];
+ Sequence seq;
+
+ expect_lookup(RELPATH, ino);
+ expect_open(ino, 0, 1);
+ EXPECT_CALL(*m_mock, process(
+ ResultOf([=](auto in) {
+ return (in.header.opcode == FUSE_GETATTR &&
+ in.header.nodeid == ino);
+ }, Eq(true)),
+ _)
+ ).WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out)
+ {
+ SET_OUT_HEADER_LEN(out, attr);
+ out.body.attr.attr.ino = ino;
+ out.body.attr.attr.mode = mode;
+ out.body.attr.attr.size = bufsize;
+ out.body.attr.attr_valid = 0;
+ })));
+ expect_read(ino, 0, bufsize, bufsize, CONTENTS);
+
+ fd = open(FULLPATH, O_RDONLY);
+ ASSERT_LE(0, fd) << strerror(errno);
+
+ ASSERT_EQ(bufsize, read(fd, buf, bufsize)) << strerror(errno);
+ ASSERT_EQ(0, memcmp(buf, CONTENTS, bufsize));
+
+ leak(fd);
+}
+
/* The kernel should update the cached atime attribute during a read */
TEST_F(Read, atime)
{
diff --git a/tests/sys/fs/fusefs/rename.cc b/tests/sys/fs/fusefs/rename.cc
--- a/tests/sys/fs/fusefs/rename.cc
+++ b/tests/sys/fs/fusefs/rename.cc
@@ -163,6 +163,96 @@
ASSERT_EQ(0, access(FULLDST, F_OK)) << strerror(errno);
}
+static volatile int stopit = 0;
+
+static void* setattr_th(void* arg) {
+ char *path = (char*)arg;
+
+ while (stopit == 0)
+ chmod(path, 0777);
+ return 0;
+}
+
+/*
+ * Rename restarts the syscall to avoid a LOR
+ *
+ * This test triggers a race: the chmod() calls VOP_SETATTR, which locks its
+ * vnode, but fuse_vnop_rename also tries to lock the same vnode. The result
+ * is that, in order to avoid a LOR, fuse_vnop_rename returns ERELOOKUP to
+ * restart the syscall.
+ *
+ * To verify that the race is hit, watch the fusefs:fusefs:vnops:erelookup
+ * dtrace probe while the test is running. On my system, that probe fires 1 - 5
+ * times per second during this test.
+ */
+TEST_F(Rename, erelookup)
+{
+ const char FULLDST[] = "mountpoint/dstdir/dst";
+ const char RELDSTDIR[] = "dstdir";
+ const char RELDST[] = "dst";
+ const char FULLSRC[] = "mountpoint/src";
+ const char RELSRC[] = "src";
+ pthread_t th0;
+ uint64_t ino = 42;
+ uint64_t dst_dir_ino = 43;
+ uint32_t mode = S_IFDIR | 0644;
+ struct timespec now, timeout;
+
+ EXPECT_LOOKUP(FUSE_ROOT_ID, RELSRC)
+ .WillRepeatedly(Invoke(
+ ReturnImmediate([=](auto i __unused, auto& out) {
+ SET_OUT_HEADER_LEN(out, entry);
+ out.body.entry.attr.mode = mode;
+ out.body.entry.nodeid = ino;
+ out.body.entry.attr.nlink = 2;
+ out.body.entry.attr_valid = UINT64_MAX;
+ out.body.entry.attr.uid = 0;
+ out.body.entry.attr.gid = 0;
+ out.body.entry.entry_valid = UINT64_MAX;
+ }))
+ );
+ EXPECT_LOOKUP(FUSE_ROOT_ID, RELDSTDIR)
+ .WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out)
+ {
+ SET_OUT_HEADER_LEN(out, entry);
+ out.body.entry.nodeid = dst_dir_ino;
+ out.body.entry.entry_valid = UINT64_MAX;
+ out.body.entry.attr_valid = UINT64_MAX;
+ out.body.entry.attr.mode = S_IFDIR | 0755;
+ out.body.entry.attr.ino = dst_dir_ino;
+ out.body.entry.attr.uid = geteuid();
+ out.body.entry.attr.gid = getegid();
+ })));
+ EXPECT_LOOKUP(dst_dir_ino, RELDST)
+ .WillRepeatedly(Invoke(ReturnErrno(ENOENT)));
+ EXPECT_CALL(*m_mock, process(
+ ResultOf([=](auto in) {
+ return (in.header.opcode == FUSE_SETATTR &&
+ in.header.nodeid == ino);
+ }, Eq(true)),
+ _)
+ ).WillRepeatedly(Invoke(ReturnErrno(EIO)));
+ EXPECT_CALL(*m_mock, process(
+ ResultOf([=](auto in) {
+ return (in.header.opcode == FUSE_RENAME);
+ }, Eq(true)),
+ _)
+ ).WillRepeatedly(Invoke(ReturnErrno(EIO)));
+
+ ASSERT_EQ(0, pthread_create(&th0, NULL, setattr_th, (void*)FULLSRC))
+ << strerror(errno);
+
+ ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &timeout));
+ timeout.tv_sec += 1;
+ do {
+ ASSERT_EQ(-1, rename(FULLSRC, FULLDST));
+ EXPECT_EQ(EIO, errno);
+ ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &now));
+ } while (timespeccmp(&now, &timeout, <));
+ stopit = 1;
+ pthread_join(th0, NULL);
+}
+
TEST_F(Rename, exdev)
{
const char FULLB[] = "mountpoint/src";

File Metadata

Mime Type
text/plain
Expires
Sun, Mar 15, 11:35 PM (25 m, 58 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29741606
Default Alt Text
D55230.id171715.diff (31 KB)

Event Timeline