Page MenuHomeFreeBSD

D27946.id81602.diff
No OneTemporary

D27946.id81602.diff

Index: sys/fs/fuse/fuse_file.c
===================================================================
--- sys/fs/fuse/fuse_file.c
+++ sys/fs/fuse/fuse_file.c
@@ -171,10 +171,11 @@
{
struct fuse_dispatcher fdi;
struct fuse_release_in *fri;
-
int err = 0;
int op = FUSE_RELEASE;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
if (fuse_isdeadfs(vp)) {
goto out;
}
@@ -218,6 +219,8 @@
struct fuse_filehandle *fufh;
fufh_type_t fufh_type = fflags_2_fufh_type(mode);
+ ASSERT_VOP_LOCKED(vp, __func__);
+
/*
* Unlike fuse_filehandle_get, we want to search for a filehandle with
* the exact cred, and no fallback
@@ -253,6 +256,8 @@
struct fuse_filehandle *fufh;
fufh_type_t fufh_type;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
fufh_type = fflags_2_fufh_type(fflag);
/* cred can be NULL for in-kernel clients */
if (cred == NULL)
@@ -290,6 +295,8 @@
struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct fuse_filehandle *fufh;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
if (cred == NULL)
goto fallback;
@@ -333,6 +340,8 @@
struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct fuse_filehandle *fufh;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
fufh = malloc(sizeof(struct fuse_filehandle), M_FUSE_FILEHANDLE,
M_WAITOK);
MPASS(fufh != NULL);
@@ -352,7 +361,6 @@
counter_u64_add(fuse_fh_count, 1);
if (foo->open_flags & FOPEN_DIRECT_IO) {
- ASSERT_VOP_ELOCKED(vp, __func__);
VTOFUD(vp)->flag |= FN_DIRECTIO;
fuse_io_invalbuf(vp, td);
} else {
Index: sys/fs/fuse/fuse_internal.c
===================================================================
--- sys/fs/fuse/fuse_internal.c
+++ sys/fs/fuse/fuse_internal.c
@@ -337,6 +337,8 @@
int op = FUSE_FSYNC;
int err = 0;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
if (fsess_not_impl(vnode_mount(vp),
(vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) {
return 0;
@@ -411,10 +413,10 @@
return (0);
if (fnieo.parent == FUSE_ROOT_ID)
- err = VFS_ROOT(mp, LK_SHARED, &dvp);
+ err = VFS_ROOT(mp, LK_EXCLUSIVE, &dvp);
else
err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
- LK_SHARED, &dvp);
+ LK_EXCLUSIVE, &dvp);
SDT_PROBE3(fusefs, , internal, invalidate_entry, dvp, &fnieo, name);
/*
* If dvp is not in the cache, then it must've been reclaimed. And
@@ -459,8 +461,8 @@
if (fniio.ino == FUSE_ROOT_ID)
err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
else
- err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
- &vp);
+ err = fuse_internal_get_cached_vnode(mp, fniio.ino,
+ LK_EXCLUSIVE, &vp);
SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
if (err != 0 || vp == NULL)
return (err);
@@ -702,6 +704,7 @@
* It will linger around until vnlru reclaims it, costing a bit of
* temporary memory.
*/
+ ASSERT_VOP_ELOCKED(vp, __func__);
nlink = VTOFUD(vp)->cached_attrs.va_nlink--;
/*
@@ -887,6 +890,8 @@
enum vtype vtyp;
int err;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
fdisp_init(&fdi, sizeof(*fgai));
fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
fgai = fdi.indata;
@@ -932,8 +937,16 @@
int iosize = fuse_iosize(vp);
v_inval_buf_range(vp, 0, INT64_MAX, iosize);
}
- fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
- fao->attr_valid_nsec, vap);
+ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
+ fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
+ fao->attr_valid_nsec, vap);
+ } else if (0 == vn_lock(vp, LK_UPGRADE | LK_NOWAIT)) {
+ fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
+ fao->attr_valid_nsec, vap);
+ vn_lock(vp, LK_DOWNGRADE);
+ } else {
+ /* We can't update the cache. Too bad. */
+ }
if (vtyp != vnode_vtype(vp)) {
fuse_internal_vnode_disappear(vp);
err = ENOENT;
@@ -1135,6 +1148,8 @@
int sizechanged = -1;
uint64_t newsize = 0;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
mp = vnode_mount(vp);
fvdat = VTOFUD(vp);
data = fuse_get_mpdata(mp);
Index: sys/fs/fuse/fuse_io.c
===================================================================
--- sys/fs/fuse/fuse_io.c
+++ sys/fs/fuse/fuse_io.c
@@ -982,7 +982,16 @@
*/
SDT_PROBE2(fusefs, , io, trace, 1,
"Short read of a clean file");
- fuse_vnode_clear_attr_cache(vp);
+ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
+ fuse_vnode_clear_attr_cache(vp);
+ } else if (0 ==
+ vn_lock(vp, LK_UPGRADE | LK_NOWAIT))\
+ {
+ fuse_vnode_clear_attr_cache(vp);
+ vn_lock(vp, LK_DOWNGRADE);
+ } else {
+ /* We can't clear the attr cache. */
+ }
} else {
/*
* If dirty writes _are_ cached beyond EOF,
Index: sys/fs/fuse/fuse_node.h
===================================================================
--- sys/fs/fuse/fuse_node.h
+++ sys/fs/fuse/fuse_node.h
@@ -101,7 +101,10 @@
uint64_t parent_nid;
/** I/O **/
- /* List of file handles for all of the vnode's open file descriptors */
+ /*
+ * List of file handles for all of the vnode's open file descriptors.
+ * Protected by the vnode lock.
+ */
LIST_HEAD(, fuse_filehandle) handles;
/** flags **/
@@ -115,6 +118,7 @@
* by nodeid instead of pathname.
*/
struct bintime entry_cache_timeout;
+ /* Cached file attributes, protected by the vnode lock */
struct vattr cached_attrs;
uint64_t nlookup;
enum vtype vtype;
@@ -139,6 +143,8 @@
{
struct bintime now;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
getbinuptime(&now);
return (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >));
}
@@ -146,6 +152,8 @@
static inline struct vattr*
VTOVA(struct vnode *vp)
{
+ ASSERT_VOP_LOCKED(vp, __func__);
+
if (fuse_vnode_attr_cache_valid(vp))
return &(VTOFUD(vp)->cached_attrs);
else
@@ -155,6 +163,8 @@
static inline void
fuse_vnode_clear_attr_cache(struct vnode *vp)
{
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
bintime_clear(&VTOFUD(vp)->attr_cache_timeout);
}
Index: sys/fs/fuse/fuse_node.c
===================================================================
--- sys/fs/fuse/fuse_node.c
+++ sys/fs/fuse/fuse_node.c
@@ -350,7 +350,7 @@
struct fuse_setattr_in *fsai;
int err = 0;
- ASSERT_VOP_ELOCKED(vp, "fuse_io_extend");
+ ASSERT_VOP_ELOCKED(vp, __func__);
if (fuse_isdeadfs(vp)) {
return EBADF;
@@ -449,6 +449,8 @@
struct fuse_vnode_data *fvdat = VTOFUD(vp);
int error = 0;
+ ASSERT_VOP_LOCKED(vp, __func__);
+
if (!(fvdat->flag & FN_SIZECHANGE) &&
(!fuse_vnode_attr_cache_valid(vp) ||
fvdat->cached_attrs.va_size == VNOVAL))
@@ -476,6 +478,8 @@
struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
struct timespec ts;
+ ASSERT_VOP_ELOCKED(vp, __func__);
+
vfs_timestamp(&ts);
if (data->time_gran > 1)
Index: sys/fs/fuse/fuse_vfsops.c
===================================================================
--- sys/fs/fuse/fuse_vfsops.c
+++ sys/fs/fuse/fuse_vfsops.c
@@ -588,6 +588,7 @@
* * following attribute cache expiration, or
* * due a bug in the daemon, or
*/
+ ASSERT_VOP_LOCKED(*vpp, __func__);
fvdat = VTOFUD(*vpp);
if (vnode_isreg(*vpp) &&
filesize != fvdat->cached_attrs.va_size &&
Index: sys/fs/fuse/fuse_vnops.c
===================================================================
--- sys/fs/fuse/fuse_vnops.c
+++ sys/fs/fuse/fuse_vnops.c
@@ -1378,6 +1378,7 @@
* * following attribute cache expiration, or
* * due a bug in the daemon, or
*/
+ ASSERT_VOP_LOCKED(vp, __func__);
fvdat = VTOFUD(vp);
if (vnode_isreg(vp) &&
((filesize != fvdat->cached_attrs.va_size &&
@@ -1825,6 +1826,8 @@
return err;
}
+SDT_PROBE_DEFINE4(fusefs, , vnops, erelookup, "struct vnode*",
+ "struct vnode*", "struct vnode*", "struct vnode*");
/*
struct vnop_rename_args {
struct vnode *a_fdvp;
@@ -1871,10 +1874,26 @@
*/
data = fuse_get_mpdata(vnode_mount(tdvp));
if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
- err = fuse_internal_access(fvp, VWRITE,
- tcnp->cn_thread, tcnp->cn_cred);
- if (err)
+ /*
+ * Must use LK_NOWAIT to prevent LORs between fvp and tdvp or
+ * tvp
+ */
+ if (0 == vn_lock(fvp, LK_SHARED | LK_NOWAIT)) {
+ err = fuse_internal_access(fvp, VWRITE,
+ tcnp->cn_thread, tcnp->cn_cred);
+ VOP_UNLOCK(fvp);
+ if (err)
+ goto out;
+ } else {
+ /*
+ * Can't release tdvp or tvp to try releasing the LOR.
+ * Must return instead.
+ */
+ SDT_PROBE4(fusefs, , vnops, erelookup, fdvp, fvp, tdvp,
+ tvp);
+ err = ERELOOKUP;
goto out;
+ }
}
sx_xlock(&data->rename_lock);
err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
@@ -2179,6 +2198,8 @@
struct ucred *cred = ap->a_cred;
pid_t pid = curthread->td_proc->p_pid;
+ /* vp should be ELOCKED, because we don't set MNTK_SHARED_WRITES */
+ ASSERT_VOP_ELOCKED(vp, __func__);
if (fuse_isdeadfs(vp)) {
return ENXIO;
}
Index: tests/sys/fs/fusefs/default_permissions.cc
===================================================================
--- tests/sys/fs/fusefs/default_permissions.cc
+++ tests/sys/fs/fusefs/default_permissions.cc
@@ -1019,6 +1019,83 @@
ASSERT_EQ(EPERM, errno);
}
+static volatile int stopit = 0;
+
+static void* setattr_th(void* arg) {
+ char *path = (char*)arg;
+
+ while (stopit == 0)
+ chmod(path, 0777);
+ return 0;
+}
+
+/*
+ * Rename restarts the syscall to avoid a LOR
+ *
+ * This test triggers a race: the chmod() calls VOP_SETATTR, which locks its
+ * vnode, but fuse_vnop_rename also tries to lock the same vnode in order to
+ * use FUSE_GETATTR on it. The result is that, in order to avoid a LOR,
+ * fuse_vnop_rename returns ERELOOKUP to restart the syscall.
+ *
+ * To verify that the race is hit, watch the fusefs:fusefs:vnops:erelookup
+ * dtrace probe while the test is running. On my system, that probe fires 1 - 5
+ * times per second during this test.
+ */
+TEST_F(Rename, erelookup)
+{
+ const char FULLDST[] = "mountpoint/dstdir/dst";
+ const char RELDSTDIR[] = "dstdir";
+ const char RELDST[] = "dst";
+ const char FULLSRC[] = "mountpoint/src";
+ const char RELSRC[] = "src";
+ pthread_t th0;
+ uint64_t ino = 42;
+ uint64_t dst_dir_ino = 43;
+ struct timespec now, timeout;
+
+ expect_getattr(FUSE_ROOT_ID, S_IFDIR | 0777, UINT64_MAX, 1, geteuid());
+ EXPECT_LOOKUP(FUSE_ROOT_ID, RELSRC)
+ .WillRepeatedly(Invoke(
+ ReturnImmediate([=](auto i __unused, auto& out) {
+ SET_OUT_HEADER_LEN(out, entry);
+ out.body.entry.attr.mode = S_IFDIR | 0644;
+ out.body.entry.nodeid = ino;
+ out.body.entry.attr.nlink = 2;
+ out.body.entry.attr_valid = UINT64_MAX;
+ out.body.entry.attr.uid = 0;
+ out.body.entry.attr.gid = 0;
+ out.body.entry.entry_valid = UINT64_MAX;
+ }))
+ );
+ EXPECT_LOOKUP(FUSE_ROOT_ID, RELDSTDIR)
+ .WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out)
+ {
+ SET_OUT_HEADER_LEN(out, entry);
+ out.body.entry.nodeid = dst_dir_ino;
+ out.body.entry.entry_valid = UINT64_MAX;
+ out.body.entry.attr_valid = UINT64_MAX;
+ out.body.entry.attr.mode = S_IFDIR | 0755;
+ out.body.entry.attr.ino = dst_dir_ino;
+ out.body.entry.attr.uid = geteuid();
+ out.body.entry.attr.gid = getegid();
+ })));
+ EXPECT_LOOKUP(dst_dir_ino, RELDST)
+ .WillRepeatedly(Invoke(ReturnErrno(ENOENT)));
+
+ ASSERT_EQ(0, pthread_create(&th0, NULL, setattr_th, (void*)FULLSRC))
+ << strerror(errno);
+
+ ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &timeout));
+ timeout.tv_sec += 1;
+ do {
+ ASSERT_EQ(-1, rename(FULLSRC, FULLDST));
+ EXPECT_EQ(EACCES, errno);
+ ASSERT_EQ(0, clock_gettime(CLOCK_MONOTONIC, &now));
+ } while (timespeccmp(&now, &timeout, <));
+ stopit = 1;
+ pthread_join(th0, NULL);
+}
+
/* Successfully rename a file, overwriting the destination */
TEST_F(Rename, ok)
{
Index: tests/sys/fs/fusefs/read.cc
===================================================================
--- tests/sys/fs/fusefs/read.cc
+++ tests/sys/fs/fusefs/read.cc
@@ -294,6 +294,43 @@
leak(fd);
}
+/*
+ * Regression test for a VFS locking bug: as of
+ * b021aa8b6bee3d1021c2973711cb2d6cda0477bc (2-January-2020), fusefs did not
+ * obtain an exclusive vnode lock before attempting to clear the attr cache
+ * after an unexpected eof. The vnode lock would already be exclusive except
+ * when FUSE_ASYNC_READ is set.
+ */
+TEST_F(AsyncRead, eof)
+{
+ const char FULLPATH[] = "mountpoint/some_file.txt";
+ const char RELPATH[] = "some_file.txt";
+ const char *CONTENTS = "abcdefghijklmnop";
+ uint64_t ino = 42;
+ int fd;
+ uint64_t offset = 100;
+ ssize_t bufsize = strlen(CONTENTS);
+ ssize_t partbufsize = 3 * bufsize / 4;
+ ssize_t r;
+ uint8_t buf[bufsize];
+ struct stat sb;
+
+ expect_lookup(RELPATH, ino, offset + bufsize);
+ expect_open(ino, 0, 1);
+ expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS);
+ expect_getattr(ino, offset + partbufsize);
+
+ fd = open(FULLPATH, O_RDONLY);
+ ASSERT_LE(0, fd) << strerror(errno);
+
+ r = pread(fd, buf, bufsize, offset);
+ ASSERT_LE(0, r) << strerror(errno);
+ EXPECT_EQ(partbufsize, r) << strerror(errno);
+ ASSERT_EQ(0, fstat(fd, &sb));
+ EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size);
+ leak(fd);
+}
+
/* 0-length reads shouldn't cause any confusion */
TEST_F(Read, direct_io_read_nothing)
{

File Metadata

Mime Type
text/plain
Expires
Fri, Nov 21, 10:42 PM (21 h, 40 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25818985
Default Alt Text
D27946.id81602.diff (12 KB)

Event Timeline