diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h index 5648624f4c63..3bfc859dbac9 100644 --- a/sys/fs/fuse/fuse_ipc.h +++ b/sys/fs/fuse/fuse_ipc.h @@ -1,461 +1,463 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _FUSE_IPC_H_ #define _FUSE_IPC_H_ #include #include enum fuse_data_cache_mode { FUSE_CACHE_UC, FUSE_CACHE_WT, FUSE_CACHE_WB, }; struct fuse_iov { void *base; size_t len; size_t allocated_size; int credit; }; void fiov_init(struct fuse_iov *fiov, size_t size); void fiov_teardown(struct fuse_iov *fiov); void fiov_refresh(struct fuse_iov *fiov); void fiov_adjust(struct fuse_iov *fiov, size_t size); #define FUSE_DIMALLOC(fiov, spc1, spc2, amnt) do { \ fiov_adjust(fiov, (sizeof(*(spc1)) + (amnt))); \ (spc1) = (fiov)->base; \ (spc2) = (char *)(fiov)->base + (sizeof(*(spc1))); \ } while (0) #define FU_AT_LEAST(siz) max((siz), 160) #define FUSE_ASSERT_AW_DONE(ftick) \ KASSERT((ftick)->tk_aw_link.tqe_next == NULL && \ (ftick)->tk_aw_link.tqe_prev == NULL, \ ("FUSE: ticket still on answer delivery list %p", (ftick))) #define FUSE_ASSERT_MS_DONE(ftick) \ KASSERT((ftick)->tk_ms_link.stqe_next == NULL, \ ("FUSE: ticket still on message list %p", (ftick))) struct fuse_ticket; struct fuse_data; typedef int fuse_handler_t(struct fuse_ticket *ftick, struct uio *uio); struct fuse_ticket { /* fields giving the identity of the ticket */ uint64_t tk_unique; struct fuse_data *tk_data; int tk_flag; u_int tk_refcount; /* * If this ticket's operation has been interrupted, this will hold the * unique value of the FUSE_INTERRUPT operation. Otherwise, it will be * 0. 
*/ uint64_t irq_unique; /* fields for initiating an upgoing message */ struct fuse_iov tk_ms_fiov; STAILQ_ENTRY(fuse_ticket) tk_ms_link; /* fields for handling answers coming from userspace */ struct fuse_iov tk_aw_fiov; struct fuse_out_header tk_aw_ohead; int tk_aw_errno; struct mtx tk_aw_mtx; fuse_handler_t *tk_aw_handler; TAILQ_ENTRY(fuse_ticket) tk_aw_link; }; #define FT_ANSW 0x01 /* request of ticket has already been answered */ #define FT_DIRTY 0x04 /* ticket has been used */ static inline struct fuse_iov * fticket_resp(struct fuse_ticket *ftick) { return (&ftick->tk_aw_fiov); } static inline bool fticket_answered(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_aw_mtx, MA_OWNED); return (ftick->tk_flag & FT_ANSW); } static inline void fticket_set_answered(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_aw_mtx, MA_OWNED); ftick->tk_flag |= FT_ANSW; } static inline struct fuse_in_header* fticket_in_header(struct fuse_ticket *ftick) { return (struct fuse_in_header *)(ftick->tk_ms_fiov.base); } static inline enum fuse_opcode fticket_opcode(struct fuse_ticket *ftick) { return fticket_in_header(ftick)->opcode; } int fticket_pull(struct fuse_ticket *ftick, struct uio *uio); /* * The data representing a FUSE session. */ struct fuse_data { struct cdev *fdev; struct mount *mp; struct vnode *vroot; struct ucred *daemoncred; int dataflags; int ref; struct mtx ms_mtx; STAILQ_HEAD(, fuse_ticket) ms_head; int ms_count; struct mtx aw_mtx; TAILQ_HEAD(, fuse_ticket) aw_head; /* * Holds the next value of the FUSE operation unique value. * Also, serves as a wakeup channel to prevent any operations from * being created before INIT completes. 
*/ u_long ticketer; struct sx rename_lock; uint32_t fuse_libabi_major; uint32_t fuse_libabi_minor; uint32_t max_readahead_blocks; uint32_t max_write; uint32_t max_read; struct selinfo ks_rsel; int daemon_timeout; int linux_errnos; unsigned time_gran; /* A bitmask of FUSE RPCs that are not implemented by the server */ uint64_t notimpl; /* * A bitmask of FUSE RPCs that are implemented by the server. * If an operation is not present in either notimpl or isimpl, then it * may be implemented by the server, but the kernel doesn't know for * sure. */ uint64_t isimpl; uint64_t mnt_flag; enum fuse_data_cache_mode cache_mode; }; #define FSESS_DEAD 0x0001 /* session is to be closed */ #define FSESS_INITED 0x0004 /* session has been inited */ #define FSESS_DAEMON_CAN_SPY 0x0010 /* let non-owners access this fs */ /* (and being observed by the daemon) */ #define FSESS_PUSH_SYMLINKS_IN 0x0020 /* prefix absolute symlinks with mp */ #define FSESS_DEFAULT_PERMISSIONS 0x0040 /* kernel does permission checking */ #define FSESS_ASYNC_READ 0x1000 /* allow multiple reads of some file */ #define FSESS_POSIX_LOCKS 0x2000 /* daemon supports POSIX locks */ #define FSESS_EXPORT_SUPPORT 0x10000 /* daemon supports NFS-style lookups */ #define FSESS_INTR 0x20000 /* interruptible mounts */ #define FSESS_WARN_SHORT_WRITE 0x40000 /* Short write without direct_io */ #define FSESS_WARN_WROTE_LONG 0x80000 /* Wrote more data than provided */ #define FSESS_WARN_LSEXTATTR_LONG 0x100000 /* Returned too many extattrs */ #define FSESS_WARN_CACHE_INCOHERENT 0x200000 /* Read cache incoherent */ #define FSESS_WARN_WB_CACHE_INCOHERENT 0x400000 /* WB cache incoherent */ #define FSESS_WARN_ILLEGAL_INODE 0x800000 /* Illegal inode for new file */ #define FSESS_WARN_READLINK_EMBEDDED_NUL 0x1000000 /* corrupt READLINK output */ +#define FSESS_WARN_DOT_LOOKUP 0x2000000 /* Inconsistent . 
LOOKUP response */ +#define FSESS_WARN_INODE_MISMATCH 0x4000000 /* ino != nodeid */ #define FSESS_MNTOPTS_MASK ( \ FSESS_DAEMON_CAN_SPY | FSESS_PUSH_SYMLINKS_IN | \ FSESS_DEFAULT_PERMISSIONS | FSESS_INTR) extern int fuse_data_cache_mode; static inline struct fuse_data * fuse_get_mpdata(struct mount *mp) { return mp->mnt_data; } static inline bool fsess_is_impl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); return ((data->isimpl & (1ULL << opcode)) != 0); } static inline bool fsess_maybe_impl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); return ((data->notimpl & (1ULL << opcode)) == 0); } static inline bool fsess_not_impl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); return ((data->notimpl & (1ULL << opcode)) != 0); } static inline void fsess_set_impl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); data->isimpl |= (1ULL << opcode); } static inline void fsess_set_notimpl(struct mount *mp, int opcode) { struct fuse_data *data = fuse_get_mpdata(mp); data->notimpl |= (1ULL << opcode); } static inline bool fsess_opt_datacache(struct mount *mp) { struct fuse_data *data = fuse_get_mpdata(mp); return (data->cache_mode != FUSE_CACHE_UC); } static inline bool fsess_opt_mmap(struct mount *mp) { return (fsess_opt_datacache(mp)); } static inline bool fsess_opt_writeback(struct mount *mp) { struct fuse_data *data = fuse_get_mpdata(mp); return (data->cache_mode == FUSE_CACHE_WB); } /* Insert a new upgoing message */ static inline void fuse_ms_push(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED); refcount_acquire(&ftick->tk_refcount); STAILQ_INSERT_TAIL(&ftick->tk_data->ms_head, ftick, tk_ms_link); ftick->tk_data->ms_count++; } /* Insert a new upgoing message to the front of the queue */ static inline void fuse_ms_push_head(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED); 
refcount_acquire(&ftick->tk_refcount); STAILQ_INSERT_HEAD(&ftick->tk_data->ms_head, ftick, tk_ms_link); ftick->tk_data->ms_count++; } static inline struct fuse_ticket * fuse_ms_pop(struct fuse_data *data) { struct fuse_ticket *ftick = NULL; mtx_assert(&data->ms_mtx, MA_OWNED); if ((ftick = STAILQ_FIRST(&data->ms_head))) { STAILQ_REMOVE_HEAD(&data->ms_head, tk_ms_link); data->ms_count--; #ifdef INVARIANTS MPASS(data->ms_count >= 0); ftick->tk_ms_link.stqe_next = NULL; #endif } return (ftick); } static inline void fuse_aw_push(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED); refcount_acquire(&ftick->tk_refcount); TAILQ_INSERT_TAIL(&ftick->tk_data->aw_head, ftick, tk_aw_link); } static inline void fuse_aw_remove(struct fuse_ticket *ftick) { mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED); TAILQ_REMOVE(&ftick->tk_data->aw_head, ftick, tk_aw_link); #ifdef INVARIANTS ftick->tk_aw_link.tqe_next = NULL; ftick->tk_aw_link.tqe_prev = NULL; #endif } static inline struct fuse_ticket * fuse_aw_pop(struct fuse_data *data) { struct fuse_ticket *ftick; mtx_assert(&data->aw_mtx, MA_OWNED); if ((ftick = TAILQ_FIRST(&data->aw_head)) != NULL) fuse_aw_remove(ftick); return (ftick); } struct fuse_ticket *fuse_ticket_fetch(struct fuse_data *data); int fuse_ticket_drop(struct fuse_ticket *ftick); void fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t *handler); void fuse_insert_message(struct fuse_ticket *ftick, bool irq); static inline bool fuse_libabi_geq(struct fuse_data *data, uint32_t abi_maj, uint32_t abi_min) { return (data->fuse_libabi_major > abi_maj || (data->fuse_libabi_major == abi_maj && data->fuse_libabi_minor >= abi_min)); } /* Print msg as a warning to the console, but no more than once per session */ void fuse_warn(struct fuse_data *data, unsigned flag, const char *msg); struct fuse_data *fdata_alloc(struct cdev *dev, struct ucred *cred); void fdata_trydestroy(struct fuse_data *data); void fdata_set_dead(struct fuse_data *data); 
static inline bool fdata_get_dead(struct fuse_data *data) { return (data->dataflags & FSESS_DEAD); } struct fuse_dispatcher { struct fuse_ticket *tick; struct fuse_in_header *finh; void *indata; size_t iosize; uint64_t nodeid; int answ_stat; void *answ; }; static inline void fdisp_init(struct fuse_dispatcher *fdisp, size_t iosize) { fdisp->iosize = iosize; fdisp->tick = NULL; } static inline void fdisp_destroy(struct fuse_dispatcher *fdisp) { fuse_ticket_drop(fdisp->tick); #ifdef INVARIANTS fdisp->tick = NULL; #endif } void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct mount *mp, uint64_t nid, struct thread *td, struct ucred *cred); void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred); void fdisp_refresh_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred); int fdisp_wait_answ(struct fuse_dispatcher *fdip); static inline int fdisp_simple_putget_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, struct vnode *vp, struct thread *td, struct ucred *cred) { fdisp_make_vp(fdip, op, vp, td, cred); return (fdisp_wait_answ(fdip)); } #endif /* _FUSE_IPC_H_ */ diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c index 0a24d0da4fac..742dc66bcafc 100644 --- a/sys/fs/fuse/fuse_node.c +++ b/sys/fs/fuse/fuse_node.c @@ -1,541 +1,560 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_node.h" #include "fuse_internal.h" #include "fuse_io.h" #include "fuse_ipc.h" SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , node, trace, "int", "char*"); MALLOC_DEFINE(M_FUSEVN, "fuse_vnode", "fuse vnode private data"); static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS); static counter_u64_t fuse_node_count; SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, node_count, CTLFLAG_RD, &fuse_node_count, "Count of FUSE vnodes"); int fuse_data_cache_mode = FUSE_CACHE_WT; /* * OBSOLETE * This sysctl is no longer needed as of fuse protocol 7.23. 
Now, individual * servers can select the cache behavior they need for each mountpoint: * - writethrough: the default * - writeback: set FUSE_WRITEBACK_CACHE in fuse_init_out.flags * - uncached: set FOPEN_DIRECT_IO for every file * The sysctl is retained primarily due to the enduring popularity of libfuse2, * which is frozen at protocol version 7.19. As of 4-April-2024, 90% of * FreeBSD ports that use libfuse still bind to libfuse2. */ SYSCTL_PROC(_vfs_fusefs, OID_AUTO, data_cache_mode, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &fuse_data_cache_mode, 0, sysctl_fuse_cache_mode, "I", "Zero: disable caching of FUSE file data; One: write-through caching " "(default); Two: write-back caching (generally unsafe)"); static int sysctl_fuse_cache_mode(SYSCTL_HANDLER_ARGS) { int val, error; val = *(int *)arg1; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return (error); switch (val) { case FUSE_CACHE_UC: case FUSE_CACHE_WT: case FUSE_CACHE_WB: *(int *)arg1 = val; break; default: return (EDOM); } return (0); } static void fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat, uint64_t nodeid, __enum_uint8(vtype) vtyp) { fvdat->nid = nodeid; LIST_INIT(&fvdat->handles); vattr_null(&fvdat->cached_attrs); fvdat->cached_attrs.va_birthtime.tv_sec = -1; fvdat->cached_attrs.va_birthtime.tv_nsec = 0; fvdat->cached_attrs.va_fsid = VNOVAL; fvdat->cached_attrs.va_gen = 0; fvdat->cached_attrs.va_rdev = NODEV; if (nodeid == FUSE_ROOT_ID) { vp->v_vflag |= VV_ROOT; } vp->v_type = vtyp; vp->v_data = fvdat; cluster_init_vn(&fvdat->clusterw); timespecclear(&fvdat->last_local_modify); counter_u64_add(fuse_node_count, 1); } void fuse_vnode_destroy(struct vnode *vp) { struct fuse_vnode_data *fvdat = vp->v_data; vp->v_data = NULL; KASSERT(LIST_EMPTY(&fvdat->handles), ("Destroying fuse vnode with open files!")); free(fvdat, M_FUSEVN); counter_u64_add(fuse_node_count, -1); } int fuse_vnode_cmp(struct vnode *vp, void *nidp) { return (VTOI(vp) != *((uint64_t 
*)nidp)); } SDT_PROBE_DEFINE3(fusefs, , node, stale_vnode, "struct vnode*", "uint8_t", "uint64_t"); static int fuse_vnode_alloc(struct mount *mp, struct thread *td, uint64_t nodeid, __enum_uint8(vtype) vtyp, struct vnode **vpp) { struct fuse_data *data; struct fuse_vnode_data *fvdat; struct vnode *vp2; int err = 0; data = fuse_get_mpdata(mp); if (vtyp == VNON) { return EINVAL; } *vpp = NULL; err = vfs_hash_get(mp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, vpp, fuse_vnode_cmp, &nodeid); if (err) return (err); if (*vpp) { if ((*vpp)->v_type == vtyp) { /* Reuse a vnode that hasn't yet been reclaimed */ MPASS((*vpp)->v_data != NULL); MPASS(VTOFUD(*vpp)->nid == nodeid); SDT_PROBE2(fusefs, , node, trace, 1, "vnode taken from hash"); return (0); } else { /* * The inode changed types! If we get here, we can't * tell whether the inode's entry cache had expired * yet. So this could be the result of a buggy server, * but more likely the server just reused an inode * number following an entry cache expiration. */ SDT_PROBE3(fusefs, , node, stale_vnode, *vpp, vtyp, nodeid); fuse_internal_vnode_disappear(*vpp); vgone(*vpp); lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); } } fvdat = malloc(sizeof(*fvdat), M_FUSEVN, M_WAITOK | M_ZERO); switch (vtyp) { case VFIFO: err = getnewvnode("fuse", mp, &fuse_fifoops, vpp); break; default: err = getnewvnode("fuse", mp, &fuse_vnops, vpp); break; } if (err) { free(fvdat, M_FUSEVN); return (err); } lockmgr((*vpp)->v_vnlock, LK_EXCLUSIVE, NULL); fuse_vnode_init(*vpp, fvdat, nodeid, vtyp); err = insmntque(*vpp, mp); ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); if (err) { lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); free(fvdat, M_FUSEVN); *vpp = NULL; return (err); } /* Disallow async reads for fifos because UFS does. 
I don't know why */ if (data->dataflags & FSESS_ASYNC_READ && vtyp != VFIFO) VN_LOCK_ASHARE(*vpp); vn_set_state(*vpp, VSTATE_CONSTRUCTED); err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, &vp2, fuse_vnode_cmp, &nodeid); if (err) { lockmgr((*vpp)->v_vnlock, LK_RELEASE, NULL); free(fvdat, M_FUSEVN); *vpp = NULL; return (err); } if (vp2 != NULL) { *vpp = vp2; return (0); } ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc"); return (0); } /* * Obtain the vnode for `nodeid` through fuse_vnode_alloc() and return it in * *vpp. When dvp is non-NULL it is recorded as the new vnode's parent, and * the result is entered into the name cache if cnp has MAKEENTRY set and feo * carries a nonzero entry validity. The vnode's generation is taken from feo * (0 when feo is NULL) and nlookup is incremented unless cnp names "." or * "..". * * Returns EIO, after warning, if nodeid equals dvp's nodeid; otherwise 0 or * the error from fuse_vnode_alloc(). */ int fuse_vnode_get(struct mount *mp, struct fuse_entry_out *feo, uint64_t nodeid, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, __enum_uint8(vtype) vtyp) { struct thread *td = curthread; + bool exportable = fuse_get_mpdata(mp)->dataflags & FSESS_EXPORT_SUPPORT; + /* * feo should only be NULL for the root directory, which (when libfuse * is used) always has generation 0 */ uint64_t generation = feo ? feo->generation : 0; int err = 0; if (dvp != NULL && VTOFUD(dvp)->nid == nodeid) { fuse_warn(fuse_get_mpdata(mp), FSESS_WARN_ILLEGAL_INODE, "Assigned same inode to both parent and child."); return EIO; } + if (feo && feo->nodeid != feo->attr.ino && exportable) { + /* + * NFS servers (both kernelspace and userspace) rely on + * VFS_VGET to look up inodes. But that's only possible if the + * file's inode number matches its nodeid, which isn't + * necessarily the case for FUSE. If they don't match, then we + * can complete the current operation, but future VFS_VGET + * operations will almost certainly return spurious results. + * Warn the operator. + * + * But only warn the operator if the file system reports + * NFS-compatibility, because that's the only time that this + * matters, and dumb fuse servers abound.
+ */ + fuse_warn(fuse_get_mpdata(mp), FSESS_WARN_INODE_MISMATCH, + "file has different inode number and nodeid."); + } err = fuse_vnode_alloc(mp, td, nodeid, vtyp, vpp); if (err) { return err; } if (dvp != NULL) { MPASS(cnp && (cnp->cn_flags & ISDOTDOT) == 0); MPASS(cnp && !(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')); fuse_vnode_setparent(*vpp, dvp); } if (dvp != NULL && cnp != NULL && (cnp->cn_flags & MAKEENTRY) != 0 && feo != NULL && (feo->entry_valid != 0 || feo->entry_valid_nsec != 0)) { struct timespec timeout; ASSERT_VOP_LOCKED(*vpp, "fuse_vnode_get"); ASSERT_VOP_LOCKED(dvp, "fuse_vnode_get"); fuse_validity_2_timespec(feo, &timeout); cache_enter_time(dvp, *vpp, cnp, &timeout, NULL); } VTOFUD(*vpp)->generation = generation; /* * In userland, libfuse uses cached lookups for dot and dotdot entries, * thus it does not really bump the nlookup counter for forget. * Follow the same semantic and avoid the bump in order to keep * nlookup counters consistent. */ if (cnp == NULL || ((cnp->cn_flags & ISDOTDOT) == 0 && (cnp->cn_namelen != 1 || cnp->cn_nameptr[0] != '.'))) VTOFUD(*vpp)->nlookup++; return 0; } /* * Called for every fusefs vnode open to initialize the vnode (not * fuse_filehandle) for use */ void fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td) { if (vnode_vtype(vp) == VREG) vnode_create_vobject(vp, VNODE_NO_SIZE, td); } /* * Send the vnode's cached size (cached_attrs.va_size) to the server in a * FUSE_SETATTR request with FATTR_SIZE set. The vnode must be exclusively * locked and FN_SIZECHANGE must be set. If cred is NULL, the current * thread's credentials are used. * * Returns EBADF if fuse_isdeadfs() is true, EISDIR for a directory, EROFS * for a read-only mount, and otherwise the result of the request. On * success FN_SIZECHANGE is cleared and last_local_modify is updated. */ int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct thread *td = curthread; struct fuse_filehandle *fufh = NULL; struct fuse_dispatcher fdi; struct fuse_setattr_in *fsai; int err = 0; ASSERT_VOP_ELOCKED(vp, "fuse_vnode_savesize"); if (fuse_isdeadfs(vp)) { return EBADF; } if (vnode_vtype(vp) == VDIR) { return EISDIR; } if (vfs_isrdonly(vnode_mount(vp))) { return EROFS; } if (cred == NULL) { cred = td->td_ucred; } fdisp_init(&fdi, sizeof(*fsai)); fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); fsai = fdi.indata; fsai->valid = 0; /*
Truncate to a new value. */ MPASS((fvdat->flag & FN_SIZECHANGE) != 0); fsai->size = fvdat->cached_attrs.va_size; fsai->valid |= FATTR_SIZE; fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); if (fufh) { fsai->fh = fufh->fh_id; fsai->valid |= FATTR_FH; } err = fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); if (err == 0) { getnanouptime(&fvdat->last_local_modify); fvdat->flag &= ~FN_SIZECHANGE; } return err; } /* * Adjust the vnode's size to a new value. * * If the new value came from the server, such as from a FUSE_GETATTR * operation, set `from_server` true. But if it came from a local operation, * such as write(2) or truncate(2), set `from_server` false. */ int fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct vattr *attrs; off_t oldsize; size_t iosize; struct buf *bp = NULL; int err = 0; ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize"); iosize = fuse_iosize(vp); oldsize = fvdat->cached_attrs.va_size; fvdat->cached_attrs.va_size = newsize; if ((attrs = VTOVA(vp)) != NULL) attrs->va_size = newsize; if (newsize < oldsize) { daddr_t lbn; err = vtruncbuf(vp, newsize, fuse_iosize(vp)); if (err) goto out; if (newsize % iosize == 0) goto out; /* * Zero the contents of the last partial block. * Sure seems like vtruncbuf should do this for us. */ lbn = newsize / iosize; bp = getblk(vp, lbn, iosize, PCATCH, 0, 0); if (!bp) { err = EINTR; goto out; } if (!(bp->b_flags & B_CACHE)) goto out; /* Nothing to do */ MPASS(bp->b_flags & B_VMIO); vfs_bio_clrbuf(bp); bp->b_dirtyend = MIN(bp->b_dirtyend, newsize - lbn * iosize); } else if (from_server && newsize > oldsize && oldsize != VNOVAL) { /* * The FUSE server changed the file size behind our back. We * should invalidate the entire cache. 
*/ daddr_t end_lbn; end_lbn = howmany(newsize, iosize); v_inval_buf_range(vp, 0, end_lbn, iosize); } out: if (bp) brelse(bp); vnode_pager_setsize(vp, newsize); return err; } /* Get the current, possibly dirty, size of the file */ int fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); int error = 0; if (!(fvdat->flag & FN_SIZECHANGE) && (!fuse_vnode_attr_cache_valid(vp) || fvdat->cached_attrs.va_size == VNOVAL)) error = fuse_internal_do_getattr(vp, NULL, cred, td); if (!error) *filesize = fvdat->cached_attrs.va_size; return error; } void fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime) { struct fuse_vnode_data *fvdat = VTOFUD(vp); fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE); if (atime) fvdat->flag &= ~FN_ATIMECHANGE; } /* Update a fuse file's cached timestamps */ void fuse_vnode_update(struct vnode *vp, int flags) { struct fuse_vnode_data *fvdat = VTOFUD(vp); struct mount *mp = vnode_mount(vp); struct fuse_data *data = fuse_get_mpdata(mp); struct timespec ts; vfs_timestamp(&ts); if (data->time_gran > 1) ts.tv_nsec = rounddown(ts.tv_nsec, data->time_gran); if (mp->mnt_flag & MNT_NOATIME) flags &= ~FN_ATIMECHANGE; if (flags & FN_ATIMECHANGE) fvdat->cached_attrs.va_atime = ts; if (flags & FN_MTIMECHANGE) fvdat->cached_attrs.va_mtime = ts; if (flags & FN_CTIMECHANGE) fvdat->cached_attrs.va_ctime = ts; fvdat->flag |= flags; } void fuse_node_init(void) { fuse_node_count = counter_u64_alloc(M_WAITOK); } void fuse_node_destroy(void) { counter_u64_free(fuse_node_count); } diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c index 0da51b865873..48b84d3c75af 100644 --- a/sys/fs/fuse/fuse_vfsops.c +++ b/sys/fs/fuse/fuse_vfsops.c @@ -1,686 +1,699 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2007-2009 Google Inc. and Amit Singh * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (C) 2005 Csaba Henk. * All rights reserved. * * Copyright (c) 2019 The FreeBSD Foundation * * Portions of this software were developed by BFF Storage Systems, LLC under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fuse.h" #include "fuse_node.h" #include "fuse_ipc.h" #include "fuse_internal.h" #include #include SDT_PROVIDER_DECLARE(fusefs); /* * Fuse trace probe: * arg0: verbosity. 
Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(fusefs, , vfsops, trace, "int", "char*"); /* This will do for privilege types for now */ #ifndef PRIV_VFS_FUSE_ALLOWOTHER #define PRIV_VFS_FUSE_ALLOWOTHER PRIV_VFS_MOUNT_NONUSER #endif #ifndef PRIV_VFS_FUSE_MOUNT_NONUSER #define PRIV_VFS_FUSE_MOUNT_NONUSER PRIV_VFS_MOUNT_NONUSER #endif #ifndef PRIV_VFS_FUSE_SYNC_UNMOUNT #define PRIV_VFS_FUSE_SYNC_UNMOUNT PRIV_VFS_MOUNT_NONUSER #endif static vfs_fhtovp_t fuse_vfsop_fhtovp; static vfs_mount_t fuse_vfsop_mount; static vfs_unmount_t fuse_vfsop_unmount; static vfs_root_t fuse_vfsop_root; static vfs_statfs_t fuse_vfsop_statfs; static vfs_vget_t fuse_vfsop_vget; struct vfsops fuse_vfsops = { .vfs_fhtovp = fuse_vfsop_fhtovp, .vfs_mount = fuse_vfsop_mount, .vfs_unmount = fuse_vfsop_unmount, .vfs_root = fuse_vfsop_root, .vfs_statfs = fuse_vfsop_statfs, .vfs_vget = fuse_vfsop_vget, }; static int fuse_enforce_dev_perms = 0; SYSCTL_INT(_vfs_fusefs, OID_AUTO, enforce_dev_perms, CTLFLAG_RW, &fuse_enforce_dev_perms, 0, "enforce fuse device permissions for secondary mounts"); MALLOC_DEFINE(M_FUSEVFS, "fuse_filesystem", "buffer for fuse vfs layer"); static int fuse_getdevice(const char *fspec, struct thread *td, struct cdev **fdevp) { struct nameidata nd, *ndp = &nd; struct vnode *devvp; struct cdev *fdev; int err; /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec); if ((err = namei(ndp)) != 0) return err; NDFREE_PNBUF(ndp); devvp = ndp->ni_vp; if (devvp->v_type != VCHR) { vrele(devvp); return ENXIO; } fdev = devvp->v_rdev; dev_ref(fdev); if (fuse_enforce_dev_perms) { /* * Check if mounter can open the fuse device. 
* * This has significance only if we are doing a secondary mount * which doesn't involve actually opening fuse devices, but we * still want to enforce the permissions of the device (in * order to keep control over the circle of fuse users). * * (In case of primary mounts, we are either the superuser so * we can do anything anyway, or we can mount only if the * device is already opened by us, ie. we are permitted to open * the device.) */ #if 0 #ifdef MAC err = mac_check_vnode_open(td->td_ucred, devvp, VREAD | VWRITE); if (!err) #endif #endif /* 0 */ err = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (err) { vrele(devvp); dev_rel(fdev); return err; } } /* * according to coda code, no extra lock is needed -- * although in sys/vnode.h this field is marked "v" */ vrele(devvp); if (!fdev->si_devsw || strcmp("fuse", fdev->si_devsw->d_name)) { dev_rel(fdev); return ENXIO; } *fdevp = fdev; return 0; } #define FUSE_FLAGOPT(fnam, fval) do { \ vfs_flagopt(opts, #fnam, &mntopts, fval); \ vfs_flagopt(opts, "__" #fnam, &__mntopts, fval); \ } while (0) SDT_PROBE_DEFINE1(fusefs, , vfsops, mntopts, "uint64_t"); SDT_PROBE_DEFINE4(fusefs, , vfsops, mount_err, "char*", "struct fuse_data*", "struct mount*", "int"); static int fuse_vfs_remount(struct mount *mp, struct thread *td, uint64_t mntopts, uint32_t max_read, int daemon_timeout) { int err = 0; struct fuse_data *data = fuse_get_mpdata(mp); /* Don't allow these options to be changed */ const static unsigned long long cant_update_opts = MNT_USER; /* Mount owner must be the user running the daemon */ FUSE_LOCK(); if ((mp->mnt_flag ^ data->mnt_flag) & cant_update_opts) { err = EOPNOTSUPP; SDT_PROBE4(fusefs, , vfsops, mount_err, "Can't change these mount options during remount", data, mp, err); goto out; } if (((data->dataflags ^ mntopts) & FSESS_MNTOPTS_MASK) || (data->max_read != max_read) || (data->daemon_timeout != daemon_timeout)) { // TODO: allow changing options where it makes sense err = EOPNOTSUPP; 
SDT_PROBE4(fusefs, , vfsops, mount_err, "Can't change fuse mount options during remount", data, mp, err); goto out; } if (fdata_get_dead(data)) { err = ENOTCONN; SDT_PROBE4(fusefs, , vfsops, mount_err, "device is dead during mount", data, mp, err); goto out; } /* Sanity + permission checks */ if (!data->daemoncred) panic("fuse daemon found, but identity unknown"); if (mntopts & FSESS_DAEMON_CAN_SPY) err = priv_check(td, PRIV_VFS_FUSE_ALLOWOTHER); if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid) /* are we allowed to do the first mount? */ err = priv_check(td, PRIV_VFS_FUSE_MOUNT_NONUSER); out: FUSE_UNLOCK(); return err; } static int fuse_vfsop_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) { struct fuse_fid *ffhp = (struct fuse_fid *)fhp; struct fuse_vnode_data *fvdat; struct vnode *nvp; int error; if (!(fuse_get_mpdata(mp)->dataflags & FSESS_EXPORT_SUPPORT)) return EOPNOTSUPP; error = VFS_VGET(mp, ffhp->nid, LK_EXCLUSIVE, &nvp); if (error) { *vpp = NULLVP; return (error); } fvdat = VTOFUD(nvp); if (fvdat->generation != ffhp->gen ) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; vnode_create_vobject(*vpp, VNODE_NO_SIZE, curthread); return (0); } static int fuse_vfsop_mount(struct mount *mp) { int err; uint64_t mntopts, __mntopts; uint32_t max_read; int linux_errnos; int daemon_timeout; int fd; struct cdev *fdev; struct fuse_data *data = NULL; struct thread *td; struct file *fp, *fptmp; char *fspec, *subtype, *fsname = NULL; int fsnamelen; struct vfsoptlist *opts; subtype = NULL; max_read = ~0; linux_errnos = 0; err = 0; mntopts = 0; __mntopts = 0; td = curthread; /* Get the new options passed to mount */ opts = mp->mnt_optnew; if (!opts) return EINVAL; /* `fspath' contains the mount point (eg. 
/mnt/fuse/sshfs); REQUIRED */ if (!vfs_getopts(opts, "fspath", &err)) return err; /* * With the help of underscored options the mount program * can inform us from the flags it sets by default */ FUSE_FLAGOPT(allow_other, FSESS_DAEMON_CAN_SPY); FUSE_FLAGOPT(push_symlinks_in, FSESS_PUSH_SYMLINKS_IN); FUSE_FLAGOPT(default_permissions, FSESS_DEFAULT_PERMISSIONS); FUSE_FLAGOPT(intr, FSESS_INTR); (void)vfs_scanopt(opts, "max_read=", "%u", &max_read); (void)vfs_scanopt(opts, "linux_errnos", "%d", &linux_errnos); if (vfs_scanopt(opts, "timeout=", "%u", &daemon_timeout) == 1) { if (daemon_timeout < FUSE_MIN_DAEMON_TIMEOUT) daemon_timeout = FUSE_MIN_DAEMON_TIMEOUT; else if (daemon_timeout > FUSE_MAX_DAEMON_TIMEOUT) daemon_timeout = FUSE_MAX_DAEMON_TIMEOUT; } else { daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT; } subtype = vfs_getopts(opts, "subtype=", &err); SDT_PROBE1(fusefs, , vfsops, mntopts, mntopts); if (mp->mnt_flag & MNT_UPDATE) { return fuse_vfs_remount(mp, td, mntopts, max_read, daemon_timeout); } /* `from' contains the device name (eg. 
/dev/fuse0); REQUIRED */ fspec = vfs_getopts(opts, "from", &err); if (!fspec) return err; /* `fd' contains the filedescriptor for this session; REQUIRED */ if (vfs_scanopt(opts, "fd", "%d", &fd) != 1) return EINVAL; err = fuse_getdevice(fspec, td, &fdev); if (err != 0) return err; err = fget(td, fd, &cap_read_rights, &fp); if (err != 0) { SDT_PROBE2(fusefs, , vfsops, trace, 1, "invalid or not opened device"); goto out; } fptmp = td->td_fpop; td->td_fpop = fp; err = devfs_get_cdevpriv((void **)&data); td->td_fpop = fptmp; fdrop(fp, td); FUSE_LOCK(); if (err != 0 || data == NULL) { err = ENXIO; SDT_PROBE4(fusefs, , vfsops, mount_err, "invalid or not opened device", data, mp, err); FUSE_UNLOCK(); goto out; } if (fdata_get_dead(data)) { err = ENOTCONN; SDT_PROBE4(fusefs, , vfsops, mount_err, "device is dead during mount", data, mp, err); FUSE_UNLOCK(); goto out; } /* Sanity + permission checks */ if (!data->daemoncred) panic("fuse daemon found, but identity unknown"); if (mntopts & FSESS_DAEMON_CAN_SPY) err = priv_check(td, PRIV_VFS_FUSE_ALLOWOTHER); if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid) /* are we allowed to do the first mount? */ err = priv_check(td, PRIV_VFS_FUSE_MOUNT_NONUSER); if (err) { FUSE_UNLOCK(); goto out; } data->ref++; data->mp = mp; data->dataflags |= mntopts; data->max_read = max_read; data->daemon_timeout = daemon_timeout; data->linux_errnos = linux_errnos; data->mnt_flag = mp->mnt_flag & MNT_UPDATEMASK; FUSE_UNLOCK(); vfs_getnewfsid(mp); MNT_ILOCK(mp); mp->mnt_data = data; /* * FUSE file systems can be either local or remote, but the kernel * can't tell the difference. */ mp->mnt_flag &= ~MNT_LOCAL; mp->mnt_kern_flag |= MNTK_USES_BCACHE; /* * Disable nullfs cacheing because it can consume too many resources in * the FUSE server. 
*/ mp->mnt_kern_flag |= MNTK_NULL_NOCACHE; MNT_IUNLOCK(mp); /* We need this here as this slot is used by getnewvnode() */ mp->mnt_stat.f_iosize = maxbcachebuf; if (subtype) { strlcat(mp->mnt_stat.f_fstypename, ".", MFSNAMELEN); strlcat(mp->mnt_stat.f_fstypename, subtype, MFSNAMELEN); } memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN); vfs_getopt(opts, "fsname=", (void**)&fsname, &fsnamelen); strlcpy(mp->mnt_stat.f_mntfromname, fsname == NULL ? fspec : fsname, MNAMELEN); mp->mnt_iosize_max = maxphys; /* Now handshaking with daemon */ fuse_internal_send_init(data, td); out: if (err) { FUSE_LOCK(); if (data != NULL && data->mp == mp) { /* * Destroy device only if we acquired reference to * it */ SDT_PROBE4(fusefs, , vfsops, mount_err, "mount failed, destroy device", data, mp, err); data->mp = NULL; mp->mnt_data = NULL; fdata_trydestroy(data); } FUSE_UNLOCK(); dev_rel(fdev); } return err; } static int fuse_vfsop_unmount(struct mount *mp, int mntflags) { int err = 0; int flags = 0; struct cdev *fdev; struct fuse_data *data; struct fuse_dispatcher fdi; struct thread *td = curthread; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } data = fuse_get_mpdata(mp); if (!data) { panic("no private data for mount point?"); } /* There is 1 extra root vnode reference (mp->mnt_data). 
*/ FUSE_LOCK(); if (data->vroot != NULL) { struct vnode *vroot = data->vroot; data->vroot = NULL; FUSE_UNLOCK(); vrele(vroot); } else FUSE_UNLOCK(); err = vflush(mp, 0, flags, td); if (err) { return err; } if (fdata_get_dead(data)) { goto alreadydead; } if (fsess_maybe_impl(mp, FUSE_DESTROY)) { fdisp_init(&fdi, 0); fdisp_make(&fdi, FUSE_DESTROY, mp, 0, td, NULL); (void)fdisp_wait_answ(&fdi); fdisp_destroy(&fdi); } fdata_set_dead(data); alreadydead: FUSE_LOCK(); data->mp = NULL; fdev = data->fdev; fdata_trydestroy(data); FUSE_UNLOCK(); MNT_ILOCK(mp); mp->mnt_data = NULL; MNT_IUNLOCK(mp); dev_rel(fdev); return 0; } SDT_PROBE_DEFINE1(fusefs, , vfsops, invalidate_without_export, "struct mount*"); static int fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) { struct fuse_data *data = fuse_get_mpdata(mp); uint64_t nodeid = ino; struct thread *td = curthread; struct fuse_dispatcher fdi; struct fuse_entry_out *feo; struct fuse_vnode_data *fvdat; struct timespec now; const char dot[] = "."; __enum_uint8(vtype) vtyp; int error; if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) { /* * Unreachable unless you do something stupid, like export a * nullfs mount of a fusefs file system. */ SDT_PROBE1(fusefs, , vfsops, invalidate_without_export, mp); return (EOPNOTSUPP); } error = fuse_internal_get_cached_vnode(mp, ino, flags, vpp); if (error || *vpp != NULL) return error; getnanouptime(&now); /* Do a LOOKUP, using nodeid as the parent and "." as filename */ fdisp_init(&fdi, sizeof(dot)); fdisp_make(&fdi, FUSE_LOOKUP, mp, nodeid, td, td->td_ucred); memcpy(fdi.indata, dot, sizeof(dot)); error = fdisp_wait_answ(&fdi); if (error) goto out; feo = (struct fuse_entry_out *)fdi.answ; + if (feo->nodeid == 0) { /* zero nodeid means ENOENT and cache it */ error = ENOENT; goto out; } + if (feo->nodeid != nodeid) { + /* + * Something is very wrong with the server if "foo/." has a + * different inode number than "foo". 
+ */ + fuse_warn(data, FSESS_WARN_DOT_LOOKUP, + "Inconsistent LOOKUP response: \"FILE/.\" has a different " + "inode number than \"FILE\"."); + error = EIO; + goto out; + } + vtyp = IFTOVT(feo->attr.mode); error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp); if (error) goto out; fvdat = VTOFUD(*vpp); if (timespeccmp(&now, &fvdat->last_local_modify, >)) { /* * Attributes from the server are definitely newer than the * last attributes we sent to the server, so cache them. */ fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid, feo->attr_valid_nsec, NULL, true); } fuse_validity_2_bintime(feo->entry_valid, feo->entry_valid_nsec, &fvdat->entry_cache_timeout); out: fdisp_destroy(&fdi); return error; } static int fuse_vfsop_root(struct mount *mp, int lkflags, struct vnode **vpp) { struct fuse_data *data = fuse_get_mpdata(mp); int err = 0; if (data->vroot != NULL) { err = vget(data->vroot, lkflags); if (err == 0) *vpp = data->vroot; } else { err = fuse_vnode_get(mp, NULL, FUSE_ROOT_ID, NULL, vpp, NULL, VDIR); if (err == 0) { FUSE_LOCK(); MPASS(data->vroot == NULL || data->vroot == *vpp); if (data->vroot == NULL) { SDT_PROBE2(fusefs, , vfsops, trace, 1, "new root vnode"); data->vroot = *vpp; FUSE_UNLOCK(); vref(*vpp); } else if (data->vroot != *vpp) { SDT_PROBE2(fusefs, , vfsops, trace, 1, "root vnode race"); FUSE_UNLOCK(); vput(*vpp); vrecycle(*vpp); *vpp = data->vroot; } else FUSE_UNLOCK(); } } return err; } static int fuse_vfsop_statfs(struct mount *mp, struct statfs *sbp) { struct fuse_dispatcher fdi; int err = 0; struct fuse_statfs_out *fsfo; struct fuse_data *data; data = fuse_get_mpdata(mp); if (!(data->dataflags & FSESS_INITED)) goto fake; fdisp_init(&fdi, 0); fdisp_make(&fdi, FUSE_STATFS, mp, FUSE_ROOT_ID, NULL, NULL); err = fdisp_wait_answ(&fdi); if (err) { fdisp_destroy(&fdi); if (err == ENOTCONN) { /* * We want to seem a legitimate fs even if the daemon * is stiff dead... 
(so that, eg., we can still do path * based unmounting after the daemon dies). */ goto fake; } return err; } fsfo = fdi.answ; sbp->f_blocks = fsfo->st.blocks; sbp->f_bfree = fsfo->st.bfree; sbp->f_bavail = fsfo->st.bavail; sbp->f_files = fsfo->st.files; sbp->f_ffree = fsfo->st.ffree; /* cast from uint64_t to int64_t */ sbp->f_namemax = fsfo->st.namelen; sbp->f_bsize = fsfo->st.frsize; /* cast from uint32_t to uint64_t */ fdisp_destroy(&fdi); return 0; fake: sbp->f_blocks = 0; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 0; sbp->f_ffree = 0; sbp->f_namemax = 0; sbp->f_bsize = S_BLKSIZE; return 0; } diff --git a/tests/sys/fs/fusefs/last_local_modify.cc b/tests/sys/fs/fusefs/last_local_modify.cc index 495bfd8aa959..5fcd3c36c892 100644 --- a/tests/sys/fs/fusefs/last_local_modify.cc +++ b/tests/sys/fs/fusefs/last_local_modify.cc @@ -1,514 +1,513 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Alan Somers * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ extern "C" { #include #include #include #include #include #include } #include "mockfs.hh" #include "utils.hh" using namespace testing; /* * "Last Local Modify" bugs * * This file tests a class of race conditions caused by one thread fetching a * file's size with FUSE_LOOKUP while another thread simultaneously modifies it * with FUSE_SETATTR, FUSE_WRITE, FUSE_COPY_FILE_RANGE or similar. It's * possible for the second thread to start later yet finish first. If that * happens, the first thread must not override the size set by the second * thread. * * FUSE_GETATTR is not vulnerable to the same race, because it is always called * with the vnode lock held. * * A few other operations like FUSE_LINK can also trigger the same race but * with the file's ctime instead of size. However, the consequences of an * incorrect ctime are much less disastrous than an incorrect size, so fusefs * does not attempt to prevent such races. */ /* The file-modifying operation that a parameterized test case races against a lookup */ enum Mutator { VOP_ALLOCATE, VOP_COPY_FILE_RANGE, VOP_SETATTR, VOP_WRITE, }; /* * Translate a Mutator's string representation to the enum value.
* Using strings with ::testing::Values gives better output with * --gtest_list_tests */ enum Mutator writer_from_str(const char* s) { if (0 == strcmp("VOP_ALLOCATE", s)) return VOP_ALLOCATE; else if (0 == strcmp("VOP_COPY_FILE_RANGE", s)) return VOP_COPY_FILE_RANGE; else if (0 == strcmp("VOP_SETATTR", s)) return VOP_SETATTR; else return VOP_WRITE; } uint32_t fuse_op_from_mutator(enum Mutator mutator) { switch(mutator) { case VOP_ALLOCATE: return(FUSE_FALLOCATE); case VOP_COPY_FILE_RANGE: return(FUSE_COPY_FILE_RANGE); case VOP_SETATTR: return(FUSE_SETATTR); case VOP_WRITE: return(FUSE_WRITE); } } class LastLocalModify: public FuseTest, public WithParamInterface { public: virtual void SetUp() { m_init_flags = FUSE_EXPORT_SUPPORT; FuseTest::SetUp(); } }; static void* allocate_th(void* arg) { int fd; ssize_t r; sem_t *sem = (sem_t*) arg; if (sem) sem_wait(sem); fd = open("mountpoint/some_file.txt", O_RDWR); if (fd < 0) return (void*)(intptr_t)errno; r = posix_fallocate(fd, 0, 15); LastLocalModify::leak(fd); if (r >= 0) return 0; else return (void*)(intptr_t)errno; } static void* copy_file_range_th(void* arg) { ssize_t r; int fd; sem_t *sem = (sem_t*) arg; off_t off_in = 0; off_t off_out = 10; ssize_t len = 5; if (sem) sem_wait(sem); fd = open("mountpoint/some_file.txt", O_RDWR); if (fd < 0) return (void*)(intptr_t)errno; r = copy_file_range(fd, &off_in, fd, &off_out, len, 0); if (r >= 0) { LastLocalModify::leak(fd); return 0; } else return (void*)(intptr_t)errno; } static void* setattr_th(void* arg) { int fd; ssize_t r; sem_t *sem = (sem_t*) arg; if (sem) sem_wait(sem); fd = open("mountpoint/some_file.txt", O_RDWR); if (fd < 0) return (void*)(intptr_t)errno; r = ftruncate(fd, 15); LastLocalModify::leak(fd); if (r >= 0) return 0; else return (void*)(intptr_t)errno; } static void* write_th(void* arg) { ssize_t r; int fd; sem_t *sem = (sem_t*) arg; const char BUF[] = "abcdefghijklmn"; if (sem) sem_wait(sem); fd = open("mountpoint/some_file.txt", O_RDWR); if (fd < 0) 
return (void*)(intptr_t)errno; r = write(fd, BUF, sizeof(BUF)); if (r >= 0) { LastLocalModify::leak(fd); return 0; } else return (void*)(intptr_t)errno; } /* * VOP_LOOKUP should discard attributes returned by the server if they were * modified by another VOP while the VOP_LOOKUP was in progress. * * Sequence of operations: * * Thread 1 calls a mutator like ftruncate, which acquires the vnode lock * exclusively. * * Thread 2 calls stat, which does VOP_LOOKUP, which sends FUSE_LOOKUP to the * server. The server replies with the old file length. Thread 2 blocks * waiting for the vnode lock. * * Thread 1 sends the mutator operation like FUSE_SETATTR that changes the * file's size and updates the attribute cache. Then it releases the vnode * lock. * * Thread 2 acquires the vnode lock. At this point it must not add the * now-stale file size to the attribute cache. * * Regression test for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=259071 */ TEST_P(LastLocalModify, lookup) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; Sequence seq; uint64_t ino = 3; uint64_t mutator_unique; const uint64_t oldsize = 10; const uint64_t newsize = 15; pthread_t th0; void *thr0_value; struct stat sb; static sem_t sem; Mutator mutator; uint32_t mutator_op; size_t mutator_size; mutator = writer_from_str(GetParam()); mutator_op = fuse_op_from_mutator(mutator); ASSERT_EQ(0, sem_init(&sem, 0, 0)) << strerror(errno); EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .InSequence(seq) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { /* Called by the mutator, caches attributes but not entries */ SET_OUT_HEADER_LEN(out, entry); out.body.entry.nodeid = ino; out.body.entry.attr.size = oldsize; - out.body.entry.nodeid = ino; out.body.entry.attr_valid_nsec = NAP_NS / 2; out.body.entry.attr.ino = ino; out.body.entry.attr.mode = S_IFREG | 0644; }))); expect_open(ino, 0, 1); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return 
(in.header.opcode == mutator_op && in.header.nodeid == ino); }, Eq(true)), _) ).InSequence(seq) .WillOnce(Invoke([&](auto in, auto &out __unused) { /* * The mutator changes the file size, but in order to simulate * a race, don't reply. Instead, just save the unique for * later. */ mutator_unique = in.header.unique; switch(mutator) { case VOP_WRITE: mutator_size = in.body.write.size; break; case VOP_COPY_FILE_RANGE: mutator_size = in.body.copy_file_range.len; break; default: break; } /* Allow the lookup thread to proceed */ sem_post(&sem); })); EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .InSequence(seq) .WillOnce(Invoke([&](auto in __unused, auto& out) { std::unique_ptr out0(new mockfs_buf_out); std::unique_ptr out1(new mockfs_buf_out); /* First complete the lookup request, returning the old size */ out0->header.unique = in.header.unique; SET_OUT_HEADER_LEN(*out0, entry); out0->body.entry.attr.mode = S_IFREG | 0644; out0->body.entry.nodeid = ino; + out0->body.entry.attr.ino = ino; out0->body.entry.entry_valid = UINT64_MAX; out0->body.entry.attr_valid = UINT64_MAX; out0->body.entry.attr.size = oldsize; out.push_back(std::move(out0)); /* Then, respond to the mutator request */ out1->header.unique = mutator_unique; switch(mutator) { case VOP_ALLOCATE: out1->header.error = 0; out1->header.len = sizeof(out1->header); break; case VOP_COPY_FILE_RANGE: SET_OUT_HEADER_LEN(*out1, write); out1->body.write.size = mutator_size; break; case VOP_SETATTR: SET_OUT_HEADER_LEN(*out1, attr); out1->body.attr.attr.ino = ino; out1->body.attr.attr.mode = S_IFREG | 0644; out1->body.attr.attr.size = newsize; // Changed size out1->body.attr.attr_valid = UINT64_MAX; break; case VOP_WRITE: SET_OUT_HEADER_LEN(*out1, write); out1->body.write.size = mutator_size; break; } out.push_back(std::move(out1)); })); /* Start the mutator thread */ switch(mutator) { case VOP_ALLOCATE: ASSERT_EQ(0, pthread_create(&th0, NULL, allocate_th, NULL)) << strerror(errno); break; case VOP_COPY_FILE_RANGE: ASSERT_EQ(0, 
pthread_create(&th0, NULL, copy_file_range_th, NULL)) << strerror(errno); break; case VOP_SETATTR: ASSERT_EQ(0, pthread_create(&th0, NULL, setattr_th, NULL)) << strerror(errno); break; case VOP_WRITE: ASSERT_EQ(0, pthread_create(&th0, NULL, write_th, NULL)) << strerror(errno); break; } /* Wait for FUSE_SETATTR to be sent */ sem_wait(&sem); /* Lookup again, which will race with setattr */ ASSERT_EQ(0, stat(FULLPATH, &sb)) << strerror(errno); ASSERT_EQ((off_t)newsize, sb.st_size); /* ftruncate should've completed without error */ pthread_join(th0, &thr0_value); EXPECT_EQ(0, (intptr_t)thr0_value); } /* * VFS_VGET should discard attributes returned by the server if they were * modified by another VOP while the VFS_VGET was in progress. * * Sequence of operations: * * Thread 1 calls fhstat, entering VFS_VGET, and issues FUSE_LOOKUP * * Thread 2 calls a mutator like ftruncate, which acquires the vnode lock * exclusively and issues a FUSE op like FUSE_SETATTR. * * Thread 1's FUSE_LOOKUP returns with the old size, but the thread blocks * waiting for the vnode lock. * * Thread 2's FUSE op returns, and that thread sets the file's new size * in the attribute cache. Finally it releases the vnode lock. * * The vnode lock acquired, thread 1 must not overwrite the attr cache's size * with the old value. 
* * Regression test for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=259071 */ TEST_P(LastLocalModify, vfs_vget) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; Sequence seq; uint64_t ino = 3; uint64_t lookup_unique; const uint64_t oldsize = 10; const uint64_t newsize = 15; pthread_t th0; void *thr0_value; struct stat sb; static sem_t sem; fhandle_t fhp; Mutator mutator; uint32_t mutator_op; if (geteuid() != 0) GTEST_SKIP() << "This test requires a privileged user"; mutator = writer_from_str(GetParam()); mutator_op = fuse_op_from_mutator(mutator); ASSERT_EQ(0, sem_init(&sem, 0, 0)) << strerror(errno); EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .Times(1) .InSequence(seq) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { /* Called by getfh, caches attributes but not entries */ SET_OUT_HEADER_LEN(out, entry); out.body.entry.nodeid = ino; out.body.entry.attr.size = oldsize; - out.body.entry.nodeid = ino; out.body.entry.attr_valid_nsec = NAP_NS / 2; out.body.entry.attr.ino = ino; out.body.entry.attr.mode = S_IFREG | 0644; }))); EXPECT_LOOKUP(ino, ".") .InSequence(seq) .WillOnce(Invoke([&](auto in, auto &out __unused) { /* Called by fhstat. 
Block to simulate a race */ lookup_unique = in.header.unique; sem_post(&sem); })); EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .Times(1) .InSequence(seq) .WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { /* Called by VOP_SETATTR, caches attributes but not entries */ SET_OUT_HEADER_LEN(out, entry); out.body.entry.nodeid = ino; out.body.entry.attr.size = oldsize; - out.body.entry.nodeid = ino; out.body.entry.attr_valid_nsec = NAP_NS / 2; out.body.entry.attr.ino = ino; out.body.entry.attr.mode = S_IFREG | 0644; }))); /* Called by the mutator thread */ expect_open(ino, 0, 1); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == mutator_op && in.header.nodeid == ino); }, Eq(true)), _) ).InSequence(seq) .WillOnce(Invoke([&](auto in __unused, auto& out) { std::unique_ptr out0(new mockfs_buf_out); std::unique_ptr out1(new mockfs_buf_out); /* First complete the lookup request, returning the old size */ out0->header.unique = lookup_unique; SET_OUT_HEADER_LEN(*out0, entry); out0->body.entry.attr.mode = S_IFREG | 0644; out0->body.entry.nodeid = ino; + out0->body.entry.attr.ino = ino; out0->body.entry.entry_valid = UINT64_MAX; out0->body.entry.attr_valid = UINT64_MAX; out0->body.entry.attr.size = oldsize; out.push_back(std::move(out0)); /* Then, respond to the mutator request */ out1->header.unique = in.header.unique; switch(mutator) { case VOP_ALLOCATE: out1->header.error = 0; out1->header.len = sizeof(out1->header); break; case VOP_COPY_FILE_RANGE: SET_OUT_HEADER_LEN(*out1, write); out1->body.write.size = in.body.copy_file_range.len; break; case VOP_SETATTR: SET_OUT_HEADER_LEN(*out1, attr); out1->body.attr.attr.ino = ino; out1->body.attr.attr.mode = S_IFREG | 0644; out1->body.attr.attr.size = newsize; // Changed size out1->body.attr.attr_valid = UINT64_MAX; break; case VOP_WRITE: SET_OUT_HEADER_LEN(*out1, write); out1->body.write.size = in.body.write.size; break; } out.push_back(std::move(out1)); })); /* First get a file handle */ 
ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno); /* Start the mutator thread */ switch(mutator) { case VOP_ALLOCATE: ASSERT_EQ(0, pthread_create(&th0, NULL, allocate_th, (void*)&sem)) << strerror(errno); break; case VOP_COPY_FILE_RANGE: ASSERT_EQ(0, pthread_create(&th0, NULL, copy_file_range_th, (void*)&sem)) << strerror(errno); break; case VOP_SETATTR: ASSERT_EQ(0, pthread_create(&th0, NULL, setattr_th, (void*)&sem)) << strerror(errno); break; case VOP_WRITE: ASSERT_EQ(0, pthread_create(&th0, NULL, write_th, (void*)&sem)) << strerror(errno); break; } /* Lookup again, which will race with setattr */ ASSERT_EQ(0, fhstat(&fhp, &sb)) << strerror(errno); ASSERT_EQ((off_t)newsize, sb.st_size); /* mutator should've completed without error */ pthread_join(th0, &thr0_value); EXPECT_EQ(0, (intptr_t)thr0_value); } INSTANTIATE_TEST_SUITE_P(LLM, LastLocalModify, Values( "VOP_ALLOCATE", "VOP_COPY_FILE_RANGE", "VOP_SETATTR", "VOP_WRITE" ) ); diff --git a/tests/sys/fs/fusefs/lookup.cc b/tests/sys/fs/fusefs/lookup.cc index 6d506c1ab700..2cfe888b6b08 100644 --- a/tests/sys/fs/fusefs/lookup.cc +++ b/tests/sys/fs/fusefs/lookup.cc @@ -1,655 +1,662 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ extern "C" { #include #include #include #include } #include "mockfs.hh" #include "utils.hh" using namespace testing; class Lookup: public FuseTest {}; class Lookup_7_8: public Lookup { public: virtual void SetUp() { m_kernel_minor_version = 8; Lookup::SetUp(); } }; class LookupExportable: public Lookup { public: virtual void SetUp() { m_init_flags = FUSE_EXPORT_SUPPORT; Lookup::SetUp(); } }; /* * If lookup returns a non-zero cache timeout, then subsequent VOP_GETATTRs * should use the cached attributes, rather than query the daemon */ TEST_F(Lookup, attr_cache) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; const uint64_t generation = 13; struct stat sb; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.nodeid = ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.attr.ino = ino; // Must match nodeid out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.attr.size = 1; out.body.entry.attr.blocks = 2; out.body.entry.attr.atime = 3; out.body.entry.attr.mtime = 4; out.body.entry.attr.ctime = 5; out.body.entry.attr.atimensec = 6; 
out.body.entry.attr.mtimensec = 7; out.body.entry.attr.ctimensec = 8; out.body.entry.attr.nlink = 9; out.body.entry.attr.uid = 10; out.body.entry.attr.gid = 11; out.body.entry.attr.rdev = 12; out.body.entry.generation = generation; }))); /* stat(2) issues a VOP_LOOKUP followed by a VOP_GETATTR */ ASSERT_EQ(0, stat(FULLPATH, &sb)) << strerror(errno); EXPECT_EQ(1, sb.st_size); EXPECT_EQ(2, sb.st_blocks); EXPECT_EQ(3, sb.st_atim.tv_sec); EXPECT_EQ(6, sb.st_atim.tv_nsec); EXPECT_EQ(4, sb.st_mtim.tv_sec); EXPECT_EQ(7, sb.st_mtim.tv_nsec); EXPECT_EQ(5, sb.st_ctim.tv_sec); EXPECT_EQ(8, sb.st_ctim.tv_nsec); EXPECT_EQ(9ull, sb.st_nlink); EXPECT_EQ(10ul, sb.st_uid); EXPECT_EQ(11ul, sb.st_gid); EXPECT_EQ(12ul, sb.st_rdev); EXPECT_EQ(ino, sb.st_ino); EXPECT_EQ(S_IFREG | 0644, sb.st_mode); // fuse(4) does not _yet_ support inode generations //EXPECT_EQ(generation, sb.st_gen); /* * st_birthtim and st_flags are not supported by the fuse protocol. * They're only supported as OS-specific extensions to OSX. For * birthtime, the convention for "not supported" is "negative one * second". */ EXPECT_EQ(-1, sb.st_birthtim.tv_sec); EXPECT_EQ(0, sb.st_birthtim.tv_nsec); EXPECT_EQ(0u, sb.st_flags); } /* * If lookup returns a finite but non-zero cache timeout, then we should discard * the cached attributes and requery the daemon. 
*/ TEST_F(Lookup, attr_cache_timeout) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; const uint64_t ino = 42; struct stat sb; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .Times(2) .WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.nodeid = ino; out.body.entry.attr_valid_nsec = NAP_NS / 2; out.body.entry.attr.ino = ino; // Must match nodeid out.body.entry.attr.mode = S_IFREG | 0644; }))); /* access(2) will issue a VOP_LOOKUP and fill the attr cache */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); /* Next access(2) will use the cached attributes */ nap(); /* The cache has timed out; VOP_GETATTR should query the daemon*/ ASSERT_EQ(0, stat(FULLPATH, &sb)) << strerror(errno); } /* The mock expects one lookup of "some_dir" under FUSE_ROOT_ID; access(2) on "some_dir/." must then succeed. */ TEST_F(Lookup, dot) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; uint64_t ino = 42; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); /* * access(2) is one of the few syscalls that will not (always) follow * up a successful VOP_LOOKUP with another VOP. */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); } /* The mock expects one lookup of "some_dir" under FUSE_ROOT_ID; access(2) on "some_dir/.." must then succeed. */ TEST_F(Lookup, dotdot) { const char FULLPATH[] = "mountpoint/some_dir/.."; const char RELDIRPATH[] = "some_dir"; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = 14; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); /* * access(2) is one of the few syscalls that will not (always) follow * up a successful VOP_LOOKUP with another VOP.
*/ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); } /* * Lookup ".." when that vnode's entry cache has timed out, but its child's * hasn't. Since this file system doesn't set FUSE_EXPORT_SUPPORT, we have no * choice but to use the cached entry, even though it expired. */ TEST_F(Lookup, dotdot_entry_cache_timeout) { uint64_t foo_ino = 42; uint64_t bar_ino = 43; EXPECT_LOOKUP(FUSE_ROOT_ID, "foo") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = foo_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; // immediate timeout }))); EXPECT_LOOKUP(foo_ino, "bar") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = bar_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); expect_opendir(bar_ino); int fd = open("mountpoint/foo/bar", O_EXEC| O_DIRECTORY); ASSERT_LE(0, fd) << strerror(errno); /* Resolve "../.." relative to the open "bar" directory */ EXPECT_EQ(0, faccessat(fd, "../..", F_OK, 0)) << strerror(errno); } /* * Lookup ".." for a vnode with no valid parent nid * Regression test for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=259974 * Since the file system is not exportable, we have no choice but to return an * error.
*/ TEST_F(Lookup, dotdot_no_parent_nid) { uint64_t foo_ino = 42; uint64_t bar_ino = 43; int fd; EXPECT_LOOKUP(FUSE_ROOT_ID, "foo") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = foo_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); EXPECT_LOOKUP(foo_ino, "bar") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = bar_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); /* This matcher checks only the opcode, so a FUSE_OPENDIR for any nodeid is accepted */ EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_OPENDIR); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, open); }))); expect_forget(foo_ino, 1, NULL); fd = open("mountpoint/foo/bar", O_EXEC| O_DIRECTORY); ASSERT_LE(0, fd) << strerror(errno); // Try (and fail) to unmount the file system, to reclaim the mountpoint // and foo vnodes. ASSERT_NE(0, unmount("mountpoint", 0)); EXPECT_EQ(EBUSY, errno); nap(); // Because vnode reclamation is asynchronous EXPECT_NE(0, faccessat(fd, "../..", F_OK, 0)); EXPECT_EQ(ESTALE, errno); } /* * A daemon that returns an illegal error value should be handled gracefully.
* Regression test for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=263220 */ TEST_F(Lookup, ejustreturn) { const char FULLPATH[] = "mountpoint/does_not_exist"; const char RELPATH[] = "does_not_exist"; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { out.header.len = sizeof(out.header); out.header.error = 2; out.expected_errno = EINVAL; }))); EXPECT_NE(0, access(FULLPATH, F_OK)); EXPECT_EQ(EIO, errno); } /* An ENOENT reply from the daemon must surface as ENOENT from access(2). */ TEST_F(Lookup, enoent) { const char FULLPATH[] = "mountpoint/does_not_exist"; const char RELPATH[] = "does_not_exist"; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnErrno(ENOENT))); EXPECT_NE(0, access(FULLPATH, F_OK)); EXPECT_EQ(ENOENT, errno); } /* Looking up a name beneath an entry that the daemon reports as a regular file must fail with ENOTDIR. */ TEST_F(Lookup, enotdir) { const char FULLPATH[] = "mountpoint/not_a_dir/some_file.txt"; const char RELPATH[] = "not_a_dir"; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.entry_valid = UINT64_MAX; out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = 42; }))); ASSERT_EQ(-1, access(FULLPATH, F_OK)); ASSERT_EQ(ENOTDIR, errno); } /* * If lookup returns a non-zero entry timeout, then subsequent VOP_LOOKUPs * should use the cached inode rather than requery the daemon */ TEST_F(Lookup, entry_cache) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.entry_valid = UINT64_MAX; out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = 14; }))); ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); /* The second access(2) should use the cache */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); } /* * If the daemon returns an error of 0 and an inode of 0, that's a flag for * "ENOENT and cache it" with the given entry_timeout */ 
TEST_F(Lookup, entry_cache_negative) { struct timespec entry_valid = {.tv_sec = TIME_T_MAX, .tv_nsec = 0}; EXPECT_LOOKUP(FUSE_ROOT_ID, "does_not_exist") .Times(1) .WillOnce(Invoke(ReturnNegativeCache(&entry_valid))); EXPECT_NE(0, access("mountpoint/does_not_exist", F_OK)); EXPECT_EQ(ENOENT, errno); EXPECT_NE(0, access("mountpoint/does_not_exist", F_OK)); EXPECT_EQ(ENOENT, errno); } /* Negative entry caches should timeout, too */ TEST_F(Lookup, entry_cache_negative_timeout) { const char *RELPATH = "does_not_exist"; const char *FULLPATH = "mountpoint/does_not_exist"; struct timespec entry_valid = {.tv_sec = 0, .tv_nsec = NAP_NS / 2}; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .Times(2) .WillRepeatedly(Invoke(ReturnNegativeCache(&entry_valid))); EXPECT_NE(0, access(FULLPATH, F_OK)); EXPECT_EQ(ENOENT, errno); nap(); /* The cache has timed out; VOP_LOOKUP should requery the daemon*/ EXPECT_NE(0, access(FULLPATH, F_OK)); EXPECT_EQ(ENOENT, errno); } /* * If lookup returns a finite but non-zero entry cache timeout, then we should * discard the cached inode and requery the daemon */ TEST_F(Lookup, entry_cache_timeout) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .Times(2) .WillRepeatedly(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.entry_valid_nsec = NAP_NS / 2; out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = 14; }))); /* access(2) will issue a VOP_LOOKUP and fill the entry cache */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); /* Next access(2) will use the cached entry */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); nap(); /* The cache has timed out; VOP_LOOKUP should requery the daemon*/ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); } /* Basic success case: one lookup of a regular file directly under the mountpoint. */ TEST_F(Lookup, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) 
.WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = 14; }))); /* * access(2) is one of the few syscalls that will not (always) follow * up a successful VOP_LOOKUP with another VOP. */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); } /* * Lookup in a subdirectory of the fuse mount. The naughty server returns the * same inode for the child as for the parent. */ TEST_F(Lookup, parent_inode) { const char FULLPATH[] = "mountpoint/some_dir/some_file.txt"; const char DIRPATH[] = "some_dir"; const char RELPATH[] = "some_file.txt"; uint64_t dir_ino = 2; EXPECT_LOOKUP(FUSE_ROOT_ID, DIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = dir_ino; }))); EXPECT_LOOKUP(dir_ino, RELPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = dir_ino; }))); /* * access(2) is one of the few syscalls that will not (always) follow * up a successful VOP_LOOKUP with another VOP.
*/ ASSERT_EQ(-1, access(FULLPATH, F_OK)); ASSERT_EQ(EIO, errno); } // Lookup in a subdirectory of the fuse mount TEST_F(Lookup, subdir) { const char FULLPATH[] = "mountpoint/some_dir/some_file.txt"; const char DIRPATH[] = "some_dir"; const char RELPATH[] = "some_file.txt"; uint64_t dir_ino = 2; uint64_t file_ino = 3; EXPECT_LOOKUP(FUSE_ROOT_ID, DIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = dir_ino; }))); EXPECT_LOOKUP(dir_ino, RELPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = file_ino; }))); /* * access(2) is one of the few syscalls that will not (always) follow * up a successful VOP_LOOKUP with another VOP. */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); } /* * The server returns two different vtypes for the same nodeid. This is * technically allowed if the entry's cache has already expired. * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=258022 */ TEST_F(Lookup, vtype_conflict) { const char FIRSTFULLPATH[] = "mountpoint/foo"; const char SECONDFULLPATH[] = "mountpoint/bar"; const char FIRSTRELPATH[] = "foo"; const char SECONDRELPATH[] = "bar"; uint64_t ino = 42; EXPECT_LOOKUP(FUSE_ROOT_ID, FIRSTRELPATH) .WillOnce(Invoke( ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0644; out.body.entry.nodeid = ino; out.body.entry.attr.nlink = 1; }))); expect_lookup(SECONDRELPATH, ino, S_IFREG | 0755, 0, 1, UINT64_MAX); // VOP_FORGET happens asynchronously, so it may or may not arrive // before the test completes.
EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_FORGET && in.header.nodeid == ino && in.body.forget.nlookup == 1); }, Eq(true)), _) ).Times(AtMost(1)) .WillOnce(Invoke([=](auto in __unused, auto &out __unused) { })); ASSERT_EQ(0, access(FIRSTFULLPATH, F_OK)) << strerror(errno); EXPECT_EQ(0, access(SECONDFULLPATH, F_OK)) << strerror(errno); } /* Same scenario as Lookup.ok, but the reply length is set for the entry_7_8 layout. */ TEST_F(Lookup_7_8, ok) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry_7_8); out.body.entry.attr.mode = S_IFREG | 0644; out.body.entry.nodeid = 14; }))); /* * access(2) is one of the few syscalls that will not (always) follow * up a successful VOP_LOOKUP with another VOP. */ ASSERT_EQ(0, access(FULLPATH, F_OK)) << strerror(errno); } /* * Lookup ".." when that vnode's entry cache has timed out, but its child's * hasn't. */ TEST_F(LookupExportable, dotdot_entry_cache_timeout) { uint64_t foo_ino = 42; uint64_t bar_ino = 43; EXPECT_LOOKUP(FUSE_ROOT_ID, "foo") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = foo_ino; + out.body.entry.attr.ino = foo_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; // immediate timeout }))); EXPECT_LOOKUP(foo_ino, "bar") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = bar_ino; + out.body.entry.attr.ino = bar_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); expect_opendir(bar_ino); EXPECT_LOOKUP(foo_ino, "..") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = 
FUSE_ROOT_ID; + out.body.entry.attr.ino = FUSE_ROOT_ID; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); int fd = open("mountpoint/foo/bar", O_EXEC| O_DIRECTORY); ASSERT_LE(0, fd) << strerror(errno); /* FreeBSD's fusefs driver always uses the same cache expiration time * for ".." as for the directory itself. So we need to look up two * levels to find an expired ".." cache entry. */ EXPECT_EQ(0, faccessat(fd, "../..", F_OK, 0)) << strerror(errno); } /* * Lookup ".." for a vnode with no valid parent nid * Regression test for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=259974 * Since the file system is exportable, we should resolve the problem by * sending a FUSE_LOOKUP for "..". */ TEST_F(LookupExportable, dotdot_no_parent_nid) { uint64_t foo_ino = 42; uint64_t bar_ino = 43; int fd; EXPECT_LOOKUP(FUSE_ROOT_ID, "foo") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = foo_ino; + out.body.entry.attr.ino = foo_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); EXPECT_LOOKUP(foo_ino, "bar") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = bar_ino; + out.body.entry.attr.ino = bar_ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_OPENDIR); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, open); }))); expect_forget(foo_ino, 1, NULL); EXPECT_LOOKUP(bar_ino, "..") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = foo_ino; + out.body.entry.attr.ino = foo_ino; 
out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); EXPECT_LOOKUP(foo_ino, "..") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = FUSE_ROOT_ID; + out.body.entry.attr.ino = FUSE_ROOT_ID; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); fd = open("mountpoint/foo/bar", O_EXEC| O_DIRECTORY); ASSERT_LE(0, fd) << strerror(errno); // Try (and fail) to unmount the file system, to reclaim the mountpoint // and foo vnodes. ASSERT_NE(0, unmount("mountpoint", 0)); EXPECT_EQ(EBUSY, errno); nap(); // Because vnode reclamation is asynchronous EXPECT_EQ(0, faccessat(fd, "../..", F_OK, 0)) << strerror(errno); } diff --git a/tests/sys/fs/fusefs/nfs.cc b/tests/sys/fs/fusefs/nfs.cc index 27ffc8f5cbc1..2fa2b290f383 100644 --- a/tests/sys/fs/fusefs/nfs.cc +++ b/tests/sys/fs/fusefs/nfs.cc @@ -1,374 +1,480 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019 The FreeBSD Foundation * * This software was developed by BFF Storage Systems, LLC under sponsorship * from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* This file tests functionality needed by NFS servers */ extern "C" { #include #include #include #include #include } #include "mockfs.hh" #include "utils.hh" using namespace std; using namespace testing; /* Base fixture: skips the test unless the effective uid is 0, then runs FuseTest::SetUp(). */ class Nfs: public FuseTest { public: virtual void SetUp() { if (geteuid() != 0) GTEST_SKIP() << "This test requires a privileged user"; FuseTest::SetUp(); } }; /* Nfs fixture that sets m_init_flags to FUSE_EXPORT_SUPPORT before the common setup runs. */ class Exportable: public Nfs { public: virtual void SetUp() { m_init_flags = FUSE_EXPORT_SUPPORT; Nfs::SetUp(); } }; class Fhstat: public Exportable {}; class FhstatNotExportable: public Nfs {}; class Getfh: public Exportable {}; class Readdir: public Exportable {}; /* If the server returns a different generation number, then file is stale */ TEST_F(Fhstat, estale) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; fhandle_t fhp; struct stat sb; const uint64_t ino = 42; const mode_t mode = S_IFDIR | 0755; Sequence seq; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .InSequence(seq) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); EXPECT_LOOKUP(ino, ".") .InSequence(seq) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; 
out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 2; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno); ASSERT_EQ(-1, fhstat(&fhp, &sb)); EXPECT_EQ(ESTALE, errno); } /* If we must lookup an entry from the server, send a LOOKUP request for "." */ TEST_F(Fhstat, lookup_dot) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; fhandle_t fhp; struct stat sb; const uint64_t ino = 42; const mode_t mode = S_IFDIR | 0755; const uid_t uid = 12345; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr.uid = uid; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); EXPECT_LOOKUP(ino, ".") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr.uid = uid; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno); ASSERT_EQ(0, fhstat(&fhp, &sb)) << strerror(errno); EXPECT_EQ(uid, sb.st_uid); EXPECT_EQ(mode, sb.st_mode); } /* Gracefully handle failures to lookup ".".
*/ TEST_F(Fhstat, lookup_dot_error) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; fhandle_t fhp; struct stat sb; const uint64_t ino = 42; const mode_t mode = S_IFDIR | 0755; const uid_t uid = 12345; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr.uid = uid; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); EXPECT_LOOKUP(ino, ".") .WillOnce(Invoke(ReturnErrno(EDOOFUS))); ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno); ASSERT_EQ(-1, fhstat(&fhp, &sb)); EXPECT_EQ(EDOOFUS, errno); } /* Use a file handle whose entry is still cached. NOTE(review): the lambda below assigns attr.ino twice with the same value; one of the two assignments is redundant. */ TEST_F(Fhstat, cached) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; fhandle_t fhp; struct stat sb; const uint64_t ino = 42; const mode_t mode = S_IFDIR | 0755; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr.ino = ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno); ASSERT_EQ(0, fhstat(&fhp, &sb)) << strerror(errno); EXPECT_EQ(ino, sb.st_ino); } /* File handle entries should expire from the cache, too. NOTE(review): both lambdas below assign attr.ino twice with the same value; one assignment in each is redundant. */ TEST_F(Fhstat, cache_expired) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; fhandle_t fhp; struct stat sb; const uint64_t ino = 42; const mode_t mode = S_IFDIR | 0755; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); 
out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr.ino = ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid_nsec = NAP_NS / 2; }))); EXPECT_LOOKUP(ino, ".") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr.ino = ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno); ASSERT_EQ(0, fhstat(&fhp, &sb)) << strerror(errno); EXPECT_EQ(ino, sb.st_ino); nap(); /* Cache should be expired; fuse should issue a FUSE_LOOKUP */ ASSERT_EQ(0, fhstat(&fhp, &sb)) << strerror(errno); EXPECT_EQ(ino, sb.st_ino); } +/* + * If the server returns a FUSE_LOOKUP response for a nodeid that we didn't + * look up, it's a bug. But we should handle it gracefully. 
+ */
+TEST_F(Fhstat, inconsistent_nodeid)
+{
+	const char FULLPATH[] = "mountpoint/some_dir/.";
+	const char RELDIRPATH[] = "some_dir";
+	fhandle_t fhp;
+	struct stat sb;
+	const uint64_t ino_in = 42;	/* nodeid returned by the first lookup */
+	const uint64_t ino_out = 43;	/* mismatching nodeid returned for "." */
+	const mode_t mode = S_IFDIR | 0755;
+	const uid_t uid = 12345;
+
+	EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH)
+	.WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, entry);
+		out.body.entry.nodeid = ino_in;
+		out.body.entry.attr.ino = ino_in;
+		out.body.entry.attr.mode = mode;
+		out.body.entry.generation = 1;
+		out.body.entry.attr.uid = uid;
+		out.body.entry.attr_valid = UINT64_MAX;
+		out.body.entry.entry_valid = 0;	/* presumably forces fhstat to requery the daemon */
+	})));
+
+	EXPECT_LOOKUP(ino_in, ".")
+	.WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, entry);
+		out.body.entry.nodeid = ino_out;	/* deliberately not ino_in */
+		out.body.entry.attr.ino = ino_out;
+		out.body.entry.attr.mode = mode;
+		out.body.entry.generation = 1;
+		out.body.entry.attr.uid = uid;
+		out.body.entry.attr_valid = UINT64_MAX;
+		out.body.entry.entry_valid = 0;
+	})));
+
+	ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno);
+	ASSERT_EQ(-1, fhstat(&fhp, &sb));	/* errno is only meaningful once the call has failed */
+	EXPECT_EQ(EIO, errno);
+}
+
+/*
+ * If the server returns a FUSE_LOOKUP response where the nodeid doesn't match
+ * the inode number, and the file system is exported, it's a bug. But we
+ * should handle it gracefully.
+ */ +TEST_F(Fhstat, inconsistent_ino) +{ + const char FULLPATH[] = "mountpoint/some_dir/."; + const char RELDIRPATH[] = "some_dir"; + fhandle_t fhp; + struct stat sb; + const uint64_t nodeid = 42; + const uint64_t ino = 711; // Could be anything that != nodeid + const mode_t mode = S_IFDIR | 0755; + const uid_t uid = 12345; + + EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) + .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, entry); + out.body.entry.nodeid = nodeid; + out.body.entry.attr.ino = nodeid; + out.body.entry.attr.mode = mode; + out.body.entry.generation = 1; + out.body.entry.attr.uid = uid; + out.body.entry.attr_valid = UINT64_MAX; + out.body.entry.entry_valid = 0; + }))); + + EXPECT_LOOKUP(nodeid, ".") + .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { + SET_OUT_HEADER_LEN(out, entry); + out.body.entry.nodeid = nodeid; + out.body.entry.attr.ino = ino; + out.body.entry.attr.mode = mode; + out.body.entry.generation = 1; + out.body.entry.attr.uid = uid; + out.body.entry.attr_valid = UINT64_MAX; + out.body.entry.entry_valid = 0; + }))); + + ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno); + /* + * The fhstat operation will actually succeed. But future operations + * will likely fail.
+ */ + ASSERT_EQ(0, fhstat(&fhp, &sb)) << strerror(errno); + EXPECT_EQ(ino, sb.st_ino); +} + /* * If the server doesn't set FUSE_EXPORT_SUPPORT, then we can't do NFS-style * lookups */ TEST_F(FhstatNotExportable, lookup_dot) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; fhandle_t fhp; const uint64_t ino = 42; const mode_t mode = S_IFDIR | 0755; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); ASSERT_EQ(-1, getfh(FULLPATH, &fhp)); ASSERT_EQ(EOPNOTSUPP, errno); } /* FreeBSD's fid struct doesn't have enough space for 64-bit generations */ TEST_F(Getfh, eoverflow) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; fhandle_t fhp; uint64_t ino = 42; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = (uint64_t)UINT32_MAX + 1; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); ASSERT_NE(0, getfh(FULLPATH, &fhp)); EXPECT_EQ(EOVERFLOW, errno); } /* Get an NFS file handle */ TEST_F(Getfh, ok) { const char FULLPATH[] = "mountpoint/some_dir/."; const char RELDIRPATH[] = "some_dir"; fhandle_t fhp; uint64_t ino = 42; EXPECT_LOOKUP(FUSE_ROOT_ID, RELDIRPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = S_IFDIR | 0755; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = UINT64_MAX; }))); ASSERT_EQ(0, 
getfh(FULLPATH, &fhp)) << strerror(errno); } /* * Call readdir via a file handle. * * This is how a userspace nfs server like nfs-ganesha or unfs3 would call * readdir. The in-kernel NFS server never does any equivalent of open. I * haven't discovered a way to mimic nfsd's behavior short of actually running * nfsd. */ TEST_F(Readdir, getdirentries) { const char FULLPATH[] = "mountpoint/some_dir"; const char RELPATH[] = "some_dir"; uint64_t ino = 42; mode_t mode = S_IFDIR | 0755; fhandle_t fhp; int fd; char buf[8192]; ssize_t r; EXPECT_LOOKUP(FUSE_ROOT_ID, RELPATH) .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); EXPECT_LOOKUP(ino, ".") .WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { SET_OUT_HEADER_LEN(out, entry); out.body.entry.attr.mode = mode; out.body.entry.nodeid = ino; + out.body.entry.attr.ino = ino; out.body.entry.generation = 1; out.body.entry.attr_valid = UINT64_MAX; out.body.entry.entry_valid = 0; }))); expect_opendir(ino); EXPECT_CALL(*m_mock, process( ResultOf([=](auto in) { return (in.header.opcode == FUSE_READDIR && in.header.nodeid == ino && in.body.readdir.size == sizeof(buf)); }, Eq(true)), _) ).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) { /* Header-only reply: no directory entries, so getdirentries() below is expected to return 0 */ out.header.error = 0; out.header.len = sizeof(out.header); }))); ASSERT_EQ(0, getfh(FULLPATH, &fhp)) << strerror(errno); fd = fhopen(&fhp, O_DIRECTORY); ASSERT_LE(0, fd) << strerror(errno); r = getdirentries(fd, buf, sizeof(buf), 0); ASSERT_EQ(0, r) << strerror(errno); leak(fd); }